All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 7/9] netfilter: nf_tables: delete devices from flowtable
From: Pablo Neira Ayuso @ 2020-05-29 17:50 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba
In-Reply-To: <20200529175026.30541-1-pablo@netfilter.org>

This patch allows users to delete devices from existing flowtables.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |   1 +
 net/netfilter/nf_tables_api.c     | 113 +++++++++++++++++++++++++-----
 2 files changed, 98 insertions(+), 16 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4f58c4411bb4..6f0f6fca9ac3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1002,6 +1002,7 @@ struct nft_stats {
 
 struct nft_hook {
 	struct list_head	list;
+	bool			inactive;
 	struct nf_hook_ops	ops;
 	struct rcu_head		rcu;
 };
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 98f2cbb97e39..1c2c3bb78fa0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1669,6 +1669,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
 		goto err_hook_dev;
 	}
 	hook->ops.dev = dev;
+	hook->inactive = false;
 
 	return hook;
 
@@ -1678,17 +1679,17 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
 	return ERR_PTR(err);
 }
 
-static bool nft_hook_list_find(struct list_head *hook_list,
-			       const struct nft_hook *this)
+static struct nft_hook *nft_hook_list_find(struct list_head *hook_list,
+					   const struct nft_hook *this)
 {
 	struct nft_hook *hook;
 
 	list_for_each_entry(hook, hook_list, list) {
 		if (this->ops.dev == hook->ops.dev)
-			return true;
+			return hook;
 	}
 
-	return false;
+	return NULL;
 }
 
 static int nf_tables_parse_netdev_hooks(struct net *net,
@@ -6530,6 +6531,51 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 	return err;
 }
 
+static int nft_delflowtable_hook(struct nft_ctx *ctx,
+				 struct nft_flowtable *flowtable)
+{
+	const struct nlattr * const *nla = ctx->nla;
+	struct nft_flowtable_hook flowtable_hook;
+	struct nft_hook *this, *next, *hook;
+	struct nft_trans *trans;
+	int err;
+
+	err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK],
+				       &flowtable_hook, &flowtable->data);
+	if (err < 0)
+		return err;
+
+	list_for_each_entry_safe(this, next, &flowtable_hook.list, list) {
+		hook = nft_hook_list_find(&flowtable->hook_list, this);
+		if (!hook) {
+			err = -ENOENT;
+			goto err_flowtable_del_hook;
+		}
+		hook->inactive = true;
+		list_del(&this->list);
+		kfree(this);
+	}
+
+	trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
+				sizeof(struct nft_trans_flowtable));
+	if (!trans)
+		return -ENOMEM;
+
+	nft_trans_flowtable(trans) = flowtable;
+	nft_trans_flowtable_update(trans) = true;
+	INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return 0;
+
+err_flowtable_del_hook:
+	list_for_each_entry(hook, &flowtable_hook.list, list)
+		hook->inactive = false;
+
+	return err;
+}
+
 static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
@@ -6568,13 +6614,17 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
 		NL_SET_BAD_ATTR(extack, attr);
 		return PTR_ERR(flowtable);
 	}
+
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+
+	if (nla[NFTA_FLOWTABLE_HOOK])
+		return nft_delflowtable_hook(&ctx, flowtable);
+
 	if (flowtable->use > 0) {
 		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
-
 	return nft_delflowtable(&ctx, flowtable);
 }
 
@@ -7184,7 +7234,10 @@ static void nft_commit_release(struct nft_trans *trans)
 		nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
 		break;
 	case NFT_MSG_DELFLOWTABLE:
-		nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
+		if (nft_trans_flowtable_update(trans))
+			nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans));
+		else
+			nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
 		break;
 	}
 
@@ -7345,6 +7398,17 @@ static void nft_chain_del(struct nft_chain *chain)
 	list_del_rcu(&chain->list);
 }
 
+static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
+				    struct list_head *hook_list)
+{
+	struct nft_hook *hook, *next;
+
+	list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+		if (hook->inactive)
+			list_move(&hook->list, hook_list);
+	}
+}
+
 static void nf_tables_module_autoload_cleanup(struct net *net)
 {
 	struct nft_module_request *req, *next;
@@ -7570,13 +7634,24 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELFLOWTABLE:
-			list_del_rcu(&nft_trans_flowtable(trans)->list);
-			nf_tables_flowtable_notify(&trans->ctx,
-						   nft_trans_flowtable(trans),
-						   &nft_trans_flowtable(trans)->hook_list,
-						   NFT_MSG_DELFLOWTABLE);
-			nft_unregister_flowtable_net_hooks(net,
-					&nft_trans_flowtable(trans)->hook_list);
+			if (nft_trans_flowtable_update(trans)) {
+				nft_flowtable_hooks_del(nft_trans_flowtable(trans),
+							&nft_trans_flowtable_hooks(trans));
+				nf_tables_flowtable_notify(&trans->ctx,
+							   nft_trans_flowtable(trans),
+							   &nft_trans_flowtable_hooks(trans),
+							   NFT_MSG_DELFLOWTABLE);
+				nft_unregister_flowtable_net_hooks(net,
+								   &nft_trans_flowtable_hooks(trans));
+			} else {
+				list_del_rcu(&nft_trans_flowtable(trans)->list);
+				nf_tables_flowtable_notify(&trans->ctx,
+							   nft_trans_flowtable(trans),
+							   &nft_trans_flowtable(trans)->hook_list,
+							   NFT_MSG_DELFLOWTABLE);
+				nft_unregister_flowtable_net_hooks(net,
+						&nft_trans_flowtable(trans)->hook_list);
+			}
 			break;
 		}
 	}
@@ -7638,6 +7713,7 @@ static int __nf_tables_abort(struct net *net, bool autoload)
 {
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
+	struct nft_hook *hook;
 
 	list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
 					 list) {
@@ -7746,8 +7822,13 @@ static int __nf_tables_abort(struct net *net, bool autoload)
 			}
 			break;
 		case NFT_MSG_DELFLOWTABLE:
-			trans->ctx.table->use++;
-			nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+			if (nft_trans_flowtable_update(trans)) {
+				list_for_each_entry(hook, &nft_trans_flowtable(trans)->hook_list, list)
+					hook->inactive = false;
+			} else {
+				trans->ctx.table->use++;
+				nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+			}
 			nft_trans_destroy(trans);
 			break;
 		}
-- 
2.20.1


^ permalink raw reply related

* [PATCH 1/9] netfilter: ctnetlink: add kernel side filtering for dump
From: Pablo Neira Ayuso @ 2020-05-29 17:50 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba
In-Reply-To: <20200529175026.30541-1-pablo@netfilter.org>

From: Romain Bellan <romain.bellan@wifirst.fr>

Conntrack dump does not support kernel side filtering (only get exists,
but it returns only one entry. And user has to give a full valid tuple)

It means that userspace has to implement filtering after receiving many
irrelevant entries, consuming resources (conntrack table is sometimes
very huge, much more than a routing table for example).

This patch adds filtering in kernel side. To achieve this goal, we:

 * Add a new CTA_FILTER netlink attributes, actually a flag list to
   parametize filtering
 * Convert some *nlattr_to_tuple() functions, to allow a partial parsing
   of CTA_TUPLE_ORIG and CTA_TUPLE_REPLY (so nf_conntrack_tuple it not
   fully set)

Filtering is now possible on:
 * IP SRC/DST values
 * Ports for TCP and UDP flows
 * IMCP(v6) codes types and IDs

Filtering is done as an "AND" operator. For example, when flags
PROTO_SRC_PORT, PROTO_NUM and IP_SRC are sets, only entries matching all
values are dumped.

Changes since v1:
  Set NLM_F_DUMP_FILTERED in nlm flags if entries are filtered

Changes since v2:
  Move several constants to nf_internals.h
  Move a fix on netlink values check in a separate patch
  Add a check on not-supported flags
  Return EOPNOTSUPP if CDA_FILTER is set in ctnetlink_flush_conntrack
  (not yet implemented)
  Code style issues

Changes since v3:
  Fix compilation warning reported by kbuild test robot

Changes since v4:
  Fix a regression introduced in v3 (returned EINVAL for valid netlink
  messages without CTA_MARK)

Changes since v5:
  Change definition of CTA_FILTER_F_ALL
  Fix a regression when CTA_TUPLE_ZONE is not set

Signed-off-by: Romain Bellan <romain.bellan@wifirst.fr>
Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h  |   6 +-
 .../linux/netfilter/nfnetlink_conntrack.h     |   9 +
 net/netfilter/nf_conntrack_core.c             |  19 +-
 net/netfilter/nf_conntrack_netlink.c          | 334 ++++++++++++++++--
 net/netfilter/nf_conntrack_proto_icmp.c       |  40 ++-
 net/netfilter/nf_conntrack_proto_icmpv6.c     |  42 ++-
 net/netfilter/nf_internals.h                  |  17 +
 7 files changed, 394 insertions(+), 73 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 4cad1f0a327a..88186b95b3c2 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -42,7 +42,8 @@ struct nf_conntrack_l4proto {
 	/* Calculate tuple nlattr size */
 	unsigned int (*nlattr_tuple_size)(void);
 	int (*nlattr_to_tuple)(struct nlattr *tb[],
-			       struct nf_conntrack_tuple *t);
+			       struct nf_conntrack_tuple *t,
+			       u_int32_t flags);
 	const struct nla_policy *nla_policy;
 
 	struct {
@@ -152,7 +153,8 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto);
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
 			       const struct nf_conntrack_tuple *tuple);
 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
-			       struct nf_conntrack_tuple *t);
+			       struct nf_conntrack_tuple *t,
+			       u_int32_t flags);
 unsigned int nf_ct_port_nlattr_tuple_size(void);
 extern const struct nla_policy nf_ct_port_nla_policy[];
 
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 1d41810d17e2..262881792671 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -55,6 +55,7 @@ enum ctattr_type {
 	CTA_LABELS,
 	CTA_LABELS_MASK,
 	CTA_SYNPROXY,
+	CTA_FILTER,
 	__CTA_MAX
 };
 #define CTA_MAX (__CTA_MAX - 1)
@@ -276,4 +277,12 @@ enum ctattr_expect_stats {
 };
 #define CTA_STATS_EXP_MAX (__CTA_STATS_EXP_MAX - 1)
 
+enum ctattr_filter {
+	CTA_FILTER_UNSPEC,
+	CTA_FILTER_ORIG_FLAGS,
+	CTA_FILTER_REPLY_FLAGS,
+	__CTA_FILTER_MAX
+};
+#define CTA_FILTER_MAX (__CTA_FILTER_MAX - 1)
+
 #endif /* _IPCONNTRACK_NETLINK_H */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1d57b95d3481..8abb1727bcc4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1974,13 +1974,22 @@ const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
 
 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
-			       struct nf_conntrack_tuple *t)
+			       struct nf_conntrack_tuple *t,
+			       u_int32_t flags)
 {
-	if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) {
+		if (!tb[CTA_PROTO_SRC_PORT])
+			return -EINVAL;
+
+		t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
+	}
 
-	t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
-	t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) {
+		if (!tb[CTA_PROTO_DST_PORT])
+			return -EINVAL;
+
+		t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9ddfcd002d3b..d7bd8b1f27d5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -54,6 +54,8 @@
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
+#include "nf_internals.h"
+
 MODULE_LICENSE("GPL");
 
 static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
@@ -544,14 +546,16 @@ static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
 
 static int
 ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
-		    struct nf_conn *ct, bool extinfo)
+		    struct nf_conn *ct, bool extinfo, unsigned int flags)
 {
 	const struct nf_conntrack_zone *zone;
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	struct nlattr *nest_parms;
-	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
+	unsigned int event;
 
+	if (portid)
+		flags |= NLM_F_MULTI;
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW);
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
@@ -847,17 +851,70 @@ static int ctnetlink_done(struct netlink_callback *cb)
 }
 
 struct ctnetlink_filter {
+	u_int32_t cta_flags;
 	u8 family;
+
+	u_int32_t orig_flags;
+	u_int32_t reply_flags;
+
+	struct nf_conntrack_tuple orig;
+	struct nf_conntrack_tuple reply;
+	struct nf_conntrack_zone zone;
+
 	struct {
 		u_int32_t val;
 		u_int32_t mask;
 	} mark;
 };
 
+static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
+	[CTA_FILTER_ORIG_FLAGS]		= { .type = NLA_U32 },
+	[CTA_FILTER_REPLY_FLAGS]	= { .type = NLA_U32 },
+};
+
+static int ctnetlink_parse_filter(const struct nlattr *attr,
+				  struct ctnetlink_filter *filter)
+{
+	struct nlattr *tb[CTA_FILTER_MAX + 1];
+	int ret = 0;
+
+	ret = nla_parse_nested(tb, CTA_FILTER_MAX, attr, cta_filter_nla_policy,
+			       NULL);
+	if (ret)
+		return ret;
+
+	if (tb[CTA_FILTER_ORIG_FLAGS]) {
+		filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]);
+		if (filter->orig_flags & ~CTA_FILTER_F_ALL)
+			return -EOPNOTSUPP;
+	}
+
+	if (tb[CTA_FILTER_REPLY_FLAGS]) {
+		filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]);
+		if (filter->reply_flags & ~CTA_FILTER_F_ALL)
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int ctnetlink_parse_zone(const struct nlattr *attr,
+				struct nf_conntrack_zone *zone);
+static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
+					 struct nf_conntrack_tuple *tuple,
+					 u32 type, u_int8_t l3num,
+					 struct nf_conntrack_zone *zone,
+					 u_int32_t flags);
+
+/* applied on filters */
+#define CTA_FILTER_F_CTA_MARK			(1 << 0)
+#define CTA_FILTER_F_CTA_MARK_MASK		(1 << 1)
+
 static struct ctnetlink_filter *
 ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 {
 	struct ctnetlink_filter *filter;
+	int err;
 
 #ifndef CONFIG_NF_CONNTRACK_MARK
 	if (cda[CTA_MARK] || cda[CTA_MARK_MASK])
@@ -871,14 +928,65 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 	filter->family = family;
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
+	if (cda[CTA_MARK]) {
 		filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
-		filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+		filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK);
+
+		if (cda[CTA_MARK_MASK]) {
+			filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+			filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK_MASK);
+		} else {
+			filter->mark.mask = 0xffffffff;
+		}
+	} else if (cda[CTA_MARK_MASK]) {
+		return ERR_PTR(-EINVAL);
 	}
 #endif
+	if (!cda[CTA_FILTER])
+		return filter;
+
+	err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	err = ctnetlink_parse_filter(cda[CTA_FILTER], filter);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	if (filter->orig_flags) {
+		if (!cda[CTA_TUPLE_ORIG])
+			return ERR_PTR(-EINVAL);
+
+		err = ctnetlink_parse_tuple_filter(cda, &filter->orig,
+						   CTA_TUPLE_ORIG,
+						   filter->family,
+						   &filter->zone,
+						   filter->orig_flags);
+		if (err < 0)
+			return ERR_PTR(err);
+	}
+
+	if (filter->reply_flags) {
+		if (!cda[CTA_TUPLE_REPLY])
+			return ERR_PTR(-EINVAL);
+
+		err = ctnetlink_parse_tuple_filter(cda, &filter->reply,
+						   CTA_TUPLE_REPLY,
+						   filter->family,
+						   &filter->zone,
+						   filter->orig_flags);
+		if (err < 0)
+			return ERR_PTR(err);
+	}
+
 	return filter;
 }
 
+static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
+{
+	return family || cda[CTA_MARK] || cda[CTA_FILTER];
+}
+
 static int ctnetlink_start(struct netlink_callback *cb)
 {
 	const struct nlattr * const *cda = cb->data;
@@ -886,7 +994,7 @@ static int ctnetlink_start(struct netlink_callback *cb)
 	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
 	u8 family = nfmsg->nfgen_family;
 
-	if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+	if (ctnetlink_needs_filter(family, cda)) {
 		filter = ctnetlink_alloc_filter(cda, family);
 		if (IS_ERR(filter))
 			return PTR_ERR(filter);
@@ -896,9 +1004,79 @@ static int ctnetlink_start(struct netlink_callback *cb)
 	return 0;
 }
 
+static int ctnetlink_filter_match_tuple(struct nf_conntrack_tuple *filter_tuple,
+					struct nf_conntrack_tuple *ct_tuple,
+					u_int32_t flags, int family)
+{
+	switch (family) {
+	case NFPROTO_IPV4:
+		if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) &&
+		    filter_tuple->src.u3.ip != ct_tuple->src.u3.ip)
+			return  0;
+
+		if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) &&
+		    filter_tuple->dst.u3.ip != ct_tuple->dst.u3.ip)
+			return  0;
+		break;
+	case NFPROTO_IPV6:
+		if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) &&
+		    !ipv6_addr_cmp(&filter_tuple->src.u3.in6,
+				   &ct_tuple->src.u3.in6))
+			return 0;
+
+		if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) &&
+		    !ipv6_addr_cmp(&filter_tuple->dst.u3.in6,
+				   &ct_tuple->dst.u3.in6))
+			return 0;
+		break;
+	}
+
+	if ((flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) &&
+	    filter_tuple->dst.protonum != ct_tuple->dst.protonum)
+		return 0;
+
+	switch (ct_tuple->dst.protonum) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) &&
+		    filter_tuple->src.u.tcp.port != ct_tuple->src.u.tcp.port)
+			return 0;
+
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) &&
+		    filter_tuple->dst.u.tcp.port != ct_tuple->dst.u.tcp.port)
+			return 0;
+		break;
+	case IPPROTO_ICMP:
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_TYPE)) &&
+		    filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type)
+			return 0;
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_CODE)) &&
+		    filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code)
+			return 0;
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_ID)) &&
+		    filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id)
+			return 0;
+		break;
+	case IPPROTO_ICMPV6:
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) &&
+		    filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type)
+			return 0;
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) &&
+		    filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code)
+			return 0;
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) &&
+		    filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id)
+			return 0;
+		break;
+	}
+
+	return 1;
+}
+
 static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
 {
 	struct ctnetlink_filter *filter = data;
+	struct nf_conntrack_tuple *tuple;
 
 	if (filter == NULL)
 		goto out;
@@ -910,8 +1088,28 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
 	if (filter->family && nf_ct_l3num(ct) != filter->family)
 		goto ignore_entry;
 
+	if (filter->orig_flags) {
+		tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL);
+		if (!ctnetlink_filter_match_tuple(&filter->orig, tuple,
+						  filter->orig_flags,
+						  filter->family))
+			goto ignore_entry;
+	}
+
+	if (filter->reply_flags) {
+		tuple = nf_ct_tuple(ct, IP_CT_DIR_REPLY);
+		if (!ctnetlink_filter_match_tuple(&filter->reply, tuple,
+						  filter->reply_flags,
+						  filter->family))
+			goto ignore_entry;
+	}
+
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if ((ct->mark & filter->mark.mask) != filter->mark.val)
+	if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK_MASK)) &&
+	    (ct->mark & filter->mark.mask) != filter->mark.val)
+		goto ignore_entry;
+	else if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK)) &&
+		 ct->mark != filter->mark.val)
 		goto ignore_entry;
 #endif
 
@@ -925,6 +1123,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
 static int
 ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0;
 	struct net *net = sock_net(skb->sk);
 	struct nf_conn *ct, *last;
 	struct nf_conntrack_tuple_hash *h;
@@ -979,7 +1178,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 			ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
 					    cb->nlh->nlmsg_seq,
 					    NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
-					    ct, true);
+					    ct, true, flags);
 			if (res < 0) {
 				nf_conntrack_get(&ct->ct_general);
 				cb->args[1] = (unsigned long)ct;
@@ -1014,31 +1213,50 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 }
 
 static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *t)
+				struct nf_conntrack_tuple *t,
+				u_int32_t flags)
 {
-	if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) {
+		if (!tb[CTA_IP_V4_SRC])
+			return -EINVAL;
+
+		t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) {
+		if (!tb[CTA_IP_V4_DST])
+			return -EINVAL;
 
-	t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
-	t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+		t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+	}
 
 	return 0;
 }
 
 static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *t)
+				struct nf_conntrack_tuple *t,
+				u_int32_t flags)
 {
-	if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) {
+		if (!tb[CTA_IP_V6_SRC])
+			return -EINVAL;
 
-	t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
-	t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
+		t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) {
+		if (!tb[CTA_IP_V6_DST])
+			return -EINVAL;
+
+		t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
+	}
 
 	return 0;
 }
 
 static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
-				    struct nf_conntrack_tuple *tuple)
+				    struct nf_conntrack_tuple *tuple,
+				    u_int32_t flags)
 {
 	struct nlattr *tb[CTA_IP_MAX+1];
 	int ret = 0;
@@ -1054,10 +1272,10 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
 
 	switch (tuple->src.l3num) {
 	case NFPROTO_IPV4:
-		ret = ipv4_nlattr_to_tuple(tb, tuple);
+		ret = ipv4_nlattr_to_tuple(tb, tuple, flags);
 		break;
 	case NFPROTO_IPV6:
-		ret = ipv6_nlattr_to_tuple(tb, tuple);
+		ret = ipv6_nlattr_to_tuple(tb, tuple, flags);
 		break;
 	}
 
@@ -1069,7 +1287,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
 };
 
 static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
-				       struct nf_conntrack_tuple *tuple)
+				       struct nf_conntrack_tuple *tuple,
+				       u_int32_t flags)
 {
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nlattr *tb[CTA_PROTO_MAX+1];
@@ -1080,8 +1299,12 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
 	if (ret < 0)
 		return ret;
 
+	if (!(flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)))
+		return 0;
+
 	if (!tb[CTA_PROTO_NUM])
 		return -EINVAL;
+
 	tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
 
 	rcu_read_lock();
@@ -1092,7 +1315,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
 						     l4proto->nla_policy,
 						     NULL);
 		if (ret == 0)
-			ret = l4proto->nlattr_to_tuple(tb, tuple);
+			ret = l4proto->nlattr_to_tuple(tb, tuple, flags);
 	}
 
 	rcu_read_unlock();
@@ -1143,10 +1366,21 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
 	[CTA_TUPLE_ZONE]	= { .type = NLA_U16 },
 };
 
+#define CTA_FILTER_F_ALL_CTA_PROTO \
+  (CTA_FILTER_F_CTA_PROTO_SRC_PORT | \
+   CTA_FILTER_F_CTA_PROTO_DST_PORT | \
+   CTA_FILTER_F_CTA_PROTO_ICMP_TYPE | \
+   CTA_FILTER_F_CTA_PROTO_ICMP_CODE | \
+   CTA_FILTER_F_CTA_PROTO_ICMP_ID | \
+   CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE | \
+   CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE | \
+   CTA_FILTER_F_CTA_PROTO_ICMPV6_ID)
+
 static int
-ctnetlink_parse_tuple(const struct nlattr * const cda[],
-		      struct nf_conntrack_tuple *tuple, u32 type,
-		      u_int8_t l3num, struct nf_conntrack_zone *zone)
+ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
+			      struct nf_conntrack_tuple *tuple, u32 type,
+			      u_int8_t l3num, struct nf_conntrack_zone *zone,
+			      u_int32_t flags)
 {
 	struct nlattr *tb[CTA_TUPLE_MAX+1];
 	int err;
@@ -1158,23 +1392,32 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 	if (err < 0)
 		return err;
 
-	if (!tb[CTA_TUPLE_IP])
-		return -EINVAL;
 
 	tuple->src.l3num = l3num;
 
-	err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple);
-	if (err < 0)
-		return err;
+	if (flags & CTA_FILTER_FLAG(CTA_IP_DST) ||
+	    flags & CTA_FILTER_FLAG(CTA_IP_SRC)) {
+		if (!tb[CTA_TUPLE_IP])
+			return -EINVAL;
 
-	if (!tb[CTA_TUPLE_PROTO])
-		return -EINVAL;
+		err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple, flags);
+		if (err < 0)
+			return err;
+	}
 
-	err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple);
-	if (err < 0)
-		return err;
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) {
+		if (!tb[CTA_TUPLE_PROTO])
+			return -EINVAL;
 
-	if (tb[CTA_TUPLE_ZONE]) {
+		err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple, flags);
+		if (err < 0)
+			return err;
+	} else if (flags & CTA_FILTER_FLAG(ALL_CTA_PROTO)) {
+		/* Can't manage proto flags without a protonum  */
+		return -EINVAL;
+	}
+
+	if ((flags & CTA_FILTER_FLAG(CTA_TUPLE_ZONE)) && tb[CTA_TUPLE_ZONE]) {
 		if (!zone)
 			return -EINVAL;
 
@@ -1193,6 +1436,15 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 	return 0;
 }
 
+static int
+ctnetlink_parse_tuple(const struct nlattr * const cda[],
+		      struct nf_conntrack_tuple *tuple, u32 type,
+		      u_int8_t l3num, struct nf_conntrack_zone *zone)
+{
+	return ctnetlink_parse_tuple_filter(cda, tuple, type, l3num, zone,
+					    CTA_FILTER_FLAG(ALL));
+}
+
 static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
 	[CTA_HELP_NAME]		= { .type = NLA_NUL_STRING,
 				    .len = NF_CT_HELPER_NAME_LEN - 1 },
@@ -1240,6 +1492,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 				    .len = NF_CT_LABELS_MAX_SIZE },
 	[CTA_LABELS_MASK]	= { .type = NLA_BINARY,
 				    .len = NF_CT_LABELS_MAX_SIZE },
+	[CTA_FILTER]		= { .type = NLA_NESTED },
 };
 
 static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
@@ -1256,7 +1509,10 @@ static int ctnetlink_flush_conntrack(struct net *net,
 {
 	struct ctnetlink_filter *filter = NULL;
 
-	if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+	if (ctnetlink_needs_filter(family, cda)) {
+		if (cda[CTA_FILTER])
+			return -EOPNOTSUPP;
+
 		filter = ctnetlink_alloc_filter(cda, family);
 		if (IS_ERR(filter))
 			return PTR_ERR(filter);
@@ -1385,7 +1641,7 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
 	}
 
 	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
-				  NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true);
+				  NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true, 0);
 	nf_ct_put(ct);
 	if (err <= 0)
 		goto free;
@@ -1458,7 +1714,7 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
 			res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
 						  cb->nlh->nlmsg_seq,
 						  NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
-						  ct, dying ? true : false);
+						  ct, dying ? true : false, 0);
 			if (res < 0) {
 				if (!atomic_inc_not_zero(&ct->ct_general.use))
 					continue;
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index c2e3dff773bc..4efd8741c105 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -20,6 +20,8 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_log.h>
 
+#include "nf_internals.h"
+
 static const unsigned int nf_ct_icmp_timeout = 30*HZ;
 
 bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -271,20 +273,32 @@ static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
 };
 
 static int icmp_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *tuple)
+				struct nf_conntrack_tuple *tuple,
+				u_int32_t flags)
 {
-	if (!tb[CTA_PROTO_ICMP_TYPE] ||
-	    !tb[CTA_PROTO_ICMP_CODE] ||
-	    !tb[CTA_PROTO_ICMP_ID])
-		return -EINVAL;
-
-	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
-	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
-	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
-
-	if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
-	    !invmap[tuple->dst.u.icmp.type])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_TYPE)) {
+		if (!tb[CTA_PROTO_ICMP_TYPE])
+			return -EINVAL;
+
+		tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+		if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+		    !invmap[tuple->dst.u.icmp.type])
+			return -EINVAL;
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_CODE)) {
+		if (!tb[CTA_PROTO_ICMP_CODE])
+			return -EINVAL;
+
+		tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_ID)) {
+		if (!tb[CTA_PROTO_ICMP_ID])
+			return -EINVAL;
+
+		tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 6f9144e1f1c1..facd8c64ec4e 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -24,6 +24,8 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_log.h>
 
+#include "nf_internals.h"
+
 static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
 
 bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
@@ -193,21 +195,33 @@ static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
 };
 
 static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *tuple)
+				struct nf_conntrack_tuple *tuple,
+				u_int32_t flags)
 {
-	if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
-	    !tb[CTA_PROTO_ICMPV6_CODE] ||
-	    !tb[CTA_PROTO_ICMPV6_ID])
-		return -EINVAL;
-
-	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
-	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
-	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
-
-	if (tuple->dst.u.icmp.type < 128 ||
-	    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
-	    !invmap[tuple->dst.u.icmp.type - 128])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) {
+		if (!tb[CTA_PROTO_ICMPV6_TYPE])
+			return -EINVAL;
+
+		tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+		if (tuple->dst.u.icmp.type < 128 ||
+		    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+		    !invmap[tuple->dst.u.icmp.type - 128])
+			return -EINVAL;
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) {
+		if (!tb[CTA_PROTO_ICMPV6_CODE])
+			return -EINVAL;
+
+		tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) {
+		if (!tb[CTA_PROTO_ICMPV6_ID])
+			return -EINVAL;
+
+		tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index d6c43902ebd7..832ae64179f0 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -6,6 +6,23 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 
+/* nf_conntrack_netlink.c: applied on tuple filters */
+#define CTA_FILTER_F_CTA_IP_SRC			(1 << 0)
+#define CTA_FILTER_F_CTA_IP_DST			(1 << 1)
+#define CTA_FILTER_F_CTA_TUPLE_ZONE		(1 << 2)
+#define CTA_FILTER_F_CTA_PROTO_NUM		(1 << 3)
+#define CTA_FILTER_F_CTA_PROTO_SRC_PORT		(1 << 4)
+#define CTA_FILTER_F_CTA_PROTO_DST_PORT		(1 << 5)
+#define CTA_FILTER_F_CTA_PROTO_ICMP_TYPE	(1 << 6)
+#define CTA_FILTER_F_CTA_PROTO_ICMP_CODE	(1 << 7)
+#define CTA_FILTER_F_CTA_PROTO_ICMP_ID		(1 << 8)
+#define CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE	(1 << 9)
+#define CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE	(1 << 10)
+#define CTA_FILTER_F_CTA_PROTO_ICMPV6_ID	(1 << 11)
+#define CTA_FILTER_F_MAX			(1 << 12)
+#define CTA_FILTER_F_ALL			(CTA_FILTER_F_MAX-1)
+#define CTA_FILTER_FLAG(ctattr) CTA_FILTER_F_ ## ctattr
+
 /* nf_queue.c */
 void nf_queue_nf_hook_drop(struct net *net);
 
-- 
2.20.1


^ permalink raw reply related

* [PATCH] env: Add H_DEFAULT flag
From: Marek Vasut @ 2020-05-29 17:50 UTC (permalink / raw)
  To: u-boot

Add another internal environment flag which indicates that the operation
is resetting the environment to the default one. This allows the env code
to discern between import of external environment and reset to default.

Signed-off-by: Marek Vasut <marex@denx.de>
---
 env/common.c     | 3 ++-
 include/search.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/env/common.c b/env/common.c
index 088b2aebb4..0db56e610a 100644
--- a/env/common.c
+++ b/env/common.c
@@ -81,6 +81,7 @@ void env_set_default(const char *s, int flags)
 		debug("Using default environment\n");
 	}
 
+	flags |= H_DEFAULT;
 	if (himport_r(&env_htab, (char *)default_environment,
 			sizeof(default_environment), '\0', flags, 0,
 			0, NULL) == 0)
@@ -99,7 +100,7 @@ int env_set_default_vars(int nvars, char * const vars[], int flags)
 	 * Special use-case: import from default environment
 	 * (and use \0 as a separator)
 	 */
-	flags |= H_NOCLEAR;
+	flags |= H_NOCLEAR | H_DEFAULT;
 	return himport_r(&env_htab, (const char *)default_environment,
 				sizeof(default_environment), '\0',
 				flags, 0, nvars, vars);
diff --git a/include/search.h b/include/search.h
index bca36d3abc..c4b50c9630 100644
--- a/include/search.h
+++ b/include/search.h
@@ -112,5 +112,6 @@ int hwalk_r(struct hsearch_data *htab,
 #define H_MATCH_METHOD	(H_MATCH_IDENT | H_MATCH_SUBSTR | H_MATCH_REGEX)
 #define H_PROGRAMMATIC	(1 << 9) /* indicate that an import is from env_set() */
 #define H_ORIGIN_FLAGS	(H_INTERACTIVE | H_PROGRAMMATIC)
+#define H_DEFAULT	(1 << 10) /* indicate that an import is default env */
 
 #endif /* _SEARCH_H_ */
-- 
2.25.1

^ permalink raw reply related

* [PATCH 2/9] netfilter: nf_tables: generalise flowtable hook parsing
From: Pablo Neira Ayuso @ 2020-05-29 17:50 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba
In-Reply-To: <20200529175026.30541-1-pablo@netfilter.org>

Update nft_flowtable_parse_hook() to take the flowtable hook list as
parameter. This allows to reuse this function to update the hooks.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 36 ++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3558e76e2733..87945b4a6789 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6178,21 +6178,30 @@ nft_flowtable_lookup_byhandle(const struct nft_table *table,
        return ERR_PTR(-ENOENT);
 }
 
+struct nft_flowtable_hook {
+	u32			num;
+	int			priority;
+	struct list_head	list;
+};
+
 static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
 	[NFTA_FLOWTABLE_HOOK_NUM]	= { .type = NLA_U32 },
 	[NFTA_FLOWTABLE_HOOK_PRIORITY]	= { .type = NLA_U32 },
 	[NFTA_FLOWTABLE_HOOK_DEVS]	= { .type = NLA_NESTED },
 };
 
-static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
-					  const struct nlattr *attr,
-					  struct nft_flowtable *flowtable)
+static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
+				    const struct nlattr *attr,
+				    struct nft_flowtable_hook *flowtable_hook,
+				    struct nf_flowtable *ft)
 {
 	struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
 	struct nft_hook *hook;
 	int hooknum, priority;
 	int err;
 
+	INIT_LIST_HEAD(&flowtable_hook->list);
+
 	err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
 					  nft_flowtable_hook_policy, NULL);
 	if (err < 0)
@@ -6211,19 +6220,19 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 
 	err = nf_tables_parse_netdev_hooks(ctx->net,
 					   tb[NFTA_FLOWTABLE_HOOK_DEVS],
-					   &flowtable->hook_list);
+					   &flowtable_hook->list);
 	if (err < 0)
 		return err;
 
-	flowtable->hooknum		= hooknum;
-	flowtable->data.priority	= priority;
+	flowtable_hook->priority	= priority;
+	flowtable_hook->num		= hooknum;
 
-	list_for_each_entry(hook, &flowtable->hook_list, list) {
+	list_for_each_entry(hook, &flowtable_hook->list, list) {
 		hook->ops.pf		= NFPROTO_NETDEV;
 		hook->ops.hooknum	= hooknum;
 		hook->ops.priority	= priority;
-		hook->ops.priv		= &flowtable->data;
-		hook->ops.hook		= flowtable->data.type->hook;
+		hook->ops.priv		= ft;
+		hook->ops.hook		= ft->type->hook;
 	}
 
 	return err;
@@ -6336,6 +6345,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 				  struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nft_flowtable_hook flowtable_hook;
 	const struct nf_flowtable_type *type;
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
@@ -6409,11 +6419,15 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 	if (err < 0)
 		goto err3;
 
-	err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
-					     flowtable);
+	err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
+				       &flowtable_hook, &flowtable->data);
 	if (err < 0)
 		goto err4;
 
+	list_splice(&flowtable_hook.list, &flowtable->hook_list);
+	flowtable->data.priority = flowtable_hook.priority;
+	flowtable->hooknum = flowtable_hook.num;
+
 	err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
 	if (err < 0) {
 		list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
-- 
2.20.1


^ permalink raw reply related

* [PATCH] env: Fix invalid env handling in env_init()
From: Marek Vasut @ 2020-05-29 17:51 UTC (permalink / raw)
  To: u-boot

In case the env storage driver marks environment as ENV_INVALID, we must
reset the $ret return value to -ENOENT to let the env init code reset the
environment to the default one a bit further down.

Signed-off-by: Marek Vasut <marex@denx.de>
---
 env/env.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/env/env.c b/env/env.c
index dcc25c030b..024d36fdbe 100644
--- a/env/env.c
+++ b/env/env.c
@@ -300,6 +300,9 @@ int env_init(void)
 
 		debug("%s: Environment %s init done (ret=%d)\n", __func__,
 		      drv->name, ret);
+
+		if (gd->env_valid == ENV_INVALID)
+			ret = -ENOENT;
 	}
 
 	if (!prio)
-- 
2.25.1

^ permalink raw reply related

* [PATCH] env: nowhere: Implement .load callback
From: Marek Vasut @ 2020-05-29 17:51 UTC (permalink / raw)
  To: u-boot

Add .load callback for the 'nowhere' environment driver. This is useful
for when the 'nowhere' driver is used in combination with other drivers
and should be used to load the default environment.

Signed-off-by: Marek Vasut <marex@denx.de>
---
 env/nowhere.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/env/nowhere.c b/env/nowhere.c
index f5b0a17652..417a636f83 100644
--- a/env/nowhere.c
+++ b/env/nowhere.c
@@ -15,6 +15,12 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
+static int env_nowhere_load(void)
+{
+	env_set_default(NULL, 0);
+
+	return 0;
+}
 /*
  * Because we only ever have the default environment available we must mark
  * it as invalid.
@@ -30,5 +36,6 @@ static int env_nowhere_init(void)
 U_BOOT_ENV_LOCATION(nowhere) = {
 	.location	= ENVL_NOWHERE,
 	.init		= env_nowhere_init,
+	.load		= env_nowhere_load,
 	ENV_NAME("nowhere")
 };
-- 
2.25.1

^ permalink raw reply related

* Re: [PATCH v2 09/14] device core: Add ability to handle multiple dma offsets
From: Jim Quinlan @ 2020-05-29 17:51 UTC (permalink / raw)
  To: Rob Herring
  Cc: Nicolas Saenz Julienne,
	open list:PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS,
	Christoph Hellwig, maintainer:BROADCOM BCM7XXX ARM ARCHITECTURE,
	Frank Rowand, Greg Kroah-Hartman, Marek Szyprowski, Robin Murphy,
	Alan Stern, Oliver Neukum, Rafael J. Wysocki, Andy Shevchenko,
	Wolfram Sang, Corey Minyard, Srinivas Kandagatla,
	Suzuki K Poulose, Saravana Kannan, Heikki Krogerus, Dan Williams,
	open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE, open list,
	open list:USB SUBSYSTEM, open list:DMA MAPPING HELPERS
In-Reply-To: <CAL_JsqJsxxC6msUXBCa9naitMLfOcVZauk44gPJNGGe3iXRzsA@mail.gmail.com>

On Fri, May 29, 2020 at 1:35 PM Rob Herring <robh+dt@kernel.org> wrote:
>
> On Wed, May 27, 2020 at 9:43 AM Jim Quinlan <james.quinlan@broadcom.com> wrote:
> >
> > Hi Nicolas,
> >
> > On Wed, May 27, 2020 at 11:00 AM Nicolas Saenz Julienne
> > <nsaenzjulienne@suse.de> wrote:
> > >
> > > Hi Jim,
> > > one thing comes to mind, there is a small test suite in drivers/of/unittest.c
> > > (specifically of_unittest_pci_dma_ranges()) you could extend it to include your
> > > use cases.
> > Sure, will check out.
> > >
> > > On Tue, 2020-05-26 at 15:12 -0400, Jim Quinlan wrote:
> > > > The new field in struct device 'dma_pfn_offset_map' is used to facilitate
> > > > the use of multiple pfn offsets between cpu addrs and dma addrs.  It is
> > > > similar to 'dma_pfn_offset' except that the offset chosen depends on the
> > > > cpu or dma address involved.
> > > >
> > > > Signed-off-by: Jim Quinlan <james.quinlan@broadcom.com>
> > > > ---
> > > >  drivers/of/address.c        | 65 +++++++++++++++++++++++++++++++++++--
> > > >  drivers/usb/core/message.c  |  3 ++
> > > >  drivers/usb/core/usb.c      |  3 ++
> > > >  include/linux/device.h      | 10 +++++-
> > > >  include/linux/dma-direct.h  | 10 ++++--
> > > >  include/linux/dma-mapping.h | 46 ++++++++++++++++++++++++++
> > > >  kernel/dma/Kconfig          | 13 ++++++++
> > > >  7 files changed, 144 insertions(+), 6 deletions(-)
> > > >
> > >
> > > [...]
> > >
> > > > @@ -977,10 +1020,19 @@ int of_dma_get_range(struct device *dev, struct
> > > > device_node *np, u64 *dma_addr,
> > > >               pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n",
> > > >                        range.bus_addr, range.cpu_addr, range.size);
> > > >
> > > > +             num_ranges++;
> > > >               if (dma_offset && range.cpu_addr - range.bus_addr != dma_offset)
> > > > {
> > > > -                     pr_warn("Can't handle multiple dma-ranges with different
> > > > offsets on node(%pOF)\n", node);
> > > > -                     /* Don't error out as we'd break some existing DTs */
> > > > -                     continue;
> > > > +                     if (!IS_ENABLED(CONFIG_DMA_PFN_OFFSET_MAP)) {
> > > > +                             pr_warn("Can't handle multiple dma-ranges with
> > > > different offsets on node(%pOF)\n", node);
> > > > +                             pr_warn("Perhaps set DMA_PFN_OFFSET_MAP=y?\n");
> > > > +                             /*
> > > > +                              * Don't error out as we'd break some existing
> > > > +                              * DTs that are using configs w/o
> > > > +                              * CONFIG_DMA_PFN_OFFSET_MAP set.
> > > > +                              */
> > > > +                             continue;
> > >
> > > dev->bus_dma_limit is set in of_dma_configure(), this function's caller, based
> > > on dma_start's value (set after this continue). So you'd be effectively setting
> > > the dev->bus_dma_limit to whatever we get from the first dma-range.
> > I'm not seeing that at all.  On the  evaluation of each dma-range,
> > dma_start and dma_end are re-evaluated to be the lowest and highest
> > bus values of the  dma-ranges seen so far.  After all dma-ranges are
> > examined,  dev->bus_dma_limit being set to the highest.  In fact, the
> > current code -- ie before my commits -- already does this for multiple
> > dma-ranges as long as the cpu-bus offset is the same in the
> > dma-ranges.
> > >
> > > This can be troublesome depending on how the dma-ranges are setup, for example
> > > if the first dma-range doesn't include the CMA area, in arm64 generally set as
> > > high as possible in ZONE_DMA32, that would render it useless for
> > > dma/{direct/swiotlb}. Again depending on the bus_dma_limit value, if smaller
> > > than ZONE_DMA you'd be unable to allocate any DMA memory.
> > >
> > > IMO, a solution to this calls for a revamp of dma-direct's dma_capable(): match
> > > the target DMA memory area with each dma-range we have to see if it fits.
> > >
> > > > +                     }
> > > > +                     dma_multi_pfn_offset = true;
> > > >               }
> > > >               dma_offset = range.cpu_addr - range.bus_addr;
> > > >
> > > > @@ -991,6 +1043,13 @@ int of_dma_get_range(struct device *dev, struct
> > > > device_node *np, u64 *dma_addr,
> > > >                       dma_end = range.bus_addr + range.size;
> > > >       }
> > > >
> > > > +     if (dma_multi_pfn_offset) {
> > > > +             dma_offset = 0;
> > > > +             ret = attach_dma_pfn_offset_map(dev, node, num_ranges);
> > > > +             if (ret)
> > > > +                     return ret;
> > > > +     }
> > > > +
> > > >       if (dma_start >= dma_end) {
> > > >               ret = -EINVAL;
> > > >               pr_debug("Invalid DMA ranges configuration on node(%pOF)\n",
> > > > diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
> > > > index 6197938dcc2d..aaa3e58f5eb4 100644
> > > > --- a/drivers/usb/core/message.c
> > > > +++ b/drivers/usb/core/message.c
> > > > @@ -1960,6 +1960,9 @@ int usb_set_configuration(struct usb_device *dev, int
> > > > configuration)
> > > >                */
> > > >               intf->dev.dma_mask = dev->dev.dma_mask;
> > > >               intf->dev.dma_pfn_offset = dev->dev.dma_pfn_offset;
> > > > +#ifdef CONFIG_DMA_PFN_OFFSET_MAP
> > > > +             intf->dev.dma_pfn_offset_map = dev->dev.dma_pfn_offset_map;
> > > > +#endif
> > >
> > > Thanks for looking at this, that said, I see more instances of drivers changing
> > > dma_pfn_offset outside of the core code. Why not doing this there too?
> > >
> > > Also, are we 100% sure that dev->dev.dma_pfn_offset isn't going to be freed
> > > before we're done using intf->dev? Maybe it's safer to copy the ranges?
> > >
> > > >               INIT_WORK(&intf->reset_ws, __usb_queue_reset_device);
> > > >               intf->minor = -1;
> > > >               device_initialize(&intf->dev);
> > > > diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
> > > > index f16c26dc079d..d2ed4d90e56e 100644
> > > > --- a/drivers/usb/core/usb.c
> > > > +++ b/drivers/usb/core/usb.c
> > > > @@ -612,6 +612,9 @@ struct usb_device *usb_alloc_dev(struct usb_device
> > > > *parent,
> > > >        */
> > > >       dev->dev.dma_mask = bus->sysdev->dma_mask;
> > > >       dev->dev.dma_pfn_offset = bus->sysdev->dma_pfn_offset;
> > > > +#ifdef CONFIG_DMA_PFN_OFFSET_MAP
> > > > +     dev->dev.dma_pfn_offset_map = bus->sysdev->dma_pfn_offset_map;
> > > > +#endif
> > > >       set_dev_node(&dev->dev, dev_to_node(bus->sysdev));
> > > >       dev->state = USB_STATE_ATTACHED;
> > > >       dev->lpm_disable_count = 1;
> > > > diff --git a/include/linux/device.h b/include/linux/device.h
> > > > index ac8e37cd716a..67a240ad4fc5 100644
> > > > --- a/include/linux/device.h
> > > > +++ b/include/linux/device.h
> > > > @@ -493,6 +493,8 @@ struct dev_links_info {
> > > >   * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller
> > > >   *           DMA limit than the device itself supports.
> > > >   * @dma_pfn_offset: offset of DMA memory range relatively of RAM
> > > > + * @dma_pfn_offset_map:      Like dma_pfn_offset but used when there are
> > > > multiple
> > > > + *           pfn offsets for multiple dma-ranges.
> > > >   * @dma_parms:       A low level driver may set these to teach IOMMU code
> > > > about
> > > >   *           segment limitations.
> > > >   * @dma_pools:       Dma pools (if dma'ble device).
> > > > @@ -578,7 +580,13 @@ struct device {
> > > >                                            allocations such descriptors. */
> > > >       u64             bus_dma_limit;  /* upstream dma constraint */
> > > >       unsigned long   dma_pfn_offset;
> > > > -
> > > > +#ifdef CONFIG_DMA_PFN_OFFSET_MAP
> > > > +     const struct dma_pfn_offset_region *dma_pfn_offset_map;
> > > > +                                     /* Like dma_pfn_offset, but for
> > > > +                                      * the unlikely case of multiple
> > > > +                                      * offsets. If non-null, dma_pfn_offset
> > > > +                                      * will be set to 0. */
> > > > +#endif
> > >
> > > I'm still sad this doesn't fully replace dma_pfn_offset & bus_dma_limit. I feel
> > > the extra logic involved in incorporating this as default isn't going to be
> > > noticeable as far as performance is concerned to single dma-range users, and
> > > it'd make for a nicer DMA code. Also you'd force everyone to test their changes
> > > on the multi dma-ranges code path, as opposed to having this disabled 99.9% of
> > > the time (hence broken every so often).
> > Good point.
>
> +1
>
> > > Note that I sympathize with the amount of work involved on improving that, so
> > > better wait to hear what more knowledgeable people have to say about this :)
> > Yes, I agree.  I want to avoid coding and testing one solution only to
> > have a different reviewer NAK it.
>
> It's a pretty safe bet that everyone will prefer one code path over 2.
>
> Rob
Thanks for  the input.  Will do, and send out v3 ASAP.
Thanks, Jim

^ permalink raw reply

* Re: [PATCH v2 09/14] device core: Add ability to handle multiple dma offsets
From: Jim Quinlan via iommu @ 2020-05-29 17:51 UTC (permalink / raw)
  To: Rob Herring
  Cc: open list:USB SUBSYSTEM,
	open list:PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS,
	Heikki Krogerus, Srinivas Kandagatla, Frank Rowand,
	Christoph Hellwig, Saravana Kannan, Rafael J. Wysocki,
	maintainer:BROADCOM BCM7XXX ARM ARCHITECTURE, Alan Stern,
	open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE, Corey Minyard,
	Suzuki K Poulose, Dan Williams, Andy Shevchenko,
	Greg Kroah-Hartman, Oliver Neukum, open list, Wolfram Sang,
	open list:DMA MAPPING HELPERS, Robin Murphy,
	Nicolas Saenz Julienne
In-Reply-To: <CAL_JsqJsxxC6msUXBCa9naitMLfOcVZauk44gPJNGGe3iXRzsA@mail.gmail.com>

On Fri, May 29, 2020 at 1:35 PM Rob Herring <robh+dt@kernel.org> wrote:
>
> On Wed, May 27, 2020 at 9:43 AM Jim Quinlan <james.quinlan@broadcom.com> wrote:
> >
> > Hi Nicolas,
> >
> > On Wed, May 27, 2020 at 11:00 AM Nicolas Saenz Julienne
> > <nsaenzjulienne@suse.de> wrote:
> > >
> > > Hi Jim,
> > > one thing comes to mind, there is a small test suite in drivers/of/unittest.c
> > > (specifically of_unittest_pci_dma_ranges()) you could extend it to include your
> > > use cases.
> > Sure, will check out.
> > >
> > > On Tue, 2020-05-26 at 15:12 -0400, Jim Quinlan wrote:
> > > > The new field in struct device 'dma_pfn_offset_map' is used to facilitate
> > > > the use of multiple pfn offsets between cpu addrs and dma addrs.  It is
> > > > similar to 'dma_pfn_offset' except that the offset chosen depends on the
> > > > cpu or dma address involved.
> > > >
> > > > Signed-off-by: Jim Quinlan <james.quinlan@broadcom.com>
> > > > ---
> > > >  drivers/of/address.c        | 65 +++++++++++++++++++++++++++++++++++--
> > > >  drivers/usb/core/message.c  |  3 ++
> > > >  drivers/usb/core/usb.c      |  3 ++
> > > >  include/linux/device.h      | 10 +++++-
> > > >  include/linux/dma-direct.h  | 10 ++++--
> > > >  include/linux/dma-mapping.h | 46 ++++++++++++++++++++++++++
> > > >  kernel/dma/Kconfig          | 13 ++++++++
> > > >  7 files changed, 144 insertions(+), 6 deletions(-)
> > > >
> > >
> > > [...]
> > >
> > > > @@ -977,10 +1020,19 @@ int of_dma_get_range(struct device *dev, struct
> > > > device_node *np, u64 *dma_addr,
> > > >               pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n",
> > > >                        range.bus_addr, range.cpu_addr, range.size);
> > > >
> > > > +             num_ranges++;
> > > >               if (dma_offset && range.cpu_addr - range.bus_addr != dma_offset)
> > > > {
> > > > -                     pr_warn("Can't handle multiple dma-ranges with different
> > > > offsets on node(%pOF)\n", node);
> > > > -                     /* Don't error out as we'd break some existing DTs */
> > > > -                     continue;
> > > > +                     if (!IS_ENABLED(CONFIG_DMA_PFN_OFFSET_MAP)) {
> > > > +                             pr_warn("Can't handle multiple dma-ranges with
> > > > different offsets on node(%pOF)\n", node);
> > > > +                             pr_warn("Perhaps set DMA_PFN_OFFSET_MAP=y?\n");
> > > > +                             /*
> > > > +                              * Don't error out as we'd break some existing
> > > > +                              * DTs that are using configs w/o
> > > > +                              * CONFIG_DMA_PFN_OFFSET_MAP set.
> > > > +                              */
> > > > +                             continue;
> > >
> > > dev->bus_dma_limit is set in of_dma_configure(), this function's caller, based
> > > on dma_start's value (set after this continue). So you'd be effectively setting
> > > the dev->bus_dma_limit to whatever we get from the first dma-range.
> > I'm not seeing that at all.  On the  evaluation of each dma-range,
> > dma_start and dma_end are re-evaluated to be the lowest and highest
> > bus values of the  dma-ranges seen so far.  After all dma-ranges are
> > examined,  dev->bus_dma_limit being set to the highest.  In fact, the
> > current code -- ie before my commits -- already does this for multiple
> > dma-ranges as long as the cpu-bus offset is the same in the
> > dma-ranges.
> > >
> > > This can be troublesome depending on how the dma-ranges are setup, for example
> > > if the first dma-range doesn't include the CMA area, in arm64 generally set as
> > > high as possible in ZONE_DMA32, that would render it useless for
> > > dma/{direct/swiotlb}. Again depending on the bus_dma_limit value, if smaller
> > > than ZONE_DMA you'd be unable to allocate any DMA memory.
> > >
> > > IMO, a solution to this calls for a revamp of dma-direct's dma_capable(): match
> > > the target DMA memory area with each dma-range we have to see if it fits.
> > >
> > > > +                     }
> > > > +                     dma_multi_pfn_offset = true;
> > > >               }
> > > >               dma_offset = range.cpu_addr - range.bus_addr;
> > > >
> > > > @@ -991,6 +1043,13 @@ int of_dma_get_range(struct device *dev, struct
> > > > device_node *np, u64 *dma_addr,
> > > >                       dma_end = range.bus_addr + range.size;
> > > >       }
> > > >
> > > > +     if (dma_multi_pfn_offset) {
> > > > +             dma_offset = 0;
> > > > +             ret = attach_dma_pfn_offset_map(dev, node, num_ranges);
> > > > +             if (ret)
> > > > +                     return ret;
> > > > +     }
> > > > +
> > > >       if (dma_start >= dma_end) {
> > > >               ret = -EINVAL;
> > > >               pr_debug("Invalid DMA ranges configuration on node(%pOF)\n",
> > > > diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
> > > > index 6197938dcc2d..aaa3e58f5eb4 100644
> > > > --- a/drivers/usb/core/message.c
> > > > +++ b/drivers/usb/core/message.c
> > > > @@ -1960,6 +1960,9 @@ int usb_set_configuration(struct usb_device *dev, int
> > > > configuration)
> > > >                */
> > > >               intf->dev.dma_mask = dev->dev.dma_mask;
> > > >               intf->dev.dma_pfn_offset = dev->dev.dma_pfn_offset;
> > > > +#ifdef CONFIG_DMA_PFN_OFFSET_MAP
> > > > +             intf->dev.dma_pfn_offset_map = dev->dev.dma_pfn_offset_map;
> > > > +#endif
> > >
> > > Thanks for looking at this, that said, I see more instances of drivers changing
> > > dma_pfn_offset outside of the core code. Why not doing this there too?
> > >
> > > Also, are we 100% sure that dev->dev.dma_pfn_offset isn't going to be freed
> > > before we're done using intf->dev? Maybe it's safer to copy the ranges?
> > >
> > > >               INIT_WORK(&intf->reset_ws, __usb_queue_reset_device);
> > > >               intf->minor = -1;
> > > >               device_initialize(&intf->dev);
> > > > diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
> > > > index f16c26dc079d..d2ed4d90e56e 100644
> > > > --- a/drivers/usb/core/usb.c
> > > > +++ b/drivers/usb/core/usb.c
> > > > @@ -612,6 +612,9 @@ struct usb_device *usb_alloc_dev(struct usb_device
> > > > *parent,
> > > >        */
> > > >       dev->dev.dma_mask = bus->sysdev->dma_mask;
> > > >       dev->dev.dma_pfn_offset = bus->sysdev->dma_pfn_offset;
> > > > +#ifdef CONFIG_DMA_PFN_OFFSET_MAP
> > > > +     dev->dev.dma_pfn_offset_map = bus->sysdev->dma_pfn_offset_map;
> > > > +#endif
> > > >       set_dev_node(&dev->dev, dev_to_node(bus->sysdev));
> > > >       dev->state = USB_STATE_ATTACHED;
> > > >       dev->lpm_disable_count = 1;
> > > > diff --git a/include/linux/device.h b/include/linux/device.h
> > > > index ac8e37cd716a..67a240ad4fc5 100644
> > > > --- a/include/linux/device.h
> > > > +++ b/include/linux/device.h
> > > > @@ -493,6 +493,8 @@ struct dev_links_info {
> > > >   * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller
> > > >   *           DMA limit than the device itself supports.
> > > >   * @dma_pfn_offset: offset of DMA memory range relatively of RAM
> > > > + * @dma_pfn_offset_map:      Like dma_pfn_offset but used when there are
> > > > multiple
> > > > + *           pfn offsets for multiple dma-ranges.
> > > >   * @dma_parms:       A low level driver may set these to teach IOMMU code
> > > > about
> > > >   *           segment limitations.
> > > >   * @dma_pools:       Dma pools (if dma'ble device).
> > > > @@ -578,7 +580,13 @@ struct device {
> > > >                                            allocations such descriptors. */
> > > >       u64             bus_dma_limit;  /* upstream dma constraint */
> > > >       unsigned long   dma_pfn_offset;
> > > > -
> > > > +#ifdef CONFIG_DMA_PFN_OFFSET_MAP
> > > > +     const struct dma_pfn_offset_region *dma_pfn_offset_map;
> > > > +                                     /* Like dma_pfn_offset, but for
> > > > +                                      * the unlikely case of multiple
> > > > +                                      * offsets. If non-null, dma_pfn_offset
> > > > +                                      * will be set to 0. */
> > > > +#endif
> > >
> > > I'm still sad this doesn't fully replace dma_pfn_offset & bus_dma_limit. I feel
> > > the extra logic involved in incorporating this as default isn't going to be
> > > noticeable as far as performance is concerned to single dma-range users, and
> > > it'd make for a nicer DMA code. Also you'd force everyone to test their changes
> > > on the multi dma-ranges code path, as opposed to having this disabled 99.9% of
> > > the time (hence broken every so often).
> > Good point.
>
> +1
>
> > > Note that I sympathize with the amount of work involved on improving that, so
> > > better wait to hear what more knowledgeable people have to say about this :)
> > Yes, I agree.  I want to avoid coding and testing one solution only to
> > have a different reviewer NAK it.
>
> It's a pretty safe bet that everyone will prefer one code path over 2.
>
> Rob
Thanks for  the input.  Will do, and send out v3 ASAP.
Thanks, Jim
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

^ permalink raw reply

* Re: [PATCH 1/2] dt-bindings: ASoC: renesas,rsnd: Add r8a7742 support
From: Rob Herring @ 2020-05-29 17:50 UTC (permalink / raw)
  To: Lad Prabhakar
  Cc: devicetree, alsa-devel, Geert Uytterhoeven, Mark Brown,
	Magnus Damm, linux-kernel, Liam Girdwood, linux-renesas-soc,
	Prabhakar, Rob Herring
In-Reply-To: <1590526904-13855-2-git-send-email-prabhakar.mahadev-lad.rj@bp.renesas.com>

On Tue, 26 May 2020 22:01:43 +0100, Lad Prabhakar wrote:
> Document RZ/G1H (R8A7742) SoC bindings.
> 
> Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
> Reviewed-by: Marian-Cristian Rotariu <marian-cristian.rotariu.rb@bp.renesas.com>
> ---
>  Documentation/devicetree/bindings/sound/renesas,rsnd.txt | 1 +
>  1 file changed, 1 insertion(+)
> 

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* Re: Patches potentially missing from stable releases
From: Henri Rosten @ 2020-05-29 17:52 UTC (permalink / raw)
  To: Greg KH; +Cc: stable, lukas.bulwahn
In-Reply-To: <20200529124655.GA1714108@kroah.com>

On Fri, May 29, 2020 at 02:46:55PM +0200, Greg KH wrote:
> On Fri, May 29, 2020 at 03:24:47PM +0300, Henri Rosten wrote:
> > We did some work on analyzing patches potentially missing from stable 
> > releases based on the Fixes: and Revert references in the commit 
> > messages. Our script is based on similar idea as described by Guenter 
> > Roeck in this earlier mail: 
> > https://lore.kernel.org/stable/20190827171621.GA30360@roeck-us.net/.
> > 
> > Although the list is not comprehensive, we figured it makes sense to 
> > publish it in case the results are of interest to someone else also.
> > 
> > The below list of potentially missing commits is based on 4.19, but some 
> > of the commits might also apply to 5.4 and 5.6.
> > 
> > For each potentially missing commit flagged by the script, we read the 
> > commit message and had a short look at the change. We then added 
> > comments on our own judgement if it might be stable material or not. No 
> > comments simply means the potentially missing patch appears stable 
> > material. "Based on commit" is the mainline patch that has been 
> > backported to 4.19 and is referenced by the missing commit. We did not 
> > check if the patch applies without changes, nor did we build or execute 
> > any tests.
> 
> That last sentence should have been a huge red flag when writing it and
> sending out this email...

Thank you for your comments.

This list was a byproduct of other analysis we did, I did not intent to 
ask these patches for inclusion into stable. We simply wanted to report 
this list in case it includes some patches that might have dropped off 
your radar. We are aware that such information without further work, 
e.g. backporting and testing does not add much yet.

Thanks,
-- Henri


^ permalink raw reply

* Re: [PATCH] usb: dwc2: Fix shutdown callback in platform
From: Doug Anderson @ 2020-05-29 17:52 UTC (permalink / raw)
  To: Frank Mori Hess
  Cc: Minas Harutyunyan, John Youn, Felipe Balbi, Greg Kroah-Hartman,
	linux-usb@vger.kernel.org, Felipe Balbi, Heiko Stübner,
	# 4.0+
In-Reply-To: <CAJz5OpfDnHfGf=dLbc0hTtaz-CERsQyaBNeqDiRz7u4jMywNow@mail.gmail.com>

Hi,

On Fri, May 29, 2020 at 10:45 AM Frank Mori Hess <fmh6jj@gmail.com> wrote:
>
> On Fri, May 29, 2020 at 12:37 PM Doug Anderson <dianders@chromium.org> wrote:
> >
> > I'm not sure I understand.  Are you saying that you'll just add
> > shutdown callbacks to all the drivers using this IRQ and call
> > disable_irq() there?  That seems like the best solution to me.
>
> I don't get it.  A hypothetical machine could have literally anything
> sharing the IRQ line, right?

It's not a real physical line, though?  I don't think it's common to
have a shared interrupt between different IP blocks in a given SoC.
Even if it existed, all the drivers should disable their interrupts?


> If it is important to call disable_irq
> during shutdown (I have no idea if it is) then shouldn't the kernel
> core just disable all irqs after calling all the driver shutdown
> callbacks?

It seems like a reasonable idea for this to be in the core but I
haven't dug into whether the core has enough knowledge or if there
would be other problems.


> Anyways, my screaming interrupt occurs after a a new kernel has been
> booted with kexec.  In this case, it doesn't matter if the old kernel
> called disable_irq or not.  As soon as the new kernel re-enables the
> interrupt line, the kernel immediately disables it again with a
> backtrace due to the unhandled screaming interrupt.  That's why the
> dwc2 hardware needs to have its interrupts turned off when the old
> kernel is shutdown.

Isn't that a bug with your new kernel?  I've seen plenty of bugs where
drivers enable their interrupt before their interrupt handler is set
to handle it.  You never know what state the bootloader (or previous
kernel) might have left things in and if an interrupt was pending it
shouldn't kill you.

-Doug

^ permalink raw reply

* [PATCH] rockchip: enable RNG on RockPro64
From: Marcin Juszkiewicz @ 2020-05-29 17:53 UTC (permalink / raw)
  To: u-boot

RNG module works fine on RockPro64 so let's enable it by default.

=>  rng
00000000: 77 08 dd 04 2c 4d b8 cf 25 07 29 2d c0 ce 28 3b  w...,M..%.)-..(;
00000010: d4 5a 38 68 b1 40 97 59 b8 a5 7c 42 f8 4c 38 28  .Z8h. at .Y..|B.L8(
00000020: 25 20 0b 4b f0 1a d5 c1 e6 a2 c2 34 5a 5e 64 26  % .K.......4Z^d&
00000030: 41 8b d8 7e c4 d6 9e e3 da 0e 98 a2 2f 38 0d 9f  A..~......../8..

Signed-off-by: Marcin Juszkiewicz <marcin@juszkiewicz.com.pl>
---
 arch/arm/dts/rk3399-rockpro64-u-boot.dtsi | 4 ++++
 configs/rockpro64-rk3399_defconfig        | 2 ++
 2 files changed, 6 insertions(+)

diff --git arch/arm/dts/rk3399-rockpro64-u-boot.dtsi arch/arm/dts/rk3399-rockpro64-u-boot.dtsi
index deaa3efd39..824ee4e3ce 100644
--- arch/arm/dts/rk3399-rockpro64-u-boot.dtsi
+++ arch/arm/dts/rk3399-rockpro64-u-boot.dtsi
@@ -23,3 +23,7 @@
 &vdd_log {
 	regulator-init-microvolt = <950000>;
 };
+
+&rng {
+	status = "okay";
+};
diff --git configs/rockpro64-rk3399_defconfig configs/rockpro64-rk3399_defconfig
index 9d4343a5a7..eb83b1376b 100644
--- configs/rockpro64-rk3399_defconfig
+++ configs/rockpro64-rk3399_defconfig
@@ -43,6 +43,8 @@ CONFIG_REGULATOR_PWM=y
 CONFIG_REGULATOR_RK8XX=y
 CONFIG_PWM_ROCKCHIP=y
 CONFIG_RAM_RK3399_LPDDR4=y
+CONFIG_DM_RNG=y
+CONFIG_RNG_ROCKCHIP=y
 CONFIG_BAUDRATE=1500000
 CONFIG_DEBUG_UART_SHIFT=2
 CONFIG_ROCKCHIP_SPI=y
-- 
2.26.2

^ permalink raw reply related

* Re: [GIT PULL] sh: remove sh5 support
From: Rich Felker @ 2020-05-29 17:53 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Arnd Bergmann, linux-sh, ysato, linux-kernel, viro, Rob Landley,
	Geert Uytterhoeven, Linus Torvalds
In-Reply-To: <20200529143059.GA25475@infradead.org>

On Fri, May 29, 2020 at 07:30:59AM -0700, Christoph Hellwig wrote:
> On Thu, May 28, 2020 at 12:14:16PM -0400, Rich Felker wrote:
> > It is in active use. Please do not act on such a request. I would be
> > much quicker to ack things that actually need ack if I weren't CC'd on
> > hundreds of random non-arch-specific changes that don't need it, but I
> > understand that's how the kernel process works. If there are things
> > that need ack please feel free to ping.
> > 
> > Note that I specifically acked and requested the sh5 removal.
> 
> But you did not actually pick it up - because of that it still isn't
> in linux-next and thus most likely will miss Linux 5.8.

Arnd sent the pull request for this and I expected it to be pulled
from his tree, since he already had my approval for the change. Maybe
Linus was expecting it to come through me. This seems to have been a
miscommunication.

Frustratingly, I _still_ don't have an official tree on kernel.org for
the purpose of being the canonical place for linux-next to pull from,
due to policies around pgp keys and nobody following up on signing
mine. This is all really silly since there are ridiculously many
independent channels I could cryptographically validate identity
through with vanishing probability that they're all compromised. For
the time being I'll reactivate my repo on git.musl-libc.org.

Rich

^ permalink raw reply

* Re: [GIT PULL] sh: remove sh5 support
From: Rich Felker @ 2020-05-29 17:53 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Arnd Bergmann, linux-sh, ysato, linux-kernel, viro, Rob Landley,
	Geert Uytterhoeven, Linus Torvalds
In-Reply-To: <20200529143059.GA25475@infradead.org>

On Fri, May 29, 2020 at 07:30:59AM -0700, Christoph Hellwig wrote:
> On Thu, May 28, 2020 at 12:14:16PM -0400, Rich Felker wrote:
> > It is in active use. Please do not act on such a request. I would be
> > much quicker to ack things that actually need ack if I weren't CC'd on
> > hundreds of random non-arch-specific changes that don't need it, but I
> > understand that's how the kernel process works. If there are things
> > that need ack please feel free to ping.
> > 
> > Note that I specifically acked and requested the sh5 removal.
> 
> But you did not actually pick it up - because of that it still isn't
> in linux-next and thus most likely will miss Linux 5.8.

Arnd sent the pull request for this and I expected it to be pulled
from his tree, since he already had my approval for the change. Maybe
Linus was expecting it to come through me. This seems to have been a
miscommunication.

Frustratingly, I _still_ don't have an official tree on kernel.org for
the purpose of being the canonical place for linux-next to pull from,
due to policies around pgp keys and nobody following up on signing
mine. This is all really silly since there are ridiculously many
independent channels I could cryptographically validate identity
through with vanishing probability that they're all compromised. For
the time being I'll reactivate my repo on git.musl-libc.org.

Rich

^ permalink raw reply

* Re: [PATCH] spi: bcm2835: Enable shared interrupt support
From: Lukas Wunner @ 2020-05-29 17:53 UTC (permalink / raw)
  To: Florian Fainelli
  Cc: Nicolas Saenz Julienne, Mark Brown, Ray Jui, Scott Branden,
	bcm-kernel-feedback-list, Martin Sperl, linux-spi,
	linux-rpi-kernel, linux-arm-kernel, linux-kernel
In-Reply-To: <36dd65bb-18a9-9697-b9b6-76eaf8cbe45c@gmail.com>

On Fri, May 29, 2020 at 10:46:01AM -0700, Florian Fainelli wrote:
> On 5/29/20 10:43 AM, Lukas Wunner wrote:
> > On Thu, May 28, 2020 at 08:58:04PM +0200, Nicolas Saenz Julienne wrote:
> >> --- a/drivers/spi/spi-bcm2835.c
> >> +++ b/drivers/spi/spi-bcm2835.c
> >> @@ -379,6 +379,10 @@ static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
> >>  	if (bs->tx_len && cs & BCM2835_SPI_CS_DONE)
> >>  		bcm2835_wr_fifo_blind(bs, BCM2835_SPI_FIFO_SIZE);
> >>  
> >> +	/* check if we got interrupt enabled */
> >> +	if (!(bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_INTR))
> >> +		return IRQ_NONE;
> >> +
> >>  	/* Read as many bytes as possible from FIFO */
> >>  	bcm2835_rd_fifo(bs);
> >>  	/* Write as many bytes as possible to FIFO */
[...]
> > Finally, it would be nice if the check would be optimized away when
> > compiling for pre-RasPi4 products, maybe something like:
> > 
> > +	if (IS_ENABLED(CONFIG_ARM_LPAE) && !(cs & BCM2835_SPI_CS_INTR))
> > +		return IRQ_NONE;
> 
> Rather than keying this off ARM_LPAE or any other option, this should be
> keyed off a compatible string, that way we can even conditionally pass
> IRQF_SHARED to the interrupt handler if we care so much about performance.

But a compatible string can't be checked at compile time, can it?

The point is that at the least the Foundation compiles and ships a separate
kernel for each of the three platforms BCM2835, BCM2837, BCM2711.  It's
unnecessary to check whether an interrupt was actually raised if we *know*
in advance that it's not shared (as is the case with kernels compiled for
BCM2835 and BCM2837).

Thanks,

Lukas

^ permalink raw reply

* ia-32/ia-64 fxsave64 instruction behavior when saving mmx
From: Robert Henry @ 2020-05-29 17:38 UTC (permalink / raw)
  To: qemu-devel@nongnu.org

[-- Attachment #1: Type: text/plain, Size: 882 bytes --]

Background: The ia-32/ia-64 fxsave64 instruction saves fp80 or legacy SSE mmx registers. The mmx registers are saved as if they were fp80 values. The lower 64 bits of the constructed fp80 value is the mmx register.  The upper 16 bits of the constructed fp80 value are reserved; see the last row of table 3-44 of https://www.felixcloutier.com/x86/fxsave#tbl-3-44

The Intel core i9-9980XE Skylake metal I have puts 0xffff into these reserved 16 bits when saving MMX.

QEMU appears to put 0's there.

Does anybody have insight as to what "reserved" really means, or must be, in this case?  I take the verb "reserved" to mean something other than "undefined".

I came across this issue when running the remill instruction test engine.  See my issue https://github.com/lifting-bits/remill/issues/423 For better or worse, remill assumes that those bits are 0xffff, not 0x0000


[-- Attachment #2: Type: text/html, Size: 2534 bytes --]

^ permalink raw reply

* [PATCH] env: Add option to only ever append environment
From: Marek Vasut @ 2020-05-29 17:54 UTC (permalink / raw)
  To: u-boot

Add configuration option which prevents the environment hash table to be
ever cleared and reloaded with different content. This is useful in case
the first environment loaded into the hash table contains e.g. sensitive
content which must not be dropped or reloaded.

Signed-off-by: Marek Vasut <marex@denx.de>
---
 env/Kconfig     | 9 +++++++++
 env/env.c       | 2 ++
 lib/hashtable.c | 4 ++++
 3 files changed, 15 insertions(+)

diff --git a/env/Kconfig b/env/Kconfig
index ca7fef682b..8166e5df91 100644
--- a/env/Kconfig
+++ b/env/Kconfig
@@ -604,6 +604,15 @@ config DELAY_ENVIRONMENT
 	  later by U-Boot code. With CONFIG_OF_CONTROL this is instead
 	  controlled by the value of /config/load-environment.
 
+config ENV_APPEND
+	bool "Always append the environment with new data"
+	default n
+	help
+	  If defined, the environment hash table is only ever appended with new
+	  data, but the existing hash table can never be dropped and reloaded
+	  with newly imported data. This may be used in combination with static
+	  flags to e.g. to protect variables which must not be modified.
+
 config ENV_ACCESS_IGNORE_FORCE
 	bool "Block forced environment operations"
 	default n
diff --git a/env/env.c b/env/env.c
index 024d36fdbe..967a9d36d7 100644
--- a/env/env.c
+++ b/env/env.c
@@ -204,7 +204,9 @@ int env_load(void)
 		ret = drv->load();
 		if (!ret) {
 			printf("OK\n");
+#if !CONFIG_IS_ENABLED(ENV_APPEND)
 			return 0;
+#endif
 		} else if (ret == -ENOMSG) {
 			/* Handle "bad CRC" case */
 			if (best_prio == -1)
diff --git a/lib/hashtable.c b/lib/hashtable.c
index b96dbe19be..c2bf75fb76 100644
--- a/lib/hashtable.c
+++ b/lib/hashtable.c
@@ -822,6 +822,10 @@ int himport_r(struct hsearch_data *htab,
 	if (nvars)
 		memcpy(localvars, vars, sizeof(vars[0]) * nvars);
 
+#if CONFIG_IS_ENABLED(ENV_APPEND)
+	flag |= H_NOCLEAR;
+#endif
+
 	if ((flag & H_NOCLEAR) == 0 && !nvars) {
 		/* Destroy old hash table if one exists */
 		debug("Destroy Hash Table: %p table = %p\n", htab,
-- 
2.25.1

^ permalink raw reply related

* [RFC PATCH linux-next] Revert "net: bcmgenet: bcmgenet_hfb_add_filter() can be static
From: kbuild test robot @ 2020-05-29 17:54 UTC (permalink / raw)
  To: kbuild-all
In-Reply-To: <202005300154.63FdkmJb%lkp@intel.com>

[-- Attachment #1: Type: text/plain, Size: 954 bytes --]


Fixes: 14da1510fedc ("Revert "net: bcmgenet: remove unused function in bcmgenet.c"")
Signed-off-by: kbuild test robot <lkp@intel.com>
---
 bcmgenet.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index b37ef05c5083a..98e492e066dcb 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2855,8 +2855,8 @@ static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv)
  * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp,
  *                         ARRAY_SIZE(hfb_filter_ipv4_udp), 0);
  */
-int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data,
-			    u32 f_length, u32 rx_queue)
+static int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data,
+				   u32 f_length, u32 rx_queue)
 {
 	int f_index;
 	u32 i;

^ permalink raw reply related

* [PATCH for_v31 0/6]  x86/sgx: Misc fixes for v31
From: Sean Christopherson @ 2020-05-29 17:54 UTC (permalink / raw)
  To: Jarkko Sakkinen; +Cc: linux-sgx

One bug fix and a variety of cleanups that apply on top of your current
master, commit b625c3d1370ca ("x86/sgx: Update MAINTAINERS").

Sean Christopherson (6):
  x86/sgx: Fix inadvertant early return from sgx_ioctl()
  x86/sgx: Remove unnecessary globals after merging reclaim.c into
    main.c
  x86/sgx: Rename sgx_free_page() to sgx_free_epc_page()
  x86/sgx: Replace "grab" with "alloc" in VA page helper
  x86/sgx: Update stale comment in EPC page allocators
  x86/sgx: Drop the message that fires when there are too many EPC
    sections

 arch/x86/kernel/cpu/sgx/encl.c  | 16 +++++++-------
 arch/x86/kernel/cpu/sgx/encl.h  |  2 +-
 arch/x86/kernel/cpu/sgx/ioctl.c | 10 ++++-----
 arch/x86/kernel/cpu/sgx/main.c  | 39 +++++++++++++--------------------
 arch/x86/kernel/cpu/sgx/sgx.h   |  5 +----
 5 files changed, 30 insertions(+), 42 deletions(-)

-- 
2.26.0


^ permalink raw reply

* [PATCH for_v31 1/6] x86/sgx: Fix inadvertant early return from sgx_ioctl()
From: Sean Christopherson @ 2020-05-29 17:54 UTC (permalink / raw)
  To: Jarkko Sakkinen; +Cc: linux-sgx
In-Reply-To: <20200529175407.2109-1-sean.j.christopherson@intel.com>

Set 'ret' instead of returning if the enclave is dead in sgx_ioctl(),
otherwise the goto that was added doesn't take effect.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kernel/cpu/sgx/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index ec5a0f017014b..2019ee1dc0b3e 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -790,7 +790,7 @@ long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 		return -EBUSY;
 
 	if (encl_flags & SGX_ENCL_DEAD) {
-		return -EFAULT;
+		ret = -EFAULT;
 		goto out;
 	}
 
-- 
2.26.0


^ permalink raw reply related

* [PATCH for_v31 2/6] x86/sgx: Remove unnecessary globals after merging reclaim.c into main.c
From: Sean Christopherson @ 2020-05-29 17:54 UTC (permalink / raw)
  To: Jarkko Sakkinen; +Cc: linux-sgx
In-Reply-To: <20200529175407.2109-1-sean.j.christopherson@intel.com>

Make all variables that are only used in main.c static, rearranging code
as needed.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kernel/cpu/sgx/main.c | 16 ++++++----------
 arch/x86/kernel/cpu/sgx/sgx.h  |  3 ---
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 5043d3700960b..ef7061bfa2a07 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -14,9 +14,12 @@
 #include "encls.h"
 
 static struct task_struct *ksgxswapd_tsk;
-DECLARE_WAIT_QUEUE_HEAD(ksgxswapd_waitq);
-LIST_HEAD(sgx_active_page_list);
-DEFINE_SPINLOCK(sgx_active_page_list_lock);
+static DECLARE_WAIT_QUEUE_HEAD(ksgxswapd_waitq);
+static LIST_HEAD(sgx_active_page_list);
+static DEFINE_SPINLOCK(sgx_active_page_list_lock);
+
+struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
+static int sgx_nr_epc_sections;
 
 static void sgx_sanitize_section(struct sgx_epc_section *section)
 {
@@ -45,10 +48,6 @@ static void sgx_sanitize_section(struct sgx_epc_section *section)
 	}
 }
 
-extern struct task_struct *ksgxswapd_tsk;
-extern struct wait_queue_head(ksgxswapd_waitq);
-extern spinlock_t sgx_active_page_list_lock;
-
 static unsigned long sgx_nr_free_pages(void)
 {
 	unsigned long cnt = 0;
@@ -491,9 +490,6 @@ void sgx_reclaim_pages(void)
 	}
 }
 
-struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
-int sgx_nr_epc_sections;
-
 static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_section *section)
 {
 	struct sgx_epc_page *page;
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 923028d568540..5b9dbcef981b0 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -41,14 +41,11 @@ struct sgx_epc_section {
 #define SGX_NR_LOW_PAGES		32
 #define SGX_NR_HIGH_PAGES		64
 
-extern struct list_head sgx_active_page_list;
-
 void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
 int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
 void sgx_reclaim_pages(void);
 
 extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
-extern int sgx_nr_epc_sections;
 
 static inline struct sgx_epc_section *sgx_get_epc_section(
 		struct sgx_epc_page *page)
-- 
2.26.0


^ permalink raw reply related

* [PATCH for_v31 6/6] x86/sgx: Drop the message that fires when there are too many EPC sections
From: Sean Christopherson @ 2020-05-29 17:54 UTC (permalink / raw)
  To: Jarkko Sakkinen; +Cc: linux-sgx
In-Reply-To: <20200529175407.2109-1-sean.j.christopherson@intel.com>

Terminate the section loop with '<' instead of '<=' and drop the
associated pr_warn in sgx_page_cache_init().  Per Boris, there's no
point in warning the user if they can't do anything about it.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kernel/cpu/sgx/main.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index ba5486a1ef6f4..c51a3ad7fcce0 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -692,7 +692,7 @@ static bool __init sgx_page_cache_init(void)
 	u64 pa, size;
 	int i;
 
-	for (i = 0; i <= ARRAY_SIZE(sgx_epc_sections); i++) {
+	for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
 		cpuid_count(SGX_CPUID, i + SGX_CPUID_FIRST_VARIABLE_SUB_LEAF,
 			    &eax, &ebx, &ecx, &edx);
 
@@ -705,11 +705,6 @@ static bool __init sgx_page_cache_init(void)
 			break;
 		}
 
-		if (i == ARRAY_SIZE(sgx_epc_sections)) {
-			pr_warn("No free slot for an EPC section\n");
-			break;
-		}
-
 		pa = sgx_calc_section_metric(eax, ebx);
 		size = sgx_calc_section_metric(ecx, edx);
 
-- 
2.26.0


^ permalink raw reply related

* [PATCH for_v31 5/6] x86/sgx: Update stale comment in EPC page allocators
From: Sean Christopherson @ 2020-05-29 17:54 UTC (permalink / raw)
  To: Jarkko Sakkinen; +Cc: linux-sgx
In-Reply-To: <20200529175407.2109-1-sean.j.christopherson@intel.com>

Replace "Grab" with "Allocate" in the function comments for the EPC page
allocators.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kernel/cpu/sgx/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 855b14237e7c4..ba5486a1ef6f4 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -505,7 +505,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_sec
 }
 
 /**
- * __sgx_alloc_epc_page() - Grab a free EPC page
+ * __sgx_alloc_epc_page() - Allocate a free EPC page
  *
  * Iterate through EPC sections and borrow a free EPC page to the caller. When a
  * page is no longer needed it must be released with sgx_free_epc_page().
@@ -534,7 +534,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
 }
 
 /**
- * sgx_alloc_epc_page() - Grab a free EPC page
+ * sgx_alloc_epc_page() - Allocate a free EPC page
  * @owner:	the owner of the EPC page
  * @reclaim:	reclaim pages if necessary
  *
-- 
2.26.0


^ permalink raw reply related

* [PATCH for_v31 3/6] x86/sgx: Rename sgx_free_page() to sgx_free_epc_page()
From: Sean Christopherson @ 2020-05-29 17:54 UTC (permalink / raw)
  To: Jarkko Sakkinen; +Cc: linux-sgx
In-Reply-To: <20200529175407.2109-1-sean.j.christopherson@intel.com>

Add "epc" into the free page helper to match sgx_alloc_epc_page().

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kernel/cpu/sgx/encl.c  | 10 +++++-----
 arch/x86/kernel/cpu/sgx/ioctl.c |  6 +++---
 arch/x86/kernel/cpu/sgx/main.c  | 12 ++++++------
 arch/x86/kernel/cpu/sgx/sgx.h   |  2 +-
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index c52c937b8d67b..e9d72f05a1f12 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -73,7 +73,7 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
 
 	ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
 	if (ret) {
-		sgx_free_page(epc_page);
+		sgx_free_epc_page(epc_page);
 		return ERR_PTR(ret);
 	}
 
@@ -482,7 +482,7 @@ void sgx_encl_destroy(struct sgx_encl *encl)
 			if (sgx_unmark_page_reclaimable(entry->epc_page))
 				continue;
 
-			sgx_free_page(entry->epc_page);
+			sgx_free_epc_page(entry->epc_page);
 			encl->secs_child_cnt--;
 			entry->epc_page = NULL;
 		}
@@ -493,7 +493,7 @@ void sgx_encl_destroy(struct sgx_encl *encl)
 	}
 
 	if (!encl->secs_child_cnt && encl->secs.epc_page) {
-		sgx_free_page(encl->secs.epc_page);
+		sgx_free_epc_page(encl->secs.epc_page);
 		encl->secs.epc_page = NULL;
 	}
 
@@ -506,7 +506,7 @@ void sgx_encl_destroy(struct sgx_encl *encl)
 		va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
 					   list);
 		list_del(&va_page->list);
-		sgx_free_page(va_page->epc_page);
+		sgx_free_epc_page(va_page->epc_page);
 		kfree(va_page);
 	}
 }
@@ -706,7 +706,7 @@ struct sgx_epc_page *sgx_grab_va_page(void)
 	ret = __epa(sgx_get_epc_addr(epc_page));
 	if (ret) {
 		WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
-		sgx_free_page(epc_page);
+		sgx_free_epc_page(epc_page);
 		return ERR_PTR(-EFAULT);
 	}
 
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 2019ee1dc0b3e..f7232a5343262 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -50,7 +50,7 @@ static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
 	encl->page_cnt--;
 
 	if (va_page) {
-		sgx_free_page(va_page->epc_page);
+		sgx_free_epc_page(va_page->epc_page);
 		list_del(&va_page->list);
 		kfree(va_page);
 	}
@@ -223,7 +223,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
 	return 0;
 
 err_out:
-	sgx_free_page(encl->secs.epc_page);
+	sgx_free_epc_page(encl->secs.epc_page);
 	encl->secs.epc_page = NULL;
 
 err_out_backing:
@@ -449,7 +449,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
 	up_read(&current->mm->mmap_sem);
 
 err_out_free:
-	sgx_free_page(epc_page);
+	sgx_free_epc_page(epc_page);
 	kfree(encl_page);
 
 	/*
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index ef7061bfa2a07..855b14237e7c4 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -376,7 +376,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
 
 	if (!encl->secs_child_cnt) {
 		if (atomic_read(&encl->flags) & SGX_ENCL_DEAD) {
-			sgx_free_page(encl->secs.epc_page);
+			sgx_free_epc_page(encl->secs.epc_page);
 			encl->secs.epc_page = NULL;
 		} else if (atomic_read(&encl->flags) & SGX_ENCL_INITIALIZED) {
 			ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size),
@@ -386,7 +386,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
 
 			sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
 
-			sgx_free_page(encl->secs.epc_page);
+			sgx_free_epc_page(encl->secs.epc_page);
 			encl->secs.epc_page = NULL;
 
 			sgx_encl_put_backing(&secs_backing, true);
@@ -508,7 +508,7 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_sec
  * __sgx_alloc_epc_page() - Grab a free EPC page
  *
  * Iterate through EPC sections and borrow a free EPC page to the caller. When a
- * page is no longer needed it must be released with sgx_free_page().
+ * page is no longer needed it must be released with sgx_free_epc_page().
  *
  * Return:
  *   an EPC page,
@@ -539,7 +539,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
  * @reclaim:	reclaim pages if necessary
  *
  * Iterate through EPC sections and borrow a free EPC page to the caller. When a
- * page is no longer needed it must be released with sgx_free_page(). If
+ * page is no longer needed it must be released with sgx_free_epc_page(). If
  * @reclaim is set to true, directly reclaim pages when we are out of pages. No
  * mm's can be locked when @reclaim is set to true.
  *
@@ -585,12 +585,12 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
 }
 
 /**
- * sgx_free_page() - Free an EPC page
+ * sgx_free_epc_page() - Free an EPC page
  * @page:	an EPC page
  *
  * Call EREMOVE for an EPC page and insert it back to the list of free pages.
  */
-void sgx_free_page(struct sgx_epc_page *page)
+void sgx_free_epc_page(struct sgx_epc_page *page)
 {
 	struct sgx_epc_section *section = sgx_get_epc_section(page);
 	int ret;
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 5b9dbcef981b0..2983c1a3d725d 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -62,6 +62,6 @@ static inline void *sgx_get_epc_addr(struct sgx_epc_page *page)
 
 struct sgx_epc_page *__sgx_alloc_epc_page(void);
 struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
-void sgx_free_page(struct sgx_epc_page *page);
+void sgx_free_epc_page(struct sgx_epc_page *page);
 
 #endif /* _X86_SGX_H */
-- 
2.26.0


^ permalink raw reply related

* [PATCH for_v31 4/6] x86/sgx: Replace "grab" with "alloc" in VA page helper
From: Sean Christopherson @ 2020-05-29 17:54 UTC (permalink / raw)
  To: Jarkko Sakkinen; +Cc: linux-sgx
In-Reply-To: <20200529175407.2109-1-sean.j.christopherson@intel.com>

Rename sgx_grab_va_page() to sgx_alloc_va_page() to align with
sgx_alloc_epc_page() as well as sgx_free_epc_page(); the latter is used
directly when freeing the VA page, i.e. the names should be consistent.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kernel/cpu/sgx/encl.c  | 6 +++---
 arch/x86/kernel/cpu/sgx/encl.h  | 2 +-
 arch/x86/kernel/cpu/sgx/ioctl.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index e9d72f05a1f12..df4ae76dd83f6 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -686,15 +686,15 @@ struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
 }
 
 /**
- * sgx_grab_va_page() - Grab a Version Array (VA) page
+ * sgx_alloc_va_page() - Allocate a Version Array (VA) page
  *
- * Grab a free EPC page instance and convert it to a Version Array (VA) page.
+ * Allocate a free EPC page and convert it to a Version Array (VA) page.
  *
  * Return:
  *   a VA page,
  *   -errno otherwise
  */
-struct sgx_epc_page *sgx_grab_va_page(void)
+struct sgx_epc_page *sgx_alloc_va_page(void)
 {
 	struct sgx_epc_page *epc_page;
 	int ret;
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index 625ad44a83215..f0f72e5912445 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -120,7 +120,7 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
 struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
 					    unsigned long addr);
 
-struct sgx_epc_page *sgx_grab_va_page(void);
+struct sgx_epc_page *sgx_alloc_va_page(void);
 unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
 void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
 bool sgx_va_page_full(struct sgx_va_page *va_page);
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index f7232a5343262..4f70cb8144ffd 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -32,7 +32,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
 		if (!va_page)
 			return ERR_PTR(-ENOMEM);
 
-		va_page->epc_page = sgx_grab_va_page();
+		va_page->epc_page = sgx_alloc_va_page();
 		if (IS_ERR(va_page->epc_page)) {
 			err = ERR_CAST(va_page->epc_page);
 			kfree(va_page);
-- 
2.26.0


^ permalink raw reply related


This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.