Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 4/4] net-next: stmmac: rework the speed selection
From: Corentin Labbe @ 2017-05-22 12:33 UTC (permalink / raw)
  To: peppe.cavallaro, alexandre.torgue; +Cc: netdev, linux-kernel, Corentin Labbe
In-Reply-To: <20170522123347.5295-1-clabbe.montjoie@gmail.com>

The current stmmac_adjust_link() part which handle speed have
some if (has_platform) code and my dwmac-sun8i will add more of them.

So we need to handle better speed selection.
Moreover the struct link member speed and port are hard to guess their
purpose. And their unique usage are to be combined for writing speed.

So this patch replace speed/port by simpler
speed10/speed100/speed1000/speed_mask variables.

In dwmac4_core_init and dwmac1000_core_init, port/speed value was used
directly without using the struct link. This patch convert also their
usage to speedxxx.

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |  8 ++++---
 .../net/ethernet/stmicro/stmmac/dwmac1000_core.c   | 26 +++++++++++++---------
 .../net/ethernet/stmicro/stmmac/dwmac100_core.c    |  6 +++--
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c  | 26 +++++++++++++---------
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 21 ++++-------------
 5 files changed, 43 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index b7ce3fbb5375..e82b4b70b7be 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -549,9 +549,11 @@ extern const struct stmmac_hwtimestamp stmmac_ptp;
 extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
 
 struct mac_link {
-	int port;
-	int duplex;
-	int speed;
+	u32 speed_mask;
+	u32 speed10;
+	u32 speed100;
+	u32 speed1000;
+	u32 duplex;
 };
 
 struct mii_regs {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index f3d9305e5f70..b8848a9d70c5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -45,15 +45,17 @@ static void dwmac1000_core_init(struct mac_device_info *hw, int mtu)
 	if (hw->ps) {
 		value |= GMAC_CONTROL_TE;
 
-		if (hw->ps == SPEED_1000) {
-			value &= ~GMAC_CONTROL_PS;
-		} else {
-			value |= GMAC_CONTROL_PS;
-
-			if (hw->ps == SPEED_10)
-				value &= ~GMAC_CONTROL_FES;
-			else
-				value |= GMAC_CONTROL_FES;
+		value &= ~hw->link.speed_mask;
+		switch (hw->ps) {
+		case SPEED_1000:
+			value |= hw->link.speed1000;
+			break;
+		case SPEED_100:
+			value |= hw->link.speed100;
+			break;
+		case SPEED_10:
+			value |= hw->link.speed10;
+			break;
 		}
 	}
 
@@ -531,9 +533,11 @@ struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
 	mac->mac = &dwmac1000_ops;
 	mac->dma = &dwmac1000_dma_ops;
 
-	mac->link.port = GMAC_CONTROL_PS;
 	mac->link.duplex = GMAC_CONTROL_DM;
-	mac->link.speed = GMAC_CONTROL_FES;
+	mac->link.speed10 = GMAC_CONTROL_PS;
+	mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES;
+	mac->link.speed1000 = 0;
+	mac->link.speed_mask = GENMASK(15, 14);
 	mac->mii.addr = GMAC_MII_ADDR;
 	mac->mii.data = GMAC_MII_DATA;
 	mac->mii.addr_shift = 11;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index 1b3609105484..8ef517356313 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -175,9 +175,11 @@ struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id)
 	mac->mac = &dwmac100_ops;
 	mac->dma = &dwmac100_dma_ops;
 
-	mac->link.port = MAC_CONTROL_PS;
 	mac->link.duplex = MAC_CONTROL_F;
-	mac->link.speed = 0;
+	mac->link.speed10 = 0;
+	mac->link.speed100 = 0;
+	mac->link.speed1000 = 0;
+	mac->link.speed_mask = MAC_CONTROL_PS;
 	mac->mii.addr = MAC_MII_ADDR;
 	mac->mii.data = MAC_MII_DATA;
 	mac->mii.addr_shift = 11;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 48793f2e9307..d371e18b122c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -35,15 +35,17 @@ static void dwmac4_core_init(struct mac_device_info *hw, int mtu)
 	if (hw->ps) {
 		value |= GMAC_CONFIG_TE;
 
-		if (hw->ps == SPEED_1000) {
-			value &= ~GMAC_CONFIG_PS;
-		} else {
-			value |= GMAC_CONFIG_PS;
-
-			if (hw->ps == SPEED_10)
-				value &= ~GMAC_CONFIG_FES;
-			else
-				value |= GMAC_CONFIG_FES;
+		value &= hw->link.speed_mask;
+		switch (hw->ps) {
+		case SPEED_1000:
+			value |= hw->link.speed1000;
+			break;
+		case SPEED_100:
+			value |= hw->link.speed100;
+			break;
+		case SPEED_10:
+			value |= hw->link.speed10;
+			break;
 		}
 	}
 
@@ -747,9 +749,11 @@ struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins,
 	if (mac->multicast_filter_bins)
 		mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
-	mac->link.port = GMAC_CONFIG_PS;
 	mac->link.duplex = GMAC_CONFIG_DM;
-	mac->link.speed = GMAC_CONFIG_FES;
+	mac->link.speed10 = GMAC_CONFIG_PS;
+	mac->link.speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS;
+	mac->link.speed1000 = 0;
+	mac->link.speed_mask = GENMASK(15, 14);
 	mac->mii.addr = GMAC_MDIO_ADDR;
 	mac->mii.data = GMAC_MDIO_DATA;
 	mac->mii.addr_shift = 21;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9bf09100c199..ce99b8aa6172 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -811,29 +811,16 @@ static void stmmac_adjust_link(struct net_device *dev)
 
 		if (phydev->speed != priv->speed) {
 			new_state = true;
+			ctrl &= ~priv->hw->link.speed_mask;
 			switch (phydev->speed) {
 			case SPEED_1000:
-				if (priv->plat->has_gmac ||
-				    priv->plat->has_gmac4)
-					ctrl &= ~priv->hw->link.port;
+				ctrl |= priv->hw->link.speed1000;
 				break;
 			case SPEED_100:
-				if (priv->plat->has_gmac ||
-				    priv->plat->has_gmac4) {
-					ctrl |= priv->hw->link.port;
-					ctrl |= priv->hw->link.speed;
-				} else {
-					ctrl &= ~priv->hw->link.port;
-				}
+				ctrl |= priv->hw->link.speed100;
 				break;
 			case SPEED_10:
-				if (priv->plat->has_gmac ||
-				    priv->plat->has_gmac4) {
-					ctrl |= priv->hw->link.port;
-					ctrl &= ~(priv->hw->link.speed);
-				} else {
-					ctrl &= ~priv->hw->link.port;
-				}
+				ctrl |= priv->hw->link.speed10;
 				break;
 			default:
 				netif_warn(priv, link, priv->dev,
-- 
2.13.0

^ permalink raw reply related

* [PATCH v2 1/4] net-next: stmmac: Convert new_state to bool
From: Corentin Labbe @ 2017-05-22 12:33 UTC (permalink / raw)
  To: peppe.cavallaro, alexandre.torgue; +Cc: netdev, linux-kernel, Corentin Labbe
In-Reply-To: <20170522123347.5295-1-clabbe.montjoie@gmail.com>

This patch convert new_state from int to bool since it store only 1 or 0

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1da17cd519f6..94b37323844b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -785,7 +785,7 @@ static void stmmac_adjust_link(struct net_device *dev)
 	struct stmmac_priv *priv = netdev_priv(dev);
 	struct phy_device *phydev = dev->phydev;
 	unsigned long flags;
-	int new_state = 0;
+	bool new_state = false;
 
 	if (!phydev)
 		return;
@@ -798,7 +798,7 @@ static void stmmac_adjust_link(struct net_device *dev)
 		/* Now we make sure that we can be in full duplex mode.
 		 * If not, we operate in half-duplex mode. */
 		if (phydev->duplex != priv->oldduplex) {
-			new_state = 1;
+			new_state = true;
 			if (!(phydev->duplex))
 				ctrl &= ~priv->hw->link.duplex;
 			else
@@ -810,7 +810,7 @@ static void stmmac_adjust_link(struct net_device *dev)
 			stmmac_mac_flow_ctrl(priv, phydev->duplex);
 
 		if (phydev->speed != priv->speed) {
-			new_state = 1;
+			new_state = true;
 			switch (phydev->speed) {
 			case 1000:
 				if (priv->plat->has_gmac ||
@@ -849,11 +849,11 @@ static void stmmac_adjust_link(struct net_device *dev)
 		writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
 
 		if (!priv->oldlink) {
-			new_state = 1;
+			new_state = true;
 			priv->oldlink = 1;
 		}
 	} else if (priv->oldlink) {
-		new_state = 1;
+		new_state = true;
 		priv->oldlink = 0;
 		priv->speed = SPEED_UNKNOWN;
 		priv->oldduplex = DUPLEX_UNKNOWN;
-- 
2.13.0

^ permalink raw reply related

* Re: [patch net-next 2/2] net/sched: fix filter flushing
From: Jiri Pirko @ 2017-05-22 12:36 UTC (permalink / raw)
  To: Jamal Hadi Salim
  Cc: Cong Wang, Linux Kernel Network Developers, David Miller,
	Eric Dumazet, Daniel Borkmann, Simon Horman, mlxsw, Colin King
In-Reply-To: <ebf100e6-e927-35bd-3c3a-9bd481237979@mojatatu.com>

Mon, May 22, 2017 at 12:42:44PM CEST, jhs@mojatatu.com wrote:
>On 17-05-21 03:19 PM, Jiri Pirko wrote:
>> Sun, May 21, 2017 at 08:27:21PM CEST, xiyou.wangcong@gmail.com wrote:
>> > On Sat, May 20, 2017 at 10:54 PM, Jiri Pirko <jiri@resnulli.us> wrote:
>> > > Sun, May 21, 2017 at 02:16:45AM CEST, xiyou.wangcong@gmail.com wrote:
>> > > > On Sat, May 20, 2017 at 6:01 AM, Jiri Pirko <jiri@resnulli.us> wrote:
>> > > > > +static void tcf_chain_destroy(struct tcf_chain *chain)
>> > > > > +{
>> > > > > +       list_del(&chain->list);
>> > > > > +       tcf_chain_flush(chain);
>> > > > >          kfree(chain);
>> > > > >   }
>> > > > > 
>> > > > > @@ -510,7 +517,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
>> > > > > 
>> > > > >          if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
>> > > > >                  tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
>> > > > > -               tcf_chain_destroy(chain);
>> > > > > +               tcf_chain_flush(chain);
>> > > > 
>> > > > 
>> > > > I wonder if we should return EBUSY and do nothing in case of busy?
>> > > > The chain is no longer visual to new actions after your list_del(), but
>> > > > the old one could still use and see it.
>> > > 
>> > > No. User request to flush the chain, that is what happens in the past
>> > > and that is what should happen now.
>> > > If there is still a reference, the chain_put will keep the empty chain.
>> > 
>> > But if you dump the actions, this chain is still shown "goto chain"?
>> 
>> Yes, it will be shown there.
>> 
>> 
>> > You can't claim you really delete it as long as actions can still
>> > see it and dump it.
>> 
>> No, user just wants to delete all the filters. That is done. User does
>> not care if the actual chain structure is there or not.
>> 
>
>I am trying to visualize a scenario where this is a problem.
>Using gact action it may be  possible to cause issues (requires
>validating - when i get time I will test).
>Steps are something like:
>
>1. create filter on chain 11 (refcnt = 1)

refcnt will be 0, chain->filter_chain will be non-NULL.
Please see the code before you assume anything. Namely tcf_chain_get and
tcf_chain_put.


>2. create gact action index 5 goto chain 11 (refcnt =2)

refcnt will be 1 after this


>3'. create new filter on chain 0 ... action gact index 5
>3''. create new filter on chain 0 ... action gact index 5
>
>
>None of the #3 steps will increment the refcnt.

Right


>Delete the filter from #1 (refcnt becomes 1)

Right, refcnt was 1, after delete will still be 1


>Delete the filter from #3'1 (refcnt = 0, destroy happens)

No. refcnt will still be 1.


>Filter #3'' is still hanging there. Dump that and strange things
>happen.

No. I see nothing strange.


>
>cheers,
>jamal

^ permalink raw reply

* [PATCH v2 3/4] net-next: stmmac: use SPEED_xxx instead of raw value
From: Corentin Labbe @ 2017-05-22 12:33 UTC (permalink / raw)
  To: peppe.cavallaro, alexandre.torgue; +Cc: netdev, linux-kernel, Corentin Labbe
In-Reply-To: <20170522123347.5295-1-clabbe.montjoie@gmail.com>

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 190686e39835..9bf09100c199 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -812,12 +812,12 @@ static void stmmac_adjust_link(struct net_device *dev)
 		if (phydev->speed != priv->speed) {
 			new_state = true;
 			switch (phydev->speed) {
-			case 1000:
+			case SPEED_1000:
 				if (priv->plat->has_gmac ||
 				    priv->plat->has_gmac4)
 					ctrl &= ~priv->hw->link.port;
 				break;
-			case 100:
+			case SPEED_100:
 				if (priv->plat->has_gmac ||
 				    priv->plat->has_gmac4) {
 					ctrl |= priv->hw->link.port;
@@ -826,7 +826,7 @@ static void stmmac_adjust_link(struct net_device *dev)
 					ctrl &= ~priv->hw->link.port;
 				}
 				break;
-			case 10:
+			case SPEED_10:
 				if (priv->plat->has_gmac ||
 				    priv->plat->has_gmac4) {
 					ctrl |= priv->hw->link.port;
-- 
2.13.0

^ permalink raw reply related

* [PATCH v2 2/4] net-next: stmmac: Remove unnecessary parenthesis
From: Corentin Labbe @ 2017-05-22 12:33 UTC (permalink / raw)
  To: peppe.cavallaro, alexandre.torgue; +Cc: netdev, linux-kernel, Corentin Labbe
In-Reply-To: <20170522123347.5295-1-clabbe.montjoie@gmail.com>

Signed-off-by: Corentin Labbe <clabbe.montjoie@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 94b37323844b..190686e39835 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -799,7 +799,7 @@ static void stmmac_adjust_link(struct net_device *dev)
 		 * If not, we operate in half-duplex mode. */
 		if (phydev->duplex != priv->oldduplex) {
 			new_state = true;
-			if (!(phydev->duplex))
+			if (!phydev->duplex)
 				ctrl &= ~priv->hw->link.duplex;
 			else
 				ctrl |= priv->hw->link.duplex;
-- 
2.13.0

^ permalink raw reply related

* [PATCH v2 0/4] net-next: stmmac: rework the speed selection
From: Corentin Labbe @ 2017-05-22 12:33 UTC (permalink / raw)
  To: peppe.cavallaro, alexandre.torgue; +Cc: netdev, linux-kernel, Corentin Labbe

Hello

The current stmmac_adjust_link() part which handle speed have
some if (has_platform) code and my dwmac-sun8i will add more of them.

So we need to handle better speed selection.
Moreover the struct link member speed and port are hard to guess their
purpose. And their unique usage are to be combined for writing speed.

My first try was to create an adjust_link() in stmmac_ops but it duplicate some code

The current solution is to have direct value for 10/100/1000 and a mask for them.

The first 3 patchs fix some minor problem found in stmmac_adjust_link() and reported by Florian Fainelli in my previous serie.
The last patch is the real work.

This serie is tested on cubieboard2 (dwmac1000) and opipc (dwmac-sun8i).

Regards

Changes since v2:
- use true/false for new_state in patch #1

Corentin Labbe (4):
  net: stmmac: Convert new_state to bool
  net: stmmac: Remove unnecessary parenthesis
  net: stmmac: use SPEED_xxx instead of raw value
  net: stmmac: rework the speed selection

 drivers/net/ethernet/stmicro/stmmac/common.h       |  8 +++---
 .../net/ethernet/stmicro/stmmac/dwmac1000_core.c   | 26 ++++++++++--------
 .../net/ethernet/stmicro/stmmac/dwmac100_core.c    |  6 +++--
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c  | 26 ++++++++++--------
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 31 +++++++---------------
 5 files changed, 48 insertions(+), 49 deletions(-)

-- 
2.13.0

^ permalink raw reply

* [PATCH] net: set default value for somaxconn
From: Roman Kapl @ 2017-05-22 12:22 UTC (permalink / raw)
  To: davem, netdev; +Cc: Roman Kapl

The default value for somaxconn is set in sysctl_core_net_init(), but this
function is not called when kernel is configured without CONFIG_SYSCTL.

This results in the kernel not being able to accept TCP connections,
because the backlog has zero size. Usually, the user ends up with:
"TCP: request_sock_TCP: Possible SYN flooding on port 7. Dropping request.  Check SNMP counters."

Before ef547f2ac16 (tcp: remove max_qlen_log), the effects were less
severe, because the backlog was always at least eight slots long.

Signed-off-by: Roman Kapl <roman.kapl@sysgo.com>
---
 net/core/net_namespace.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1934efd..4f3bbff 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -37,6 +37,9 @@ EXPORT_SYMBOL_GPL(net_namespace_list);
 struct net init_net = {
 	.count		= ATOMIC_INIT(1),
 	.dev_base_head	= LIST_HEAD_INIT(init_net.dev_base_head),
+	.core = {
+		.sysctl_somaxconn = SOMAXCONN,
+	},
 };
 EXPORT_SYMBOL(init_net);
 
-- 
2.10.1

^ permalink raw reply related

* Re: 4.12-RC2 BUG: scheduling while atomic: irq/47-iwlwifi
From: Arend van Spriel @ 2017-05-22 12:09 UTC (permalink / raw)
  To: Johannes Berg, Sander Eikelenboom, linux-wireless; +Cc: netdev
In-Reply-To: <1495450628.2653.14.camel@sipsolutions.net>

On 5/22/2017 12:57 PM, Johannes Berg wrote:
> On Mon, 2017-05-22 at 12:36 +0200, Sander Eikelenboom wrote:
>> Hi,
>>
>> I encountered this splat with 4.12-RC2.
> 
> Ugh, yeah, I should've seen that in the review.
> 
> Arend, please take a look at this. cfg80211_sched_scan_results() cannot
> sleep, so you can't rtnl_lock() in there. Looks like you can just rely
> on RCU though?

I see. I think you are right on RCU. Don't have the code in front of me 
now, but I think the lookup has an ASSERT_RTNL. Will look into it after 
my monday meeting :-p

Regards,
Arend

^ permalink raw reply

* Re: [PATCH v4 next 0/3] modules: automatic module loading restrictions
From: Solar Designer @ 2017-05-22 12:08 UTC (permalink / raw)
  To: Djalal Harouni
  Cc: linux-kernel, netdev, linux-security-module, kernel-hardening,
	Andy Lutomirski, Kees Cook, Andrew Morton, Rusty Russell,
	Serge E. Hallyn, Jessica Yu, David S. Miller, James Morris,
	Paul Moore, Stephen Smalley, Greg Kroah-Hartman, Tetsuo Handa,
	Ingo Molnar, Linux API, Dongsu Park, Casey Schaufler,
	Jonathan Corbet, Arnaldo Carvalho de Melo, Mauro Carvalho Chehab,
	Pete
In-Reply-To: <1495454226-10027-1-git-send-email-tixxdz@gmail.com>

Hi Djalal,

Thank you for your work on this!

On Mon, May 22, 2017 at 01:57:03PM +0200, Djalal Harouni wrote:
> *) When modules_autoload_mode is set to (2), automatic module loading is
> disabled for all. Once set, this value can not be changed.

What purpose does this securelevel-like property ("Once set, this value
can not be changed.") serve here?  I think this mode 2 is needed, but
without this extra property, which is bypassable by e.g. explicitly
loaded kernel modules anyway (and that's OK).

I'm sorry if this has been discussed before.

Alexander

^ permalink raw reply

* [PATCH v4 next 3/3] modules:capabilities: add a per-task modules auto-load mode
From: Djalal Harouni @ 2017-05-22 11:57 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-security-module, kernel-hardening,
	Andy Lutomirski, Kees Cook, Andrew Morton, Rusty Russell,
	Serge E. Hallyn, Jessica Yu
  Cc: David S. Miller, James Morris, Paul Moore, Stephen Smalley,
	Greg Kroah-Hartman, Tetsuo Handa, Ingo Molnar, Linux API,
	Dongsu Park, Casey Schaufler, Jonathan Corbet,
	Arnaldo Carvalho de Melo, Mauro Carvalho Chehab, Peter Zijlstra,
	Zendyani, linux-doc, Al Viro, Ben Hutchings, Djalal Harouni
In-Reply-To: <1495454226-10027-1-git-send-email-tixxdz@gmail.com>

Previous patches added the global sysctl "modules_autoload_mode". This patch
make it possible to support process trees, containers, and sandboxes by
providing an inherited per-task "modules_autoload_mode" flag that cannot be
re-enabled once disabled. This allows to restrict automatic module loading
without affecting the rest of the system.

Why we need this ?

Usually a request to a kernel feature that is implemented by a module
that is not loaded may trigger automatic module loading feature,
allowing to transparently satisfy userspace, and provide numeours
features as they are needed. In this case an implicit kernel module load
operation happens.

In most cases to load or unload a kernel module, an explicit operation
happens where programs are required to have CAP_SYS_MODULE capability to
perform so. However, in general with implicit module loading, no
capabilities are required as automatic module loading is one of the most
important and transparent operations of Linux.

Recent vulnerabilities showed that automatic module loading can be
abused in order to expose more bugs. Some of these vulnerabilities are:

* DCCP use after free CVE-2017-6074 [1]
  Unprivileged to local root PoC.

* XFRM framework CVE-2017-7184 [2]
  As advertised it seems it was used to break Ubuntu at a security
  contest.

* n_hldc CVE-2017-2636
* L2TPv3 CVE-2016-10200

Currently most of Linux code is in a form of modules, and not all
modules are written or maintained in the same way. In a container or
sandbox world, apps can be moved from one context to another or from
one Linux system to another one, the ability to restrict some of these
apps to load extra kernel modules will prevent exposing some kernel
interfaces that have not been updated withing such systems.

The DCCP vulnerability CVE-2017-6074 that can be triggered by
unprivileged, or CVE-2017-7184 in the XFRM framework are some recent
real examples. CVE-2017-7184 was used to break Ubuntu at a security
contest. Ubuntu is more of desktop distro, using a global switch to
disable automatic module loading will harm users. Actually this design
will always end up being ignored by such kind of systems that need to
offer a competitive and interactive solution for their users.

>From this and from observing how apps are being run, this patch
introduces a per-task "modules_autoload_mode" to restrict automatic
module loading. This offers the following advantages:

1) Automatic module loading is still available to the rest of the
system.

2) It is easy to use in containers and sandboxes. DCCP example could
have been used to escape containers. The XFRM framework CVE-2017-7184
needs CAP_NET_ADMIN, but attackers may start to target CAP_NET_ADMIN,
a per-task flag will make it harder.

3) Suitable for desktop and more interactive Linux systems.

4) Will allow in future to implement a per user policy.
The user database format is old and not extensible, as discussed maybe
with a modern format we may achieve the following:

User=djalal
NewKernelFeatures=yes

Which means that that interactive user will be allowed to load extra
Linux features. Others, volatile accounts or guests can be easily
blocked from doing so.

5) CAP_NET_ADMIN is useful, it handles lot of operations, at same time it
started to look more like CAP_SYS_ADMIN which is overloaded. We need
CAP_NET_ADMIN, containers need it, but at same time maybe we do not
want programs running with it to load 'netdev-%s' modules. Having an
extra per-task flag allow to discharge a bit CAP_NET_ADMIN and clearly
target automatic module loading operations.

Usage:
------

To set the per-task "modules_autoload_mode":

        prctl(PR_SET_MODULES_AUTOLOAD_MODE, mode, 0, 0, 0);

When a module auto-load request is triggered by current task, then the
operation has first to satisfy the per-task access mode before attempting
to implicitly load the module. Once set, this setting is inherited across
fork, clone and execve.

Prior to use, the task must call prctl(PR_SET_NO_NEW_PRIVS, 1) or run with
CAP_SYS_ADMIN privileges in its namespace.  If these are not true, -EACCES
will be returned.  This requirement ensures that unprivileged programs cannot
affect the behaviour or surprise privileged children.

The per-task "modules_autoload_mode" supports the following values:
0       There are no restrictions, usually the default unless set
        by parent.
1       The task must have CAP_SYS_MODULE to be able to trigger a
        module auto-load operation, or CAP_NET_ADMIN for modules with
        a 'netdev-%s' alias.
2       Automatic modules loading is disabled for the current task.

The mode may only be increased, never decreased, thus ensuring that once
applied, processes can never relax their setting. This make it easy for
developers and users to handle.

Note that even if the per-task "modules_autoload_mode" allows to auto-load
the corresponding modules, automatic module loading may still fail due to
the global sysctl "modules_autoload_mode". For more details please see
Documentation/sysctl/kernel.txt, section "modules_autoload_mode".

When a request to a kernel module is denied, the module name with the
corresponding process name and its pid are logged. Administrators can use
such information to explicitly load the appropriate modules.

The original idea of module auto-load restriction comes from
'GRKERNSEC_MODHARDEN' config option.

Testing per-task or per container setup
---------------------------------------

The following tool can be used to test the feature:
https://gist.githubusercontent.com/tixxdz/f6d77e5a45f9f8cfa4bcc0ab526ce5cf/raw/5f12f98e4dfc8a94f76b13dc290f077a153e74d8/pr_modules_autoload_mode_test.c

Example 1)

Before patch:
$ lsmod | grep ipip -
$ sudo ip tunnel add mytun mode ipip remote 10.0.2.100 local 10.0.2.15 ttl 255
$ lsmod | grep ipip -
ipip                   16384  0
tunnel4                16384  1 ipip
ip_tunnel              28672  1 ipip
$ grep Modules /proc/self/status
ModulesAutoloadMode:    0

After patch:
Set task "modules_autoload_mode" to disabled.
$ lsmod | grep ipip -
$ grep Modules /proc/self/status
ModulesAutoloadMode:    0
$ su - root
# ./pr_modules_autoload_mode_test 2
task modules_autoload_mode: 2
# grep Modules /proc/self/status
ModulesAutoloadMode:    2
# ip tunnel add mytun mode ipip remote 10.0.2.100 local 10.0.2.15 ttl 255
add tunnel "tunl0" failed: No such device
...
[  634.954652] module: automatic module loading of netdev-tunl0 by "ip"[1560] was denied
[  634.955775] module: automatic module loading of tunl0 by "ip"[1560] was denied
...

Example 2)

Sample with XFRM tunnel mode.

Before patch:
$ lsmod | grep xfrm -
$ grep Modules /proc/self/status
ModulesAutoloadMode:    0
$ sudo ip xfrm state add src 10.0.2.100 dst 10.0.1.100 proto esp spi $id1 \
> reqid $id2 mode tunnel auth "hmac(sha256)" $key1 enc "cbc(aes)" $key2
$ lsmod | grep xfrm
xfrm4_mode_tunnel      16384  2

After patch:
Set task "modules_autoload_mode" to disabled.
$ lsmod | grep xfrm -
$ grep Modules /proc/self/status
ModulesAutoloadMode:    0
$ su - root
# ./pr_modules_autoload_mode_test 2
task modules_autoload_mode: 2
# grep Modules /proc/self/status
ModulesAutoloadMode:    2
# ip xfrm state add src 10.0.2.100 dst 10.0.1.100 proto esp spi $id1 \
> reqid $id2 mode tunnel auth "hmac(sha256)" $key1 enc "cbc(aes)" $key2
RTNETLINK answers: Protocol not supported
...
[ 3458.139490] module: automatic module loading of xfrm-mode-2-1 by "ip"[1506] was denied
...

Example 3)

Here we use DCCP as an example since the public PoC was against it.

DCCP use after free CVE-2017-6074 (unprivileged to local root):
The code path can be triggered by unprivileged, using the trigger.c
program for DCCP use after free [3] and that was fixed by
commit 5edabca9d4cff7f "dccp: fix freeing skb too early for IPV6_RECVPKTINFO".

Before patch:
$ lsmod | grep dccp
$ strace ./dccp_trigger
...
socket(AF_INET6, SOCK_DCCP, IPPROTO_IP) = 3
...
$ lsmod | grep dccp
dccp_ipv6              24576  5
dccp_ipv4              24576  5 dccp_ipv6
dccp                  102400  2 dccp_ipv6,dccp_ipv4
$ grep Modules /proc/self/status
ModulesAutoloadMode:    0

After patch:

Set task "modules_autoload_mode" to 1, privileged mode.
$ lsmod | grep dccp
$ ./pr_set_no_new_privs
$ grep NoNewPrivs /proc/self/status
NoNewPrivs:     1
$ ./pr_modules_autoload_mode_test 1
$ grep Modules /proc/self/status
ModulesAutoloadMode:    1
$ strace ./dccp_trigger
...
socket(AF_INET6, SOCK_DCCP, IPPROTO_IP) = -1 ESOCKTNOSUPPORT (Socket type not supported)
...
$ lsmod | grep dccp
$ dmesg
...
[ 4662.171994] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1759] was denied
[ 4662.177284] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1759] was denied
[ 4662.180181] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1759] was denied
[ 4662.181709] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1759] was denied

Set task "modules_autoload_mode" to 2, disabled mode.
$ lsmod | grep dccp
$ su - root
# ./pr_modules_autoload_mode_test 2
task modules_autoload_mode: 2
# grep Modules /proc/self/status
ModulesAutoloadMode:    2
# strace ./dccp_trigger
...
socket(AF_INET6, SOCK_DCCP, IPPROTO_IP) = -1 ESOCKTNOSUPPORT (Socket type not supported)
...
...
[ 5154.218740] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1873] was denied
[ 5154.219828] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1873] was denied
[ 5154.221814] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1873] was denied
[ 5154.222731] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1873] was denied

As showed, this blocks automatic module loading per-task. This allows to
provide a usable system, where only some sandboxed apps or containers will be
restricted to trigger automatic module loading, other parts of the
system can continue to use the system as it is which is the case of the
desktop.

[1] http://www.openwall.com/lists/oss-security/2017/02/22/3
[2] http://www.openwall.com/lists/oss-security/2017/03/29/2
[3] https://github.com/xairy/kernel-exploits/tree/master/CVE-2017-6074

Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Djalal Harouni <tixxdz@gmail.com>
---
 Documentation/filesystems/proc.txt                 |   3 +
 Documentation/userspace-api/index.rst              |   1 +
 .../userspace-api/modules_autoload_mode.rst        | 115 +++++++++++++++++++++
 fs/proc/array.c                                    |   6 ++
 include/linux/init_task.h                          |   8 ++
 include/linux/module.h                             |  26 ++++-
 include/linux/sched.h                              |   5 +
 include/uapi/linux/prctl.h                         |   8 ++
 kernel/module.c                                    |  61 ++++++++++-
 security/commoncap.c                               |  38 ++++++-
 10 files changed, 263 insertions(+), 8 deletions(-)
 create mode 100644 Documentation/userspace-api/modules_autoload_mode.rst

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index adba21b..58127f0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -194,6 +194,7 @@ read the file /proc/PID/status:
   CapBnd: ffffffffffffffff
   NoNewPrivs:     0
   Seccomp:        0
+  ModulesAutoloadMode:    0
   voluntary_ctxt_switches:        0
   nonvoluntary_ctxt_switches:     1
 
@@ -267,6 +268,8 @@ Table 1-2: Contents of the status files (as of 4.8)
  CapBnd                      bitmap of capabilities bounding set
  NoNewPrivs                  no_new_privs, like prctl(PR_GET_NO_NEW_PRIV, ...)
  Seccomp                     seccomp mode, like prctl(PR_GET_SECCOMP, ...)
+ ModulesAutoloadMode         modules auto-load mode, like
+                             prctl(PR_GET_MODULES_AUTOLOAD_MODE, ...)
  Cpus_allowed                mask of CPUs on which this process may run
  Cpus_allowed_list           Same as previous, but in "list format"
  Mems_allowed                mask of memory nodes allowed to this process
diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index 7b2eb1b..bfd51b7 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -17,6 +17,7 @@ place where this information is gathered.
    :maxdepth: 2
 
    no_new_privs
+   modules_autoload_mode
    seccomp_filter
    unshare
 
diff --git a/Documentation/userspace-api/modules_autoload_mode.rst b/Documentation/userspace-api/modules_autoload_mode.rst
new file mode 100644
index 0000000..7355b00
--- /dev/null
+++ b/Documentation/userspace-api/modules_autoload_mode.rst
@@ -0,0 +1,115 @@
+======================================
+Per-task module auto-load restrictions
+======================================
+
+
+Introduction
+============
+
+Usually a request to a kernel feature that is implemented by a module
+that is not loaded may trigger automatic module loading feature, allowing
+to transparently satisfy userspace, and provide numerous other features
+as they are needed. In this case an implicit kernel module load
+operation happens.
+
+In most cases to load or unload a kernel module, an explicit operation
+happens where programs are required to have ``CAP_SYS_MODULE`` capability
+to perform so. However, with implicit module loading, no capabilities are
+required, or only ``CAP_NET_ADMIN`` in rare cases where the module has the
+'netdev-%s' alias. Historically this was always the case as automatic
+module loading is one of the most important and transparent operations
+of Linux, users expect that their programs just work, yet, recent cases
+showed that this can be abused by unprivileged users or attackers to load
+modules that were not updated, or modules that contain bugs and
+vulnerabilities.
+
+Currently most of Linux code is in a form of modules, hence, allowing to
+control automatic module loading in some cases is as important as the
+operation itself, especially in the context where Linux is used in
+different appliances.
+
+Restricting automatic module loading allows administratros to have the
+appropriate time to update or deny module autoloading in advance. In a
+container or sandbox world where apps can be moved from one context to
+another, the ability to restrict some containers or apps to load extra
+kernel modules will prevent exposing some kernel interfaces that may not
+receive the same care as some other parts of the core. The DCCP vulnerability
+CVE-2017-6074 that can be triggered by unprivileged, or CVE-2017-7184
+in the XFRM framework are some real examples where users or programs are
+able to expose such kernel interfaces and escape their sandbox.
+
+The per-task ``modules_autoload_mode`` allow to restrict automatic module
+loading per task, preventing the kernel from exposing more of its
+interface. This is particularly useful for containers and sandboxes as
+noted above, they are restricted from affecting the rest of the system
+without affecting its functionality, automatic module loading is still
+available for others.
+
+
+Usage
+=====
+
+When the kernel is compiled with modules support ``CONFIG_MODULES``, then:
+
+``PR_SET_MODULES_AUTOLOAD_MODE``:
+        Set the current task ``modules_autoload_mode``. When a module
+        auto-load request is triggered by current task, then the
+        operation has first to satisfy the per-task access mode before
+        attempting to implicitly load the module. As an example,
+        automatic loading of modules that contain bugs or vulnerabilities
+        can be restricted and unprivileged users can no longer abuse such
+        interfaces. Once set, this setting is inherited across ``fork(2)``,
+        ``clone(2)`` and ``execve(2)``.
+
+        Prior to use, the task must call ``prctl(PR_SET_NO_NEW_PRIVS, 1)``
+        or run with ``CAP_SYS_ADMIN`` privileges in its namespace.  If
+        these are not true, ``-EACCES`` will be returned.  This requirement
+        ensures that unprivileged programs cannot affect the behaviour or
+        surprise privileged children.
+
+        Usage:
+                ``prctl(PR_SET_MODULES_AUTOLOAD_MODE, mode, 0, 0, 0);``
+
+        The 'mode' argument supports the following values:
+        0       There are no restrictions, usually the default unless set
+                by parent.
+        1       The task must have ``CAP_SYS_MODULE`` to be able to trigger a
+                module auto-load operation, or ``CAP_NET_ADMIN`` for modules
+                with a 'netdev-%s' alias.
+        2       Automatic modules loading is disabled for the current task.
+
+        The mode may only be increased, never decreased, thus ensuring
+        that once applied, processes can never relax their setting.
+
+
+        Returned values:
+        0               On success.
+        ``-EINVAL``     If 'mode' is not valid, or the operation is not
+                        supported.
+        ``-EACCES``     If task does not have ``CAP_SYS_ADMIN`` in its namespace
+                        or is not running with ``no_new_privs``.
+        ``-EPERM``      If 'mode' is less strict than current task
+                        ``modules_autoload_mode``.
+
+
+        Note that even if the per-task ``modules_autoload_mode`` allows to
+        auto-load the corresponding modules, automatic module loading
+        may still fail due to the global sysctl ``modules_autoload_mode``.
+        For more details please see Documentation/sysctl/kernel.txt,
+        section "modules_autoload_mode".
+
+
+        When a request to a kernel module is denied, the module name with the
+        corresponding process name and its pid are logged. Administrators can
+        use such information to explicitly load the appropriate modules.
+
+
+``PR_GET_MODULES_AUTOLOAD_MODE``:
+        Return the current task ``modules_autoload_mode``.
+
+        Usage:
+                ``prctl(PR_GET_MODULES_AUTOLOAD_MODE, 0, 0, 0, 0);``
+
+        Returned values:
+        mode            The task's ``modules_autoload_mode``
+        ``-ENOSYS``     If the kernel was compiled without ``CONFIG_MODULES``.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 88c3555..b2113e9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -88,6 +88,7 @@
 #include <linux/string_helpers.h>
 #include <linux/user_namespace.h>
 #include <linux/fs_struct.h>
+#include <linux/module.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -346,10 +347,15 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
 
 static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
 {
+	int autoload = task_modules_autoload_mode(p);
+
 	seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p));
 #ifdef CONFIG_SECCOMP
 	seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
 #endif
+	if (autoload != -ENOSYS)
+		seq_put_decimal_ull(m, "\nModulesAutoloadMode:\t", autoload);
+
 	seq_putc(m, '\n');
 }
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e049526..97fbb08 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -159,6 +159,13 @@ extern struct cred init_cred;
 # define INIT_CGROUP_SCHED(tsk)
 #endif
 
+#ifdef CONFIG_MODULES
+# define INIT_MODULES_AUTOLOAD_MODE(tsk)				\
+	.modules_autoload_mode = 0,
+#else
+# define INIT_MODULES_AUTOLOAD_MODE(tsk)
+#endif
+
 #ifdef CONFIG_PERF_EVENTS
 # define INIT_PERF_EVENTS(tsk)						\
 	.perf_event_mutex = 						\
@@ -257,6 +264,7 @@ extern struct cred init_cred;
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
 	INIT_PUSHABLE_TASKS(tsk)					\
 	INIT_CGROUP_SCHED(tsk)						\
+	INIT_MODULES_AUTOLOAD_MODE(tsk)					\
 	.ptraced	= LIST_HEAD_INIT(tsk.ptraced),			\
 	.ptrace_entry	= LIST_HEAD_INIT(tsk.ptrace_entry),		\
 	.real_parent	= &tsk,						\
diff --git a/include/linux/module.h b/include/linux/module.h
index 9b64896..9f6ec47 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -13,6 +13,7 @@
 #include <linux/kmod.h>
 #include <linux/init.h>
 #include <linux/elf.h>
+#include <linux/sched.h>
 #include <linux/stringify.h>
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
@@ -507,7 +508,16 @@ bool is_module_percpu_address(unsigned long addr);
 bool is_module_text_address(unsigned long addr);
 
 /* Determine whether a module auto-load operation is permitted. */
-int may_autoload_module(char *kmod_name, int allow_cap);
+int may_autoload_module(struct task_struct *task, char *kmod_name, int allow_cap);
+
+/* Set modules_autoload_mode of current task */
+int task_set_modules_autoload_mode(unsigned long value);
+
+/* Read task's modules_autoload_mode */
+static inline int task_modules_autoload_mode(struct task_struct *task)
+{
+	return task->modules_autoload_mode;
+}
 
 static inline bool within_module_core(unsigned long addr,
 				      const struct module *mod)
@@ -653,11 +663,23 @@ static inline bool is_livepatch_module(struct module *mod)
 
 #else /* !CONFIG_MODULES... */
 
-static inline int may_autoload_module(char *kmod_name, int allow_cap)
+static inline int may_autoload_module(struct task_struct *task, char *kmod_name,
+				      int allow_cap)
 {
 	return -ENOSYS;
 }
 
+int task_set_modules_autoload_mode(unsigned long value)
+{
+	return -ENOSYS;
+}
+
+static inline int task_modules_autoload_mode(struct task_struct *task)
+{
+	return -ENOSYS;
+}
+
+static inline bool within_module_core(unsigned long addr,
 static inline struct module *__module_address(unsigned long addr)
 {
 	return NULL;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c533851..031a369 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -613,6 +613,11 @@ struct task_struct {
 
 	struct restart_block		restart_block;
 
+#ifdef CONFIG_MODULES
+	/* per-task modules auto-load mode */
+	unsigned			modules_autoload_mode:2;
+#endif
+
 	pid_t				pid;
 	pid_t				tgid;
 
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759..bf73607 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,12 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER		3
 # define PR_CAP_AMBIENT_CLEAR_ALL	4
 
+/*
+ * Control the per-task modules auto-load mode
+ *
+ * See Documentation/prctl/modules_autoload_mode.txt for more details.
+ */
+#define PR_SET_MODULES_AUTOLOAD_MODE	48
+#define PR_GET_MODULES_AUTOLOAD_MODE	49
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/module.c b/kernel/module.c
index ce7a146..8739e4c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4301,12 +4301,15 @@ EXPORT_SYMBOL_GPL(__module_text_address);
 /**
  * may_autoload_module - Determine whether a module auto-load operation
  * is permitted
+ * @task: The task performing the request
  * @kmod_name: The module name
  * @allow_cap: if positive, may allow to auto-load the module if this capability
  * is set
  *
- * Determine whether a module auto-load operation is allowed or not. The check
- * uses the sysctl "modules_autoload_mode" value.
+ * Determine whether a module auto-load operation is allowed or not. First we
+ * check if the task is allowed to perform the module auto-load request, we
+ * check per-task "modules_autoload_mode", if the access is not denied, then
+ * we check the global sysctl "modules_autoload_mode".
  *
  * This allows to have more control on automatic module loading, and align it
  * with explicit load/unload module operations. The kernel contains several
@@ -4323,11 +4326,14 @@ EXPORT_SYMBOL_GPL(__module_text_address);
  *
  * Returns 0 if the module request is allowed or -EPERM if not.
  */
-int may_autoload_module(char *kmod_name, int allow_cap)
+int may_autoload_module(struct task_struct *task, char *kmod_name, int allow_cap)
 {
-	if (modules_autoload_mode == MODULES_AUTOLOAD_ALLOWED)
+	unsigned int autoload = max_t(unsigned int, modules_autoload_mode,
+				      task->modules_autoload_mode);
+
+	if (autoload == MODULES_AUTOLOAD_ALLOWED)
 		return 0;
-	else if (modules_autoload_mode == MODULES_AUTOLOAD_PRIVILEGED) {
+	else if (autoload == MODULES_AUTOLOAD_PRIVILEGED) {
 		/* Check CAP_SYS_MODULE then allow_cap if valid */
 		if (capable(CAP_SYS_MODULE) ||
 		    (allow_cap > 0 && capable(allow_cap)))
@@ -4338,6 +4344,51 @@ int may_autoload_module(char *kmod_name, int allow_cap)
 	return -EPERM;
 }
 
+/**
+ * task_set_modules_autoload_mode - Set per-task modules auto-load mode
+ * @value: Value to set "modules_autoload_mode" of current task
+ *
+ * Set current task "modules_autoload_mode". The task has to have
+ * CAP_SYS_ADMIN in its namespace or be running with no_new_privs. This
+ * avoids scenarios where unprivileged tasks can affect the behaviour of
+ * privilged children by restricting module features.
+ *
+ * The task's "modules_autoload_mode" may only be increased, never decreased.
+ *
+ * Returns 0 on success, -EINVAL if @value is not valid, -EACCES if task does
+ * not have CAP_SYS_ADMIN in its namespace or is not running with no_new_privs,
+ * and finally -EPERM if @value is less strict than current task
+ * "modules_autoload_mode".
+ *
+ */
+int task_set_modules_autoload_mode(unsigned long value)
+{
+	if (value > MODULES_AUTOLOAD_DISABLED)
+		return -EINVAL;
+
+	/*
+	 * To set task "modules_autoload_mode" requires that the task has
+	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
+	 * This avoids scenarios where unprivileged tasks can affect the
+	 * behaviour of privileged children by restricting module features.
+	 */
+	if (!task_no_new_privs(current) &&
+	    security_capable_noaudit(current_cred(), current_user_ns(),
+				     CAP_SYS_ADMIN) != 0)
+		return -EACCES;
+
+	/*
+	 * The "modules_autoload_mode" may only be increased, never decreased,
+	 * ensuring that once applied, processes can never relax their settings.
+	 */
+	if (current->modules_autoload_mode > value)
+		return -EPERM;
+	else if (current->modules_autoload_mode < value)
+		current->modules_autoload_mode = value;
+
+	return 0;
+}
+
 /* Don't grab lock, we're oopsing. */
 void print_modules(void)
 {
diff --git a/security/commoncap.c b/security/commoncap.c
index d629d28..dbf0d51 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -886,6 +886,36 @@ static int cap_prctl_drop(unsigned long cap)
 	return commit_creds(new);
 }
 
+/*
+ * Implement PR_SET_MODULES_AUTOLOAD_MODE.
+ *
+ * Returns 0 on success, -ve on error.
+ */
+static int pr_set_modules_autoload_mode(unsigned long arg2, unsigned long arg3,
+					unsigned long arg4, unsigned long arg5)
+{
+	if (arg3 || arg4 || arg5)
+		return -EINVAL;
+
+	return task_set_modules_autoload_mode(arg2);
+}
+
+/*
+ * Implement PR_GET_MODULES_AUTOLOAD_MODE.
+ *
+ * Return current task "modules_autoload_mode", -ve on error.
+ */
+static inline int pr_get_modules_autoload_mode(unsigned long arg2,
+					       unsigned long arg3,
+					       unsigned long arg4,
+					       unsigned long arg5)
+{
+	if (arg3 || arg4 || arg5)
+		return -EINVAL;
+
+	return task_modules_autoload_mode(current);
+}
+
 /**
  * cap_task_prctl - Implement process control functions for this security module
  * @option: The process control function requested
@@ -1016,6 +1046,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			return commit_creds(new);
 		}
 
+	case PR_SET_MODULES_AUTOLOAD_MODE:
+		return pr_set_modules_autoload_mode(arg2, arg3, arg4, arg5);
+
+	case PR_GET_MODULES_AUTOLOAD_MODE:
+		return pr_get_modules_autoload_mode(arg2, arg3, arg4, arg5);
+
 	default:
 		/* No functionality available - continue with default */
 		return -ENOSYS;
@@ -1083,7 +1119,7 @@ int cap_kernel_module_request(char *kmod_name, int allow_cap)
 	int ret;
 	char comm[sizeof(current->comm)];
 
-	ret = may_autoload_module(kmod_name, allow_cap);
+	ret = may_autoload_module(current, kmod_name, allow_cap);
 	if (ret < 0)
 		pr_notice_ratelimited(
 			"module: automatic module loading of %.64s by \"%s\"[%d] was denied\n",
-- 
2.10.2

^ permalink raw reply related

* [PATCH v4 next 2/3] modules:capabilities: automatic module loading restriction
From: Djalal Harouni @ 2017-05-22 11:57 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-security-module, kernel-hardening,
	Andy Lutomirski, Kees Cook, Andrew Morton, Rusty Russell,
	Serge E. Hallyn, Jessica Yu
  Cc: David S. Miller, James Morris, Paul Moore, Stephen Smalley,
	Greg Kroah-Hartman, Tetsuo Handa, Ingo Molnar, Linux API,
	Dongsu Park, Casey Schaufler, Jonathan Corbet,
	Arnaldo Carvalho de Melo, Mauro Carvalho Chehab, Peter Zijlstra,
	Zendyani, linux-doc, Al Viro, Ben Hutchings, Djalal Harouni
In-Reply-To: <1495454226-10027-1-git-send-email-tixxdz@gmail.com>

Currently, an explicit call to load or unload kernel modules require
CAP_SYS_MODULE capability. However unprivileged users have always been
able to load some modules using the implicit auto-load operation. An
automatic module loading happens when programs request a kernel feature
from a module that is not loaded. In order to satisfy userspace, the
kernel then automatically load all these required modules.

Historically, the kernel was always able to automatically load modules
if they are not blacklisted. This is one of the most important and
transparent operations of Linux, it allows to provide numerous other
features as they are needed which is crucial for a better user experience.
However, as Linux is popular now and used for different appliances some
of these may need to control such operations. For such systems, recent
needs showed that in some cases allowing to control automatic module
loading is as important as the operation itself. Restricting unprivileged
programs or attackers that abuse this feature to load unused modules or
modules that contain bugs is a significant security measure.

This allows administrators or some special programs to have the
appropriate time to update and deny module autoloading in advance, then
blacklist the corresponding ones. Not doing so may affect the global state
of the machine, especially containers where some apps are moved from one
context to another and not having such mechanisms may allow to expose
and exploit the vulnerable parts to escape the container sandbox.

Embedded or IoT devices also started to ship as containers using generic
distros, some vendors do not have the appropriate time to make their own
OS, hence, using base images is getting popular. These setups may include
unnecessary modules that the final applications will not need. Untrusted
access may abuse the module auto-load feature to expose vulnerabilities.

As every code contains bugs or vulnerabilties, the following
vulnerabilities that affected some features that are often compiled as
modules could have been completely blocked, by restricting autoloading
modules if the system does not need them.

Past months:
* DCCP use after free CVE-2017-6074 [1]
  Unprivileged to local root.

* XFRM framework CVE-2017-7184 [2]
  As advertised it seems it was used to break Ubuntu on a security
  contest.

* n_hldc CVE-2017-2636
* L2TPv3 CVE-2016-10200

This is a short list. Fixing this is a high priority.

To improve the current status, this patch introduces "modules_autoload_mode"
kernel sysctl flag. The flag controls modules auto-load feature and
complements "modules_disabled" which apply to all modules operations.
This new flag allows to control only automatic module loading and if it is
allowed or not, aligning in the process the implicit operation with the
explicit one where both now are covered by capabilities checks.

The three modes that "modules_autoload_mode" support allow to provide
restrictions on automatic module loading without breaking user
experience.

The sysctl flag is available at "/proc/sys/kernel/modules_autoload_mode"

When modules_autoload_mode is set to (0), the default, there are no
restrictions.

When modules_autoload_mode is set to (1), processes must have
CAP_SYS_MODULE to be able to trigger a module auto-load operation,
or CAP_NET_ADMIN for modules with a 'netdev-%s' alias.

When modules_autoload_mode is set to (2), automatic module loading is
disabled for all. Once set, this value can not be changed.

Notes on relation between "modules_disabled=0" and
"modules_autoload_mode=2":
1) Restricting automatic module loading does not interfere with
explicit module load or unload operations.

2) New features provided by modules can be made available without
rebooting the system.

3) A bad version of a module can be unloaded and replaced with a
better one without rebooting the system.

The original idea of module auto-load restriction comes from
'GRKERNSEC_MODHARDEN' config option.

Testing
-------

Example 1)

Before:
$ lsmod | grep ipip -
$ sudo ip tunnel add mytun mode ipip remote 10.0.2.100 local 10.0.2.15 ttl 255
$ lsmod | grep ipip -
ipip                   16384  0
tunnel4                16384  1 ipip
ip_tunnel              28672  1 ipip
$ cat /proc/sys/kernel/modules_autoload_mode
0

After:
$ lsmod | grep ipip -
# echo 2 > /proc/sys/kernel/modules_autoload_mode
$ sudo ip tunnel add mytun mode ipip remote 10.0.2.100 local 10.0.2.15 ttl 255
add tunnel "tunl0" failed: No such device
$ dmesg
...
[ 1876.378389] module: automatic module loading of netdev-tunl0 by "ip"[1453] was denied
[ 1876.380994] module: automatic module loading of tunl0 by "ip"[1453] was denied
...
$ lsmod | grep ipip -
$

Example 2)

DCCP use after free CVE-2017-6074:
The code path can be triggered by unprivileged, using the trigger.c
program for DCCP use after free [3] and that was fixed by
commit 5edabca9d4cff7f "dccp: fix freeing skb too early for IPV6_RECVPKTINFO".

Before:
$ lsmod | grep dccp
$ strace ./dccp_trigger
...
socket(AF_INET6, SOCK_DCCP, IPPROTO_IP) = 3
...
$ lsmod | grep dccp
dccp_ipv6              24576  5
dccp_ipv4              24576  5 dccp_ipv6
dccp                  102400  2 dccp_ipv6,dccp_ipv4

After:
Only privileged:
# echo 1 > /proc/sys/kernel/modules_autoload_mode
$ lsmod | grep dccp
$ strace ./dccp_trigger
...
socket(AF_INET6, SOCK_DCCP, IPPROTO_IP) = -1 ESOCKTNOSUPPORT (Socket type not supported)
...
$ lsmod | grep dccp
$ dmesg
...
[  175.945063] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1390] was denied
[  175.947952] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1390] was denied
[  175.956061] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1390] was denied
[  175.959733] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1390] was denied

$ sudo strace ./dccp_trigger
...
socket(AF_INET6, SOCK_DCCP, IPPROTO_IP) = 3
...
$ lsmod | grep dccp
dccp_ipv6              24576  6
dccp_ipv4              24576  5 dccp_ipv6
dccp                  102400  2 dccp_ipv6,dccp_ipv4

Disable automatic module loading:
$ lsmod | grep dccp
$ su - root
# echo 2 > /proc/sys/kernel/modules_autoload_mode
# strace ./dccp_trigger
...
socket(AF_INET6, SOCK_DCCP, IPPROTO_IP) = -1 ESOCKTNOSUPPORT (Socket type not supported)
...
$ lsmod | grep dccp
$ dmesg
...
[  126.596545] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1291] was denied
[  126.598800] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1291] was denied
[  126.601264] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1291] was denied
[  126.602839] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1291] was denied

As an example, this blocks abuses, DCCP still can be explicilty loaded by
an administrator using modprobe, at same time automatic module loading is
disabled forever.

[1] http://www.openwall.com/lists/oss-security/2017/02/22/3
[2] http://www.openwall.com/lists/oss-security/2017/03/29/2
[3] https://github.com/xairy/kernel-exploits/tree/master/CVE-2017-6074

Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Andy Lutomirski <luto@kernel.org>
Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Djalal Harouni <tixxdz@gmail.com>
---
 Documentation/sysctl/kernel.txt | 51 +++++++++++++++++++++++++++++++++++++++++
 include/linux/module.h          | 19 ++++++++++++++-
 include/linux/security.h        |  3 ++-
 kernel/module.c                 | 42 +++++++++++++++++++++++++++++++++
 kernel/sysctl.c                 | 40 ++++++++++++++++++++++++++++++++
 security/commoncap.c            | 24 +++++++++++++++++++
 6 files changed, 177 insertions(+), 2 deletions(-)

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index bac23c1..3cc6592 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -43,6 +43,7 @@ show up in /proc/sys/kernel:
 - l2cr                        [ PPC only ]
 - modprobe                    ==> Documentation/debugging-modules.txt
 - modules_disabled
+- modules_autoload_mode
 - msg_next_id		      [ sysv ipc ]
 - msgmax
 - msgmnb
@@ -411,6 +412,56 @@ to false.  Generally used with the "kexec_load_disabled" toggle.
 
 ==============================================================
 
+modules_autoload_mode:
+
+A sysctl to control if modules auto-load feature is allowed or not.
+This sysctl complements "modules_disabled" which is for all module
+operations where this flag applies only to automatic module loading.
+Automatic module loading happens when programs request a kernel
+feature that is implemented by an unloaded module, the kernel
+automatically runs the program pointed by "modprobe" sysctl in order
+to load the corresponding module.
+
+Historically, the kernel was always able to automatically load modules
+if they are not blacklisted. This is one of the most important and
+transparent operations of Linux, it allows to provide numerous other
+features as they are needed which is crucial for a better user experience.
+However, as Linux is popular now and used for different appliances some
+of these may need to control such operations. For such systems, recent
+needs showed that in some cases allowing to control automatic module
+loading is as important as the operation itself. Restricting unprivileged
+programs or attackers that abuse this feature to load unused modules or
+modules that contain bugs is a significant security measure.
+
+The three modes that "modules_autoload_mode" support allow to provide
+restrictions on automatic module loading without breaking user
+experience.
+
+When modules_autoload_mode is set to (0), the default, there are no
+restrictions.
+
+When modules_autoload_mode is set to (1), processes must have
+CAP_SYS_MODULE to be able to trigger a module auto-load operation,
+or CAP_NET_ADMIN for modules with a 'netdev-%s' alias.
+
+When modules_autoload_mode is set to (2), automatic module loading is
+disabled for all. Once set, this value can not be changed.
+
+
+Notes on relation between "modules_disabled=0" and
+"modules_autoload_mode=2":
+1) Restricting automatic module loading does not interfere with
+explicit module load or unload operations.
+2) New features provided by modules can be made available without
+rebooting the system.
+3) A bad version of a module can be unloaded and replaced with a
+better one without rebooting the system.
+
+The original idea of module auto-load restriction comes from
+grsecurity 'GRKERNSEC_MODHARDEN' config option.
+
+==============================================================
+
 msg_next_id, sem_next_id, and shm_next_id:
 
 These three toggles allows to specify desired id for next allocated IPC
diff --git a/include/linux/module.h b/include/linux/module.h
index 21f5639..9b64896 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -261,7 +261,16 @@ struct notifier_block;
 
 #ifdef CONFIG_MODULES
 
-extern int modules_disabled; /* for sysctl */
+enum {
+	MODULES_AUTOLOAD_ALLOWED	= 0,
+	MODULES_AUTOLOAD_PRIVILEGED	= 1,
+	MODULES_AUTOLOAD_DISABLED	= 2,
+};
+
+extern int modules_disabled; /* sysctl for explicit module load/unload */
+extern int modules_autoload_mode; /* sysctl for automatic module loading */
+extern const int modules_autoload_max; /* max value for modules_autoload_mode */
+
 /* Get/put a kernel symbol (calls must be symmetric) */
 void *__symbol_get(const char *symbol);
 void *__symbol_get_gpl(const char *symbol);
@@ -497,6 +506,9 @@ bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr);
 bool is_module_percpu_address(unsigned long addr);
 bool is_module_text_address(unsigned long addr);
 
+/* Determine whether a module auto-load operation is permitted. */
+int may_autoload_module(char *kmod_name, int allow_cap);
+
 static inline bool within_module_core(unsigned long addr,
 				      const struct module *mod)
 {
@@ -641,6 +653,11 @@ static inline bool is_livepatch_module(struct module *mod)
 
 #else /* !CONFIG_MODULES... */
 
+static inline int may_autoload_module(char *kmod_name, int allow_cap)
+{
+	return -ENOSYS;
+}
+
 static inline struct module *__module_address(unsigned long addr)
 {
 	return NULL;
diff --git a/include/linux/security.h b/include/linux/security.h
index 2f4c9d3..90fe0cb 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -101,6 +101,7 @@ extern int cap_task_setscheduler(struct task_struct *p);
 extern int cap_task_setioprio(struct task_struct *p, int ioprio);
 extern int cap_task_setnice(struct task_struct *p, int nice);
 extern int cap_vm_enough_memory(struct mm_struct *mm, long pages);
+extern int cap_kernel_module_request(char *kmod_name, int allow_cap);
 
 struct msghdr;
 struct sk_buff;
@@ -928,7 +929,7 @@ static inline int security_kernel_create_files_as(struct cred *cred,
 
 static inline int security_kernel_module_request(char *kmod_name, int allow_cap)
 {
-	return 0;
+	return cap_kernel_module_request(kmod_name, allow_cap);
 }
 
 static inline int security_kernel_read_file(struct file *file,
diff --git a/kernel/module.c b/kernel/module.c
index 4a3665f..ce7a146 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -282,6 +282,8 @@ module_param(sig_enforce, bool_enable_only, 0644);
 
 /* Block module loading/unloading? */
 int modules_disabled = 0;
+int modules_autoload_mode = MODULES_AUTOLOAD_ALLOWED;
+const int modules_autoload_max = MODULES_AUTOLOAD_DISABLED;
 core_param(nomodule, modules_disabled, bint, 0);
 
 /* Waiting for a module to finish initializing? */
@@ -4296,6 +4298,46 @@ struct module *__module_text_address(unsigned long addr)
 }
 EXPORT_SYMBOL_GPL(__module_text_address);
 
+/**
+ * may_autoload_module - Determine whether a module auto-load operation
+ * is permitted
+ * @kmod_name: The module name
+ * @allow_cap: if positive, may allow to auto-load the module if this capability
+ * is set
+ *
+ * Determine whether a module auto-load operation is allowed or not. The check
+ * uses the sysctl "modules_autoload_mode" value.
+ *
+ * This allows to have more control on automatic module loading, and align it
+ * with explicit load/unload module operations. The kernel contains several
+ * modules, some of them are not updated often and may contain bugs and
+ * vulnerabilities.
+ *
+ * The "allow_cap" is passed by callers to explicitly note that the module has
+ * the appropriate alias and that the "allow_cap" capability is set. This is
+ * for backward compatibility, the aim is to have a clear picture where:
+ *
+ * 1) Implicit module loading is allowed
+ * 2) Implicit module loading as with the explicit one requires CAP_SYS_MODULE.
+ * 3) Implicit module loading as with the explicit one can be disabled.
+ *
+ * Returns 0 if the module request is allowed or -EPERM if not.
+ */
+int may_autoload_module(char *kmod_name, int allow_cap)
+{
+	if (modules_autoload_mode == MODULES_AUTOLOAD_ALLOWED)
+		return 0;
+	else if (modules_autoload_mode == MODULES_AUTOLOAD_PRIVILEGED) {
+		/* Check CAP_SYS_MODULE then allow_cap if valid */
+		if (capable(CAP_SYS_MODULE) ||
+		    (allow_cap > 0 && capable(allow_cap)))
+		       return 0;
+	}
+
+	/* MODULES_AUTOLOAD_DISABLED or not enough caps */
+	return -EPERM;
+}
+
 /* Don't grab lock, we're oopsing. */
 void print_modules(void)
 {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dfba1a..727c6a7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -186,6 +186,11 @@ static int proc_taint(struct ctl_table *table, int write,
 			       void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_MODULES
+static int modules_autoload_dointvec_minmax(struct ctl_table *table, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
 #ifdef CONFIG_PRINTK
 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -661,6 +666,16 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &one,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "modules_autoload_mode",
+		.data		= &modules_autoload_mode,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		/* Handle pinning to max value */
+		.proc_handler	= modules_autoload_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= (void *)&modules_autoload_max,
+	},
 #endif
 #ifdef CONFIG_UEVENT_HELPER
 	{
@@ -2334,6 +2349,31 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 }
 #endif
 
+#ifdef CONFIG_MODULES
+static int modules_autoload_dointvec_minmax(struct ctl_table *table, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table t;
+
+	/*
+	 * Only CAP_SYS_MODULE in init user namespace are allowed to change this
+	 */
+	if (write && !capable(CAP_SYS_MODULE))
+		return -EPERM;
+
+	t = *table;
+	/*
+	 * If "modules_autoload_mode" already equals max value
+	 * MODULES_AUTOLOAD_DISABLED, then modules autoload is disabled
+	 * and can not be changed anymore.
+	 */
+	if (modules_autoload_mode == modules_autoload_max)
+		t.extra1 = t.extra2;
+
+	return proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+}
+#endif
+
 struct do_proc_dointvec_minmax_conv_param {
 	int *min;
 	int *max;
diff --git a/security/commoncap.c b/security/commoncap.c
index 7abebd7..d629d28 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1069,6 +1069,29 @@ int cap_mmap_file(struct file *file, unsigned long reqprot,
 	return 0;
 }
 
+/**
+ * cap_kernel_module_request - Determine whether a module auto-load is permitted
+ * @kmod_name: The module name
+ * @allow_cap: if positive, may allow to auto-load the module if this capability
+ * is set
+ *
+ * Determine whether a module should be automatically loaded.
+ * Returns 0 if the module request should be allowed, -EPERM if not.
+ */
+int cap_kernel_module_request(char *kmod_name, int allow_cap)
+{
+	int ret;
+	char comm[sizeof(current->comm)];
+
+	ret = may_autoload_module(kmod_name, allow_cap);
+	if (ret < 0)
+		pr_notice_ratelimited(
+			"module: automatic module loading of %.64s by \"%s\"[%d] was denied\n",
+			kmod_name, get_task_comm(comm, current), current->pid);
+
+	return ret;
+}
+
 #ifdef CONFIG_SECURITY
 
 struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
@@ -1090,6 +1113,7 @@ struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
 	LSM_HOOK_INIT(task_setnice, cap_task_setnice),
 	LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
+	LSM_HOOK_INIT(kernel_module_request, cap_kernel_module_request),
 };
 
 void __init capability_add_hooks(void)
-- 
2.10.2

^ permalink raw reply related

* [PATCH v4 next 1/3] modules:capabilities: allow __request_module() to take a capability argument
From: Djalal Harouni @ 2017-05-22 11:57 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-security-module, kernel-hardening,
	Andy Lutomirski, Kees Cook, Andrew Morton, Rusty Russell,
	Serge E. Hallyn, Jessica Yu
  Cc: David S. Miller, James Morris, Paul Moore, Stephen Smalley,
	Greg Kroah-Hartman, Tetsuo Handa, Ingo Molnar, Linux API,
	Dongsu Park, Casey Schaufler, Jonathan Corbet,
	Arnaldo Carvalho de Melo, Mauro Carvalho Chehab, Peter Zijlstra,
	Zendyani, linux-doc, Al Viro, Ben Hutchings, Djalal Harouni
In-Reply-To: <1495454226-10027-1-git-send-email-tixxdz@gmail.com>

This is a preparation patch for the module auto-load restriction feature.

In order to restrict module auto-load operations we need to check if the
caller has CAP_SYS_MODULE capability. This allows to align security
checks of automatic module loading with the checks of the explicit operations.

However for "netdev-%s" modules, they are allowed to be loaded if
CAP_NET_ADMIN is set. Therefore, in order to not break this assumption,
and allow userspace to only load "netdev-%s" modules with CAP_NET_ADMIN
capability which is considered a privileged operation, we have two
choices: 1) parse "netdev-%s" alias and check the capability or 2) hand
the capability form request_module() to security_kernel_module_request()
hook and let the capability subsystem decide.

After a discussion with Rusty Russell [1], the suggestion was to pass
the capability from request_module() to security_kernel_module_request()
for 'netdev-%s' modules that need CAP_NET_ADMIN.

The patch does not update request_module(), it updates the internal
__request_module() that will take an extra "allow_cap" argument. If
positive, then automatic module load operation can be allowed.

__request_module() will be only called by networking code which is the
exception to this, so we do not break userspace and CAP_NET_ADMIN can
continue to load 'netdev-%s' modules. Other kernel code should continue
to use request_module() which calls security_kernel_module_request() and
will check for CAP_SYS_MODULE capability in next patch. Allowing more
control on who can trigger automatic module loading.

This patch updates security_kernel_module_request() to take the
'allow_cap' argument and SELinux which is currently the only user of
security_kernel_module_request() hook.

Based on patch by Rusty Russell:
https://lkml.org/lkml/2017/4/26/735

Cc: Serge Hallyn <serge@hallyn.com>
Cc: Andy Lutomirski <luto@kernel.org>
Suggested-by: Rusty Russell <rusty@rustcorp.com.au>
Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Djalal Harouni <tixxdz@gmail.com>

[1] https://lkml.org/lkml/2017/4/24/7
---
 include/linux/kmod.h      | 15 ++++++++-------
 include/linux/lsm_hooks.h |  4 +++-
 include/linux/security.h  |  4 ++--
 kernel/kmod.c             | 15 +++++++++++++--
 net/core/dev_ioctl.c      | 10 +++++++++-
 security/security.c       |  4 ++--
 security/selinux/hooks.c  |  2 +-
 7 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index c4e441e..a314432 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -32,18 +32,19 @@
 extern char modprobe_path[]; /* for sysctl */
 /* modprobe exit status on success, -ve on error.  Return value
  * usually useless though. */
-extern __printf(2, 3)
-int __request_module(bool wait, const char *name, ...);
-#define request_module(mod...) __request_module(true, mod)
-#define request_module_nowait(mod...) __request_module(false, mod)
+extern __printf(3, 4)
+int __request_module(bool wait, int allow_cap, const char *name, ...);
 #define try_then_request_module(x, mod...) \
-	((x) ?: (__request_module(true, mod), (x)))
+	((x) ?: (__request_module(true, -1, mod), (x)))
 #else
-static inline int request_module(const char *name, ...) { return -ENOSYS; }
-static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; }
+static inline __printf(3, 4)
+int __request_module(bool wait, int allow_cap, const char *name, ...)
+{ return -ENOSYS; }
 #define try_then_request_module(x, mod...) (x)
 #endif
 
+#define request_module(mod...) __request_module(true, -1, mod)
+#define request_module_nowait(mod...) __request_module(false, -1, mod)
 
 struct cred;
 struct file;
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index f7914d9..7688f79 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -578,6 +578,8 @@
  *	Ability to trigger the kernel to automatically upcall to userspace for
  *	userspace to load a kernel module with the given name.
  *	@kmod_name name of the module requested by the kernel
+ *	@allow_cap capability that allows to automatically load a kernel
+ *	module.
  *	Return 0 if successful.
  * @kernel_read_file:
  *	Read a file specified by userspace.
@@ -1516,7 +1518,7 @@ union security_list_options {
 	void (*cred_transfer)(struct cred *new, const struct cred *old);
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
-	int (*kernel_module_request)(char *kmod_name);
+	int (*kernel_module_request)(char *kmod_name, int allow_cap);
 	int (*kernel_read_file)(struct file *file, enum kernel_read_file_id id);
 	int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size,
 				     enum kernel_read_file_id id);
diff --git a/include/linux/security.h b/include/linux/security.h
index 549cb82..2f4c9d3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -325,7 +325,7 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
 void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
-int security_kernel_module_request(char *kmod_name);
+int security_kernel_module_request(char *kmod_name, int allow_cap);
 int security_kernel_read_file(struct file *file, enum kernel_read_file_id id);
 int security_kernel_post_read_file(struct file *file, char *buf, loff_t size,
 				   enum kernel_read_file_id id);
@@ -926,7 +926,7 @@ static inline int security_kernel_create_files_as(struct cred *cred,
 	return 0;
 }
 
-static inline int security_kernel_module_request(char *kmod_name)
+static inline int security_kernel_module_request(char *kmod_name, int allow_cap)
 {
 	return 0;
 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 563f97e..15c96e8 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -110,6 +110,7 @@ static int call_modprobe(char *module_name, int wait)
 /**
  * __request_module - try to load a kernel module
  * @wait: wait (or not) for the operation to complete
+ * @allow_cap: if positive, may allow modprobe if this capability is set.
  * @fmt: printf style format string for the name of the module
  * @...: arguments as specified in the format string
  *
@@ -120,10 +121,20 @@ static int call_modprobe(char *module_name, int wait)
  * must check that the service they requested is now available not blindly
  * invoke it.
  *
+ * If "allow_cap" is positive, The security subsystem will trust the caller
+ * that "allow_cap" may allow to load some modules with a specific alias,
+ * the security subsystem will make some exceptions based on that. This is
+ * primally useful for backward compatibility. A permission check should not
+ * be that strict and userspace should be able to continue to trigger module
+ * auto-loading if needed.
+ *
  * If module auto-loading support is disabled then this function
  * becomes a no-operation.
+ *
+ * This function should not be directly used by other subsystems, for that
+ * please call request_module().
  */
-int __request_module(bool wait, const char *fmt, ...)
+int __request_module(bool wait, int allow_cap, const char *fmt, ...)
 {
 	va_list args;
 	char module_name[MODULE_NAME_LEN];
@@ -150,7 +161,7 @@ int __request_module(bool wait, const char *fmt, ...)
 	if (ret >= MODULE_NAME_LEN)
 		return -ENAMETOOLONG;
 
-	ret = security_kernel_module_request(module_name);
+	ret = security_kernel_module_request(module_name, allow_cap);
 	if (ret)
 		return ret;
 
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d2..c494351 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -366,8 +366,16 @@ void dev_load(struct net *net, const char *name)
 	rcu_read_unlock();
 
 	no_module = !dev;
+	/*
+	 * First do the CAP_NET_ADMIN check, then let the security
+	 * subsystem checks know that this can be allowed since this is
+	 * a "netdev-%s" module and CAP_NET_ADMIN is set.
+	 *
+	 * For this exception call __request_module().
+	 */
 	if (no_module && capable(CAP_NET_ADMIN))
-		no_module = request_module("netdev-%s", name);
+		no_module = __request_module(true, CAP_NET_ADMIN,
+					     "netdev-%s", name);
 	if (no_module && capable(CAP_SYS_MODULE))
 		request_module("%s", name);
 }
diff --git a/security/security.c b/security/security.c
index 714433e..cedb790 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1021,9 +1021,9 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode)
 	return call_int_hook(kernel_create_files_as, 0, new, inode);
 }
 
-int security_kernel_module_request(char *kmod_name)
+int security_kernel_module_request(char *kmod_name, int allow_cap)
 {
-	return call_int_hook(kernel_module_request, 0, kmod_name);
+	return call_int_hook(kernel_module_request, 0, kmod_name, allow_cap);
 }
 
 int security_kernel_read_file(struct file *file, enum kernel_read_file_id id)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 158f6a0..85eeff6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3842,7 +3842,7 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode)
 	return ret;
 }
 
-static int selinux_kernel_module_request(char *kmod_name)
+static int selinux_kernel_module_request(char *kmod_name, int allow_cap)
 {
 	struct common_audit_data ad;
 
-- 
2.10.2


^ permalink raw reply related

* [PATCH v4 next 0/3] modules: automatic module loading restrictions
From: Djalal Harouni @ 2017-05-22 11:57 UTC (permalink / raw)
  To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-security-module-u79uwXL29TY76Z2rM5mHXA,
	kernel-hardening-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8,
	Andy Lutomirski, Kees Cook, Andrew Morton, Rusty Russell,
	Serge E. Hallyn, Jessica Yu
  Cc: David S. Miller, James Morris, Paul Moore, Stephen Smalley,
	Greg Kroah-Hartman, Tetsuo Handa, Ingo Molnar, Linux API,
	Dongsu Park, Casey Schaufler, Jonathan Corbet,
	Arnaldo Carvalho de Melo, Mauro Carvalho Chehab, Peter Zijlstra,
	Zendyani, linux-doc-u79uwXL29TY76Z2rM5mHXA, Al Viro,
	Ben Hutchings, Djalal Harouni

Hi List,

This is v4 of the automatic module load restriction series.

v1 and v2 implementation were presented as a stackable LSM [1] [2].
v3 was updated to be integrated with the core kernel inside the
capabilities subsystem as suggested by Kees Cook [3].

This v4 is even better, lot of documentation and code comments.

All suggestions were improved and fixed. Please see changelog for more
details.

These patches are against next-20170522

==============

Currently, an explicit call to load or unload kernel modules require
CAP_SYS_MODULE capability. However unprivileged users have always been
able to load some modules using the implicit auto-load operation. An
automatic module loading happens when programs request a kernel feature
from a module that is not loaded. In order to satisfy userspace, the
kernel then automatically load all these required modules.

Historically, the kernel was always able to automatically load modules
if they are not blacklisted. This is one of the most important and
transparent operations of Linux, it allows to provide numerous other
features as they are needed which is crucial for a better user experience.
However, as Linux is popular now and used for different appliances some
of these may need to control such operations. For such systems, recent
needs showed that in some cases allowing to control automatic module
loading is as important as the operation itself. Restricting unprivileged
programs or attackers that abuse this feature to load unused modules or
modules that contain bugs is a significant security measure.

This allows administrators or some special programs to have the
appropriate time to update and deny module autoloading in advance, then
blacklist the corresponding ones. Not doing so may affect the global state
of the machine, especially containers where some apps are moved from one
context to another and not having such mechanisms may allow to expose
and exploit the vulnerable parts to escape the container sandbox.

Embedded or IoT devices also started to ship as containers using generic
distros, some vendors do not have the appropriate time to make their own
OS, hence, using base images is getting popular. These setups may include
unnecessary modules that the final applications will not need. Untrusted
access may abuse the module auto-load feature to expose vulnerabilities.

As every code contains bugs or vulnerabilties, the following
vulnerabilities that affected some features that are often compiled as
modules could have been completely blocked, by restricting autoloading
modules if the system does not need them.

Past months:
* DCCP use after free CVE-2017-6074 [4]
  Unprivileged to local root.

* XFRM framework CVE-2017-7184 [5]
  As advertised it seems it was used to break Ubuntu on a security
  contest.

* n_hldc CVE-2017-2636
* L2TPv3 CVE-2016-10200

This is a short list.


To improve the current status, this series introduces a global
"modules_autoload_mode" sysctl flag, and a per-task one.

The sysctl controls modules auto-load feature and complements
"modules_disabled" which apply to all modules operations. This new flag
allows to control only automatic module loading and if it is allowed or
not, aligning in the process the implicit operation with the explicit one
where both now are covered by capabilities checks.

The three modes that "modules_autoload_mode" sysctl support allow to
provide restrictions on automatic module loading without breaking user
experience.

The sysctl flag is available at "/proc/sys/kernel/modules_autoload_mode"

*) When modules_autoload_mode is set to (0), the default, there are no
restrictions.

*) When modules_autoload_mode is set to (1), processes must have
CAP_SYS_MODULE to be able to trigger a module auto-load operation,
or CAP_NET_ADMIN for modules with a 'netdev-%s' alias.

*) When modules_autoload_mode is set to (2), automatic module loading is
disabled for all. Once set, this value can not be changed.

Notes on relation between "modules_disabled=0" and
"modules_autoload_mode=2":
1) Restricting automatic module loading does not interfere with
explicit module load or unload operations.

2) New features provided by modules can be made available without
rebooting the system.

3) A bad version of a module can be unloaded and replaced with a
better one without rebooting the system.


The original idea of module auto-load restriction comes from
'GRKERNSEC_MODHARDEN' config option.

==========================

The patches also support process trees, containers, and sandboxes by
providing an inherited per-task "modules_autoload_mode" flag that cannot be
re-enabled once disabled. This offers the following advantages:

1) Automatic module loading is still available to the rest of the
system.

2) It is easy to use in containers and sandboxes. DCCP example could
have been used to escape containers. The XFRM framework CVE-2017-7184
needs CAP_NET_ADMIN, but attackers may start to target CAP_NET_ADMIN,
a per-task flag will make it harder.

3) Suitable for desktop and more interactive Linux systems.

4) Will allow in future to implement a per user policy.
The user database format is old and not extensible, as discussed maybe
with a modern format we may achieve the following:

User=djalal
NewKernelFeatures=yes

Which means that that interactive user will be allowed to load extra
Linux features. Others, volatile accounts or guests can be easily
blocked from doing so.

5) CAP_NET_ADMIN is useful, it handles lot of operations, at same time it
started to look more like CAP_SYS_ADMIN which is overloaded. We need
CAP_NET_ADMIN, containers need it, but at same time maybe we do not
want programs running with it to load 'netdev-%s' modules. Having an
extra per-task flag allow to discharge a bit CAP_NET_ADMIN and clearly
target automatic module loading operations.


Usage:
        prctl(PR_SET_MODULES_AUTOLOAD_mode, value, 0, 0, 0).

The per-task "modules_autoload_mode" supports the following values:
0       There are no restrictions, usually the default unless set
        by parent.
1       The task must have CAP_SYS_MODULE to be able to trigger a
        module auto-load operation, or CAP_NET_ADMIN for modules with
        a 'netdev-%s' alias.
2       Automatic modules loading is disabled for the current task.

The mode may only be increased, never decreased, thus ensuring that once
applied, processes can never relax their setting. This make it easy for
developers and users to handle.

Note that even if the per-task "modules_autoload_mode" allows to auto-load
the corresponding modules, automatic module loading may still fail due to
the global sysctl "modules_autoload_mode". For more details please see
Documentation/sysctl/kernel.txt, section "modules_autoload_mode".

When a request to a kernel module is denied, the module name with the
corresponding process name and its pid are logged. Administrators can use
such information to explicitly load the appropriate modules.


# Testing:

##) Global sysctl "modules_autoload_mode"

Before patch:
$ lsmod | grep ipip -
$ sudo ip tunnel add mytun mode ipip remote 10.0.2.100 local 10.0.2.15 ttl 255
$ lsmod | grep ipip -
ipip                   16384  0
tunnel4                16384  1 ipip
ip_tunnel              28672  1 ipip
$ cat /proc/sys/kernel/modules_autoload_mode
0

After patch:
$ lsmod | grep ipip -
# echo 2 > /proc/sys/kernel/modules_autoload_mode
$ sudo ip tunnel add mytun mode ipip remote 10.0.2.100 local 10.0.2.15 ttl 255
add tunnel "tunl0" failed: No such device
$ dmesg
...
[ 1876.378389] module: automatic module loading of netdev-tunl0 by "ip"[1453] was denied
[ 1876.380994] module: automatic module loading of tunl0 by "ip"[1453] was denied
...
$ lsmod | grep ipip -


##) Per-task "modules_autoload_mode" flag

Here we use DCCP as an example since the public PoC was against it.

The following tool can be used to test the feature:
https://gist.githubusercontent.com/tixxdz/f6d77e5a45f9f8cfa4bcc0ab526ce5cf/raw/5f12f98e4dfc8a94f76b13dc290f077a153e74d8/pr_modules_autoload_mode_test.c

DCCP use after free CVE-2017-6074 (unprivileged to local root):
The code path can be triggered by unprivileged, using the trigger.c
program for DCCP use after free [6] and that was fixed by
commit 5edabca9d4cff7f "dccp: fix freeing skb too early for IPV6_RECVPKTINFO".

Before patch:
$ lsmod | grep dccp
$ strace ./dccp_trigger
...
socket(AF_INET6, SOCK_DCCP, IPPROTO_IP) = 3
...
$ lsmod | grep dccp
dccp_ipv6              24576  5
dccp_ipv4              24576  5 dccp_ipv6
dccp                  102400  2 dccp_ipv6,dccp_ipv4
$ grep Modules /proc/self/status
ModulesAutoloadMode:    0


After:
Set task "modules_autoload_mode" to 1, privileged mode.
$ lsmod | grep dccp
$ ./pr_set_no_new_privs
$ grep NoNewPrivs /proc/self/status
NoNewPrivs:     1
$ ./pr_modules_autoload_mode_test 1
$ grep Modules /proc/self/status
ModulesAutoloadMode:    1
$ strace ./dccp_trigger
...
socket(AF_INET6, SOCK_DCCP, IPPROTO_IP) = -1 ESOCKTNOSUPPORT (Socket type not supported)
...
$ lsmod | grep dccp
$ dmesg
...
[ 4662.171994] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1759] was denied
[ 4662.177284] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1759] was denied
[ 4662.180181] module: automatic module loading of net-pf-10-proto-0-type-6 by "dccp_trigger"[1759] was denied
[ 4662.181709] module: automatic module loading of net-pf-10-proto-0 by "dccp_trigger"[1759] was denied



As showed, this blocks automatic module loading per-task. This allows to
provide a usable system, where only some sandboxed apps or containers will be
restricted to trigger automatic module loading, other parts of the
system can continue to use the system as it is which is the case of the
desktop.


Finally we already have a use case for the prctl() interface to enforce
some systemd services [7], and we plan to use it for our containers and
sandboxes. That pull request will be updated if this feature is merged,
We will provide "ProtectKernelModulesMode=strict" as a new directive for
users that can be enforced to make sure that if their services/apps are
compromised they won't be able to abuse the module auto-load operation.


# Changes since v3:
*) Renamed the sysctl from "modules_autoload" to "modules_autoload_mode"
   and the prctl() operation flag to "PR_{SET|GET}_MODULES_AUTOLOAD_MODE"
   as it was requested.

   Suggested-by: Ben Hutchings <ben.hutchings-4yDnlxn2s6sWdaTGBSpHTA@public.gmane.org>


*) Updated __request_module() to take the capability that may allow to
   auto-load a module with the appropriate alias. This way we never
   parse aliases as it was requested by Rusty Russell. Security and
   SELinux hooks were updated too.

   Suggested-by: Rusty Russell <rusty-8n+1lVoiYb80n/F98K4Iww@public.gmane.org>
   https://lkml.org/lkml/2017/4/24/7


*) Updated code to set prctl(PR_SET_MODULES_AUTOLOAD_MODE, 1, 0, 0, 0),
   the task must call prctl(PR_SET_NO_NEW_PRIVS, 1) before or run with
   CAP_SYS_ADMIN privileges in its namespace. If these are not true,
   -EACCES will be returned.

   Suggested-by: Andy Lutomirski <luto-kltTT9wpgjJwATOyAt5JVQ@public.gmane.org>
   https://lkml.org/lkml/2017/4/22/22


*) Remove task initialization logic and other cleanups
   Suggested-by: Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>


*) Other code and documentation cleanups.
   

# Changes since v2:
*) Implemented as a core kernel feature inside capabilities subsystem
*) Renamed sysctl to "modules_autoload" to align with "modules_disabled"

   Suggested-by: Kees Cook <keescook-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>

*) Improved documentation.
*) Removed unused code.


# Changes since v1:
*) Renamed module to ModAutoRestrict
*) Improved documentation to explicity refer to module autoloading.
*) Switched to use the new task_security_alloc() hook.
*) Switched from rhash tables to use task->security since it is in
   linux-security/next branch now.
*) Check all parameters passed to prctl() syscall.
*) Many other bug fixes and documentation improvements.


Patches (3) Djalal Harouni:
 (1/3) modules:capabilities: allow __request_module() to take a capability argument
 (2/3) modules:capabilities: automatic module loading restriction
 (3/3) modules:capabilities: add a per-task modules auto-load mode

 Documentation/filesystems/proc.txt                 |   3 +
 Documentation/sysctl/kernel.txt                    |  51 +++++++++
 Documentation/userspace-api/index.rst              |   1 +
 .../userspace-api/modules_autoload_mode.rst        | 115 +++++++++++++++++++++
 fs/proc/array.c                                    |   6 ++
 include/linux/init_task.h                          |   8 ++
 include/linux/kmod.h                               |  15 +--
 include/linux/lsm_hooks.h                          |   4 +-
 include/linux/module.h                             |  41 +++++++-
 include/linux/sched.h                              |   5 +
 include/linux/security.h                           |   7 +-
 include/uapi/linux/prctl.h                         |   8 ++
 kernel/kmod.c                                      |  15 ++-
 kernel/module.c                                    |  93 +++++++++++++++++
 kernel/sysctl.c                                    |  40 +++++++
 net/core/dev_ioctl.c                               |  10 +-
 security/commoncap.c                               |  60 +++++++++++
 security/security.c                                |   4 +-
 security/selinux/hooks.c                           |   2 +-
 19 files changed, 470 insertions(+), 18 deletions(-)

        
References:        
[1] http://www.openwall.com/lists/kernel-hardening/2017/02/02/21
[2] http://www.openwall.com/lists/kernel-hardening/2017/04/09/1
[3] https://lkml.org/lkml/2017/4/19/1086
[4] http://www.openwall.com/lists/oss-security/2017/02/22/3
[5] http://www.openwall.com/lists/oss-security/2017/03/29/2
[6] https://github.com/xairy/kernel-exploits/tree/master/CVE-2017-6074
[7] https://github.com/systemd/systemd/pull/5736

^ permalink raw reply

* Re: vhost/scsi: Delete error messages for failed memory allocations in five functions
From: SF Markus Elfring @ 2017-05-22 11:34 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: kvm, netdev, virtualization, kernel-janitors, Jason Wang,
	Michael S. Tsirkin, LKML, Wolfram Sang
In-Reply-To: <20170522112320.GA22806@stefanha-x1.localdomain>

>> Do you find information from a Linux allocation failure report sufficient
>> for any function implementations here?
> 
> If kmalloc() and friends guarantee to print a warning and backtrace on
> every allocation failure, then there's no need for error messages in
> callers.
> 
> That seems like good justification that can go in the commit
> description, but I'm not sure if kmalloc() and friends guarantee to show
> a message (not just the first time, but for every failed allocation)?

I am also looking for a more complete and easier accessible documentation
for this aspect of the desired exception handling.
How would we like to resolve any remaining open issues there?

Regards,
Markus

^ permalink raw reply

* Re: [PATCH 3/3] stmmac: pci: Use dmi_system_id table for retrieving PHY addresses
From: Joe Perches @ 2017-05-22 11:33 UTC (permalink / raw)
  To: Jan Kiszka, Giuseppe Cavallaro, Alexandre Torgue, David Miller
  Cc: netdev, linux-kernel, Andy Shevchenko
In-Reply-To: <c8e6966227a9c2981e822b2deac8c036651b088c.1495451529.git.jan.kiszka@siemens.com>

On Mon, 2017-05-22 at 13:12 +0200, Jan Kiszka wrote:
> Avoids reimplementation of DMI matching in stmmac_pci_find_phy_addr.
[]
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
[]
> @@ -31,65 +31,78 @@
[]
> +static const struct stmmac_pci_dmi_data iot2040_stmmac_dmi_data[] = {
>  	{
> -		.name = "GalileoGen2",
>  		.func = 6,
>  		.phy_addr = 1,
>  	},
>  	{
> -		.name = "SIMATIC IOT2000",
> -		.asset_tag = "6ES7647-0AA00-0YA2",
> -		.func = 6,
> +		.func = 7,

Why change this from 6 to 7?

^ permalink raw reply

* [PATCH net-next 11/11] qed: Replace set_id() api with set_name()
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Yuval Mintz, Manish Rangankar
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

Current API between qed and protocol modules allows passing an
additional private string - but it doesn't get utilized by qed
anywhere.

Clarify the API by removing it and renaming it 'set_name'.

CC: Manish Rangankar <Manish.Rangankar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed.h        | 1 -
 drivers/net/ethernet/qlogic/qed/qed_main.c   | 9 +++------
 drivers/net/ethernet/qlogic/qede/qede_main.c | 4 ++--
 drivers/scsi/qedf/qedf_main.c                | 2 +-
 drivers/scsi/qedi/qedi_main.c                | 2 +-
 include/linux/qed/qed_if.h                   | 4 +---
 6 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 2eb6031..e0becec 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -638,7 +638,6 @@ struct qed_dev {
 
 	int				pcie_width;
 	int				pcie_speed;
-	u8				ver_str[VER_SIZE];
 
 	/* Add MF related configuration */
 	u8				mcp_rev;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index b5313c5..c5bb80b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -338,6 +338,7 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,
 	if (!cdev)
 		goto err0;
 
+	cdev->drv_type = DRV_ID_DRV_TYPE_LINUX;
 	cdev->protocol = params->protocol;
 
 	if (params->is_vf)
@@ -1128,17 +1129,13 @@ static int qed_slowpath_stop(struct qed_dev *cdev)
 	return 0;
 }
 
-static void qed_set_id(struct qed_dev *cdev, char name[NAME_SIZE],
-		       char ver_str[VER_SIZE])
+static void qed_set_name(struct qed_dev *cdev, char name[NAME_SIZE])
 {
 	int i;
 
 	memcpy(cdev->name, name, NAME_SIZE);
 	for_each_hwfn(cdev, i)
 		snprintf(cdev->hwfns[i].name, NAME_SIZE, "%s-%d", name, i);
-
-	memcpy(cdev->ver_str, ver_str, VER_SIZE);
-	cdev->drv_type = DRV_ID_DRV_TYPE_LINUX;
 }
 
 static u32 qed_sb_init(struct qed_dev *cdev,
@@ -1692,7 +1689,7 @@ static int qed_update_mtu(struct qed_dev *cdev, u16 mtu)
 	.probe = &qed_probe,
 	.remove = &qed_remove,
 	.set_power_state = &qed_set_power_state,
-	.set_id = &qed_set_id,
+	.set_name = &qed_set_name,
 	.update_pf_params = &qed_update_pf_params,
 	.slowpath_start = &qed_slowpath_start,
 	.slowpath_stop = &qed_slowpath_stop,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index d496ba7..00c7062 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -259,7 +259,7 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event,
 		/* Notify qed of the name change */
 		if (!edev->ops || !edev->ops->common)
 			goto done;
-		edev->ops->common->set_id(edev->cdev, edev->ndev->name, "qede");
+		edev->ops->common->set_name(edev->cdev, edev->ndev->name);
 		break;
 	case NETDEV_CHANGEADDR:
 		edev = netdev_priv(ndev);
@@ -967,7 +967,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 		goto err4;
 	}
 
-	edev->ops->common->set_id(cdev, edev->ndev->name, DRV_MODULE_VERSION);
+	edev->ops->common->set_name(cdev, edev->ndev->name);
 
 	/* PTP not supported on VFs */
 	if (!is_vf)
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index cceddd9..afbb06d 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2954,7 +2954,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 		   "WWPN=%016llx.\n", qedf->wwnn, qedf->wwpn);
 
 	sprintf(host_buf, "host_%d", host->host_no);
-	qed_ops->common->set_id(qedf->cdev, host_buf, QEDF_VERSION);
+	qed_ops->common->set_name(qedf->cdev, host_buf);
 
 
 	/* Set xid max values */
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 92775a8..073b305 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1843,7 +1843,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
 		  qedi->mac);
 
 	sprintf(host_buf, "host_%d", qedi->shost->host_no);
-	qedi_ops->common->set_id(qedi->cdev, host_buf, QEDI_MODULE_VERSION);
+	qedi_ops->common->set_name(qedi->cdev, host_buf);
 
 	qedi_ops->register_ops(qedi->cdev, &qedi_cb_ops, qedi);
 
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index b00e675..73c46d6 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -520,9 +520,7 @@ struct qed_common_ops {
 	int		(*set_power_state)(struct qed_dev *cdev,
 					   pci_power_t state);
 
-	void		(*set_id)(struct qed_dev *cdev,
-				  char name[],
-				  char ver_str[]);
+	void (*set_name) (struct qed_dev *cdev, char name[]);
 
 	/* Client drivers need to make this call before slowpath_start.
 	 * PF params required for the call before slowpath_start is
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 10/11] qede: Log probe of PCI device
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Yuval Mintz
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

Replace meaningless logged print ('Ending successfully qede probe')
with a single-liner containing interesting information about probed
device.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qede/qede_main.c | 40 ++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index f0871e1..d496ba7 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -852,6 +852,43 @@ static void qede_update_pf_params(struct qed_dev *cdev)
 	qed_ops->common->update_pf_params(cdev, &pf_params);
 }
 
+#define QEDE_FW_VER_STR_SIZE	80
+
+static void qede_log_probe(struct qede_dev *edev)
+{
+	struct qed_dev_info *p_dev_info = &edev->dev_info.common;
+	u8 buf[QEDE_FW_VER_STR_SIZE];
+	size_t left_size;
+
+	snprintf(buf, QEDE_FW_VER_STR_SIZE,
+		 "Storm FW %d.%d.%d.%d, Management FW %d.%d.%d.%d",
+		 p_dev_info->fw_major, p_dev_info->fw_minor, p_dev_info->fw_rev,
+		 p_dev_info->fw_eng,
+		 (p_dev_info->mfw_rev & QED_MFW_VERSION_3_MASK) >>
+		 QED_MFW_VERSION_3_OFFSET,
+		 (p_dev_info->mfw_rev & QED_MFW_VERSION_2_MASK) >>
+		 QED_MFW_VERSION_2_OFFSET,
+		 (p_dev_info->mfw_rev & QED_MFW_VERSION_1_MASK) >>
+		 QED_MFW_VERSION_1_OFFSET,
+		 (p_dev_info->mfw_rev & QED_MFW_VERSION_0_MASK) >>
+		 QED_MFW_VERSION_0_OFFSET);
+
+	left_size = QEDE_FW_VER_STR_SIZE - strlen(buf);
+	if (p_dev_info->mbi_version && left_size)
+		snprintf(buf + strlen(buf), left_size,
+			 " [MBI %d.%d.%d]",
+			 (p_dev_info->mbi_version & QED_MBI_VERSION_2_MASK) >>
+			 QED_MBI_VERSION_2_OFFSET,
+			 (p_dev_info->mbi_version & QED_MBI_VERSION_1_MASK) >>
+			 QED_MBI_VERSION_1_OFFSET,
+			 (p_dev_info->mbi_version & QED_MBI_VERSION_0_MASK) >>
+			 QED_MBI_VERSION_0_OFFSET);
+
+	pr_info("qede %02x:%02x.%02x: %s [%s]\n", edev->pdev->bus->number,
+		PCI_SLOT(edev->pdev->devfn), PCI_FUNC(edev->pdev->devfn),
+		buf, edev->ndev->name);
+}
+
 enum qede_probe_mode {
 	QEDE_PROBE_NORMAL,
 };
@@ -945,8 +982,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 
 	edev->rx_copybreak = QEDE_RX_HDR_SIZE;
 
-	DP_INFO(edev, "Ending successfully qede probe\n");
-
+	qede_log_probe(edev);
 	return 0;
 
 err4:
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 09/11] qed: Provide MBI information in dev_info
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Tomer Tayar, Yuval Mintz
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

From: Tomer Tayar <Tomer.Tayar@cavium.com>

Pass additional information about package installed on persistent memory
so that protocol drivers would be able to log it.

Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h  |  6 ++++++
 drivers/net/ethernet/qlogic/qed/qed_main.c |  3 +++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 30 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  | 12 ++++++++++++
 include/linux/qed/qed_if.h                 | 17 +++++++++++++++++
 5 files changed, 68 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 4755d0b..802c162 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -11782,6 +11782,12 @@ struct nvm_cfg1_glob {
 	u32 led_global_settings;
 	u32 generic_cont1;
 	u32 mbi_version;
+#define NVM_CFG1_GLOB_MBI_VERSION_0_MASK		0x000000FF
+#define NVM_CFG1_GLOB_MBI_VERSION_0_OFFSET		0
+#define NVM_CFG1_GLOB_MBI_VERSION_1_MASK		0x0000FF00
+#define NVM_CFG1_GLOB_MBI_VERSION_1_OFFSET		8
+#define NVM_CFG1_GLOB_MBI_VERSION_2_MASK		0x00FF0000
+#define NVM_CFG1_GLOB_MBI_VERSION_2_OFFSET		16
 	u32 mbi_date;
 	u32 misc_sig;
 	u32 device_capabilities;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 3043dcce..b5313c5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -281,6 +281,9 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 			qed_mcp_get_mfw_ver(QED_LEADING_HWFN(cdev), ptt,
 					    &dev_info->mfw_rev, NULL);
 
+			qed_mcp_get_mbi_ver(QED_LEADING_HWFN(cdev), ptt,
+					    &dev_info->mbi_version);
+
 			qed_mcp_get_flash_size(QED_LEADING_HWFN(cdev), ptt,
 					       &dev_info->flash_size);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index b32e819..fc49c75e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1523,6 +1523,36 @@ int qed_mcp_get_mfw_ver(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
+			struct qed_ptt *p_ptt, u32 *p_mbi_ver)
+{
+	u32 nvm_cfg_addr, nvm_cfg1_offset, mbi_ver_addr;
+
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
+	/* Read the address of the nvm_cfg */
+	nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0);
+	if (!nvm_cfg_addr) {
+		DP_NOTICE(p_hwfn, "Shared memory not initialized\n");
+		return -EINVAL;
+	}
+
+	/* Read the offset of nvm_cfg1 */
+	nvm_cfg1_offset = qed_rd(p_hwfn, p_ptt, nvm_cfg_addr + 4);
+
+	mbi_ver_addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
+		       offsetof(struct nvm_cfg1, glob) +
+		       offsetof(struct nvm_cfg1_glob, mbi_version);
+	*p_mbi_ver = qed_rd(p_hwfn, p_ptt,
+			    mbi_ver_addr) &
+		     (NVM_CFG1_GLOB_MBI_VERSION_0_MASK |
+		      NVM_CFG1_GLOB_MBI_VERSION_1_MASK |
+		      NVM_CFG1_GLOB_MBI_VERSION_2_MASK);
+
+	return 0;
+}
+
 int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
 {
 	struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 3e5bffe..4024759 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -256,6 +256,18 @@ int qed_mcp_get_mfw_ver(struct qed_hwfn *p_hwfn,
 			u32 *p_mfw_ver, u32 *p_running_bundle_id);
 
 /**
+ * @brief Get the MBI version value
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_mbi_ver - A pointer to a variable to be filled with the MBI version.
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
+			struct qed_ptt *p_ptt, u32 *p_mbi_ver);
+
+/**
  * @brief Get media type value of the port.
  *
  * @param cdev      - qed dev pointer
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index ff590cb..b00e675 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -328,6 +328,14 @@ struct qed_dev_info {
 
 	/* MFW version */
 	u32		mfw_rev;
+#define QED_MFW_VERSION_0_MASK		0x000000FF
+#define QED_MFW_VERSION_0_OFFSET	0
+#define QED_MFW_VERSION_1_MASK		0x0000FF00
+#define QED_MFW_VERSION_1_OFFSET	8
+#define QED_MFW_VERSION_2_MASK		0x00FF0000
+#define QED_MFW_VERSION_2_OFFSET	16
+#define QED_MFW_VERSION_3_MASK		0xFF000000
+#define QED_MFW_VERSION_3_OFFSET	24
 
 	u32		flash_size;
 	u8		mf_mode;
@@ -337,6 +345,15 @@ struct qed_dev_info {
 
 	bool wol_support;
 
+	/* MBI version */
+	u32 mbi_version;
+#define QED_MBI_VERSION_0_MASK		0x000000FF
+#define QED_MBI_VERSION_0_OFFSET	0
+#define QED_MBI_VERSION_1_MASK		0x0000FF00
+#define QED_MBI_VERSION_1_OFFSET	8
+#define QED_MBI_VERSION_2_MASK		0x00FF0000
+#define QED_MBI_VERSION_2_OFFSET	16
+
 	enum qed_dev_type dev_type;
 
 	/* Output parameters for qede */
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 08/11] qed: Enable RoCE parser searching on fp init
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Michal Kalderon, Yuval Mintz
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

From: Michal Kalderon <Michal.Kalderon@cavium.com>

Since we're closing the parser searching for RDMA when stoping the
fastpath, we need to re-enable it when starting the fastpath once again.

Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 3262aaa..072d950 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1948,6 +1948,13 @@ int qed_hw_start_fastpath(struct qed_hwfn *p_hwfn)
 	if (!p_ptt)
 		return -EAGAIN;
 
+	/* If roce info is allocated it means roce is initialized and should
+	 * be enabled in searcher.
+	 */
+	if (p_hwfn->p_rdma_info &&
+	    p_hwfn->b_rdma_enabled_in_prs)
+		qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0x1);
+
 	/* Re-open incoming traffic */
 	qed_wr(p_hwfn, p_ptt, NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0);
 	qed_ptt_release(p_hwfn, p_ptt);
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 06/11] qed: Remove BB_A0 references
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Yuval Mintz
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

A0 never went public, so no need to protect against it.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed.h     | 6 ------
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 6 ------
 2 files changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 63c44c6..2eb6031 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -598,16 +598,11 @@ struct qed_dev {
 	enum	qed_dev_type type;
 /* Translate type/revision combo into the proper conditions */
 #define QED_IS_BB(dev)  ((dev)->type == QED_DEV_TYPE_BB)
-#define QED_IS_BB_A0(dev)       (QED_IS_BB(dev) && \
-				 CHIP_REV_IS_A0(dev))
 #define QED_IS_BB_B0(dev)       (QED_IS_BB(dev) && \
 				 CHIP_REV_IS_B0(dev))
 #define QED_IS_AH(dev)  ((dev)->type == QED_DEV_TYPE_AH)
 #define QED_IS_K2(dev)  QED_IS_AH(dev)
 
-#define QED_GET_TYPE(dev)       (QED_IS_BB_A0(dev) ? CHIP_BB_A0 : \
-				 QED_IS_BB_B0(dev) ? CHIP_BB_B0 : CHIP_K2)
-
 	u16	vendor_id;
 	u16	device_id;
 #define QED_DEV_ID_MASK		0xff00
@@ -621,7 +616,6 @@ struct qed_dev {
 	u16	chip_rev;
 #define CHIP_REV_MASK                   0xf
 #define CHIP_REV_SHIFT                  12
-#define CHIP_REV_IS_A0(_cdev)   (!(_cdev)->chip_rev)
 #define CHIP_REV_IS_B0(_cdev)   ((_cdev)->chip_rev == 1)
 
 	u16				chip_metal;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 51ae907..3262aaa 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2812,12 +2812,6 @@ static int qed_get_dev_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 		cdev->chip_num, cdev->chip_rev,
 		cdev->chip_bond_id, cdev->chip_metal);
 
-	if (QED_IS_BB(cdev) && CHIP_REV_IS_A0(cdev)) {
-		DP_NOTICE(cdev->hwfns,
-			  "The chip type/rev (BB A0) is not supported!\n");
-		return -EINVAL;
-	}
-
 	return 0;
 }
 
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 07/11] qed: Flush slowpath tasklet on stop
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Tomer Tayar, Yuval Mintz
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

From: Tomer Tayar <Tomer.Tayar@cavium.com>

Today, driver has a synchronization point while closing
the device which synchronizes its slowpath interrupt line.
However, that's insufficient as that ISR would schedule the
slowpath-tasklet - so even after ISR is over it's possible the
handling of the interrupt has not completed.

By doing a disable/enable on the taskelt we guarantee that all
HW events that should no longer be genereated from that point
onward in the flow are truly behind us.

Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index f286daa..3043dcce 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -606,6 +606,18 @@ int qed_slowpath_irq_req(struct qed_hwfn *hwfn)
 	return rc;
 }
 
+static void qed_slowpath_tasklet_flush(struct qed_hwfn *p_hwfn)
+{
+	/* Calling the disable function will make sure that any
+	 * currently-running function is completed. The following call to the
+	 * enable function makes this sequence a flush-like operation.
+	 */
+	if (p_hwfn->b_sp_dpc_enabled) {
+		tasklet_disable(p_hwfn->sp_dpc);
+		tasklet_enable(p_hwfn->sp_dpc);
+	}
+}
+
 void qed_slowpath_irq_sync(struct qed_hwfn *p_hwfn)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
@@ -617,6 +629,8 @@ void qed_slowpath_irq_sync(struct qed_hwfn *p_hwfn)
 		synchronize_irq(cdev->int_params.msix_table[id].vector);
 	else
 		synchronize_irq(cdev->pdev->irq);
+
+	qed_slowpath_tasklet_flush(p_hwfn);
 }
 
 static void qed_slowpath_irq_free(struct qed_dev *cdev)
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 05/11] qed: Drop the 's' from num_ports_in_engines
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Tomer Tayar, Yuval Mintz
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

From: Tomer Tayar <Tomer.Tayar@cavium.com>

The parameter reflects the number of physical ports connected to a single
engine, not all.

Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed.h     |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 36 +++++++++++++++----------------
 drivers/net/ethernet/qlogic/qed/qed_mcp.h |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_ptp.c |  4 ++--
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index fd8cf31..63c44c6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -633,7 +633,7 @@ struct qed_dev {
 #define CHIP_BOND_ID_SHIFT              0
 
 	u8				num_engines;
-	u8				num_ports_in_engines;
+	u8				num_ports_in_engine;
 	u8				num_funcs_in_port;
 
 	u8				path_id;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index b01df24..51ae907 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -300,7 +300,7 @@ static void qed_init_qm_params(struct qed_hwfn *p_hwfn)
 	qm_info->vport_wfq_en = 1;
 
 	/* TC config is different for AH 4 port */
-	four_port = p_hwfn->cdev->num_ports_in_engines == MAX_NUM_PORTS_K2;
+	four_port = p_hwfn->cdev->num_ports_in_engine == MAX_NUM_PORTS_K2;
 
 	/* in AH 4 port we have fewer TCs per port */
 	qm_info->max_phys_tcs_per_port = four_port ? NUM_PHYS_TCS_4PORT_K2 :
@@ -329,7 +329,7 @@ static void qed_init_qm_vport_params(struct qed_hwfn *p_hwfn)
 static void qed_init_qm_port_params(struct qed_hwfn *p_hwfn)
 {
 	/* Initialize qm port parameters */
-	u8 i, active_phys_tcs, num_ports = p_hwfn->cdev->num_ports_in_engines;
+	u8 i, active_phys_tcs, num_ports = p_hwfn->cdev->num_ports_in_engine;
 
 	/* indicate how ooo and high pri traffic is dealt with */
 	active_phys_tcs = num_ports == MAX_NUM_PORTS_K2 ?
@@ -693,7 +693,7 @@ static void qed_dp_init_qm_params(struct qed_hwfn *p_hwfn)
 		   qm_info->num_pf_rls, qed_get_pq_flags(p_hwfn));
 
 	/* port table */
-	for (i = 0; i < p_hwfn->cdev->num_ports_in_engines; i++) {
+	for (i = 0; i < p_hwfn->cdev->num_ports_in_engine; i++) {
 		port = &(qm_info->qm_port_params[i]);
 		DP_VERBOSE(p_hwfn,
 			   NETIF_MSG_HW,
@@ -823,7 +823,7 @@ static int qed_alloc_qm_data(struct qed_hwfn *p_hwfn)
 		goto alloc_err;
 
 	qm_info->qm_port_params = kzalloc(sizeof(*qm_info->qm_port_params) *
-					  p_hwfn->cdev->num_ports_in_engines,
+					  p_hwfn->cdev->num_ports_in_engine,
 					  GFP_KERNEL);
 	if (!qm_info->qm_port_params)
 		goto alloc_err;
@@ -1108,7 +1108,7 @@ static int qed_calc_hw_mode(struct qed_hwfn *p_hwfn)
 		return -EINVAL;
 	}
 
-	switch (p_hwfn->cdev->num_ports_in_engines) {
+	switch (p_hwfn->cdev->num_ports_in_engine) {
 	case 1:
 		hw_mode |= 1 << MODE_PORTS_PER_ENG_1;
 		break;
@@ -1120,7 +1120,7 @@ static int qed_calc_hw_mode(struct qed_hwfn *p_hwfn)
 		break;
 	default:
 		DP_NOTICE(p_hwfn, "num_ports_in_engine = %d not supported\n",
-			  p_hwfn->cdev->num_ports_in_engines);
+			  p_hwfn->cdev->num_ports_in_engine);
 		return -EINVAL;
 	}
 
@@ -1253,7 +1253,7 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 	}
 
 	memset(&params, 0, sizeof(params));
-	params.max_ports_per_engine = p_hwfn->cdev->num_ports_in_engines;
+	params.max_ports_per_engine = p_hwfn->cdev->num_ports_in_engine;
 	params.max_phys_tcs_per_port = qm_info->max_phys_tcs_per_port;
 	params.pf_rl_en = qm_info->pf_rl_en;
 	params.pf_wfq_en = qm_info->pf_wfq_en;
@@ -2245,7 +2245,7 @@ int qed_hw_get_dflt_resc(struct qed_hwfn *p_hwfn,
 	case QED_BDQ:
 		if (!*p_resc_num)
 			*p_resc_start = 0;
-		else if (p_hwfn->cdev->num_ports_in_engines == 4)
+		else if (p_hwfn->cdev->num_ports_in_engine == 4)
 			*p_resc_start = p_hwfn->port_id;
 		else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI)
 			*p_resc_start = p_hwfn->port_id;
@@ -2662,15 +2662,15 @@ static void qed_hw_info_port_num_bb(struct qed_hwfn *p_hwfn,
 	port_mode = qed_rd(p_hwfn, p_ptt, CNIG_REG_NW_PORT_MODE_BB_B0);
 
 	if (port_mode < 3) {
-		p_hwfn->cdev->num_ports_in_engines = 1;
+		p_hwfn->cdev->num_ports_in_engine = 1;
 	} else if (port_mode <= 5) {
-		p_hwfn->cdev->num_ports_in_engines = 2;
+		p_hwfn->cdev->num_ports_in_engine = 2;
 	} else {
 		DP_NOTICE(p_hwfn, "PORT MODE: %d not supported\n",
-			  p_hwfn->cdev->num_ports_in_engines);
+			  p_hwfn->cdev->num_ports_in_engine);
 
-		/* Default num_ports_in_engines to something */
-		p_hwfn->cdev->num_ports_in_engines = 1;
+		/* Default num_ports_in_engine to something */
+		p_hwfn->cdev->num_ports_in_engine = 1;
 	}
 }
 
@@ -2680,20 +2680,20 @@ static void qed_hw_info_port_num_ah(struct qed_hwfn *p_hwfn,
 	u32 port;
 	int i;
 
-	p_hwfn->cdev->num_ports_in_engines = 0;
+	p_hwfn->cdev->num_ports_in_engine = 0;
 
 	for (i = 0; i < MAX_NUM_PORTS_K2; i++) {
 		port = qed_rd(p_hwfn, p_ptt,
 			      CNIG_REG_NIG_PORT0_CONF_K2 + (i * 4));
 		if (port & 1)
-			p_hwfn->cdev->num_ports_in_engines++;
+			p_hwfn->cdev->num_ports_in_engine++;
 	}
 
-	if (!p_hwfn->cdev->num_ports_in_engines) {
+	if (!p_hwfn->cdev->num_ports_in_engine) {
 		DP_NOTICE(p_hwfn, "All NIG ports are inactive\n");
 
 		/* Default num_ports_in_engine to something */
-		p_hwfn->cdev->num_ports_in_engines = 1;
+		p_hwfn->cdev->num_ports_in_engine = 1;
 	}
 }
 
@@ -4067,7 +4067,7 @@ static int qed_device_num_ports(struct qed_dev *cdev)
 	if (cdev->num_hwfns > 1)
 		return 1;
 
-	return cdev->num_ports_in_engines * qed_device_num_engines(cdev);
+	return cdev->num_ports_in_engine * qed_device_num_engines(cdev);
 }
 
 int qed_device_get_port_id(struct qed_dev *cdev)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 2b09b85..3e5bffe 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -482,7 +482,7 @@ int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
 #define MCP_PF_ID(p_hwfn) MCP_PF_ID_BY_REL(p_hwfn, (p_hwfn)->rel_pf_id)
 
 #define MFW_PORT(_p_hwfn)       ((_p_hwfn)->abs_pf_id %			  \
-				 ((_p_hwfn)->cdev->num_ports_in_engines * \
+				 ((_p_hwfn)->cdev->num_ports_in_engine * \
 				  qed_device_num_engines((_p_hwfn)->cdev)))
 
 struct qed_mcp_info {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
index 434a164..5a90d69 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
@@ -80,7 +80,7 @@ static int qed_ptp_res_lock(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 		/* MFW doesn't support resource locking, first PF on the port
 		 * has lock ownership.
 		 */
-		if (p_hwfn->abs_pf_id < p_hwfn->cdev->num_ports_in_engines)
+		if (p_hwfn->abs_pf_id < p_hwfn->cdev->num_ports_in_engine)
 			return 0;
 
 		DP_INFO(p_hwfn, "PF doesn't have lock ownership\n");
@@ -108,7 +108,7 @@ static int qed_ptp_res_unlock(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	rc = qed_mcp_resc_unlock(p_hwfn, p_ptt, &params);
 	if (rc == -EINVAL) {
 		/* MFW doesn't support locking, first PF has lock ownership */
-		if (p_hwfn->abs_pf_id < p_hwfn->cdev->num_ports_in_engines) {
+		if (p_hwfn->abs_pf_id < p_hwfn->cdev->num_ports_in_engine) {
 			rc = 0;
 		} else {
 			DP_INFO(p_hwfn, "PF doesn't have lock ownership\n");
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 04/11] qed: Log incorrectly installed board
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Tomer Tayar, Yuval Mintz
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

From: Tomer Tayar <Tomer.Tayar@cavium.com>

In case nvram layout of board is incorrect, board may exhibit peculiar
oddities. Log such a rare event.

Signed-off-by: Tomer Tayar <Tomer.Tayar@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 +++++
 drivers/net/ethernet/qlogic/qed/qed_hsi.h | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 9dd28ba..b01df24 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1698,6 +1698,11 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 			return mfw_rc;
 		}
 
+		/* Check if there is a DID mismatch between nvm-cfg/efuse */
+		if (param & FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR)
+			DP_NOTICE(p_hwfn,
+				  "warning: device configuration is not supported on this board type. The device may not function as expected.\n");
+
 		/* send DCBX attention request command */
 		DP_VERBOSE(p_hwfn,
 			   QED_MSG_DCB,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index eedf79a..4755d0b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -11655,6 +11655,8 @@ struct public_drv_mb {
 #define FW_MB_PARAM_GET_PF_RDMA_IWARP		0x2
 #define FW_MB_PARAM_GET_PF_RDMA_BOTH		0x3
 
+#define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR	(1 << 0)
+
 	u32 drv_pulse_mb;
 #define DRV_PULSE_SEQ_MASK			0x00007fff
 #define DRV_PULSE_SYSTEM_TIME_MASK		0xffff0000
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 03/11] qed: !main_ptt for tunnel configuration
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Manish Chopra, Yuval Mintz
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

From: Manish Chopra <Manish.Chopra@cavium.com>

Flows configuring tunnel ports in HW use the main_ptt which should
be reserved for core-functionality.

Signed-off-by: Manish Chopra <Manish.Chopra@cavium.com>
Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c         |  3 ++-
 drivers/net/ethernet/qlogic/qed/qed_l2.c          | 17 +++++++++++++++--
 drivers/net/ethernet/qlogic/qed/qed_sp.h          |  3 +++
 drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 14 +++++++++-----
 drivers/net/ethernet/qlogic/qed/qed_sriov.c       |  2 +-
 5 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 3fc3b2e..9dd28ba 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1513,7 +1513,8 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
 		qed_int_igu_enable(p_hwfn, p_ptt, int_mode);
 
 		/* send function start command */
-		rc = qed_sp_pf_start(p_hwfn, p_tunn, p_hwfn->cdev->mf_mode,
+		rc = qed_sp_pf_start(p_hwfn, p_ptt, p_tunn,
+				     p_hwfn->cdev->mf_mode,
 				     allow_npar_tx_switch);
 		if (rc) {
 			DP_NOTICE(p_hwfn, "Function start ramrod failed\n");
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index fab6e69..93dd781 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2300,14 +2300,25 @@ static int qed_tunn_configure(struct qed_dev *cdev,
 
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *hwfn = &cdev->hwfns[i];
+		struct qed_ptt *p_ptt;
 		struct qed_tunnel_info *tun;
 
 		tun = &hwfn->cdev->tunnel;
+		if (IS_PF(cdev)) {
+			p_ptt = qed_ptt_acquire(hwfn);
+			if (!p_ptt)
+				return -EAGAIN;
+		} else {
+			p_ptt = NULL;
+		}
 
-		rc = qed_sp_pf_update_tunn_cfg(hwfn, &tunn_info,
+		rc = qed_sp_pf_update_tunn_cfg(hwfn, p_ptt, &tunn_info,
 					       QED_SPQ_MODE_EBLOCK, NULL);
-		if (rc)
+		if (rc) {
+			if (IS_PF(cdev))
+				qed_ptt_release(hwfn, p_ptt);
 			return rc;
+		}
 
 		if (IS_PF_SRIOV(hwfn)) {
 			u16 vxlan_port, geneve_port;
@@ -2324,6 +2335,8 @@ static int qed_tunn_configure(struct qed_dev *cdev,
 
 			qed_schedule_iov(hwfn, QED_IOV_WQ_BULLETIN_UPDATE_FLAG);
 		}
+		if (IS_PF(cdev))
+			qed_ptt_release(hwfn, p_ptt);
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index c0b56b9..ef77de4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -391,6 +391,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
  * to the internal RAM of the UStorm by the Function Start Ramrod.
  *
  * @param p_hwfn
+ * @param p_ptt
  * @param p_tunn
  * @param mode
  * @param allow_npar_tx_switch
@@ -399,6 +400,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
  */
 
 int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
+		    struct qed_ptt *p_ptt,
 		    struct qed_tunnel_info *p_tunn,
 		    enum qed_mf_mode mode, bool allow_npar_tx_switch);
 
@@ -432,6 +434,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 int qed_sp_pf_stop(struct qed_hwfn *p_hwfn);
 
 int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn,
+			      struct qed_ptt *p_ptt,
 			      struct qed_tunnel_info *p_tunn,
 			      enum spq_mode comp_mode,
 			      struct qed_spq_comp_cb *p_comp_data);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 5abcac6..ab09975 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -253,17 +253,18 @@ static void qed_set_hw_tunn_mode(struct qed_hwfn *p_hwfn,
 }
 
 static void qed_set_hw_tunn_mode_port(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
 				      struct qed_tunnel_info *p_tunn)
 {
 	if (p_tunn->vxlan_port.b_update_port)
-		qed_set_vxlan_dest_port(p_hwfn, p_hwfn->p_main_ptt,
+		qed_set_vxlan_dest_port(p_hwfn, p_ptt,
 					p_tunn->vxlan_port.port);
 
 	if (p_tunn->geneve_port.b_update_port)
-		qed_set_geneve_dest_port(p_hwfn, p_hwfn->p_main_ptt,
+		qed_set_geneve_dest_port(p_hwfn, p_ptt,
 					 p_tunn->geneve_port.port);
 
-	qed_set_hw_tunn_mode(p_hwfn, p_hwfn->p_main_ptt, p_tunn);
+	qed_set_hw_tunn_mode(p_hwfn, p_ptt, p_tunn);
 }
 
 static void
@@ -303,6 +304,7 @@ static void qed_set_hw_tunn_mode_port(struct qed_hwfn *p_hwfn,
 }
 
 int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
+		    struct qed_ptt *p_ptt,
 		    struct qed_tunnel_info *p_tunn,
 		    enum qed_mf_mode mode, bool allow_npar_tx_switch)
 {
@@ -399,7 +401,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 	rc = qed_spq_post(p_hwfn, p_ent, NULL);
 
 	if (p_tunn)
-		qed_set_hw_tunn_mode_port(p_hwfn, &p_hwfn->cdev->tunnel);
+		qed_set_hw_tunn_mode_port(p_hwfn, p_ptt,
+					  &p_hwfn->cdev->tunnel);
 
 	return rc;
 }
@@ -430,6 +433,7 @@ int qed_sp_pf_update(struct qed_hwfn *p_hwfn)
 
 /* Set pf update ramrod command params */
 int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn,
+			      struct qed_ptt *p_ptt,
 			      struct qed_tunnel_info *p_tunn,
 			      enum spq_mode comp_mode,
 			      struct qed_spq_comp_cb *p_comp_data)
@@ -464,7 +468,7 @@ int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn,
 	if (rc)
 		return rc;
 
-	qed_set_hw_tunn_mode_port(p_hwfn, &p_hwfn->cdev->tunnel);
+	qed_set_hw_tunn_mode_port(p_hwfn, p_ptt, &p_hwfn->cdev->tunnel);
 
 	return rc;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index f5ed54d..71e392f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -2209,7 +2209,7 @@ static void qed_iov_vf_mbx_update_tunn_param(struct qed_hwfn *p_hwfn,
 	if (b_update_required) {
 		u16 geneve_port;
 
-		rc = qed_sp_pf_update_tunn_cfg(p_hwfn, &tunn,
+		rc = qed_sp_pf_update_tunn_cfg(p_hwfn, p_ptt, &tunn,
 					       QED_SPQ_MODE_EBLOCK, NULL);
 		if (rc)
 			status = PFVF_STATUS_FAILURE;
-- 
1.9.3

^ permalink raw reply related

* [PATCH net-next 02/11] qed: Align DP_ERR style with other DP macros
From: Yuval Mintz @ 2017-05-22 11:32 UTC (permalink / raw)
  To: netdev, davem; +Cc: Yuval Mintz
In-Reply-To: <1495452731-27171-1-git-send-email-Yuval.Mintz@cavium.com>

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 include/linux/qed/qed_if.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index c70ac13..ff590cb 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -700,11 +700,13 @@ struct qed_common_ops {
 	(((value) >> (name ## _SHIFT)) & name ## _MASK)
 
 /* Debug print definitions */
-#define DP_ERR(cdev, fmt, ...)						     \
-		pr_err("[%s:%d(%s)]" fmt,				     \
-		       __func__, __LINE__,				     \
-		       DP_NAME(cdev) ? DP_NAME(cdev) : "",		     \
-		       ## __VA_ARGS__)					     \
+#define DP_ERR(cdev, fmt, ...)					\
+	do {							\
+		pr_err("[%s:%d(%s)]" fmt,			\
+		       __func__, __LINE__,			\
+		       DP_NAME(cdev) ? DP_NAME(cdev) : "",	\
+		       ## __VA_ARGS__);				\
+	} while (0)
 
 #define DP_NOTICE(cdev, fmt, ...)				      \
 	do {							      \
-- 
1.9.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox