Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v2] cxgb4: RSS table is 4k for T6
From: Ganesh Goudar @ 2017-12-19  1:52 UTC (permalink / raw)
  To: netdev, davem; +Cc: nirranjan, indranil, venkatesh, Ganesh Goudar

The RSS table has 4k entries on T6 and later cards; use the
chip-specific table size instead of assuming 2k entries.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
v2: Not a series; it is a single patch
---
 drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c     |  5 ++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c   |  2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c |  7 ++---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c         | 13 +++++++--
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.h         | 31 +++++++++++-----------
 6 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index d73fb6a..336670d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -1004,9 +1004,10 @@ int cudbg_collect_rss(struct cudbg_init *pdbg_init,
 {
 	struct adapter *padap = pdbg_init->adap;
 	struct cudbg_buffer temp_buff = { 0 };
-	int rc;
+	int rc, nentries;
 
-	rc = cudbg_get_buff(dbg_buff, RSS_NENTRIES * sizeof(u16), &temp_buff);
+	nentries = t4_chip_rss_size(padap);
+	rc = cudbg_get_buff(dbg_buff, nentries * sizeof(u16), &temp_buff);
 	if (rc)
 		return rc;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index b1df2aa..69d0b64 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1528,6 +1528,7 @@ int t4_init_portinfo(struct port_info *pi, int mbox,
 		     int port, int pf, int vf, u8 mac[]);
 int t4_port_init(struct adapter *adap, int mbox, int pf, int vf);
 void t4_fatal_err(struct adapter *adapter);
+unsigned int t4_chip_rss_size(struct adapter *adapter);
 int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid,
 			int start, int n, const u16 *rspq, unsigned int nrspq);
 int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 41c8736..581d628 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -179,7 +179,7 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
 		len = cudbg_mbytes_to_bytes(len);
 		break;
 	case CUDBG_RSS:
-		len = RSS_NENTRIES * sizeof(u16);
+		len = t4_chip_rss_size(adap) * sizeof(u16);
 		break;
 	case CUDBG_RSS_VF_CONF:
 		len = adap->params.arch.vfcount *
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index d8efcd9..d3ced04 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2021,11 +2021,12 @@ static int rss_show(struct seq_file *seq, void *v, int idx)
 
 static int rss_open(struct inode *inode, struct file *file)
 {
-	int ret;
-	struct seq_tab *p;
 	struct adapter *adap = inode->i_private;
+	int ret, nentries;
+	struct seq_tab *p;
 
-	p = seq_open_tab(file, RSS_NENTRIES / 8, 8 * sizeof(u16), 0, rss_show);
+	nentries = t4_chip_rss_size(adap);
+	p = seq_open_tab(file, nentries / 8, 8 * sizeof(u16), 0, rss_show);
 	if (!p)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index f044717..242bcdd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4927,6 +4927,14 @@ void t4_intr_disable(struct adapter *adapter)
 	t4_set_reg_field(adapter, PL_INT_MAP0_A, 1 << pf, 0);
 }
 
+unsigned int t4_chip_rss_size(struct adapter *adap)
+{
+	if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+		return RSS_NENTRIES;
+	else
+		return T6_RSS_NENTRIES;
+}
+
 /**
  *	t4_config_rss_range - configure a portion of the RSS mapping table
  *	@adapter: the adapter
@@ -5065,10 +5073,11 @@ static int rd_rss_row(struct adapter *adap, int row, u32 *val)
  */
 int t4_read_rss(struct adapter *adapter, u16 *map)
 {
+	int i, ret, nentries;
 	u32 val;
-	int i, ret;
 
-	for (i = 0; i < RSS_NENTRIES / 2; ++i) {
+	nentries = t4_chip_rss_size(adapter);
+	for (i = 0; i < nentries / 2; ++i) {
 		ret = rd_rss_row(adapter, i, &val);
 		if (ret)
 			return ret;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
index 872a91b..361d503 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
@@ -38,21 +38,22 @@
 #include <linux/types.h>
 
 enum {
-	NCHAN          = 4,     /* # of HW channels */
-	MAX_MTU        = 9600,  /* max MAC MTU, excluding header + FCS */
-	EEPROMSIZE     = 17408, /* Serial EEPROM physical size */
-	EEPROMVSIZE    = 32768, /* Serial EEPROM virtual address space size */
-	EEPROMPFSIZE   = 1024,  /* EEPROM writable area size for PFn, n>0 */
-	RSS_NENTRIES   = 2048,  /* # of entries in RSS mapping table */
-	TCB_SIZE       = 128,   /* TCB size */
-	NMTUS          = 16,    /* size of MTU table */
-	NCCTRL_WIN     = 32,    /* # of congestion control windows */
-	NTX_SCHED      = 8,     /* # of HW Tx scheduling queues */
-	PM_NSTATS      = 5,     /* # of PM stats */
-	T6_PM_NSTATS   = 7,     /* # of PM stats in T6 */
-	MBOX_LEN       = 64,    /* mailbox size in bytes */
-	TRACE_LEN      = 112,   /* length of trace data and mask */
-	FILTER_OPT_LEN = 36,    /* filter tuple width for optional components */
+	NCHAN           = 4,    /* # of HW channels */
+	MAX_MTU         = 9600, /* max MAC MTU, excluding header + FCS */
+	EEPROMSIZE      = 17408,/* Serial EEPROM physical size */
+	EEPROMVSIZE     = 32768,/* Serial EEPROM virtual address space size */
+	EEPROMPFSIZE    = 1024, /* EEPROM writable area size for PFn, n>0 */
+	RSS_NENTRIES    = 2048, /* # of entries in RSS mapping table */
+	T6_RSS_NENTRIES = 4096, /* # of entries in RSS mapping table */
+	TCB_SIZE        = 128,  /* TCB size */
+	NMTUS           = 16,   /* size of MTU table */
+	NCCTRL_WIN      = 32,   /* # of congestion control windows */
+	NTX_SCHED       = 8,    /* # of HW Tx scheduling queues */
+	PM_NSTATS       = 5,    /* # of PM stats */
+	T6_PM_NSTATS    = 7,    /* # of PM stats in T6 */
+	MBOX_LEN        = 64,   /* mailbox size in bytes */
+	TRACE_LEN       = 112,  /* length of trace data and mask */
+	FILTER_OPT_LEN  = 36,   /* filter tuple width for optional components */
 };
 
 enum {
-- 
2.1.0

^ permalink raw reply related

* Re: [PATCH net-next 17/17] net: hns3: change TM sched mode to TC-based mode when SRIOV enabled
From: lipeng (Y) @ 2017-12-19  1:41 UTC (permalink / raw)
  To: Sergei Shtylyov, davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta
In-Reply-To: <378b5b7e-e1fc-4c35-d198-8cd9c61b0db9@cogentembedded.com>



On 2017/12/18 17:08, Sergei Shtylyov wrote:
> On 12/18/2017 12:31 PM, Lipeng wrote:
>
>> TC-based sched mode supports SRIOV enabled and SRIOV disabled. This
>> patch change the TM sched mode to TC-based mode in initialization
>> process.
>>
>> Fixes: cc9bb43 (net: hns3: Add tc-based TM support for sriov enabled 
>> port)
>
>    Need at least 12 hex digits.
>

Agreed, some hex digits may have been lost; will fix it.

>> Signed-off-by: Lipeng <lipeng321@huawei.com>
> [...]
>
> MBR, Sergei
>
>

^ permalink raw reply

* Re: [PATCH net-next 14/17] net: hns3: add Asym Pause support to phy default features
From: lipeng (Y) @ 2017-12-19  1:40 UTC (permalink / raw)
  To: Sergei Shtylyov, davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta
In-Reply-To: <b3ec4aee-5a79-5f5a-9c11-f0645ae4f237@cogentembedded.com>



On 2017/12/18 17:07, Sergei Shtylyov wrote:
> Hello!
>
> On 12/18/2017 12:31 PM, Lipeng wrote:
>
>> From: Fuyun Liang <liangfuyun1@huawei.com>
>>
>> commit c4fb2cdf575d (net: hns3: fix a bug for phy supported feature
>> initialization) adds default supported features for phy, but our 
>> hardware
>
>    Ten cited commit's summary needs to be enclosed in (""), not just 
> ()...
>
Thanks, will fix it.

>> also supports Asym Pause. This patch adds Asym Pause support to phy
>> default features to prevent Asym Pause can not be advertised when the 
>> phy
>> negotiates flow control.
>>
>> Fixes: c4fb2cdf575d (net: hns3: fix a bug for phy supported feature 
>> initialization)
>
>    Here as well...
>
Will fix it here too.

Thanks

>> Signed-off-by: Fuyun Liang <liangfuyun1@huawei.com>
>> Signed-off-by: Lipeng <lipeng321@huawei.com>
> [...]
>
> MBR, Sergei
>
>

^ permalink raw reply

* [PATCH net-next 1/2] cxgb4: RSS table is 4k for T6
From: Ganesh Goudar @ 2017-12-19  1:39 UTC (permalink / raw)
  To: netdev, davem; +Cc: nirranjan, indranil, venkatesh, Ganesh Goudar

RSS table is 4k for T6 and later cards, add check for the
same.

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c     |  5 ++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |  1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c   |  2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c |  7 ++---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c         | 13 +++++++--
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.h         | 31 +++++++++++-----------
 6 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index d73fb6a..336670d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -1004,9 +1004,10 @@ int cudbg_collect_rss(struct cudbg_init *pdbg_init,
 {
 	struct adapter *padap = pdbg_init->adap;
 	struct cudbg_buffer temp_buff = { 0 };
-	int rc;
+	int rc, nentries;
 
-	rc = cudbg_get_buff(dbg_buff, RSS_NENTRIES * sizeof(u16), &temp_buff);
+	nentries = t4_chip_rss_size(padap);
+	rc = cudbg_get_buff(dbg_buff, nentries * sizeof(u16), &temp_buff);
 	if (rc)
 		return rc;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index b1df2aa..69d0b64 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1528,6 +1528,7 @@ int t4_init_portinfo(struct port_info *pi, int mbox,
 		     int port, int pf, int vf, u8 mac[]);
 int t4_port_init(struct adapter *adap, int mbox, int pf, int vf);
 void t4_fatal_err(struct adapter *adapter);
+unsigned int t4_chip_rss_size(struct adapter *adapter);
 int t4_config_rss_range(struct adapter *adapter, int mbox, unsigned int viid,
 			int start, int n, const u16 *rspq, unsigned int nrspq);
 int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 41c8736..581d628 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -179,7 +179,7 @@ static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
 		len = cudbg_mbytes_to_bytes(len);
 		break;
 	case CUDBG_RSS:
-		len = RSS_NENTRIES * sizeof(u16);
+		len = t4_chip_rss_size(adap) * sizeof(u16);
 		break;
 	case CUDBG_RSS_VF_CONF:
 		len = adap->params.arch.vfcount *
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 4956e42..200bf67 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2021,11 +2021,12 @@ static int rss_show(struct seq_file *seq, void *v, int idx)
 
 static int rss_open(struct inode *inode, struct file *file)
 {
-	int ret;
-	struct seq_tab *p;
 	struct adapter *adap = inode->i_private;
+	int ret, nentries;
+	struct seq_tab *p;
 
-	p = seq_open_tab(file, RSS_NENTRIES / 8, 8 * sizeof(u16), 0, rss_show);
+	nentries = t4_chip_rss_size(adap);
+	p = seq_open_tab(file, nentries / 8, 8 * sizeof(u16), 0, rss_show);
 	if (!p)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index f044717..242bcdd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4927,6 +4927,14 @@ void t4_intr_disable(struct adapter *adapter)
 	t4_set_reg_field(adapter, PL_INT_MAP0_A, 1 << pf, 0);
 }
 
+unsigned int t4_chip_rss_size(struct adapter *adap)
+{
+	if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+		return RSS_NENTRIES;
+	else
+		return T6_RSS_NENTRIES;
+}
+
 /**
  *	t4_config_rss_range - configure a portion of the RSS mapping table
  *	@adapter: the adapter
@@ -5065,10 +5073,11 @@ static int rd_rss_row(struct adapter *adap, int row, u32 *val)
  */
 int t4_read_rss(struct adapter *adapter, u16 *map)
 {
+	int i, ret, nentries;
 	u32 val;
-	int i, ret;
 
-	for (i = 0; i < RSS_NENTRIES / 2; ++i) {
+	nentries = t4_chip_rss_size(adapter);
+	for (i = 0; i < nentries / 2; ++i) {
 		ret = rd_rss_row(adapter, i, &val);
 		if (ret)
 			return ret;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
index 872a91b..361d503 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
@@ -38,21 +38,22 @@
 #include <linux/types.h>
 
 enum {
-	NCHAN          = 4,     /* # of HW channels */
-	MAX_MTU        = 9600,  /* max MAC MTU, excluding header + FCS */
-	EEPROMSIZE     = 17408, /* Serial EEPROM physical size */
-	EEPROMVSIZE    = 32768, /* Serial EEPROM virtual address space size */
-	EEPROMPFSIZE   = 1024,  /* EEPROM writable area size for PFn, n>0 */
-	RSS_NENTRIES   = 2048,  /* # of entries in RSS mapping table */
-	TCB_SIZE       = 128,   /* TCB size */
-	NMTUS          = 16,    /* size of MTU table */
-	NCCTRL_WIN     = 32,    /* # of congestion control windows */
-	NTX_SCHED      = 8,     /* # of HW Tx scheduling queues */
-	PM_NSTATS      = 5,     /* # of PM stats */
-	T6_PM_NSTATS   = 7,     /* # of PM stats in T6 */
-	MBOX_LEN       = 64,    /* mailbox size in bytes */
-	TRACE_LEN      = 112,   /* length of trace data and mask */
-	FILTER_OPT_LEN = 36,    /* filter tuple width for optional components */
+	NCHAN           = 4,    /* # of HW channels */
+	MAX_MTU         = 9600, /* max MAC MTU, excluding header + FCS */
+	EEPROMSIZE      = 17408,/* Serial EEPROM physical size */
+	EEPROMVSIZE     = 32768,/* Serial EEPROM virtual address space size */
+	EEPROMPFSIZE    = 1024, /* EEPROM writable area size for PFn, n>0 */
+	RSS_NENTRIES    = 2048, /* # of entries in RSS mapping table */
+	T6_RSS_NENTRIES = 4096, /* # of entries in RSS mapping table */
+	TCB_SIZE        = 128,  /* TCB size */
+	NMTUS           = 16,   /* size of MTU table */
+	NCCTRL_WIN      = 32,   /* # of congestion control windows */
+	NTX_SCHED       = 8,    /* # of HW Tx scheduling queues */
+	PM_NSTATS       = 5,    /* # of PM stats */
+	T6_PM_NSTATS    = 7,    /* # of PM stats in T6 */
+	MBOX_LEN        = 64,   /* mailbox size in bytes */
+	TRACE_LEN       = 112,  /* length of trace data and mask */
+	FILTER_OPT_LEN  = 36,   /* filter tuple width for optional components */
 };
 
 enum {
-- 
2.1.0

^ permalink raw reply related

* Re: [v2 PATCH -tip 3/6] net: sctp: Add SCTP ACK tracking trace event
From: Masami Hiramatsu @ 2017-12-19  1:31 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Ian McDonald, Vlad Yasevich, Stephen Hemminger,
	Peter Zijlstra, Thomas Gleixner, LKML, H . Peter Anvin,
	Gerrit Renker, David S . Miller, Neil Horman, dccp, netdev,
	linux-sctp, Stephen Rothwell
In-Reply-To: <20171218120516.2d4398b2@gandalf.local.home>

On Mon, 18 Dec 2017 12:05:16 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Mon, 18 Dec 2017 17:12:15 +0900
> Masami Hiramatsu <mhiramat@kernel.org> wrote:
> 
> > Add SCTP ACK tracking trace event to trace the changes of SCTP
> > association state in response to incoming packets.
> > It is used for debugging SCTP congestion control algorithms,
> > and will replace sctp_probe module.
> > 
> > Note that this event a bit tricky. Since this consists of 2
> > events (sctp_probe and sctp_probe_path) so you have to enable
> > both events as below.
> > 
> >   # cd /sys/kernel/debug/tracing
> >   # echo 1 > events/sctp/sctp_probe/enable
> >   # echo 1 > events/sctp/sctp_probe_path/enable
> > 
> > Or, you can enable all the events under sctp.
> > 
> >   # echo 1 > events/sctp/enable
> > 
> > Since sctp_probe_path event is always invoked from sctp_probe
> > event, you can not see any output if you only enable
> > sctp_probe_path.
> 
> I have to ask, why did you do it this way?
> 
> 
> > +#include <trace/define_trace.h>
> > diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
> > index 8f8ccded13e4..c5f92b2cc5c3 100644
> > --- a/net/sctp/sm_statefuns.c
> > +++ b/net/sctp/sm_statefuns.c
> > @@ -59,6 +59,9 @@
> >  #include <net/sctp/sm.h>
> >  #include <net/sctp/structs.h>
> >  
> > +#define CREATE_TRACE_POINTS
> > +#include <trace/events/sctp.h>
> > +
> >  static struct sctp_packet *sctp_abort_pkt_new(
> >  					struct net *net,
> >  					const struct sctp_endpoint *ep,
> > @@ -3219,6 +3222,8 @@ enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
> >  	struct sctp_sackhdr *sackh;
> >  	__u32 ctsn;
> >  
> > +	trace_sctp_probe(ep, asoc, chunk);
> 
> What about doing this right after this probe:
> 
> 	if (trace_sctp_probe_path_enabled()) {
> 		struct sctp_transport *sp;
> 
> 		list_for_each_entry(sp, &asoc->peer.transpor_addr_list,
> 				    transports) {
> 			trace_sctp_probe_path(sp, asoc);
> 		}
> 	}
> 
> The "trace_sctp_probe_path_enabled()" is a static branch, which means
> it's a nop just like a tracepoint is, and will not add any overhead if
> the trace_sctp_probe_path is not enabled.

That's a good idea! I'll update to use it :)

Thank you,

> 
> -- Steve
> 
> > +
> >  	if (!sctp_vtag_verify(chunk, asoc))
> >  		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
> >  
> 


-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply

* Re: [Patch net-next] net_sched: properly check for empty skb array on error path
From: John Fastabend @ 2017-12-19  1:25 UTC (permalink / raw)
  To: Cong Wang, netdev
In-Reply-To: <20171218223426.4685-1-xiyou.wangcong@gmail.com>

On 12/18/2017 02:34 PM, Cong Wang wrote:
> First, the check of &q->ring.queue against NULL is wrong, it
> is always false. We should check the value rather than the address.
> 

Thanks.

> Secondly, we need the same check in pfifo_fast_reset() too,
> as both ->reset() and ->destroy() are called in qdisc_destroy().
> 

Not that it hurts to have the check here, but if init fails
in qdisc_create() it seems only ->destroy() is called, without
a ->reset().

Is there another path for init() to fail that I'm missing?

> Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
> Reported-by: syzbot <syzkaller@googlegroups.com>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> ---
>  net/sched/sch_generic.c | 8 +++++++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 

^ permalink raw reply

* linux-next: manual merge of the net-next tree with the net tree
From: Stephen Rothwell @ 2017-12-19  0:51 UTC (permalink / raw)
  To: David Miller, Networking
  Cc: Linux-Next Mailing List, Linux Kernel Mailing List, Zhao Qiang,
	Heiner Kallweit

Hi all,

Today's linux-next merge of the net-next tree got a conflict in:

  drivers/net/phy/marvell.c

between commit:

  c505873eaece ("net: phy: marvell: Limit 88m1101 autoneg errata to 88E1145 as well.")

from the net tree and commit:

  80274abafc60 ("net: phy: remove generic settings for callbacks config_aneg and read_status from drivers")

from the net-next tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/net/phy/marvell.c
index 82104edca393,2fc026dc170a..000000000000
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@@ -2085,8 -2070,7 +2082,7 @@@ static struct phy_driver marvell_driver
  		.flags = PHY_HAS_INTERRUPT,
  		.probe = marvell_probe,
  		.config_init = &m88e1145_config_init,
 -		.config_aneg = &marvell_config_aneg,
 +		.config_aneg = &m88e1101_config_aneg,
- 		.read_status = &genphy_read_status,
  		.ack_interrupt = &marvell_ack_interrupt,
  		.config_intr = &marvell_config_intr,
  		.resume = &genphy_resume,

^ permalink raw reply

* Re: [PATCH net-next] bpf/cgroup: fix a verification error for a CGROUP_DEVICE type prog
From: Daniel Borkmann @ 2017-12-19  0:46 UTC (permalink / raw)
  To: Yonghong Song, ast, netdev; +Cc: guro, kernel-team
In-Reply-To: <20171218181344.2000185-1-yhs@fb.com>

On 12/18/2017 07:13 PM, Yonghong Song wrote:
> The tools/testing/selftests/bpf test program
> test_dev_cgroup fails with the following error
> when compiled with llvm 6.0. (I did not try
> with earlier versions.)
> 
>   libbpf: load bpf program failed: Permission denied
>   libbpf: -- BEGIN DUMP LOG ---
>   libbpf:
>   0: (61) r2 = *(u32 *)(r1 +4)
>   1: (b7) r0 = 0
>   2: (55) if r2 != 0x1 goto pc+8
>    R0=inv0 R1=ctx(id=0,off=0,imm=0) R2=inv1 R10=fp0
>   3: (69) r2 = *(u16 *)(r1 +0)
>   invalid bpf_context access off=0 size=2
>   ...
> 
> The culprit is the following statement in dev_cgroup.c:
>   short type = ctx->access_type & 0xFFFF;
> This code is typical as the ctx->access_type is assigned
> as below in kernel/bpf/cgroup.c:
>   struct bpf_cgroup_dev_ctx ctx = {
>         .access_type = (access << 16) | dev_type,
>         .major = major,
>         .minor = minor,
>   };
> 
> The compiler converts it to u16 access while
> the verifier cgroup_dev_is_valid_access rejects
> any non u32 access.
> 
> This patch permits the field access_type to be accessible
> with type u16 and u8 as well.
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>
> Tested-by: Roman Gushchin <guro@fb.com>

Looks good, applied to bpf-next, thanks Yonghong!

^ permalink raw reply

* [RFC PATCH] virtio_net: Extend virtio to use VF datapath when available
From: Sridhar Samudrala @ 2017-12-19  0:40 UTC (permalink / raw)
  To: mst, stephen, netdev, virtualization, alexander.duyck,
	sridhar.samudrala

This patch enables virtio to switch over to a VF datapath when a VF netdev
is present with the same MAC address.  It allows live migration of a VM
with a directly attached VF without the need to set up a bond/team between
the VF and the virtio net device in the guest.

The hypervisor needs to unplug the VF device from the guest on the source
host and reset the MAC filter of the VF to initiate failover of datapath to
virtio before starting the migration. After the migration is completed, the
destination hypervisor sets the MAC filter on the VF and plugs it back to
the guest to switch over to VF datapath.

It is entirely based on the netvsc implementation, and it should be possible
to make this code generic and move it to a common location that can be shared
by netvsc and virtio.

Also, I think we should make this a negotiated feature that is off by
default via a new feature bit.

This patch is based on the discussion initiated by Jesse on this thread.
https://marc.info/?l=linux-virtualization&m=151189725224231&w=2

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
---
 drivers/net/virtio_net.c | 341 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 339 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 559b215c0169..a34c717bb15b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -31,6 +31,8 @@
 #include <linux/average.h>
 #include <linux/filter.h>
 #include <net/route.h>
+#include <linux/netdevice.h>
+#include <linux/netpoll.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -56,6 +58,8 @@ module_param(napi_tx, bool, 0644);
  */
 DECLARE_EWMA(pkt_len, 0, 64)
 
+#define VF_TAKEOVER_INT	(HZ / 10)
+
 #define VIRTNET_DRIVER_VERSION "1.0.0"
 
 static const unsigned long guest_offloads[] = {
@@ -117,6 +121,15 @@ struct receive_queue {
 	char name[40];
 };
 
+struct virtnet_vf_pcpu_stats {
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
+	struct u64_stats_sync   syncp;
+	u32	tx_dropped;
+};
+
 struct virtnet_info {
 	struct virtio_device *vdev;
 	struct virtqueue *cvq;
@@ -179,6 +192,11 @@ struct virtnet_info {
 	u32 speed;
 
 	unsigned long guest_offloads;
+
+	/* State to manage the associated VF interface. */
+	struct net_device __rcu *vf_netdev;
+	struct virtnet_vf_pcpu_stats __percpu *vf_stats;
+	struct delayed_work vf_takeover;
 };
 
 struct padded_vnet_hdr {
@@ -1300,16 +1318,51 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
 }
 
+/* Send skb on the slave VF device. */
+static int virtnet_vf_xmit(struct net_device *dev, struct net_device *vf_netdev,
+			   struct sk_buff *skb)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	unsigned int len = skb->len;
+	int rc;
+
+	skb->dev = vf_netdev;
+	skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
+
+	rc = dev_queue_xmit(skb);
+	if (likely(rc == NET_XMIT_SUCCESS || rc == NET_XMIT_CN)) {
+		struct virtnet_vf_pcpu_stats *pcpu_stats
+			= this_cpu_ptr(vi->vf_stats);
+
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->tx_packets++;
+		pcpu_stats->tx_bytes += len;
+		u64_stats_update_end(&pcpu_stats->syncp);
+	} else {
+		this_cpu_inc(vi->vf_stats->tx_dropped);
+	}
+
+	return rc;
+}
+
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	int qnum = skb_get_queue_mapping(skb);
 	struct send_queue *sq = &vi->sq[qnum];
+	struct net_device *vf_netdev;
 	int err;
 	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
 	bool kick = !skb->xmit_more;
 	bool use_napi = sq->napi.weight;
 
+	/* if VF is present and up then redirect packets
+	 * called with rcu_read_lock_bh
+	 */
+	vf_netdev = rcu_dereference_bh(vi->vf_netdev);
+	if (vf_netdev && netif_running(vf_netdev) && !netpoll_tx_running(dev))
+		return virtnet_vf_xmit(dev, vf_netdev, skb);
+
 	/* Free up any pending old buffers before queueing new ones. */
 	free_old_xmit_skbs(sq);
 
@@ -1456,10 +1509,41 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p)
 	return ret;
 }
 
+static void virtnet_get_vf_stats(struct net_device *dev,
+				 struct virtnet_vf_pcpu_stats *tot)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
+
+	memset(tot, 0, sizeof(*tot));
+
+	for_each_possible_cpu(i) {
+		const struct virtnet_vf_pcpu_stats *stats
+				= per_cpu_ptr(vi->vf_stats, i);
+		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			rx_packets = stats->rx_packets;
+			tx_packets = stats->tx_packets;
+			rx_bytes = stats->rx_bytes;
+			tx_bytes = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		tot->rx_packets += rx_packets;
+		tot->tx_packets += tx_packets;
+		tot->rx_bytes   += rx_bytes;
+		tot->tx_bytes   += tx_bytes;
+		tot->tx_dropped += stats->tx_dropped;
+	}
+}
+
 static void virtnet_stats(struct net_device *dev,
 			  struct rtnl_link_stats64 *tot)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtnet_vf_pcpu_stats vf_stats;
 	int cpu;
 	unsigned int start;
 
@@ -1490,6 +1574,13 @@ static void virtnet_stats(struct net_device *dev,
 	tot->rx_dropped = dev->stats.rx_dropped;
 	tot->rx_length_errors = dev->stats.rx_length_errors;
 	tot->rx_frame_errors = dev->stats.rx_frame_errors;
+
+	virtnet_get_vf_stats(dev, &vf_stats);
+	tot->rx_packets += vf_stats.rx_packets;
+	tot->tx_packets += vf_stats.tx_packets;
+	tot->rx_bytes += vf_stats.rx_bytes;
+	tot->tx_bytes += vf_stats.tx_bytes;
+	tot->tx_dropped += vf_stats.tx_dropped;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2508,6 +2599,47 @@ static int virtnet_validate(struct virtio_device *vdev)
 	return 0;
 }
 
+static void __virtnet_vf_setup(struct net_device *ndev,
+			       struct net_device *vf_netdev)
+{
+	int ret;
+
+	/* Align MTU of VF with master */
+	ret = dev_set_mtu(vf_netdev, ndev->mtu);
+	if (ret)
+		netdev_warn(vf_netdev,
+			    "unable to change mtu to %u\n", ndev->mtu);
+
+	if (netif_running(ndev)) {
+		ret = dev_open(vf_netdev);
+		if (ret)
+			netdev_warn(vf_netdev,
+				    "unable to open: %d\n", ret);
+	}
+}
+
+/* Setup VF as slave of the virtio device.
+ * Runs in workqueue to avoid recursion in netlink callbacks.
+ */
+static void virtnet_vf_setup(struct work_struct *w)
+{
+	struct virtnet_info *vi
+		= container_of(w, struct virtnet_info, vf_takeover.work);
+	struct net_device *ndev = vi->dev;
+	struct net_device *vf_netdev;
+
+	if (!rtnl_trylock()) {
+		schedule_delayed_work(&vi->vf_takeover, 0);
+		return;
+	}
+
+	vf_netdev = rtnl_dereference(vi->vf_netdev);
+	if (vf_netdev)
+		__virtnet_vf_setup(ndev, vf_netdev);
+
+	rtnl_unlock();
+}
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int i, err;
@@ -2600,6 +2732,11 @@ static int virtnet_probe(struct virtio_device *vdev)
 	}
 
 	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+	INIT_DELAYED_WORK(&vi->vf_takeover, virtnet_vf_setup);
+
+	vi->vf_stats = netdev_alloc_pcpu_stats(struct virtnet_vf_pcpu_stats);
+	if (!vi->vf_stats)
+		goto free_stats;
 
 	/* If we can receive ANY GSO packets, we must allocate large ones. */
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -2634,7 +2771,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 			 */
 			dev_err(&vdev->dev, "device MTU appears to have changed "
 				"it is now %d < %d", mtu, dev->min_mtu);
-			goto free_stats;
+			goto free_vf_stats;
 		}
 
 		dev->mtu = mtu;
@@ -2658,7 +2795,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
 	err = init_vqs(vi);
 	if (err)
-		goto free_stats;
+		goto free_vf_stats;
 
 #ifdef CONFIG_SYSFS
 	if (vi->mergeable_rx_bufs)
@@ -2712,6 +2849,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 	cancel_delayed_work_sync(&vi->refill);
 	free_receive_page_frags(vi);
 	virtnet_del_vqs(vi);
+free_vf_stats:
+	free_percpu(vi->vf_stats);
 free_stats:
 	free_percpu(vi->stats);
 free:
@@ -2733,19 +2872,178 @@ static void remove_vq_common(struct virtnet_info *vi)
 	virtnet_del_vqs(vi);
 }
 
+static struct net_device *get_virtio_bymac(const u8 *mac)
+{
+	struct net_device *dev;
+
+	ASSERT_RTNL();
+
+	for_each_netdev(&init_net, dev) {
+		if (dev->netdev_ops != &virtnet_netdev)
+			continue;       /* not a virtio_net device */
+
+		if (ether_addr_equal(mac, dev->perm_addr))
+			return dev;
+	}
+
+	return NULL;
+}
+
+static struct net_device *get_virtio_byref(struct net_device *vf_netdev)
+{
+	struct net_device *dev;
+
+	ASSERT_RTNL();
+
+	for_each_netdev(&init_net, dev) {
+		struct virtnet_info *vi;
+
+		if (dev->netdev_ops != &virtnet_netdev)
+			continue;	/* not a virtio_net device */
+
+		vi = netdev_priv(dev);
+		if (rtnl_dereference(vi->vf_netdev) == vf_netdev)
+			return dev;	/* a match */
+	}
+
+	return NULL;
+}
+
+/* Called when VF is injecting data into network stack.
+ * Change the associated network device from VF to virtio.
+ * note: already called with rcu_read_lock
+ */
+static rx_handler_result_t virtnet_vf_handle_frame(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct net_device *ndev = rcu_dereference(skb->dev->rx_handler_data);
+	struct virtnet_info *vi = netdev_priv(ndev);
+	struct virtnet_vf_pcpu_stats *pcpu_stats =
+				this_cpu_ptr(vi->vf_stats);
+
+	skb->dev = ndev;
+
+	u64_stats_update_begin(&pcpu_stats->syncp);
+	pcpu_stats->rx_packets++;
+	pcpu_stats->rx_bytes += skb->len;
+	u64_stats_update_end(&pcpu_stats->syncp);
+
+	return RX_HANDLER_ANOTHER;
+}
+
+static int virtnet_vf_join(struct net_device *vf_netdev,
+			   struct net_device *ndev)
+{
+	struct virtnet_info *vi = netdev_priv(ndev);
+	int ret;
+
+	ret = netdev_rx_handler_register(vf_netdev,
+					 virtnet_vf_handle_frame, ndev);
+	if (ret != 0) {
+		netdev_err(vf_netdev,
+			   "can not register virtio VF receive handler (err = %d)\n",
+			   ret);
+		goto rx_handler_failed;
+	}
+
+	ret = netdev_upper_dev_link(vf_netdev, ndev, NULL);
+	if (ret != 0) {
+		netdev_err(vf_netdev,
+			   "can not set master device %s (err = %d)\n",
+			   ndev->name, ret);
+		goto upper_link_failed;
+	}
+
+	/* set slave flag before open to prevent IPv6 addrconf */
+	vf_netdev->flags |= IFF_SLAVE;
+
+	schedule_delayed_work(&vi->vf_takeover, VF_TAKEOVER_INT);
+
+	call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
+
+	netdev_info(vf_netdev, "joined to %s\n", ndev->name);
+	return 0;
+
+upper_link_failed:
+	netdev_rx_handler_unregister(vf_netdev);
+rx_handler_failed:
+	return ret;
+}
+
+static int virtnet_register_vf(struct net_device *vf_netdev)
+{
+	struct net_device *ndev;
+	struct virtnet_info *vi;
+
+	if (vf_netdev->addr_len != ETH_ALEN)
+		return NOTIFY_DONE;
+
+	/* We will use the MAC address to locate the virtio_net interface to
+	 * associate with the VF interface. If we don't find a matching
+	 * virtio interface, move on.
+	 */
+	ndev = get_virtio_bymac(vf_netdev->perm_addr);
+	if (!ndev)
+		return NOTIFY_DONE;
+
+	vi = netdev_priv(ndev);
+	if (rtnl_dereference(vi->vf_netdev))
+		return NOTIFY_DONE;
+
+	if (virtnet_vf_join(vf_netdev, ndev) != 0)
+		return NOTIFY_DONE;
+
+	netdev_info(ndev, "VF registering %s\n", vf_netdev->name);
+
+	dev_hold(vf_netdev);
+	rcu_assign_pointer(vi->vf_netdev, vf_netdev);
+
+	return NOTIFY_OK;
+}
+
+static int virtnet_unregister_vf(struct net_device *vf_netdev)
+{
+	struct net_device *ndev;
+	struct virtnet_info *vi;
+
+	ndev = get_virtio_byref(vf_netdev);
+	if (!ndev)
+		return NOTIFY_DONE;
+
+	vi = netdev_priv(ndev);
+	cancel_delayed_work_sync(&vi->vf_takeover);
+
+	netdev_info(ndev, "VF unregistering %s\n", vf_netdev->name);
+
+	netdev_rx_handler_unregister(vf_netdev);
+	netdev_upper_dev_unlink(vf_netdev, ndev);
+	RCU_INIT_POINTER(vi->vf_netdev, NULL);
+	dev_put(vf_netdev);
+
+	return NOTIFY_OK;
+}
+
 static void virtnet_remove(struct virtio_device *vdev)
 {
 	struct virtnet_info *vi = vdev->priv;
+	struct net_device *vf_netdev;
 
 	virtnet_cpu_notif_remove(vi);
 
 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vi->config_work);
 
+	rtnl_lock();
+	vf_netdev = rtnl_dereference(vi->vf_netdev);
+	if (vf_netdev)
+		virtnet_unregister_vf(vf_netdev);
+	rtnl_unlock();
+
 	unregister_netdev(vi->dev);
 
 	remove_vq_common(vi);
 
+	free_percpu(vi->vf_stats);
 	free_percpu(vi->stats);
 	free_netdev(vi->dev);
 }
@@ -2823,6 +3121,42 @@ static struct virtio_driver virtio_net_driver = {
 #endif
 };
 
+static int virtio_netdev_event(struct notifier_block *this,
+			       unsigned long event, void *ptr)
+{
+	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+
+	/* Skip our own events */
+	if (event_dev->netdev_ops == &virtnet_netdev)
+		return NOTIFY_DONE;
+
+	/* Avoid non-Ethernet type devices */
+	if (event_dev->type != ARPHRD_ETHER)
+		return NOTIFY_DONE;
+
+	/* Avoid Vlan dev with same MAC registering as VF */
+	if (is_vlan_dev(event_dev))
+		return NOTIFY_DONE;
+
+	/* Avoid Bonding master dev with same MAC registering as VF */
+	if ((event_dev->priv_flags & IFF_BONDING) &&
+	    (event_dev->flags & IFF_MASTER))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		return virtnet_register_vf(event_dev);
+	case NETDEV_UNREGISTER:
+		return virtnet_unregister_vf(event_dev);
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+static struct notifier_block virtio_netdev_notifier = {
+	.notifier_call = virtio_netdev_event,
+};
+
 static __init int virtio_net_driver_init(void)
 {
 	int ret;
@@ -2841,6 +3175,8 @@ static __init int virtio_net_driver_init(void)
         ret = register_virtio_driver(&virtio_net_driver);
 	if (ret)
 		goto err_virtio;
+
+	register_netdevice_notifier(&virtio_netdev_notifier);
 	return 0;
 err_virtio:
 	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
@@ -2853,6 +3189,7 @@ module_init(virtio_net_driver_init);
 
 static __exit void virtio_net_driver_exit(void)
 {
+	unregister_netdevice_notifier(&virtio_netdev_notifier);
 	unregister_virtio_driver(&virtio_net_driver);
 	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
 	cpuhp_remove_multi_state(virtionet_online);
-- 
2.14.3

^ permalink raw reply related

* Re: [PATCH] bpf: make function xdp_do_generic_redirect_map() static
From: Daniel Borkmann @ 2017-12-19  0:38 UTC (permalink / raw)
  To: Xiongwei Song, ast, davem; +Cc: netdev, linux-kernel
In-Reply-To: <20171218231715.3227-1-sxwjean@gmail.com>

On 12/19/2017 12:17 AM, Xiongwei Song wrote:
> The function xdp_do_generic_redirect_map() is only used in this file, so
> make it static.
> 
> Clean up sparse warning:
> net/core/filter.c:2687:5: warning: no previous prototype
> for 'xdp_do_generic_redirect_map' [-Wmissing-prototypes]
> 
> Signed-off-by: Xiongwei Song <sxwjean@gmail.com>

Applied to bpf-next, thanks Xiongwei!

^ permalink raw reply

* Re: [PATCH bpf-next] selftests/bpf: add netdevsim to config
From: Daniel Borkmann @ 2017-12-19  0:36 UTC (permalink / raw)
  To: Jakub Kicinski, alexei.starovoitov; +Cc: netdev, oss-drivers
In-Reply-To: <20171218231130.24619-1-jakub.kicinski@netronome.com>

On 12/19/2017 12:11 AM, Jakub Kicinski wrote:
> BPF offload tests (test_offload.py) will require netdevsim
> to be built, add it to config.
> 
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>

Applied to bpf-next, thanks Jakub!

^ permalink raw reply

* Re: [PATCH bpf-next] bpf: arm64: fix uninitialized variable
From: Daniel Borkmann @ 2017-12-19  0:34 UTC (permalink / raw)
  To: Alexei Starovoitov, Alexei Starovoitov, David S . Miller
  Cc: Arnd Bergmann, netdev, kernel-team
In-Reply-To: <68b024dd-4113-8c8a-a606-7b4b0206973d@fb.com>

On 12/18/2017 07:36 PM, Alexei Starovoitov wrote:
> On 12/18/17 10:19 AM, Daniel Borkmann wrote:
>> On 12/18/2017 07:09 PM, Alexei Starovoitov wrote:
>>> From: Alexei Starovoitov <ast@fb.com>
>>>
>>> fix the following issue:
>>> arch/arm64/net/bpf_jit_comp.c: In function 'bpf_int_jit_compile':
>>> arch/arm64/net/bpf_jit_comp.c:982:18: error: 'image_size' may be used
>>> uninitialized in this function [-Werror=maybe-uninitialized]
>>>
>>> Fixes: db496944fdaa ("bpf: arm64: add JIT support for multi-function programs")
>>> Reported-by: Arnd Bergmann <arnd@arndb.de>
>>> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
>>> ---
>>>  arch/arm64/net/bpf_jit_comp.c | 1 +
>>>  1 file changed, 1 insertion(+)
>>>
>>> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
>>> index 396490cf7316..acaa935ed977 100644
>>> --- a/arch/arm64/net/bpf_jit_comp.c
>>> +++ b/arch/arm64/net/bpf_jit_comp.c
>>> @@ -897,6 +897,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
>>>          image_ptr = jit_data->image;
>>>          header = jit_data->header;
>>>          extra_pass = true;
>>> +        image_size = sizeof(u32) * ctx.idx;
>>>          goto skip_init_ctx;
>>>      }
>>>      memset(&ctx, 0, sizeof(ctx));
>>
>> I don't really mind, but it feels more complex than it needs to be
>> imho, since in the initial pass you fetch 'image_size' in fake pass
>> from ctx.idx, then we set ctx.idx to 0 again, do another pass and
>> use the cached ctx.idx from that second pass instead of the first
>> one where we set 'image_size' originally, so we definitely need to
>> take that into consideration in future reviews at least.
> 
> not sure what you mean.
> This check: ctx.idx != jit_data->ctx.idx matters the most.
> After first alloc the 'image_size' variable is used for dumping only.
> That's why the JITing itself worked fine. We could have removed it
> since it's computable from idx, but imo it's fine this way.

Fair enough, given final ctx.idx value must be guaranteed to never change
in future between pass#1 and pass#2 from the first bpf_int_jit_compile()
run, then let's go with this smaller version; applied to bpf-next, thanks
Alexei!

^ permalink raw reply

* Re: [PATCH][next] bpf: make function skip_callee static and return NULL rather than 0
From: Daniel Borkmann @ 2017-12-19  0:28 UTC (permalink / raw)
  To: Colin King, Alexei Starovoitov, netdev; +Cc: kernel-janitors, linux-kernel
In-Reply-To: <20171218174707.15430-1-colin.king@canonical.com>

On 12/18/2017 06:47 PM, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
> 
> Function skip_callee is local to the source and does not need to
> be in global scope, so make it static. Also return NULL rather than 0.
> Cleans up two sparse warnings:
> 
> symbol 'skip_callee' was not declared. Should it be static?
> Using plain integer as NULL pointer
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Makes sense, applied to bpf-next, thanks Colin!

^ permalink raw reply

* Re: [PATCH 1/3] kallsyms: don't leak address when symbol not found
From: Tobin C. Harding @ 2017-12-19  0:24 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Felix Fietkau, kernel-hardening, Tycho Andersen, Linus Torvalds,
	Kees Cook, Andrew Morton, Daniel Borkmann, Masahiro Yamada,
	Alexei Starovoitov, linux-kernel, Network Development
In-Reply-To: <20171218184324.527ffd0e@gandalf.local.home>

On Mon, Dec 18, 2017 at 06:43:24PM -0500, Steven Rostedt wrote:
> On Tue, 19 Dec 2017 09:41:29 +1100
> "Tobin C. Harding" <me@tobin.cc> wrote:
> 
> > Current suggestion on list is to remove this function. Do you have a use
> > case in mind where debugging will break? We could add a fix to this
> > series if so. Otherwise next version will likely drop
> > string_is_no_symbol()
> 
> What about adding a kernel command line parameter that lets one put
> back the old behavior.
> 
> "insecure_print_all_symbols" ?

Cool. I've not done that before, so it will be a good learning
experience. I'll hack it up and see what people think.

thanks,
Tobin.

^ permalink raw reply

* Re: [PATCH][next] bpf: fix spelling mistake: "funcation"-> "function"
From: Daniel Borkmann @ 2017-12-19  0:24 UTC (permalink / raw)
  To: Colin King, Alexei Starovoitov, netdev; +Cc: kernel-janitors, linux-kernel
In-Reply-To: <20171218140312.6329-1-colin.king@canonical.com>

On 12/18/2017 03:03 PM, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
> 
> Trivial fix to spelling mistake in error message text.
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>

Applied to bpf-next, thanks Colin!

^ permalink raw reply

* Re: [PATCH 3/3] trace: print address if symbol not found
From: Tobin C. Harding @ 2017-12-19  0:22 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: kernel-hardening, Tycho Andersen, Linus Torvalds, Kees Cook,
	Andrew Morton, Daniel Borkmann, Masahiro Yamada,
	Alexei Starovoitov, linux-kernel, Network Development
In-Reply-To: <20171218185143.4046a71b@gandalf.local.home>

On Mon, Dec 18, 2017 at 06:51:43PM -0500, Steven Rostedt wrote:
> On Tue, 19 Dec 2017 08:16:14 +1100
> "Tobin C. Harding" <me@tobin.cc> wrote:
> 
> > > >  #endif /* _LINUX_KERNEL_TRACE_H */
> > > > diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> > > > index 1e1558c99d56..3e28522a76f4 100644
> > > > --- a/kernel/trace/trace_events_hist.c
> > > > +++ b/kernel/trace/trace_events_hist.c
> > > > @@ -982,7 +982,7 @@ static void hist_trigger_stacktrace_print(struct seq_file *m,
> > > >  			return;
> > > >  
> > > >  		seq_printf(m, "%*c", 1 + spaces, ' ');
> > > > -		sprint_symbol(str, stacktrace_entries[i]);
> > > > +		trace_sprint_symbol_addr(str, stacktrace_entries[i]);  
> > > 
> 
> > 
> > If you have the time to give me some brief pointers on how I should go
> > about testing this I'd love to test it before the next version. I know
> > very little about ftrace.
> 
> For hitting the histogram stacktrace trigger (this code path), make
> sure you have CONFIG_HIST_TRIGGERS enabled. And then do:
> 
>  # cd /sys/kernel/debug/tracing
>  # echo 'hist:keys=common_pid.execname,stacktrace:vals=prev_state' > \
>      events/sched/sched_switch/trigger
>  # cat events/sched/sched_switch/hist
> 
> For the "sym" part, you can do (from the same directory):
> 
>  # echo 'hist:keys=call_site.sym:vals=bytes_req' > \
>      events/kmem/kmalloc/trigger
>  # cat events/kmem/kmalloc/hist
> 
> 
> And for sym-offset:
> 
>  # echo 'hist:keys=call_site.sym-offset:vals=bytes_req' > \
>     events/kmem/kmalloc/trigger
>  # cat events/kmem/kmalloc/hist
> 
> -- Steve

Thanks, you're the man

^ permalink raw reply

* Re: [PATCH] bpf: fix broken BPF selftest build on s390
From: Daniel Borkmann @ 2017-12-19  0:21 UTC (permalink / raw)
  To: Hendrik Brueckner, Alexei Starovoitov
  Cc: Arnaldo Carvalho de Melo, linux-s390, netdev
In-Reply-To: <1513602597-12280-1-git-send-email-brueckner@linux.vnet.ibm.com>

On 12/18/2017 02:09 PM, Hendrik Brueckner wrote:
> With 720f228e8d31 ("bpf: fix broken BPF selftest build") the
> inclusion of arch-specific header files changed.  Including the
> asm/bpf_perf_event.h on s390, correctly includes the s390 specific
> header file.  This header file tries then to include the s390
> asm/ptrace.h and the build fails with:
> 
> cc -Wall -O2 -I../../../include/uapi -I../../../lib -I../../../../include/generated  -I../../../include    test_verifier.c
> +/root/git/linux/tools/testing/selftests/bpf/libbpf.a /root/git/linux/tools/testing/selftests/bpf/cgroup_helpers.c -lcap -lelf -o
> +/root/git/linux/tools/testing/selftests/bpf/test_verifier
> In file included from ../../../include/uapi/asm/bpf_perf_event.h:4:0,
>                  from ../../../include/uapi/linux/bpf_perf_event.h:11,
>                  from test_verifier.c:29:
> ../../../include/uapi/../../arch/s390/include/uapi/asm/bpf_perf_event.h:7:9: error: unknown type name 'user_pt_regs'
>  typedef user_pt_regs bpf_user_pt_regs_t;
>          ^~~~~~~~~~~~
> make: *** [../lib.mk:109: /root/git/linux/tools/testing/selftests/bpf/test_verifier] Error 1
> 
> This is caused by a recent update to the s390 asm/ptrace.h file
> that is not (yet) available in the local installation.  That means,
> the s390 asm/ptrace.h must be included from the tools/arch/s390
> directory.
> 
> Because there is no proper framework to deal with asm specific
> includes in tools/, slightly modify the s390 asm/bpf_perf_event.h
> to include the local ptrace.h header file.
> 
> See also discussion on
> https://marc.info/?l=linux-s390&m=151359424420691&w=2
> 
> Please note that this needs to be preserved until tools/ is able to
> correctly handle asm specific headers.
> 
> References: https://marc.info/?l=linux-s390&m=151359424420691&w=2
> Fixes: 720f228e8d31 ("bpf: fix broken BPF selftest build")
> Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Alexei Starovoitov <ast@kernel.org>

Applied to bpf tree, thanks Hendrik!

^ permalink raw reply

* Re: [PATCH 3/3] trace: print address if symbol not found
From: Steven Rostedt @ 2017-12-18 23:51 UTC (permalink / raw)
  To: Tobin C. Harding
  Cc: kernel-hardening, Tycho Andersen, Linus Torvalds, Kees Cook,
	Andrew Morton, Daniel Borkmann, Masahiro Yamada,
	Alexei Starovoitov, linux-kernel, Network Development
In-Reply-To: <20171218211614.GC19604@eros>

On Tue, 19 Dec 2017 08:16:14 +1100
"Tobin C. Harding" <me@tobin.cc> wrote:

> > >  #endif /* _LINUX_KERNEL_TRACE_H */
> > > diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> > > index 1e1558c99d56..3e28522a76f4 100644
> > > --- a/kernel/trace/trace_events_hist.c
> > > +++ b/kernel/trace/trace_events_hist.c
> > > @@ -982,7 +982,7 @@ static void hist_trigger_stacktrace_print(struct seq_file *m,
> > >  			return;
> > >  
> > >  		seq_printf(m, "%*c", 1 + spaces, ' ');
> > > -		sprint_symbol(str, stacktrace_entries[i]);
> > > +		trace_sprint_symbol_addr(str, stacktrace_entries[i]);  
> > 

> 
> If you have the time to give me some brief pointers on how I should go
> about testing this I'd love to test it before the next version. I know
> very little about ftrace.

For hitting the histogram stacktrace trigger (this code path), make
sure you have CONFIG_HIST_TRIGGERS enabled. And then do:

 # cd /sys/kernel/debug/tracing
 # echo 'hist:keys=common_pid.execname,stacktrace:vals=prev_state' > \
     events/sched/sched_switch/trigger
 # cat events/sched/sched_switch/hist

For the "sym" part, you can do (from the same directory):

 # echo 'hist:keys=call_site.sym:vals=bytes_req' > \
     events/kmem/kmalloc/trigger
 # cat events/kmem/kmalloc/hist


And for sym-offset:

 # echo 'hist:keys=call_site.sym-offset:vals=bytes_req' > \
    events/kmem/kmalloc/trigger
 # cat events/kmem/kmalloc/hist

-- Steve

^ permalink raw reply

* Re: [PATCH 1/3] kallsyms: don't leak address when symbol not found
From: Steven Rostedt @ 2017-12-18 23:43 UTC (permalink / raw)
  To: Tobin C. Harding
  Cc: Felix Fietkau, kernel-hardening, Tycho Andersen, Linus Torvalds,
	Kees Cook, Andrew Morton, Daniel Borkmann, Masahiro Yamada,
	Alexei Starovoitov, linux-kernel, Network Development
In-Reply-To: <1513636889.2482269.1209330464.6971721E@webmail.messagingengine.com>

On Tue, 19 Dec 2017 09:41:29 +1100
"Tobin C. Harding" <me@tobin.cc> wrote:

> Current suggestion on list is to remove this function. Do you have a use
> case in mind where debugging will break? We could add a fix to this
> series if so. Otherwise next version will likely drop
> string_is_no_symbol()

What about adding a kernel command line parameter that lets one put
back the old behavior.

"insecure_print_all_symbols" ?

-- Steve

^ permalink raw reply

* Re: [PATCH 2/3] rhashtable: Add rhashtable_walk_curr
From: Herbert Xu @ 2017-12-18 23:38 UTC (permalink / raw)
  To: Andreas Gruenbacher; +Cc: cluster-devel, Thomas Graf, netdev
In-Reply-To: <20171218133122.29179-2-agruenba@redhat.com>

On Mon, Dec 18, 2017 at 02:31:21PM +0100, Andreas Gruenbacher wrote:
> When iterating through an rhashtable is stopped with
> rhashtable_walk_stop and then resumed with rhashtable_walk_start, there
> currently is no way to get back to the current object and thus revisit
> the object rhashtable_walk_next has previously returned.
> 
> This functionality is useful when dumping an rhashtable via the seq file
> interface: seq_read will convert one object after the other.  When an
> object doesn't fit in the remaining buffer space anymore, user-space
> will be returned all objects that have been fully converted so far.
> Upon the next read from user-space, the object that didn't fit
> previously will be revisited.
> 
> Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>

Doesn't the helper that Tom Herbert just added do exactly this?

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH] bpf: make function xdp_do_generic_redirect_map() static
From: Xiongwei Song @ 2017-12-18 23:17 UTC (permalink / raw)
  To: ast, daniel, davem; +Cc: netdev, linux-kernel

The function xdp_do_generic_redirect_map() is only used in this file, so
make it static.

Clean up sparse warning:
net/core/filter.c:2687:5: warning: no previous prototype
for 'xdp_do_generic_redirect_map' [-Wmissing-prototypes]

Signed-off-by: Xiongwei Song <sxwjean@gmail.com>
---
 net/core/filter.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 754abe1041b7..130b842c3a15 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2684,8 +2684,9 @@ static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
 	return 0;
 }
 
-int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
-				struct bpf_prog *xdp_prog)
+static int xdp_do_generic_redirect_map(struct net_device *dev,
+				       struct sk_buff *skb,
+				       struct bpf_prog *xdp_prog)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 	unsigned long map_owner = ri->map_owner;
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next] selftests/bpf: add netdevsim to config
From: Jakub Kicinski @ 2017-12-18 23:11 UTC (permalink / raw)
  To: daniel, alexei.starovoitov; +Cc: netdev, oss-drivers, Jakub Kicinski

BPF offload tests (test_offload.py) will require netdevsim
to be built, add it to config.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 9d4897317c77..983dd25d49f4 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -4,3 +4,4 @@ CONFIG_NET_CLS_BPF=m
 CONFIG_BPF_EVENTS=y
 CONFIG_TEST_BPF=m
 CONFIG_CGROUP_BPF=y
+CONFIG_NETDEVSIM=m
-- 
2.15.1

^ permalink raw reply related

* [PATCH ethtool v2] ethtool: Support for FEC encoding control
From: Jakub Kicinski @ 2017-12-18 22:57 UTC (permalink / raw)
  To: John W. Linville
  Cc: netdev, oss-drivers, Dustin Byford, Vidya Sagar Ravipati,
	Dirk van der Merwe

From: Dustin Byford <dustin@cumulusnetworks.com>

As FEC settings and different FEC modes are mandatory
and configurable across various interfaces of 25G/50G/100G/40G,
the lack of FEC encoding control and reporting today is a source
of interoperability issues for many vendors.

set-fec/show-fec option(s) are designed to provide control and report
the FEC encoding on the link.

$ethtool --set-fec swp1 encoding [off | RS | BaseR | auto]

Encoding: Types of encoding
Off    :  Turning off FEC
RS     :  Force RS-FEC encoding
BaseR  :  Force BaseR encoding
Auto   :  Default FEC settings for drivers, and would represent
          asking the hardware to essentially go into a best effort mode.

Here are a few examples of what we would expect if encoding=auto:
- if autoneg is on, we are expecting FEC to be negotiated as on or off
  as long as protocol supports it
- if the hardware is capable of detecting the FEC encoding on its
  receiver it will reconfigure its encoder to match
- in absence of the above, the configuration would be set to IEEE
  defaults.

From our understanding, this is essentially what most hardware/driver
combinations are doing today in the absence of a way for users to
control the behavior.

$ethtool --show-fec  swp1
FEC parameters for swp1:
FEC encodings:  RS

ethtool devname output:
$ethtool swp1
Settings for swp1:
root@hpe-7712-03:~# ethtool swp18
Settings for swp18:
    Supported ports: [ FIBRE ]
    Supported link modes:   40000baseCR4/Full
                            40000baseSR4/Full
                            40000baseLR4/Full
                            100000baseSR4/Full
                            100000baseCR4/Full
                            100000baseLR4_ER4/Full
    Supported pause frame use: No
    Supports auto-negotiation: Yes
    Supported FEC modes: [RS | BaseR | None | Not reported]
    Advertised link modes:  Not reported
    Advertised pause frame use: No
    Advertised auto-negotiation: No
    Advertised FEC modes: [RS | BaseR | None | Not reported]
    Speed: 100000Mb/s
    Duplex: Full
    Port: FIBRE
    PHYAD: 106
    Transceiver: internal
    Auto-negotiation: off
    Link detected: yes

Signed-off-by: Vidya Sagar Ravipati <vidya.chowdary@gmail.com>
Signed-off-by: Dustin Byford <dustin@cumulusnetworks.com>
[code style + man page edits + commit message update]
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
---
v2:
 - don't break lines after opening parens.

 ethtool.8.in |  31 ++++++++++++++++
 ethtool.c    | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 150 insertions(+)

diff --git a/ethtool.8.in b/ethtool.8.in
index 7ca8bfe43607..9573ffdc985d 100644
--- a/ethtool.8.in
+++ b/ethtool.8.in
@@ -378,6 +378,13 @@ ethtool \- query or control network driver and hardware settings
 .RB [ ap-shared ]
 .RB [ dedicated ]
 .RB [ all ]
+.HP
+.B ethtool \-\-show\-fec
+.I devname
+.HP
+.B ethtool \-\-set\-fec
+.I devname
+.B4 encoding auto off rs baser
 .
 .\" Adjust lines (i.e. full justification) and hyphenate.
 .ad
@@ -1070,6 +1077,30 @@ All components dedicated to this interface
 .B all
 All components used by this interface, even if shared
 .RE
+.TP
+.B \-\-show\-fec
+Queries the specified network device for its support of Forward Error Correction.
+.TP
+.B \-\-set\-fec
+Configures Forward Error Correction for the specified network device.
+
+Forward Error Correction modes selected by a user are expected to be persisted
+after any hotplug events. If a module is swapped that does not support the
+current FEC mode, the driver or firmware must take the link down
+administratively and report the problem in the system logs for users to correct.
+.RS 4
+.TP
+.A4 encoding auto off rs baser
+Sets the FEC encoding for the device.
+.TS
+nokeep;
+lB	l.
+auto	Use the driver's default encoding
+off	Turn off FEC
+RS	Force RS-FEC encoding
+BaseR	Force BaseR encoding
+.TE
+.RE
 .SH BUGS
 Not supported (in part or whole) on all network drivers.
 .SH AUTHOR
diff --git a/ethtool.c b/ethtool.c
index 488f6bfb8378..79c076e42c6e 100644
--- a/ethtool.c
+++ b/ethtool.c
@@ -542,6 +542,9 @@ static void init_global_link_mode_masks(void)
 		ETHTOOL_LINK_MODE_Pause_BIT,
 		ETHTOOL_LINK_MODE_Asym_Pause_BIT,
 		ETHTOOL_LINK_MODE_Backplane_BIT,
+		ETHTOOL_LINK_MODE_FEC_NONE_BIT,
+		ETHTOOL_LINK_MODE_FEC_RS_BIT,
+		ETHTOOL_LINK_MODE_FEC_BASER_BIT,
 	};
 	unsigned int i;
 
@@ -689,6 +692,7 @@ static void dump_link_caps(const char *prefix, const char *an_prefix,
 	};
 	int indent;
 	int did1, new_line_pend, i;
+	int fecreported = 0;
 
 	/* Indent just like the separate functions used to */
 	indent = strlen(prefix) + 14;
@@ -740,6 +744,26 @@ static void dump_link_caps(const char *prefix, const char *an_prefix,
 			fprintf(stdout, "Yes\n");
 		else
 			fprintf(stdout, "No\n");
+
+		fprintf(stdout, "	%s FEC modes:", prefix);
+		if (ethtool_link_mode_test_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT,
+					       mask)) {
+			fprintf(stdout, " None");
+			fecreported = 1;
+		}
+		if (ethtool_link_mode_test_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT,
+					       mask)) {
+			fprintf(stdout, " BaseR");
+			fecreported = 1;
+		}
+		if (ethtool_link_mode_test_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT,
+					       mask)) {
+			fprintf(stdout, " RS");
+			fecreported = 1;
+		}
+		if (!fecreported)
+			fprintf(stdout, " Not reported");
+		fprintf(stdout, "\n");
 	}
 }
 
@@ -1562,6 +1586,20 @@ static void dump_eeecmd(struct ethtool_eee *ep)
 	dump_link_caps("Link partner advertised EEE", "", link_mode, 1);
 }
 
+static void dump_fec(u32 fec)
+{
+	if (fec & ETHTOOL_FEC_NONE)
+		fprintf(stdout, " None");
+	if (fec & ETHTOOL_FEC_AUTO)
+		fprintf(stdout, " Auto");
+	if (fec & ETHTOOL_FEC_OFF)
+		fprintf(stdout, " Off");
+	if (fec & ETHTOOL_FEC_BASER)
+		fprintf(stdout, " BaseR");
+	if (fec & ETHTOOL_FEC_RS)
+		fprintf(stdout, " RS");
+}
+
 #define N_SOTS 7
 
 static char *so_timestamping_labels[N_SOTS] = {
@@ -4812,6 +4850,84 @@ static int do_set_phy_tunable(struct cmd_context *ctx)
 	return err;
 }
 
+static int fecmode_str_to_type(const char *str)
+{
+	int fecmode = 0;
+
+	if (!str)
+		return fecmode;
+
+	if (!strcasecmp(str, "auto"))
+		fecmode |= ETHTOOL_FEC_AUTO;
+	else if (!strcasecmp(str, "off"))
+		fecmode |= ETHTOOL_FEC_OFF;
+	else if (!strcasecmp(str, "rs"))
+		fecmode |= ETHTOOL_FEC_RS;
+	else if (!strcasecmp(str, "baser"))
+		fecmode |= ETHTOOL_FEC_BASER;
+
+	return fecmode;
+}
+
+static int do_gfec(struct cmd_context *ctx)
+{
+	struct ethtool_fecparam feccmd = { 0 };
+	int rv;
+
+	if (ctx->argc != 0)
+		exit_bad_args();
+
+	feccmd.cmd = ETHTOOL_GFECPARAM;
+	rv = send_ioctl(ctx, &feccmd);
+	if (rv != 0) {
+		perror("Cannot get FEC settings");
+		return rv;
+	}
+
+	fprintf(stdout, "FEC parameters for %s:\n", ctx->devname);
+	fprintf(stdout, "Configured FEC encodings:");
+	dump_fec(feccmd.fec);
+	fprintf(stdout, "\n");
+
+	fprintf(stdout, "Active FEC encoding:");
+	dump_fec(feccmd.active_fec);
+	fprintf(stdout, "\n");
+
+	return 0;
+}
+
+static int do_sfec(struct cmd_context *ctx)
+{
+	char *fecmode_str = NULL;
+	struct ethtool_fecparam feccmd;
+	struct cmdline_info cmdline_fec[] = {
+		{ "encoding", CMDL_STR,  &fecmode_str,  &feccmd.fec},
+	};
+	int changed;
+	int fecmode;
+	int rv;
+
+	parse_generic_cmdline(ctx, &changed, cmdline_fec,
+			      ARRAY_SIZE(cmdline_fec));
+
+	if (!fecmode_str)
+		exit_bad_args();
+
+	fecmode = fecmode_str_to_type(fecmode_str);
+	if (!fecmode)
+		exit_bad_args();
+
+	feccmd.cmd = ETHTOOL_SFECPARAM;
+	feccmd.fec = fecmode;
+	rv = send_ioctl(ctx, &feccmd);
+	if (rv != 0) {
+		perror("Cannot set FEC settings");
+		return rv;
+	}
+
+	return 0;
+}
+
 #ifndef TEST_ETHTOOL
 int send_ioctl(struct cmd_context *ctx, void *cmd)
 {
@@ -5000,6 +5116,9 @@ static const struct option {
 	  "		[ ap-shared ]\n"
 	  "		[ dedicated ]\n"
 	  "		[ all ]\n"},
+	{ "--show-fec", 1, do_gfec, "Show FEC settings"},
+	{ "--set-fec", 1, do_sfec, "Set FEC settings",
+	  "		[ encoding auto|off|rs|baser ]\n"},
 	{ "-h|--help", 0, show_usage, "Show this help" },
 	{ "--version", 0, do_version, "Show version number" },
 	{}
-- 
2.15.1

^ permalink raw reply related

* [PATCHv3 net-next 14/14] net: sch: sch_drr: add extack support
From: Alexander Aring @ 2017-12-18 22:45 UTC (permalink / raw)
  To: jhs
  Cc: xiyou.wangcong, jiri, davem, netdev, kernel, Alexander Aring,
	David Ahern
In-Reply-To: <20171218224513.29836-1-aring@mojatatu.com>

This patch adds extack support for the drr qdisc implementation by
adding NL_SET_ERR_MSG in validation of user input.
Also it serves to illustrate a use case of how the infrastructure ops
api changes are to be used by individual qdiscs.

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
---
 net/sched/sch_drr.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index bf638ce57c50..e0b0cf8a9939 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -74,17 +74,21 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	u32 quantum;
 	int err;
 
-	if (!opt)
+	if (!opt) {
+		NL_SET_ERR_MSG(extack, "DRR options are required for this operation");
 		return -EINVAL;
+	}
 
-	err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, NULL);
+	err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy, extack);
 	if (err < 0)
 		return err;
 
 	if (tb[TCA_DRR_QUANTUM]) {
 		quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
-		if (quantum == 0)
+		if (quantum == 0) {
+			NL_SET_ERR_MSG(extack, "Specified DRR quantum cannot be zero");
 			return -EINVAL;
+		}
 	} else
 		quantum = psched_mtu(qdisc_dev(sch));
 
@@ -95,8 +99,10 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 						    NULL,
 						    qdisc_root_sleeping_running(sch),
 						    tca[TCA_RATE]);
-			if (err)
+			if (err) {
+				NL_SET_ERR_MSG(extack, "Failed to replace estimator");
 				return err;
+			}
 		}
 
 		sch_tree_lock(sch);
@@ -127,6 +133,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 					    qdisc_root_sleeping_running(sch),
 					    tca[TCA_RATE]);
 		if (err) {
+			NL_SET_ERR_MSG(extack, "Failed to replace estimator");
 			qdisc_destroy(cl->qdisc);
 			kfree(cl);
 			return err;
@@ -179,8 +186,10 @@ static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl,
 {
 	struct drr_sched *q = qdisc_priv(sch);
 
-	if (cl)
+	if (cl) {
+		NL_SET_ERR_MSG(extack, "DRR classid must be zero");
 		return NULL;
+	}
 
 	return q->block;
 }
-- 
2.11.0

^ permalink raw reply related

* [PATCHv3 net-next 13/14] net: sch: sch_cbs: add extack support
From: Alexander Aring @ 2017-12-18 22:45 UTC (permalink / raw)
  To: jhs
  Cc: xiyou.wangcong, jiri, davem, netdev, kernel, Alexander Aring,
	David Ahern
In-Reply-To: <20171218224513.29836-1-aring@mojatatu.com>

This patch adds extack support to the cbs qdisc implementation by
adding NL_SET_ERR_MSG calls where user input is validated.
It also illustrates how individual qdiscs are expected to use the
infrastructure ops API changes.

Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexander Aring <aring@mojatatu.com>
---
 net/sched/sch_cbs.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 8bf6e163d29c..cdd96b9a27bc 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -219,14 +219,17 @@ static void cbs_disable_offload(struct net_device *dev,
 }
 
 static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
-			      const struct tc_cbs_qopt *opt)
+			      const struct tc_cbs_qopt *opt,
+			      struct netlink_ext_ack *extack)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 	struct tc_cbs_qopt_offload cbs = { };
 	int err;
 
-	if (!ops->ndo_setup_tc)
+	if (!ops->ndo_setup_tc) {
+		NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload");
 		return -EOPNOTSUPP;
+	}
 
 	cbs.queue = q->queue;
 
@@ -237,8 +240,10 @@ static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
 	cbs.sendslope = opt->sendslope;
 
 	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
-	if (err < 0)
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload");
 		return err;
+	}
 
 	q->enqueue = cbs_enqueue_offload;
 	q->dequeue = cbs_dequeue_offload;
@@ -255,12 +260,14 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
 	struct tc_cbs_qopt *qopt;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
+	err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack);
 	if (err < 0)
 		return err;
 
-	if (!tb[TCA_CBS_PARMS])
+	if (!tb[TCA_CBS_PARMS]) {
+		NL_SET_ERR_MSG(extack, "Missing CBS parameter which are mandatory");
 		return -EINVAL;
+	}
 
 	qopt = nla_data(tb[TCA_CBS_PARMS]);
 
@@ -277,7 +284,7 @@ static int cbs_change(struct Qdisc *sch, struct nlattr *opt,
 
 		cbs_disable_offload(dev, q);
 	} else {
-		err = cbs_enable_offload(dev, q, qopt);
+		err = cbs_enable_offload(dev, q, qopt, extack);
 		if (err < 0)
 			return err;
 	}
@@ -298,8 +305,10 @@ static int cbs_init(struct Qdisc *sch, struct nlattr *opt,
 	struct cbs_sched_data *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
 
-	if (!opt)
+	if (!opt) {
+		NL_SET_ERR_MSG(extack, "Missing CBS qdisc options  which are mandatory");
 		return -EINVAL;
+	}
 
 	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
 
-- 
2.11.0

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox