Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 05/12] nfp: add helper for printing ethtool strings
From: Jakub Kicinski @ 2017-08-18 22:48 UTC (permalink / raw)
  To: netdev; +Cc: oss-drivers, Jakub Kicinski
In-Reply-To: <20170818224822.8409-1-jakub.kicinski@netronome.com>

Add a helper for printing ethtool strings and advancing the
pointer correctly.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   | 65 +++++++++++-----------
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 1753bfbc8b47..ba1c28b8791b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -335,53 +335,52 @@ static int nfp_net_set_ringparam(struct net_device *netdev,
 	return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt);
 }
 
+static __printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(data, ETH_GSTRING_LEN, fmt, args);
+	va_end(args);
+
+	return data + ETH_GSTRING_LEN;
+}
+
 static void nfp_net_get_strings(struct net_device *netdev,
 				u32 stringset, u8 *data)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
-	u8 *p = data;
 	int i;
 
 	switch (stringset) {
 	case ETH_SS_STATS:
-		for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) {
-			memcpy(p, nfp_net_et_stats[i].name, ETH_GSTRING_LEN);
-			p += ETH_GSTRING_LEN;
-		}
+		for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++)
+			data = nfp_pr_et(data, nfp_net_et_stats[i].name);
+
 		for (i = 0; i < nn->dp.num_r_vecs; i++) {
-			sprintf(p, "rvec_%u_rx_pkts", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rvec_%u_tx_pkts", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rvec_%u_tx_busy", i);
-			p += ETH_GSTRING_LEN;
+			data = nfp_pr_et(data, "rvec_%u_rx_pkts", i);
+			data = nfp_pr_et(data, "rvec_%u_tx_pkts", i);
+			data = nfp_pr_et(data, "rvec_%u_tx_busy", i);
 		}
-		strncpy(p, "hw_rx_csum_ok", ETH_GSTRING_LEN);
-		p += ETH_GSTRING_LEN;
-		strncpy(p, "hw_rx_csum_inner_ok", ETH_GSTRING_LEN);
-		p += ETH_GSTRING_LEN;
-		strncpy(p, "hw_rx_csum_err", ETH_GSTRING_LEN);
-		p += ETH_GSTRING_LEN;
-		strncpy(p, "hw_tx_csum", ETH_GSTRING_LEN);
-		p += ETH_GSTRING_LEN;
-		strncpy(p, "hw_tx_inner_csum", ETH_GSTRING_LEN);
-		p += ETH_GSTRING_LEN;
-		strncpy(p, "tx_gather", ETH_GSTRING_LEN);
-		p += ETH_GSTRING_LEN;
-		strncpy(p, "tx_lso", ETH_GSTRING_LEN);
-		p += ETH_GSTRING_LEN;
+
+		data = nfp_pr_et(data, "hw_rx_csum_ok");
+		data = nfp_pr_et(data, "hw_rx_csum_inner_ok");
+		data = nfp_pr_et(data, "hw_rx_csum_err");
+		data = nfp_pr_et(data, "hw_tx_csum");
+		data = nfp_pr_et(data, "hw_tx_inner_csum");
+		data = nfp_pr_et(data, "tx_gather");
+		data = nfp_pr_et(data, "tx_lso");
+
 		for (i = 0; i < nn->dp.num_tx_rings; i++) {
-			sprintf(p, "txq_%u_pkts", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "txq_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			data = nfp_pr_et(data, "txq_%u_pkts", i);
+			data = nfp_pr_et(data, "txq_%u_bytes", i);
 		}
+
 		for (i = 0; i < nn->dp.num_rx_rings; i++) {
-			sprintf(p, "rxq_%u_pkts", i);
-			p += ETH_GSTRING_LEN;
-			sprintf(p, "rxq_%u_bytes", i);
-			p += ETH_GSTRING_LEN;
+			data = nfp_pr_et(data, "rxq_%u_pkts", i);
+			data = nfp_pr_et(data, "rxq_%u_bytes", i);
 		}
+
 		break;
 	}
 }
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 04/12] nfp: don't report standard netdev statistics in ethtool
From: Jakub Kicinski @ 2017-08-18 22:48 UTC (permalink / raw)
  To: netdev; +Cc: oss-drivers, Jakub Kicinski
In-Reply-To: <20170818224822.8409-1-jakub.kicinski@netronome.com>

We have been recently called out as a bad example for reporting
standard netdev statistics as part of ethtool.  Fix that :)

Removing standard statistics allows us to simplify the structure
holding definitions since we no longer have to mux different types
of statistics.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
---
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   | 109 ++++++---------------
 1 file changed, 32 insertions(+), 77 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 289fa318a651..1753bfbc8b47 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -59,74 +59,44 @@ enum nfp_dump_diag {
 	NFP_DUMP_NSP_DIAG = 0,
 };
 
-/* Support for stats. Returns netdev, driver, and device stats */
-enum { NETDEV_ET_STATS, NFP_NET_DRV_ET_STATS, NFP_NET_DEV_ET_STATS };
-struct _nfp_net_et_stats {
+struct nfp_et_stat {
 	char name[ETH_GSTRING_LEN];
-	int type;
-	int sz;
 	int off;
 };
 
-#define NN_ET_NETDEV_STAT(m) NETDEV_ET_STATS,			\
-		FIELD_SIZEOF(struct net_device_stats, m),	\
-		offsetof(struct net_device_stats, m)
-/* For stats in the control BAR (other than Q stats) */
-#define NN_ET_DEV_STAT(m) NFP_NET_DEV_ET_STATS,			\
-		sizeof(u64),					\
-		(m)
-static const struct _nfp_net_et_stats nfp_net_et_stats[] = {
-	/* netdev stats */
-	{"rx_packets", NN_ET_NETDEV_STAT(rx_packets)},
-	{"tx_packets", NN_ET_NETDEV_STAT(tx_packets)},
-	{"rx_bytes", NN_ET_NETDEV_STAT(rx_bytes)},
-	{"tx_bytes", NN_ET_NETDEV_STAT(tx_bytes)},
-	{"rx_errors", NN_ET_NETDEV_STAT(rx_errors)},
-	{"tx_errors", NN_ET_NETDEV_STAT(tx_errors)},
-	{"rx_dropped", NN_ET_NETDEV_STAT(rx_dropped)},
-	{"tx_dropped", NN_ET_NETDEV_STAT(tx_dropped)},
-	{"multicast", NN_ET_NETDEV_STAT(multicast)},
-	{"collisions", NN_ET_NETDEV_STAT(collisions)},
-	{"rx_over_errors", NN_ET_NETDEV_STAT(rx_over_errors)},
-	{"rx_crc_errors", NN_ET_NETDEV_STAT(rx_crc_errors)},
-	{"rx_frame_errors", NN_ET_NETDEV_STAT(rx_frame_errors)},
-	{"rx_fifo_errors", NN_ET_NETDEV_STAT(rx_fifo_errors)},
-	{"rx_missed_errors", NN_ET_NETDEV_STAT(rx_missed_errors)},
-	{"tx_aborted_errors", NN_ET_NETDEV_STAT(tx_aborted_errors)},
-	{"tx_carrier_errors", NN_ET_NETDEV_STAT(tx_carrier_errors)},
-	{"tx_fifo_errors", NN_ET_NETDEV_STAT(tx_fifo_errors)},
+static const struct nfp_et_stat nfp_net_et_stats[] = {
 	/* Stats from the device */
-	{"dev_rx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_DISCARDS)},
-	{"dev_rx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_ERRORS)},
-	{"dev_rx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_OCTETS)},
-	{"dev_rx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_UC_OCTETS)},
-	{"dev_rx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_OCTETS)},
-	{"dev_rx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_OCTETS)},
-	{"dev_rx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_FRAMES)},
-	{"dev_rx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_FRAMES)},
-	{"dev_rx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_FRAMES)},
-
-	{"dev_tx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_DISCARDS)},
-	{"dev_tx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_ERRORS)},
-	{"dev_tx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_OCTETS)},
-	{"dev_tx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_UC_OCTETS)},
-	{"dev_tx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_OCTETS)},
-	{"dev_tx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_OCTETS)},
-	{"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)},
-	{"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)},
-	{"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)},
-
-	{"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)},
-	{"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)},
+	{ "dev_rx_discards",	NFP_NET_CFG_STATS_RX_DISCARDS },
+	{ "dev_rx_errors",	NFP_NET_CFG_STATS_RX_ERRORS },
+	{ "dev_rx_bytes",	NFP_NET_CFG_STATS_RX_OCTETS },
+	{ "dev_rx_uc_bytes",	NFP_NET_CFG_STATS_RX_UC_OCTETS },
+	{ "dev_rx_mc_bytes",	NFP_NET_CFG_STATS_RX_MC_OCTETS },
+	{ "dev_rx_bc_bytes",	NFP_NET_CFG_STATS_RX_BC_OCTETS },
+	{ "dev_rx_pkts",	NFP_NET_CFG_STATS_RX_FRAMES },
+	{ "dev_rx_mc_pkts",	NFP_NET_CFG_STATS_RX_MC_FRAMES },
+	{ "dev_rx_bc_pkts",	NFP_NET_CFG_STATS_RX_BC_FRAMES },
+
+	{ "dev_tx_discards",	NFP_NET_CFG_STATS_TX_DISCARDS },
+	{ "dev_tx_errors",	NFP_NET_CFG_STATS_TX_ERRORS },
+	{ "dev_tx_bytes",	NFP_NET_CFG_STATS_TX_OCTETS },
+	{ "dev_tx_uc_bytes",	NFP_NET_CFG_STATS_TX_UC_OCTETS },
+	{ "dev_tx_mc_bytes",	NFP_NET_CFG_STATS_TX_MC_OCTETS },
+	{ "dev_tx_bc_bytes",	NFP_NET_CFG_STATS_TX_BC_OCTETS },
+	{ "dev_tx_pkts",	NFP_NET_CFG_STATS_TX_FRAMES },
+	{ "dev_tx_mc_pkts",	NFP_NET_CFG_STATS_TX_MC_FRAMES },
+	{ "dev_tx_bc_pkts",	NFP_NET_CFG_STATS_TX_BC_FRAMES },
+
+	{ "bpf_pass_pkts",	NFP_NET_CFG_STATS_APP0_FRAMES },
+	{ "bpf_pass_bytes",	NFP_NET_CFG_STATS_APP0_BYTES },
 	/* see comments in outro functions in nfp_bpf_jit.c to find out
 	 * how different BPF modes use app-specific counters
 	 */
-	{"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)},
-	{"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)},
-	{"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)},
-	{"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)},
-	{"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)},
-	{"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)},
+	{ "bpf_app1_pkts",	NFP_NET_CFG_STATS_APP1_FRAMES },
+	{ "bpf_app1_bytes",	NFP_NET_CFG_STATS_APP1_BYTES },
+	{ "bpf_app2_pkts",	NFP_NET_CFG_STATS_APP2_FRAMES },
+	{ "bpf_app2_bytes",	NFP_NET_CFG_STATS_APP2_BYTES },
+	{ "bpf_app3_pkts",	NFP_NET_CFG_STATS_APP3_FRAMES },
+	{ "bpf_app3_bytes",	NFP_NET_CFG_STATS_APP3_BYTES },
 };
 
 #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
@@ -421,28 +391,13 @@ static void nfp_net_get_stats(struct net_device *netdev,
 {
 	u64 gathered_stats[NN_ET_RVEC_GATHER_STATS] = {};
 	struct nfp_net *nn = netdev_priv(netdev);
-	struct rtnl_link_stats64 *netdev_stats;
-	struct rtnl_link_stats64 temp = {};
 	u64 tmp[NN_ET_RVEC_GATHER_STATS];
 	u8 __iomem *io_p;
 	int i, j, k;
-	u8 *p;
-
-	netdev_stats = dev_get_stats(netdev, &temp);
 
 	for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) {
-		switch (nfp_net_et_stats[i].type) {
-		case NETDEV_ET_STATS:
-			p = (char *)netdev_stats + nfp_net_et_stats[i].off;
-			data[i] = nfp_net_et_stats[i].sz == sizeof(u64) ?
-				*(u64 *)p : *(u32 *)p;
-			break;
-
-		case NFP_NET_DEV_ET_STATS:
-			io_p = nn->dp.ctrl_bar + nfp_net_et_stats[i].off;
-			data[i] = readq(io_p);
-			break;
-		}
+		io_p = nn->dp.ctrl_bar + nfp_net_et_stats[i].off;
+		data[i] = readq(io_p);
 	}
 	for (j = 0; j < nn->dp.num_r_vecs; j++) {
 		unsigned int start;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 03/12] nfp: allow retrieving management FW logs on representors
From: Jakub Kicinski @ 2017-08-18 22:48 UTC (permalink / raw)
  To: netdev; +Cc: oss-drivers, Jakub Kicinski
In-Reply-To: <20170818224822.8409-1-jakub.kicinski@netronome.com>

Users should be able to dump the management FW logs on any
of the driver's netdevs.  Make the code only depend on the
nfp_app and share it between vNICs and representors.

Storing the dump flag is simply dropped for now, since we
only support the argument being set to 0.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |  2 --
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   | 33 +++++++++++-----------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index b1fa77bd708b..d51d8237b984 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -573,7 +573,6 @@ struct nfp_net_dp {
  * @tx_bar:             Pointer to mapped TX queues
  * @rx_bar:             Pointer to mapped FL/RX queues
  * @debugfs_dir:	Device directory in debugfs
- * @ethtool_dump_flag:	Ethtool dump flag
  * @vnic_list:		Entry on device vNIC list
  * @pdev:		Backpointer to PCI device
  * @app:		APP handle if available
@@ -640,7 +639,6 @@ struct nfp_net {
 	u8 __iomem *rx_bar;
 
 	struct dentry *debugfs_dir;
-	u32 ethtool_dump_flag;
 
 	struct list_head vnic_list;
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 3c34c8b27dcf..289fa318a651 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -727,18 +727,18 @@ static int nfp_net_get_coalesce(struct net_device *netdev,
 /* Other debug dumps
  */
 static int
-nfp_dump_nsp_diag(struct nfp_net *nn, struct ethtool_dump *dump, void *buffer)
+nfp_dump_nsp_diag(struct nfp_app *app, struct ethtool_dump *dump, void *buffer)
 {
 	struct nfp_resource *res;
 	int ret;
 
-	if (!nn->app)
+	if (!app)
 		return -EOPNOTSUPP;
 
 	dump->version = 1;
 	dump->flag = NFP_DUMP_NSP_DIAG;
 
-	res = nfp_resource_acquire(nn->app->cpp, NFP_RESOURCE_NSP_DIAG);
+	res = nfp_resource_acquire(app->cpp, NFP_RESOURCE_NSP_DIAG);
 	if (IS_ERR(res))
 		return PTR_ERR(res);
 
@@ -748,7 +748,7 @@ nfp_dump_nsp_diag(struct nfp_net *nn, struct ethtool_dump *dump, void *buffer)
 			goto exit_release;
 		}
 
-		ret = nfp_cpp_read(nn->app->cpp, nfp_resource_cpp_id(res),
+		ret = nfp_cpp_read(app->cpp, nfp_resource_cpp_id(res),
 				   nfp_resource_address(res),
 				   buffer, dump->len);
 		if (ret != dump->len)
@@ -765,32 +765,30 @@ nfp_dump_nsp_diag(struct nfp_net *nn, struct ethtool_dump *dump, void *buffer)
 	return ret;
 }
 
-static int nfp_net_set_dump(struct net_device *netdev, struct ethtool_dump *val)
+static int nfp_app_set_dump(struct net_device *netdev, struct ethtool_dump *val)
 {
-	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_app *app = nfp_app_from_netdev(netdev);
 
-	if (!nn->app)
+	if (!app)
 		return -EOPNOTSUPP;
 
 	if (val->flag != NFP_DUMP_NSP_DIAG)
 		return -EINVAL;
 
-	nn->ethtool_dump_flag = val->flag;
-
 	return 0;
 }
 
 static int
-nfp_net_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump)
+nfp_app_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump)
 {
-	return nfp_dump_nsp_diag(netdev_priv(netdev), dump, NULL);
+	return nfp_dump_nsp_diag(nfp_app_from_netdev(netdev), dump, NULL);
 }
 
 static int
-nfp_net_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump,
+nfp_app_get_dump_data(struct net_device *netdev, struct ethtool_dump *dump,
 		      void *buffer)
 {
-	return nfp_dump_nsp_diag(netdev_priv(netdev), dump, buffer);
+	return nfp_dump_nsp_diag(nfp_app_from_netdev(netdev), dump, buffer);
 }
 
 static int nfp_net_set_coalesce(struct net_device *netdev,
@@ -947,9 +945,9 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
 	.set_rxfh		= nfp_net_set_rxfh,
 	.get_regs_len		= nfp_net_get_regs_len,
 	.get_regs		= nfp_net_get_regs,
-	.set_dump		= nfp_net_set_dump,
-	.get_dump_flag		= nfp_net_get_dump_flag,
-	.get_dump_data		= nfp_net_get_dump_data,
+	.set_dump		= nfp_app_set_dump,
+	.get_dump_flag		= nfp_app_get_dump_flag,
+	.get_dump_data		= nfp_app_get_dump_data,
 	.get_coalesce           = nfp_net_get_coalesce,
 	.set_coalesce           = nfp_net_set_coalesce,
 	.get_channels		= nfp_net_get_channels,
@@ -961,6 +959,9 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
 const struct ethtool_ops nfp_port_ethtool_ops = {
 	.get_drvinfo		= nfp_app_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
+	.set_dump		= nfp_app_set_dump,
+	.get_dump_flag		= nfp_app_get_dump_flag,
+	.get_dump_data		= nfp_app_get_dump_data,
 };
 
 void nfp_net_set_ethtool_ops(struct net_device *netdev)
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 01/12] nfp: link basic ethtool ops to representors
From: Jakub Kicinski @ 2017-08-18 22:48 UTC (permalink / raw)
  To: netdev; +Cc: oss-drivers, Jakub Kicinski
In-Reply-To: <20170818224822.8409-1-jakub.kicinski@netronome.com>

Start linking ethtool ops to representors.  Begin by adding
a separate ops structure and providing link state.  Next
patches will convert appropriate functions to only use nfp_port,
which will make them reusable on representors.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 4 ++++
 drivers/net/ethernet/netronome/nfp/nfp_net_repr.c    | 2 ++
 drivers/net/ethernet/netronome/nfp/nfp_port.h        | 1 +
 3 files changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 6e31355c3567..3edc5d62ad5b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -939,6 +939,10 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
 	.set_link_ksettings	= nfp_net_set_link_ksettings,
 };
 
+const struct ethtool_ops nfp_port_ethtool_ops = {
+	.get_link		= ethtool_op_get_link,
+};
+
 void nfp_net_set_ethtool_ops(struct net_device *netdev)
 {
 	netdev->ethtool_ops = &nfp_net_ethtool_ops;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 47daad30756c..50f7cc057cc9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -320,6 +320,8 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
 	repr->dst->u.port_info.lower_dev = pf_netdev;
 
 	netdev->netdev_ops = &nfp_repr_netdev_ops;
+	netdev->ethtool_ops = &nfp_port_ethtool_ops;
+
 	SWITCHDEV_SET_OPS(netdev, &nfp_port_switchdev_ops);
 
 	if (nfp_app_has_tc(app)) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index c88e376dcf0f..784d82c2f32c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -106,6 +106,7 @@ struct nfp_port {
 	struct list_head port_list;
 };
 
+extern const struct ethtool_ops nfp_port_ethtool_ops;
 extern const struct switchdev_ops nfp_port_switchdev_ops;
 
 int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type,
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 02/12] nfp: provide ethtool_drvinfo on representors
From: Jakub Kicinski @ 2017-08-18 22:48 UTC (permalink / raw)
  To: netdev; +Cc: oss-drivers, Jakub Kicinski
In-Reply-To: <20170818224822.8409-1-jakub.kicinski@netronome.com>

Extend representors' ethtool ops to show basic info like firmware
version, driver version, and driver name.

While at it don't set drvinfo.n_stats and drvinfo.regdump_len,
core will invoke appropriate handlers to get those.

A helper is added to turn a netdev into nfp_app for convenience.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_app.c       | 20 ++++++++++
 drivers/net/ethernet/netronome/nfp/nfp_app.h       |  2 +
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   | 44 ++++++++++++++++------
 3 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index c704c022574f..505e63f47419 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -38,6 +38,7 @@
 #include "nfpcore/nfp_nffw.h"
 #include "nfp_app.h"
 #include "nfp_main.h"
+#include "nfp_net.h"
 #include "nfp_net_repr.h"
 
 static const struct nfp_app_type *apps[] = {
@@ -48,6 +49,25 @@ static const struct nfp_app_type *apps[] = {
 #endif
 };
 
+struct nfp_app *nfp_app_from_netdev(struct net_device *netdev)
+{
+	if (nfp_netdev_is_nfp_net(netdev)) {
+		struct nfp_net *nn = netdev_priv(netdev);
+
+		return nn->app;
+	}
+
+	if (nfp_netdev_is_nfp_repr(netdev)) {
+		struct nfp_repr *repr = netdev_priv(netdev);
+
+		return repr->app;
+	}
+
+	WARN(1, "Unknown netdev type for nfp_app\n");
+
+	return NULL;
+}
+
 const char *nfp_app_mip_name(struct nfp_app *app)
 {
 	if (!app || !app->pf->mip)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index f34e8778fae2..c13b9bbe7e62 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -293,6 +293,8 @@ static inline struct net_device *nfp_app_repr_get(struct nfp_app *app, u32 id)
 	return app->type->repr_get(app, id);
 }
 
+struct nfp_app *nfp_app_from_netdev(struct net_device *netdev);
+
 struct nfp_reprs *
 nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type,
 		  struct nfp_reprs *reprs);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 3edc5d62ad5b..3c34c8b27dcf 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -147,34 +147,53 @@ static void nfp_net_get_nspinfo(struct nfp_app *app, char *version)
 	if (IS_ERR(nsp))
 		return;
 
-	snprintf(version, ETHTOOL_FWVERS_LEN, "sp:%hu.%hu",
+	snprintf(version, ETHTOOL_FWVERS_LEN, "%hu.%hu",
 		 nfp_nsp_get_abi_ver_major(nsp),
 		 nfp_nsp_get_abi_ver_minor(nsp));
 
 	nfp_nsp_close(nsp);
 }
 
-static void nfp_net_get_drvinfo(struct net_device *netdev,
-				struct ethtool_drvinfo *drvinfo)
+static void
+nfp_get_drvinfo(struct nfp_app *app, struct pci_dev *pdev,
+		const char *vnic_version, struct ethtool_drvinfo *drvinfo)
 {
 	char nsp_version[ETHTOOL_FWVERS_LEN] = {};
-	struct nfp_net *nn = netdev_priv(netdev);
 
-	strlcpy(drvinfo->driver, nn->pdev->driver->name,
-		sizeof(drvinfo->driver));
+	strlcpy(drvinfo->driver, pdev->driver->name, sizeof(drvinfo->driver));
 	strlcpy(drvinfo->version, nfp_driver_version, sizeof(drvinfo->version));
 
-	nfp_net_get_nspinfo(nn->app, nsp_version);
+	nfp_net_get_nspinfo(app, nsp_version);
 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
-		 "%d.%d.%d.%d %s %s %s",
+		 "%s %s %s %s", vnic_version, nsp_version,
+		 nfp_app_mip_name(app), nfp_app_name(app));
+}
+
+static void
+nfp_net_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+	char vnic_version[ETHTOOL_FWVERS_LEN] = {};
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	snprintf(vnic_version, sizeof(vnic_version), "%d.%d.%d.%d",
 		 nn->fw_ver.resv, nn->fw_ver.class,
-		 nn->fw_ver.major, nn->fw_ver.minor, nsp_version,
-		 nfp_app_mip_name(nn->app), nfp_app_name(nn->app));
+		 nn->fw_ver.major, nn->fw_ver.minor);
 	strlcpy(drvinfo->bus_info, pci_name(nn->pdev),
 		sizeof(drvinfo->bus_info));
 
-	drvinfo->n_stats = NN_ET_STATS_LEN;
-	drvinfo->regdump_len = NFP_NET_CFG_BAR_SZ;
+	nfp_get_drvinfo(nn->app, nn->pdev, vnic_version, drvinfo);
+}
+
+static void
+nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+	struct nfp_app *app;
+
+	app = nfp_app_from_netdev(netdev);
+	if (!app)
+		return;
+
+	nfp_get_drvinfo(app, app->pdev, "*", drvinfo);
 }
 
 /**
@@ -940,6 +959,7 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
 };
 
 const struct ethtool_ops nfp_port_ethtool_ops = {
+	.get_drvinfo		= nfp_app_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
 };
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 00/12] nfp: add basic ethtool callbacks to representors
From: Jakub Kicinski @ 2017-08-18 22:48 UTC (permalink / raw)
  To: netdev; +Cc: oss-drivers, Jakub Kicinski

Hi!

This set extends the basic ethtool functionality to representor
netdevs.  I start with providing link state via ethtool and then
move on to functions such as driver information, statistics and
FW log dump.  The series contains a number of clean ups to the
ethtool stats code too, some of the logic is simplified by making 
better use of the nfp_port abstraction.  The stats we expose on 
representors are only the PCIe and MAC port statistics firmware 
maintains for us.


Jakub Kicinski (12):
  nfp: link basic ethtool ops to representors
  nfp: provide ethtool_drvinfo on representors
  nfp: allow retrieving management FW logs on representors
  nfp: don't report standard netdev statistics in ethtool
  nfp: add helper for printing ethtool strings
  nfp: split software and hardware vNIC statistics
  nfp: store pointer to MAC statistics in nfp_port
  nfp: report MAC statistics in ethtool
  nfp: add pointer to vNIC config memory to nfp_port structure
  nfp: add ethtool statistics for representors
  nfp: fix copy paste in names and messages regarding vNICs
  nfp: don't reuse pointers in ring dumping

 drivers/net/ethernet/netronome/nfp/flower/main.c   |   8 +-
 drivers/net/ethernet/netronome/nfp/nfp_app.c       |  20 +
 drivers/net/ethernet/netronome/nfp/nfp_app.h       |   2 +
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |   2 -
 .../net/ethernet/netronome/nfp/nfp_net_debugfs.c   |  13 +-
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   | 593 ++++++++++++++-------
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c  |   4 +-
 drivers/net/ethernet/netronome/nfp/nfp_net_repr.c  |  59 +-
 drivers/net/ethernet/netronome/nfp/nfp_port.c      |   3 +
 drivers/net/ethernet/netronome/nfp/nfp_port.h      |  54 +-
 10 files changed, 497 insertions(+), 261 deletions(-)

-- 
2.11.0

^ permalink raw reply

* Re: [PATCH RESEND 0/2] enable hires timer to timeout datagram socket
From: Vallish Vaidyeshwara @ 2017-08-18 22:27 UTC (permalink / raw)
  To: Richard Cochran
  Cc: davem, shuah, netdev, linux-kernel, eduval, anchalag, tglx
In-Reply-To: <20170818201854.xes246oviptinwvq@localhost>

On Fri, Aug 18, 2017 at 10:18:54PM +0200, Richard Cochran wrote:
> On Fri, Aug 18, 2017 at 06:44:08PM +0000, Vallish Vaidyeshwara wrote:
> > There has been a behavior change in 4.9 kernel with refactoring of Kernel
> > timer wheel in 4.8. We have a use case wherein our datagram socket
> > application is sensitive to socket timeout including long timeouts.
> > 
> > One of the test runs with a timeout value of 180 seconds timed out at
> > 190 seconds.
> 
> So the whole premise of the new timer wheel is that long timeouts need
> not be very accurate.
> 
> > Patch 1: Has core code change of enabling hires timer to timeout datagram
> > 	 socket on AF_UNIX and AF_INET domain
>

Hello Richard,

> Using hrtimers will hurt performance for most applications.  Can you
> please explain your use case and why is it so important?
>

We have an on-demand application that uses long timeouts and needs to react to
events within milliseconds. With this change in behavior between 4.4 and 4.9,
the legacy application now exhibits incorrect behavior with the same set of
system calls.

Thanks.
-Vallish

> Thanks,
> Richard
> 
> 

^ permalink raw reply

* Re: [PATCH V4 net 0/2] ipv6: fix flowlabel issue for reset packet
From: David Miller @ 2017-08-18 22:27 UTC (permalink / raw)
  To: kafai; +Cc: tom, shli, netdev
In-Reply-To: <20170818205136.nsmlmyshaobsyukc@kafai-mba.dhcp.thefacebook.com>

From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 18 Aug 2017 13:51:36 -0700

> It seems like that middle box specifically drops TCP_RST if it
> does not know anything about this flow.  Since the flowlabel of the TCP_RST
> (sent in TW state) is always different, it always lands to a different middle
> box.  All of these TCP_RST cannot be delivered.

This really is illegal behavior.  The flow label is not a flow _KEY_
by any definition whatsoever.

Flow labels are an optimization, not a determinant for flow matching
particularly for proper TCP state processing.

I'd rather you invest all of this energy getting that vendor to fix
their kit.

Thank you.

^ permalink raw reply

* RE: [PATCH net-next 3/3] hv_sock: implements Hyper-V transport for Virtual Sockets (AF_VSOCK)
From: Dexuan Cui @ 2017-08-18 22:23 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: Michal Kubecek, joe@perches.com, olaf@aepfle.de,
	Stephen Hemminger, jasowang@redhat.com, netdev@vger.kernel.org,
	Haiyang Zhang, Dave Scott, linux-kernel@vger.kernel.org,
	apw@canonical.com, Jorgen Hansen, Rolf Neugebauer, Marcelo Cerri,
	devel@linuxdriverproject.org, Vitaly Kuznetsov,
	davem@davemloft.net, George Zhang, Dan Carpenter
In-Reply-To: <20170817145551.GI5539@stefanha-x1.localdomain>

> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Thursday, August 17, 2017 07:56
> To: Dexuan Cui <decui@microsoft.com>
> On Tue, Aug 15, 2017 at 10:18:41PM +0000, Dexuan Cui wrote:
> > +static u32 hvs_get_local_cid(void)
> > +{
> > +	return VMADDR_CID_ANY;
> > +}
> 
> Interesting concept: the guest never knows its CID.  This is nice from a
> live migration perspective.  Currently VMCI and virtio adjust listen
> socket local CIDs after migration.
> 
> > +static bool hvs_stream_allow(u32 cid, u32 port)
> > +{
> > +	static const u32 valid_cids[] = {
> > +		VMADDR_CID_ANY,
> 
> Is this for loopback?

No, we don't support loopback in a Linux VM, at least for now.
In our Linux implementation, a Linux VM can only connect to the host, and
here when a Linux VM calls connect(), I treat VMADDR_CID_ANY
the same as VMADDR_CID_HOST.

> > +		VMADDR_CID_HOST,
> > +	};
> > +	int i;
> > +
> > +	/* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is
> > +	 * reserved as ephemeral ports, which are used as the host's ports
> > +	 * when the host initiates connections.
> > +	 */
> > +	if (port > MAX_HOST_LISTEN_PORT)
> > +		return false;
> 
> Without this if statement the guest will attempt to connect.  I guess
> there will be no listen sockets above MAX_HOST_LISTEN_PORT, so the
> connection attempt will fail.

You're correct.
To use the vsock common infrastructure, we have to map Hyper-V's
GUID <VM_ID, Service_ID> to int <cid, port>, and hence we must limit
the port range we can listen() on to [0, MAX_LISTEN_PORT], i.e.
we can only use half of the whole 32-bit port space for listen().
This is detailed in the long comments starting at about Line 100.
 
> ...but hardcode this knowledge into the guest driver?
I'd like the guest's connect() to fail immediately here.
IMO this is better than a connect timeout. :-)

Thanks,
-- Dexuan

^ permalink raw reply

* Re: [PATCH net-next] liquidio: fix Smatch error
From: David Miller @ 2017-08-18 22:21 UTC (permalink / raw)
  To: felix.manlunas
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	intiyaz.basha
In-Reply-To: <20170818200719.GA4685@felix-thinkpad.cavium.com>

From: Felix Manlunas <felix.manlunas@cavium.com>
Date: Fri, 18 Aug 2017 13:07:19 -0700

> From: Intiyaz Basha <intiyaz.basha@cavium.com>
> 
> Fix Smatch error by not dereferencing iq pointer if it's NULL.
> 
> See http://marc.info/?l=kernel-janitors&m=150296723301129&w=2
> 
> Also, remove unnecessary parentheses.
> 
> Fixes: d314ac222829 ("liquidio: moved liquidio_napi_poll to lio_core.c")
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
> Signed-off-by: Intiyaz Basha <intiyaz.basha@cavium.com>
> Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>

Applied.

^ permalink raw reply

* Re: [PATCH v3 net-next] ipv4: convert dst_metrics.refcnt from atomic_t to refcount_t
From: David Miller @ 2017-08-18 22:14 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, xiyou.wangcong
In-Reply-To: <1503083287.22502.5.camel@edumazet-glaptop3.roam.corp.google.com>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 18 Aug 2017 12:08:07 -0700

> From: Eric Dumazet <edumazet@google.com>
> 
> refcount_t type and corresponding API should be
> used instead of atomic_t when the variable is used as
> a reference counter. This allows to avoid accidental
> refcounter overflows that might lead to use-after-free
> situations.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
> v3: added the include <linux/refcount.h> to be safe (Cong Wang)
> v2: fix a missing change in net/ipv4/fib_semantics.c

Applied, thanks Eric.

^ permalink raw reply

* Re: [PATCH net v3] datagram: When peeking datagrams with offset < 0 don't skip empty skbs
From: David Miller @ 2017-08-18 22:13 UTC (permalink / raw)
  To: matthew; +Cc: netdev, thiago.macieira, willemdebruijn.kernel, pabeni
In-Reply-To: <20170818190454.7214-1-matthew@mjdsystems.ca>

From: Matthew Dawson <matthew@mjdsystems.ca>
Date: Fri, 18 Aug 2017 15:04:54 -0400

> Due to commit e6afc8ace6dd5cef5e812f26c72579da8806f5ac ("udp: remove
> headers from UDP packets before queueing"), when udp packets are being
> peeked the requested extra offset is always 0 as there is no need to skip
> the udp header.  However, when the offset is 0 and the next skb is
> of length 0, it is only returned once.  The behaviour can be seen with
> the following python script:
> 
> from socket import *;
> f=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0);
> g=socket(AF_INET6, SOCK_DGRAM | SOCK_NONBLOCK, 0);
> f.bind(('::', 0));
> addr=('::1', f.getsockname()[1]);
> g.sendto(b'', addr)
> g.sendto(b'b', addr)
> print(f.recvfrom(10, MSG_PEEK));
> print(f.recvfrom(10, MSG_PEEK));
> 
> Where the expected output should be the empty string twice.
> 
> Instead, make sk_peek_offset return negative values, and pass those values
> to __skb_try_recv_datagram/__skb_try_recv_from_queue.  If the passed offset
> to __skb_try_recv_from_queue is negative, the checked skb is never skipped.
> __skb_try_recv_from_queue will then ensure the offset is reset back to 0
> if a peek is requested without an offset, unless no packets are found.
> 
> Also simplify the if condition in __skb_try_recv_from_queue.  If _off is
> greater than 0, and off is greater than or equal to skb->len, then
> (_off || skb->len) must always be true assuming skb->len >= 0 is always
> true.
> 
> Also remove a redundant check around a call to sk_peek_offset in af_unix.c,
> as it double checked if MSG_PEEK was set in the flags.
> 
> V2:
>  - Moved the negative fixup into __skb_try_recv_from_queue, and remove now
> redundant checks
>  - Fix peeking in udp{,v6}_recvmsg to report the right value when the
> offset is 0
> 
> V3:
>  - Marked new branch in __skb_try_recv_from_queue as unlikely.
> 
> Signed-off-by: Matthew Dawson <matthew@mjdsystems.ca>
> Acked-by: Willem de Bruijn <willemb@google.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* Re: [PATCH net-next 00/11] net: dsa: add generic debugfs interface
From: Florian Fainelli @ 2017-08-18 21:54 UTC (permalink / raw)
  To: Vivien Didelot, netdev
  Cc: linux-kernel, kernel, David S. Miller, Andrew Lunn,
	Egil Hjelmeland, John Crispin, Woojung Huh, Sean Wang,
	Volodymyr Bendiuga, Nikita Yushchenko, Maxime Hadjinlian,
	Chris Healy, Maxim Uvarov, Stefan Eichenberger, Jason Cobham,
	Juergen Borleis, Tobias Waldekranz
In-Reply-To: <20170814222242.10643-1-vivien.didelot@savoirfairelinux.com>

On 08/14/2017 03:22 PM, Vivien Didelot wrote:
> This patch series adds a generic debugfs interface for the DSA
> framework, so that all switch devices benefit from it, e.g. Marvell,
> Broadcom, Microchip or any other DSA driver.
> 
> This is really convenient for debugging, especially CPU ports and DSA
> links which are not exposed to userspace as net device. This interface
> is currently the only way to easily inspect the hardware for such ports.
> 
> With the patch series, any switch device user is able to query the
> hardware for the supported tagging protocol, the ports stats and
> registers, as well as their FDB, MDB and VLAN entries.
> 
> This support is only compiled if CONFIG_DEBUG_FS is enabled. Below is
> an example of usage of this interface on a multi-chip switch fabric:
> 
>     # mount -t debugfs none /sys/kernel/debug
>     # cd /sys/kernel/debug/dsa/
>     # ls
>     switch0  switch1 switch2
>     # ls -l switch0/
>     drwxr-xr-x 2 root root 0 Jan  1 00:00 port0
>     drwxr-xr-x 2 root root 0 Jan  1 00:00 port1
>     drwxr-xr-x 2 root root 0 Jan  1 00:00 port2
>     drwxr-xr-x 2 root root 0 Jan  1 00:00 port5
>     drwxr-xr-x 2 root root 0 Jan  1 00:00 port6
>     -r--r--r-- 1 root root 0 Jan  1 00:00 tag_protocol
>     -r--r--r-- 1 root root 0 Jan  1 00:00 tree
>     # ls -l switch0/port6
>     -r--r--r-- 1 root root 0 Jan  1 00:00 fdb
>     -r--r--r-- 1 root root 0 Jan  1 00:00 mdb
>     -r--r--r-- 1 root root 0 Jan  1 00:00 regs
>     -r--r--r-- 1 root root 0 Jan  1 00:00 stats
>     -r--r--r-- 1 root root 0 Jan  1 00:00 vlan
>     # cat switch0/port2/vlan
>     vid 42  pvid  untagged
>     # cat switch0/port1/fdb
>     vid 0    12:34:56:78:90:ab    static    unicast
>     # pr -mt switch0/port{5,6}/stats
>     in_good_octets      : 0             in_good_octets      : 13824
>     in_bad_octets       : 0             in_bad_octets       : 0
>     in_unicast          : 0             in_unicast          : 0
>     in_broadcasts       : 0             in_broadcasts       : 216
>     in_multicasts       : 0             in_multicasts       : 0
>     in_pause            : 0             in_pause            : 0
>     in_undersize        : 0             in_undersize        : 0
>     ...
>     # pr -mt switch0/port{5,6}/regs
>      0: 4e07			     0: 4d04
>      1: 403e			     1: 003d
>      2: 0000			     2: 0000
>      3: 3521			     3: 3521
>      4: 0533			     4: 373f
>      5: 8000			     5: 0000
>      6: 005f			     6: 003f
>      7: 002a			     7: 002a
>     ...
> 
> where switch0 port5 and port6 are CPU and DSA ports of a ZII Rev B.

For this whole series:

Tested-by: Florian Fainelli <f.fainelli@gmail.com>

On bcm_sf2

> 
> Vivien Didelot (11):
>   net: dsa: legacy: assign dst->applied
>   net: dsa: add debugfs interface
>   net: dsa: debugfs: add tree
>   net: dsa: debugfs: add tag_protocol
>   net: dsa: debugfs: add port stats
>   net: dsa: debugfs: add port registers
>   net: dsa: debugfs: add port fdb
>   net: dsa: restore mdb dump
>   net: dsa: debugfs: add port mdb
>   net: dsa: restore VLAN dump
>   net: dsa: debugfs: add port vlan
> 
>  drivers/net/dsa/b53/b53_common.c       |  41 +++
>  drivers/net/dsa/b53/b53_priv.h         |   2 +
>  drivers/net/dsa/bcm_sf2.c              |   1 +
>  drivers/net/dsa/dsa_loop.c             |  38 +++
>  drivers/net/dsa/microchip/ksz_common.c |  41 +++
>  drivers/net/dsa/mv88e6xxx/chip.c       |  82 +++++-
>  include/net/dsa.h                      |  15 ++
>  net/dsa/Kconfig                        |  14 +
>  net/dsa/Makefile                       |   1 +
>  net/dsa/debugfs.c                      | 453 +++++++++++++++++++++++++++++++++
>  net/dsa/dsa.c                          |   3 +
>  net/dsa/dsa2.c                         |   4 +
>  net/dsa/dsa_priv.h                     |  13 +
>  net/dsa/legacy.c                       |   7 +
>  14 files changed, 707 insertions(+), 8 deletions(-)
>  create mode 100644 net/dsa/debugfs.c
> 


-- 
Florian

^ permalink raw reply

* [PATCH net-next] i40e: fix a typo in i40e_pf documentation.
From: Rami Rosen @ 2017-08-18 21:20 UTC (permalink / raw)
  To: jeffrey.t.kirsher
  Cc: intel-wired-lan, davem, netdev, linux-kernel, Rami Rosen

This patch fixes a typo in i40e_pf object documentation; num_req_vfs 
refers to the number of VFs requested for the PF.

Signed-off-by: Rami Rosen <rami.rosen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index d616f69..cadbbec 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -348,7 +348,7 @@ struct i40e_pf {
 	u16 num_vmdq_vsis;         /* num vmdq vsis this PF has set up */
 	u16 num_vmdq_qps;          /* num queue pairs per vmdq pool */
 	u16 num_vmdq_msix;         /* num queue vectors per vmdq pool */
-	u16 num_req_vfs;           /* num VFs requested for this VF */
+	u16 num_req_vfs;           /* num VFs requested for this PF */
 	u16 num_vf_qps;            /* num queue pairs per VF */
 	u16 num_lan_qps;           /* num lan queues this PF has set up */
 	u16 num_lan_msix;          /* num queue vectors for the base PF vsi */
-- 
1.9.1

^ permalink raw reply related

* RE: [PATCH net-next 2/3] vsock: fix vsock_dequeue/enqueue_accept race
From: Dexuan Cui @ 2017-08-18 21:13 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: davem@davemloft.net, netdev@vger.kernel.org,
	devel@linuxdriverproject.org, KY Srinivasan, Haiyang Zhang,
	Stephen Hemminger, George Zhang, Jorgen Hansen, Michal Kubecek,
	Vitaly Kuznetsov, Cathy Avery, jasowang@redhat.com,
	Rolf Neugebauer, Dave Scott, Marcelo Cerri, apw@canonical.com,
	olaf@aepfle.de, joe@perches.com, "lin
In-Reply-To: <20170817140541.GH5539@stefanha-x1.localdomain>

> From: Stefan Hajnoczi [mailto:stefanha@redhat.com]
> Sent: Thursday, August 17, 2017 07:06
> 
> On Tue, Aug 15, 2017 at 10:15:39PM +0000, Dexuan Cui wrote:
> > With the current code, when vsock_dequeue_accept() is removing a sock
> > from the list, nothing prevents vsock_enqueue_accept() from adding a new
> > sock into the list concurrently. We should add a lock to protect the list.
> 
> The listener sock is locked, preventing concurrent modification.  I have
> checked both the virtio and vmci transports.  Can you post an example
> where the listener sock isn't locked?
> 
> Stefan
Sorry, I was not careful when checking the vmci code. 
Please ignore the patch.

Now I realized the expectation is that the individual transport drivers should
do the locking for vsock_enqueue_accept(), but for vsock_dequeue_accept(),
the locking is done by the common vsock driver.

Thanks,
-- Dexuan

^ permalink raw reply

* [PATCH 2/2] netfilter/libxt_hashlimit: new feature/algorithm for xt_hashlimit
From: Vishwanath Pai @ 2017-08-18 20:59 UTC (permalink / raw)
  To: pablo, kadlec, netfilter-devel; +Cc: johunt, fw, netdev, pai.vishwain

This patch adds a new feature to hashlimit that allows matching on the
current packet/byte rate without rate limiting. This can be enabled
with a new flag --hashlimit-rate-match. The match returns true if the
current rate of packets is above/below the user specified value.

The main difference between the existing algorithm and the new one is
that the existing algorithm rate-limits the flow whereas the new algorithm
does not. Instead it *classifies* the flow based on whether it is above or
below a certain rate. I will demonstrate this with an example below. Let
us assume this rule:

iptables -A INPUT -m hashlimit --hashlimit-above 10/s -j new_chain

If the packet rate is 15/s, the existing algorithm would ACCEPT 10 packets
every second and send 5 packets to "new_chain".

But with the new algorithm, as long as the rate of 15/s is sustained, all
packets will continue to match and every packet is sent to new_chain.

This new functionality will let us classify different flows based on their
current rate, so that further decisions can be made on them based on what
the current rate is.

This is how the new algorithm works:
We divide time into intervals of 1 (sec/min/hour) as specified by
the user. We keep track of the number of packets/bytes processed in the
current interval. After each interval we reset the counter to 0.

When we receive a packet for match, we look at the packet rate
during the current interval and the previous interval to make a decision:

if [ prev_rate < user and cur_rate < user ]
        return Below
else
        return Above

Where cur_rate is the number of packets/bytes seen in the current
interval, prev_rate is the number of packets/bytes seen in the previous
interval and 'user' is the rate specified by the user.

We also provide flexibility to the user for choosing the time
interval using the option --hashlimit-rate-interval. For example the user can
keep a low rate like x/hour but still keep the interval as small as 1
second.

To preserve backwards compatibility we have to add this feature in a new
revision, so I've created revision 3 for hashlimit. The two new options
we add are:

--hashlimit-rate-match
--hashlimit-rate-interval

I have updated the help text to add these new options. Also added a few
tests for the new options.

Suggested-by: Igor Lubashev <ilubashe@akamai.com>
Reviewed-by: Josh Hunt <johunt@akamai.com>
Signed-off-by: Vishwanath Pai <vpai@akamai.com>
---
 extensions/libxt_hashlimit.c           | 407 ++++++++++++++++++++++++++++++---
 extensions/libxt_hashlimit.man         |   8 +
 extensions/libxt_hashlimit.t           |   4 +
 include/linux/netfilter/xt_hashlimit.h |  36 ++-
 4 files changed, 414 insertions(+), 41 deletions(-)

diff --git a/extensions/libxt_hashlimit.c b/extensions/libxt_hashlimit.c
index 9e63e1e..e51ac1a 100644
--- a/extensions/libxt_hashlimit.c
+++ b/extensions/libxt_hashlimit.c
@@ -68,10 +68,13 @@ enum {
 	O_HTABLE_MAX,
 	O_HTABLE_GCINT,
 	O_HTABLE_EXPIRE,
+	O_RATEMATCH,
+	O_INTERVAL,
 	F_BURST         = 1 << O_BURST,
 	F_UPTO          = 1 << O_UPTO,
 	F_ABOVE         = 1 << O_ABOVE,
 	F_HTABLE_EXPIRE = 1 << O_HTABLE_EXPIRE,
+	F_RATEMATCH	= 1 << O_RATEMATCH,
 };
 
 static void hashlimit_mt_help(void)
@@ -95,6 +98,29 @@ static void hashlimit_mt_help(void)
 "\n", XT_HASHLIMIT_BURST);
 }
 
+static void hashlimit_mt_help_v3(void)
+{
+	printf(
+"hashlimit match options:\n"
+"  --hashlimit-upto <avg>           max average match rate\n"
+"                                   [Packets per second unless followed by \n"
+"                                   /sec /minute /hour /day postfixes]\n"
+"  --hashlimit-above <avg>          min average match rate\n"
+"  --hashlimit-mode <mode>          mode is a comma-separated list of\n"
+"                                   dstip,srcip,dstport,srcport (or none)\n"
+"  --hashlimit-srcmask <length>     source address grouping prefix length\n"
+"  --hashlimit-dstmask <length>     destination address grouping prefix length\n"
+"  --hashlimit-name <name>          name for /proc/net/ipt_hashlimit\n"
+"  --hashlimit-burst <num>	    number to match in a burst, default %u\n"
+"  --hashlimit-htable-size <num>    number of hashtable buckets\n"
+"  --hashlimit-htable-max <num>     number of hashtable entries\n"
+"  --hashlimit-htable-gcinterval    interval between garbage collection runs\n"
+"  --hashlimit-htable-expire        after which time are idle entries expired?\n"
+"  --hashlimit-rate-match           rate match the flow without rate-limiting it\n"
+"  --hashlimit-rate-interval        interval in seconds for hashlimit-rate-match\n"
+"\n", XT_HASHLIMIT_BURST);
+}
+
 #define s struct xt_hashlimit_info
 static const struct xt_option_entry hashlimit_opts[] = {
 	{.name = "hashlimit", .id = O_UPTO, .excl = F_ABOVE,
@@ -153,6 +179,35 @@ static const struct xt_option_entry hashlimit_mt_opts_v1[] = {
 #undef s
 
 #define s struct xt_hashlimit_mtinfo2
+static const struct xt_option_entry hashlimit_mt_opts_v2[] = {
+	{.name = "hashlimit-upto", .id = O_UPTO, .excl = F_ABOVE,
+	 .type = XTTYPE_STRING, .flags = XTOPT_INVERT},
+	{.name = "hashlimit-above", .id = O_ABOVE, .excl = F_UPTO,
+	 .type = XTTYPE_STRING, .flags = XTOPT_INVERT},
+	{.name = "hashlimit", .id = O_UPTO, .excl = F_ABOVE,
+	 .type = XTTYPE_STRING, .flags = XTOPT_INVERT}, /* old name */
+	{.name = "hashlimit-srcmask", .id = O_SRCMASK, .type = XTTYPE_PLEN},
+	{.name = "hashlimit-dstmask", .id = O_DSTMASK, .type = XTTYPE_PLEN},
+	{.name = "hashlimit-burst", .id = O_BURST, .type = XTTYPE_STRING},
+	{.name = "hashlimit-htable-size", .id = O_HTABLE_SIZE,
+	 .type = XTTYPE_UINT32, .flags = XTOPT_PUT,
+	 XTOPT_POINTER(s, cfg.size)},
+	{.name = "hashlimit-htable-max", .id = O_HTABLE_MAX,
+	 .type = XTTYPE_UINT32, .flags = XTOPT_PUT,
+	 XTOPT_POINTER(s, cfg.max)},
+	{.name = "hashlimit-htable-gcinterval", .id = O_HTABLE_GCINT,
+	 .type = XTTYPE_UINT32, .flags = XTOPT_PUT,
+	 XTOPT_POINTER(s, cfg.gc_interval)},
+	{.name = "hashlimit-htable-expire", .id = O_HTABLE_EXPIRE,
+	 .type = XTTYPE_UINT32, .flags = XTOPT_PUT,
+	 XTOPT_POINTER(s, cfg.expire)},
+	{.name = "hashlimit-mode", .id = O_MODE, .type = XTTYPE_STRING},
+	{.name = "hashlimit-name", .id = O_NAME, .type = XTTYPE_STRING,
+	 .flags = XTOPT_MAND | XTOPT_PUT, XTOPT_POINTER(s, name), .min = 1},
+};
+#undef s
+
+#define s struct xt_hashlimit_mtinfo3
 static const struct xt_option_entry hashlimit_mt_opts[] = {
 	{.name = "hashlimit-upto", .id = O_UPTO, .excl = F_ABOVE,
 	 .type = XTTYPE_STRING, .flags = XTOPT_INVERT},
@@ -178,12 +233,14 @@ static const struct xt_option_entry hashlimit_mt_opts[] = {
 	{.name = "hashlimit-mode", .id = O_MODE, .type = XTTYPE_STRING},
 	{.name = "hashlimit-name", .id = O_NAME, .type = XTTYPE_STRING,
 	 .flags = XTOPT_MAND | XTOPT_PUT, XTOPT_POINTER(s, name), .min = 1},
+	{.name = "hashlimit-rate-match", .id = O_RATEMATCH, .type = XTTYPE_NONE},
+	{.name = "hashlimit-rate-interval", .id = O_INTERVAL, .type = XTTYPE_STRING},
 	XTOPT_TABLEEND,
 };
 #undef s
 
 static int
-cfg_copy(struct hashlimit_cfg2 *to, const void *from, int revision)
+cfg_copy(struct hashlimit_cfg3 *to, const void *from, int revision)
 {
 	if (revision == 1) {
 		struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
@@ -198,7 +255,19 @@ cfg_copy(struct hashlimit_cfg2 *to, const void *from, int revision)
 		to->srcmask = cfg->srcmask;
 		to->dstmask = cfg->dstmask;
 	} else if (revision == 2) {
-		memcpy(to, from, sizeof(struct hashlimit_cfg2));
+		struct hashlimit_cfg2 *cfg = (struct hashlimit_cfg2 *)from;
+
+		to->mode = cfg->mode;
+		to->avg = cfg->avg;
+		to->burst = cfg->burst;
+		to->size = cfg->size;
+		to->max = cfg->max;
+		to->gc_interval = cfg->gc_interval;
+		to->expire = cfg->expire;
+		to->srcmask = cfg->srcmask;
+		to->dstmask = cfg->dstmask;
+	} else if (revision == 3) {
+		memcpy(to, from, sizeof(struct hashlimit_cfg3));
 	} else {
 		return -EINVAL;
 	}
@@ -346,6 +415,16 @@ int parse_rate(const char *rate, void *val, struct hashlimit_mt_udata *ud, int r
 	return 1;
 }
 
+static int parse_interval(const char *rate, uint32_t *val)
+{
+	int r = atoi(rate);
+	if (r <= 0)
+		return 0;
+
+	*val = r;
+	return 1;
+}
+
 static void hashlimit_init(struct xt_entry_match *m)
 {
 	struct xt_hashlimit_info *r = (struct xt_hashlimit_info *)m->data;
@@ -377,7 +456,7 @@ static void hashlimit_mt6_init_v1(struct xt_entry_match *match)
 	info->cfg.dstmask     = 128;
 }
 
-static void hashlimit_mt4_init(struct xt_entry_match *match)
+static void hashlimit_mt4_init_v2(struct xt_entry_match *match)
 {
 	struct xt_hashlimit_mtinfo2 *info = (void *)match->data;
 
@@ -388,7 +467,7 @@ static void hashlimit_mt4_init(struct xt_entry_match *match)
 	info->cfg.dstmask     = 32;
 }
 
-static void hashlimit_mt6_init(struct xt_entry_match *match)
+static void hashlimit_mt6_init_v2(struct xt_entry_match *match)
 {
 	struct xt_hashlimit_mtinfo2 *info = (void *)match->data;
 
@@ -399,6 +478,30 @@ static void hashlimit_mt6_init(struct xt_entry_match *match)
 	info->cfg.dstmask     = 128;
 }
 
+static void hashlimit_mt4_init(struct xt_entry_match *match)
+{
+	struct xt_hashlimit_mtinfo3 *info = (void *)match->data;
+
+	info->cfg.mode        = 0;
+	info->cfg.burst       = XT_HASHLIMIT_BURST;
+	info->cfg.gc_interval = XT_HASHLIMIT_GCINTERVAL;
+	info->cfg.srcmask     = 32;
+	info->cfg.dstmask     = 32;
+	info->cfg.interval    = 0;
+}
+
+static void hashlimit_mt6_init(struct xt_entry_match *match)
+{
+	struct xt_hashlimit_mtinfo3 *info = (void *)match->data;
+
+	info->cfg.mode        = 0;
+	info->cfg.burst       = XT_HASHLIMIT_BURST;
+	info->cfg.gc_interval = XT_HASHLIMIT_GCINTERVAL;
+	info->cfg.srcmask     = 128;
+	info->cfg.dstmask     = 128;
+	info->cfg.interval    = 0;
+}
+
 /* Parse a 'mode' parameter into the required bitmask */
 static int parse_mode(uint32_t *mode, const char *option_arg)
 {
@@ -488,7 +591,7 @@ static void hashlimit_mt_parse_v1(struct xt_option_call *cb)
 	}
 }
 
-static void hashlimit_mt_parse(struct xt_option_call *cb)
+static void hashlimit_mt_parse_v2(struct xt_option_call *cb)
 {
 	struct xt_hashlimit_mtinfo2 *info = cb->data;
 
@@ -529,6 +632,54 @@ static void hashlimit_mt_parse(struct xt_option_call *cb)
 	}
 }
 
+static void hashlimit_mt_parse(struct xt_option_call *cb)
+{
+	struct xt_hashlimit_mtinfo3 *info = cb->data;
+
+	xtables_option_parse(cb);
+	switch (cb->entry->id) {
+	case O_BURST:
+		info->cfg.burst = parse_burst(cb->arg, 2);
+		break;
+	case O_UPTO:
+		if (cb->invert)
+			info->cfg.mode |= XT_HASHLIMIT_INVERT;
+		if (parse_bytes(cb->arg, &info->cfg.avg, cb->udata, 2))
+			info->cfg.mode |= XT_HASHLIMIT_BYTES;
+		else if (!parse_rate(cb->arg, &info->cfg.avg, cb->udata, 2))
+			xtables_param_act(XTF_BAD_VALUE, "hashlimit",
+			          "--hashlimit-upto", cb->arg);
+		break;
+	case O_ABOVE:
+		if (!cb->invert)
+			info->cfg.mode |= XT_HASHLIMIT_INVERT;
+		if (parse_bytes(cb->arg, &info->cfg.avg, cb->udata, 2))
+			info->cfg.mode |= XT_HASHLIMIT_BYTES;
+		else if (!parse_rate(cb->arg, &info->cfg.avg, cb->udata, 2))
+			xtables_param_act(XTF_BAD_VALUE, "hashlimit",
+			          "--hashlimit-above", cb->arg);
+		break;
+	case O_MODE:
+		if (parse_mode(&info->cfg.mode, cb->arg) < 0)
+			xtables_param_act(XTF_BAD_VALUE, "hashlimit",
+			          "--hashlimit-mode", cb->arg);
+		break;
+	case O_SRCMASK:
+		info->cfg.srcmask = cb->val.hlen;
+		break;
+	case O_DSTMASK:
+		info->cfg.dstmask = cb->val.hlen;
+		break;
+	case O_RATEMATCH:
+		info->cfg.mode |= XT_HASHLIMIT_RATE_MATCH;
+		break;
+	case O_INTERVAL:
+		if (!parse_interval(cb->arg, &info->cfg.interval))
+			xtables_param_act(XTF_BAD_VALUE, "hashlimit",
+				"--hashlimit-rate-interval", cb->arg);
+	}
+}
+
 static void hashlimit_check(struct xt_fcheck_call *cb)
 {
 	const struct hashlimit_mt_udata *udata = cb->udata;
@@ -572,7 +723,7 @@ static void hashlimit_mt_check_v1(struct xt_fcheck_call *cb)
 		burst_error_v1();
 }
 
-static void hashlimit_mt_check(struct xt_fcheck_call *cb)
+static void hashlimit_mt_check_v2(struct xt_fcheck_call *cb)
 {
 	const struct hashlimit_mt_udata *udata = cb->udata;
 	struct xt_hashlimit_mtinfo2 *info = cb->data;
@@ -603,6 +754,48 @@ static void hashlimit_mt_check(struct xt_fcheck_call *cb)
 		burst_error();
 }
 
+static void hashlimit_mt_check(struct xt_fcheck_call *cb)
+{
+	const struct hashlimit_mt_udata *udata = cb->udata;
+	struct xt_hashlimit_mtinfo3 *info = cb->data;
+
+	if (!(cb->xflags & (F_UPTO | F_ABOVE)))
+		xtables_error(PARAMETER_PROBLEM,
+				"You have to specify --hashlimit");
+	if (!(cb->xflags & F_HTABLE_EXPIRE))
+		info->cfg.expire = udata->mult * 1000; /* from s to msec */
+
+	if (info->cfg.mode & XT_HASHLIMIT_BYTES) {
+		uint32_t burst = 0;
+		if (cb->xflags & F_BURST) {
+			if (info->cfg.burst < cost_to_bytes(info->cfg.avg))
+				xtables_error(PARAMETER_PROBLEM,
+					"burst cannot be smaller than %lub", cost_to_bytes(info->cfg.avg));
+
+			burst = info->cfg.burst;
+			burst /= cost_to_bytes(info->cfg.avg);
+			if (info->cfg.burst % cost_to_bytes(info->cfg.avg))
+				burst++;
+			if (!(cb->xflags & F_HTABLE_EXPIRE))
+				info->cfg.expire = XT_HASHLIMIT_BYTE_EXPIRE_BURST * 1000;
+		}
+		info->cfg.burst = burst;
+	} else if (info->cfg.burst > XT_HASHLIMIT_BURST_MAX)
+		burst_error();
+
+	if (cb->xflags & F_RATEMATCH) {
+		if (!(info->cfg.mode & XT_HASHLIMIT_BYTES))
+			info->cfg.avg /= udata->mult;
+
+		if (info->cfg.interval == 0) {
+			if (info->cfg.mode & XT_HASHLIMIT_BYTES)
+				info->cfg.interval = 1;
+			else
+				info->cfg.interval = udata->mult;
+		}
+	}
+}
+
 struct rates {
 	const char *name;
 	uint64_t mult;
@@ -617,7 +810,7 @@ static const struct rates rates[] = {
 	{ "min", XT_HASHLIMIT_SCALE_v2*60 },
 	{ "sec", XT_HASHLIMIT_SCALE_v2 } };
 
-static uint32_t print_rate(uint32_t period, int revision)
+static uint32_t print_rate(uint64_t period, int revision)
 {
 	unsigned int i;
 	const struct rates *_rates = (revision == 1) ? rates_v1 : rates;
@@ -723,9 +916,10 @@ static void hashlimit_print(const void *ip,
 }
 
 static void
-hashlimit_mt_print(const struct hashlimit_cfg2 *cfg, unsigned int dmask, int revision)
+hashlimit_mt_print(const struct hashlimit_cfg3 *cfg, unsigned int dmask, int revision)
 {
-	uint32_t quantum;
+	uint64_t quantum;
+	uint64_t period;
 
 	if (cfg->mode & XT_HASHLIMIT_INVERT)
 		fputs(" limit: above", stdout);
@@ -735,7 +929,15 @@ hashlimit_mt_print(const struct hashlimit_cfg2 *cfg, unsigned int dmask, int rev
 	if (cfg->mode & XT_HASHLIMIT_BYTES) {
 		quantum = print_bytes(cfg->avg, cfg->burst, "");
 	} else {
-		quantum = print_rate(cfg->avg, revision);
+		if (revision == 3) {
+			period = cfg->avg;
+			if (cfg->interval != 0)
+				period *= cfg->interval;
+
+			quantum = print_rate(period, revision);
+		} else {
+			quantum = print_rate(cfg->avg, revision);
+		}
 		printf(" burst %llu", cfg->burst);
 	}
 	if (cfg->mode & (XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT |
@@ -756,6 +958,13 @@ hashlimit_mt_print(const struct hashlimit_cfg2 *cfg, unsigned int dmask, int rev
 		printf(" srcmask %u", cfg->srcmask);
 	if (cfg->dstmask != dmask)
 		printf(" dstmask %u", cfg->dstmask);
+
+	if ((revision == 3) && (cfg->mode & XT_HASHLIMIT_RATE_MATCH))
+		printf(" rate-match");
+
+	if ((revision == 3) && (cfg->mode & XT_HASHLIMIT_RATE_MATCH))
+		if (cfg->interval != 1)
+			printf(" rate-interval %u", cfg->interval);
 }
 
 static void
@@ -763,7 +972,7 @@ hashlimit_mt4_print_v1(const void *ip, const struct xt_entry_match *match,
                    int numeric)
 {
 	const struct xt_hashlimit_mtinfo1 *info = (const void *)match->data;
-	struct hashlimit_cfg2 cfg;
+	struct hashlimit_cfg3 cfg;
 	int ret;
 
 	ret = cfg_copy(&cfg, (const void *)&info->cfg, 1);
@@ -779,7 +988,7 @@ hashlimit_mt6_print_v1(const void *ip, const struct xt_entry_match *match,
                    int numeric)
 {
 	const struct xt_hashlimit_mtinfo1 *info = (const void *)match->data;
-	struct hashlimit_cfg2 cfg;
+	struct hashlimit_cfg3 cfg;
 	int ret;
 
 	ret = cfg_copy(&cfg, (const void *)&info->cfg, 1);
@@ -791,21 +1000,52 @@ hashlimit_mt6_print_v1(const void *ip, const struct xt_entry_match *match,
 }
 
 static void
-hashlimit_mt4_print(const void *ip, const struct xt_entry_match *match,
+hashlimit_mt4_print_v2(const void *ip, const struct xt_entry_match *match,
                    int numeric)
 {
 	const struct xt_hashlimit_mtinfo2 *info = (const void *)match->data;
+	struct hashlimit_cfg3 cfg;
+	int ret;
+
+	ret = cfg_copy(&cfg, (const void *)&info->cfg, 2);
 
-	hashlimit_mt_print(&info->cfg, 32, 2);
+	if (ret)
+		xtables_error(OTHER_PROBLEM, "unknown revision");
+
+	hashlimit_mt_print(&cfg, 32, 2);
 }
 
 static void
-hashlimit_mt6_print(const void *ip, const struct xt_entry_match *match,
+hashlimit_mt6_print_v2(const void *ip, const struct xt_entry_match *match,
                    int numeric)
 {
 	const struct xt_hashlimit_mtinfo2 *info = (const void *)match->data;
+	struct hashlimit_cfg3 cfg;
+	int ret;
+
+	ret = cfg_copy(&cfg, (const void *)&info->cfg, 2);
+
+	if (ret)
+		xtables_error(OTHER_PROBLEM, "unknown revision");
+
+	hashlimit_mt_print(&cfg, 128, 2);
+}
+static void
+hashlimit_mt4_print(const void *ip, const struct xt_entry_match *match,
+                   int numeric)
+{
+	const struct xt_hashlimit_mtinfo3 *info = (const void *)match->data;
+
+	hashlimit_mt_print(&info->cfg, 32, 3);
+}
+
+static void
+hashlimit_mt6_print(const void *ip, const struct xt_entry_match *match,
+                   int numeric)
+{
+	const struct xt_hashlimit_mtinfo3 *info = (const void *)match->data;
 
-	hashlimit_mt_print(&info->cfg, 128, 2);
+	hashlimit_mt_print(&info->cfg, 128, 3);
 }
 
 static void hashlimit_save(const void *ip, const struct xt_entry_match *match)
@@ -833,7 +1073,7 @@ static void hashlimit_save(const void *ip, const struct xt_entry_match *match)
 }
 
 static void
-hashlimit_mt_save(const struct hashlimit_cfg2 *cfg, const char* name, unsigned int dmask, int revision)
+hashlimit_mt_save(const struct hashlimit_cfg3 *cfg, const char* name, unsigned int dmask, int revision)
 {
 	uint32_t quantum;
 
@@ -870,13 +1110,20 @@ hashlimit_mt_save(const struct hashlimit_cfg2 *cfg, const char* name, unsigned i
 		printf(" --hashlimit-srcmask %u", cfg->srcmask);
 	if (cfg->dstmask != dmask)
 		printf(" --hashlimit-dstmask %u", cfg->dstmask);
+
+	if ((revision == 3) && (cfg->mode & XT_HASHLIMIT_RATE_MATCH))
+		printf(" --hashlimit-rate-match");
+
+	if ((revision == 3) && (cfg->mode & XT_HASHLIMIT_RATE_MATCH))
+		if (cfg->interval != 1)
+			printf(" --hashlimit-rate-interval %u", cfg->interval);
 }
 
 static void
 hashlimit_mt4_save_v1(const void *ip, const struct xt_entry_match *match)
 {
 	const struct xt_hashlimit_mtinfo1 *info = (const void *)match->data;
-	struct hashlimit_cfg2 cfg;
+	struct hashlimit_cfg3 cfg;
 	int ret;
 
 	ret = cfg_copy(&cfg, (const void *)&info->cfg, 1);
@@ -891,7 +1138,7 @@ static void
 hashlimit_mt6_save_v1(const void *ip, const struct xt_entry_match *match)
 {
 	const struct xt_hashlimit_mtinfo1 *info = (const void *)match->data;
-	struct hashlimit_cfg2 cfg;
+	struct hashlimit_cfg3 cfg;
 	int ret;
 
 	ret = cfg_copy(&cfg, (const void *)&info->cfg, 1);
@@ -903,19 +1150,49 @@ hashlimit_mt6_save_v1(const void *ip, const struct xt_entry_match *match)
 }
 
 static void
-hashlimit_mt4_save(const void *ip, const struct xt_entry_match *match)
+hashlimit_mt4_save_v2(const void *ip, const struct xt_entry_match *match)
 {
 	const struct xt_hashlimit_mtinfo2 *info = (const void *)match->data;
+	struct hashlimit_cfg3 cfg;
+	int ret;
+
+	ret = cfg_copy(&cfg, (const void *)&info->cfg, 2);
+
+	if (ret)
+		xtables_error(OTHER_PROBLEM, "unknown revision");
 
-	hashlimit_mt_save(&info->cfg, info->name, 32, 2);
+	hashlimit_mt_save(&cfg, info->name, 32, 2);
 }
 
 static void
-hashlimit_mt6_save(const void *ip, const struct xt_entry_match *match)
+hashlimit_mt6_save_v2(const void *ip, const struct xt_entry_match *match)
 {
 	const struct xt_hashlimit_mtinfo2 *info = (const void *)match->data;
+	struct hashlimit_cfg3 cfg;
+	int ret;
+
+	ret = cfg_copy(&cfg, (const void *)&info->cfg, 2);
+
+	if (ret)
+		xtables_error(OTHER_PROBLEM, "unknown revision");
+
+	hashlimit_mt_save(&cfg, info->name, 128, 2);
+}
+
+static void
+hashlimit_mt4_save(const void *ip, const struct xt_entry_match *match)
+{
+	const struct xt_hashlimit_mtinfo3 *info = (const void *)match->data;
+
+	hashlimit_mt_save(&info->cfg, info->name, 32, 3);
+}
+
+static void
+hashlimit_mt6_save(const void *ip, const struct xt_entry_match *match)
+{
+	const struct xt_hashlimit_mtinfo3 *info = (const void *)match->data;
 
-	hashlimit_mt_save(&info->cfg, info->name, 128, 2);
+	hashlimit_mt_save(&info->cfg, info->name, 128, 3);
 }
 
 static const struct rates rates_v1_xlate[] = {
@@ -947,7 +1224,7 @@ static void print_packets_rate_xlate(struct xt_xlate *xl, uint64_t avg,
 }
 
 static void print_bytes_rate_xlate(struct xt_xlate *xl,
-				   const struct hashlimit_cfg2 *cfg)
+				   const struct hashlimit_cfg3 *cfg)
 {
 	unsigned int i;
 	unsigned long long r;
@@ -1055,7 +1332,7 @@ static int hashlimit_mode_xlate(struct xt_xlate *xl,
 }
 
 static int hashlimit_mt_xlate(struct xt_xlate *xl, const char *name,
-			      const struct hashlimit_cfg2 *cfg,
+			      const struct hashlimit_cfg3 *cfg,
 			      int revision, int family)
 {
 	int ret = 1;
@@ -1098,7 +1375,7 @@ static int hashlimit_mt4_xlate_v1(struct xt_xlate *xl,
 {
 	const struct xt_hashlimit_mtinfo1 *info =
 		(const void *)params->match->data;
-	struct hashlimit_cfg2 cfg;
+	struct hashlimit_cfg3 cfg;
 
 	if (cfg_copy(&cfg, (const void *)&info->cfg, 1))
 		xtables_error(OTHER_PROBLEM, "unknown revision");
@@ -1111,7 +1388,7 @@ static int hashlimit_mt6_xlate_v1(struct xt_xlate *xl,
 {
 	const struct xt_hashlimit_mtinfo1 *info =
 		(const void *)params->match->data;
-	struct hashlimit_cfg2 cfg;
+	struct hashlimit_cfg3 cfg;
 
 	if (cfg_copy(&cfg, (const void *)&info->cfg, 1))
 		xtables_error(OTHER_PROBLEM, "unknown revision");
@@ -1119,22 +1396,48 @@ static int hashlimit_mt6_xlate_v1(struct xt_xlate *xl,
 	return hashlimit_mt_xlate(xl, info->name, &cfg, 1, NFPROTO_IPV6);
 }
 
+static int hashlimit_mt4_xlate_v2(struct xt_xlate *xl,
+				  const struct xt_xlate_mt_params *params)
+{
+	const struct xt_hashlimit_mtinfo2 *info =
+		(const void *)params->match->data;
+	struct hashlimit_cfg3 cfg;
+
+	if (cfg_copy(&cfg, (const void *)&info->cfg, 2))
+		xtables_error(OTHER_PROBLEM, "unknown revision");
+
+	return hashlimit_mt_xlate(xl, info->name, &cfg, 2, NFPROTO_IPV4);
+}
+
+static int hashlimit_mt6_xlate_v2(struct xt_xlate *xl,
+				  const struct xt_xlate_mt_params *params)
+{
+	const struct xt_hashlimit_mtinfo2 *info =
+		(const void *)params->match->data;
+	struct hashlimit_cfg3 cfg;
+
+	if (cfg_copy(&cfg, (const void *)&info->cfg, 2))
+		xtables_error(OTHER_PROBLEM, "unknown revision");
+
+	return hashlimit_mt_xlate(xl, info->name, &cfg, 2, NFPROTO_IPV6);
+}
+
 static int hashlimit_mt4_xlate(struct xt_xlate *xl,
 			       const struct xt_xlate_mt_params *params)
 {
-	const struct xt_hashlimit_mtinfo2 *info =
+	const struct xt_hashlimit_mtinfo3 *info =
 		(const void *)params->match->data;
 
-	return hashlimit_mt_xlate(xl, info->name, &info->cfg, 2, NFPROTO_IPV4);
+	return hashlimit_mt_xlate(xl, info->name, &info->cfg, 3, NFPROTO_IPV4);
 }
 
 static int hashlimit_mt6_xlate(struct xt_xlate *xl,
 			       const struct xt_xlate_mt_params *params)
 {
-	const struct xt_hashlimit_mtinfo2 *info =
+	const struct xt_hashlimit_mtinfo3 *info =
 		(const void *)params->match->data;
 
-	return hashlimit_mt_xlate(xl, info->name, &info->cfg, 2, NFPROTO_IPV6);
+	return hashlimit_mt_xlate(xl, info->name, &info->cfg, 3, NFPROTO_IPV6);
 }
 
 static struct xtables_match hashlimit_mt_reg[] = {
@@ -1197,6 +1500,40 @@ static struct xtables_match hashlimit_mt_reg[] = {
 		.size          = XT_ALIGN(sizeof(struct xt_hashlimit_mtinfo2)),
 		.userspacesize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
 		.help          = hashlimit_mt_help,
+		.init          = hashlimit_mt4_init_v2,
+		.x6_parse      = hashlimit_mt_parse_v2,
+		.x6_fcheck     = hashlimit_mt_check_v2,
+		.print         = hashlimit_mt4_print_v2,
+		.save          = hashlimit_mt4_save_v2,
+		.x6_options    = hashlimit_mt_opts_v2,
+		.udata_size    = sizeof(struct hashlimit_mt_udata),
+		.xlate         = hashlimit_mt4_xlate_v2,
+	},
+	{
+		.version       = XTABLES_VERSION,
+		.name          = "hashlimit",
+		.revision      = 2,
+		.family        = NFPROTO_IPV6,
+		.size          = XT_ALIGN(sizeof(struct xt_hashlimit_mtinfo2)),
+		.userspacesize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+		.help          = hashlimit_mt_help,
+		.init          = hashlimit_mt6_init_v2,
+		.x6_parse      = hashlimit_mt_parse_v2,
+		.x6_fcheck     = hashlimit_mt_check_v2,
+		.print         = hashlimit_mt6_print_v2,
+		.save          = hashlimit_mt6_save_v2,
+		.x6_options    = hashlimit_mt_opts_v2,
+		.udata_size    = sizeof(struct hashlimit_mt_udata),
+		.xlate         = hashlimit_mt6_xlate_v2,
+	},
+	{
+		.version       = XTABLES_VERSION,
+		.name          = "hashlimit",
+		.revision      = 3,
+		.family        = NFPROTO_IPV4,
+		.size          = XT_ALIGN(sizeof(struct xt_hashlimit_mtinfo3)),
+		.userspacesize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
+		.help          = hashlimit_mt_help_v3,
 		.init          = hashlimit_mt4_init,
 		.x6_parse      = hashlimit_mt_parse,
 		.x6_fcheck     = hashlimit_mt_check,
@@ -1209,11 +1546,11 @@ static struct xtables_match hashlimit_mt_reg[] = {
 	{
 		.version       = XTABLES_VERSION,
 		.name          = "hashlimit",
-		.revision      = 2,
+		.revision      = 3,
 		.family        = NFPROTO_IPV6,
-		.size          = XT_ALIGN(sizeof(struct xt_hashlimit_mtinfo2)),
-		.userspacesize = offsetof(struct xt_hashlimit_mtinfo2, hinfo),
-		.help          = hashlimit_mt_help,
+		.size          = XT_ALIGN(sizeof(struct xt_hashlimit_mtinfo3)),
+		.userspacesize = offsetof(struct xt_hashlimit_mtinfo3, hinfo),
+		.help          = hashlimit_mt_help_v3,
 		.init          = hashlimit_mt6_init,
 		.x6_parse      = hashlimit_mt_parse,
 		.x6_fcheck     = hashlimit_mt_check,
diff --git a/extensions/libxt_hashlimit.man b/extensions/libxt_hashlimit.man
index 6aac3f2..5dbb327 100644
--- a/extensions/libxt_hashlimit.man
+++ b/extensions/libxt_hashlimit.man
@@ -51,6 +51,14 @@ After how many milliseconds do hash entries expire.
 .TP
 \fB\-\-hashlimit\-htable\-gcinterval\fP \fImsec\fP
 How many milliseconds between garbage collection intervals.
+.TP
+\fB\-\-hashlimit\-rate\-match\fP
+Classify the flow instead of rate-limiting it. This acts like a
+true/false match on whether the rate is above/below a certain number
+.TP
+\fB\-\-hashlimit\-rate\-interval\fP \fIsec\fP
+Can be used with \-\-hashlimit\-rate\-match to specify the interval
+at which the rate should be sampled
 .PP
 Examples:
 .TP
diff --git a/extensions/libxt_hashlimit.t b/extensions/libxt_hashlimit.t
index a163fdf..ccd0d1e 100644
--- a/extensions/libxt_hashlimit.t
+++ b/extensions/libxt_hashlimit.t
@@ -27,3 +27,7 @@
 -m hashlimit --hashlimit-upto 1/sec;;FAIL
 -m hashlimit;;FAIL
 -m hashlimit --hashlimit-upto 40/sec --hashlimit-burst 20 --hashlimit-mode srcip --hashlimit-name syn-flood;=;OK
+-m hashlimit --hashlimit-upto 40/sec --hashlimit-burst 20 --hashlimit-mode srcip --hashlimit-name rate1 --hashlimit-rate-match;=;OK
+-m hashlimit --hashlimit-upto 40mb/s --hashlimit-mode srcip --hashlimit-name rate2 --hashlimit-rate-match;=;OK
+-m hashlimit --hashlimit-upto 40/sec --hashlimit-burst 20 --hashlimit-mode srcip --hashlimit-name rate3 --hashlimit-rate-match --hashlimit-rate-interval 10;=;OK
+-m hashlimit --hashlimit-upto 40mb/s --hashlimit-mode srcip --hashlimit-name rate4 --hashlimit-rate-match --hashlimit-rate-interval 10;=;OK
diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h
index d9808b5..ade33f6 100644
--- a/include/linux/netfilter/xt_hashlimit.h
+++ b/include/linux/netfilter/xt_hashlimit.h
@@ -17,12 +17,13 @@
 struct xt_hashlimit_htable;
 
 enum {
-	XT_HASHLIMIT_HASH_DIP = 1 << 0,
-	XT_HASHLIMIT_HASH_DPT = 1 << 1,
-	XT_HASHLIMIT_HASH_SIP = 1 << 2,
-	XT_HASHLIMIT_HASH_SPT = 1 << 3,
-	XT_HASHLIMIT_INVERT   = 1 << 4,
-	XT_HASHLIMIT_BYTES    = 1 << 5,
+	XT_HASHLIMIT_HASH_DIP	= 1 << 0,
+	XT_HASHLIMIT_HASH_DPT	= 1 << 1,
+	XT_HASHLIMIT_HASH_SIP	= 1 << 2,
+	XT_HASHLIMIT_HASH_SPT	= 1 << 3,
+	XT_HASHLIMIT_INVERT	= 1 << 4,
+	XT_HASHLIMIT_BYTES	= 1 << 5,
+	XT_HASHLIMIT_RATE_MATCH	= 1 << 6,
 };
 
 struct hashlimit_cfg {
@@ -77,6 +78,21 @@ struct hashlimit_cfg2 {
 	__u8 srcmask, dstmask;
 };
 
+struct hashlimit_cfg3 {
+	__u64 avg;		/* Average secs between packets * scale */
+	__u64 burst;		/* Period multiplier for upper limit. */
+	__u32 mode;		/* bitmask of XT_HASHLIMIT_HASH_* */
+
+	/* user specified */
+	__u32 size;		/* how many buckets */
+	__u32 max;		/* max number of entries */
+	__u32 gc_interval;	/* gc interval */
+	__u32 expire;		/* when do entries expire? */
+
+	__u32 interval;		/* in seconds*/
+	__u8 srcmask, dstmask;
+};
+
 struct xt_hashlimit_mtinfo1 {
 	char name[IFNAMSIZ];
 	struct hashlimit_cfg1 cfg;
@@ -93,4 +109,12 @@ struct xt_hashlimit_mtinfo2 {
 	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
 };
 
+struct xt_hashlimit_mtinfo3 {
+	char name[NAME_MAX];
+	struct hashlimit_cfg3 cfg;
+
+	/* Used internally by the kernel */
+	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
+};
+
 #endif /*_XT_HASHLIMIT_H*/
-- 
1.9.1


^ permalink raw reply related

* [PATCH 1/2] netfilter/xt_hashlimit: new feature/algorithm for xt_hashlimit
From: Vishwanath Pai @ 2017-08-18 20:58 UTC (permalink / raw)
  To: pablo, kadlec, netfilter-devel; +Cc: johunt, fw, netdev, pai.vishwain

This patch adds a new feature to hashlimit that allows matching on the
current packet/byte rate without rate limiting. This can be enabled
with a new flag --hashlimit-rate-match. The match returns true if the
current rate of packets is above/below the user specified value.

The main difference between the existing algorithm and the new one is
that the existing algorithm rate-limits the flow whereas the new
algorithm does not. Instead it *classifies* the flow based on whether
it is above or below a certain rate. I will demonstrate this with an
example below. Let us assume this rule:

iptables -A INPUT -m hashlimit --hashlimit-above 10/s -j new_chain

If the packet rate is 15/s, the existing algorithm would ACCEPT 10
packets every second and send 5 packets to "new_chain".

But with the new algorithm, as long as the rate of 15/s is sustained,
all packets will continue to match and every packet is sent to new_chain.

This new functionality will let us classify different flows based on
their current rate, so that further decisions can be made on them based on
what the current rate is.

This is how the new algorithm works:
We divide time into intervals of 1 (sec/min/hour) as specified by
the user. We keep track of the number of packets/bytes processed in the
current interval. After each interval we reset the counter to 0.

When we receive a packet for match, we look at the packet rate
during the current interval and the previous interval to make a
decision:

if [ prev_rate < user and cur_rate < user ]
        return Below
else
        return Above

Where cur_rate is the number of packets/bytes seen in the current
interval, prev_rate is the number of packets/bytes seen in the previous
interval and 'user' is the rate specified by the user.

We also provide flexibility to the user for choosing the time
interval using the option --hashlimit-rate-interval. For example the user can
keep a low rate like x/hour but still keep the interval as small as 1
second.

To preserve backwards compatibility we have to add this feature in a new
revision, so I've created revision 3 for hashlimit. The two new options
we add are:

--hashlimit-rate-match
--hashlimit-rate-interval

I have updated the help text to add these new options. Also added a few
tests for the new options.

Suggested-by: Igor Lubashev <ilubashe@akamai.com>
Reviewed-by: Josh Hunt <johunt@akamai.com>
Signed-off-by: Vishwanath Pai <vpai@akamai.com>
---
 include/linux/netfilter/xt_hashlimit.h      |   3 +-
 include/uapi/linux/netfilter/xt_hashlimit.h |  36 +++-
 net/netfilter/xt_hashlimit.c                | 275 +++++++++++++++++++++++++---
 3 files changed, 284 insertions(+), 30 deletions(-)

diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h
index 074790c..0fc458b 100644
--- a/include/linux/netfilter/xt_hashlimit.h
+++ b/include/linux/netfilter/xt_hashlimit.h
@@ -5,5 +5,6 @@
 
 #define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
 			  XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \
-			  XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES)
+			  XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\
+			  XT_HASHLIMIT_RATE_MATCH)
 #endif /*_XT_HASHLIMIT_H*/
diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h
index 79da349..aa98573 100644
--- a/include/uapi/linux/netfilter/xt_hashlimit.h
+++ b/include/uapi/linux/netfilter/xt_hashlimit.h
@@ -19,12 +19,13 @@
 struct xt_hashlimit_htable;
 
 enum {
-	XT_HASHLIMIT_HASH_DIP = 1 << 0,
-	XT_HASHLIMIT_HASH_DPT = 1 << 1,
-	XT_HASHLIMIT_HASH_SIP = 1 << 2,
-	XT_HASHLIMIT_HASH_SPT = 1 << 3,
-	XT_HASHLIMIT_INVERT   = 1 << 4,
-	XT_HASHLIMIT_BYTES    = 1 << 5,
+	XT_HASHLIMIT_HASH_DIP		= 1 << 0,
+	XT_HASHLIMIT_HASH_DPT		= 1 << 1,
+	XT_HASHLIMIT_HASH_SIP		= 1 << 2,
+	XT_HASHLIMIT_HASH_SPT		= 1 << 3,
+	XT_HASHLIMIT_INVERT		= 1 << 4,
+	XT_HASHLIMIT_BYTES		= 1 << 5,
+	XT_HASHLIMIT_RATE_MATCH		= 1 << 6,
 };
 
 struct hashlimit_cfg {
@@ -79,6 +80,21 @@ struct hashlimit_cfg2 {
 	__u8 srcmask, dstmask;
 };
 
+struct hashlimit_cfg3 {
+	__u64 avg;		/* Average secs between packets * scale */
+	__u64 burst;		/* Period multiplier for upper limit. */
+	__u32 mode;		/* bitmask of XT_HASHLIMIT_HASH_* */
+
+	/* user specified */
+	__u32 size;		/* how many buckets */
+	__u32 max;		/* max number of entries */
+	__u32 gc_interval;	/* gc interval */
+	__u32 expire;		/* when do entries expire? */
+
+	__u32 interval;
+	__u8 srcmask, dstmask;
+};
+
 struct xt_hashlimit_mtinfo1 {
 	char name[IFNAMSIZ];
 	struct hashlimit_cfg1 cfg;
@@ -95,4 +111,12 @@ struct xt_hashlimit_mtinfo2 {
 	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
 };
 
+struct xt_hashlimit_mtinfo3 {
+	char name[NAME_MAX];
+	struct hashlimit_cfg3 cfg;
+
+	/* Used internally by the kernel */
+	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
+};
+
 #endif /* _UAPI_XT_HASHLIMIT_H */
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 762e187..df57989 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -56,6 +56,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
 }
 
 /* need to declare this at the top */
+static const struct file_operations dl_file_ops_v2;
 static const struct file_operations dl_file_ops_v1;
 static const struct file_operations dl_file_ops;
 
@@ -87,8 +88,19 @@ struct dsthash_ent {
 	unsigned long expires;		/* precalculated expiry time */
 	struct {
 		unsigned long prev;	/* last modification */
-		u_int64_t credit;
-		u_int64_t credit_cap, cost;
+		union {
+			struct {
+				u_int64_t credit;
+				u_int64_t credit_cap;
+				u_int64_t cost;
+			};
+			struct {
+				u_int32_t interval, prev_window;
+				u_int64_t current_rate;
+				u_int64_t rate;
+				int64_t burst;
+			};
+		};
 	} rateinfo;
 	struct rcu_head rcu;
 };
@@ -99,7 +111,7 @@ struct xt_hashlimit_htable {
 	u_int8_t family;
 	bool rnd_initialized;
 
-	struct hashlimit_cfg2 cfg;	/* config */
+	struct hashlimit_cfg3 cfg;	/* config */
 
 	/* used internally */
 	spinlock_t lock;		/* lock for list_head */
@@ -116,7 +128,7 @@ struct xt_hashlimit_htable {
 };
 
 static int
-cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+cfg_copy(struct hashlimit_cfg3 *to, const void *from, int revision)
 {
 	if (revision == 1) {
 		struct hashlimit_cfg1 *cfg = from;
@@ -131,7 +143,19 @@ struct xt_hashlimit_htable {
 		to->srcmask = cfg->srcmask;
 		to->dstmask = cfg->dstmask;
 	} else if (revision == 2) {
-		memcpy(to, from, sizeof(struct hashlimit_cfg2));
+		struct hashlimit_cfg2 *cfg = (struct hashlimit_cfg2 *)from;
+
+		to->mode = cfg->mode;
+		to->avg = cfg->avg;
+		to->burst = cfg->burst;
+		to->size = cfg->size;
+		to->max = cfg->max;
+		to->gc_interval = cfg->gc_interval;
+		to->expire = cfg->expire;
+		to->srcmask = cfg->srcmask;
+		to->dstmask = cfg->dstmask;
+	} else if (revision == 3) {
+		memcpy(to, from, sizeof(struct hashlimit_cfg3));
 	} else {
 		return -EINVAL;
 	}
@@ -240,13 +264,14 @@ static void dsthash_free_rcu(struct rcu_head *head)
 }
 static void htable_gc(struct work_struct *work);
 
-static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
 			 const char *name, u_int8_t family,
 			 struct xt_hashlimit_htable **out_hinfo,
 			 int revision)
 {
 	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 	struct xt_hashlimit_htable *hinfo;
+	const struct file_operations *fops;
 	unsigned int size, i;
 	int ret;
 
@@ -268,7 +293,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
 	*out_hinfo = hinfo;
 
 	/* copy match config into hashtable config */
-	ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+	ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
 
 	if (ret)
 		return ret;
@@ -293,11 +318,21 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
 	}
 	spin_lock_init(&hinfo->lock);
 
+	switch (revision) {
+	case 1:
+		fops = &dl_file_ops_v1;
+		break;
+	case 2:
+		fops = &dl_file_ops_v2;
+		break;
+	default:
+		fops = &dl_file_ops;
+	}
+
 	hinfo->pde = proc_create_data(name, 0,
 		(family == NFPROTO_IPV4) ?
 		hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
-		(revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
-		hinfo);
+		fops, hinfo);
 	if (hinfo->pde == NULL) {
 		kfree(hinfo->name);
 		vfree(hinfo);
@@ -482,6 +517,25 @@ static u32 user2credits_byte(u32 user)
 	return (u32) (us >> 32);
 }
 
+static u64 user2rate(u64 user)
+{
+	if (user != 0) {
+		return div64_u64(XT_HASHLIMIT_SCALE_v2, user);
+	} else {
+		pr_warn("invalid rate from userspace: %llu\n", user);
+		return 0;
+	}
+}
+
+static u64 user2rate_bytes(u64 user)
+{
+	u64 r;
+
+	r = user ? 0xFFFFFFFFULL / user : 0xFFFFFFFFULL;
+	r = (r - 1) << 4;
+	return r;
+}
+
 static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
 			    u32 mode, int revision)
 {
@@ -491,6 +545,21 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
 	if (delta == 0)
 		return;
 
+	if (revision >= 3 && mode & XT_HASHLIMIT_RATE_MATCH) {
+		u64 interval = dh->rateinfo.interval * HZ;
+
+		if (delta < interval)
+			return;
+
+		dh->rateinfo.prev = now;
+		dh->rateinfo.prev_window =
+			((dh->rateinfo.current_rate * interval) >
+			 (delta * dh->rateinfo.rate));
+		dh->rateinfo.current_rate = 0;
+
+		return;
+	}
+
 	dh->rateinfo.prev = now;
 
 	if (mode & XT_HASHLIMIT_BYTES) {
@@ -515,7 +584,23 @@ static void rateinfo_init(struct dsthash_ent *dh,
 			  struct xt_hashlimit_htable *hinfo, int revision)
 {
 	dh->rateinfo.prev = jiffies;
-	if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+	if (revision >= 3 && hinfo->cfg.mode & XT_HASHLIMIT_RATE_MATCH) {
+		dh->rateinfo.prev_window = 0;
+		dh->rateinfo.current_rate = 0;
+		if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
+			dh->rateinfo.rate = user2rate_bytes(hinfo->cfg.avg);
+			if (hinfo->cfg.burst)
+				dh->rateinfo.burst =
+					hinfo->cfg.burst * dh->rateinfo.rate;
+			else
+				dh->rateinfo.burst = dh->rateinfo.rate;
+		} else {
+			dh->rateinfo.rate = user2rate(hinfo->cfg.avg);
+			dh->rateinfo.burst =
+				hinfo->cfg.burst + dh->rateinfo.rate;
+		}
+		dh->rateinfo.interval = hinfo->cfg.interval;
+	} else if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
 		dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ;
 		dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg);
 		dh->rateinfo.credit_cap = hinfo->cfg.burst;
@@ -648,7 +733,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
 static bool
 hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
 		    struct xt_hashlimit_htable *hinfo,
-		    const struct hashlimit_cfg2 *cfg, int revision)
+		    const struct hashlimit_cfg3 *cfg, int revision)
 {
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
@@ -680,6 +765,20 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
 		rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
 	}
 
+	if (cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+		cost = (cfg->mode & XT_HASHLIMIT_BYTES) ? skb->len : 1;
+		dh->rateinfo.current_rate += cost;
+
+		if (!dh->rateinfo.prev_window &&
+		    (dh->rateinfo.current_rate <= dh->rateinfo.burst)) {
+			spin_unlock(&dh->lock);
+			rcu_read_unlock_bh();
+			return !(cfg->mode & XT_HASHLIMIT_INVERT);
+		} else {
+			goto overlimit;
+		}
+	}
+
 	if (cfg->mode & XT_HASHLIMIT_BYTES)
 		cost = hashlimit_byte_cost(skb->len, dh);
 	else
@@ -693,6 +792,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
 		return !(cfg->mode & XT_HASHLIMIT_INVERT);
 	}
 
+overlimit:
 	spin_unlock(&dh->lock);
 	rcu_read_unlock_bh();
 	/* default match is underlimit - so over the limit, we need to invert */
@@ -708,7 +808,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
 {
 	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
-	struct hashlimit_cfg2 cfg = {};
+	struct hashlimit_cfg3 cfg = {};
 	int ret;
 
 	ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
@@ -720,17 +820,33 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh)
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
+	struct hashlimit_cfg3 cfg = {};
+	int ret;
+
+	ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+	if (ret)
+		return ret;
+
+	return hashlimit_mt_common(skb, par, hinfo, &cfg, 2);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
+	struct xt_hashlimit_htable *hinfo = info->hinfo;
 
-	return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+	return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
 }
 
 static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
 				     struct xt_hashlimit_htable **hinfo,
-				     struct hashlimit_cfg2 *cfg,
+				     struct hashlimit_cfg3 *cfg,
 				     const char *name, int revision)
 {
 	struct net *net = par->net;
@@ -753,7 +869,17 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
 	}
 
 	/* Check for overflow. */
-	if (cfg->mode & XT_HASHLIMIT_BYTES) {
+	if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) {
+		if (cfg->avg == 0) {
+			pr_info("hashlimit invalid rate\n");
+			return -ERANGE;
+		}
+
+		if (cfg->interval == 0) {
+			pr_info("hashlimit invalid interval\n");
+			return -EINVAL;
+		}
+	} else if (cfg->mode & XT_HASHLIMIT_BYTES) {
 		if (user2credits_byte(cfg->avg) == 0) {
 			pr_info("overflow, rate too high: %llu\n", cfg->avg);
 			return -EINVAL;
@@ -784,7 +910,7 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
 static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
 {
 	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
-	struct hashlimit_cfg2 cfg = {};
+	struct hashlimit_cfg3 cfg = {};
 	int ret;
 
 	if (info->name[sizeof(info->name) - 1] != '\0')
@@ -799,15 +925,40 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
 					 &cfg, info->name, 1);
 }
 
-static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
 {
 	struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+	struct hashlimit_cfg3 cfg = {};
+	int ret;
+
+	if (info->name[sizeof(info->name) - 1] != '\0')
+		return -EINVAL;
+
+	ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
+
+	if (ret)
+		return ret;
+
+	return hashlimit_mt_check_common(par, &info->hinfo,
+					 &cfg, info->name, 2);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
 
 	if (info->name[sizeof(info->name) - 1] != '\0')
 		return -EINVAL;
 
 	return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
-					 info->name, 2);
+					 info->name, 3);
+}
+
+static void hashlimit_mt_destroy_v2(const struct xt_mtdtor_param *par)
+{
+	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+	htable_put(info->hinfo);
 }
 
 static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
@@ -819,7 +970,7 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
 
 static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+	const struct xt_hashlimit_mtinfo3 *info = par->matchinfo;
 
 	htable_put(info->hinfo);
 }
@@ -840,9 +991,20 @@ static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 		.name           = "hashlimit",
 		.revision       = 2,
 		.family         = NFPROTO_IPV4,
-		.match          = hashlimit_mt,
+		.match          = hashlimit_mt_v2,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
 		.usersize	= offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+		.checkentry     = hashlimit_mt_check_v2,
+		.destroy        = hashlimit_mt_destroy_v2,
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "hashlimit",
+		.revision       = 3,
+		.family         = NFPROTO_IPV4,
+		.match          = hashlimit_mt,
+		.matchsize      = sizeof(struct xt_hashlimit_mtinfo3),
+		.usersize	= offsetof(struct xt_hashlimit_mtinfo3, hinfo),
 		.checkentry     = hashlimit_mt_check,
 		.destroy        = hashlimit_mt_destroy,
 		.me             = THIS_MODULE,
@@ -863,9 +1025,20 @@ static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 		.name           = "hashlimit",
 		.revision       = 2,
 		.family         = NFPROTO_IPV6,
-		.match          = hashlimit_mt,
+		.match          = hashlimit_mt_v2,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
 		.usersize	= offsetof(struct xt_hashlimit_mtinfo2, hinfo),
+		.checkentry     = hashlimit_mt_check_v2,
+		.destroy        = hashlimit_mt_destroy_v2,
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "hashlimit",
+		.revision       = 3,
+		.family         = NFPROTO_IPV6,
+		.match          = hashlimit_mt,
+		.matchsize      = sizeof(struct xt_hashlimit_mtinfo3),
+		.usersize	= offsetof(struct xt_hashlimit_mtinfo3, hinfo),
 		.checkentry     = hashlimit_mt_check,
 		.destroy        = hashlimit_mt_destroy,
 		.me             = THIS_MODULE,
@@ -947,6 +1120,21 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
 	}
 }
 
+static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
+			       struct seq_file *s)
+{
+	const struct xt_hashlimit_htable *ht = s->private;
+
+	spin_lock(&ent->lock);
+	/* recalculate to show accurate numbers */
+	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+	dl_seq_print(ent, family, s);
+
+	spin_unlock(&ent->lock);
+	return seq_has_overflowed(s);
+}
+
 static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
 			       struct seq_file *s)
 {
@@ -969,7 +1157,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 
 	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
-	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 3);
 
 	dl_seq_print(ent, family, s);
 
@@ -977,6 +1165,20 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 	return seq_has_overflowed(s);
 }
 
+static int dl_seq_show_v2(struct seq_file *s, void *v)
+{
+	struct xt_hashlimit_htable *htable = s->private;
+	unsigned int *bucket = (unsigned int *)v;
+	struct dsthash_ent *ent;
+
+	if (!hlist_empty(&htable->hash[*bucket])) {
+		hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+			if (dl_seq_real_show_v2(ent, htable->family, s))
+				return -1;
+	}
+	return 0;
+}
+
 static int dl_seq_show_v1(struct seq_file *s, void *v)
 {
 	struct xt_hashlimit_htable *htable = s->private;
@@ -1012,6 +1214,13 @@ static int dl_seq_show(struct seq_file *s, void *v)
 	.show  = dl_seq_show_v1
 };
 
+static const struct seq_operations dl_seq_ops_v2 = {
+	.start = dl_seq_start,
+	.next  = dl_seq_next,
+	.stop  = dl_seq_stop,
+	.show  = dl_seq_show_v2
+};
+
 static const struct seq_operations dl_seq_ops = {
 	.start = dl_seq_start,
 	.next  = dl_seq_next,
@@ -1019,6 +1228,18 @@ static int dl_seq_show(struct seq_file *s, void *v)
 	.show  = dl_seq_show
 };
 
+static int dl_proc_open_v2(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &dl_seq_ops_v2);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+
+		sf->private = PDE_DATA(inode);
+	}
+	return ret;
+}
+
 static int dl_proc_open_v1(struct inode *inode, struct file *file)
 {
 	int ret = seq_open(file, &dl_seq_ops_v1);
@@ -1042,6 +1263,14 @@ static int dl_proc_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
+static const struct file_operations dl_file_ops_v2 = {
+	.owner   = THIS_MODULE,
+	.open    = dl_proc_open_v2,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
 static const struct file_operations dl_file_ops_v1 = {
 	.owner   = THIS_MODULE,
 	.open    = dl_proc_open_v1,
-- 
1.9.1


^ permalink raw reply related

* Re: [PATCH V4 net 0/2] ipv6: fix flowlabel issue for reset packet
From: Martin KaFai Lau @ 2017-08-18 20:51 UTC (permalink / raw)
  To: Tom Herbert; +Cc: Shaohua Li, Linux Kernel Network Developers, David S. Miller
In-Reply-To: <CALx6S3498iPfNs4Wt5RWjoxZS0nXHT=qvdSgZhZbqxswqJqdZw@mail.gmail.com>

On Fri, Aug 18, 2017 at 07:50:03AM -0700, Tom Herbert wrote:
> > We had been using the auto_flowlabels=1 (i.e. essentially enable flowlabel)
> > mainly because we want to take the benefit of dst_negative_advice() when
> > tcp_write_timeout() happens.
> >
> > During our test, our system handles quite well with changing flowlabel.
> > The only exception we have hit is the TCP_RST sent from an inet_timewait_sock.
> >
> Martin,
>
> That is interesting data. Have you determined why the middlebox has a
> problem with flow label change in TW state but not other states?
Tom,

The problem of this middle box is specific to TCP_RST with a different
flowlabel from its previous packets.  Sending TCP_RST from TW state hits
this pain point.

It seems that the middle box specifically drops a TCP_RST if it
does not know anything about the flow.  Since the flowlabel of the TCP_RST
(sent in TW state) is always different, it always lands on a different middle
box.  None of these TCP_RSTs can be delivered.

We are resilient to a small number of TCP_RST drops.  However, a guaranteed
flowlabel change on every TCP_RST, followed by a drop, is too much.

This flowlabel change does not look intentional either when transitioning
from full sk to tw sk (tw->tw_flowlabel is inheriting the np->flow_label
in tcp_time_wait()).  Currently, the tw_flowlabel is used in tcp_v6_timewait_ack()
but not in tcp_v6_send_reset().  Hence,  shaohua is looking for a solution to solve
them together.

Thanks,
Martin

>
> Tom
>
> > If we keep the flowlabel consistent (or persistent sk_txhash), there
> > is no practical usage for us to turn on flowlabel and the problem also goes
> > away.  We have it off for now.
> >
> >>
> >> > There seems to have other bug in this side. From my understanding, commit
> >> > 265f94ff54d6(net: Recompute sk_txhash on negative routing advice) tries to
> >> > select a different route. But the multipath selection code
> >> > (rt6_multipath_select) doesn't use sk_txhash or skb->hash, it does use
> >> > fl6.flowlabel, but that is the flowlabel user sets. So looks like the commit
> >> > doesn't change anything.
> >> >
> >> The routing functions typically don't use sock of skbuff, but use flow
> >> structs instead. It may be reasonable to add a hash to those.
> > The localhost's multipath selection is another existing issue.  AFAICT,
> > it does not take the sk_txhash (or skb->hash) into account and the following
> > dst_negative_advice() will also have no effect on the route selection.
> > It is another issue to be fixed and to be figured out how to pass the
> > sk_txhash down. (1)
> >
> > Shaohua is proposing to record the 20 bits of the sk_txhash in the
> > tw_flowlabel of the 'struct inet_timewait_sock'.  The tw_flowlabel could
> > potentially be used to do the multipath selection once we figured out
> > how to tackle (1).
> >
> > Thanks,
> > Martin
> >
> >
> >>
> >> > What's the 'src port for UDP encap'? I can't find the code setting skb->hash
> >> > to sk_txhash in UDP side.
> >> >
> >> udp_flow_src_port is the function called by UDP encaps to set the source
> >> port. It calls skb_get_hash. sk_set_txhash is the function that sets txhash,
> >> right now to a random value. skb_set_hash_from_sk sets skb->hash when the
> >> skbuff is owned by a socket (skb_set_owner_w).
> >>
> >> Thanks,
> >> Tom
> >>
> >>
> >> > Thanks,
> >> > Shaohua

^ permalink raw reply

* [PATCH net-next 10/10] net: style cleanups
From: Stephen Hemminger @ 2017-08-18 20:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20170818204628.17147-1-sthemmin@microsoft.com>

Make code closer to current style. Mostly whitespace changes.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 net/core/net-sysfs.c | 68 +++++++++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ca82c4a72350..927a6dcbad96 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -97,7 +97,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 		return restart_syscall();
 
 	if (dev_isalive(netdev)) {
-		if ((ret = (*set)(netdev, new)) == 0)
+		ret = (*set)(netdev, new);
+		if (ret == 0)
 			ret = len;
 	}
 	rtnl_unlock();
@@ -160,6 +161,7 @@ static ssize_t broadcast_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
 	struct net_device *ndev = to_net_dev(dev);
+
 	if (dev_isalive(ndev))
 		return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len);
 	return -EINVAL;
@@ -170,7 +172,7 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier)
 {
 	if (!netif_running(dev))
 		return -EINVAL;
-	return dev_change_carrier(dev, (bool) new_carrier);
+	return dev_change_carrier(dev, (bool)new_carrier);
 }
 
 static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
@@ -183,9 +185,10 @@ static ssize_t carrier_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
-	if (netif_running(netdev)) {
+
+	if (netif_running(netdev))
 		return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
-	}
+
 	return -EINVAL;
 }
 static DEVICE_ATTR_RW(carrier);
@@ -290,6 +293,7 @@ static ssize_t carrier_changes_show(struct device *dev,
 				    char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
+
 	return sprintf(buf, fmt_dec,
 		       atomic_read(&netdev->carrier_changes));
 }
@@ -299,7 +303,7 @@ static DEVICE_ATTR_RO(carrier_changes);
 
 static int change_mtu(struct net_device *dev, unsigned long new_mtu)
 {
-	return dev_set_mtu(dev, (int) new_mtu);
+	return dev_set_mtu(dev, (int)new_mtu);
 }
 
 static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
@@ -311,7 +315,7 @@ NETDEVICE_SHOW_RW(mtu, fmt_dec);
 
 static int change_flags(struct net_device *dev, unsigned long new_flags)
 {
-	return dev_change_flags(dev, (unsigned int) new_flags);
+	return dev_change_flags(dev, (unsigned int)new_flags);
 }
 
 static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
@@ -362,8 +366,8 @@ static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
 }
 
 static ssize_t gro_flush_timeout_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf, size_t len)
+				       struct device_attribute *attr,
+				       const char *buf, size_t len)
 {
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -412,7 +416,7 @@ static DEVICE_ATTR_RW(ifalias);
 
 static int change_group(struct net_device *dev, unsigned long new_group)
 {
-	dev_set_group(dev, (int) new_group);
+	dev_set_group(dev, (int)new_group);
 	return 0;
 }
 
@@ -426,7 +430,7 @@ static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
 
 static int change_proto_down(struct net_device *dev, unsigned long proto_down)
 {
-	return dev_change_proto_down(dev, (bool) proto_down);
+	return dev_change_proto_down(dev, (bool)proto_down);
 }
 
 static ssize_t proto_down_store(struct device *dev,
@@ -549,14 +553,14 @@ static ssize_t netstat_show(const struct device *d,
 	ssize_t ret = -EINVAL;
 
 	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
-			offset % sizeof(u64) != 0);
+		offset % sizeof(u64) != 0);
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
 		struct rtnl_link_stats64 temp;
 		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
 
-		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
+		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset));
 	}
 	read_unlock(&dev_base_lock);
 	return ret;
@@ -565,7 +569,7 @@ static ssize_t netstat_show(const struct device *d,
 /* generate a read-only statistics attribute */
 #define NETSTAT_ENTRY(name)						\
 static ssize_t name##_show(struct device *d,				\
-			   struct device_attribute *attr, char *buf) 	\
+			   struct device_attribute *attr, char *buf)	\
 {									\
 	return netstat_show(d, attr, buf,				\
 			    offsetof(struct rtnl_link_stats64, name));	\
@@ -625,7 +629,6 @@ static struct attribute *netstat_attrs[] __ro_after_init = {
 	NULL
 };
 
-
 static const struct attribute_group netstat_group = {
 	.name  = "statistics",
 	.attrs  = netstat_attrs,
@@ -647,8 +650,8 @@ static const struct attribute_group wireless_group = {
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_SYSFS
-#define to_rx_queue_attr(_attr) container_of(_attr,		\
-    struct rx_queue_attribute, attr)
+#define to_rx_queue_attr(_attr) \
+	container_of(_attr, struct rx_queue_attribute, attr)
 
 #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
 
@@ -725,8 +728,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	}
 
 	map = kzalloc(max_t(unsigned int,
-	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
-	    GFP_KERNEL);
+			    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+		      GFP_KERNEL);
 	if (!map) {
 		free_cpumask_var(mask);
 		return -ENOMEM;
@@ -736,9 +739,9 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	for_each_cpu_and(cpu, mask, cpu_online_mask)
 		map->cpus[i++] = cpu;
 
-	if (i)
+	if (i) {
 		map->len = i;
-	else {
+	} else {
 		kfree(map);
 		map = NULL;
 	}
@@ -827,8 +830,9 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 		table->mask = mask;
 		for (count = 0; count <= mask; count++)
 			table->flows[count].cpu = RPS_NO_CPU;
-	} else
+	} else {
 		table = NULL;
+	}
 
 	spin_lock(&rps_dev_flow_lock);
 	old_table = rcu_dereference_protected(queue->rps_flow_table,
@@ -865,7 +869,6 @@ static void rx_queue_release(struct kobject *kobj)
 	struct rps_map *map;
 	struct rps_dev_flow_table *flow_table;
 
-
 	map = rcu_dereference_protected(queue->rps_map, 1);
 	if (map) {
 		RCU_INIT_POINTER(queue->rps_map, NULL);
@@ -910,7 +913,7 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
 
 	kobj->kset = dev->queues_kset;
 	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
-	    "rx-%u", index);
+				     "rx-%u", index);
 	if (error)
 		return error;
 
@@ -974,8 +977,8 @@ struct netdev_queue_attribute {
 	ssize_t (*store)(struct netdev_queue *queue,
 			 const char *buf, size_t len);
 };
-#define to_netdev_queue_attr(_attr) container_of(_attr,		\
-    struct netdev_queue_attribute, attr)
+#define to_netdev_queue_attr(_attr) \
+	container_of(_attr, struct netdev_queue_attribute, attr)
 
 #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj)
 
@@ -1104,9 +1107,9 @@ static ssize_t bql_set(const char *buf, const size_t count,
 	unsigned int value;
 	int err;
 
-	if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+	if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) {
 		value = DQL_MAX_LIMIT;
-	else {
+	} else {
 		err = kstrtouint(buf, 10, &value);
 		if (err < 0)
 			return err;
@@ -1320,7 +1323,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
 
 	kobj->kset = dev->queues_kset;
 	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
-	    "tx-%u", index);
+				     "tx-%u", index);
 	if (error)
 		return error;
 
@@ -1377,7 +1380,7 @@ static int register_queue_kobjects(struct net_device *dev)
 
 #ifdef CONFIG_SYSFS
 	dev->queues_kset = kset_create_and_add("queues",
-	    NULL, &dev->dev.kobj);
+					       NULL, &dev->dev.kobj);
 	if (!dev->queues_kset)
 		return -ENOMEM;
 	real_rx = dev->real_num_rx_queues;
@@ -1467,7 +1470,8 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
 
 	/* pass ifindex to uevent.
 	 * ifindex is useful as it won't change (interface name may change)
-	 * and is what RtNetlink uses natively. */
+	 * and is what RtNetlink uses natively.
+	 */
 	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
 
 exit:
@@ -1542,7 +1546,7 @@ EXPORT_SYMBOL(of_find_net_device_by_node);
  */
 void netdev_unregister_kobject(struct net_device *ndev)
 {
-	struct device *dev = &(ndev->dev);
+	struct device *dev = &ndev->dev;
 
 	if (!atomic_read(&dev_net(ndev)->count))
 		dev_set_uevent_suppress(dev, 1);
@@ -1559,7 +1563,7 @@ void netdev_unregister_kobject(struct net_device *ndev)
 /* Create sysfs entries for network device. */
 int netdev_register_kobject(struct net_device *ndev)
 {
-	struct device *dev = &(ndev->dev);
+	struct device *dev = &ndev->dev;
 	const struct attribute_group **groups = ndev->sysfs_groups;
 	int error = 0;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 09/10] net: mark receive queue attributes ro_after_init
From: Stephen Hemminger @ 2017-08-18 20:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20170818204628.17147-1-sthemmin@microsoft.com>

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 net/core/net-sysfs.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 808fbb837f25..ca82c4a72350 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -655,7 +655,7 @@ static const struct attribute_group wireless_group = {
 static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
 				  char *buf)
 {
-	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
 
 	if (!attribute->show)
@@ -667,7 +667,7 @@ static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
 static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
 				   const char *buf, size_t count)
 {
-	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
 
 	if (!attribute->store)
@@ -842,16 +842,15 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 	return len;
 }
 
-static struct rx_queue_attribute rps_cpus_attribute =
-	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+static struct rx_queue_attribute rps_cpus_attribute __ro_after_init
+	= __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
 
-
-static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
-	__ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
-	    show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init
+	= __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+		 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
 #endif /* CONFIG_RPS */
 
-static struct attribute *rx_queue_default_attrs[] = {
+static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
 #ifdef CONFIG_RPS
 	&rps_cpus_attribute.attr,
 	&rps_dev_flow_table_cnt_attribute.attr,
@@ -896,7 +895,7 @@ static const void *rx_queue_namespace(struct kobject *kobj)
 	return ns;
 }
 
-static struct kobj_type rx_queue_ktype = {
+static struct kobj_type rx_queue_ktype __ro_after_init = {
 	.sysfs_ops = &rx_queue_sysfs_ops,
 	.release = rx_queue_release,
 	.default_attrs = rx_queue_default_attrs,
@@ -983,7 +982,8 @@ struct netdev_queue_attribute {
 static ssize_t netdev_queue_attr_show(struct kobject *kobj,
 				      struct attribute *attr, char *buf)
 {
-	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	const struct netdev_queue_attribute *attribute
+		= to_netdev_queue_attr(attr);
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 
 	if (!attribute->show)
@@ -996,7 +996,8 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj,
 				       struct attribute *attr,
 				       const char *buf, size_t count)
 {
-	struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr);
+	const struct netdev_queue_attribute *attribute
+		= to_netdev_queue_attr(attr);
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 
 	if (!attribute->store)
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 08/10] net: make queue attributes ro_after_init
From: Stephen Hemminger @ 2017-08-18 20:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20170818204628.17147-1-sthemmin@microsoft.com>

The XPS queue attributes can be ro_after_init.
Also use the __ATTR_RW and __ATTR_RO macros to simplify initialization.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 net/core/net-sysfs.c | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a4af5e2ff398..808fbb837f25 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1010,8 +1010,7 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
 	.store = netdev_queue_attr_store,
 };
 
-static ssize_t show_trans_timeout(struct netdev_queue *queue,
-				  char *buf)
+static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
 {
 	unsigned long trans_timeout;
 
@@ -1033,7 +1032,7 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 	return i;
 }
 
-static ssize_t show_traffic_class(struct netdev_queue *queue,
+static ssize_t traffic_class_show(struct netdev_queue *queue,
 				  char *buf)
 {
 	struct net_device *dev = queue->dev;
@@ -1047,14 +1046,14 @@ static ssize_t show_traffic_class(struct netdev_queue *queue,
 }
 
 #ifdef CONFIG_XPS
-static ssize_t show_tx_maxrate(struct netdev_queue *queue,
+static ssize_t tx_maxrate_show(struct netdev_queue *queue,
 			       char *buf)
 {
 	return sprintf(buf, "%lu\n", queue->tx_maxrate);
 }
 
-static ssize_t set_tx_maxrate(struct netdev_queue *queue,
-			      const char *buf, size_t len)
+static ssize_t tx_maxrate_store(struct netdev_queue *queue,
+				const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
 	int err, index = get_netdev_queue_index(queue);
@@ -1079,16 +1078,15 @@ static ssize_t set_tx_maxrate(struct netdev_queue *queue,
 	return err;
 }
 
-static struct netdev_queue_attribute queue_tx_maxrate =
-	__ATTR(tx_maxrate, S_IRUGO | S_IWUSR,
-	       show_tx_maxrate, set_tx_maxrate);
+static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init
+	= __ATTR_RW(tx_maxrate);
 #endif
 
-static struct netdev_queue_attribute queue_trans_timeout =
-	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+static struct netdev_queue_attribute queue_trans_timeout __ro_after_init
+	= __ATTR_RO(tx_timeout);
 
-static struct netdev_queue_attribute queue_traffic_class =
-	__ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL);
+static struct netdev_queue_attribute queue_traffic_class __ro_after_init
+	= __ATTR_RO(traffic_class);
 
 #ifdef CONFIG_BQL
 /*
@@ -1196,8 +1194,8 @@ static const struct attribute_group dql_group = {
 #endif /* CONFIG_BQL */
 
 #ifdef CONFIG_XPS
-static ssize_t show_xps_map(struct netdev_queue *queue,
-			    char *buf)
+static ssize_t xps_cpus_show(struct netdev_queue *queue,
+			     char *buf)
 {
 	struct net_device *dev = queue->dev;
 	int cpu, len, num_tc = 1, tc = 0;
@@ -1243,8 +1241,8 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 	return len < PAGE_SIZE ? len : -EINVAL;
 }
 
-static ssize_t store_xps_map(struct netdev_queue *queue,
-			     const char *buf, size_t len)
+static ssize_t xps_cpus_store(struct netdev_queue *queue,
+			      const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
 	unsigned long index;
@@ -1272,11 +1270,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	return err ? : len;
 }
 
-static struct netdev_queue_attribute xps_cpus_attribute =
-    __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
+	= __ATTR_RW(xps_cpus);
 #endif /* CONFIG_XPS */
 
-static struct attribute *netdev_queue_default_attrs[] = {
+static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
 	&queue_trans_timeout.attr,
 	&queue_traffic_class.attr,
 #ifdef CONFIG_XPS
@@ -1306,7 +1304,7 @@ static const void *netdev_queue_namespace(struct kobject *kobj)
 	return ns;
 }
 
-static struct kobj_type netdev_queue_ktype = {
+static struct kobj_type netdev_queue_ktype __ro_after_init = {
 	.sysfs_ops = &netdev_queue_sysfs_ops,
 	.release = netdev_queue_release,
 	.default_attrs = netdev_queue_default_attrs,
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 07/10] net: make BQL sysfs attributes ro_after_init
From: Stephen Hemminger @ 2017-08-18 20:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20170818204628.17147-1-sthemmin@microsoft.com>

Also fix macro to not have ; at end.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 net/core/net-sysfs.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 48714c8024f3..a4af5e2ff398 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1144,9 +1144,9 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue,
 	return len;
 }
 
-static struct netdev_queue_attribute bql_hold_time_attribute =
-	__ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
-	    bql_set_hold_time);
+static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init
+	= __ATTR(hold_time, S_IRUGO | S_IWUSR,
+		 bql_show_hold_time, bql_set_hold_time);
 
 static ssize_t bql_show_inflight(struct netdev_queue *queue,
 				 char *buf)
@@ -1156,7 +1156,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue,
 	return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
 }
 
-static struct netdev_queue_attribute bql_inflight_attribute =
+static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init =
 	__ATTR(inflight, S_IRUGO, bql_show_inflight, NULL);
 
 #define BQL_ATTR(NAME, FIELD)						\
@@ -1172,15 +1172,15 @@ static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
 	return bql_set(buf, len, &queue->dql.FIELD);			\
 }									\
 									\
-static struct netdev_queue_attribute bql_ ## NAME ## _attribute =	\
-	__ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME,		\
-	    bql_set_ ## NAME);
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \
+	= __ATTR(NAME, S_IRUGO | S_IWUSR,				\
+		 bql_show_ ## NAME, bql_set_ ## NAME)
 
-BQL_ATTR(limit, limit)
-BQL_ATTR(limit_max, max_limit)
-BQL_ATTR(limit_min, min_limit)
+BQL_ATTR(limit, limit);
+BQL_ATTR(limit_max, max_limit);
+BQL_ATTR(limit_min, min_limit);
 
-static struct attribute *dql_attrs[] = {
+static struct attribute *dql_attrs[] __ro_after_init = {
 	&bql_limit_attribute.attr,
 	&bql_limit_max_attribute.attr,
 	&bql_limit_min_attribute.attr,
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 06/10] net: drop unused attribute argument from sysfs queue funcs
From: Stephen Hemminger @ 2017-08-18 20:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20170818204628.17147-1-sthemmin@microsoft.com>

The show and store functions don't need/use the attribute.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 include/linux/netdevice.h |  5 ++---
 net/core/net-sysfs.c      | 37 +++++++++++--------------------------
 2 files changed, 13 insertions(+), 29 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b0c928598dab..c5475b37a631 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -694,10 +694,9 @@ struct netdev_rx_queue {
  */
 struct rx_queue_attribute {
 	struct attribute attr;
-	ssize_t (*show)(struct netdev_rx_queue *queue,
-	    struct rx_queue_attribute *attr, char *buf);
+	ssize_t (*show)(struct netdev_rx_queue *queue, char *buf);
 	ssize_t (*store)(struct netdev_rx_queue *queue,
-	    struct rx_queue_attribute *attr, const char *buf, size_t len);
+			 const char *buf, size_t len);
 };
 
 #ifdef CONFIG_XPS
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 76ec74d4a65b..48714c8024f3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -661,7 +661,7 @@ static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
 	if (!attribute->show)
 		return -EIO;
 
-	return attribute->show(queue, attribute, buf);
+	return attribute->show(queue, buf);
 }
 
 static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
@@ -673,7 +673,7 @@ static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
 	if (!attribute->store)
 		return -EIO;
 
-	return attribute->store(queue, attribute, buf, count);
+	return attribute->store(queue, buf, count);
 }
 
 static const struct sysfs_ops rx_queue_sysfs_ops = {
@@ -682,8 +682,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = {
 };
 
 #ifdef CONFIG_RPS
-static ssize_t show_rps_map(struct netdev_rx_queue *queue,
-			    struct rx_queue_attribute *attribute, char *buf)
+static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
 {
 	struct rps_map *map;
 	cpumask_var_t mask;
@@ -706,8 +705,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue,
 }
 
 static ssize_t store_rps_map(struct netdev_rx_queue *queue,
-		      struct rx_queue_attribute *attribute,
-		      const char *buf, size_t len)
+			     const char *buf, size_t len)
 {
 	struct rps_map *old_map, *map;
 	cpumask_var_t mask;
@@ -765,7 +763,6 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 }
 
 static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
-					   struct rx_queue_attribute *attr,
 					   char *buf)
 {
 	struct rps_dev_flow_table *flow_table;
@@ -788,8 +785,7 @@ static void rps_dev_flow_table_release(struct rcu_head *rcu)
 }
 
 static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
-				     struct rx_queue_attribute *attr,
-				     const char *buf, size_t len)
+					    const char *buf, size_t len)
 {
 	unsigned long mask, count;
 	struct rps_dev_flow_table *table, *old_table;
@@ -975,10 +971,9 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
  */
 struct netdev_queue_attribute {
 	struct attribute attr;
-	ssize_t (*show)(struct netdev_queue *queue,
-	    struct netdev_queue_attribute *attr, char *buf);
+	ssize_t (*show)(struct netdev_queue *queue, char *buf);
 	ssize_t (*store)(struct netdev_queue *queue,
-	    struct netdev_queue_attribute *attr, const char *buf, size_t len);
+			 const char *buf, size_t len);
 };
 #define to_netdev_queue_attr(_attr) container_of(_attr,		\
     struct netdev_queue_attribute, attr)
@@ -994,7 +989,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj,
 	if (!attribute->show)
 		return -EIO;
 
-	return attribute->show(queue, attribute, buf);
+	return attribute->show(queue, buf);
 }
 
 static ssize_t netdev_queue_attr_store(struct kobject *kobj,
@@ -1007,7 +1002,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj,
 	if (!attribute->store)
 		return -EIO;
 
-	return attribute->store(queue, attribute, buf, count);
+	return attribute->store(queue, buf, count);
 }
 
 static const struct sysfs_ops netdev_queue_sysfs_ops = {
@@ -1016,7 +1011,6 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
 };
 
 static ssize_t show_trans_timeout(struct netdev_queue *queue,
-				  struct netdev_queue_attribute *attribute,
 				  char *buf)
 {
 	unsigned long trans_timeout;
@@ -1040,7 +1034,6 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 }
 
 static ssize_t show_traffic_class(struct netdev_queue *queue,
-				  struct netdev_queue_attribute *attribute,
 				  char *buf)
 {
 	struct net_device *dev = queue->dev;
@@ -1055,14 +1048,12 @@ static ssize_t show_traffic_class(struct netdev_queue *queue,
 
 #ifdef CONFIG_XPS
 static ssize_t show_tx_maxrate(struct netdev_queue *queue,
-			       struct netdev_queue_attribute *attribute,
 			       char *buf)
 {
 	return sprintf(buf, "%lu\n", queue->tx_maxrate);
 }
 
 static ssize_t set_tx_maxrate(struct netdev_queue *queue,
-			      struct netdev_queue_attribute *attribute,
 			      const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
@@ -1130,7 +1121,6 @@ static ssize_t bql_set(const char *buf, const size_t count,
 }
 
 static ssize_t bql_show_hold_time(struct netdev_queue *queue,
-				  struct netdev_queue_attribute *attr,
 				  char *buf)
 {
 	struct dql *dql = &queue->dql;
@@ -1139,7 +1129,6 @@ static ssize_t bql_show_hold_time(struct netdev_queue *queue,
 }
 
 static ssize_t bql_set_hold_time(struct netdev_queue *queue,
-				 struct netdev_queue_attribute *attribute,
 				 const char *buf, size_t len)
 {
 	struct dql *dql = &queue->dql;
@@ -1160,7 +1149,6 @@ static struct netdev_queue_attribute bql_hold_time_attribute =
 	    bql_set_hold_time);
 
 static ssize_t bql_show_inflight(struct netdev_queue *queue,
-				 struct netdev_queue_attribute *attr,
 				 char *buf)
 {
 	struct dql *dql = &queue->dql;
@@ -1173,14 +1161,12 @@ static struct netdev_queue_attribute bql_inflight_attribute =
 
 #define BQL_ATTR(NAME, FIELD)						\
 static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
-				 struct netdev_queue_attribute *attr,	\
 				 char *buf)				\
 {									\
 	return bql_show(buf, queue->dql.FIELD);				\
 }									\
 									\
 static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
-				struct netdev_queue_attribute *attr,	\
 				const char *buf, size_t len)		\
 {									\
 	return bql_set(buf, len, &queue->dql.FIELD);			\
@@ -1211,7 +1197,7 @@ static const struct attribute_group dql_group = {
 
 #ifdef CONFIG_XPS
 static ssize_t show_xps_map(struct netdev_queue *queue,
-			    struct netdev_queue_attribute *attribute, char *buf)
+			    char *buf)
 {
 	struct net_device *dev = queue->dev;
 	int cpu, len, num_tc = 1, tc = 0;
@@ -1258,8 +1244,7 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
 }
 
 static ssize_t store_xps_map(struct netdev_queue *queue,
-		      struct netdev_queue_attribute *attribute,
-		      const char *buf, size_t len)
+			     const char *buf, size_t len)
 {
 	struct net_device *dev = queue->dev;
 	unsigned long index;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 05/10] net: make net sysfs attributes ro_after_init
From: Stephen Hemminger @ 2017-08-18 20:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20170818204628.17147-1-sthemmin@microsoft.com>

The attributes of net devices are immutable.

Ideally, attribute groups would contain const attributes
but there are too many places that do modifications of list
during startup (in other code) to allow that.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 net/core/net-sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 2de441692f28..76ec74d4a65b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -508,7 +508,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(phys_switch_id);
 
-static struct attribute *net_class_attrs[] = {
+static struct attribute *net_class_attrs[] __ro_after_init = {
 	&dev_attr_netdev_group.attr,
 	&dev_attr_type.attr,
 	&dev_attr_dev_id.attr,
@@ -597,7 +597,7 @@ NETSTAT_ENTRY(rx_compressed);
 NETSTAT_ENTRY(tx_compressed);
 NETSTAT_ENTRY(rx_nohandler);
 
-static struct attribute *netstat_attrs[] = {
+static struct attribute *netstat_attrs[] __ro_after_init = {
 	&dev_attr_rx_packets.attr,
 	&dev_attr_tx_packets.attr,
 	&dev_attr_rx_bytes.attr,
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next 04/10] net: constify net_ns_type_operations
From: Stephen Hemminger @ 2017-08-18 20:46 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20170818204628.17147-1-sthemmin@microsoft.com>

This can be const.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
 include/linux/netdevice.h | 2 +-
 net/core/net-sysfs.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index eaa77bd9cb80..b0c928598dab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4028,7 +4028,7 @@ static inline void netdev_class_remove_file(const struct class_attribute *class_
 	netdev_class_remove_file_ns(class_attr, NULL);
 }
 
-extern struct kobj_ns_type_operations net_ns_type_operations;
+extern const struct kobj_ns_type_operations net_ns_type_operations;
 
 const char *netdev_drivername(const struct net_device *dev);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 99061b0a1ebd..2de441692f28 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1461,7 +1461,7 @@ static const void *net_netlink_ns(struct sock *sk)
 	return sock_net(sk);
 }
 
-struct kobj_ns_type_operations net_ns_type_operations = {
+const struct kobj_ns_type_operations net_ns_type_operations = {
 	.type = KOBJ_NS_TYPE_NET,
 	.current_may_mount = net_current_may_mount,
 	.grab_current_ns = net_grab_current_ns,
-- 
2.11.0

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox