* [PATCH net-next 1/7] timecounter: provide a macro to initialize the cyclecounter mask field.
From: Richard Cochran @ 2015-01-01 10:39 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, David Miller, Jeff Kirsher, John Stultz,
Thomas Gleixner
In-Reply-To: <cover.1420108214.git.richardcochran@gmail.com>
There is no need for users of the timecounter/cyclecounter code to include
clocksource.h just for a single macro.
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
---
include/linux/timecounter.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h
index 74f4549..4382035 100644
--- a/include/linux/timecounter.h
+++ b/include/linux/timecounter.h
@@ -19,6 +19,9 @@
#include <linux/types.h>
+/* simplify initialization of mask field */
+#define CYCLECOUNTER_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
+
/**
* struct cyclecounter - hardware abstraction for a free running counter
* Provides completely state-free accessors to the underlying hardware.
@@ -29,7 +32,7 @@
* @read: returns the current cycle value
* @mask: bitmask for two's complement
* subtraction of non 64 bit counters,
- * see CLOCKSOURCE_MASK() helper macro
+ * see CYCLECOUNTER_MASK() helper macro
* @mult: cycle to nanosecond multiplier
* @shift: cycle to nanosecond divisor (power of two)
*/
--
1.7.10.4
^ permalink raw reply related
* [PATCH net-next 7/7] microblaze: include the new timecounter header.
From: Richard Cochran @ 2015-01-01 10:40 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, David Miller, Jeff Kirsher, John Stultz,
Thomas Gleixner
In-Reply-To: <cover.1420108214.git.richardcochran@gmail.com>
The timecounter/cyclecounter code has moved, so users need the new include.
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
---
arch/microblaze/kernel/timer.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c
index dd96f0e..c897745 100644
--- a/arch/microblaze/kernel/timer.c
+++ b/arch/microblaze/kernel/timer.c
@@ -17,6 +17,7 @@
#include <linux/clockchips.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/timecounter.h>
#include <asm/cpuinfo.h>
static void __iomem *timer_baseaddr;
--
1.7.10.4
^ permalink raw reply related
* [PATCH net-next 6/7] mlx4: include clocksource.h again
From: Richard Cochran @ 2015-01-01 10:40 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, David Miller, Jeff Kirsher, John Stultz,
Thomas Gleixner
In-Reply-To: <cover.1420108214.git.richardcochran@gmail.com>
This driver uses the function, clocksource_khz2mult, and so it really must
include clocksource.h.
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
---
drivers/net/ethernet/mellanox/mlx4/en_clock.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index e9cce4f..90b5309 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -32,6 +32,7 @@
*/
#include <linux/mlx4/device.h>
+#include <linux/clocksource.h>
#include "mlx4_en.h"
--
1.7.10.4
^ permalink raw reply related
* [PATCH net-next 5/7] ixgbe: convert to CYCLECOUNTER_MASK macro.
From: Richard Cochran @ 2015-01-01 10:39 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, David Miller, Jeff Kirsher, John Stultz,
Thomas Gleixner
In-Reply-To: <cover.1420108214.git.richardcochran@gmail.com>
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 47c29ea..79c00f5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -793,7 +793,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
memset(&adapter->cc, 0, sizeof(adapter->cc));
adapter->cc.read = ixgbe_ptp_read;
- adapter->cc.mask = CLOCKSOURCE_MASK(64);
+ adapter->cc.mask = CYCLECOUNTER_MASK(64);
adapter->cc.shift = shift;
adapter->cc.mult = 1;
--
1.7.10.4
^ permalink raw reply related
* [PATCH net-next 4/7] igb: convert to CYCLECOUNTER_MASK macro.
From: Richard Cochran @ 2015-01-01 10:39 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, David Miller, Jeff Kirsher, John Stultz,
Thomas Gleixner
In-Reply-To: <cover.1420108214.git.richardcochran@gmail.com>
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
---
drivers/net/ethernet/intel/igb/igb_ptp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 1d27f2d..5e7a4e3 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -765,7 +765,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.settime = igb_ptp_settime_82576;
adapter->ptp_caps.enable = igb_ptp_feature_enable;
adapter->cc.read = igb_ptp_read_82576;
- adapter->cc.mask = CLOCKSOURCE_MASK(64);
+ adapter->cc.mask = CYCLECOUNTER_MASK(64);
adapter->cc.mult = 1;
adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
/* Dial the nominal frequency. */
@@ -785,7 +785,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.settime = igb_ptp_settime_82576;
adapter->ptp_caps.enable = igb_ptp_feature_enable;
adapter->cc.read = igb_ptp_read_82580;
- adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580);
+ adapter->cc.mask = CYCLECOUNTER_MASK(IGB_NBITS_82580);
adapter->cc.mult = 1;
adapter->cc.shift = 0;
/* Enable the timer functions by clearing bit 31. */
--
1.7.10.4
^ permalink raw reply related
* [PATCH net-next 3/7] e1000e: convert to CYCLECOUNTER_MASK macro.
From: Richard Cochran @ 2015-01-01 10:39 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, David Miller, Jeff Kirsher, John Stultz,
Thomas Gleixner
In-Reply-To: <cover.1420108214.git.richardcochran@gmail.com>
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
---
drivers/net/ethernet/intel/e1000e/netdev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index e14fd85..332a298 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4189,7 +4189,7 @@ static int e1000_sw_init(struct e1000_adapter *adapter)
/* Setup hardware time stamping cyclecounter */
if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) {
adapter->cc.read = e1000e_cyclecounter_read;
- adapter->cc.mask = CLOCKSOURCE_MASK(64);
+ adapter->cc.mask = CYCLECOUNTER_MASK(64);
adapter->cc.mult = 1;
/* cc.shift set in e1000e_get_base_tininca() */
--
1.7.10.4
^ permalink raw reply related
* [PATCH net-next 2/7] bnx2x: convert to CYCLECOUNTER_MASK macro.
From: Richard Cochran @ 2015-01-01 10:39 UTC (permalink / raw)
To: netdev
Cc: linux-kernel, David Miller, Jeff Kirsher, John Stultz,
Thomas Gleixner
In-Reply-To: <cover.1420108214.git.richardcochran@gmail.com>
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
---
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 2c95132..0758c8b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -14610,7 +14610,7 @@ static void bnx2x_init_cyclecounter(struct bnx2x *bp)
{
memset(&bp->cyclecounter, 0, sizeof(bp->cyclecounter));
bp->cyclecounter.read = bnx2x_cyclecounter_read;
- bp->cyclecounter.mask = CLOCKSOURCE_MASK(64);
+ bp->cyclecounter.mask = CYCLECOUNTER_MASK(64);
bp->cyclecounter.shift = 1;
bp->cyclecounter.mult = 1;
}
--
1.7.10.4
^ permalink raw reply related
* Re: [PATCH net-next v2 2/2] bridge: modify bridge af spec parser to accomodate vlan list and ranges
From: roopa @ 2015-01-01 10:01 UTC (permalink / raw)
To: Arad, Ronen
Cc: netdev@vger.kernel.org, hemminger@vyatta.com, vyasevic@redhat.com,
sfeldma@gmail.com, wkok@cumulusnetworks.com
In-Reply-To: <E4CD12F19ABA0C4D8729E087A761DC3505DD2DE4@ORSMSX101.amr.corp.intel.com>
On 1/1/15, 12:54 AM, Arad, Ronen wrote:
>
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On
>> Behalf Of roopa@cumulusnetworks.com
>> Sent: Wednesday, December 31, 2014 6:49 PM
>> To: netdev@vger.kernel.org; hemminger@vyatta.com; vyasevic@redhat.com
>> Cc: sfeldma@gmail.com; wkok@cumulusnetworks.com; Roopa Prabhu
>> Subject: [PATCH net-next v2 2/2] bridge: modify bridge af spec parser to
>> accomodate vlan list and ranges
>>
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This patch modifies br_afspec to parse incoming IFLA_BRIDGE_VLAN_INFO_LIST
>>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>> ---
>> net/bridge/br_netlink.c | 115 ++++++++++++++++++++++++++++++++++------------
>> -
>> 1 file changed, 85 insertions(+), 30 deletions(-)
>>
>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>> index 492ef6a..bcba9d2 100644
>> --- a/net/bridge/br_netlink.c
>> +++ b/net/bridge/br_netlink.c
>> @@ -226,53 +226,108 @@ static const struct nla_policy
>> ifla_br_policy[IFLA_MAX+1] = {
>> [IFLA_BRIDGE_VLAN_INFO_LIST] = { .type = NLA_NESTED, },
>> };
>>
>> +static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
>> + int cmd, struct bridge_vlan_info *vinfo)
>> +{
>> + int err = 0;
>> +
>> + switch (cmd) {
>> + case RTM_SETLINK:
>> + if (p) {
>> + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
>> + if (err)
>> + break;
>> +
>> + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
>> + err = br_vlan_add(p->br, vinfo->vid,
>> + vinfo->flags);
>> + } else {
>> + err = br_vlan_add(br, vinfo->vid, vinfo->flags);
>> + }
>> + break;
>> +
>> + case RTM_DELLINK:
>> + if (p) {
>> + nbp_vlan_delete(p, vinfo->vid);
>> + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
>> + br_vlan_delete(p->br, vinfo->vid);
>> + } else {
>> + br_vlan_delete(br, vinfo->vid);
>> + }
>> + break;
>> + }
>> +
>> + return err;
>> +}
>> +
>> static int br_afspec(struct net_bridge *br,
>> struct net_bridge_port *p,
>> struct nlattr *af_spec,
>> int cmd)
>> {
>> struct nlattr *tb[IFLA_BRIDGE_MAX+1];
>> + struct nlattr *attr;
>> int err = 0;
>> + int rem;
>>
>> err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy);
>> if (err)
>> return err;
>>
>> if (tb[IFLA_BRIDGE_VLAN_INFO]) {
>> - struct bridge_vlan_info *vinfo;
>> -
>> - vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
>> -
>> - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
>> - return -EINVAL;
>> -
>> - switch (cmd) {
>> - case RTM_SETLINK:
>> - if (p) {
>> - err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
>> - if (err)
>> - break;
>> -
>> - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
>> - err = br_vlan_add(p->br, vinfo->vid,
>> - vinfo->flags);
>> - } else
>> - err = br_vlan_add(br, vinfo->vid, vinfo->flags);
>> -
>> - break;
>> -
>> - case RTM_DELLINK:
>> - if (p) {
>> - nbp_vlan_delete(p, vinfo->vid);
>> - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
>> - br_vlan_delete(p->br, vinfo->vid);
>> - } else
>> - br_vlan_delete(br, vinfo->vid);
>> - break;
>> + attr = tb[IFLA_BRIDGE_VLAN_INFO];
>> + if (nla_len(attr) != sizeof(struct bridge_vlan_info))
>> + goto err_inval;
>> +
>> + err = br_vlan_info(br, p, cmd,
>> + (struct bridge_vlan_info *)nla_data(attr));
>> +
>> + } else if (tb[IFLA_BRIDGE_VLAN_INFO_LIST]) {
>> + struct bridge_vlan_info *vinfo_start = NULL;
>> + struct bridge_vlan_info *vinfo = NULL;
>> +
>> + nla_for_each_nested(attr, tb[IFLA_BRIDGE_VLAN_INFO_LIST], rem) {
>> + if (nla_len(attr) != sizeof(struct bridge_vlan_info) ||
>> + nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
>> + goto err_inval;
>> + vinfo = nla_data(attr);
>> + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_START) {
>> + if (vinfo_start)
>> + goto err_inval;
>> + vinfo_start = vinfo;
>> + continue;
>> + }
>> +
>> + if (vinfo_start) {
>> + int v;
>> +
>> + if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END))
>> + goto err_inval;
>> +
>> + if (vinfo->vid < vinfo_start->vid)
> This check rejects an inverted range. However, it allows the RANGE_START and
> RANGE_END vinfos to have the same vid. Isn't that inconsistent with the rejection
> of a single vinfo with both RANGE_START and RANGE_END set?
sure, i will make it <= to error out if they are equal.
>> + goto err_inval;
> Is additional validation, such as consistency of flags between the RANGE_START
> and RANGE_END vinfos, needed here?
sure, i can add them (I have some of them in the iproute2 patch as well).
> The loop below applies flags (more precisely all data except for vid) from the
> RANGE_START vinfo to all vids in the range. All data from the RANGE_END vinfo
> is ignored.
>
>> +
>> + for (v = vinfo_start->vid; v <= vinfo->vid;
>> + v++) {
>> + vinfo_start->vid = v;
> This changes the vinfo with the RANGE_START flag within the incoming message. Would
> it be better to leave the input message unmodified and use a local copy of
> struct bridge_vlan_info?
agreed, I will make a copy.
>> + err = br_vlan_info(br, p, cmd,
>> + vinfo_start);
>> + if (err)
>> + break;
>> + }
>> + vinfo_start = NULL;
>> + } else {
>> + err = br_vlan_info(br, p, cmd, vinfo);
>> + }
>> + if (err)
>> + break;
>> }
>> }
>>
>> return err;
>> +
>> +err_inval:
>> + return -EINVAL;
>> }
>>
>> static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
>> --
>> 1.7.10.4
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* RE: [PATCH net-next v2 2/2] bridge: modify bridge af spec parser to accomodate vlan list and ranges
From: Arad, Ronen @ 2015-01-01 8:54 UTC (permalink / raw)
To: roopa@cumulusnetworks.com, netdev@vger.kernel.org,
hemminger@vyatta.com, vyasevic@redhat.com
Cc: sfeldma@gmail.com, wkok@cumulusnetworks.com
In-Reply-To: <1420044533-16963-3-git-send-email-roopa@cumulusnetworks.com>
>-----Original Message-----
>From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On
>Behalf Of roopa@cumulusnetworks.com
>Sent: Wednesday, December 31, 2014 6:49 PM
>To: netdev@vger.kernel.org; hemminger@vyatta.com; vyasevic@redhat.com
>Cc: sfeldma@gmail.com; wkok@cumulusnetworks.com; Roopa Prabhu
>Subject: [PATCH net-next v2 2/2] bridge: modify bridge af spec parser to
>accomodate vlan list and ranges
>
>From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
>This patch modifies br_afspec to parse incoming IFLA_BRIDGE_VLAN_INFO_LIST
>
>Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>---
> net/bridge/br_netlink.c | 115 ++++++++++++++++++++++++++++++++++------------
>-
> 1 file changed, 85 insertions(+), 30 deletions(-)
>
>diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>index 492ef6a..bcba9d2 100644
>--- a/net/bridge/br_netlink.c
>+++ b/net/bridge/br_netlink.c
>@@ -226,53 +226,108 @@ static const struct nla_policy
>ifla_br_policy[IFLA_MAX+1] = {
> [IFLA_BRIDGE_VLAN_INFO_LIST] = { .type = NLA_NESTED, },
> };
>
>+static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
>+ int cmd, struct bridge_vlan_info *vinfo)
>+{
>+ int err = 0;
>+
>+ switch (cmd) {
>+ case RTM_SETLINK:
>+ if (p) {
>+ err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
>+ if (err)
>+ break;
>+
>+ if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
>+ err = br_vlan_add(p->br, vinfo->vid,
>+ vinfo->flags);
>+ } else {
>+ err = br_vlan_add(br, vinfo->vid, vinfo->flags);
>+ }
>+ break;
>+
>+ case RTM_DELLINK:
>+ if (p) {
>+ nbp_vlan_delete(p, vinfo->vid);
>+ if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
>+ br_vlan_delete(p->br, vinfo->vid);
>+ } else {
>+ br_vlan_delete(br, vinfo->vid);
>+ }
>+ break;
>+ }
>+
>+ return err;
>+}
>+
> static int br_afspec(struct net_bridge *br,
> struct net_bridge_port *p,
> struct nlattr *af_spec,
> int cmd)
> {
> struct nlattr *tb[IFLA_BRIDGE_MAX+1];
>+ struct nlattr *attr;
> int err = 0;
>+ int rem;
>
> err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy);
> if (err)
> return err;
>
> if (tb[IFLA_BRIDGE_VLAN_INFO]) {
>- struct bridge_vlan_info *vinfo;
>-
>- vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
>-
>- if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
>- return -EINVAL;
>-
>- switch (cmd) {
>- case RTM_SETLINK:
>- if (p) {
>- err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
>- if (err)
>- break;
>-
>- if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
>- err = br_vlan_add(p->br, vinfo->vid,
>- vinfo->flags);
>- } else
>- err = br_vlan_add(br, vinfo->vid, vinfo->flags);
>-
>- break;
>-
>- case RTM_DELLINK:
>- if (p) {
>- nbp_vlan_delete(p, vinfo->vid);
>- if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
>- br_vlan_delete(p->br, vinfo->vid);
>- } else
>- br_vlan_delete(br, vinfo->vid);
>- break;
>+ attr = tb[IFLA_BRIDGE_VLAN_INFO];
>+ if (nla_len(attr) != sizeof(struct bridge_vlan_info))
>+ goto err_inval;
>+
>+ err = br_vlan_info(br, p, cmd,
>+ (struct bridge_vlan_info *)nla_data(attr));
>+
>+ } else if (tb[IFLA_BRIDGE_VLAN_INFO_LIST]) {
>+ struct bridge_vlan_info *vinfo_start = NULL;
>+ struct bridge_vlan_info *vinfo = NULL;
>+
>+ nla_for_each_nested(attr, tb[IFLA_BRIDGE_VLAN_INFO_LIST], rem) {
>+ if (nla_len(attr) != sizeof(struct bridge_vlan_info) ||
>+ nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
>+ goto err_inval;
>+ vinfo = nla_data(attr);
>+ if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_START) {
>+ if (vinfo_start)
>+ goto err_inval;
>+ vinfo_start = vinfo;
>+ continue;
>+ }
>+
>+ if (vinfo_start) {
>+ int v;
>+
>+ if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END))
>+ goto err_inval;
>+
>+ if (vinfo->vid < vinfo_start->vid)
This check rejects an inverted range. However, it allows the RANGE_START and
RANGE_END vinfos to have the same vid. Isn't that inconsistent with the rejection
of a single vinfo with both RANGE_START and RANGE_END set?
>+ goto err_inval;
Is additional validation, such as consistency of flags between the RANGE_START
and RANGE_END vinfos, needed here?
The loop below applies flags (more precisely all data except for vid) from the
RANGE_START vinfo to all vids in the range. All data from the RANGE_END vinfo
is ignored.
>+
>+ for (v = vinfo_start->vid; v <= vinfo->vid;
>+ v++) {
>+ vinfo_start->vid = v;
This changes the vinfo with the RANGE_START flag within the incoming message. Would
it be better to leave the input message unmodified and use a local copy of
struct bridge_vlan_info?
>+ err = br_vlan_info(br, p, cmd,
>+ vinfo_start);
>+ if (err)
>+ break;
>+ }
>+ vinfo_start = NULL;
>+ } else {
>+ err = br_vlan_info(br, p, cmd, vinfo);
>+ }
>+ if (err)
>+ break;
> }
> }
>
> return err;
>+
>+err_inval:
>+ return -EINVAL;
> }
>
> static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
>--
>1.7.10.4
>
>--
>To unsubscribe from this list: send the line "unsubscribe netdev" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH net-next v2 1/2] bridge: new attribute and flags to represent vlan info lists and ranges
From: roopa @ 2015-01-01 4:25 UTC (permalink / raw)
To: Jeremiah Mahler, netdev, shemminger, vyasevic, sfeldma, wkok
In-Reply-To: <20150101030817.GA10710@hudson.localdomain>
On 12/31/14, 7:08 PM, Jeremiah Mahler wrote:
> Roopa,
>
> On Wed, Dec 31, 2014 at 01:17:31PM -0800, roopa wrote:
>> On 12/31/14, 10:48 AM, Jeremiah Mahler wrote:
>>> Roopa,
>>>
>>> On Wed, Dec 31, 2014 at 10:15:53AM -0800, roopa wrote:
>>>> On 12/31/14, 9:45 AM, Jeremiah Mahler wrote:
>>>>> Roopa,
>>>>>
>>>>> On Wed, Dec 31, 2014 at 08:48:52AM -0800, roopa@cumulusnetworks.com wrote:
>>>>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>>>
>>>>>> This patch adds (as suggested by scott feldman),
>>>>>> - new netlink attribute IFLA_BRIDGE_VLAN_INFO_LIST to represent
>>>>>> vlan list
>>>>>> - And bridge_vlan_info flags BRIDGE_VLAN_INFO_RANGE_START and
>>>>>> BRIDGE_VLAN_INFO_RANGE_END to indicate start and end of vlan range
>>>>>>
>>>>>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>>>> ---
>>>>>> include/uapi/linux/if_bridge.h | 4 ++++
>>>>>> net/bridge/br_netlink.c | 1 +
>>>>>> 2 files changed, 5 insertions(+)
>>>>>>
>>>>>> diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
>>>>>> index b03ee8f..fa468aa 100644
>>>>>> --- a/include/uapi/linux/if_bridge.h
>>>>>> +++ b/include/uapi/linux/if_bridge.h
>>>>>> @@ -112,12 +112,14 @@ struct __fdb_entry {
>>>>>> * [IFLA_BRIDGE_FLAGS]
>>>>>> * [IFLA_BRIDGE_MODE]
>>>>>> * [IFLA_BRIDGE_VLAN_INFO]
>>>>>> + * [IFLA_BRIDGE_VLAN_INFO_LIST]
>>>>>> * }
>>>>>> */
>>>>>> enum {
>>>>>> IFLA_BRIDGE_FLAGS,
>>>>>> IFLA_BRIDGE_MODE,
>>>>>> IFLA_BRIDGE_VLAN_INFO,
>>>>>> + IFLA_BRIDGE_VLAN_INFO_LIST,
>>>>>> __IFLA_BRIDGE_MAX,
>>>>>> };
>>>>>> #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
>>>>>> @@ -125,6 +127,8 @@ enum {
>>>>>> #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */
>>>>>> #define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */
>>>>>> #define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */
>>>>>> +#define BRIDGE_VLAN_INFO_RANGE_START (1<<3) /* VLAN is start of vlan range */
>>>>>> +#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */
>>>>> You add these here but you don't use them until the next patch.
>>>>> If they were wrong a bisect would point to the next patch.
>>>>>
>>>>> I would add them in the next patch where you start to use them.
>>>> I thought it was ok to declare it first and use them in the next patch. Only
>>>> the other way around would be bad.
>>>> I have submitted in a similar way before. If needed i will resubmit.
>>>>
>>>>
>>> Hmm. I cannot see how the other way would be bad but maybe I am missing
>>> something.
>> sorry, i did not mean what you were saying would be bad. I was just trying
>> to say that, use first and declare later would be bad (ie if my patches 1
>> and 2 were swapped). Otherwise i don't see a problem.
>>
> Now I understand. Yes, swapping the patches would be bad.
>
>> I know that you are saying i should combine the patches 1 and 2 into a
>> single patch. That is not a problem. If i need to respin again due to other
>> reasons i will consider merging them as well if that is a concern.
>>
> Er, well not quite. I don't think both patches should be combined into
> one. I would only move those two #defines that I pointed out in the
> first patch into the second patch.
>
> I hope that makes a little more sense :)
okay :).
thanks,
Roopa
>
>> thanks.
>>
>>> Hopefully someone else has some insight.
>>>
>>>>>> struct bridge_vlan_info {
>>>>>> __u16 flags;
>>>>>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>>>>>> index 9f5eb55..492ef6a 100644
>>>>>> --- a/net/bridge/br_netlink.c
>>>>>> +++ b/net/bridge/br_netlink.c
>>>>>> @@ -223,6 +223,7 @@ static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = {
>>>>>> [IFLA_BRIDGE_MODE] = { .type = NLA_U16 },
>>>>>> [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY,
>>>>>> .len = sizeof(struct bridge_vlan_info), },
>>>>>> + [IFLA_BRIDGE_VLAN_INFO_LIST] = { .type = NLA_NESTED, },
>>>>>> };
>>>>>> static int br_afspec(struct net_bridge *br,
>>>>>> --
>>>>>> 1.7.10.4
>>>>>>
>>>>>> --
>>>>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>>>>> the body of a message to majordomo@vger.kernel.org
>>>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH net-next v2 1/2] bridge: new attribute and flags to represent vlan info lists and ranges
From: Jeremiah Mahler @ 2015-01-01 3:08 UTC (permalink / raw)
To: roopa; +Cc: netdev, shemminger, vyasevic, sfeldma, wkok
In-Reply-To: <54A467EB.5010404@cumulusnetworks.com>
Roopa,
On Wed, Dec 31, 2014 at 01:17:31PM -0800, roopa wrote:
> On 12/31/14, 10:48 AM, Jeremiah Mahler wrote:
> >Roopa,
> >
> >On Wed, Dec 31, 2014 at 10:15:53AM -0800, roopa wrote:
> >>On 12/31/14, 9:45 AM, Jeremiah Mahler wrote:
> >>>Roopa,
> >>>
> >>>On Wed, Dec 31, 2014 at 08:48:52AM -0800, roopa@cumulusnetworks.com wrote:
> >>>>From: Roopa Prabhu <roopa@cumulusnetworks.com>
> >>>>
> >>>>This patch adds (as suggested by scott feldman),
> >>>> - new netlink attribute IFLA_BRIDGE_VLAN_INFO_LIST to represent
> >>>> vlan list
> >>>> - And bridge_vlan_info flags BRIDGE_VLAN_INFO_RANGE_START and
> >>>> BRIDGE_VLAN_INFO_RANGE_END to indicate start and end of vlan range
> >>>>
> >>>>Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
> >>>>---
> >>>> include/uapi/linux/if_bridge.h | 4 ++++
> >>>> net/bridge/br_netlink.c | 1 +
> >>>> 2 files changed, 5 insertions(+)
> >>>>
> >>>>diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
> >>>>index b03ee8f..fa468aa 100644
> >>>>--- a/include/uapi/linux/if_bridge.h
> >>>>+++ b/include/uapi/linux/if_bridge.h
> >>>>@@ -112,12 +112,14 @@ struct __fdb_entry {
> >>>> * [IFLA_BRIDGE_FLAGS]
> >>>> * [IFLA_BRIDGE_MODE]
> >>>> * [IFLA_BRIDGE_VLAN_INFO]
> >>>>+ * [IFLA_BRIDGE_VLAN_INFO_LIST]
> >>>> * }
> >>>> */
> >>>> enum {
> >>>> IFLA_BRIDGE_FLAGS,
> >>>> IFLA_BRIDGE_MODE,
> >>>> IFLA_BRIDGE_VLAN_INFO,
> >>>>+ IFLA_BRIDGE_VLAN_INFO_LIST,
> >>>> __IFLA_BRIDGE_MAX,
> >>>> };
> >>>> #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
> >>>>@@ -125,6 +127,8 @@ enum {
> >>>> #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */
> >>>> #define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */
> >>>> #define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */
> >>>>+#define BRIDGE_VLAN_INFO_RANGE_START (1<<3) /* VLAN is start of vlan range */
> >>>>+#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */
> >>>You add these here but you don't use them until the next patch.
> >>>If they were wrong a bisect would point to the next patch.
> >>>
> >>>I would add them in the next patch where you start to use them.
> >>I thought it was ok to declare it first and use them in the next patch. Only
> >>the other way around would be bad.
> >> I have submitted in a similar way before. If needed i will resubmit.
> >>
> >>
> >Hmm. I cannot see how the other way would be bad but maybe I am missing
> >something.
> sorry, i did not mean what you were saying would be bad. I was just trying
> to say that, use first and declare later would be bad (ie if my patches 1
> and 2 were swapped). Otherwise i don't see a problem.
>
Now I understand. Yes, swapping the patches would be bad.
> I know that you are saying i should combine the patches 1 and 2 into a
> single patch. That is not a problem. If i need to respin again due to other
> reasons i will consider merging them as well if that is a concern.
>
Er, well not quite. I don't think both patches should be combined into
one. I would only move those two #defines that I pointed out in the
first patch into the second patch.
I hope that makes a little more sense :)
> thanks.
>
> > Hopefully someone else has some insight.
> >
> >>
> >>>> struct bridge_vlan_info {
> >>>> __u16 flags;
> >>>>diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
> >>>>index 9f5eb55..492ef6a 100644
> >>>>--- a/net/bridge/br_netlink.c
> >>>>+++ b/net/bridge/br_netlink.c
> >>>>@@ -223,6 +223,7 @@ static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = {
> >>>> [IFLA_BRIDGE_MODE] = { .type = NLA_U16 },
> >>>> [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY,
> >>>> .len = sizeof(struct bridge_vlan_info), },
> >>>>+ [IFLA_BRIDGE_VLAN_INFO_LIST] = { .type = NLA_NESTED, },
> >>>> };
> >>>> static int br_afspec(struct net_bridge *br,
> >>>>--
> >>>>1.7.10.4
> >>>>
> >>>>--
> >>>>To unsubscribe from this list: send the line "unsubscribe netdev" in
> >>>>the body of a message to majordomo@vger.kernel.org
> >>>>More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
- Jeremiah Mahler
^ permalink raw reply
* Re: [net-next PATCH 00/17] fib_trie: Reduce time spent in fib_table_lookup by 35 to 75%
From: Alexander Duyck @ 2015-01-01 2:32 UTC (permalink / raw)
To: David Miller, alexander.h.duyck; +Cc: netdev
In-Reply-To: <20141231.184610.1802958694945952516.davem@davemloft.net>
On 12/31/2014 03:46 PM, David Miller wrote:
> From: Alexander Duyck <alexander.h.duyck@redhat.com>
> Date: Wed, 31 Dec 2014 10:55:23 -0800
>
>> These patches are meant to address several performance issues I have seen
>> in the fib_trie implementation, and fib_table_lookup specifically. With
>> these changes in place I have seen a reduction of up to 35 to 75% for the
>> total time spent in fib_table_lookup depending on the type of search being
>> performed.
> ...
>> Changes since RFC:
>> Replaced this_cpu_ptr with correct call to this_cpu_inc in patch 1
>> Changed test for leaf_info mismatch to (key ^ n->key) & li->mask_plen in patch 10
> As before, this looks awesome.
Thanks.
> All applied to net-next, thanks!
>
> This knocks about 35 cpu cycles off of a lookup that ends up using the
> default route on sparc64. From about ~438 cycles to ~403.
Did that 438 value include both fib_table_lookup and check_leaf? Just
curious as the overall gain seems smaller than what I have been seeing
on the x86 system I was testing with, but then again it could just be a
sparc64 thing.
I've started work on a second round of patches. With any luck they
should be ready by the time the next net-next opens. My hope is to cut
the look-up time by another 30 to 50%, though it will take some time as
I have to go through and drop the leaf_info structure, and look at
splitting the tnode in half to break the key/pos/bits and child pointer
dependency chain which will hopefully allow for a significant reduction
in memory read stalls.
I am also planning to take a look at addressing the memory waste that
occurs on nodes larger than 256 bytes due to the way kmalloc allocates
memory as powers of 2. I'm thinking I might try encouraging the growth
of smaller nodes, and discouraging anything over 256 by implementing a
"truesize" type logic that can be used in the inflate/halve functions so
that the memory usage is more accurately reflected.
- Alex
^ permalink raw reply
* WIP alternative - was Re: [PATCH v3 14/20] selftests/size: add install target to enable test install
From: Tim Bird @ 2015-01-01 2:31 UTC (permalink / raw)
To: Shuah Khan, mmarek@suse.cz, gregkh@linuxfoundation.org,
akpm@linux-foundation.org, rostedt@goodmis.org, mingo@redhat.com,
davem@davemloft.net, keescook@chromium.org,
tranmanphong@gmail.com, mpe@ellerman.id.au, cov@codeaurora.org,
dh.herrmann@gmail.com, hughd@google.com, bobby.prani@gmail.com,
serge.hallyn@ubuntu.com, ebiederm@xmission.com,
josh@joshtriplett.org, koct9i@gmail.com
Cc: linux-kbuild@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-api@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <c961f80614b02a2f3c6455518d1f6fabdaf6c7f3.1419387513.git.shuahkh@osg.samsung.com>
On 12/24/2014 08:27 AM, Shuah Khan wrote:
> Add a new make target to enable installing the test. This target
> installs the test in the kselftest install location and adds a line to the
> kselftest script to run the test. The install target can be run
> only from the top level kernel source directory.
>
> Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
> ---
> tools/testing/selftests/size/Makefile | 12 +++++++++++-
> 1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile
> index 04dc25e..bb7113b 100644
> --- a/tools/testing/selftests/size/Makefile
> +++ b/tools/testing/selftests/size/Makefile
> @@ -1,12 +1,22 @@
> CC = $(CROSS_COMPILE)gcc
>
> +TEST_STR = ./get_size || echo get_size selftests: [FAIL]
> +
> all: get_size
>
> get_size: get_size.c
> $(CC) -static -ffreestanding -nostartfiles -s $< -o $@
>
> +install:
> +ifdef INSTALL_KSFT_PATH
> + install ./get_size $(INSTALL_KSFT_PATH)
> + @echo "$(TEST_STR)" >> $(KSELFTEST)
> +else
> + @echo Run make kselftest_install in top level source directory
> +endif
> +
> run_tests: all
> - ./get_size
> + @$(TEST_STR)
>
> clean:
> $(RM) get_size
>
The install phase is desperately needed for usage of kselftest in
cross-target situations (applicable to almost all embedded). So this
is great stuff.
I worked a bit on isolating the install stuff to a makefile include file.
This allows simplifying some of the sub-level Makefiles a bit, and allowing
control of some of the install and run logic in fewer places.
This is still a work in progress, but before I got too far along, I wanted
to post it for people to provide feedback. A couple of problems cropped
up that are worth discussing, IMHO.
1) I think it should be a requirement that each test has a single
"main" program to execute to run the tests. If multiple tests are supported
or more flexibility is desired for additional arguments, or that sort of
thing, then that's fine, but the automated script builder should be able
to run just a single program or script to have things work. This also
makes things more consistent. In the case of the firmware test, I created
a single fw_both.sh script to do this, instead of having two separate
blocks in the kselftest.sh script.
2) I've added a CROSS_INSTALL variable, which can call an arbitrary program
to place files on the target system (rather than just calling 'install').
In my case, I'd use my own 'ttc cp' command, which I can extend as necessary
to put things on a remote machine. This works for a single directory,
but things get dicier with sub-directory trees full of files (like
the ftrace test uses.)
If additional items need to be installed to the target, then maybe a setup
program should be used, rather than just copying files.
3) Some of the scripts were using /bin/bash to execute them, rather
than rely on the interpreter line in the script itself (and having
the script have executable privileges). Is there a reason for this?
I modified a few scripts to be executable, and got rid of the
explicit execution with /bin/bash.
The following is just a start... Let me know if this direction looks
OK, and I'll finish this up. The main item to look at is the
kselftest.include file. Note that these patches are based on Shuah's
series - but if you want to use these ideas I can rebase them onto
mainline, and break them out per test sub-level like Shuah did.
Let me know what you think.
>From 14164fd3117c97799a050f8cf791dedc93aa5e82 Mon Sep 17 00:00:00 2001
From: Tim Bird <tim.bird@sonymobile.com>
Date: Wed, 31 Dec 2014 18:04:08 -0800
Subject: [PATCH] Switch to using an include file for common kselftest_install
actions
---
tools/testing/selftests/cpu-hotplug/Makefile | 15 +++-------
.../selftests/cpu-hotplug/cpu-on-off-test.sh | 0
tools/testing/selftests/efivarfs/Makefile | 21 ++++----------
tools/testing/selftests/efivarfs/efivarfs.sh | 0
tools/testing/selftests/firmware/Makefile | 32 ++++------------------
tools/testing/selftests/firmware/fw_both.sh | 13 +++++++++
tools/testing/selftests/kselftest.include | 26 ++++++++++++++++++
tools/testing/selftests/size/Makefile | 19 +++----------
9 files changed, 60 insertions(+), 69 deletions(-)
mode change 100644 => 100755 tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
mode change 100644 => 100755 tools/testing/selftests/efivarfs/efivarfs.sh
create mode 100755 tools/testing/selftests/firmware/fw_both.sh
create mode 100644 tools/testing/selftests/kselftest.include
diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile
index c9e15ee..d2e540d 100644
--- a/tools/testing/selftests/cpu-hotplug/Makefile
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@@ -1,18 +1,11 @@
-TEST_STR=/bin/bash ./cpu-on-off-test.sh || echo cpu-hotplug selftests: [FAIL]
+TEST_TITLE = cpu-hotplug
+TEST_MAIN_PROG = cpu-on-off-test.sh
all:
-install:
-ifdef INSTALL_KSFT_PATH
- install ./cpu-on-off-test.sh $(INSTALL_KSFT_PATH)/cpu-on-off-test.sh
- @echo "$(TEST_STR)" >> $(KSELFTEST)
-else
- @echo Run make kselftest_install in top level source directory
-endif
-
-run_tests:
- @$(TEST_STR)
+include ../kselftest.include
+# use -a to get all tests
run_full_test:
@/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile
index aaf404b..8d48e9c 100644
--- a/tools/testing/selftests/efivarfs/Makefile
+++ b/tools/testing/selftests/efivarfs/Makefile
@@ -1,24 +1,15 @@
-CC = $(CROSS_COMPILE)gcc
-CFLAGS = -Wall
+TEST_TITLE = efivarfs
+TEST_MAIN_PROG = efivarfs.sh
-test_objs = open-unlink create-read
+CFLAGS = -Wall
-TEST_STR = /bin/bash ./efivarfs.sh || echo efivarfs selftests: [FAIL]
+TEST_PROGS = open-unlink create-read
all:
gcc open-unlink.c -o open-unlink
gcc create-read.c -o create-read
-install:
-ifdef INSTALL_KSFT_PATH
- install ./efivarfs.sh $(test_objs) $(INSTALL_KSFT_PATH)
- @echo "$(TEST_STR)" >> $(KSELFTEST)
-else
- @echo Run make kselftest_install in top level source directory
-endif
-
-run_tests: all
- @$(TEST_STR)
+include ../kselftest.include
clean:
- rm -f $(test_objs)
+ rm -f $(TEST_PROGS)
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
old mode 100644
new mode 100755
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 7ac1cf3..b576308 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -1,36 +1,14 @@
# Makefile for firmware loading selftests
+TEST_MAIN_PROG = fw_both.sh
+TEST_TITLE = firmware
+TEST_FILES = ./fw_filesystem.sh ./fw_userhelper.sh
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
-
-__fw_filesystem:
-fw_filesystem = if /bin/sh ./fw_filesystem.sh ; then
-fw_filesystem += echo fw_filesystem: ok;
-fw_filesystem += else echo fw_filesystem: [FAIL];
-fw_filesystem += fi
-
-__fw_userhelper:
-fw_userhelper = if /bin/sh ./fw_userhelper.sh ; then
-fw_userhelper += echo fw_userhelper: ok;
-fw_userhelper += else
-fw_userhelper += echo fw_userhelper: [FAIL];
-fw_userhelper += fi
-
all:
-install:
-ifdef INSTALL_KSFT_PATH
- install ./fw_filesystem.sh ./fw_userhelper.sh $(INSTALL_KSFT_PATH)
- @echo "$(fw_filesystem)" >> $(KSELFTEST)
- @echo "$(fw_userhelper)" >> $(KSELFTEST)
-else
- @echo Run make kselftest_install in top level source directory
-endif
-
-run_tests:
- @$(fw_filesystem)
- @$(fw_userhelper)
+include ../kselftest.include
# Nothing to clean up.
clean:
-.PHONY: all clean run_tests fw_filesystem fw_userhelper
+.PHONY: all clean run_tests
diff --git a/tools/testing/selftests/firmware/fw_both.sh b/tools/testing/selftests/firmware/fw_both.sh
new file mode 100755
index 0000000..fb7fa8d
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_both.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+if /bin/sh ./fw_filesystem.sh ; then
+ echo "fw_filesystem: ok";
+else
+ echo "fw_filesystem: [FAIL]";
+fi
+
+if /bin/sh ./fw_userhelper.sh ; then
+ echo "fw_userhelper: ok";
+else
+ echo "fw_userhelper: [FAIL]";
+fi
diff --git a/tools/testing/selftests/kselftest.include b/tools/testing/selftests/kselftest.include
new file mode 100644
index 0000000..7020a82
--- /dev/null
+++ b/tools/testing/selftests/kselftest.include
@@ -0,0 +1,26 @@
+# this make include script expects the following variables to be set:
+# TEST_MAIN_PROG - a single program to invoke to run the test
+# TEST_TITLE - the title of the test
+# TEST_FILES - other files that should be copied to the testing directory
+# INSTALL_KSFT_PATH - where to put test programs
+# KSELFTEST - the name of the generated script (which calls the sub-tests)
+# the 'all' target, which makes sure everything is built
+
+# this file defines the 'install' and 'run_tests' targets
+
+CC = $(CROSS_COMPILE)gcc
+INSTALL = $(CROSS_INSTALL)install
+export INSTALL
+
+TEST_STR = ./$(TEST_MAIN_PROG) || echo $(TEST_TITLE) selftests: [FAIL]
+
+install:
+ifdef INSTALL_KSFT_PATH
+ $(INSTALL) ./$(TEST_MAIN_PROG) $(TEST_FILES) $(INSTALL_KSFT_PATH)
+ @echo "$(TEST_STR)" >> $(KSELFTEST)
+else
+ @echo Run make kselftest_install in the top level source directory
+endif
+
+run_tests: all
+ @$(TEST_STR)
diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile
index bb7113b..c88819a 100644
--- a/tools/testing/selftests/size/Makefile
+++ b/tools/testing/selftests/size/Makefile
@@ -1,22 +1,11 @@
-CC = $(CROSS_COMPILE)gcc
+TEST_MAIN_PROG = get_size
-TEST_STR = ./get_size || echo get_size selftests: [FAIL]
+all: $(TEST_MAIN_PROG)
-all: get_size
+include ../kselftest.include
get_size: get_size.c
$(CC) -static -ffreestanding -nostartfiles -s $< -o $@
-install:
-ifdef INSTALL_KSFT_PATH
- install ./get_size $(INSTALL_KSFT_PATH)
- @echo "$(TEST_STR)" >> $(KSELFTEST)
-else
- @echo Run make kselftest_install in top level source directory
-endif
-
-run_tests: all
- @$(TEST_STR)
-
clean:
- $(RM) get_size
+ $(RM) $(TEST_MAIN_PROG)
--
1.8.2.2
^ permalink raw reply related
* Re: [PATCH v3 0/6] support GMAC driver for RK3288
From: Heiko Stübner @ 2015-01-01 2:06 UTC (permalink / raw)
To: David Miller
Cc: roger.chen, peppe.cavallaro, netdev, linux-kernel, linux-rockchip,
kever.yang, eddie.cai
In-Reply-To: <20141231.191538.585375391107637966.davem@davemloft.net>
Hi David,
Am Mittwoch, 31. Dezember 2014, 19:15:38 schrieb David Miller:
> From: Roger Chen <roger.chen@rock-chips.com>
> Date: Mon, 29 Dec 2014 17:42:32 +0800
>
> > Roger Chen (6):
> > patch1: add driver for Rockchip RK3288 SoCs integrated GMAC
> > patch2: define clock ID used for GMAC
> > patch3: modify CRU config for Rockchip RK3288 SoCs integrated GMAC
> > patch4: dts: rockchip: add gmac info for rk3288
> > patch5: dts: rockchip: enable gmac on RK3288 evb board
> > patch6: add document for Rockchip RK3288 GMAC
> >
> > Tested on rk3288 evb board:
> > Execute the following command to enable ethernet,
> > set local IP and ping a remote host.
> >
> > busybox ifconfig eth0 up
> > busybox ifconfig eth0 192.168.1.111
> > ping 192.168.1.1
>
> Series applied to net-next, thanks.
could we split this up a bit instead?
If everything goes through your tree, we'll end up with a lot of conflicts with
other pending stuff, so I'd really appreciate if you could leave out patches
patch2: define clock ID used for GMAC
patch3: modify CRU config for Rockchip RK3288 SoCs integrated GMAC
patch4: dts: rockchip: add gmac info for rk3288
patch5: dts: rockchip: enable gmac on RK3288 evb board
so only take the driver itself and the dt-binding and I would take these 4
above through the appropriate trees (clk and arm-soc)
Thanks and happy new year
Heiko
^ permalink raw reply
* Re: [PATCH 5/6] bridge: new function to pack vlans using both IFLA_BRIDGE_VLAN_INFO and IFLA_BRIDGE_VLAN_RANGE_INFO
From: roopa @ 2015-01-01 1:50 UTC (permalink / raw)
To: Scott Feldman; +Cc: Netdev, shemminger, vyasevic@redhat.com, Wilson kok
In-Reply-To: <54A238BD.7050707@cumulusnetworks.com>
On 12/29/14, 9:31 PM, roopa wrote:
> On 12/29/14, 3:25 PM, Scott Feldman wrote:
>> On Mon, Dec 29, 2014 at 1:05 PM, <roopa@cumulusnetworks.com> wrote:
>>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>>
>>> This patch adds new function to compress vlans into ranges.
>>> Vlans are compressed into ranges only if the fill request is called
>>> with
>>> RTEXT_FILTER_BRVLAN_COMPRESSED in filtermask.
>>>
>>> Old vlan packing code is moved to a new function and continues to be
>>> called when filter_mask is RTEXT_FILTER_BRVLAN
>>>
>>> Signed-off-by: Wilson kok <wkok@cumulusnetworks.com>
>>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>>> ---
>>> net/bridge/br_netlink.c | 157
>>> +++++++++++++++++++++++++++++++++++++++++------
>>> 1 file changed, 137 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>>> index 4c47ba0..16bdd5a 100644
>>> --- a/net/bridge/br_netlink.c
>>> +++ b/net/bridge/br_netlink.c
>>> @@ -67,6 +67,133 @@ static int br_port_fill_attrs(struct sk_buff *skb,
>>> return 0;
>>> }
>>>
>>> +static int br_fill_ifvlaninfo_bitmap(struct sk_buff *skb,
>>> + const unsigned long *vlan_bmp,
>>> u16 flags)
>>> +{
>>> + struct bridge_vlan_range_info vinfo_range;
>>> + struct bridge_vlan_info vinfo;
>>> + u16 vid, start = 0, end = 0;
>> One per line?
> ack
>
>>> + u16 pvid;
>> Aren't you getting an "unused" compile warning on this ^^^?
> will double check..
>
>>> +
>>> + /* handle the untagged */
>> This comment ^^^ doesn't make sense here.
>>
>>> + for_each_set_bit(vid, vlan_bmp, VLAN_N_VID) {
>>> + if (start == 0) {
>>> + start = vid;
>>> + end = vid;
>>> + }
>>> + if ((vid - end) > 1) {
>>> + memset(&vinfo_range, 0, sizeof(vinfo_range));
>>> + vinfo_range.flags |= flags;
>>> + vinfo_range.vid = start;
>>> + vinfo_range.vid_end = end;
>>> + if (nla_put(skb, IFLA_BRIDGE_VLAN_RANGE_INFO,
>>> + sizeof(vinfo_range), &vinfo_range))
>>> + goto nla_put_failure;
>>> + start = vid;
>>> + end = vid;
>>> + } else {
>>> + end = vid;
>>> + }
>>> + }
>> What happens with this set {1-10, 12, 20-25, 30}? On vid 12, will
>> that put a VLAN_RANGE_INFO with start=end=12? Seems strange to use
>> RANGE_INFO for single vlan.
>>
>> Can the algorithm be simplified? Maybe there are other examples in
>> the kernel of finding ranges/singles from a bitmap we could borrow
>> code from?
> let me see...
>
>>> + if (start != 0 && end != 0) {
>>> + if (start != end) {
>>> + memset(&vinfo_range, 0, sizeof(vinfo_range));
>>> + vinfo_range.flags |= flags;
>>> + vinfo_range.vid = start;
>>> + vinfo_range.vid_end = end;
>>> + if (nla_put(skb, IFLA_BRIDGE_VLAN_RANGE_INFO,
>>> + sizeof(vinfo_range), &vinfo_range))
>>> + goto nla_put_failure;
>>> + } else {
>>> + memset(&vinfo, 0, sizeof(vinfo));
>>> + vinfo.flags |= flags;
>>> + vinfo.vid = start;
>>> + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
>>> + sizeof(vinfo), &vinfo))
>>> + goto nla_put_failure;
>> How many IFLA_BRIDGE_VLAN_INFOs can you fit into skb? It seems the
>> worst case is you'll have 4094/2 = 2047 VLAN_INFOs if using all
>> even-numbered vids, for example. Will that fit? I guess the original
>> code had the same concern...wonder if anyone checked this worst-case?
>
> I will check this again. Remember doing worst case tests for setlinks.
> will get back with some worst cases tests in the next submission.
>>
>>> + }
>>> + }
>>> +
>>> +nla_put_failure:
>>> + return -EMSGSIZE;
>>> +}
>>> +
>>> +static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb,
>>> + const struct net_port_vlans
>>> *pv)
>>> +{
>>> + unsigned long vlan_bmp_copy[BR_VLAN_BITMAP_LEN];
>>> + unsigned long untagged_bmp_copy[BR_VLAN_BITMAP_LEN];
>> Lots of automatic space on the stack...can you use dynamic mem
>> (kzalloc) for these?
> Let me see. Or make it a static global ?
>
>>
>>> + struct bridge_vlan_range_info vinfo_range;
>>> + struct bridge_vlan_info vinfo;
>>> + u16 pvid;
>>> +
>>> + memset(vlan_bmp_copy, 0,
>>> + sizeof(unsigned long) * BR_VLAN_BITMAP_LEN);
>>> + bitmap_copy(vlan_bmp_copy, pv->vlan_bitmap, VLAN_N_VID);
>>> +
>>> + memset(untagged_bmp_copy, 0,
>>> + sizeof(unsigned long) * BR_VLAN_BITMAP_LEN);
>>> + bitmap_copy(untagged_bmp_copy, pv->untagged_bitmap,
>>> VLAN_N_VID);
>>> +
>>> + /* send the pvid separately first */
>>> + pvid = br_get_pvid(pv);
>>> + if (pvid && (pvid != VLAN_N_VID)) {
>>> + memset(&vinfo, 0, sizeof(vinfo));
>>> + vinfo.flags |= BRIDGE_VLAN_INFO_PVID;
>>> + if (test_bit(pvid, untagged_bmp_copy)) {
>>> + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED;
>>> + clear_bit(pvid, untagged_bmp_copy);
>>> + }
>>> + clear_bit(pvid, vlan_bmp_copy);
>>> + vinfo.vid = pvid;
>>> + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
>>> + sizeof(vinfo), &vinfo))
>>> + goto nla_put_failure;
>>> + }
>> What if pvid is not in vlan_bmp_copy? This statement block seems
>> slightly different than the original logic where BRIDGE_VLAN_INFO_PVID
>> was set only when looping thru vlan_bitmap and vid == pvid. Now can
>> you send a IFLA_BRIDGE_VLAN_INFO with pvid even if pvid isn't in
>> vlan_bitmap?
>
> yes, you can AFAIK. pvid is usually untagged.
>> Maybe pvid is always in vlan_bitmap, so it doesn't
>> matter. But there is a subtle logic change here, so something to
>> consider.
> Understand what you are saying, I will check this with v2.
>
>>> +
>>> + /* fill untagged vlans */
>>> + br_fill_ifvlaninfo_bitmap(skb, untagged_bmp_copy,
>>> + BRIDGE_VLAN_INFO_UNTAGGED);
>> This might be doing more than original logic. Original logic looped
>> thru vid in vlan_btimap and checked if vid was in untagged_bitmap.
>> Here you're dumping all bits in untagged_bitmap, without looking if
>> bit was set in vlan_bitmap. Maybe you want to do an intersection of
>> sets vlan_bitmap and untagged_bitmap.
> There is a clear_bit for vlan_bmp_copy below.
> And all this works as expected on our boxes (Its tested code).
>
> But, understand the difference with the original loop. Will confirm.
re-checked and the code is correct and does not deviate from original
logic.
The vlan_bitmap has all vids set in the untagged_bitmap.
>
> plus, we will see if we can use bitmap intersection here.
>
>>> + for_each_set_bit(vid, untagged_bmp_copy, VLAN_N_VID)
>>> + clear_bit(vid, vlan_bmp_copy);
>>> +
>>> + /* fill tagged vlans */
>>> + br_fill_ifvlaninfo_bitmap(skb, vlan_bmp_copy, 0);
>>> +
>>> + return 0;
>>> +
>>> +nla_put_failure:
>>> + return -EMSGSIZE;
>>> +}
>>> +
>>> +static int br_fill_ifvlaninfo(struct sk_buff *skb,
>>> + const struct net_port_vlans *pv)
>>> +{
>>> + struct bridge_vlan_info vinfo;
>>> + u16 pvid, vid;
>>> +
>>> + pvid = br_get_pvid(pv);
>>> + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
>>> + vinfo.vid = vid;
>>> + vinfo.flags = 0;
>>> + if (vid == pvid)
>>> + vinfo.flags |= BRIDGE_VLAN_INFO_PVID;
>>> +
>>> + if (test_bit(vid, pv->untagged_bitmap))
>>> + vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED;
>>> +
>>> + if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
>>> + sizeof(vinfo), &vinfo))
>>> + goto nla_put_failure;
>>> + }
>>> +
>>> + return 0;
>>> +
>>> +nla_put_failure:
>>> + return -EMSGSIZE;
>>> +}
>>> +
>>> /*
>>> * Create one netlink message for one interface
>>> * Contains port and master info as well as carrier and bridge state.
>>> @@ -121,12 +248,11 @@ static int br_fill_ifinfo(struct sk_buff *skb,
>>> }
>>>
>>> /* Check if the VID information is requested */
>>> - if (filter_mask & RTEXT_FILTER_BRVLAN) {
>>> - struct nlattr *af;
>>> + if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
>>> + (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
>>> const struct net_port_vlans *pv;
>>> - struct bridge_vlan_info vinfo;
>>> - u16 vid;
>>> - u16 pvid;
>>> + struct nlattr *af;
>>> + int err;
>>>
>>> if (port)
>>> pv = nbp_get_vlan_info(port);
>>> @@ -140,21 +266,12 @@ static int br_fill_ifinfo(struct sk_buff *skb,
>>> if (!af)
>>> goto nla_put_failure;
>>>
>>> - pvid = br_get_pvid(pv);
>>> - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
>>> - vinfo.vid = vid;
>>> - vinfo.flags = 0;
>>> - if (vid == pvid)
>>> - vinfo.flags |= BRIDGE_VLAN_INFO_PVID;
>>> -
>>> - if (test_bit(vid, pv->untagged_bitmap))
>>> - vinfo.flags |=
>>> BRIDGE_VLAN_INFO_UNTAGGED;
>>> -
>>> - if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
>>> - sizeof(vinfo), &vinfo))
>>> - goto nla_put_failure;
>>> - }
>>> -
>>> + if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
>>> + err = br_fill_ifvlaninfo_compressed(skb, pv);
>>> + else
>>> + err = br_fill_ifvlaninfo(skb, pv);
>>> + if (err)
>>> + goto nla_put_failure;
>>> nla_nest_end(skb, af);
>>> }
>>>
>>> --
>>> 1.7.10.4
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply
* [RFC V2 PATCH 24/25] net/netpolicy: limit the total record number
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
NET policy cannot fulfill users' requests without limit, because of
security considerations and device limitations. For security, an
attacker may fake millions of per task/socket requests to crash the
system. For device limitations, the number of flow director rules is
limited on the i40e driver. NET policy should not run out of rules,
otherwise it cannot guarantee good performance.
This patch limits the total record number in the RCU hash table to fix
the cases above. The max total record number could vary for different
devices. For the i40e driver, it limits the record number according to
the number of flow director rules. If the limit is exceeded, the
registration and new object requests will be denied.
Since the dev may not be known at registration, cur_rec_num may not be
updated in time. So the actual number of registered records may exceed
max_rec_num. But this will not cause any problems, because the patch
also checks the limit on object request. That guarantees that the
device resources will not run out.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
include/linux/netpolicy.h | 4 ++++
net/core/netpolicy.c | 22 ++++++++++++++++++++--
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index 0eba512..9bc2ee0 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -40,6 +40,7 @@ enum netpolicy_traffic {
#define NETPOLICY_INVALID_QUEUE -1
#define NETPOLICY_INVALID_LOC NETPOLICY_INVALID_QUEUE
#define POLICY_NAME_LEN_MAX 64
+#define NETPOLICY_MAX_RECORD_NUM 7000
extern const char *policy_name[];
struct netpolicy_dev_info {
@@ -81,6 +82,9 @@ struct netpolicy_info {
struct netpolicy_sys_info sys_info;
/* List of policy objects 0 rx 1 tx */
struct list_head obj_list[NETPOLICY_RXTX][NET_POLICY_MAX];
+ /* for record number limitation */
+ int max_rec_num;
+ atomic_t cur_rec_num;
};
struct netpolicy_tcpudpip4_spec {
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 735405c..e9f3800 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -368,6 +368,9 @@ static int get_avail_queue(struct netpolicy_instance *instance, bool is_rx)
unsigned long ptr_id = (uintptr_t)instance->ptr;
int queue = -1;
+ if (atomic_read(&dev->netpolicy->cur_rec_num) > dev->netpolicy->max_rec_num)
+ return queue;
+
spin_lock_bh(&np_hashtable_lock);
old_record = netpolicy_record_search(ptr_id);
if (!old_record) {
@@ -388,8 +391,10 @@ static int get_avail_queue(struct netpolicy_instance *instance, bool is_rx)
if (is_rx) {
new_record->rx_obj = get_avail_object(dev, new_record->policy, is_rx);
- if (!new_record->dev)
+ if (!new_record->dev) {
new_record->dev = dev;
+ atomic_inc(&dev->netpolicy->cur_rec_num);
+ }
if (!new_record->rx_obj) {
kfree(new_record);
goto err;
@@ -397,8 +402,10 @@ static int get_avail_queue(struct netpolicy_instance *instance, bool is_rx)
queue = new_record->rx_obj->queue;
} else {
new_record->tx_obj = get_avail_object(dev, new_record->policy, is_rx);
- if (!new_record->dev)
+ if (!new_record->dev) {
new_record->dev = dev;
+ atomic_inc(&dev->netpolicy->cur_rec_num);
+ }
if (!new_record->tx_obj) {
kfree(new_record);
goto err;
@@ -638,6 +645,7 @@ int netpolicy_register(struct netpolicy_instance *instance,
enum netpolicy_name policy)
{
unsigned long ptr_id = (uintptr_t)instance->ptr;
+ struct net_device *dev = instance->dev;
struct netpolicy_record *new, *old;
if (!is_net_policy_valid(policy)) {
@@ -645,6 +653,10 @@ int netpolicy_register(struct netpolicy_instance *instance,
return -EINVAL;
}
+ if (dev && dev->netpolicy &&
+ (atomic_read(&dev->netpolicy->cur_rec_num) > dev->netpolicy->max_rec_num))
+ return -ENOSPC;
+
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new) {
instance->policy = NET_POLICY_INVALID;
@@ -668,6 +680,8 @@ int netpolicy_register(struct netpolicy_instance *instance,
new->dev = instance->dev;
new->policy = policy;
hash_add_rcu(np_record_hash, &new->hash_node, ptr_id);
+ if (dev && dev->netpolicy)
+ atomic_inc(&dev->netpolicy->cur_rec_num);
}
instance->policy = policy;
spin_unlock_bh(&np_hashtable_lock);
@@ -714,6 +728,7 @@ void netpolicy_unregister(struct netpolicy_instance *instance)
/* The record cannot be share. It can be safely free. */
put_queue(record->dev, record->rx_obj, record->tx_obj);
kfree(record);
+ atomic_dec(&dev->netpolicy->cur_rec_num);
}
instance->policy = NET_POLICY_INVALID;
spin_unlock_bh(&np_hashtable_lock);
@@ -1247,6 +1262,9 @@ int init_netpolicy(struct net_device *dev)
goto unlock;
}
+ if (!dev->netpolicy->max_rec_num)
+ dev->netpolicy->max_rec_num = NETPOLICY_MAX_RECORD_NUM;
+
spin_lock(&dev->np_ob_list_lock);
for (i = 0; i < NETPOLICY_RXTX; i++) {
for (j = NET_POLICY_NONE; j < NET_POLICY_MAX; j++)
--
2.5.5
^ permalink raw reply related
* [RFC V2 PATCH 23/25] net/netpolicy: optimize for queue pair
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
Some drivers, such as the i40e driver, do not support separate Tx and Rx
queues as channels. Use the Rx queue to stand for the channel if
queue_pair is set by the driver.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
include/linux/netpolicy.h | 1 +
net/core/netpolicy.c | 3 +++
2 files changed, 4 insertions(+)
diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index 00600f8..0eba512 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -76,6 +76,7 @@ struct netpolicy_info {
enum netpolicy_name cur_policy;
unsigned long avail_policy[BITS_TO_LONGS(NET_POLICY_MAX)];
bool has_mix_policy;
+ bool queue_pair;
/* cpu and queue mapping information */
struct netpolicy_sys_info sys_info;
/* List of policy objects 0 rx 1 tx */
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index dc1edfc..735405c 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -495,6 +495,9 @@ int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx)
if (!policy_validate(instance))
return -EINVAL;
+ if (dev->netpolicy->queue_pair)
+ is_rx = true;
+
/* fast path */
rcu_read_lock();
version = (int *)rcu_dereference(netpolicy_sys_map_version);
--
2.5.5
^ permalink raw reply related
* [RFC V2 PATCH 22/25] net/netpolicy: fast path for finding the queues
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
The current implementation searches the hash table to get the assigned
object for each transmit/receive packet. It's not necessary, because the
assigned object usually remains unchanged. This patch stores the assigned
queue to speed up the searching process.
But under certain situations, the assigned objects have to be changed,
especially when the system cpu and queue mapping changes, such as CPU
hotplug, device hotplug, queue number changes and so on. In this patch,
the netpolicy_sys_map_version is used to track the system cpu and queue
mapping changes. If the netpolicy_sys_map_version doesn't match the
instance's version, the stored queue will be dropped. The
netpolicy_sys_map_version is protected by RCU lock.
Also, to reduce the overhead, this patch asynchronously finds the
available object by work queue. So the first several packets may not
benefit.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
include/linux/netpolicy.h | 8 ++++
net/core/netpolicy.c | 103 +++++++++++++++++++++++++++++++++++++++++++++-
net/ipv4/af_inet.c | 7 +---
3 files changed, 112 insertions(+), 6 deletions(-)
diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index df962de..00600f8 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -108,6 +108,14 @@ struct netpolicy_instance {
struct work_struct fc_wk; /* flow classification work */
atomic_t fc_wk_cnt; /* flow classification work number */
struct netpolicy_flow_spec flow; /* flow information */
+ /* For fast path */
+ atomic_t rx_queue;
+ atomic_t tx_queue;
+ struct work_struct get_rx_wk;
+ atomic_t get_rx_wk_cnt;
+ struct work_struct get_tx_wk;
+ atomic_t get_tx_wk_cnt;
+ int sys_map_version;
};
/* check if policy is valid */
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 4b844d8..dc1edfc 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -79,10 +79,13 @@ struct netpolicy_record {
struct netpolicy_object *tx_obj;
};
+static void __rcu *netpolicy_sys_map_version;
+
static DEFINE_HASHTABLE(np_record_hash, 10);
static DEFINE_SPINLOCK(np_hashtable_lock);
struct workqueue_struct *np_fc_wq;
+struct workqueue_struct *np_fast_path_wq;
static int netpolicy_get_dev_info(struct net_device *dev,
struct netpolicy_dev_info *d_info)
@@ -411,6 +414,37 @@ err:
return queue;
}
+static void np_find_rx_queue(struct work_struct *wk)
+{
+ struct netpolicy_instance *instance;
+ int queue;
+
+ instance = container_of(wk, struct netpolicy_instance,
+ get_rx_wk);
+
+ if (instance) {
+ queue = get_avail_queue(instance, true);
+ if (queue >= 0)
+ atomic_set(&instance->rx_queue, queue);
+ }
+ atomic_set(&instance->get_rx_wk_cnt, 0);
+}
+
+static void np_find_tx_queue(struct work_struct *wk)
+{
+ struct netpolicy_instance *instance;
+ int queue;
+
+ instance = container_of(wk, struct netpolicy_instance,
+ get_tx_wk);
+ if (instance) {
+ queue = get_avail_queue(instance, false);
+ if (queue >= 0)
+ atomic_set(&instance->tx_queue, queue);
+ }
+ atomic_set(&instance->get_tx_wk_cnt, 0);
+}
+
static inline bool policy_validate(struct netpolicy_instance *instance)
{
struct net_device *dev = instance->dev;
@@ -453,6 +487,7 @@ static inline bool policy_validate(struct netpolicy_instance *instance)
int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx)
{
struct net_device *dev = instance->dev;
+ int *version;
if (!dev || !dev->netpolicy)
return -EINVAL;
@@ -460,7 +495,31 @@ int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx)
if (!policy_validate(instance))
return -EINVAL;
- return get_avail_queue(instance, is_rx);
+ /* fast path */
+ rcu_read_lock();
+ version = (int *)rcu_dereference(netpolicy_sys_map_version);
+ if (*version == instance->sys_map_version) {
+ if (is_rx && (atomic_read(&instance->rx_queue) != NETPOLICY_INVALID_QUEUE)) {
+ rcu_read_unlock();
+ return atomic_read(&instance->rx_queue);
+ }
+ if (!is_rx && (atomic_read(&instance->tx_queue) != NETPOLICY_INVALID_QUEUE)) {
+ rcu_read_unlock();
+ return atomic_read(&instance->tx_queue);
+ }
+ } else {
+ atomic_set(&instance->rx_queue, NETPOLICY_INVALID_QUEUE);
+ atomic_set(&instance->tx_queue, NETPOLICY_INVALID_QUEUE);
+ instance->sys_map_version = *version;
+ }
+ rcu_read_unlock();
+
+ if (is_rx && !atomic_cmpxchg(&instance->get_rx_wk_cnt, 0, 1))
+ queue_work(np_fast_path_wq, &instance->get_rx_wk);
+ if (!is_rx && !atomic_cmpxchg(&instance->get_tx_wk_cnt, 0, 1))
+ queue_work(np_fast_path_wq, &instance->get_tx_wk);
+
+ return -1;
}
EXPORT_SYMBOL(netpolicy_pick_queue);
@@ -496,6 +555,7 @@ void np_flow_rule_set(struct work_struct *wk)
queue = get_avail_queue(instance, true);
if (queue < 0)
goto done;
+ atomic_set(&instance->rx_queue, queue);
/* using ethtool flow-type to configure
* Rx network flow classification options or rules
@@ -546,6 +606,14 @@ static void init_instance(struct netpolicy_instance *instance)
atomic_set(&instance->rule_queue, NETPOLICY_INVALID_QUEUE);
atomic_set(&instance->fc_wk_cnt, 0);
INIT_WORK(&instance->fc_wk, np_flow_rule_set);
+
+ atomic_set(&instance->rx_queue, NETPOLICY_INVALID_QUEUE);
+ atomic_set(&instance->tx_queue, NETPOLICY_INVALID_QUEUE);
+ instance->sys_map_version = 0;
+ atomic_set(&instance->get_rx_wk_cnt, 0);
+ atomic_set(&instance->get_tx_wk_cnt, 0);
+ INIT_WORK(&instance->get_rx_wk, np_find_rx_queue);
+ INIT_WORK(&instance->get_tx_wk, np_find_tx_queue);
}
/**
@@ -619,6 +687,8 @@ void netpolicy_unregister(struct netpolicy_instance *instance)
struct net_device *dev = instance->dev;
struct netpolicy_record *record;
+ cancel_work_sync(&instance->get_rx_wk);
+ cancel_work_sync(&instance->get_tx_wk);
cancel_work_sync(&instance->fc_wk);
/* remove FD rules */
if (dev && instance->location != NETPOLICY_INVALID_LOC) {
@@ -1296,6 +1366,7 @@ void update_netpolicy_sys_map(void)
struct net *net;
struct net_device *dev, *aux;
enum netpolicy_name cur_policy;
+ int *new_version, *old_version;
for_each_net(net) {
for_each_netdev_safe(net, dev, aux) {
@@ -1331,6 +1402,18 @@ unlock:
spin_unlock(&dev->np_lock);
}
}
+
+ old_version = rcu_dereference(netpolicy_sys_map_version);
+ new_version = kzalloc(sizeof(int), GFP_KERNEL);
+ if (new_version) {
+ *new_version = *old_version + 1;
+ if (*new_version < 0)
+ *new_version = 0;
+ rcu_assign_pointer(netpolicy_sys_map_version, new_version);
+ synchronize_rcu();
+ } else {
+ pr_warn("NETPOLICY: Failed to update sys map version\n");
+ }
}
EXPORT_SYMBOL(update_netpolicy_sys_map);
@@ -1357,11 +1440,26 @@ static struct notifier_block netpolicy_cpu_notifier = {
static int __init netpolicy_init(void)
{
int ret;
+ void *version;
np_fc_wq = create_workqueue("np_fc");
if (!np_fc_wq)
return -ENOMEM;
+ np_fast_path_wq = create_workqueue("np_fast_path");
+ if (!np_fast_path_wq) {
+ destroy_workqueue(np_fc_wq);
+ return -ENOMEM;
+ }
+
+ version = kzalloc(sizeof(int), GFP_KERNEL);
+ if (!version) {
+ destroy_workqueue(np_fc_wq);
+ destroy_workqueue(np_fast_path_wq);
+ return -ENOMEM;
+ }
+ rcu_assign_pointer(netpolicy_sys_map_version, version);
+
ret = register_pernet_subsys(&netpolicy_net_ops);
if (!ret)
register_netdevice_notifier(&netpolicy_dev_notf);
@@ -1376,6 +1474,9 @@ static int __init netpolicy_init(void)
static void __exit netpolicy_exit(void)
{
destroy_workqueue(np_fc_wq);
+ destroy_workqueue(np_fast_path_wq);
+ RCU_INIT_POINTER(netpolicy_sys_map_version, NULL);
+ synchronize_rcu();
unregister_netdevice_notifier(&netpolicy_dev_notf);
unregister_pernet_subsys(&netpolicy_net_ops);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b26e606..a21ae80 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -765,7 +765,6 @@ static void sock_netpolicy_manage_flow(struct sock *sk, struct msghdr *msg)
struct netpolicy_instance *instance;
struct netpolicy_flow_spec *flow;
bool change = false;
- int queue;
instance = netpolicy_find_instance(sk);
if (!instance)
@@ -819,10 +818,8 @@ static void sock_netpolicy_manage_flow(struct sock *sk, struct msghdr *msg)
return;
}
- queue = netpolicy_pick_queue(instance, true);
- if (queue < 0)
- return;
- if ((queue != atomic_read(&instance->rule_queue)) || change)
+ if ((atomic_read(&instance->rx_queue) != atomic_read(&instance->rule_queue)) ||
+ change)
netpolicy_set_rules(instance);
#endif
}
--
2.5.5
^ permalink raw reply related
* [RFC V2 PATCH 21/25] net/netpolicy: set per task policy by proc
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
Users may not want to change the source code to add per task net policy
support. Or they may want to change a running task's net policy. prctl
does not work for both cases.
This patch adds an interface in /proc, which can be used to set and
retrieve policy of already running tasks. User can write the policy name
into /proc/$PID/net_policy to set per task net policy.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
fs/proc/base.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a11eb71..7679785 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -91,6 +91,8 @@
#include <asm/hardwall.h>
#endif
#include <trace/events/oom.h>
+#include <linux/netpolicy.h>
+#include <linux/ctype.h>
#include "internal.h"
#include "fd.h"
@@ -2807,6 +2809,65 @@ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
return err;
}
+#ifdef CONFIG_NETPOLICY
+/*
+ * seq_file show handler for /proc/<pid>/net_policy.
+ *
+ * Prints the name of the task's net policy followed by a newline, or
+ * nothing at all when the task has no valid policy.
+ *
+ * Returns 0 on success, -ESRCH when the task no longer exists.
+ */
+static int proc_net_policy_show(struct seq_file *m, void *v)
+{
+	struct inode *inode = m->private;
+	struct task_struct *task = get_proc_task(inode);
+
+	/* The task may already have exited by the time the file is read */
+	if (!task)
+		return -ESRCH;
+
+	if (is_net_policy_valid(task->task_netpolicy.policy))
+		seq_printf(m, "%s\n", policy_name[task->task_netpolicy.policy]);
+
+	/* Drop the reference taken by get_proc_task() */
+	put_task_struct(task);
+	return 0;
+}
+
+/*
+ * open() handler for the net_policy proc entry: sets up a single-record
+ * seq_file whose show callback is proc_net_policy_show() and whose private
+ * data is the inode.
+ */
+static int proc_net_policy_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_net_policy_show, inode);
+}
+
+/*
+ * write() handler for /proc/<pid>/net_policy.
+ *
+ * The written text is upper-cased and matched against policy_name[]; the
+ * first entry that is a prefix of the input (so a trailing newline is
+ * tolerated) is registered as the task's net policy.
+ *
+ * Returns @count on success, -EINVAL for an empty, over-long or unknown
+ * policy name, -EFAULT if the user buffer cannot be read, -ESRCH if the
+ * task is gone, or the error returned by netpolicy_register().
+ */
+static ssize_t proc_net_policy_write(struct file *file, const char __user *buf,
+				      size_t count, loff_t *ppos)
+{
+	struct inode *inode = file_inode(file);
+	char name[POLICY_NAME_LEN_MAX];
+	struct task_struct *task;
+	ssize_t ret = -EINVAL;
+	size_t n;
+	int i;
+
+	/* Reject empty writes and leave room for the terminating NUL */
+	if (!count || count >= POLICY_NAME_LEN_MAX)
+		return -EINVAL;
+
+	if (copy_from_user(name, buf, count))
+		return -EFAULT;
+	/* Terminate right after the copied bytes so no stale stack is compared */
+	name[count] = '\0';
+
+	for (n = 0; n < count; n++)
+		name[n] = toupper(name[n]);
+
+	task = get_proc_task(inode);
+	if (!task)
+		return -ESRCH;
+
+	for (i = 0; i < NET_POLICY_MAX; i++) {
+		if (strncmp(name, policy_name[i], strlen(policy_name[i])))
+			continue;
+		ret = netpolicy_register(&task->task_netpolicy, i);
+		if (!ret)
+			ret = count;
+		break;
+	}
+
+	/* Drop the reference taken by get_proc_task() */
+	put_task_struct(task);
+	return ret;
+}
+
+/*
+ * File operations for the net_policy proc entry.  The file is opened with
+ * single_open(), so it must be released with single_release() to free the
+ * seq_operations that single_open() allocated.
+ */
+static const struct file_operations proc_net_policy_operations = {
+	.open		= proc_net_policy_open,
+	.write		= proc_net_policy_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif /* CONFIG_NETPOLICY */
+
/*
* Thread groups
*/
@@ -2906,6 +2967,9 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("timers", S_IRUGO, proc_timers_operations),
#endif
REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
+#if IS_ENABLED(CONFIG_NETPOLICY)
+ REG("net_policy", S_IRUSR|S_IWUSR, proc_net_policy_operations),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
--
2.5.5
^ permalink raw reply related
* [RFC V2 PATCH 19/25] net/netpolicy: set Rx queues according to policy
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
For setting Rx queues, this patch configures Rx network flow
classification rules to redirect the packets to the assigned queue.
Since we may not get all the information required for rule until the
first packet arrived, it will add the rule after recvmsg. Also, to
avoid destroying the connection rates, the configuration will be done
asynchronously by a work queue. So the first several packets may not use
the assigned queue.
The dev information will be discarded in udp_queue_rcv_skb, so we record
it in netpolicy struct in advance.
This patch only supports INET tcp4 and udp4. It can be extended to other
socket types and V6 later.
For each sk, it only supports one rule. If the port/address changed, the
previous rule will be replaced.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
include/linux/netpolicy.h | 33 +++++++++++-
net/core/netpolicy.c | 131 +++++++++++++++++++++++++++++++++++++++++++++-
net/ipv4/af_inet.c | 71 +++++++++++++++++++++++++
net/ipv4/udp.c | 4 ++
4 files changed, 236 insertions(+), 3 deletions(-)
diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index a522015..df962de 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -37,6 +37,8 @@ enum netpolicy_traffic {
NETPOLICY_RXTX,
};
+#define NETPOLICY_INVALID_QUEUE -1
+#define NETPOLICY_INVALID_LOC NETPOLICY_INVALID_QUEUE
#define POLICY_NAME_LEN_MAX 64
extern const char *policy_name[];
@@ -80,10 +82,32 @@ struct netpolicy_info {
struct list_head obj_list[NETPOLICY_RXTX][NET_POLICY_MAX];
};
+/* IPv4 address/port 4-tuple describing one TCP or UDP flow */
+struct netpolicy_tcpudpip4_spec {
+ /* source and Destination host and port */
+ __be32 ip4src;
+ __be32 ip4dst;
+ __be16 psrc;
+ __be16 pdst;
+};
+
+/* Per-protocol flow description; only the TCP/UDP over IPv4 form exists so far */
+union netpolicy_flow_union {
+ struct netpolicy_tcpudpip4_spec tcp_udp_ip4_spec;
+};
+
+/* Flow type tag plus the matching flow description */
+struct netpolicy_flow_spec {
+ __u32 flow_type;
+ union netpolicy_flow_union spec;
+};
+
struct netpolicy_instance {
struct net_device *dev;
- enum netpolicy_name policy; /* required policy */
- void *ptr; /* pointers */
+ enum netpolicy_name policy; /* required policy */
+ void *ptr; /* pointers */
+ int location; /* rule location */
+ atomic_t rule_queue; /* queue set by rule */
+ struct work_struct fc_wk; /* flow classification work */
+ atomic_t fc_wk_cnt; /* flow classification work number */
+ struct netpolicy_flow_spec flow; /* flow information */
};
/* check if policy is valid */
@@ -98,6 +122,7 @@ extern int netpolicy_register(struct netpolicy_instance *instance,
enum netpolicy_name policy);
extern void netpolicy_unregister(struct netpolicy_instance *instance);
extern int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx);
+extern void netpolicy_set_rules(struct netpolicy_instance *instance);
#else
static inline void update_netpolicy_sys_map(void)
{
@@ -116,6 +141,10 @@ static inline int netpolicy_pick_queue(struct netpolicy_instance *instance, bool
{
return 0;
}
+
+/* Stub used in the #else branch of this header: does nothing. */
+static inline void netpolicy_set_rules(struct netpolicy_instance *instance)
+{
+}
#endif
#endif /*__LINUX_NETPOLICY_H*/
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 98ca430..89c65d9 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -39,6 +39,7 @@
#include <linux/ctype.h>
#include <linux/cpu.h>
#include <linux/hashtable.h>
+#include <net/rtnetlink.h>
struct netpolicy_record {
struct hlist_node hash_node;
@@ -52,6 +53,8 @@ struct netpolicy_record {
static DEFINE_HASHTABLE(np_record_hash, 10);
static DEFINE_SPINLOCK(np_hashtable_lock);
+struct workqueue_struct *np_fc_wq;
+
static int netpolicy_get_dev_info(struct net_device *dev,
struct netpolicy_dev_info *d_info)
{
@@ -426,6 +429,90 @@ int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx)
}
EXPORT_SYMBOL(netpolicy_pick_queue);
+/*
+ * Work handler that installs an Rx flow classification rule for the
+ * netpolicy instance embedding @wk.
+ *
+ * A new rule steering the instance's recorded TCP/UDP IPv4 flow to its
+ * assigned Rx queue is inserted through the device's ethtool set_rxnfc
+ * callback.  If the instance already owned a rule, the old one is deleted
+ * after the new one is in place.  On every exit path fc_wk_cnt is reset
+ * to 0 so that netpolicy_set_rules() may queue the work again.
+ */
+void np_flow_rule_set(struct work_struct *wk)
+{
+	struct netpolicy_instance *instance;
+	struct netpolicy_flow_spec *flow;
+	struct ethtool_rxnfc cmd;
+	struct net_device *dev;
+	int queue, ret;
+
+	/* Neither container_of() nor the address of a member can be NULL */
+	instance = container_of(wk, struct netpolicy_instance, fc_wk);
+	flow = &instance->flow;
+
+	dev = instance->dev;
+	if (WARN_ON(!dev))
+		goto done;
+
+	/* Check if ntuple is supported */
+	if (!dev->ethtool_ops->set_rxnfc)
+		goto done;
+
+	/* Only support TCP/UDP V4 by now */
+	if ((flow->flow_type != TCP_V4_FLOW) &&
+	    (flow->flow_type != UDP_V4_FLOW))
+		goto done;
+
+	queue = get_avail_queue(instance, true);
+	if (queue < 0)
+		goto done;
+
+	/* using ethtool flow-type to configure
+	 * Rx network flow classification options or rules
+	 * RX_CLS_LOC_ANY must be supported by the driver
+	 */
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.cmd = ETHTOOL_SRXCLSRLINS;
+	cmd.fs.flow_type = flow->flow_type;
+	cmd.fs.h_u.tcp_ip4_spec.ip4src = flow->spec.tcp_udp_ip4_spec.ip4src;
+	cmd.fs.h_u.tcp_ip4_spec.psrc = flow->spec.tcp_udp_ip4_spec.psrc;
+	cmd.fs.h_u.tcp_ip4_spec.ip4dst = flow->spec.tcp_udp_ip4_spec.ip4dst;
+	cmd.fs.h_u.tcp_ip4_spec.pdst = flow->spec.tcp_udp_ip4_spec.pdst;
+	cmd.fs.ring_cookie = queue;
+	cmd.fs.location = RX_CLS_LOC_ANY;
+	rtnl_lock();
+	ret = dev->ethtool_ops->set_rxnfc(dev, &cmd);
+	rtnl_unlock();
+	if (ret < 0) {
+		pr_warn("Failed to set rules ret %d\n", ret);
+		atomic_set(&instance->rule_queue, NETPOLICY_INVALID_QUEUE);
+		goto done;
+	}
+
+	/* TODO: now one sk only has one rule */
+	if (instance->location != NETPOLICY_INVALID_LOC) {
+		/* delete the old rule */
+		struct ethtool_rxnfc del_cmd;
+
+		/* Zero the command so the driver never sees stale stack data */
+		memset(&del_cmd, 0, sizeof(del_cmd));
+		del_cmd.cmd = ETHTOOL_SRXCLSRLDEL;
+		del_cmd.fs.location = instance->location;
+		rtnl_lock();
+		ret = dev->ethtool_ops->set_rxnfc(dev, &del_cmd);
+		rtnl_unlock();
+		if (ret < 0)
+			pr_warn("Failed to delete rules ret %d\n", ret);
+	}
+
+	/* record rule location */
+	instance->location = cmd.fs.location;
+	atomic_set(&instance->rule_queue, queue);
+done:
+	atomic_set(&instance->fc_wk_cnt, 0);
+}
+
+/*
+ * Reset the flow classification state of @instance: no rule location, no
+ * rule queue, no pending work, and np_flow_rule_set() as the work handler.
+ */
+static void init_instance(struct netpolicy_instance *instance)
+{
+ instance->location = NETPOLICY_INVALID_LOC;
+ atomic_set(&instance->rule_queue, NETPOLICY_INVALID_QUEUE);
+ atomic_set(&instance->fc_wk_cnt, 0);
+ INIT_WORK(&instance->fc_wk, np_flow_rule_set);
+}
+
/**
* netpolicy_register() - Register per socket/task policy request
* @instance: NET policy per socket/task instance info
@@ -470,6 +557,7 @@ int netpolicy_register(struct netpolicy_instance *instance,
}
kfree(new);
} else {
+ init_instance(instance);
new->ptr_id = ptr_id;
new->dev = instance->dev;
new->policy = policy;
@@ -492,8 +580,23 @@ EXPORT_SYMBOL(netpolicy_register);
*/
void netpolicy_unregister(struct netpolicy_instance *instance)
{
- struct netpolicy_record *record;
unsigned long ptr_id = (uintptr_t)instance->ptr;
+ struct net_device *dev = instance->dev;
+ struct netpolicy_record *record;
+
+ cancel_work_sync(&instance->fc_wk);
+ /* remove FD rules */
+ if (dev && instance->location != NETPOLICY_INVALID_LOC) {
+ struct ethtool_rxnfc del_cmd;
+
+ del_cmd.cmd = ETHTOOL_SRXCLSRLDEL;
+ del_cmd.fs.location = instance->location;
+ rtnl_lock();
+ dev->ethtool_ops->set_rxnfc(dev, &del_cmd);
+ rtnl_unlock();
+ instance->location = NETPOLICY_INVALID_LOC;
+ atomic_set(&instance->rule_queue, NETPOLICY_INVALID_QUEUE);
+ }
spin_lock_bh(&np_hashtable_lock);
/* del from hash table */
@@ -509,6 +612,26 @@ void netpolicy_unregister(struct netpolicy_instance *instance)
}
EXPORT_SYMBOL(netpolicy_unregister);
+/**
+ * netpolicy_set_rules() - Configure Rx network flow classification rules
+ * @instance: NET policy per socket/task instance info
+ *
+ * This function schedules the configuration of Rx network flow
+ * classification rules according to the recorded IP and port information.
+ * The configuration itself is done asynchronously on the np_fc_wq work
+ * queue, so the caller is not delayed by it.
+ *
+ * Currently, it only supports TCP and UDP V4. Other protocols will be
+ * supported later.
+ *
+ */
+void netpolicy_set_rules(struct netpolicy_instance *instance)
+{
+ /* There should be only one work to run at the same time */
+ /* fc_wk_cnt goes 0 -> 1 here; the work handler sets it back to 0 */
+ if (!atomic_cmpxchg(&instance->fc_wk_cnt, 0, 1))
+ queue_work(np_fc_wq, &instance->fc_wk);
+}
+EXPORT_SYMBOL(netpolicy_set_rules);
+
const char *policy_name[NET_POLICY_MAX] = {
"NONE",
"CPU",
@@ -1200,6 +1323,10 @@ static int __init netpolicy_init(void)
{
int ret;
+ np_fc_wq = create_workqueue("np_fc");
+ if (!np_fc_wq)
+ return -ENOMEM;
+
ret = register_pernet_subsys(&netpolicy_net_ops);
if (!ret)
register_netdevice_notifier(&netpolicy_dev_notf);
@@ -1213,6 +1340,8 @@ static int __init netpolicy_init(void)
static void __exit netpolicy_exit(void)
{
+ destroy_workqueue(np_fc_wq);
+
unregister_netdevice_notifier(&netpolicy_dev_notf);
unregister_pernet_subsys(&netpolicy_net_ops);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 55513e6..f536da3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -759,6 +759,71 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
}
EXPORT_SYMBOL(inet_sendpage);
+/*
+ * Refresh the flow description stored in the socket's netpolicy instance
+ * and request a new Rx classification rule when it is out of date.
+ *
+ * Does nothing unless CONFIG_NETPOLICY is set, the socket has a valid
+ * policy and a recorded device, and the socket is AF_INET TCP (stream) or
+ * UDP (datagram).  For UDP the peer address is taken from msg->msg_name.
+ */
+static void sock_netpolicy_manage_flow(struct sock *sk, struct msghdr *msg)
+{
+#ifdef CONFIG_NETPOLICY
+ struct netpolicy_instance *instance;
+ struct netpolicy_flow_spec *flow;
+ bool change = false;
+ int queue;
+
+ instance = netpolicy_find_instance(sk);
+ if (!instance)
+ return;
+
+ if (!instance->dev)
+ return;
+
+ flow = &instance->flow;
+ /* TODO: need to change here and add more protocol support */
+ if (sk->sk_family != AF_INET)
+ return;
+ if ((sk->sk_protocol == IPPROTO_TCP) &&
+ (sk->sk_type == SOCK_STREAM)) {
+ /* flow source = sk_daddr/sk_dport, destination = sk_rcv_saddr/sk_num */
+ if ((flow->flow_type != TCP_V4_FLOW) ||
+ (flow->spec.tcp_udp_ip4_spec.ip4src != sk->sk_daddr) ||
+ (flow->spec.tcp_udp_ip4_spec.psrc != sk->sk_dport) ||
+ (flow->spec.tcp_udp_ip4_spec.ip4dst != sk->sk_rcv_saddr) ||
+ (flow->spec.tcp_udp_ip4_spec.pdst != htons(sk->sk_num)))
+ change = true;
+ if (change) {
+ flow->flow_type = TCP_V4_FLOW;
+ flow->spec.tcp_udp_ip4_spec.ip4src = sk->sk_daddr;
+ flow->spec.tcp_udp_ip4_spec.psrc = sk->sk_dport;
+ flow->spec.tcp_udp_ip4_spec.ip4dst = sk->sk_rcv_saddr;
+ flow->spec.tcp_udp_ip4_spec.pdst = htons(sk->sk_num);
+ }
+ } else if ((sk->sk_protocol == IPPROTO_UDP) &&
+ (sk->sk_type == SOCK_DGRAM)) {
+ DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
+
+ /* Without a complete sender address there is nothing to match on */
+ if (!sin || !sin->sin_addr.s_addr || !sin->sin_port)
+ return;
+ if ((flow->flow_type != UDP_V4_FLOW) ||
+ (flow->spec.tcp_udp_ip4_spec.ip4src != sin->sin_addr.s_addr) ||
+ (flow->spec.tcp_udp_ip4_spec.psrc != sin->sin_port) ||
+ (flow->spec.tcp_udp_ip4_spec.ip4dst != sk->sk_rcv_saddr) ||
+ (flow->spec.tcp_udp_ip4_spec.pdst != htons(sk->sk_num)))
+ change = true;
+ if (change) {
+ flow->flow_type = UDP_V4_FLOW;
+ flow->spec.tcp_udp_ip4_spec.ip4src = sin->sin_addr.s_addr;
+ flow->spec.tcp_udp_ip4_spec.psrc = sin->sin_port;
+ flow->spec.tcp_udp_ip4_spec.ip4dst = sk->sk_rcv_saddr;
+ flow->spec.tcp_udp_ip4_spec.pdst = htons(sk->sk_num);
+ }
+ } else {
+ return;
+ }
+
+ queue = netpolicy_pick_queue(instance, true);
+ if (queue < 0)
+ return;
+ /* Re-program the rule if the queue or the flow tuple has changed */
+ if ((queue != atomic_read(&instance->rule_queue)) || change)
+ netpolicy_set_rules(instance);
+#endif
+}
+
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -772,6 +837,12 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
flags & ~MSG_DONTWAIT, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
+
+ /* The dev info, src address and port information for UDP
+ * can only be retrieved after processing the msg.
+ */
+ sock_netpolicy_manage_flow(sk, msg);
+
return err;
}
EXPORT_SYMBOL(inet_recvmsg);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e61f7cd..c495392 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1785,6 +1785,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (sk) {
int ret;
+#ifdef CONFIG_NETPOLICY
+ /* Record dev info before it's discarded in udp_queue_rcv_skb */
+ sk->sk_netpolicy.dev = skb->dev;
+#endif
if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
inet_compute_pseudo);
--
2.5.5
^ permalink raw reply related
* [RFC V2 PATCH 18/25] net/netpolicy: set Tx queues according to policy
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
When the device tries to transmit a packet, netdev_pick_tx is called to
find the available Tx queues. If the net policy is applied, it picks up
the assigned Tx queue from the net policy subsystem, and redirects the
traffic to the assigned queue.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
include/net/sock.h | 9 +++++++++
net/core/dev.c | 20 ++++++++++++++++++--
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index fd4132f..6219434 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2273,4 +2273,13 @@ extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
+/*
+ * Return the netpolicy instance embedded in @sk, or NULL when
+ * CONFIG_NETPOLICY is not set or the socket's policy is not valid.
+ */
+static inline struct netpolicy_instance *netpolicy_find_instance(struct sock *sk)
+{
+#ifdef CONFIG_NETPOLICY
+ if (is_net_policy_valid(sk->sk_netpolicy.policy))
+ return &sk->sk_netpolicy;
+#endif
+ return NULL;
+}
#endif /* _SOCK_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a9c39f..08db6eb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3266,6 +3266,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
struct sk_buff *skb,
void *accel_priv)
{
+ struct sock *sk = skb->sk;
int queue_index = 0;
#ifdef CONFIG_XPS
@@ -3280,8 +3281,23 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
if (ops->ndo_select_queue)
queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
__netdev_pick_tx);
- else
- queue_index = __netdev_pick_tx(dev, skb);
+ else {
+#ifdef CONFIG_NETPOLICY
+ struct netpolicy_instance *instance;
+
+ queue_index = -1;
+ if (dev->netpolicy && sk) {
+ instance = netpolicy_find_instance(sk);
+ if (instance) {
+ if (!instance->dev)
+ instance->dev = dev;
+ queue_index = netpolicy_pick_queue(instance, false);
+ }
+ }
+ if (queue_index < 0)
+#endif
+ queue_index = __netdev_pick_tx(dev, skb);
+ }
if (!accel_priv)
queue_index = netdev_cap_txqueue(dev, queue_index);
--
2.5.5
^ permalink raw reply related
* [RFC V2 PATCH 17/25] net/netpolicy: introduce netpolicy_pick_queue
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
To achieve better network performance, the key step is to distribute the
packets to dedicated queues according to policy and system run time
status.
This patch provides an interface which can return the proper dedicated
queue for socket/task. Then the packets of the socket/task will be
redirected to the dedicated queue for better network performance.
For selecting the proper queue, currently it uses round-robin algorithm
to find the available object from the given policy object list. The
algorithm is good enough for now. But it could be improved by some
adaptive algorithm later.
The selected object will be stored in hashtable. So it does not need to
go through the whole object list every time.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
include/linux/netpolicy.h | 5 ++
net/core/netpolicy.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 141 insertions(+)
diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index 5900252..a522015 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -97,6 +97,7 @@ extern void update_netpolicy_sys_map(void);
extern int netpolicy_register(struct netpolicy_instance *instance,
enum netpolicy_name policy);
extern void netpolicy_unregister(struct netpolicy_instance *instance);
+extern int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx);
#else
static inline void update_netpolicy_sys_map(void)
{
@@ -111,6 +112,10 @@ static inline void netpolicy_unregister(struct netpolicy_instance *instance)
{
}
+/* Stub used in the #else branch of this header: always returns 0. */
+static inline int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx)
+{
+ return 0;
+}
#endif
#endif /*__LINUX_NETPOLICY_H*/
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 3605761..98ca430 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -290,6 +290,142 @@ static void netpolicy_record_clear_dev_node(struct net_device *dev)
spin_unlock_bh(&np_hashtable_lock);
}
+/*
+ * Pick an object from @dev's list for @policy in the requested direction.
+ *
+ * The list is walked under dev->np_ob_list_lock and the entry with the
+ * lowest refcnt is chosen (the first such entry on ties); its refcnt is
+ * incremented before the lock is dropped.
+ *
+ * Returns the chosen object, or NULL when the device has no netpolicy
+ * support, the device's current policy does not match @policy (and is not
+ * NET_POLICY_MIX), or the list is empty.
+ */
+static struct netpolicy_object *get_avail_object(struct net_device *dev,
+ enum netpolicy_name policy,
+ bool is_rx)
+{
+ int dir = is_rx ? NETPOLICY_RX : NETPOLICY_TX;
+ struct netpolicy_object *tmp, *obj = NULL;
+ int val = -1;
+
+ /* Check if net policy is supported */
+ if (!dev || !dev->netpolicy)
+ return NULL;
+
+ /* The system should have queues which support the request policy. */
+ if ((policy != dev->netpolicy->cur_policy) &&
+ (dev->netpolicy->cur_policy != NET_POLICY_MIX))
+ return NULL;
+
+ spin_lock_bh(&dev->np_ob_list_lock);
+ /* val == -1 means "no candidate yet"; otherwise keep the smallest refcnt */
+ list_for_each_entry(tmp, &dev->netpolicy->obj_list[dir][policy], list) {
+ if ((val > atomic_read(&tmp->refcnt)) ||
+ (val == -1)) {
+ val = atomic_read(&tmp->refcnt);
+ obj = tmp;
+ }
+ }
+
+ if (WARN_ON(!obj)) {
+ spin_unlock_bh(&dev->np_ob_list_lock);
+ return NULL;
+ }
+ atomic_inc(&obj->refcnt);
+ spin_unlock_bh(&dev->np_ob_list_lock);
+
+ return obj;
+}
+
+/*
+ * Return the queue assigned to @instance for the given direction,
+ * assigning one first if the instance's record has none yet.
+ *
+ * The record is looked up in the hash table by instance->ptr.  When an
+ * object has to be assigned, a copy of the record is filled in and swapped
+ * into the hash table in place of the old one.
+ *
+ * Returns the queue index, or -1 when the instance is not registered (its
+ * policy is then reset to NET_POLICY_INVALID), when memory is short, or
+ * when no object is available.
+ */
+static int get_avail_queue(struct netpolicy_instance *instance, bool is_rx)
+{
+	struct netpolicy_record *old_record, *new_record;
+	struct net_device *dev = instance->dev;
+	unsigned long ptr_id = (uintptr_t)instance->ptr;
+	int queue = -1;
+
+	spin_lock_bh(&np_hashtable_lock);
+	old_record = netpolicy_record_search(ptr_id);
+	if (!old_record) {
+		pr_warn("NETPOLICY: doesn't registered. Remove net policy settings!\n");
+		instance->policy = NET_POLICY_INVALID;
+		goto err;
+	}
+
+	if (is_rx && old_record->rx_obj) {
+		queue = old_record->rx_obj->queue;
+	} else if (!is_rx && old_record->tx_obj) {
+		queue = old_record->tx_obj->queue;
+	} else {
+		/*
+		 * np_hashtable_lock is a spinlock held with bottom halves
+		 * disabled, so the allocation must not sleep.
+		 */
+		new_record = kzalloc(sizeof(*new_record), GFP_ATOMIC);
+		if (!new_record)
+			goto err;
+		memcpy(new_record, old_record, sizeof(*new_record));
+
+		if (is_rx) {
+			new_record->rx_obj = get_avail_object(dev, new_record->policy, is_rx);
+			if (!new_record->dev)
+				new_record->dev = dev;
+			if (!new_record->rx_obj) {
+				kfree(new_record);
+				goto err;
+			}
+			queue = new_record->rx_obj->queue;
+		} else {
+			new_record->tx_obj = get_avail_object(dev, new_record->policy, is_rx);
+			if (!new_record->dev)
+				new_record->dev = dev;
+			if (!new_record->tx_obj) {
+				kfree(new_record);
+				goto err;
+			}
+			queue = new_record->tx_obj->queue;
+		}
+		/* update record */
+		hlist_replace_rcu(&old_record->hash_node, &new_record->hash_node);
+		/*
+		 * NOTE(review): freed without waiting for an RCU grace period;
+		 * confirm that no lockless reader can still hold old_record.
+		 */
+		kfree(old_record);
+	}
+err:
+	spin_unlock_bh(&np_hashtable_lock);
+	return queue;
+}
+
+/*
+ * Check whether the policy requested by @instance can be served by the
+ * current policy of its device.
+ *
+ * Returns false when either policy is NET_POLICY_NONE, when the device is
+ * not in MIX mode and its policy differs from the requested one, or when
+ * the device is in MIX mode and NET_POLICY_CPU is requested; the last two
+ * cases also log a warning.  Returns true otherwise.
+ *
+ * instance->dev and dev->netpolicy are dereferenced without a check here.
+ */
+static inline bool policy_validate(struct netpolicy_instance *instance)
+{
+ struct net_device *dev = instance->dev;
+ enum netpolicy_name cur_policy;
+
+ cur_policy = dev->netpolicy->cur_policy;
+ if ((instance->policy == NET_POLICY_NONE) ||
+ (cur_policy == NET_POLICY_NONE))
+ return false;
+
+ if (((cur_policy != NET_POLICY_MIX) && (cur_policy != instance->policy)) ||
+ ((cur_policy == NET_POLICY_MIX) && (instance->policy == NET_POLICY_CPU))) {
+ pr_warn("NETPOLICY: %s current device policy %s doesn't support required policy %s! Remove net policy settings!\n",
+ dev->name, policy_name[cur_policy],
+ policy_name[instance->policy]);
+ return false;
+ }
+ return true;
+}
+
+/**
+ * netpolicy_pick_queue() - Find proper queue
+ * @instance: NET policy per socket/task instance info
+ * @is_rx: RX queue or TX queue
+ *
+ * This function finds the proper queue according to policy.
+ * When no queue has been assigned yet, the object with the lowest
+ * reference count on the device's list for the policy is selected.
+ * The selected object is stored in the hash table, so later calls do not
+ * need to go through the whole object list again.
+ *
+ * Return: the assigned queue on success, a negative value on failure
+ * (-EINVAL when the device has no netpolicy support or the policy is
+ * not valid for the device)
+ */
+int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx)
+{
+ struct net_device *dev = instance->dev;
+
+ /* policy_validate() dereferences dev->netpolicy, so check it first */
+ if (!dev || !dev->netpolicy)
+ return -EINVAL;
+
+ if (!policy_validate(instance))
+ return -EINVAL;
+
+ return get_avail_queue(instance, is_rx);
+}
+EXPORT_SYMBOL(netpolicy_pick_queue);
+
/**
* netpolicy_register() - Register per socket/task policy request
* @instance: NET policy per socket/task instance info
--
2.5.5
^ permalink raw reply related
* [RFC V2 PATCH 16/25] net/netpolicy: introduce per socket netpolicy
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
The network socket is the most basic unit which control the network
traffic. This patch introduces a new socket option SO_NETPOLICY to
set/get net policy for socket. so that the application can set its own
policy on socket to improve the network performance.
Per socket net policy can also be inherited by new socket.
The usage of SO_NETPOLICY socket option is as below.
setsockopt(sockfd,SOL_SOCKET,SO_NETPOLICY,&policy,sizeof(int))
getsockopt(sockfd,SOL_SOCKET,SO_NETPOLICY,&policy,sizeof(int))
The policy set by SO_NETPOLICY socket option must be valid and
compatible with current device policy. Otherwise, it will error out. The
socket policy will be set to NET_POLICY_INVALID.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
arch/alpha/include/uapi/asm/socket.h | 2 ++
arch/avr32/include/uapi/asm/socket.h | 2 ++
arch/frv/include/uapi/asm/socket.h | 2 ++
arch/ia64/include/uapi/asm/socket.h | 2 ++
arch/m32r/include/uapi/asm/socket.h | 2 ++
arch/mips/include/uapi/asm/socket.h | 2 ++
arch/mn10300/include/uapi/asm/socket.h | 2 ++
arch/parisc/include/uapi/asm/socket.h | 2 ++
arch/powerpc/include/uapi/asm/socket.h | 2 ++
arch/s390/include/uapi/asm/socket.h | 2 ++
arch/sparc/include/uapi/asm/socket.h | 2 ++
arch/xtensa/include/uapi/asm/socket.h | 2 ++
include/net/request_sock.h | 4 +++-
include/net/sock.h | 9 +++++++++
include/uapi/asm-generic/socket.h | 2 ++
net/core/sock.c | 28 ++++++++++++++++++++++++++++
16 files changed, 66 insertions(+), 1 deletion(-)
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 9e46d6e..06b2ef9 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -97,4 +97,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 1fd147f..24f85f0 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index afbc98f0..82c8d44 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -90,5 +90,7 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 0018fad..b99c1df 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -99,4 +99,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 5fe42fc..71a43ed 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 2027240a..ce8b9ba 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -108,4 +108,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index 5129f23..c041265 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 9c935d7..2639dcd 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -89,4 +89,6 @@
#define SO_CNX_ADVICE 0x402E
+#define SO_NETPOLICY 0x402F
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 1672e33..e04e3b6 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -97,4 +97,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 41b51c2..d43b854 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -96,4 +96,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 31aede3..94a2cdf 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -86,6 +86,8 @@
#define SO_CNX_ADVICE 0x0037
+#define SO_NETPOLICY 0x0038
+
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 81435d9..97f1691 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -101,4 +101,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* _XTENSA_SOCKET_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 6ebe13e..1fa2d0e 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -101,7 +101,9 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
atomic_set(&req->rsk_refcnt, 0);
-
+#ifdef CONFIG_NETPOLICY
+ memcpy(&req_to_sk(req)->sk_netpolicy, &sk_listener->sk_netpolicy, sizeof(sk_listener->sk_netpolicy));
+#endif
return req;
}
diff --git a/include/net/sock.h b/include/net/sock.h
index ff5be7e..fd4132f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -70,6 +70,7 @@
#include <net/checksum.h>
#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
+#include <linux/netpolicy.h>
/*
* This structure really needs to be cleaned up.
@@ -141,6 +142,7 @@ typedef __u64 __bitwise __addrpair;
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
* @skc_incoming_cpu: record/match cpu processing incoming packets
* @skc_refcnt: reference count
+ * @skc_netpolicy: per socket net policy
*
* This is the minimal network layer representation of sockets, the header
* for struct sock and struct inet_timewait_sock.
@@ -200,6 +202,10 @@ struct sock_common {
struct sock *skc_listener; /* request_sock */
struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */
};
+
+#ifdef CONFIG_NETPOLICY
+ struct netpolicy_instance skc_netpolicy;
+#endif
/*
* fields between dontcopy_begin/dontcopy_end
* are not copied in sock_copy()
@@ -339,6 +345,9 @@ struct sock {
#define sk_incoming_cpu __sk_common.skc_incoming_cpu
#define sk_flags __sk_common.skc_flags
#define sk_rxhash __sk_common.skc_rxhash
+#ifdef CONFIG_NETPOLICY
+#define sk_netpolicy __sk_common.skc_netpolicy
+#endif
socket_lock_t sk_lock;
struct sk_buff_head sk_receive_queue;
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 67d632f..d2a5aeb 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -92,4 +92,6 @@
#define SO_CNX_ADVICE 53
+#define SO_NETPOLICY 54
+
#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 25dab8b..77f226b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1003,6 +1003,12 @@ set_rcvbuf:
if (val == 1)
dst_negative_advice(sk);
break;
+
+#ifdef CONFIG_NETPOLICY
+ case SO_NETPOLICY:
+ ret = netpolicy_register(&sk->sk_netpolicy, val);
+ break;
+#endif
default:
ret = -ENOPROTOOPT;
break;
@@ -1263,6 +1269,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_incoming_cpu;
break;
+#ifdef CONFIG_NETPOLICY
+ case SO_NETPOLICY:
+ v.val = sk->sk_netpolicy.policy;
+ break;
+#endif
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -1424,6 +1435,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
+
+#ifdef CONFIG_NETPOLICY
+ sk->sk_netpolicy.dev = NULL;
+ sk->sk_netpolicy.ptr = (void *)sk;
+ sk->sk_netpolicy.policy = NET_POLICY_INVALID;
+#endif
}
return sk;
@@ -1461,6 +1478,10 @@ static void __sk_destruct(struct rcu_head *head)
put_pid(sk->sk_peer_pid);
if (likely(sk->sk_net_refcnt))
put_net(sock_net(sk));
+#ifdef CONFIG_NETPOLICY
+ if (is_net_policy_valid(sk->sk_netpolicy.policy))
+ netpolicy_unregister(&sk->sk_netpolicy);
+#endif
sk_prot_free(sk->sk_prot_creator, sk);
}
@@ -1597,6 +1618,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
if (sock_needs_netstamp(sk) &&
newsk->sk_flags & SK_FLAGS_TIMESTAMP)
net_enable_timestamp();
+
+#ifdef CONFIG_NETPOLICY
+ newsk->sk_netpolicy.ptr = (void *)newsk;
+ if (is_net_policy_valid(newsk->sk_netpolicy.policy))
+ netpolicy_register(&newsk->sk_netpolicy, newsk->sk_netpolicy.policy);
+
+#endif
}
out:
return newsk;
--
2.5.5
^ permalink raw reply related
* [RFC V2 PATCH 15/25] net/netpolicy: implement netpolicy register
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
A socket/task can only benefit once it registers itself with a
specific policy. On first registration, a record is created and
inserted into an RCU hash table. The record includes ptr, policy and
object information. ptr is the socket/task's pointer, which is used as
the key to look up the record in the hash table. The object will be
assigned later.
This patch also introduces a new policy value, NET_POLICY_INVALID, which
indicates that the task/socket is not registered.
np_hashtable_lock is introduced to protect the hash table.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
include/linux/netpolicy.h | 26 ++++++++
net/core/netpolicy.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 179 insertions(+)
diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index cc75e3c..5900252 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -17,6 +17,7 @@
#define __LINUX_NETPOLICY_H
enum netpolicy_name {
+ NET_POLICY_INVALID = -1,
NET_POLICY_NONE = 0,
NET_POLICY_CPU,
NET_POLICY_BULK,
@@ -79,12 +80,37 @@ struct netpolicy_info {
struct list_head obj_list[NETPOLICY_RXTX][NET_POLICY_MAX];
};
+struct netpolicy_instance {
+ struct net_device *dev;
+ enum netpolicy_name policy; /* required policy */
+ void *ptr; /* pointers */
+};
+
+/* check if policy is valid */
+static inline int is_net_policy_valid(enum netpolicy_name policy)
+{
+ return ((policy < NET_POLICY_MAX) && (policy > NET_POLICY_INVALID));
+}
+
#ifdef CONFIG_NETPOLICY
extern void update_netpolicy_sys_map(void);
+extern int netpolicy_register(struct netpolicy_instance *instance,
+ enum netpolicy_name policy);
+extern void netpolicy_unregister(struct netpolicy_instance *instance);
#else
static inline void update_netpolicy_sys_map(void)
{
}
+
+static inline int netpolicy_register(struct netpolicy_instance *instance,
+ enum netpolicy_name policy)
+{ return 0;
+}
+
+static inline void netpolicy_unregister(struct netpolicy_instance *instance)
+{
+}
+
#endif
#endif /*__LINUX_NETPOLICY_H*/
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 7579685..3605761 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -38,6 +38,19 @@
#include <linux/sort.h>
#include <linux/ctype.h>
#include <linux/cpu.h>
+#include <linux/hashtable.h>
+
+struct netpolicy_record {
+ struct hlist_node hash_node;
+ unsigned long ptr_id;
+ enum netpolicy_name policy;
+ struct net_device *dev;
+ struct netpolicy_object *rx_obj;
+ struct netpolicy_object *tx_obj;
+};
+
+static DEFINE_HASHTABLE(np_record_hash, 10);
+static DEFINE_SPINLOCK(np_hashtable_lock);
static int netpolicy_get_dev_info(struct net_device *dev,
struct netpolicy_dev_info *d_info)
@@ -223,6 +236,143 @@ static int netpolicy_enable(struct net_device *dev)
return 0;
}
+static struct netpolicy_record *netpolicy_record_search(unsigned long ptr_id)
+{
+ struct netpolicy_record *rec = NULL;
+
+ hash_for_each_possible_rcu(np_record_hash, rec, hash_node, ptr_id) {
+ if (rec->ptr_id == ptr_id)
+ break;
+ }
+
+ return rec;
+}
+
+static void put_queue(struct net_device *dev,
+ struct netpolicy_object *rx_obj,
+ struct netpolicy_object *tx_obj)
+{
+ if (!dev || !dev->netpolicy)
+ return;
+
+ if (rx_obj)
+ atomic_dec(&rx_obj->refcnt);
+ if (tx_obj)
+ atomic_dec(&tx_obj->refcnt);
+}
+
+static void netpolicy_record_clear_obj(void)
+{
+ struct netpolicy_record *rec;
+ int i;
+
+ spin_lock_bh(&np_hashtable_lock);
+ hash_for_each_rcu(np_record_hash, i, rec, hash_node) {
+ put_queue(rec->dev, rec->rx_obj, rec->tx_obj);
+ rec->rx_obj = NULL;
+ rec->tx_obj = NULL;
+ }
+ spin_unlock_bh(&np_hashtable_lock);
+}
+
+static void netpolicy_record_clear_dev_node(struct net_device *dev)
+{
+ struct netpolicy_record *rec;
+ int i;
+
+ spin_lock_bh(&np_hashtable_lock);
+ hash_for_each_rcu(np_record_hash, i, rec, hash_node) {
+ if (rec->dev == dev) {
+ hash_del_rcu(&rec->hash_node);
+ kfree(rec);
+ }
+ }
+ spin_unlock_bh(&np_hashtable_lock);
+}
+
+/**
+ * netpolicy_register() - Register per socket/task policy request
+ * @instance: NET policy per socket/task instance info
+ * @policy: request NET policy
+ *
+ * This function intends to register per socket/task policy request.
+ * If it's the first time to register, a record will be created and
+ * inserted into RCU hash table.
+ *
+ * The record includes ptr, policy and object info. ptr of the socket/task
+ * is the key to search the record in hash table. Object will not be
+ * assigned until the first packet is received/transmitted.
+ *
+ * Return: 0 on success, others on failure
+ */
+int netpolicy_register(struct netpolicy_instance *instance,
+ enum netpolicy_name policy)
+{
+ unsigned long ptr_id = (uintptr_t)instance->ptr;
+ struct netpolicy_record *new, *old;
+
+ if (!is_net_policy_valid(policy)) {
+ instance->policy = NET_POLICY_INVALID;
+ return -EINVAL;
+ }
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new) {
+ instance->policy = NET_POLICY_INVALID;
+ return -ENOMEM;
+ }
+
+ spin_lock_bh(&np_hashtable_lock);
+ /* Check it in mapping table */
+ old = netpolicy_record_search(ptr_id);
+ if (old) {
+ if (old->policy != policy) {
+ put_queue(old->dev, old->rx_obj, old->tx_obj);
+ old->rx_obj = NULL;
+ old->tx_obj = NULL;
+ old->policy = policy;
+ }
+ kfree(new);
+ } else {
+ new->ptr_id = ptr_id;
+ new->dev = instance->dev;
+ new->policy = policy;
+ hash_add_rcu(np_record_hash, &new->hash_node, ptr_id);
+ }
+ instance->policy = policy;
+ spin_unlock_bh(&np_hashtable_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(netpolicy_register);
+
+/**
+ * netpolicy_unregister() - Unregister per socket/task policy request
+ * @instance: NET policy per socket/task instance info
+ *
+ * This function unregisters a policy request by deleting the related
+ * record from the hash table.
+ *
+ */
+void netpolicy_unregister(struct netpolicy_instance *instance)
+{
+ struct netpolicy_record *record;
+ unsigned long ptr_id = (uintptr_t)instance->ptr;
+
+ spin_lock_bh(&np_hashtable_lock);
+ /* del from hash table */
+ record = netpolicy_record_search(ptr_id);
+ if (record) {
+ hash_del_rcu(&record->hash_node);
+ /* The record cannot be shared, so it can be safely freed. */
+ put_queue(record->dev, record->rx_obj, record->tx_obj);
+ kfree(record);
+ }
+ instance->policy = NET_POLICY_INVALID;
+ spin_unlock_bh(&np_hashtable_lock);
+}
+EXPORT_SYMBOL(netpolicy_unregister);
+
const char *policy_name[NET_POLICY_MAX] = {
"NONE",
"CPU",
@@ -825,6 +975,7 @@ static int netpolicy_notify(struct notifier_block *this,
break;
case NETDEV_GOING_DOWN:
uninit_netpolicy(dev);
+ netpolicy_record_clear_dev_node(dev);
#ifdef CONFIG_PROC_FS
proc_remove(dev->proc_dev);
dev->proc_dev = NULL;
@@ -863,6 +1014,8 @@ void update_netpolicy_sys_map(void)
dev->netpolicy->cur_policy = NET_POLICY_NONE;
+ /* clear mapping table */
+ netpolicy_record_clear_obj();
/* rebuild everything */
netpolicy_disable(dev);
netpolicy_enable(dev);
--
2.5.5
^ permalink raw reply related
* [RFC V2 PATCH 14/25] net/netpolicy: handle channel changes
From: kan.liang @ 2015-01-01 1:39 UTC (permalink / raw)
To: davem, linux-kernel, netdev
Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
hannes, jesse.brandeburg, andi, Kan Liang
In-Reply-To: <1420076354-4861-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
Users can use ethtool to set the number of channels. This patch handles
channel changes by rebuilding the object list.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
include/linux/netpolicy.h | 8 ++++++++
net/core/ethtool.c | 8 +++++++-
net/core/netpolicy.c | 1 +
3 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index 579ff98..cc75e3c 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -79,4 +79,12 @@ struct netpolicy_info {
struct list_head obj_list[NETPOLICY_RXTX][NET_POLICY_MAX];
};
+#ifdef CONFIG_NETPOLICY
+extern void update_netpolicy_sys_map(void);
+#else
+static inline void update_netpolicy_sys_map(void)
+{
+}
+#endif
+
#endif /*__LINUX_NETPOLICY_H*/
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9774898..e1f8bd0 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1703,6 +1703,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
{
struct ethtool_channels channels, max;
u32 max_rx_in_use = 0;
+ int ret;
if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
return -EOPNOTSUPP;
@@ -1726,7 +1727,12 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
(channels.combined_count + channels.rx_count) <= max_rx_in_use)
return -EINVAL;
- return dev->ethtool_ops->set_channels(dev, &channels);
+ ret = dev->ethtool_ops->set_channels(dev, &channels);
+#ifdef CONFIG_NETPOLICY
+ if (!ret)
+ update_netpolicy_sys_map();
+#endif
+ return ret;
}
static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 3b523fc..7579685 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -885,6 +885,7 @@ unlock:
}
}
}
+EXPORT_SYMBOL(update_netpolicy_sys_map);
static int netpolicy_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
--
2.5.5
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox