* [PATCH net v3 2/2] ipv4: igmp: use alarmtimer to prevent delayed reports
From: Tejaswi Tanikella @ 2018-06-11 13:46 UTC (permalink / raw)
To: netdev, f.fainelli; +Cc: andrew, davem
In-Reply-To: <20180611134405.GA28495@tejaswit-linux.qualcomm.com>
On receiving a IGMPv2/v3 query, based on max_delay set in the header a
timer is started to send out a response after a random time within
max_delay. If the system then moves into suspend state, Report is
delayed until system wakes up.
Use a alarmtimer instead of using a timer. Alarmtimer will wake the
system up from suspend to send out the IGMP report.
Signed-off-by: Tejaswi Tanikella <tejaswit@codeaurora.org>
---
v2: use alarmtimer instead of wakelock.
v3: add patches in the right order.
---
If these changes are fine, I'll share similar patches for MLD and ARP.
---
include/linux/igmp.h | 7 ++++++-
net/ipv4/igmp.c | 27 ++++++++++++++++-----------
2 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index f823185..45852eb 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -20,6 +20,9 @@
#include <linux/in.h>
#include <linux/refcount.h>
#include <uapi/linux/igmp.h>
+#ifdef CONFIG_IP_MULTICAST
+#include <linux/alarmtimer.h>
+#endif
static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
{
@@ -83,7 +86,9 @@ struct ip_mc_list {
struct ip_mc_list __rcu *next_rcu;
};
struct ip_mc_list __rcu *next_hash;
- struct timer_list timer;
+#ifdef CONFIG_IP_MULTICAST
+ struct alarm alarm;
+#endif
int users;
refcount_t refcnt;
spinlock_t lock;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 85b617b..c30b5c4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -199,7 +199,7 @@ static void ip_ma_put(struct ip_mc_list *im)
static void igmp_stop_timer(struct ip_mc_list *im)
{
spin_lock_bh(&im->lock);
- if (del_timer(&im->timer))
+ if (alarm_cancel(&im->alarm))
refcount_dec(&im->refcnt);
im->tm_running = 0;
im->reporter = 0;
@@ -210,11 +210,11 @@ static void igmp_stop_timer(struct ip_mc_list *im)
/* It must be called with locked im->lock */
static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
{
- int tv = prandom_u32() % max_delay;
+ ktime_t expiry = jiffies_to_ktime(prandom_u32() % max_delay + 2);
im->tm_running = 1;
- if (!mod_timer(&im->timer, jiffies+tv+2))
- refcount_inc(&im->refcnt);
+ alarm_start_relative(&im->alarm, expiry);
+ refcount_inc(&im->refcnt);
}
static void igmp_gq_start_timer(struct in_device *in_dev)
@@ -241,11 +241,14 @@ static void igmp_ifc_start_timer(struct in_device *in_dev, int delay)
static void igmp_mod_timer(struct ip_mc_list *im, int max_delay)
{
+ ktime_t expiry;
+
spin_lock_bh(&im->lock);
im->unsolicit_count = 0;
- if (del_timer(&im->timer)) {
- if ((long)(im->timer.expires-jiffies) < max_delay) {
- add_timer(&im->timer);
+ expiry = alarm_expires_remaining(&im->alarm);
+ if (alarm_cancel(&im->alarm)) {
+ if (ktime_to_jiffies(expiry) < max_delay) {
+ alarm_start_relative(&im->alarm, expiry);
im->tm_running = 1;
spin_unlock_bh(&im->lock);
return;
@@ -812,9 +815,9 @@ static void igmp_ifc_event(struct in_device *in_dev)
}
-static void igmp_timer_expire(struct timer_list *t)
+enum alarmtimer_restart igmp_timer_expire(struct alarm *alarm, ktime_t now)
{
- struct ip_mc_list *im = from_timer(im, t, timer);
+ struct ip_mc_list *im = container_of(alarm, struct ip_mc_list, alarm);
struct in_device *in_dev = im->interface;
spin_lock(&im->lock);
@@ -835,6 +838,8 @@ static void igmp_timer_expire(struct timer_list *t)
igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
ip_ma_put(im);
+
+ return ALARMTIMER_NORESTART;
}
/* mark EXCLUDE-mode sources */
@@ -1413,7 +1418,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
refcount_set(&im->refcnt, 1);
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
- timer_setup(&im->timer, igmp_timer_expire, 0);
+ alarm_init(&im->alarm, ALARM_BOOTTIME, igmp_timer_expire);
im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
#endif
@@ -2811,7 +2816,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
}
- delta = im->timer.expires - jiffies;
+ delta = ktime_to_jiffies(alarm_expires_remaining(&im->alarm));
seq_printf(seq,
"\t\t\t\t%08X %5d %d:%08lX\t\t%d\n",
im->multiaddr, im->users,
--
1.9.1
^ permalink raw reply related
* [PATCH net v3 1/2] ktime: helpers to convert between ktime and jiffies
From: Tejaswi Tanikella @ 2018-06-11 13:44 UTC (permalink / raw)
To: netdev, f.fainelli; +Cc: andrew, davem
Signed-off-by: Tejaswi Tanikella <tejaswit@codeaurora.org>
---
v2: use alarmtimer instead of wakelock.
v3: add patches in the right order.
---
include/linux/ktime.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 5b9fddb..4881483 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -96,6 +96,10 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
#define ktime_to_ns(kt) (kt)
+/* ktime to jiffies and back */
+#define ktime_to_jiffies(kt) nsecs_to_jiffies(kt)
+#define jiffies_to_ktime(j) jiffies_to_nsecs(j)
+
/**
* ktime_compare - Compares two ktime_t variables for less, greater or equal
* @cmp1: comparable1
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 4/6] net: ethernet: ti: cpsw: add CBS Qdisc offload
From: Ivan Khoronzhuk @ 2018-06-11 13:30 UTC (permalink / raw)
To: grygorii.strashko, davem
Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
p-varis, spatton, francois.ozog, yogeshs, nsekhar,
Ivan Khoronzhuk
In-Reply-To: <20180611133047.4818-1-ivan.khoronzhuk@linaro.org>
The cpsw has up to 4 FIFOs per port and upper 3 FIFOs can feed rate
limited queue with shaping. In order to set and enable shaping for
those 3 FIFOs queues the network device with CBS qdisc attached is
needed. The CBS configuration is added for dual-emac/single port mode
only, but potentially can be used in switch mode also, based on
switchdev for instance.
Despite the FIFO shapers can work w/o cpdma level shapers the base
usage must be in combine with cpdma level shapers as described in TRM,
that are set as maximum rates for interface queues with sysfs.
One of the possible configuration with txq shapers and CBS shapers:
Configured with echo RATE >
/sys/class/net/eth0/queues/tx-0/tx_maxrate
/---------------------------------------------------
/
/ cpdma level shapers
+----+ +----+ +----+ +----+ +----+ +----+ +----+ +----+
| c7 | | c6 | | c5 | | c4 | | c3 | | c2 | | c1 | | c0 |
\ / \ / \ / \ / \ / \ / \ / \ /
\ / \ / \ / \ / \ / \ / \ / \ /
\/ \/ \/ \/ \/ \/ \/ \/
+---------|------|------|------|-------------------------------------+
| +----+ | | +---+ |
| | +----+ | | |
| v v v v |
| +----+ +----+ +----+ +----+ p p+----+ +----+ +----+ +----+ |
| | | | | | | | | o o| | | | | | | | |
| | f3 | | f2 | | f1 | | f0 | r CPSW r| f3 | | f2 | | f1 | | f0 | |
| | | | | | | | | t t| | | | | | | | |
| \ / \ / \ / \ / 0 1\ / \ / \ / \ / |
| \ X \ / \ / \ / \ / \ / \ / \ / |
| \/ \ \/ \/ \/ \/ \/ \/ \/ |
+-------\------------------------------------------------------------+
\
\ FIFO shaper, set with CBS offload added in this patch,
\ FIFO0 cannot be rate limited
------------------------------------------------------
CBS shaper configuration is supposed to be used with root MQPRIO Qdisc
offload allowing to add sk_prio->tc->txq maps that direct traffic to
appropriate tx queue and maps L2 priority to FIFO shaper.
The CBS shaper is intended to be used for AVB where L2 priority
(pcp field) is used to differentiate class of traffic. So additionally
vlan needs to be created with appropriate egress sk_prio->l2 prio map.
If CBS has several tx queues assigned to it, the sum of their
bandwidth has not overlap bandwidth set for CBS. It's recomended the
CBS bandwidth to be a little bit more.
The CBS shaper is configured with CBS qdisc offload interface using tc
tool from iproute2 packet.
For instance:
$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1
$ tc -g class show dev eth0
+---(100:ffe2) mqprio
| +---(100:3) mqprio
| +---(100:4) mqprio
|
+---(100:ffe1) mqprio
| +---(100:2) mqprio
|
+---(100:ffe0) mqprio
+---(100:1) mqprio
$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1440 \
hicredit 60 sendslope -960000 idleslope 40000 offload 1
$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1470 \
hicredit 62 sendslope -980000 idleslope 20000 offload 1
The above code set CBS shapers for tc0 and tc1, for that txq0 and
txq1 is used. Pay attention, the real set bandwidth can differ a bit
due to discreteness of configuration parameters.
Here parameters like locredit, hicredit and sendslope are ignored
internally and are supposed to be set with assumption that maximum
frame size for frame - 1500.
It's supposed that interface speed is not changed while reconnection,
not always is true, so inform user in case speed of interface was
changed, as it can impact on dependent shapers configuration.
For more examples see Documentation.
Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
drivers/net/ethernet/ti/cpsw.c | 221 +++++++++++++++++++++++++++++++++
1 file changed, 221 insertions(+)
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index fd967d2bce5d..87a5586c5ea5 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -46,6 +46,8 @@
#include "cpts.h"
#include "davinci_cpdma.h"
+#include <net/pkt_sched.h>
+
#define CPSW_DEBUG (NETIF_MSG_HW | NETIF_MSG_WOL | \
NETIF_MSG_DRV | NETIF_MSG_LINK | \
NETIF_MSG_IFUP | NETIF_MSG_INTR | \
@@ -154,8 +156,12 @@ do { \
#define IRQ_NUM 2
#define CPSW_MAX_QUEUES 8
#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_FIFO_QUEUE_TYPE_SHIFT 16
+#define CPSW_FIFO_SHAPE_EN_SHIFT 16
+#define CPSW_FIFO_RATE_EN_SHIFT 20
#define CPSW_TC_NUM 4
#define CPSW_FIFO_SHAPERS_NUM (CPSW_TC_NUM - 1)
+#define CPSW_PCT_MASK 0x7f
#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT 29
#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK GENMASK(2, 0)
@@ -457,6 +463,8 @@ struct cpsw_priv {
bool rx_pause;
bool tx_pause;
bool mqprio_hw;
+ int fifo_bw[CPSW_TC_NUM];
+ int shp_cfg_speed;
u32 emac_port;
struct cpsw_common *cpsw;
};
@@ -1081,6 +1089,38 @@ static void cpsw_set_slave_mac(struct cpsw_slave *slave,
slave_write(slave, mac_lo(priv->mac_addr), SA_LO);
}
+static bool cpsw_shp_is_off(struct cpsw_priv *priv)
+{
+ struct cpsw_common *cpsw = priv->cpsw;
+ struct cpsw_slave *slave;
+ u32 shift, mask, val;
+
+ val = readl_relaxed(&cpsw->regs->ptype);
+
+ slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+ shift = CPSW_FIFO_SHAPE_EN_SHIFT + 3 * slave->slave_num;
+ mask = 7 << shift;
+ val = val & mask;
+
+ return !val;
+}
+
+static void cpsw_fifo_shp_on(struct cpsw_priv *priv, int fifo, int on)
+{
+ struct cpsw_common *cpsw = priv->cpsw;
+ struct cpsw_slave *slave;
+ u32 shift, mask, val;
+
+ val = readl_relaxed(&cpsw->regs->ptype);
+
+ slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+ shift = CPSW_FIFO_SHAPE_EN_SHIFT + 3 * slave->slave_num;
+ mask = (1 << --fifo) << shift;
+ val = on ? val | mask : val & ~mask;
+
+ writel_relaxed(val, &cpsw->regs->ptype);
+}
+
static void _cpsw_adjust_link(struct cpsw_slave *slave,
struct cpsw_priv *priv, bool *link)
{
@@ -1120,6 +1160,12 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
mac_control |= BIT(4);
*link = true;
+
+ if (priv->shp_cfg_speed &&
+ priv->shp_cfg_speed != slave->phy->speed &&
+ !cpsw_shp_is_off(priv))
+ dev_warn(priv->dev,
+ "Speed was changed, CBS sahper speeds are changed!");
} else {
mac_control = 0;
/* disable forwarding */
@@ -1589,6 +1635,178 @@ static int cpsw_tc_to_fifo(int tc, int num_tc)
return CPSW_FIFO_SHAPERS_NUM - tc;
}
+static int cpsw_set_fifo_bw(struct cpsw_priv *priv, int fifo, int bw)
+{
+ struct cpsw_common *cpsw = priv->cpsw;
+ u32 val = 0, send_pct, shift;
+ struct cpsw_slave *slave;
+ int pct = 0, i;
+
+ if (bw > priv->shp_cfg_speed * 1000)
+ goto err;
+
+ /* shaping has to stay enabled for highest fifos linearly
+ * and fifo bw no more then interface can allow
+ */
+ slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+ send_pct = slave_read(slave, SEND_PERCENT);
+ for (i = CPSW_FIFO_SHAPERS_NUM; i > 0; i--) {
+ if (!bw) {
+ if (i >= fifo || !priv->fifo_bw[i])
+ continue;
+
+ dev_warn(priv->dev, "Prev FIFO%d is shaped", i);
+ continue;
+ }
+
+ if (!priv->fifo_bw[i] && i > fifo) {
+ dev_err(priv->dev, "Upper FIFO%d is not shaped", i);
+ return -EINVAL;
+ }
+
+ shift = (i - 1) * 8;
+ if (i == fifo) {
+ send_pct &= ~(CPSW_PCT_MASK << shift);
+ val = DIV_ROUND_UP(bw, priv->shp_cfg_speed * 10);
+ if (!val)
+ val = 1;
+
+ send_pct |= val << shift;
+ pct += val;
+ continue;
+ }
+
+ if (priv->fifo_bw[i])
+ pct += (send_pct >> shift) & CPSW_PCT_MASK;
+ }
+
+ if (pct >= 100)
+ goto err;
+
+ slave_write(slave, send_pct, SEND_PERCENT);
+ priv->fifo_bw[fifo] = bw;
+
+ dev_warn(priv->dev, "set FIFO%d bw = %d\n", fifo,
+ DIV_ROUND_CLOSEST(val * priv->shp_cfg_speed, 100));
+
+ return 0;
+err:
+ dev_err(priv->dev, "Bandwidth doesn't fit in tc configuration");
+ return -EINVAL;
+}
+
+static int cpsw_set_fifo_rlimit(struct cpsw_priv *priv, int fifo, int bw)
+{
+ struct cpsw_common *cpsw = priv->cpsw;
+ struct cpsw_slave *slave;
+ u32 tx_in_ctl_rg, val;
+ int ret;
+
+ ret = cpsw_set_fifo_bw(priv, fifo, bw);
+ if (ret)
+ return ret;
+
+ slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+ tx_in_ctl_rg = cpsw->version == CPSW_VERSION_1 ?
+ CPSW1_TX_IN_CTL : CPSW2_TX_IN_CTL;
+
+ if (!bw)
+ cpsw_fifo_shp_on(priv, fifo, bw);
+
+ val = slave_read(slave, tx_in_ctl_rg);
+ if (cpsw_shp_is_off(priv)) {
+ /* disable FIFOs rate limited queues */
+ val &= ~(0xf << CPSW_FIFO_RATE_EN_SHIFT);
+
+ /* set type of FIFO queues to normal priority mode */
+ val &= ~(3 << CPSW_FIFO_QUEUE_TYPE_SHIFT);
+
+ /* set type of FIFO queues to be rate limited */
+ if (bw)
+ val |= 2 << CPSW_FIFO_QUEUE_TYPE_SHIFT;
+ else
+ priv->shp_cfg_speed = 0;
+ }
+
+ /* toggle a FIFO rate limited queue */
+ if (bw)
+ val |= BIT(fifo + CPSW_FIFO_RATE_EN_SHIFT);
+ else
+ val &= ~BIT(fifo + CPSW_FIFO_RATE_EN_SHIFT);
+ slave_write(slave, val, tx_in_ctl_rg);
+
+ /* FIFO transmit shape enable */
+ cpsw_fifo_shp_on(priv, fifo, bw);
+ return 0;
+}
+
+/* Defaults:
+ * class A - prio 3
+ * class B - prio 2
+ * shaping for class A should be set first
+ */
+static int cpsw_set_cbs(struct net_device *ndev,
+ struct tc_cbs_qopt_offload *qopt)
+{
+ struct cpsw_priv *priv = netdev_priv(ndev);
+ struct cpsw_common *cpsw = priv->cpsw;
+ struct cpsw_slave *slave;
+ int prev_speed = 0;
+ int tc, ret, fifo;
+ u32 bw = 0;
+
+ tc = netdev_txq_to_tc(priv->ndev, qopt->queue);
+
+ /* enable channels in backward order, as highest FIFOs must be rate
+ * limited first and for compliance with CPDMA rate limited channels
+ * that also used in bacward order. FIFO0 cannot be rate limited.
+ */
+ fifo = cpsw_tc_to_fifo(tc, ndev->num_tc);
+ if (!fifo) {
+ dev_err(priv->dev, "Last tc%d can't be rate limited", tc);
+ return -EINVAL;
+ }
+
+ /* do nothing, it's disabled anyway */
+ if (!qopt->enable && !priv->fifo_bw[fifo])
+ return 0;
+
+ /* shapers can be set if link speed is known */
+ slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+ if (slave->phy && slave->phy->link) {
+ if (priv->shp_cfg_speed &&
+ priv->shp_cfg_speed != slave->phy->speed)
+ prev_speed = priv->shp_cfg_speed;
+
+ priv->shp_cfg_speed = slave->phy->speed;
+ }
+
+ if (!priv->shp_cfg_speed) {
+ dev_err(priv->dev, "Link speed is not known");
+ return -1;
+ }
+
+ ret = pm_runtime_get_sync(cpsw->dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(cpsw->dev);
+ return ret;
+ }
+
+ bw = qopt->enable ? qopt->idleslope : 0;
+ ret = cpsw_set_fifo_rlimit(priv, fifo, bw);
+ if (ret) {
+ priv->shp_cfg_speed = prev_speed;
+ prev_speed = 0;
+ }
+
+ if (bw && prev_speed)
+ dev_warn(priv->dev,
+ "Speed was changed, CBS sahper speeds are changed!");
+
+ pm_runtime_put_sync(cpsw->dev);
+ return ret;
+}
+
static int cpsw_ndo_open(struct net_device *ndev)
{
struct cpsw_priv *priv = netdev_priv(ndev);
@@ -2263,6 +2481,9 @@ static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
void *type_data)
{
switch (type) {
+ case TC_SETUP_QDISC_CBS:
+ return cpsw_set_cbs(ndev, type_data);
+
case TC_SETUP_QDISC_MQPRIO:
return cpsw_set_tc(ndev, type_data);
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 6/6] Documentation: networking: cpsw: add MQPRIO & CBS offload examples
From: Ivan Khoronzhuk @ 2018-06-11 13:30 UTC (permalink / raw)
To: grygorii.strashko, davem
Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
p-varis, spatton, francois.ozog, yogeshs, nsekhar,
Ivan Khoronzhuk
In-Reply-To: <20180611133047.4818-1-ivan.khoronzhuk@linaro.org>
This document describes MQPRIO and CBS Qdisc offload configuration
for cpsw driver based on examples. It potentially can be used in
audio video bridging (AVB) and time sensitive networking (TSN).
Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
Documentation/networking/cpsw.txt | 540 ++++++++++++++++++++++++++++++
1 file changed, 540 insertions(+)
create mode 100644 Documentation/networking/cpsw.txt
diff --git a/Documentation/networking/cpsw.txt b/Documentation/networking/cpsw.txt
new file mode 100644
index 000000000000..f5d58f502e52
--- /dev/null
+++ b/Documentation/networking/cpsw.txt
@@ -0,0 +1,540 @@
+* Texas Instruments CPSW ethernet driver
+
+Multiqueue & CBS & MQPRIO
+=====================================================================
+=====================================================================
+
+The cpsw has 3 CBS shapers for each external ports. This document
+describes MQPRIO and CBS Qdisc offload configuration for cpsw driver
+based on examples. It potentially can be used in audio video bridging
+(AVB) and time sensitive networking (TSN).
+
+The following examples was tested on AM572x EVM and BBB boards.
+
+Test setup
+==========
+
+Under consideration two examples with AM52xx EVM running cpsw driver
+in dual_emac mode.
+
+Several prerequisites:
+- TX queues must be rated starting from txq0 that has highest priority
+- Traffic classes are used starting from 0, that has highest priority
+- CBS shapers should be used with rated queues
+- The bandwidth for CBS shapers has to be set a little bit more then
+ potential incoming rate, thus, rate of all incoming tx queues has
+ to be a little less
+- Real rates can differ, due to discreetness
+- Map skb-priority to txq is not enough, also skb-priority to l2 prio
+ map has to be created with ip or vconfig tool
+- Any l2/socket prio (0 - 7) for classes can be used, but for
+ simplicity default values are used: 3 and 2
+- only 2 classes tested: A and B, but checked and can work with more,
+ maximum allowed 4, but only for 3 rate can be set.
+
+Test setup for examples
+=======================
+ +-------------------------------+
+ |--+ |
+ | | Workstation0 |
+ |E | MAC 18:03:73:66:87:42 |
++-----------------------------+ +--|t | |
+| | 1 | E | | |h |./tsn_listener -d \ |
+| Target board: | 0 | t |--+ |0 | 18:03:73:66:87:42 -i eth0 \|
+| AM572x EVM | 0 | h | | | -s 1500 |
+| | 0 | 0 | |--+ |
+| Only 2 classes: |Mb +---| +-------------------------------+
+| class A, class B | |
+| | +---| +-------------------------------+
+| | 1 | E | |--+ |
+| | 0 | t | | | Workstation1 |
+| | 0 | h |--+ |E | MAC 20:cf:30:85:7d:fd |
+| |Mb | 1 | +--|t | |
++-----------------------------+ |h |./tsn_listener -d \ |
+ |0 | 20:cf:30:85:7d:fd -i eth0 \|
+ | | -s 1500 |
+ |--+ |
+ +-------------------------------+
+
+*********************************************************************
+*********************************************************************
+*********************************************************************
+Example 1: One port tx AVB configuration scheme for target board
+----------------------------------------------------------------------
+(prints and scheme for AM52xx evm, applicable for single port boards)
+
+tc - traffic class
+txq - transmit queue
+p - priority
+f - fifo (cpsw fifo)
+S - shaper configured
+
++------------------------------------------------------------------+ u
+| +---------------+ +---------------+ +------+ +------+ | s
+| | | | | | | | | | e
+| | App 1 | | App 2 | | Apps | | Apps | | r
+| | Class A | | Class B | | Rest | | Rest | |
+| | Eth0 | | Eth0 | | Eth0 | | Eth1 | | s
+| | VLAN100 | | VLAN100 | | | | | | | | p
+| | 40 Mb/s | | 20 Mb/s | | | | | | | | a
+| | SO_PRIORITY=3 | | SO_PRIORITY=2 | | | | | | | | c
+| | | | | | | | | | | | | | e
+| +---|-----------+ +---|-----------+ +---|--+ +---|--+ |
++-----|------------------|------------------|--------|-------------+
+ +-+ +------------+ | |
+ | | +-----------------+ +--+
+ | | | |
++---|-------|-------------|-----------------------|----------------+
+| +----+ +----+ +----+ +----+ +----+ |
+| | p3 | | p2 | | p1 | | p0 | | p0 | | k
+| \ / \ / \ / \ / \ / | e
+| \ / \ / \ / \ / \ / | r
+| \/ \/ \/ \/ \/ | n
+| | | | | | e
+| | | +-----+ | | l
+| | | | | |
+| +----+ +----+ +----+ +----+ | s
+| |tc0 | |tc1 | |tc2 | |tc0 | | p
+| \ / \ / \ / \ / | a
+| \ / \ / \ / \ / | c
+| \/ \/ \/ \/ | e
+| | | +-----+ | |
+| | | | | | |
+| | | | | | |
+| | | | | | |
+| +----+ +----+ +----+ +----+ +----+ |
+| |txq0| |txq1| |txq2| |txq3| |txq4| |
+| \ / \ / \ / \ / \ / |
+| \ / \ / \ / \ / \ / |
+| \/ \/ \/ \/ \/ |
+| +-|------|------|------|--+ +--|--------------+ |
+| | | | | | | Eth0.100 | | Eth1 | |
++---|------|------|------|------------------------|----------------+
+ | | | | |
+ p p p p |
+ 3 2 0-1, 4-7 <- L2 priority |
+ | | | | |
+ | | | | |
++---|------|------|------|------------------------|----------------+
+| | | | | |----------+ |
+| +----+ +----+ +----+ +----+ +----+ |
+| |dma7| |dma6| |dma5| |dma4| |dma3| |
+| \ / \ / \ / \ / \ / | c
+| \S / \S / \ / \ / \ / | p
+| \/ \/ \/ \/ \/ | s
+| | | | +----- | | w
+| | | | | | |
+| | | | | | | d
+| +----+ +----+ +----+p p+----+ | r
+| | | | | | |o o| | | i
+| | f3 | | f2 | | f0 |r r| f0 | | v
+| |tc0 | |tc1 | |tc2 |t t|tc0 | | e
+| \CBS / \CBS / \CBS /1 2\CBS / | r
+| \S / \S / \ / \ / |
+| \/ \/ \/ \/ |
++------------------------------------------------------------------+
+========================================Eth==========================>
+
+1)
+// Add 4 tx queues, for interface Eth0, and 1 tx queue for Eth1
+$ ethtool -L eth0 rx 1 tx 5
+rx unmodified, ignoring
+
+2)
+// Check if num of queues is set correctly:
+$ ethtool -l eth0
+Channel parameters for eth0:
+Pre-set maximums:
+RX: 8
+TX: 8
+Other: 0
+Combined: 0
+Current hardware settings:
+RX: 1
+TX: 5
+Other: 0
+Combined: 0
+
+3)
+// TX queues must be rated starting from 0, so set bws for tx0 and tx1
+// Set rates 40 and 20 Mb/s appropriately.
+// Pay attention, real speed can differ a bit due to discreetness.
+// Leave last 2 tx queues not rated.
+$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
+$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
+
+4)
+// Check maximum rate of tx (cpdma) queues:
+$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
+40
+20
+0
+0
+0
+
+5)
+// Map skb->priority to traffic class:
+// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+// Map traffic class to transmit queue:
+// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq2, txq3)
+$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
+map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1
+
+5a)
+// As two interface sharing same set of tx queues, assign all traffic
+// coming to interface Eth1 to separate queue in order to not mix it
+// with traffic from interface Eth0, so use separate txq to send
+// packets to Eth1, so all prio -> tc0 and tc0 -> txq4
+// Here hw 0, so here still default configuration for eth1 in hw
+$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 1 \
+map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@4 hw 0
+
+6)
+// Check classes settings
+$ tc -g class show dev eth0
++---(100:ffe2) mqprio
+| +---(100:3) mqprio
+| +---(100:4) mqprio
+|
++---(100:ffe1) mqprio
+| +---(100:2) mqprio
+|
++---(100:ffe0) mqprio
+ +---(100:1) mqprio
+
+$ tc -g class show dev eth1
++---(100:ffe0) mqprio
+ +---(100:5) mqprio
+
+7)
+// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc
+// Set it +1 Mb for reserve (important!)
+// here only idle slope is important, others arg are ignored
+// Pay attention, real speed can differ a bit due to discreetness
+$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1438 \
+hicredit 62 sendslope -959000 idleslope 41000 offload 1
+net eth0: set FIFO3 bw = 50
+
+8)
+// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc:
+// Set it +1 Mb for reserve (important!)
+$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1468 \
+hicredit 65 sendslope -979000 idleslope 21000 offload 1
+net eth0: set FIFO2 bw = 30
+
+9)
+// Create vlan 100 to map sk->priority to vlan qos
+$ ip link add link eth0 name eth0.100 type vlan id 100
+8021q: 802.1Q VLAN Support v1.8
+8021q: adding VLAN 0 to HW filter on device eth0
+8021q: adding VLAN 0 to HW filter on device eth1
+net eth0: Adding vlanid 100 to vlan filter
+
+10)
+// Map skb->priority to L2 prio, 1 to 1
+$ ip link set eth0.100 type vlan \
+egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+11)
+// Check egress map for vlan 100
+$ cat /proc/net/vlan/eth0.100
+[...]
+INGRESS priority mappings: 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0
+EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+12)
+// Run your appropriate tools with socket option "SO_PRIORITY"
+// to 3 for class A and/or to 2 for class B
+// (I took at https://www.spinics.net/lists/netdev/msg460869.html)
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
+
+13)
+// run your listener on workstation
+// (I took at https://www.spinics.net/lists/netdev/msg460869.html)
+./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39000 kbps
+
+14)
+// Restore default configuration if needed
+$ ip link del eth0.100
+$ tc qdisc del dev eth1 root
+$ tc qdisc del dev eth0 root
+net eth0: Prev FIFO2 is shaped
+net eth0: set FIFO3 bw = 0
+net eth0: set FIFO2 bw = 0
+$ ethtool -L eth0 rx 1 tx 1
+
+*********************************************************************
+*********************************************************************
+*********************************************************************
+Example 2: Two port tx AVB configuration scheme for target board
+----------------------------------------------------------------------
+(prints and scheme for AM52xx evm, for dual emac boards only)
+
++------------------------------------------------------------------+ u
+| +----------+ +----------+ +------+ +----------+ +----------+ | s
+| | | | | | | | | | | | e
+| | App 1 | | App 2 | | Apps | | App 3 | | App 4 | | r
+| | Class A | | Class B | | Rest | | Class B | | Class A | |
+| | Eth0 | | Eth0 | | | | | Eth1 | | Eth1 | | s
+| | VLAN100 | | VLAN100 | | | | | VLAN100 | | VLAN100 | | p
+| | 40 Mb/s | | 20 Mb/s | | | | | 10 Mb/s | | 30 Mb/s | | a
+| | SO_PRI=3 | | SO_PRI=2 | | | | | SO_PRI=3 | | SO_PRI=2 | | c
+| | | | | | | | | | | | | | | | | e
+| +---|------+ +---|------+ +---|--+ +---|------+ +---|------+ |
++-----|-------------|-------------|---------|-------------|--------+
+ +-+ +-------+ | +----------+ +----+
+ | | +-------+------+ | |
+ | | | | | |
++---|-------|-------------|--------------|-------------|-------|---+
+| +----+ +----+ +----+ +----+ +----+ +----+ +----+ +----+ |
+| | p3 | | p2 | | p1 | | p0 | | p0 | | p1 | | p2 | | p3 | | k
+| \ / \ / \ / \ / \ / \ / \ / \ / | e
+| \ / \ / \ / \ / \ / \ / \ / \ / | r
+| \/ \/ \/ \/ \/ \/ \/ \/ | n
+| | | | | | | | e
+| | | +----+ +----+ | | | l
+| | | | | | | |
+| +----+ +----+ +----+ +----+ +----+ +----+ | s
+| |tc0 | |tc1 | |tc2 | |tc2 | |tc1 | |tc0 | | p
+| \ / \ / \ / \ / \ / \ / | a
+| \ / \ / \ / \ / \ / \ / | c
+| \/ \/ \/ \/ \/ \/ | e
+| | | +-----+ +-----+ | | |
+| | | | | | | | | |
+| | | | | | | | | |
+| | | | | E E | | | | |
+| +----+ +----+ +----+ +----+ t t +----+ +----+ +----+ +----+ |
+| |txq0| |txq1| |txq4| |txq5| h h |txq6| |txq7| |txq3| |txq2| |
+| \ / \ / \ / \ / 0 1 \ / \ / \ / \ / |
+| \ / \ / \ / \ / . . \ / \ / \ / \ / |
+| \/ \/ \/ \/ 1 1 \/ \/ \/ \/ |
+| +-|------|------|------|--+ 0 0 +-|------|------|------|--+ |
+| | | | | | | 0 0 | | | | | | |
++---|------|------|------|---------------|------|------|------|----+
+ | | | | | | | |
+ p p p p p p p p
+ 3 2 0-1, 4-7 <-L2 pri-> 0-1, 4-7 2 3
+ | | | | | | | |
+ | | | | | | | |
++---|------|------|------|---------------|------|------|------|----+
+| | | | | | | | | |
+| +----+ +----+ +----+ +----+ +----+ +----+ +----+ +----+ |
+| |dma7| |dma6| |dma3| |dma2| |dma1| |dma0| |dma4| |dma5| |
+| \ / \ / \ / \ / \ / \ / \ / \ / | c
+| \S / \S / \ / \ / \ / \ / \S / \S / | p
+| \/ \/ \/ \/ \/ \/ \/ \/ | s
+| | | | +----- | | | | | w
+| | | | | +----+ | | | |
+| | | | | | | | | | d
+| +----+ +----+ +----+p p+----+ +----+ +----+ | r
+| | | | | | |o o| | | | | | | i
+| | f3 | | f2 | | f0 |r CPSW r| f3 | | f2 | | f0 | | v
+| |tc0 | |tc1 | |tc2 |t t|tc0 | |tc1 | |tc2 | | e
+| \CBS / \CBS / \CBS /1 2\CBS / \CBS / \CBS / | r
+| \S / \S / \ / \S / \S / \ / |
+| \/ \/ \/ \/ \/ \/ |
++------------------------------------------------------------------+
+========================================Eth==========================>
+
+1)
+// Add 8 tx queues, for interface Eth0, but they are common, so are accessed
+// by two interfaces Eth0 and Eth1.
+$ ethtool -L eth1 rx 1 tx 8
+rx unmodified, ignoring
+
+2)
+// Check if num of queues is set correctly:
+$ ethtool -l eth0
+Channel parameters for eth0:
+Pre-set maximums:
+RX: 8
+TX: 8
+Other: 0
+Combined: 0
+Current hardware settings:
+RX: 1
+TX: 8
+Other: 0
+Combined: 0
+
+3)
+// TX queues must be rated starting from 0, so set bws for tx0 and tx1 for Eth0
+// and for tx2 and tx3 for Eth1. That is, rates 40 and 20 Mb/s appropriately
+// for Eth0 and 30 and 10 Mb/s for Eth1.
+// Real speed can differ a bit due to discreetness
+// Leave last 4 tx queues as not rated
+$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
+$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
+$ echo 30 > /sys/class/net/eth1/queues/tx-2/tx_maxrate
+$ echo 10 > /sys/class/net/eth1/queues/tx-3/tx_maxrate
+
+4)
+// Check maximum rate of tx (cpdma) queues:
+$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
+40
+20
+30
+10
+0
+0
+0
+0
+
+5)
+// Map skb->priority to traffic class for Eth0:
+// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+// Map traffic class to transmit queue:
+// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq4, txq5)
+$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
+map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@4 hw 1
+
+6)
+// Check classes settings
+$ tc -g class show dev eth0
++---(100:ffe2) mqprio
+| +---(100:5) mqprio
+| +---(100:6) mqprio
+|
++---(100:ffe1) mqprio
+| +---(100:2) mqprio
+|
++---(100:ffe0) mqprio
+ +---(100:1) mqprio
+
+7)
+// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc for Eth0
+// here only idle slope is important, others ignored
+// Real speed can differ a bit due to discreetness
+$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1470 \
+hicredit 62 sendslope -959000 idleslope 41000 offload 1
+net eth0: set FIFO3 bw = 50
+
+8)
+// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc for Eth0
+$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1470 \
+hicredit 65 sendslope -979000 idleslope 21000 offload 1
+net eth0: set FIFO2 bw = 30
+
+9)
+// Create vlan 100 to map sk->priority to vlan qos for Eth0
+$ ip link add link eth0 name eth0.100 type vlan id 100
+net eth0: Adding vlanid 100 to vlan filter
+
+10)
+// Map skb->priority to L2 prio for Eth0.100, one to one
+$ ip link set eth0.100 type vlan \
+egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+11)
+// Check egress map for vlan 100
+$ cat /proc/net/vlan/eth0.100
+[...]
+INGRESS priority mappings: 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0
+EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+12)
+// Map skb->priority to traffic class for Eth1:
+// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+// Map traffic class to transmit queue:
+// tc0 -> txq2, tc1 -> txq3, tc2 -> (txq6, txq7)
+$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 3 \
+map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@2 1@3 2@6 hw 1
+
+13)
+// Check classes settings
+$ tc -g class show dev eth1
++---(100:ffe2) mqprio
+| +---(100:7) mqprio
+| +---(100:8) mqprio
+|
++---(100:ffe1) mqprio
+| +---(100:4) mqprio
+|
++---(100:ffe0) mqprio
+ +---(100:3) mqprio
+
+14)
+// Set rate for class A - 31 Mbit (tc0, txq2) using CBS Qdisc for Eth1
+// here only idle slope is important, others ignored
+// Set it +1 Mb for reserve (important!)
+$ tc qdisc add dev eth1 parent 100:3 cbs locredit -1453 \
+hicredit 47 sendslope -969000 idleslope 31000 offload 1
+net eth1: set FIFO3 bw = 31
+
+15)
+// Set rate for class B - 11 Mbit (tc1, txq3) using CBS Qdisc for Eth1
+// Set it +1 Mb for reserve (important!)
+$ tc qdisc add dev eth1 parent 100:4 cbs locredit -1483 \
+hicredit 34 sendslope -989000 idleslope 11000 offload 1
+net eth1: set FIFO2 bw = 11
+
+16)
+// Create vlan 100 to map sk->priority to vlan qos for Eth1
+$ ip link add link eth1 name eth1.100 type vlan id 100
+net eth1: Adding vlanid 100 to vlan filter
+
+17)
+// Map skb->priority to L2 prio for Eth1.100, one to one
+$ ip link set eth1.100 type vlan \
+egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+18)
+// Check egress map for vlan 100
+$ cat /proc/net/vlan/eth1.100
+[...]
+INGRESS priority mappings: 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0
+EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+19)
+// Run appropriate tools with socket option "SO_PRIORITY" to 3
+// for class A and to 2 for class B. For both interfaces
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
+./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
+./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p2 -s 1500&
+./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p3 -s 1500&
+
+20)
+// run your listeners on workstations
+// (I took at https://www.spinics.net/lists/netdev/msg460869.html)
+./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39012 kbps
+Receiving data rate: 39000 kbps
+
+21)
+// Restore default configuration if needed
+$ ip link del eth1.100
+$ ip link del eth0.100
+$ tc qdisc del dev eth1 root
+net eth1: Prev FIFO2 is shaped
+net eth1: set FIFO3 bw = 0
+net eth1: set FIFO2 bw = 0
+$ tc qdisc del dev eth0 root
+net eth0: Prev FIFO2 is shaped
+net eth0: set FIFO3 bw = 0
+net eth0: set FIFO2 bw = 0
+$ ethtool -L eth0 rx 1 tx 1
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 5/6] net: ethernet: ti: cpsw: restore shaper configuration while down/up
From: Ivan Khoronzhuk @ 2018-06-11 13:30 UTC (permalink / raw)
To: grygorii.strashko, davem
Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
p-varis, spatton, francois.ozog, yogeshs, nsekhar,
Ivan Khoronzhuk
In-Reply-To: <20180611133047.4818-1-ivan.khoronzhuk@linaro.org>
Need to restore shapers configuration after interface was down/up.
This is needed as appropriate configuration is still replicated in
kernel settings. This only shapers context restore, so vlan
configuration should be restored by user if needed, especially for
devices with one port where vlan frames are sent via ALE.
Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
drivers/net/ethernet/ti/cpsw.c | 47 ++++++++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 87a5586c5ea5..f39d2662c5ab 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1807,6 +1807,51 @@ static int cpsw_set_cbs(struct net_device *ndev,
return ret;
}
+static void cpsw_cbs_resume(struct cpsw_slave *slave, struct cpsw_priv *priv)
+{
+ int fifo, bw;
+
+ for (fifo = CPSW_FIFO_SHAPERS_NUM; fifo > 0; fifo--) {
+ bw = priv->fifo_bw[fifo];
+ if (!bw)
+ continue;
+
+ cpsw_set_fifo_rlimit(priv, fifo, bw);
+ }
+}
+
+static void cpsw_mqprio_resume(struct cpsw_slave *slave, struct cpsw_priv *priv)
+{
+ struct cpsw_common *cpsw = priv->cpsw;
+ u32 tx_prio_map = 0;
+ int i, tc, fifo;
+ u32 tx_prio_rg;
+
+ if (!priv->mqprio_hw)
+ return;
+
+ for (i = 0; i < 8; i++) {
+ tc = netdev_get_prio_tc_map(priv->ndev, i);
+ fifo = CPSW_FIFO_SHAPERS_NUM - tc;
+ tx_prio_map |= fifo << (4 * i);
+ }
+
+ tx_prio_rg = cpsw->version == CPSW_VERSION_1 ?
+ CPSW1_TX_PRI_MAP : CPSW2_TX_PRI_MAP;
+
+ slave_write(slave, tx_prio_map, tx_prio_rg);
+}
+
+/* restore resources after port reset */
+static void cpsw_restore(struct cpsw_priv *priv)
+{
+ /* restore MQPRIO offload */
+ for_each_slave(priv, cpsw_mqprio_resume, priv);
+
+ /* restore CBS offload */
+ for_each_slave(priv, cpsw_cbs_resume, priv);
+}
+
static int cpsw_ndo_open(struct net_device *ndev)
{
struct cpsw_priv *priv = netdev_priv(ndev);
@@ -1886,6 +1931,8 @@ static int cpsw_ndo_open(struct net_device *ndev)
}
+ cpsw_restore(priv);
+
/* Enable Interrupt pacing if configured */
if (cpsw->coal_intvl != 0) {
struct ethtool_coalesce coal;
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 3/6] net: ethernet: ti: cpsw: add MQPRIO Qdisc offload
From: Ivan Khoronzhuk @ 2018-06-11 13:30 UTC (permalink / raw)
To: grygorii.strashko, davem
Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
p-varis, spatton, francois.ozog, yogeshs, nsekhar,
Ivan Khoronzhuk
In-Reply-To: <20180611133047.4818-1-ivan.khoronzhuk@linaro.org>
That's possible to offload vlan to tc priority mapping with
assumption sk_prio == L2 prio.
Example:
$ ethtool -L eth0 rx 1 tx 4
$ qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1
$ tc -g class show dev eth0
+---(100:ffe2) mqprio
| +---(100:3) mqprio
| +---(100:4) mqprio
|
+---(100:ffe1) mqprio
| +---(100:2) mqprio
|
+---(100:ffe0) mqprio
+---(100:1) mqprio
Here, 100:1 is txq0, 100:2 is txq1, 100:3 is txq2, 100:4 is txq3
txq0 belongs to tc0, txq1 to tc1, txq2 and txq3 to tc2
The offload part only maps L2 prio to classes of traffic, but not
to transmit queues, so to direct traffic to traffic class vlan has
to be created with appropriate egress map.
Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
drivers/net/ethernet/ti/cpsw.c | 82 ++++++++++++++++++++++++++++++++++
1 file changed, 82 insertions(+)
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 406537d74ec1..fd967d2bce5d 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -39,6 +39,7 @@
#include <linux/sys_soc.h>
#include <linux/pinctrl/consumer.h>
+#include <net/pkt_cls.h>
#include "cpsw.h"
#include "cpsw_ale.h"
@@ -153,6 +154,8 @@ do { \
#define IRQ_NUM 2
#define CPSW_MAX_QUEUES 8
#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
+#define CPSW_TC_NUM 4
+#define CPSW_FIFO_SHAPERS_NUM (CPSW_TC_NUM - 1)
#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_SHIFT 29
#define CPSW_RX_VLAN_ENCAP_HDR_PRIO_MSK GENMASK(2, 0)
@@ -453,6 +456,7 @@ struct cpsw_priv {
u8 mac_addr[ETH_ALEN];
bool rx_pause;
bool tx_pause;
+ bool mqprio_hw;
u32 emac_port;
struct cpsw_common *cpsw;
};
@@ -1577,6 +1581,14 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw)
soft_reset_slave(slave);
}
+static int cpsw_tc_to_fifo(int tc, int num_tc)
+{
+ if (tc == num_tc - 1)
+ return 0;
+
+ return CPSW_FIFO_SHAPERS_NUM - tc;
+}
+
static int cpsw_ndo_open(struct net_device *ndev)
{
struct cpsw_priv *priv = netdev_priv(ndev);
@@ -2190,6 +2202,75 @@ static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate)
return ret;
}
+static int cpsw_set_tc(struct net_device *ndev, void *type_data)
+{
+ struct tc_mqprio_qopt_offload *mqprio = type_data;
+ struct cpsw_priv *priv = netdev_priv(ndev);
+ struct cpsw_common *cpsw = priv->cpsw;
+ int fifo, num_tc, count, offset;
+ struct cpsw_slave *slave;
+ u32 tx_prio_map = 0;
+ int i, tc, ret;
+
+ num_tc = mqprio->qopt.num_tc;
+ if (num_tc > CPSW_TC_NUM)
+ return -EINVAL;
+
+ if (mqprio->mode != TC_MQPRIO_MODE_DCB)
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(cpsw->dev);
+ if (ret < 0) {
+ pm_runtime_put_noidle(cpsw->dev);
+ return ret;
+ }
+
+ if (num_tc) {
+ for (i = 0; i < 8; i++) {
+ tc = mqprio->qopt.prio_tc_map[i];
+ fifo = cpsw_tc_to_fifo(tc, num_tc);
+ tx_prio_map |= fifo << (4 * i);
+ }
+
+ netdev_set_num_tc(ndev, num_tc);
+ for (i = 0; i < num_tc; i++) {
+ count = mqprio->qopt.count[i];
+ offset = mqprio->qopt.offset[i];
+ netdev_set_tc_queue(ndev, i, count, offset);
+ }
+ }
+
+ if (!mqprio->qopt.hw) {
+ /* restore default configuration */
+ netdev_reset_tc(ndev);
+ tx_prio_map = TX_PRIORITY_MAPPING;
+ }
+
+ priv->mqprio_hw = mqprio->qopt.hw;
+
+ offset = cpsw->version == CPSW_VERSION_1 ?
+ CPSW1_TX_PRI_MAP : CPSW2_TX_PRI_MAP;
+
+ slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+ slave_write(slave, tx_prio_map, offset);
+
+ pm_runtime_put_sync(cpsw->dev);
+
+ return 0;
+}
+
+static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_QDISC_MQPRIO:
+ return cpsw_set_tc(ndev, type_data);
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops cpsw_netdev_ops = {
.ndo_open = cpsw_ndo_open,
.ndo_stop = cpsw_ndo_stop,
@@ -2205,6 +2286,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
#endif
.ndo_vlan_rx_add_vid = cpsw_ndo_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = cpsw_ndo_vlan_rx_kill_vid,
+ .ndo_setup_tc = cpsw_ndo_setup_tc,
};
static int cpsw_get_regs_len(struct net_device *ndev)
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 2/6] net: ethernet: ti: cpdma: fit rated channels in backward order
From: Ivan Khoronzhuk @ 2018-06-11 13:30 UTC (permalink / raw)
To: grygorii.strashko, davem
Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
p-varis, spatton, francois.ozog, yogeshs, nsekhar,
Ivan Khoronzhuk
In-Reply-To: <20180611133047.4818-1-ivan.khoronzhuk@linaro.org>
According to TRM tx rated channels should be in 7..0 order,
so correct it.
Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
drivers/net/ethernet/ti/davinci_cpdma.c | 31 ++++++++++++-------------
1 file changed, 15 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index cdbddf16dd29..19bb63902997 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -406,37 +406,36 @@ static int cpdma_chan_fit_rate(struct cpdma_chan *ch, u32 rate,
struct cpdma_chan *chan;
u32 old_rate = ch->rate;
u32 new_rmask = 0;
- int rlim = 1;
+ int rlim = 0;
int i;
- *prio_mode = 0;
for (i = tx_chan_num(0); i < tx_chan_num(CPDMA_MAX_CHANNELS); i++) {
chan = ctlr->channels[i];
- if (!chan) {
- rlim = 0;
+ if (!chan)
continue;
- }
if (chan == ch)
chan->rate = rate;
if (chan->rate) {
- if (rlim) {
- new_rmask |= chan->mask;
- } else {
- ch->rate = old_rate;
- dev_err(ctlr->dev, "Prev channel of %dch is not rate limited\n",
- chan->chan_num);
- return -EINVAL;
- }
- } else {
- *prio_mode = 1;
- rlim = 0;
+ rlim = 1;
+ new_rmask |= chan->mask;
+ continue;
}
+
+ if (rlim)
+ goto err;
}
*rmask = new_rmask;
+ *prio_mode = rlim;
return 0;
+
+err:
+ ch->rate = old_rate;
+ dev_err(ctlr->dev, "Upper cpdma ch%d is not rate limited\n",
+ chan->chan_num);
+ return -EINVAL;
}
static u32 cpdma_chan_set_factors(struct cpdma_ctlr *ctlr,
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 1/6] net: ethernet: ti: cpsw: use cpdma channels in backward order for txq
From: Ivan Khoronzhuk @ 2018-06-11 13:30 UTC (permalink / raw)
To: grygorii.strashko, davem
Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
p-varis, spatton, francois.ozog, yogeshs, nsekhar,
Ivan Khoronzhuk
In-Reply-To: <20180611133047.4818-1-ivan.khoronzhuk@linaro.org>
The cpdma channel highest priority is from hi to lo number.
The driver has limited number of descriptors that are shared between
number of cpdma channels. Number of queues can be tuned with ethtool,
that allows to not spend descriptors on not needed cpdma channels.
In AVB usually only 2 tx queues can be enough with rate limitation.
The rate limitation can be used only for hi priority queues. Thus, to
use only 2 queues the 8 has to be created. It's wasteful.
So, in order to allow using only needed number of rate limited
tx queues, save resources, and be able to set rate limitation for
them, let assign tx cpdma channels in backward order to queues.
Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
---
drivers/net/ethernet/ti/cpsw.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 534596ce00d3..406537d74ec1 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -967,8 +967,8 @@ static int cpsw_tx_mq_poll(struct napi_struct *napi_tx, int budget)
/* process every unprocessed channel */
ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
- for (ch = 0, num_tx = 0; ch_map; ch_map >>= 1, ch++) {
- if (!(ch_map & 0x01))
+ for (ch = 0, num_tx = 0; ch_map & 0xff; ch_map <<= 1, ch++) {
+ if (!(ch_map & 0x80))
continue;
txv = &cpsw->txv[ch];
@@ -2431,7 +2431,7 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
void (*handler)(void *, int, int);
struct netdev_queue *queue;
struct cpsw_vector *vec;
- int ret, *ch;
+ int ret, *ch, vch;
if (rx) {
ch = &cpsw->rx_ch_num;
@@ -2444,7 +2444,8 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
}
while (*ch < ch_num) {
- vec[*ch].ch = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+ vch = rx ? *ch : 7 - *ch;
+ vec[*ch].ch = cpdma_chan_create(cpsw->dma, vch, handler, rx);
queue = netdev_get_tx_queue(priv->ndev, *ch);
queue->tx_maxrate = 0;
@@ -2980,7 +2981,7 @@ static int cpsw_probe(struct platform_device *pdev)
u32 slave_offset, sliver_offset, slave_size;
const struct soc_device_attribute *soc;
struct cpsw_common *cpsw;
- int ret = 0, i;
+ int ret = 0, i, ch;
int irq;
cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
@@ -3155,7 +3156,8 @@ static int cpsw_probe(struct platform_device *pdev)
if (soc)
cpsw->quirk_irq = 1;
- cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
+ ch = cpsw->quirk_irq ? 0 : 7;
+ cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, ch, cpsw_tx_handler, 0);
if (IS_ERR(cpsw->txv[0].ch)) {
dev_err(priv->dev, "error initializing tx dma channel\n");
ret = PTR_ERR(cpsw->txv[0].ch);
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 0/6] net: ethernet: ti: cpsw: add MQPRIO and CBS Qdisc offload
From: Ivan Khoronzhuk @ 2018-06-11 13:30 UTC (permalink / raw)
To: grygorii.strashko, davem
Cc: corbet, akpm, netdev, linux-doc, linux-kernel, linux-omap,
vinicius.gomes, henrik, jesus.sanchez-palencia, ilias.apalodimas,
p-varis, spatton, francois.ozog, yogeshs, nsekhar,
Ivan Khoronzhuk
This series adds MQPRIO and CBS Qdisc offload for TI cpsw driver.
It potentially can be used in audio video bridging (AVB) and time
sensitive networking (TSN).
Patchset was tested on AM572x EVM and BBB boards. Last patch from this
series adds detailed description of configuration with examples. For
consistency reasons, in role of talker and listener, tools from
patchset "TSN: Add qdisc based config interface for CBS" were used and
can be seen here: https://www.spinics.net/lists/netdev/msg460869.html
Based on net-next/master
Ivan Khoronzhuk (6):
net: ethernet: ti: cpsw: use cpdma channels in backward order for txq
net: ethernet: ti: cpdma: fit rated channels in backward order
net: ethernet: ti: cpsw: add MQPRIO Qdisc offload
net: ethernet: ti: cpsw: add CBS Qdisc offload
net: ethernet: ti: cpsw: restore shaper configuration while down/up
Documentation: networking: cpsw: add MQPRIO & CBS offload examples
Documentation/networking/cpsw.txt | 540 ++++++++++++++++++++++++
drivers/net/ethernet/ti/cpsw.c | 364 +++++++++++++++-
drivers/net/ethernet/ti/davinci_cpdma.c | 31 +-
3 files changed, 913 insertions(+), 22 deletions(-)
create mode 100644 Documentation/networking/cpsw.txt
--
2.17.1
^ permalink raw reply
* Re: [PATCH net 1/2] ipv4: igmp: use alarmtimer to prevent delayed reports
From: kbuild test robot @ 2018-06-11 13:25 UTC (permalink / raw)
To: Tejaswi Tanikella; +Cc: kbuild-all, netdev, f.fainelli, andrew, davem
In-Reply-To: <20180611115058.GA12452@tejaswit-linux.qualcomm.com>
[-- Attachment #1: Type: text/plain, Size: 1948 bytes --]
Hi Tejaswi,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on net/master]
url: https://github.com/0day-ci/linux/commits/Tejaswi-Tanikella/ipv4-igmp-use-alarmtimer-to-prevent-delayed-reports/20180611-195615
config: i386-randconfig-x012-201823 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
Note: the linux-review/Tejaswi-Tanikella/ipv4-igmp-use-alarmtimer-to-prevent-delayed-reports/20180611-195615 HEAD 20987eecc3144eb22578baea09ff017ddcb45163 builds fine.
It only hurts bisectibility.
All errors (new ones prefixed by >>):
net//ipv4/igmp.c: In function 'igmp_start_timer':
>> net//ipv4/igmp.c:213:19: error: implicit declaration of function 'jiffies_to_ktime'; did you mean 'jiffies_to_timeval'? [-Werror=implicit-function-declaration]
ktime_t expiry = jiffies_to_ktime(prandom_u32() % max_delay + 2);
^~~~~~~~~~~~~~~~
jiffies_to_timeval
net//ipv4/igmp.c: In function 'igmp_mod_timer':
net//ipv4/igmp.c:250:7: error: implicit declaration of function 'ktime_to_jiffies'; did you mean 'timeval_to_jiffies'? [-Werror=implicit-function-declaration]
if (ktime_to_jiffies(expiry) < max_delay) {
^~~~~~~~~~~~~~~~
timeval_to_jiffies
cc1: some warnings being treated as errors
vim +213 net//ipv4/igmp.c
209
210 /* It must be called with locked im->lock */
211 static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
212 {
> 213 ktime_t expiry = jiffies_to_ktime(prandom_u32() % max_delay + 2);
214
215 im->tm_running = 1;
216 alarm_start_relative(&im->alarm, expiry);
217 refcount_inc(&im->refcnt);
218 }
219
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 33045 bytes --]
^ permalink raw reply
* Re: [PATCH net 1/2] ipv4: igmp: use alarmtimer to prevent delayed reports
From: kbuild test robot @ 2018-06-11 13:25 UTC (permalink / raw)
To: Tejaswi Tanikella; +Cc: kbuild-all, netdev, f.fainelli, andrew, davem
In-Reply-To: <20180611115058.GA12452@tejaswit-linux.qualcomm.com>
[-- Attachment #1: Type: text/plain, Size: 2211 bytes --]
Hi Tejaswi,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on net/master]
url: https://github.com/0day-ci/linux/commits/Tejaswi-Tanikella/ipv4-igmp-use-alarmtimer-to-prevent-delayed-reports/20180611-195615
config: x86_64-randconfig-x003-201823 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All errors (new ones prefixed by >>):
net/ipv4/igmp.c: In function 'igmp_mc_seq_show':
>> net/ipv4/igmp.c:2819:11: error: implicit declaration of function 'ktime_to_jiffies'; did you mean 'timeval_to_jiffies'? [-Werror=implicit-function-declaration]
delta = ktime_to_jiffies(alarm_expires_remaining(&im->alarm));
^~~~~~~~~~~~~~~~
timeval_to_jiffies
>> net/ipv4/igmp.c:2819:28: error: implicit declaration of function 'alarm_expires_remaining'; did you mean 'hrtimer_expires_remaining'? [-Werror=implicit-function-declaration]
delta = ktime_to_jiffies(alarm_expires_remaining(&im->alarm));
^~~~~~~~~~~~~~~~~~~~~~~
hrtimer_expires_remaining
>> net/ipv4/igmp.c:2819:55: error: 'struct ip_mc_list' has no member named 'alarm'
delta = ktime_to_jiffies(alarm_expires_remaining(&im->alarm));
^~
cc1: some warnings being treated as errors
vim +2819 net/ipv4/igmp.c
2813
2814 if (rcu_access_pointer(state->in_dev->mc_list) == im) {
2815 seq_printf(seq, "%d\t%-10s: %5d %7s\n",
2816 state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
2817 }
2818
> 2819 delta = ktime_to_jiffies(alarm_expires_remaining(&im->alarm));
2820 seq_printf(seq,
2821 "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n",
2822 im->multiaddr, im->users,
2823 im->tm_running,
2824 im->tm_running ? jiffies_delta_to_clock_t(delta) : 0,
2825 im->reporter);
2826 }
2827 return 0;
2828 }
2829
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 31333 bytes --]
^ permalink raw reply
* Re: [PATCH v2] net-fq: Add WARN_ON check for null flow.
From: Ben Greear @ 2018-06-11 13:18 UTC (permalink / raw)
To: Michał Kazior, Arend van Spriel
Cc: Cong Wang, Linux Kernel Network Developers,
linux-wireless@vger.kernel.org
In-Reply-To: <CABvG-CUUAQaWQuh1HqNmyM+wudBdAjZhSvHdsXEpwRyOwTg-fg@mail.gmail.com>
On 06/10/2018 10:10 AM, Michał Kazior wrote:
> Ben,
>
> The patch is symptomatic. fq_tin_dequeue() already checks if the list
> is empty before it tries to access first entry. I see no point in
> using the _or_null() + WARN_ON.
>
> The 0x3c deref is likely an offset off of NULL base pointer. Did you
> check gdb/addr2line of the ieee80211_tx_dequeue+0xfb? Where did it
> point to?
gdb pointed to one line above the flow dereference, which is why I was
going to put some debugging in there.
>
> I suspect there's not enough synchronization between quescing the
> device/ath10k after fw crashes and performing mac80211's reconfig
> procedure.
I am already running this patch which helps with some of that. That
patch never made it upstream, but it fixed problems for me earlier.
https://patchwork.kernel.org/patch/9457639/
Could easily be there are some more issues in that logic.
Someone else posted a patch to disable mac-80211 tx when FW crashes,
I think...I have not tried to backport that.
https://patchwork.kernel.org/patch/10411967/
Thanks,
Ben
>
>
> Michał
>
> On 8 June 2018 at 23:40, Arend van Spriel <arend.vanspriel@broadcom.com> wrote:
>> On 6/8/2018 5:17 PM, Ben Greear wrote:
>>
>> I recalled an email from Michał leaving tieto so adding his alternate email
>> he provided back then.
>>
>> Gr. AvS
>>
>>
>>> On 06/07/2018 04:59 PM, Cong Wang wrote:
>>>>
>>>> On Thu, Jun 7, 2018 at 4:48 PM, <greearb@candelatech.com> wrote:
>>>>>
>>>>> diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
>>>>> index be7c0fa..cb911f0 100644
>>>>> --- a/include/net/fq_impl.h
>>>>> +++ b/include/net/fq_impl.h
>>>>> @@ -78,7 +78,10 @@ static struct sk_buff *fq_tin_dequeue(struct fq *fq,
>>>>> return NULL;
>>>>> }
>>>>>
>>>>> - flow = list_first_entry(head, struct fq_flow, flowchain);
>>>>> + flow = list_first_entry_or_null(head, struct fq_flow,
>>>>> flowchain);
>>>>> +
>>>>> + if (WARN_ON_ONCE(!flow))
>>>>> + return NULL;
>>>>
>>>>
>>>> This does not make sense either. list_first_entry_or_null()
>>>> returns NULL only when the list is empty, but we already check
>>>> list_empty() right before this code, and it is protected by fq->lock.
>>>>
>>>
>>> Hello Michal,
>>>
>>> git blame shows you as the author of the fq_impl.h code.
>>>
>>> I saw a crash when debugging funky ath10k firmware in a 4.16 + hacks
>>> kernel. There was an apparent
>>> mostly-null deref in the fq_tin_dequeue method. According to gdb, it
>>> was within
>>> 1 line of the dereference of 'flow'.
>>>
>>> My hack above is probably not that useful. Cong thinks maybe the
>>> locking is bad.
>>>
>>> If you get a chance, please review this thread and see if you have any
>>> ideas for
>>> a better fix (or better debugging code).
>>>
>>> As always, if you would like me to generate you a buggy firmware that
>>> will crash
>>> in the tx path and cause all sorts of mayhem in the ath10k driver and
>>> wifi stack,
>>> I will be happy to do so.
>>>
>>> https://www.mail-archive.com/netdev@vger.kernel.org/msg239738.html
>>>
>>> Thanks,
>>> Ben
>>>
>>
>
--
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc http://www.candelatech.com
^ permalink raw reply
* [PATCH net] net: qcom/emac: Add missing of_node_put()
From: YueHaibing @ 2018-06-11 13:03 UTC (permalink / raw)
To: timur, davem; +Cc: linux-kernel, netdev, YueHaibing
Add missing of_node_put() call for device node returned by
of_parse_phandle().
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
drivers/net/ethernet/qualcomm/emac/emac-sgmii.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index e78e5db..c694e34 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -384,6 +384,7 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
}
sgmii_pdev = of_find_device_by_node(np);
+ of_node_put(np);
if (!sgmii_pdev) {
dev_err(&pdev->dev, "invalid internal-phy property\n");
return -ENODEV;
--
2.7.0
^ permalink raw reply related
* Re: [PATCH v2 15/24] net: qualcomm: MODULE_DEVICE_TABLE(serdev)
From: Marcel Holtmann @ 2018-06-11 13:01 UTC (permalink / raw)
To: Ricardo Ribalda Delgado
Cc: LKML, linux-serial, Lino Sanfilippo, David S. Miller,
Stefan Wahren, Rob Herring, Johan Hovold, netdev
In-Reply-To: <20180611115240.32606-16-ricardo.ribalda@gmail.com>
Hi Ricardo,
> Export serdev table to the module header, allowing module autoload via
> udev/modprobe.
>
> Cc: Lino Sanfilippo <LinoSanfilippo@gmx.de>
> Cc: David S. Miller <davem@davemloft.net>
> Cc: Stefan Wahren <stefan.wahren@i2se.com>
> Cc: Rob Herring <robh@kernel.org>
> Cc: Johan Hovold <johan@kernel.org>
> Cc: netdev@vger.kernel.org
> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
> ---
> drivers/net/ethernet/qualcomm/qca_uart.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
> index db6068cd7a1f..6d2ac6cae63f 100644
> --- a/drivers/net/ethernet/qualcomm/qca_uart.c
> +++ b/drivers/net/ethernet/qualcomm/qca_uart.c
> @@ -405,6 +405,12 @@ static void qca_uart_remove(struct serdev_device *serdev)
> free_netdev(qca->net_dev);
> }
>
> +static struct serdev_device_id qca_uart_serdev_id[] = {
> + { QCAUART_DRV_NAME, },
> + {}
> +};
> +MODULE_DEVICE_TABLE(serdev, qca_uart_serdev_id);
> +
> static struct serdev_device_driver qca_uart_driver = {
> .probe = qca_uart_probe,
> .remove = qca_uart_remove,
> @@ -412,6 +418,7 @@ static struct serdev_device_driver qca_uart_driver = {
> .name = QCAUART_DRV_NAME,
> .of_match_table = of_match_ptr(qca_uart_of_match),
> },
> + .id_table = qca_uart_serdev_id,
> };
the commit message is misleading me. If I build something with ACPI or DT support, then modinfo will show all modalias information for ACPI and DT compatible strings. What else does udev/modprobe actually need? Is something broken with the modalias export?
Regards
Marcel
^ permalink raw reply
* [PATCH bpf] xsk: silence warning on memory allocation failure
From: Björn Töpel @ 2018-06-11 11:57 UTC (permalink / raw)
To: magnus.karlsson, magnus.karlsson, ast, daniel, netdev
Cc: Björn Töpel, penguin-kernel, syzkaller-bugs,
syzbot+4abadc5d69117b346506
From: Björn Töpel <bjorn.topel@intel.com>
syzkaller reported a warning from xdp_umem_pin_pages():
WARNING: CPU: 1 PID: 4537 at mm/slab_common.c:996 kmalloc_slab+0x56/0x70 mm/slab_common.c:996
...
__do_kmalloc mm/slab.c:3713 [inline]
__kmalloc+0x25/0x760 mm/slab.c:3727
kmalloc_array include/linux/slab.h:634 [inline]
kcalloc include/linux/slab.h:645 [inline]
xdp_umem_pin_pages net/xdp/xdp_umem.c:205 [inline]
xdp_umem_reg net/xdp/xdp_umem.c:318 [inline]
xdp_umem_create+0x5c9/0x10f0 net/xdp/xdp_umem.c:349
xsk_setsockopt+0x443/0x550 net/xdp/xsk.c:531
__sys_setsockopt+0x1bd/0x390 net/socket.c:1935
__do_sys_setsockopt net/socket.c:1946 [inline]
__se_sys_setsockopt net/socket.c:1943 [inline]
__x64_sys_setsockopt+0xbe/0x150 net/socket.c:1943
do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
entry_SYSCALL_64_after_hwframe+0x49/0xbe
This is a warning about attempting to allocate more than
KMALLOC_MAX_SIZE memory. The request originates from userspace, and if
the request is too big, the kernel is free to deny its allocation. In
this patch, the failed allocation attempt is silenced with
__GFP_NOWARN.
Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt")
Reported-by: syzbot+4abadc5d69117b346506@syzkaller.appspotmail.com
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
net/xdp/xdp_umem.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index b9ef487c4618..f47abb46c587 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -204,7 +204,8 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem)
long npgs;
int err;
- umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
+ umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
+ GFP_KERNEL | __GFP_NOWARN);
if (!umem->pgs)
return -ENOMEM;
--
2.14.1
^ permalink raw reply related
* [PATCH 2/2] ktime: helpers to convert between ktime and jiffies
From: Tejaswi Tanikella @ 2018-06-11 11:52 UTC (permalink / raw)
To: netdev, f.fainelli; +Cc: andrew, davem
In-Reply-To: <20180611115058.GA12452@tejaswit-linux.qualcomm.com>
Signed-off-by: Tejaswi Tanikella <tejaswit@codeaurora.org>
---
include/linux/ktime.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 5b9fddb..4881483 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -96,6 +96,10 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
#define ktime_to_ns(kt) (kt)
+/* ktime to jiffies and back */
+#define ktime_to_jiffies(kt) nsecs_to_jiffies(kt)
+#define jiffies_to_ktime(j) jiffies_to_nsecs(j)
+
/**
* ktime_compare - Compares two ktime_t variables for less, greater or equal
* @cmp1: comparable1
--
1.9.1
^ permalink raw reply related
* [PATCH v2 15/24] net: qualcomm: MODULE_DEVICE_TABLE(serdev)
From: Ricardo Ribalda Delgado @ 2018-06-11 11:52 UTC (permalink / raw)
To: linux-kernel, linux-serial
Cc: Ricardo Ribalda Delgado, Lino Sanfilippo, David S . Miller,
Stefan Wahren, Rob Herring, Johan Hovold, netdev
In-Reply-To: <20180611115240.32606-1-ricardo.ribalda@gmail.com>
Export serdev table to the module header, allowing module autoload via
udev/modprobe.
Cc: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Stefan Wahren <stefan.wahren@i2se.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Johan Hovold <johan@kernel.org>
Cc: netdev@vger.kernel.org
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
---
drivers/net/ethernet/qualcomm/qca_uart.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
index db6068cd7a1f..6d2ac6cae63f 100644
--- a/drivers/net/ethernet/qualcomm/qca_uart.c
+++ b/drivers/net/ethernet/qualcomm/qca_uart.c
@@ -405,6 +405,12 @@ static void qca_uart_remove(struct serdev_device *serdev)
free_netdev(qca->net_dev);
}
+static struct serdev_device_id qca_uart_serdev_id[] = {
+ { QCAUART_DRV_NAME, },
+ {}
+};
+MODULE_DEVICE_TABLE(serdev, qca_uart_serdev_id);
+
static struct serdev_device_driver qca_uart_driver = {
.probe = qca_uart_probe,
.remove = qca_uart_remove,
@@ -412,6 +418,7 @@ static struct serdev_device_driver qca_uart_driver = {
.name = QCAUART_DRV_NAME,
.of_match_table = of_match_ptr(qca_uart_of_match),
},
+ .id_table = qca_uart_serdev_id,
};
module_serdev_device_driver(qca_uart_driver);
--
2.17.1
^ permalink raw reply related
* [PATCH net 1/2] ipv4: igmp: use alarmtimer to prevent delayed reports
From: Tejaswi Tanikella @ 2018-06-11 11:51 UTC (permalink / raw)
To: netdev, f.fainelli; +Cc: andrew, davem
On receiving a IGMPv2/v3 query, based on max_delay set in the header a
timer is started to send out a response after a random time within
max_delay. If the system then moves into suspend state, Report is
delayed until system wakes up.
Use a alarmtimer instead of using a timer. Alarmtimer will wake the
system up from suspend to send out the IGMP report.
Signed-off-by: Tejaswi Tanikella <tejaswit@codeaurora.org>
---
v2: use alarmtimer instead of wakelock.
---
If these changes are fine, I'll share similar patches for MLD and ARP.
---
include/linux/igmp.h | 7 ++++++-
net/ipv4/igmp.c | 27 ++++++++++++++++-----------
2 files changed, 22 insertions(+), 12 deletions(-)
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index f823185..45852eb 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -20,6 +20,9 @@
#include <linux/in.h>
#include <linux/refcount.h>
#include <uapi/linux/igmp.h>
+#ifdef CONFIG_IP_MULTICAST
+#include <linux/alarmtimer.h>
+#endif
static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
{
@@ -83,7 +86,9 @@ struct ip_mc_list {
struct ip_mc_list __rcu *next_rcu;
};
struct ip_mc_list __rcu *next_hash;
- struct timer_list timer;
+#ifdef CONFIG_IP_MULTICAST
+ struct alarm alarm;
+#endif
int users;
refcount_t refcnt;
spinlock_t lock;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 85b617b..c30b5c4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -199,7 +199,7 @@ static void ip_ma_put(struct ip_mc_list *im)
static void igmp_stop_timer(struct ip_mc_list *im)
{
spin_lock_bh(&im->lock);
- if (del_timer(&im->timer))
+ if (alarm_cancel(&im->alarm))
refcount_dec(&im->refcnt);
im->tm_running = 0;
im->reporter = 0;
@@ -210,11 +210,11 @@ static void igmp_stop_timer(struct ip_mc_list *im)
/* It must be called with locked im->lock */
static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
{
- int tv = prandom_u32() % max_delay;
+ ktime_t expiry = jiffies_to_ktime(prandom_u32() % max_delay + 2);
im->tm_running = 1;
- if (!mod_timer(&im->timer, jiffies+tv+2))
- refcount_inc(&im->refcnt);
+ alarm_start_relative(&im->alarm, expiry);
+ refcount_inc(&im->refcnt);
}
static void igmp_gq_start_timer(struct in_device *in_dev)
@@ -241,11 +241,14 @@ static void igmp_ifc_start_timer(struct in_device *in_dev, int delay)
static void igmp_mod_timer(struct ip_mc_list *im, int max_delay)
{
+ ktime_t expiry;
+
spin_lock_bh(&im->lock);
im->unsolicit_count = 0;
- if (del_timer(&im->timer)) {
- if ((long)(im->timer.expires-jiffies) < max_delay) {
- add_timer(&im->timer);
+ expiry = alarm_expires_remaining(&im->alarm);
+ if (alarm_cancel(&im->alarm)) {
+ if (ktime_to_jiffies(expiry) < max_delay) {
+ alarm_start_relative(&im->alarm, expiry);
im->tm_running = 1;
spin_unlock_bh(&im->lock);
return;
@@ -812,9 +815,9 @@ static void igmp_ifc_event(struct in_device *in_dev)
}
-static void igmp_timer_expire(struct timer_list *t)
+enum alarmtimer_restart igmp_timer_expire(struct alarm *alarm, ktime_t now)
{
- struct ip_mc_list *im = from_timer(im, t, timer);
+ struct ip_mc_list *im = container_of(alarm, struct ip_mc_list, alarm);
struct in_device *in_dev = im->interface;
spin_lock(&im->lock);
@@ -835,6 +838,8 @@ static void igmp_timer_expire(struct timer_list *t)
igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
ip_ma_put(im);
+
+ return ALARMTIMER_NORESTART;
}
/* mark EXCLUDE-mode sources */
@@ -1413,7 +1418,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
refcount_set(&im->refcnt, 1);
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
- timer_setup(&im->timer, igmp_timer_expire, 0);
+ alarm_init(&im->alarm, ALARM_BOOTTIME, igmp_timer_expire);
im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
#endif
@@ -2811,7 +2816,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
}
- delta = im->timer.expires - jiffies;
+ delta = ktime_to_jiffies(alarm_expires_remaining(&im->alarm));
seq_printf(seq,
"\t\t\t\t%08X %5d %d:%08lX\t\t%d\n",
im->multiaddr, im->users,
--
1.9.1
^ permalink raw reply related
* Re: [PATCH] net: thunderx: prevent concurrent data re-writing by nicvf_set_rx_mode
From: Dean Nelson @ 2018-06-11 11:22 UTC (permalink / raw)
To: David Miller, Vadim.Lomovtsev
Cc: rric, sgoutham, linux-arm-kernel, netdev, linux-kernel,
Vadim.Lomovtsev
In-Reply-To: <20180610.123551.885190586229525170.davem@davemloft.net>
On 06/10/2018 02:35 PM, David Miller wrote:
> From: Vadim Lomovtsev <Vadim.Lomovtsev@caviumnetworks.com>
> Date: Fri, 8 Jun 2018 02:27:59 -0700
>
>> + /* Save message data locally to prevent them from
>> + * being overwritten by next ndo_set_rx_mode call().
>> + */
>> + spin_lock(&nic->rx_mode_wq_lock);
>> + mode = vf_work->mode;
>> + mc = vf_work->mc;
>> + vf_work->mc = NULL;
If I'm reading this code correctly, I believe nic->rx_mode_work.mc will
have been set to NULL before the lock is dropped by
nicvf_set_rx_mode_task() and acquired by nicvf_set_rx_mode().
>> + spin_unlock(&nic->rx_mode_wq_lock);
>
> At the moment you drop this lock, the memory behind 'mc' can be
> freed up by:
>
>> + spin_lock(&nic->rx_mode_wq_lock);
>> + kfree(nic->rx_mode_work.mc);
So the kfree() will be called with a NULL pointer and quickly return.
>
> And you'll crash when you dereference it above via
> __nicvf_set_rx_mode_task().
>
I believe the call to kfree() in nicvf_set_rx_mode() is there to free
up a mc_list that has been allocated by nicvf_set_rx_mode() during a
previous callback to the function, one that has not yet been processed
by nicvf_set_rx_mode_task().
In this way only the last 'unprocessed' callback to nicvf_set_rx_mode()
gets processed should there be multiple callbacks occurring between the
times the nicvf_set_rx_mode_task() runs.
In my testing with this patch, this is what I see happening.
^ permalink raw reply
* [PATCH] net: phy: mdio-gpio: Cut surplus includes
From: Linus Walleij @ 2018-06-11 11:19 UTC (permalink / raw)
To: Andrew Lunn, Florian Fainelli; +Cc: netdev, Linus Walleij
The GPIO MDIO driver now needs only <linux/gpio/consumer.h>
so cut the legacy <linux/gpio.h> and <linux/of_gpio.h>
includes that are no longer used.
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
drivers/net/phy/mdio-gpio.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 4e4c8daf44c3..33265747bf39 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -26,10 +26,7 @@
#include <linux/platform_device.h>
#include <linux/mdio-bitbang.h>
#include <linux/mdio-gpio.h>
-#include <linux/gpio.h>
#include <linux/gpio/consumer.h>
-
-#include <linux/of_gpio.h>
#include <linux/of_mdio.h>
struct mdio_gpio_info {
--
2.17.0
^ permalink raw reply related
* Re: [PATCH v2] selftests: bpf: fix urandom_read build issue
From: Daniel Borkmann @ 2018-06-11 10:58 UTC (permalink / raw)
To: Anders Roxell, ast, ys114321, shuah; +Cc: netdev, linux-kernel, linux-kselftest
In-Reply-To: <20180608065127.28115-1-anders.roxell@linaro.org>
On 06/08/2018 08:51 AM, Anders Roxell wrote:
> gcc complains that urandom_read gets built twice.
>
> gcc -o tools/testing/selftests/bpf/urandom_read
> -static urandom_read.c -Wl,--build-id
> gcc -Wall -O2 -I../../../include/uapi -I../../../lib -I../../../lib/bpf
> -I../../../../include/generated -I../../../include urandom_read.c
> urandom_read -lcap -lelf -lrt -lpthread -o
> tools/testing/selftests/bpf/urandom_read
> gcc: fatal error: input file
> ‘tools/testing/selftests/bpf/urandom_read’ is the
> same as output file
> compilation terminated.
> ../lib.mk:110: recipe for target
> 'tools/testing/selftests/bpf/urandom_read' failed
> To fix this issue remove the urandom_read target and so target
> TEST_CUSTOM_PROGS gets used.
>
> Fixes: 81f77fd0deeb ("bpf: add selftest for stackmap with BPF_F_STACK_BUILD_ID")
> Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
> Acked-by: Yonghong Song <yhs@fb.com>
Applied to bpf, thanks Anders!
^ permalink raw reply
* 4.17.0-10146-gf0dc7f9c6dd9: hw csum failure on powerpc+sungem
From: Meelis Roos @ 2018-06-11 10:57 UTC (permalink / raw)
To: netdev, linuxppc-dev; +Cc: Linux Kernel list
I am seeing this on PowerMac G4 with sungem ethernet driver. 4.17 was
OK, 4.17.0-10146-gf0dc7f9c6dd9 is problematic.
[ 140.518664] eth0: hw csum failure
[ 140.518699] CPU: 0 PID: 1237 Comm: postconf Not tainted 4.17.0-10146-gf0dc7f9c6dd9 #83
[ 140.518707] Call Trace:
[ 140.518734] [effefd90] [c03d6db8] __skb_checksum_complete+0xd8/0xdc (unreliable)
[ 140.518759] [effefdb0] [c04c1284] icmpv6_rcv+0x248/0x4ec
[ 140.518775] [effefdd0] [c049a448] ip6_input_finish.constprop.0+0x11c/0x5f4
[ 140.518786] [effefe10] [c049b1c0] ip6_mc_input+0xcc/0x100
[ 140.518807] [effefe20] [c03e110c] __netif_receive_skb_core+0x310/0x944
[ 140.518820] [effefe70] [c03e76ec] napi_gro_receive+0xd0/0xe8
[ 140.518845] [effefe80] [f3e1f66c] gem_poll+0x618/0x1274 [sungem]
[ 140.518856] [effeff30] [c03e6f0c] net_rx_action+0x198/0x374
[ 140.518872] [effeff90] [c0501a88] __do_softirq+0x120/0x278
[ 140.518890] [effeffe0] [c0036188] irq_exit+0xd8/0xdc
[ 140.518908] [effefff0] [c000f478] call_do_irq+0x24/0x3c
[ 140.518925] [d05a5d30] [c0007120] do_IRQ+0x74/0xf0
[ 140.518941] [d05a5d50] [c0012474] ret_from_except+0x0/0x14
[ 140.518960] --- interrupt: 501 at copy_page+0x40/0x90
LR = copy_user_page+0x18/0x30
[ 140.518973] [d05a5e10] [d058cd80] 0xd058cd80 (unreliable)
[ 140.518989] [d05a5e20] [c00fa2bc] wp_page_copy+0xec/0x654
[ 140.519002] [d05a5e60] [c00fd3a4] do_wp_page+0xa8/0x5b4
[ 140.519013] [d05a5e90] [c00fe934] handle_mm_fault+0x564/0xa84
[ 140.519025] [d05a5f00] [c0016230] do_page_fault+0x1bc/0x7e8
[ 140.519037] [d05a5f40] [c0012300] handle_page_fault+0x14/0x40
[ 140.519048] --- interrupt: 301 at 0xb78b6864
LR = 0xb78b6c54
--
Meelis Roos (mroos@linux.ee)
^ permalink raw reply
* Re: [PATCH RFC v2 2/9] veth: Add driver XDP
From: Toshiaki Makita @ 2018-06-11 10:55 UTC (permalink / raw)
To: netdev
Cc: Toshiaki Makita, Jesper Dangaard Brouer, Alexei Starovoitov,
Daniel Borkmann
In-Reply-To: <20180610160217.3146-3-toshiaki.makita1@gmail.com>
On 2018/06/11 1:02, Toshiaki Makita wrote:
> From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>
> This is basic implementation of veth driver XDP.
>
> Incoming packets are sent from the peer veth device in the form of skb,
> so this is generally doing the same thing as generic XDP.
>
> This itself is not so useful, but a starting point to implement other
> useful veth XDP features like TX and REDIRECT.
>
> This introduces NAPI when XDP is enabled, because XDP is now heavily
> relies on NAPI context. Use ptr_ring to emulate NIC ring. Tx function
> enqueues packets to the ring and peer NAPI handler drains the ring.
>
> Currently only one ring is allocated for each veth device, so it does
> not scale on multiqueue env. This can be resolved by allocating rings
> on the per-queue basis later.
>
> Note that NAPI is not used but netif_rx is used when XDP is not loaded,
> so this does not change the default behaviour.
>
> v2:
> - Squashed with the patch adding NAPI.
> - Implement adjust_tail.
> - Don't acquire consumer lock because it is guarded by NAPI.
> - Make poll_controller noop since it is unnecessary.
> - Register rxq_info on enabling XDP rather than on opening the device.
>
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> ---
...
> @@ -203,6 +480,9 @@ static int veth_close(struct net_device *dev)
> if (peer)
> netif_carrier_off(peer);
>
> + if (rtnl_dereference(priv->xdp_prog))
> + veth_disable_xdp(dev);
Found that it is not safe to disable napi at this point, because packets
still can come from the peer and the ring may be filled after cleanup.
I'll fix the problem in v3, but wait for feedback some more.
--
Toshiaki Makita
^ permalink raw reply
* Dear Talented
From: Lisa Clement @ 2018-06-11 9:13 UTC (permalink / raw)
To: Recipients
Dear Talented,
I am Talent Scout For BLUE SKY FILM STUDIO, Present Blue sky Studio a
Film Corporation Located in the United State, is Soliciting for the
Right to use Your Photo/Face and Personality as One of the Semi -Major
Role/ Character in our Upcoming ANIMATED Stereoscope 3D Movie-The Story
of Spies in Disguise (Spies in Disguise 2019) The Movie is Currently Filming (In
Production) Please Note That There Will Be No Auditions, Traveling or
Any Special / Professional Acting Skills, Since the Production of This
Movie Will Be Done with our State of Art Computer -Generating Imagery
Equipment. We Are Prepared to Pay the Total Sum of $620,000.00 USD. For
More Information/Understanding, Please Write us on the E-Mail Below.
CONTACT EMAIL: bluesky.filmstudio@usa.com
All Reply to: bluesky.filmstudio@usa.com
Note: Only the Response send to this mail will be Given a Prior
Consideration.
Talent Scout
Lisa Clement
^ permalink raw reply
* Re: KASAN: slab-out-of-bounds Read in bpf_skb_change_proto
From: Daniel Borkmann @ 2018-06-11 10:31 UTC (permalink / raw)
To: Dmitry Vyukov
Cc: syzbot, Alexei Starovoitov, David Miller, LKML, netdev,
syzkaller-bugs
In-Reply-To: <CACT4Y+boO990R6a9kGtx-UZpZPeB2bwhgvyUjPvEFOpbzxJKrg@mail.gmail.com>
On 06/11/2018 11:52 AM, Dmitry Vyukov wrote:
> On Mon, Jun 11, 2018 at 11:42 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
>> On 06/10/2018 05:27 PM, syzbot wrote:
>>> Hello,
>>>
>>> syzbot found the following crash on:
>>>
>>> HEAD commit: a16afaf7928b Merge tag 'for-v4.18' of git://git.kernel.org..
>>> git tree: upstream
>>> console output: https://syzkaller.appspot.com/x/log.txt?x=1338f6bf800000
>>> kernel config: https://syzkaller.appspot.com/x/.config?x=314f2150f36c16ca
>>> dashboard link: https://syzkaller.appspot.com/bug?extid=d2d729bdde65dee3eae6
>>> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
>>> syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=1173381f800000
>>> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=171f90cf800000
>>>
>>> IMPORTANT: if you fix the bug, please add the following tag to the commit:
>>> Reported-by: syzbot+d2d729bdde65dee3eae6@syzkaller.appspotmail.com
>>
>> #syz fix: bpf: reject passing modified ctx to helper functions
>
> On a related note, it seems that it still can unwind past
> bpf_skb_change_proto. I think the "net.core.bpf_jit_kallsyms = 1"
> sysctl should have been reached syzbot by the time of crash. Are you
> sure that's the only thing requires? We are using frame pointer
> unwinder just in case.
Nah, I know what happened; just sent a PR fix for syzkaller.
Thanks,
Daniel
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox