* [PATCH v2] scm: fix possible control message header alignment issue
From: yuan linyu @ 2016-12-29 12:39 UTC (permalink / raw)
To: netdev; +Cc: David S . Miller, yuan linyu
From: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
1. put_cmsg{_compat}() may copy data to user when buffer free space less than
control message header alignment size.
2. scm_detach_fds{_compat}() may calc wrong fdmax if control message header
have greater alignment size.
Signed-off-by: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
---
net/compat.c | 10 ++++++++--
net/core/scm.c | 8 +++++---
2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/net/compat.c b/net/compat.c
index 96c544b..ffe7a04 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -245,7 +245,9 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
return -EFAULT;
- if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
+ if (cmlen > CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr)) &&
+ copy_to_user(CMSG_COMPAT_DATA(cm), data,
+ cmlen - CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr))))
return -EFAULT;
cmlen = CMSG_COMPAT_SPACE(len);
if (kmsg->msg_controllen < cmlen)
@@ -258,12 +260,16 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
{
struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
- int fdmax = (kmsg->msg_controllen - sizeof(struct compat_cmsghdr)) / sizeof(int);
+ int fdmax = 0;
int fdnum = scm->fp->count;
struct file **fp = scm->fp->fp;
int __user *cmfptr;
int err = 0, i;
+ if (kmsg->msg_controllen > CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr)))
+ fdmax = (kmsg->msg_controllen -
+ CMSG_COMPAT_ALIGN(sizeof(struct compat_cmsghdr))) / sizeof(int);
+
if (fdnum < fdmax)
fdmax = fdnum;
diff --git a/net/core/scm.c b/net/core/scm.c
index d882043..b2e60fd 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -238,7 +238,9 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
err = -EFAULT;
if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
goto out;
- if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
+ if (cmlen > CMSG_ALIGN(sizeof(struct cmsghdr)) &&
+ copy_to_user(CMSG_DATA(cm), data,
+ cmlen - CMSG_ALIGN(sizeof(struct cmsghdr))))
goto out;
cmlen = CMSG_SPACE(len);
if (msg->msg_controllen < cmlen)
@@ -267,8 +269,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
return;
}
- if (msg->msg_controllen > sizeof(struct cmsghdr))
- fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
+ if (msg->msg_controllen > CMSG_ALIGN(sizeof(struct cmsghdr)))
+ fdmax = ((msg->msg_controllen - CMSG_ALIGN(sizeof(struct cmsghdr)))
/ sizeof(int));
if (fdnum < fdmax)
--
2.7.4
^ permalink raw reply related
* [PATCH] ipv4: make tcp_notsent_lowat sysctl knob behave as true unsigned int
From: Pavel Tikhomirov @ 2016-12-29 14:35 UTC (permalink / raw)
To: David S . Miller
Cc: Eric Dumazet, Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI,
Patrick McHardy, netdev, linux-kernel, Konstantin Khorenko,
Pavel Tikhomirov
> cat /proc/sys/net/ipv4/tcp_notsent_lowat
-1
> echo 4294967295 > /proc/sys/net/ipv4/tcp_notsent_lowat
-bash: echo: write error: Invalid argument
> echo -2147483648 > /proc/sys/net/ipv4/tcp_notsent_lowat
> cat /proc/sys/net/ipv4/tcp_notsent_lowat
-2147483648
but in documentation we have "tcp_notsent_lowat - UNSIGNED INTEGER"
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
---
net/ipv4/sysctl_net_ipv4.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 80bc36b..5361373 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -41,6 +41,7 @@ static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+static unsigned int uint_max = UINT_MAX;
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
@@ -958,7 +959,9 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &uint_max,
},
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
--
2.9.3
^ permalink raw reply related
* Re: [PATCH net] sctp: do not loose window information if in rwnd_over
From: Neil Horman @ 2016-12-29 15:42 UTC (permalink / raw)
To: Marcelo Ricardo Leitner; +Cc: netdev, linux-sctp, Vlad Yasevich
In-Reply-To: <14d1eef1c9da4fcebc53d428a59ecaefab439b2c.1482510284.git.marcelo.leitner@gmail.com>
On Fri, Dec 23, 2016 at 02:29:02PM -0200, Marcelo Ricardo Leitner wrote:
> It's possible that we receive a packet that is larger than current
> window. If it's the first packet in this way, it will cause it to
> increase rwnd_over. Then, if we receive another data chunk (specially as
> SCTP allows you to have one data chunk in flight even during 0 window),
> rwnd_over will be overwritten instead of added to.
>
> In the long run, this could cause the window to grow bigger than its
> initial size, as rwnd_over would be charged only for the last received
> data chunk while the code will try open the window for all packets that
> were received and had its value in rwnd_over overwritten. This, then,
> can lead to the worsening of payload/buffer ratio and cause rwnd_press
> to kick in more often.
>
> The fix is to sum it too, same as is done for rwnd_press, so that if we
> receive 3 chunks after closing the window, we still have to release that
> same amount before re-opening it.
>
> Log snippet from sctp_test exhibiting the issue:
> [ 146.209232] sctp: sctp_assoc_rwnd_decrease: asoc:ffff88013928e000
> rwnd decreased by 1 to (0, 1, 114221)
> [ 146.209232] sctp: sctp_assoc_rwnd_decrease:
> association:ffff88013928e000 has asoc->rwnd:0, asoc->rwnd_over:1!
> [ 146.209232] sctp: sctp_assoc_rwnd_decrease: asoc:ffff88013928e000
> rwnd decreased by 1 to (0, 1, 114221)
> [ 146.209232] sctp: sctp_assoc_rwnd_decrease:
> association:ffff88013928e000 has asoc->rwnd:0, asoc->rwnd_over:1!
> [ 146.209232] sctp: sctp_assoc_rwnd_decrease: asoc:ffff88013928e000
> rwnd decreased by 1 to (0, 1, 114221)
> [ 146.209232] sctp: sctp_assoc_rwnd_decrease:
> association:ffff88013928e000 has asoc->rwnd:0, asoc->rwnd_over:1!
> [ 146.209232] sctp: sctp_assoc_rwnd_decrease: asoc:ffff88013928e000
> rwnd decreased by 1 to (0, 1, 114221)
>
> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> ---
> net/sctp/associola.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/sctp/associola.c b/net/sctp/associola.c
> index 68428e1f71810fbe65b7f86c750c3ad61f0266ec..56ddcfaeb4f64235b54b0daa915fabf0cc0170a9 100644
> --- a/net/sctp/associola.c
> +++ b/net/sctp/associola.c
> @@ -1539,7 +1539,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
> asoc->rwnd = 0;
> }
> } else {
> - asoc->rwnd_over = len - asoc->rwnd;
> + asoc->rwnd_over += len - asoc->rwnd;
> asoc->rwnd = 0;
> }
>
> --
> 2.9.3
>
>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
^ permalink raw reply
* [PATCH] rtlwifi: fix spelling mistake: "contry" -> "country"
From: Colin King @ 2016-12-29 16:00 UTC (permalink / raw)
To: Larry Finger, Chaoming Li, Kalle Valo, linux-wireless, netdev
Cc: linux-kernel
From: Colin Ian King <colin.king@canonical.com>
trivial fix to spelling mistake in RT_TRACE message
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
drivers/net/wireless/realtek/rtlwifi/regd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c
index 6ee6bf8..558c31b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/regd.c
+++ b/drivers/net/wireless/realtek/rtlwifi/regd.c
@@ -440,7 +440,7 @@ int rtl_regd_init(struct ieee80211_hw *hw,
if (rtlpriv->regd.country_code >= COUNTRY_CODE_MAX) {
RT_TRACE(rtlpriv, COMP_REGD, DBG_DMESG,
- "rtl: EEPROM indicates invalid contry code, world wide 13 should be used\n");
+ "rtl: EEPROM indicates invalid country code, world wide 13 should be used\n");
rtlpriv->regd.country_code = COUNTRY_CODE_WORLD_WIDE_13;
}
--
2.10.2
^ permalink raw reply related
* Re: [PATCH] net: ethernet: ti: davinci_cpdma: fix access to uninitialized variable in cpdma_chan_set_descs()
From: Grygorii Strashko @ 2016-12-29 16:04 UTC (permalink / raw)
To: David S. Miller, netdev, Mugunthan V N, Sekhar Nori, linux-kernel,
linux-omap
In-Reply-To: <20161229014904.GA22718@khorivan>
Hi Ivan,
On 12/28/2016 07:49 PM, Ivan Khoronzhuk wrote:
> On Wed, Dec 28, 2016 at 05:42:13PM -0600, Grygorii Strashko wrote:
>> Now below code sequence causes "Unable to handle kernel NULL pointer
>> dereference.." exception and system crash during CPSW CPDMA initialization:
>>
>> cpsw_probe
>> |-cpdma_chan_create (TX channel)
>> |-cpdma_chan_split_pool
>> |-cpdma_chan_set_descs(for TX channels)
>> |-cpdma_chan_set_descs(for RX channels) [1]
>>
>> - and -
>> static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
>> int rx, int desc_num,
>> int per_ch_desc)
>> {
>> struct cpdma_chan *chan, *most_chan = NULL;
>>
>> ...
>>
>> for (i = min; i < max; i++) {
>> chan = ctlr->channels[i];
>> if (!chan)
>> continue;
>> ...
>>
>> if (most_dnum < chan->desc_num) {
>> most_dnum = chan->desc_num;
>> most_chan = chan;
>> }
>> }
>> /* use remains */
>> most_chan->desc_num += desc_cnt; [2]
>> }
>>
>> So, most_chan value will never be reassigned when cpdma_chan_set_descs() is
>> called second time [1], because there are no RX channels yet and system
>> will crash at [2].
>
> How did you get this?
> I just remember as I fixed it before sending patchset.
>
> Maybe it was some experiment with it.
> I just wonder and want to find actual reason what's happening.
>
> Look bellow:
>
> cpsw_probe
> |-cpdma_chan_create (TX channel)
> |-cpdma_chan_split_pool
> |-cpdma_chan_set_descs(for TX channels)
> |-cpdma_chan_set_descs(for RX channels) [1]
>
> |-cpdma_chan_set_descs(for RX channels) in case you'be described has to be
> called with rx_desc_num = 0, because all descs are assigned already for tx
> channel. And, if desc_num = 0, cpdma_chan_set_descs just exits and no issues.
> So, could you please explain how you get this, in what circumstances.
You are right. I've hit this issue while working on other feature which allows
to split pool between RX and TX path and as part of it cpdma_chan_set_descs()
is called with different set of arguments. I probably will just squash it in my changes or
or send as part of my series.
--
regards,
-grygorii
^ permalink raw reply
* Re: [PATCH net-next V2 3/3] tun: rx batching
From: David Miller @ 2016-12-29 16:35 UTC (permalink / raw)
To: jasowang; +Cc: netdev, virtualization, linux-kernel, kvm, mst
In-Reply-To: <1482912571-3157-4-git-send-email-jasowang@redhat.com>
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 28 Dec 2016 16:09:31 +0800
> + spin_lock(&queue->lock);
> + qlen = skb_queue_len(queue);
> + if (qlen > rx_batched)
> + goto drop;
> + __skb_queue_tail(queue, skb);
> + if (!more || qlen + 1 > rx_batched) {
> + __skb_queue_head_init(&process_queue);
> + skb_queue_splice_tail_init(queue, &process_queue);
> + rcv = true;
> + }
> + spin_unlock(&queue->lock);
Since you always clear the 'queue' when you insert the skb that hits
the limit, I don't see how the "goto drop" path can be possibly taken.
^ permalink raw reply
* [PATCH net 2/5] net/mlx4_en: Fix bad WQE issue
From: Tariq Toukan @ 2016-12-29 16:37 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev, Eran Ben Elisha, Eugenia Emantayev, Tariq Toukan
In-Reply-To: <1483029433-3624-1-git-send-email-tariqt@mellanox.com>
From: Eugenia Emantayev <eugenia@mellanox.com>
Single send WQE in RX buffer should be stamped with software
ownership in order to prevent the flow of QP in error in FW
once UPDATE_QP is called.
Fixes: 9f519f68cfff ('mlx4_en: Not using Shared Receive Queues')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 3c37e216bbf3..eac527e25ec9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -445,8 +445,14 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;
ring->stride = stride;
- if (ring->stride <= TXBB_SIZE)
+ if (ring->stride <= TXBB_SIZE) {
+ /* Stamp first unused send wqe */
+ __be32 *ptr = (__be32 *)ring->buf;
+ __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT);
+ *ptr = stamp;
+ /* Move pointer to start of rx section */
ring->buf += TXBB_SIZE;
+ }
ring->log_stride = ffs(ring->stride) - 1;
ring->buf_size = ring->size * ring->stride;
--
1.8.3.1
^ permalink raw reply related
* [PATCH net 1/5] net/mlx4_core: Use-after-free causes a resource leak in flow-steering detach
From: Tariq Toukan @ 2016-12-29 16:37 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev, Eran Ben Elisha, Jack Morgenstein, Tariq Toukan
In-Reply-To: <1483029433-3624-1-git-send-email-tariqt@mellanox.com>
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
mlx4_QP_FLOW_STEERING_DETACH_wrapper first removes the steering
rule (which results in freeing the rule structure), and then
references a field in this struct (the qp number) when releasing the
busy-status on the rule's qp.
Since this memory was freed, it could reallocated and changed.
Therefore, the qp number in the struct may be incorrect,
so that we are releasing the incorrect qp. This leaves the rule's qp
in the busy state (and could possibly release an incorrect qp as well).
Fix this by saving the qp number in a local variable, for use after
removing the steering rule.
Fixes: 2c473ae7e582 ("net/mlx4_core: Disallow releasing VF QPs which have steering rules")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index c548beaaf910..4b3e139e9c82 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4473,6 +4473,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
struct res_qp *rqp;
struct res_fs_rule *rrule;
u64 mirr_reg_id;
+ int qpn;
if (dev->caps.steering_mode !=
MLX4_STEERING_MODE_DEVICE_MANAGED)
@@ -4489,10 +4490,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
}
mirr_reg_id = rrule->mirr_rule_id;
kfree(rrule->mirr_mbox);
+ qpn = rrule->qpn;
/* Release the rule form busy state before removal */
put_res(dev, slave, vhcr->in_param, RES_FS_RULE);
- err = get_res(dev, slave, rrule->qpn, RES_QP, &rqp);
+ err = get_res(dev, slave, qpn, RES_QP, &rqp);
if (err)
return err;
@@ -4517,7 +4519,7 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
if (!err)
atomic_dec(&rqp->ref_count);
out:
- put_res(dev, slave, rrule->qpn, RES_QP);
+ put_res(dev, slave, qpn, RES_QP);
return err;
}
--
1.8.3.1
^ permalink raw reply related
* [PATCH net 3/5] net/mlx4: Remove BUG_ON from ICM allocation routine
From: Tariq Toukan @ 2016-12-29 16:37 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev, Eran Ben Elisha, Leon Romanovsky, Tariq Toukan
In-Reply-To: <1483029433-3624-1-git-send-email-tariqt@mellanox.com>
From: Leon Romanovsky <leonro@mellanox.com>
This patch removes BUG_ON() macro from mlx4_alloc_icm_coherent()
by checking DMA address alignment in advance and performing proper
folding in case of error.
Fixes: 5b0bf5e25efe ("mlx4_core: Support ICM tables in coherent memory")
Reported-by: Ozgur Karatas <okaratas@member.fsf.org>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/icm.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 2a9dd460a95f..e1f9e7cebf8f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -118,8 +118,13 @@ static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
if (!buf)
return -ENOMEM;
+ if (offset_in_page(buf)) {
+ dma_free_coherent(dev, PAGE_SIZE << order,
+ buf, sg_dma_address(mem));
+ return -ENOMEM;
+ }
+
sg_set_buf(mem, buf, PAGE_SIZE << order);
- BUG_ON(mem->offset);
sg_dma_len(mem) = PAGE_SIZE << order;
return 0;
}
--
1.8.3.1
^ permalink raw reply related
* [PATCH net 4/5] net/mlx4_en: Fix type mismatch for 32-bit systems
From: Tariq Toukan @ 2016-12-29 16:37 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev, Eran Ben Elisha, Slava Shwartsman, Tariq Toukan
In-Reply-To: <1483029433-3624-1-git-send-email-tariqt@mellanox.com>
From: Slava Shwartsman <slavash@mellanox.com>
is_power_of_2 expects unsigned long and we pass u64 max_val_cycles,
this will be truncated on 32 bit systems, and the result is not what we
were expecting.
div_u64 expects u32 as a second argument and we pass
max_val_cycles_rounded which is u64 hence it will always be truncated.
Fix was tested on both 64 and 32 bit systems and got same results for
max_val_cycles and max_val_cycles_rounded.
Fixes: 4850cf458157 ("net/mlx4_en: Resolve dividing by zero in 32-bit system")
Signed-off-by: Slava Shwartsman <slavash@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx4/en_clock.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 015198c14fa8..504461a464c5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -245,13 +245,9 @@ static u32 freq_to_shift(u16 freq)
{
u32 freq_khz = freq * 1000;
u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
- u64 tmp_rounded =
- roundup_pow_of_two(max_val_cycles) > max_val_cycles ?
- roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX;
- u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ?
- max_val_cycles : tmp_rounded;
+ u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1);
/* calculate max possible multiplier in order to fit in 64bit */
- u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded);
+ u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded);
/* This comes from the reverse of clocksource_khz2mult */
return ilog2(div_u64(max_mul * freq_khz, 1000000));
--
1.8.3.1
^ permalink raw reply related
* [PATCH net 0/5] mlx4 misc fixes
From: Tariq Toukan @ 2016-12-29 16:37 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev, Eran Ben Elisha, Tariq Toukan
Hi Dave,
This patchset contains several bug fixes from the team to the
mlx4 Eth and Core drivers.
Series generated against net commit:
60133867f1f1 'net: wan: slic_ds26522: fix spelling mistake: "configurated" -> "configured"'
Thanks,
Tariq.
Eugenia Emantayev (1):
net/mlx4_en: Fix bad WQE issue
Jack Morgenstein (2):
net/mlx4_core: Use-after-free causes a resource leak in flow-steering
detach
net/mlx4_core: Fix raw qp flow steering rules under SRIOV
Leon Romanovsky (1):
net/mlx4: Remove BUG_ON from ICM allocation routine
Slava Shwartsman (1):
net/mlx4_en: Fix type mismatch for 32-bit systems
drivers/infiniband/hw/mlx4/main.c | 14 +++++++++--
drivers/net/ethernet/mellanox/mlx4/en_clock.c | 8 ++-----
drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 ++++++-
drivers/net/ethernet/mellanox/mlx4/icm.c | 7 +++++-
drivers/net/ethernet/mellanox/mlx4/main.c | 18 ++++++++++++++
.../net/ethernet/mellanox/mlx4/resource_tracker.c | 28 ++++------------------
include/linux/mlx4/device.h | 2 ++
7 files changed, 52 insertions(+), 33 deletions(-)
--
1.8.3.1
^ permalink raw reply
* [PATCH net 5/5] net/mlx4_core: Fix raw qp flow steering rules under SRIOV
From: Tariq Toukan @ 2016-12-29 16:37 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev, Eran Ben Elisha, Jack Morgenstein, Tariq Toukan
In-Reply-To: <1483029433-3624-1-git-send-email-tariqt@mellanox.com>
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Demoting simple flow steering rule priority (for DPDK) was achieved by
wrapping FW commands MLX4_QP_FLOW_STEERING_ATTACH/DETACH for the PF
as well, and forcing the priority to MLX4_DOMAIN_NIC in the wrapper
function for the PF and all VFs.
In function mlx4_ib_create_flow(), this change caused the main rule
creation for the PF to be wrapped, while it left the associated
tunnel steering rule creation unwrapped for the PF.
This mismatch caused rule deletion failures in mlx4_ib_destroy_flow()
for the PF when the detach wrapper function did not find the associated
tunnel-steering rule (since creation of that rule for the PF did not
go through the wrapper function).
Fix this by setting MLX4_QP_FLOW_STEERING_ATTACH/DETACH to be "native"
(so that the PF invocation does not go through the wrapper), and perform
the required priority demotion for the PF in the mlx4_ib_create_flow()
code path.
Fixes: 48564135cba8 ("net/mlx4_core: Demote simple multicast and broadcast flow steering rules")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
---
drivers/infiniband/hw/mlx4/main.c | 14 ++++++++++++--
drivers/net/ethernet/mellanox/mlx4/main.c | 18 ++++++++++++++++++
.../net/ethernet/mellanox/mlx4/resource_tracker.c | 22 +---------------------
include/linux/mlx4/device.h | 2 ++
4 files changed, 33 insertions(+), 23 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index c8413fc120e6..7031a8dd4d14 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1682,9 +1682,19 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
size += ret;
}
+ if (mlx4_is_master(mdev->dev) && flow_type == MLX4_FS_REGULAR &&
+ flow_attr->num_of_specs == 1) {
+ struct _rule_hw *rule_header = (struct _rule_hw *)(ctrl + 1);
+ enum ib_flow_spec_type header_spec =
+ ((union ib_flow_spec *)(flow_attr + 1))->type;
+
+ if (header_spec == IB_FLOW_SPEC_ETH)
+ mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
+ }
+
ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
- MLX4_CMD_WRAPPED);
+ MLX4_CMD_NATIVE);
if (ret == -ENOMEM)
pr_err("mcg table is full. Fail to register network rule.\n");
else if (ret == -ENXIO)
@@ -1701,7 +1711,7 @@ static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
int err;
err = mlx4_cmd(dev, reg_id, 0, 0,
MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
- MLX4_CMD_WRAPPED);
+ MLX4_CMD_NATIVE);
if (err)
pr_err("Fail to detach network rule. registration id = 0x%llx\n",
reg_id);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 5e7840a7a33b..bffa6f345f2f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -42,6 +42,7 @@
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/kmod.h>
+#include <linux/etherdevice.h>
#include <net/devlink.h>
#include <linux/mlx4/device.h>
@@ -782,6 +783,23 @@ int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
}
EXPORT_SYMBOL(mlx4_is_slave_active);
+void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
+ struct _rule_hw *eth_header)
+{
+ if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
+ is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
+ struct mlx4_net_trans_rule_hw_eth *eth =
+ (struct mlx4_net_trans_rule_hw_eth *)eth_header;
+ struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
+ bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
+ next_rule->rsvd == 0;
+
+ if (last_rule)
+ ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
+ }
+}
+EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio);
+
static void slave_adjust_steering_mode(struct mlx4_dev *dev,
struct mlx4_dev_cap *dev_cap,
struct mlx4_init_hca_param *hca_param)
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 4b3e139e9c82..56185a0b827d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4164,22 +4164,6 @@ static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header,
return 0;
}
-static void handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
- struct _rule_hw *eth_header)
-{
- if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
- is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
- struct mlx4_net_trans_rule_hw_eth *eth =
- (struct mlx4_net_trans_rule_hw_eth *)eth_header;
- struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
- bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
- next_rule->rsvd == 0;
-
- if (last_rule)
- ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
- }
-}
-
/*
* In case of missing eth header, append eth header with a MAC address
* assigned to the VF.
@@ -4363,10 +4347,7 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id));
if (header_id == MLX4_NET_TRANS_RULE_ID_ETH)
- handle_eth_header_mcast_prio(ctrl, rule_header);
-
- if (slave == dev->caps.function)
- goto execute;
+ mlx4_handle_eth_header_mcast_prio(ctrl, rule_header);
switch (header_id) {
case MLX4_NET_TRANS_RULE_ID_ETH:
@@ -4394,7 +4375,6 @@ int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
goto err_put_qp;
}
-execute:
err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param,
vhcr->in_modifier, 0,
MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 93bdb3485192..6533c16e27ad 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1384,6 +1384,8 @@ int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port,
int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv);
int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
bool *vlan_offload_disabled);
+void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
+ struct _rule_hw *eth_header);
int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH 1/2] ipv4: Namespaceify tcp_tw_recycle and tcp_max_tw_buckets knob
From: David Miller @ 2016-12-29 16:39 UTC (permalink / raw)
To: yanhaishuang; +Cc: kuznet, jmorris, kernel, netdev, linux-kernel
In-Reply-To: <1482918753-1235-1-git-send-email-yanhaishuang@cmss.chinamobile.com>
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Wed, 28 Dec 2016 17:52:32 +0800
> Different namespace application might require fast recycling
> TIME-WAIT sockets independently of the host.
>
> Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Applied, but:
> @@ -111,6 +121,7 @@ struct netns_ipv4 {
> int sysctl_tcp_fin_timeout;
> unsigned int sysctl_tcp_notsent_lowat;
> int sysctl_tcp_tw_reuse;
> + struct inet_timewait_death_row tcp_death_row;
^^
Trailing whitespace I had to fix up.
^ permalink raw reply
* Re: [PATCH 2/2] ipv4: Namespaceify tcp_max_syn_backlog knob
From: David Miller @ 2016-12-29 16:39 UTC (permalink / raw)
To: yanhaishuang; +Cc: kuznet, jmorris, kernel, netdev, linux-kernel
In-Reply-To: <1482918753-1235-2-git-send-email-yanhaishuang@cmss.chinamobile.com>
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Wed, 28 Dec 2016 17:52:33 +0800
> Different namespace application might require different maximal
> number of remembered connection requests.
>
> Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Applied.
^ permalink raw reply
* Re: [PATCH v4] stmmac: enable rx queues
From: David Miller @ 2016-12-29 16:53 UTC (permalink / raw)
To: Joao.Pinto; +Cc: netdev, sandeepkishan108
In-Reply-To: <44464911a078d7494e1a968a2df6d2ad3401ef1c.1482929644.git.jpinto@synopsys.com>
From: Joao Pinto <Joao.Pinto@synopsys.com>
Date: Wed, 28 Dec 2016 12:57:48 +0000
> When the hardware is synthesized with multiple queues, all queues are
> disabled for default. This patch adds the rx queues configuration.
> This patch was successfully tested in a Synopsys QoS Reference design.
>
> Signed-off-by: Joao Pinto <jpinto@synopsys.com>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH] ipv6: Should use consistent conditional judgement for ip6 fragment between __ip6_append_data and ip6_finish_output
From: David Miller @ 2016-12-29 16:55 UTC (permalink / raw)
To: lizheng043
Cc: linux-kernel, netdev, kuznet, jmorris, yoshfuji, kaber,
james.z.li
In-Reply-To: <1482938626-20151-1-git-send-email-lizheng043@gmail.com>
From: Zheng Li <lizheng043@gmail.com>
Date: Wed, 28 Dec 2016 23:23:46 +0800
> From: Zheng Li <james.z.li@ericsson.com>
>
> There is an inconsistent conditional judgement between __ip6_append_data
> and ip6_finish_output functions, the variable length in __ip6_append_data
> just include the length of application's payload and udp6 header, don't
> include the length of ipv6 header, but in ip6_finish_output use
> (skb->len > ip6_skb_dst_mtu(skb)) as judgement, and skb->len include the
> length of ipv6 header.
>
> That causes some particular application's udp6 payloads whose length are
> between (MTU - IPv6 Header) and MTU were fragmented by ip6_fragment even
> though the rst->dev support UFO feature.
>
> Add the length of ipv6 header to length in __ip6_append_data to keep
> consistent conditional judgement as ip6_finish_output for ip6 fragment.
>
> Signed-off-by: Zheng Li <james.z.li@ericsson.com>
Applied, thank you.
^ permalink raw reply
* Re: [PATCHv2 net-next 00/11] net: mvpp2: misc improvements and preparation patches
From: David Miller @ 2016-12-29 17:03 UTC (permalink / raw)
To: thomas.petazzoni
Cc: netdev, jason, andrew, sebastian.hesselbarth, gregory.clement,
nadavh, hannah, yehuday, linux-arm-kernel, stefanc, mw
In-Reply-To: <1482943567-12483-1-git-send-email-thomas.petazzoni@free-electrons.com>
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Wed, 28 Dec 2016 17:45:56 +0100
> This series contains a number of misc improvements and preparation
> patches for an upcoming series that adds support for the new PPv2.2
> network controller to the mvpp2 driver.
>
> The most significant improvements are:
>
> - Switching to using build_skb(), which is necessary for the upcoming
> PPv2.2 support, but anyway a good improvement to the current mvpp2
> driver (supporting PPv2.1).
>
> - Making the driver build on 64-bit platforms.
>
> Changes since v1:
>
> - This series is split as a separate series from the larger patch set
> adding support for PPv2.2 in the mvpp2 driver, as requested by
> David Miller.
>
> - Rebased on top of v4.10-rc1.
You still have warnings to fix for the 64-bit build:
drivers/net/ethernet/marvell/mvpp2.c: In function ‘mvpp2_rx’:
drivers/net/ethernet/marvell/mvpp2.c:5125:10: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
data = (void *)rx_desc->buf_cookie;
^
^ permalink raw reply
* Re: ipv6: remove unnecessary inet6_sk check
From: David Miller @ 2016-12-29 17:06 UTC (permalink / raw)
To: davej; +Cc: netdev
In-Reply-To: <20161228165318.5afrtlbulsgymc4c@codemonkey.org.uk>
From: Dave Jones <davej@codemonkey.org.uk>
Date: Wed, 28 Dec 2016 11:53:18 -0500
> np is already assigned in the variable declaration of ping_v6_sendmsg.
> At this point, we have already dereferenced np several times, so the
> NULL check is also redundant.
>
> Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Dave Jones <davej@codemonkey.org.uk>
Applied, thanks Dave.
^ permalink raw reply
* Re: [PATCH] drivers: atm: eni: rename macro DAUGTHER_ID to fix spelling mistake
From: David Miller @ 2016-12-29 17:07 UTC (permalink / raw)
To: colin.king; +Cc: 3chas3, linux-atm-general, netdev, linux-kernel
In-Reply-To: <20161228173120.20207-1-colin.king@canonical.com>
From: Colin King <colin.king@canonical.com>
Date: Wed, 28 Dec 2016 17:31:20 +0000
> From: Colin Ian King <colin.king@canonical.com>
>
> Rename DAUGTHER_ID to DAUGHTER_ID to fix spelling mistake
>
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
Applied.
^ permalink raw reply
* Re: [PATCH net] net: stmmac: Fix error path after register_netdev move
From: David Miller @ 2016-12-29 17:08 UTC (permalink / raw)
To: f.fainelli
Cc: netdev, pavel, Joao.Pinto, seraphin.bonnaffe, alexandre.torgue,
manabian, niklas.cassel, johan, boon.leong.ong, weifeng.voon,
lars.persson, linux-kernel, peppe.cavallaro, alexandre.torgue
In-Reply-To: <20161228234441.5026-1-f.fainelli@gmail.com>
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 28 Dec 2016 15:44:41 -0800
> Commit 5701659004d6 ("net: stmmac: Fix race between stmmac_drv_probe and
> stmmac_open") re-ordered how the MDIO bus registration and the network
> device are registered, but missed to unwind the MDIO bus registration in
> case we fail to register the network device.
>
> Fixes: 5701659004d6 ("net: stmmac: Fix race between stmmac_drv_probe and stmmac_open")
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Applied, thanks Florian.
^ permalink raw reply
* [PATCH] stmmac: adding EEE to GMAC4
From: Joao Pinto @ 2016-12-29 17:10 UTC (permalink / raw)
To: davem
Cc: peppe.cavallaro, alexandre.torgue, rayagond, linux-kernel, netdev,
Joao Pinto
This patch adds Energy Efficiency Ethernet to GMAC4.
Signed-off-by: Joao Pinto <jpinto@synopsys.com>
---
drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 12 +++++
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 59 +++++++++++++++++++++++
2 files changed, 71 insertions(+)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index b524598..73d1dab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -90,6 +90,18 @@ enum power_event {
power_down = 0x00000001,
};
+/* Energy Efficient Ethernet (EEE) for GMAC4
+ *
+ * LPI status, timer and control register offset
+ */
+#define GMAC4_LPI_CTRL_STATUS 0xd0
+#define GMAC4_LPI_TIMER_CTRL 0xd4
+
+/* LPI control and status defines */
+#define GMAC4_LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */
+#define GMAC4_LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */
+#define GMAC4_LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */
+
/* MAC Debug bitmap */
#define GMAC_DEBUG_TFCSTS_MASK GENMASK(18, 17)
#define GMAC_DEBUG_TFCSTS_SHIFT 17
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index ecfbf57..02eab79 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -137,6 +137,61 @@ static void dwmac4_get_umac_addr(struct mac_device_info *hw,
GMAC_ADDR_LOW(reg_n));
}
+static void dwmac4_set_eee_mode(struct mac_device_info *hw)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+
+ /* Enable the link status receive on RGMII, SGMII ore SMII
+ * receive path and instruct the transmit to enter in LPI
+ * state.
+ */
+ value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS);
+ value |= GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA;
+
+ writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS);
+}
+
+static void dwmac4_reset_eee_mode(struct mac_device_info *hw)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+
+ value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS);
+ value &= ~(GMAC4_LPI_CTRL_STATUS_LPIEN | GMAC4_LPI_CTRL_STATUS_LPITXA);
+ writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS);
+}
+
+static void dwmac4_set_eee_pls(struct mac_device_info *hw, int link)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ u32 value;
+
+ value = readl(ioaddr + GMAC4_LPI_CTRL_STATUS);
+
+ if (link)
+ value |= GMAC4_LPI_CTRL_STATUS_PLS;
+ else
+ value &= ~GMAC4_LPI_CTRL_STATUS_PLS;
+
+ writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS);
+}
+
+static void dwmac4_set_eee_timer(struct mac_device_info *hw, int ls, int tw)
+{
+ void __iomem *ioaddr = hw->pcsr;
+ int value = ((tw & 0xffff)) | ((ls & 0x7ff) << 16);
+
+ /* Program the timers in the LPI timer control register:
+ * LS: minimum time (ms) for which the link
+ * status from PHY should be ok before transmitting
+ * the LPI pattern.
+ * TW: minimum time (us) for which the core waits
+ * after it has stopped transmitting the LPI pattern.
+ */
+ writel(value, ioaddr + GMAC4_LPI_TIMER_CTRL);
+}
+
static void dwmac4_set_filter(struct mac_device_info *hw,
struct net_device *dev)
{
@@ -410,6 +465,10 @@ static const struct stmmac_ops dwmac4_ops = {
.pmt = dwmac4_pmt,
.set_umac_addr = dwmac4_set_umac_addr,
.get_umac_addr = dwmac4_get_umac_addr,
+ .set_eee_mode = dwmac4_set_eee_mode,
+ .reset_eee_mode = dwmac4_reset_eee_mode,
+ .set_eee_timer = dwmac4_set_eee_timer,
+ .set_eee_pls = dwmac4_set_eee_pls,
.pcs_ctrl_ane = dwmac4_ctrl_ane,
.pcs_rane = dwmac4_rane,
.pcs_get_adv_lp = dwmac4_get_adv_lp,
--
2.9.3
^ permalink raw reply related
* [PATCH net-next 00/14] bnxt_en: updates for net-next.
From: Michael Chan @ 2016-12-29 17:13 UTC (permalink / raw)
To: davem; +Cc: netdev
This patch series for net-next contains cleanups, new features and minor
fixes. The driver specific busy polling code is removed to use busy
polling support in core networking. Hardware RFS support is enhanced with
added ipv6 flows support and VF support. A new scheme to allocate TX
rings from the firmware is implemented for newer chips and firmware. Plus
some misc. cleanups, minor fixes, and to add the maintainer entry. Please
review.
Michael Chan (14):
bnxt_en: Remove busy poll logic in the driver.
bnxt_en: Use napi_complete_done()
bnxt_en: Improve the IRQ disable sequence during shutdown.
bnxt_en: Fix and clarify link_info->advertising.
bnxt_en: Refactor TPA code path.
bnxt_en: Add function to get vnic capability.
bnxt_en: Refactor code that determines RFS capability.
bnxt_en: Add new hardware RFS mode.
bnxt_en: Assign additional vnics to VFs.
bnxt_en: Add IPV6 hardware RFS support.
bnxt_en: Implement new scheme to reserve tx rings.
bnxt_en: Set default completion ring for async events.
bnxt_en: Handle no aggregation ring gracefully.
MAINTAINERS: Add bnxt_en maintainer info.
MAINTAINERS | 6 +
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 385 +++++++++++++++-------
drivers/net/ethernet/broadcom/bnxt/bnxt.h | 108 +-----
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 73 +++-
drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 34 ++
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 15 +-
6 files changed, 388 insertions(+), 233 deletions(-)
--
1.8.3.1
^ permalink raw reply
* [PATCH net-next 02/14] bnxt_en: Use napi_complete_done()
From: Michael Chan @ 2016-12-29 17:13 UTC (permalink / raw)
To: davem; +Cc: netdev
In-Reply-To: <1483031624-20076-1-git-send-email-michael.chan@broadcom.com>
For better busy polling and GRO support. Do not re-arm IRQ if
napi_complete_done() returns false.
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b53f958..3fbc842 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1778,8 +1778,9 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
break;
if (!bnxt_has_work(bp, cpr)) {
- napi_complete(napi);
- BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons);
+ if (napi_complete_done(napi, work_done))
+ BNXT_CP_DB_REARM(cpr->cp_doorbell,
+ cpr->cp_raw_cons);
break;
}
}
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next 01/14] bnxt_en: Remove busy poll logic in the driver.
From: Michael Chan @ 2016-12-29 17:13 UTC (permalink / raw)
To: davem; +Cc: netdev
In-Reply-To: <1483031624-20076-1-git-send-email-michael.chan@broadcom.com>
Use native NAPI polling instead. The next patch will complete the work
by switching to use napi_complete_done()
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 53 +----------------
drivers/net/ethernet/broadcom/bnxt/bnxt.h | 99 -------------------------------
2 files changed, 3 insertions(+), 149 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9608cb4..b53f958 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -39,9 +39,6 @@
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <net/udp_tunnel.h>
-#ifdef CONFIG_NET_RX_BUSY_POLL
-#include <net/busy_poll.h>
-#endif
#include <linux/workqueue.h>
#include <linux/prefetch.h>
#include <linux/cache.h>
@@ -1356,11 +1353,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
rc = -ENOMEM;
if (likely(skb)) {
skb_record_rx_queue(skb, bnapi->index);
- skb_mark_napi_id(skb, &bnapi->napi);
- if (bnxt_busy_polling(bnapi))
- netif_receive_skb(skb);
- else
- napi_gro_receive(&bnapi->napi, skb);
+ napi_gro_receive(&bnapi->napi, skb);
rc = 1;
}
goto next_rx_no_prod;
@@ -1460,11 +1453,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
}
skb_record_rx_queue(skb, bnapi->index);
- skb_mark_napi_id(skb, &bnapi->napi);
- if (bnxt_busy_polling(bnapi))
- netif_receive_skb(skb);
- else
- napi_gro_receive(&bnapi->napi, skb);
+ napi_gro_receive(&bnapi->napi, skb);
rc = 1;
next_rx:
@@ -1782,9 +1771,6 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
int work_done = 0;
- if (!bnxt_lock_napi(bnapi))
- return budget;
-
while (1) {
work_done += bnxt_poll_work(bp, bnapi, budget - work_done);
@@ -1798,36 +1784,9 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
}
}
mmiowb();
- bnxt_unlock_napi(bnapi);
return work_done;
}
-#ifdef CONFIG_NET_RX_BUSY_POLL
-static int bnxt_busy_poll(struct napi_struct *napi)
-{
- struct bnxt_napi *bnapi = container_of(napi, struct bnxt_napi, napi);
- struct bnxt *bp = bnapi->bp;
- struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
- int rx_work, budget = 4;
-
- if (atomic_read(&bp->intr_sem) != 0)
- return LL_FLUSH_FAILED;
-
- if (!bp->link_info.link_up)
- return LL_FLUSH_FAILED;
-
- if (!bnxt_lock_poll(bnapi))
- return LL_FLUSH_BUSY;
-
- rx_work = bnxt_poll_work(bp, bnapi, budget);
-
- BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons);
-
- bnxt_unlock_poll(bnapi);
- return rx_work;
-}
-#endif
-
static void bnxt_free_tx_skbs(struct bnxt *bp)
{
int i, max_idx;
@@ -5094,10 +5053,8 @@ static void bnxt_disable_napi(struct bnxt *bp)
if (!bp->bnapi)
return;
- for (i = 0; i < bp->cp_nr_rings; i++) {
+ for (i = 0; i < bp->cp_nr_rings; i++)
napi_disable(&bp->bnapi[i]->napi);
- bnxt_disable_poll(bp->bnapi[i]);
- }
}
static void bnxt_enable_napi(struct bnxt *bp)
@@ -5106,7 +5063,6 @@ static void bnxt_enable_napi(struct bnxt *bp)
for (i = 0; i < bp->cp_nr_rings; i++) {
bp->bnapi[i]->in_reset = false;
- bnxt_enable_poll(bp->bnapi[i]);
napi_enable(&bp->bnapi[i]->napi);
}
}
@@ -6765,9 +6721,6 @@ static void bnxt_udp_tunnel_del(struct net_device *dev,
#endif
.ndo_udp_tunnel_add = bnxt_udp_tunnel_add,
.ndo_udp_tunnel_del = bnxt_udp_tunnel_del,
-#ifdef CONFIG_NET_RX_BUSY_POLL
- .ndo_busy_poll = bnxt_busy_poll,
-#endif
};
static void bnxt_remove_one(struct pci_dev *pdev)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 16defe9..fddc316 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -654,21 +654,9 @@ struct bnxt_napi {
struct bnxt_rx_ring_info *rx_ring;
struct bnxt_tx_ring_info *tx_ring;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- atomic_t poll_state;
-#endif
bool in_reset;
};
-#ifdef CONFIG_NET_RX_BUSY_POLL
-enum bnxt_poll_state_t {
- BNXT_STATE_IDLE = 0,
- BNXT_STATE_NAPI,
- BNXT_STATE_POLL,
- BNXT_STATE_DISABLE,
-};
-#endif
-
struct bnxt_irq {
irq_handler_t handler;
unsigned int vector;
@@ -1141,93 +1129,6 @@ struct bnxt {
((offsetof(struct tx_port_stats, counter) + \
sizeof(struct rx_port_stats) + 512) / 8)
-#ifdef CONFIG_NET_RX_BUSY_POLL
-static inline void bnxt_enable_poll(struct bnxt_napi *bnapi)
-{
- atomic_set(&bnapi->poll_state, BNXT_STATE_IDLE);
-}
-
-/* called from the NAPI poll routine to get ownership of a bnapi */
-static inline bool bnxt_lock_napi(struct bnxt_napi *bnapi)
-{
- int rc = atomic_cmpxchg(&bnapi->poll_state, BNXT_STATE_IDLE,
- BNXT_STATE_NAPI);
-
- return rc == BNXT_STATE_IDLE;
-}
-
-static inline void bnxt_unlock_napi(struct bnxt_napi *bnapi)
-{
- atomic_set(&bnapi->poll_state, BNXT_STATE_IDLE);
-}
-
-/* called from the busy poll routine to get ownership of a bnapi */
-static inline bool bnxt_lock_poll(struct bnxt_napi *bnapi)
-{
- int rc = atomic_cmpxchg(&bnapi->poll_state, BNXT_STATE_IDLE,
- BNXT_STATE_POLL);
-
- return rc == BNXT_STATE_IDLE;
-}
-
-static inline void bnxt_unlock_poll(struct bnxt_napi *bnapi)
-{
- atomic_set(&bnapi->poll_state, BNXT_STATE_IDLE);
-}
-
-static inline bool bnxt_busy_polling(struct bnxt_napi *bnapi)
-{
- return atomic_read(&bnapi->poll_state) == BNXT_STATE_POLL;
-}
-
-static inline void bnxt_disable_poll(struct bnxt_napi *bnapi)
-{
- int old;
-
- while (1) {
- old = atomic_cmpxchg(&bnapi->poll_state, BNXT_STATE_IDLE,
- BNXT_STATE_DISABLE);
- if (old == BNXT_STATE_IDLE)
- break;
- usleep_range(500, 5000);
- }
-}
-
-#else
-
-static inline void bnxt_enable_poll(struct bnxt_napi *bnapi)
-{
-}
-
-static inline bool bnxt_lock_napi(struct bnxt_napi *bnapi)
-{
- return true;
-}
-
-static inline void bnxt_unlock_napi(struct bnxt_napi *bnapi)
-{
-}
-
-static inline bool bnxt_lock_poll(struct bnxt_napi *bnapi)
-{
- return false;
-}
-
-static inline void bnxt_unlock_poll(struct bnxt_napi *bnapi)
-{
-}
-
-static inline bool bnxt_busy_polling(struct bnxt_napi *bnapi)
-{
- return false;
-}
-
-static inline void bnxt_disable_poll(struct bnxt_napi *bnapi)
-{
-}
-
-#endif
-
#define I2C_DEV_ADDR_A0 0xa0
#define I2C_DEV_ADDR_A2 0xa2
#define SFP_EEPROM_SFF_8472_COMP_ADDR 0x5e
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next 03/14] bnxt_en: Improve the IRQ disable sequence during shutdown.
From: Michael Chan @ 2016-12-29 17:13 UTC (permalink / raw)
To: davem; +Cc: netdev
In-Reply-To: <1483031624-20076-1-git-send-email-michael.chan@broadcom.com>
The IRQ is disabled by writing to the completion ring doorbell. This
should be done before the hardware completion ring is freed for correctness.
The current code disables IRQs after all the completion rings are freed.
Fix it by calling bnxt_disable_int_sync() before freeing the completion
rings. Rearrange the code to avoid forward declaration.
Signed-off-by: Michael Chan <michael.chan@broadocm.com>
---
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 89 ++++++++++++++++---------------
1 file changed, 46 insertions(+), 43 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 3fbc842..277573b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2953,6 +2953,45 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
return rc;
}
+static void bnxt_disable_int(struct bnxt *bp)
+{
+ int i;
+
+ if (!bp->bnapi)
+ return;
+
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_napi *bnapi = bp->bnapi[i];
+ struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+
+ BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
+ }
+}
+
+static void bnxt_disable_int_sync(struct bnxt *bp)
+{
+ int i;
+
+ atomic_inc(&bp->intr_sem);
+
+ bnxt_disable_int(bp);
+ for (i = 0; i < bp->cp_nr_rings; i++)
+ synchronize_irq(bp->irq_tbl[i].vector);
+}
+
+static void bnxt_enable_int(struct bnxt *bp)
+{
+ int i;
+
+ atomic_set(&bp->intr_sem, 0);
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_napi *bnapi = bp->bnapi[i];
+ struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+
+ BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons);
+ }
+}
+
void bnxt_hwrm_cmd_hdr_init(struct bnxt *bp, void *request, u16 req_type,
u16 cmpl_ring, u16 target_id)
{
@@ -3937,6 +3976,12 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
}
}
+ /* The completion rings are about to be freed. After that the
+ * IRQ doorbell will not work anymore. So we need to disable
+ * IRQ here.
+ */
+ bnxt_disable_int_sync(bp);
+
for (i = 0; i < bp->cp_nr_rings; i++) {
struct bnxt_napi *bnapi = bp->bnapi[i];
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
@@ -4658,34 +4703,6 @@ static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init)
return bnxt_init_chip(bp, irq_re_init);
}
-static void bnxt_disable_int(struct bnxt *bp)
-{
- int i;
-
- if (!bp->bnapi)
- return;
-
- for (i = 0; i < bp->cp_nr_rings; i++) {
- struct bnxt_napi *bnapi = bp->bnapi[i];
- struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
-
- BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);
- }
-}
-
-static void bnxt_enable_int(struct bnxt *bp)
-{
- int i;
-
- atomic_set(&bp->intr_sem, 0);
- for (i = 0; i < bp->cp_nr_rings; i++) {
- struct bnxt_napi *bnapi = bp->bnapi[i];
- struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
-
- BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons);
- }
-}
-
static int bnxt_set_real_num_queues(struct bnxt *bp)
{
int rc;
@@ -5640,19 +5657,6 @@ static int bnxt_open(struct net_device *dev)
return __bnxt_open_nic(bp, true, true);
}
-static void bnxt_disable_int_sync(struct bnxt *bp)
-{
- int i;
-
- atomic_inc(&bp->intr_sem);
- if (!netif_running(bp->dev))
- return;
-
- bnxt_disable_int(bp);
- for (i = 0; i < bp->cp_nr_rings; i++)
- synchronize_irq(bp->irq_tbl[i].vector);
-}
-
int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
{
int rc = 0;
@@ -5674,13 +5678,12 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
while (test_bit(BNXT_STATE_IN_SP_TASK, &bp->state))
msleep(20);
- /* Flush rings before disabling interrupts */
+ /* Flush rings and and disable interrupts */
bnxt_shutdown_nic(bp, irq_re_init);
/* TODO CHIMP_FW: Link/PHY related cleanup if (link_re_init) */
bnxt_disable_napi(bp);
- bnxt_disable_int_sync(bp);
del_timer_sync(&bp->timer);
bnxt_free_skbs(bp);
--
1.8.3.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox