* [PATCH RFC v3 10/16] virtio_net: use v1.0 endian.
From: Michael S. Tsirkin @ 2014-10-22 18:44 UTC (permalink / raw)
To: linux-kernel; +Cc: Rusty Russell, Cornelia Huck, virtualization, netdev
In-Reply-To: <1414003404-505-1-git-send-email-mst@redhat.com>
From: Rusty Russell <rusty@rustcorp.com.au>
[Cornelia Huck: converted some missed fields]
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
drivers/net/virtio_net.c | 31 +++++++++++++++++++------------
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d75256bd..2e6561e 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -347,13 +347,14 @@ err:
}
static struct sk_buff *receive_mergeable(struct net_device *dev,
+ struct virtnet_info *vi,
struct receive_queue *rq,
unsigned long ctx,
unsigned int len)
{
void *buf = mergeable_ctx_to_buf_address(ctx);
struct skb_vnet_hdr *hdr = buf;
- int num_buf = hdr->mhdr.num_buffers;
+ u16 num_buf = virtio16_to_cpu(rq->vq->vdev, hdr->mhdr.num_buffers);
struct page *page = virt_to_head_page(buf);
int offset = buf - page_address(page);
unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
@@ -369,7 +370,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
if (unlikely(!ctx)) {
pr_debug("%s: rx error: %d buffers out of %d missing\n",
- dev->name, num_buf, hdr->mhdr.num_buffers);
+ dev->name, num_buf,
+ virtio16_to_cpu(rq->vq->vdev,
+ hdr->mhdr.num_buffers));
dev->stats.rx_length_errors++;
goto err_buf;
}
@@ -454,7 +457,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
}
if (vi->mergeable_rx_bufs)
- skb = receive_mergeable(dev, rq, (unsigned long)buf, len);
+ skb = receive_mergeable(dev, vi, rq, (unsigned long)buf, len);
else if (vi->big_packets)
skb = receive_big(dev, rq, buf, len);
else
@@ -473,8 +476,8 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
pr_debug("Needs csum!\n");
if (!skb_partial_csum_set(skb,
- hdr->hdr.csum_start,
- hdr->hdr.csum_offset))
+ virtio16_to_cpu(vi->vdev, hdr->hdr.csum_start),
+ virtio16_to_cpu(vi->vdev, hdr->hdr.csum_offset)))
goto frame_err;
} else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -505,7 +508,8 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
- skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
+ skb_shinfo(skb)->gso_size = virtio16_to_cpu(vi->vdev,
+ hdr->hdr.gso_size);
if (skb_shinfo(skb)->gso_size == 0) {
net_warn_ratelimited("%s: zero gso size.\n", dev->name);
goto frame_err;
@@ -867,16 +871,19 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
if (skb->ip_summed == CHECKSUM_PARTIAL) {
hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->hdr.csum_start = skb_checksum_start_offset(skb);
- hdr->hdr.csum_offset = skb->csum_offset;
+ hdr->hdr.csum_start = cpu_to_virtio16(vi->vdev,
+ skb_checksum_start_offset(skb));
+ hdr->hdr.csum_offset = cpu_to_virtio16(vi->vdev,
+ skb->csum_offset);
} else {
hdr->hdr.flags = 0;
hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
}
if (skb_is_gso(skb)) {
- hdr->hdr.hdr_len = skb_headlen(skb);
- hdr->hdr.gso_size = skb_shinfo(skb)->gso_size;
+ hdr->hdr.hdr_len = cpu_to_virtio16(vi->vdev, skb_headlen(skb));
+ hdr->hdr.gso_size = cpu_to_virtio16(vi->vdev,
+ skb_shinfo(skb)->gso_size);
if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
@@ -1182,7 +1189,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
sg_init_table(sg, 2);
/* Store the unicast list and count in the front of the buffer */
- mac_data->entries = uc_count;
+ mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
i = 0;
netdev_for_each_uc_addr(ha, dev)
memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
@@ -1193,7 +1200,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
/* multicast list and count fill the end */
mac_data = (void *)&mac_data->macs[uc_count][0];
- mac_data->entries = mc_count;
+ mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
i = 0;
netdev_for_each_mc_addr(ha, dev)
memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
--
MST
^ permalink raw reply related
* Re: localed stuck in recent 3.18 git in copy_net_ns?
From: Paul E. McKenney @ 2014-10-22 18:55 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Cong Wang, Josh Boyer, Kevin Fenzi, netdev,
Linux-Kernel@Vger. Kernel. Org
In-Reply-To: <87d29kezby.fsf@x220.int.ebiederm.org>
On Wed, Oct 22, 2014 at 01:25:37PM -0500, Eric W. Biederman wrote:
> "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> writes:
>
> > On Wed, Oct 22, 2014 at 12:53:24PM -0500, Eric W. Biederman wrote:
> >> Cong Wang <cwang@twopensource.com> writes:
> >>
> >> > (Adding Paul and Eric in Cc)
> >> >
> >> >
> >> > On Wed, Oct 22, 2014 at 10:12 AM, Josh Boyer <jwboyer@fedoraproject.org> wrote:
> >> >>
> >> >> Someone else is seeing this when they try and modprobe ppp_generic:
> >> >>
> >> >> [ 240.599195] INFO: task kworker/u16:5:100 blocked for more than 120 seconds.
> >> >> [ 240.599338] Not tainted 3.18.0-0.rc1.git2.1.fc22.x86_64 #1
> >> >> [ 240.599446] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> >> >> disables this message.
> >> >> [ 240.599583] kworker/u16:5 D ffff8802202db480 12400 100 2 0x00000000
> >> >> [ 240.599744] Workqueue: netns cleanup_net
> >> >> [ 240.599823] ffff8802202eb9e8 0000000000000096 ffff8802202db480
> >> >> 00000000001d5f00
> >> >> [ 240.600066] ffff8802202ebfd8 00000000001d5f00 ffff8800368c3480
> >> >> ffff8802202db480
> >> >> [ 240.600228] ffffffff81ee2690 7fffffffffffffff ffffffff81ee2698
> >> >> ffffffff81ee2690
> >> >> [ 240.600386] Call Trace:
> >> >> [ 240.600445] [<ffffffff8185e239>] schedule+0x29/0x70
> >> >> [ 240.600541] [<ffffffff8186345c>] schedule_timeout+0x26c/0x410
> >> >> [ 240.600651] [<ffffffff81865ef7>] ? retint_restore_args+0x13/0x13
> >> >> [ 240.600765] [<ffffffff818644e4>] ? _raw_spin_unlock_irq+0x34/0x50
> >> >> [ 240.600879] [<ffffffff8185fc6c>] wait_for_completion+0x10c/0x150
> >> >> [ 240.601025] [<ffffffff810e53e0>] ? wake_up_state+0x20/0x20
> >> >> [ 240.601133] [<ffffffff8112a749>] _rcu_barrier+0x159/0x200
> >> >> [ 240.601237] [<ffffffff8112a845>] rcu_barrier+0x15/0x20
> >> >> [ 240.601335] [<ffffffff81718ebf>] netdev_run_todo+0x6f/0x310
> >> >> [ 240.601442] [<ffffffff8170da85>] ? rollback_registered_many+0x265/0x2e0
> >> >> [ 240.601564] [<ffffffff81725f2e>] rtnl_unlock+0xe/0x10
> >> >> [ 240.601660] [<ffffffff8170f8e6>] default_device_exit_batch+0x156/0x180
> >> >> [ 240.601781] [<ffffffff810fd8a0>] ? abort_exclusive_wait+0xb0/0xb0
> >> >> [ 240.601895] [<ffffffff81707993>] ops_exit_list.isra.1+0x53/0x60
> >> >> [ 240.602028] [<ffffffff81708540>] cleanup_net+0x100/0x1f0
> >> >> [ 240.602131] [<ffffffff810ccfa8>] process_one_work+0x218/0x850
> >> >> [ 240.602241] [<ffffffff810ccf0f>] ? process_one_work+0x17f/0x850
> >> >> [ 240.602350] [<ffffffff810cd6c7>] ? worker_thread+0xe7/0x4a0
> >> >> [ 240.602454] [<ffffffff810cd64b>] worker_thread+0x6b/0x4a0
> >> >> [ 240.602555] [<ffffffff810cd5e0>] ? process_one_work+0x850/0x850
> >> >> [ 240.602665] [<ffffffff810d399b>] kthread+0x10b/0x130
> >> >> [ 240.602762] [<ffffffff81028cc9>] ? sched_clock+0x9/0x10
> >> >> [ 240.602862] [<ffffffff810d3890>] ? kthread_create_on_node+0x250/0x250
> >> >> [ 240.603004] [<ffffffff818651fc>] ret_from_fork+0x7c/0xb0
> >> >> [ 240.603106] [<ffffffff810d3890>] ? kthread_create_on_node+0x250/0x250
> >> >> [ 240.603224] 4 locks held by kworker/u16:5/100:
> >> >> [ 240.603304] #0: ("%s""netns"){.+.+.+}, at: [<ffffffff810ccf0f>]
> >> >> process_one_work+0x17f/0x850
> >> >> [ 240.603495] #1: (net_cleanup_work){+.+.+.}, at:
> >> >> [<ffffffff810ccf0f>] process_one_work+0x17f/0x850
> >> >> [ 240.603691] #2: (net_mutex){+.+.+.}, at: [<ffffffff817084cc>]
> >> >> cleanup_net+0x8c/0x1f0
> >> >> [ 240.603869] #3: (rcu_sched_state.barrier_mutex){+.+...}, at:
> >> >> [<ffffffff8112a625>] _rcu_barrier+0x35/0x200
> >> >> [ 240.604211] INFO: task modprobe:1387 blocked for more than 120 seconds.
> >> >> [ 240.604329] Not tainted 3.18.0-0.rc1.git2.1.fc22.x86_64 #1
> >> >> [ 240.604434] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> >> >> disables this message.
> >> >> [ 240.604570] modprobe D ffff8800cb4f1a40 13112 1387 1386 0x00000080
> >> >> [ 240.604719] ffff8800cafbbbe8 0000000000000096 ffff8800cb4f1a40
> >> >> 00000000001d5f00
> >> >> [ 240.604878] ffff8800cafbbfd8 00000000001d5f00 ffff880223280000
> >> >> ffff8800cb4f1a40
> >> >> [ 240.605068] ffff8800cb4f1a40 ffffffff81f8fb48 0000000000000246
> >> >> ffff8800cb4f1a40
> >> >> [ 240.605228] Call Trace:
> >> >> [ 240.605283] [<ffffffff8185e7e1>] schedule_preempt_disabled+0x31/0x80
> >> >> [ 240.605400] [<ffffffff81860033>] mutex_lock_nested+0x183/0x440
> >> >> [ 240.605510] [<ffffffff8170835f>] ? register_pernet_subsys+0x1f/0x50
> >> >> [ 240.605626] [<ffffffff8170835f>] ? register_pernet_subsys+0x1f/0x50
> >> >> [ 240.605757] [<ffffffffa0701000>] ? 0xffffffffa0701000
> >> >> [ 240.605854] [<ffffffff8170835f>] register_pernet_subsys+0x1f/0x50
> >> >> [ 240.606005] [<ffffffffa0701048>] br_init+0x48/0xd3 [bridge]
> >> >> [ 240.606112] [<ffffffff81002148>] do_one_initcall+0xd8/0x210
> >> >> [ 240.606224] [<ffffffff81153c02>] load_module+0x20c2/0x2870
> >> >> [ 240.606327] [<ffffffff8114ebe0>] ? store_uevent+0x70/0x70
> >> >> [ 240.606433] [<ffffffff8110ac26>] ? lock_release_non_nested+0x3c6/0x3d0
> >> >> [ 240.606557] [<ffffffff81154497>] SyS_init_module+0xe7/0x140
> >> >> [ 240.606664] [<ffffffff818652a9>] system_call_fastpath+0x12/0x17
> >> >> [ 240.606773] 1 lock held by modprobe/1387:
> >> >> [ 240.606845] #0: (net_mutex){+.+.+.}, at: [<ffffffff8170835f>]
> >> >> register_pernet_subsys+0x1f/0x50
> >> >> [ 240.607114] INFO: task modprobe:1466 blocked for more than 120 seconds.
> >> >> [ 240.607231] Not tainted 3.18.0-0.rc1.git2.1.fc22.x86_64 #1
> >> >> [ 240.607337] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
> >> >> disables this message.
> >> >> [ 240.607473] modprobe D ffff88020fbab480 13096 1466 1399 0x00000084
> >> >> [ 240.607622] ffff88020d1bbbe8 0000000000000096 ffff88020fbab480
> >> >> 00000000001d5f00
> >> >> [ 240.607791] ffff88020d1bbfd8 00000000001d5f00 ffffffff81e1b580
> >> >> ffff88020fbab480
> >> >> [ 240.607949] ffff88020fbab480 ffffffff81f8fb48 0000000000000246
> >> >> ffff88020fbab480
> >> >> [ 240.608138] Call Trace:
> >> >> [ 240.608193] [<ffffffff8185e7e1>] schedule_preempt_disabled+0x31/0x80
> >> >> [ 240.608316] [<ffffffff81860033>] mutex_lock_nested+0x183/0x440
> >> >> [ 240.608425] [<ffffffff817083ad>] ? register_pernet_device+0x1d/0x70
> >> >> [ 240.608542] [<ffffffff817083ad>] ? register_pernet_device+0x1d/0x70
> >> >> [ 240.608662] [<ffffffffa071d000>] ? 0xffffffffa071d000
> >> >> [ 240.608759] [<ffffffff817083ad>] register_pernet_device+0x1d/0x70
> >> >> [ 240.608881] [<ffffffffa071d020>] ppp_init+0x20/0x1000 [ppp_generic]
> >> >> [ 240.609021] [<ffffffff81002148>] do_one_initcall+0xd8/0x210
> >> >> [ 240.609131] [<ffffffff81153c02>] load_module+0x20c2/0x2870
> >> >> [ 240.609235] [<ffffffff8114ebe0>] ? store_uevent+0x70/0x70
> >> >> [ 240.609339] [<ffffffff8110ac26>] ? lock_release_non_nested+0x3c6/0x3d0
> >> >> [ 240.609462] [<ffffffff81154497>] SyS_init_module+0xe7/0x140
> >> >> [ 240.609568] [<ffffffff818652a9>] system_call_fastpath+0x12/0x17
> >> >> [ 240.609677] 1 lock held by modprobe/1466:
> >> >> [ 240.609749] #0: (net_mutex){+.+.+.}, at: [<ffffffff817083ad>]
> >> >> register_pernet_device+0x1d/0x70
> >> >>
> >> >> Looks like contention on net_mutex or something, but I honestly have
> >> >> no idea yet. I can't recreate it myself at the moment or I would
> >> >> bisect.
> >> >>
> >> >> Has nobody else run into this with the pre-3.18 kernels? Fedora isn't
> >> >> carrying any patches in this area.
> >>
> >> > I am not aware of any change in net/core/dev.c related here,
> >> > so I guess it's a bug in rcu_barrier().
> >>
> >> From the limited trace data I see in this email I have to agree.
> >>
> >> It looks like for some reason rcu_barrier is taking forever
> >> while the rtnl_lock is held in cleanup_net. Because the
> >> rtnl_lock is held modprobe of the ppp driver is getting stuck.
> >>
> >> Is it possible we have an AB BA deadlock between the rtnl_lock
> >> and rcu. With something the module loading code assumes?
> >
> > I am not aware of RCU ever acquiring rtnl_lock, not directly, anyway.
>
> Does the module loading code do something strange with rcu? Perhaps
> blocking an rcu grace period until the module loading completes?
>
> If the module loading somehow blocks an rcu grace period that would
> create an AB deadlock because loading the ppp module grabs the
> rtnl_lock. And elsewhere we have the rtnl_lock waiting for an rcu grace
> period.
>
> I would think trying and failing to get the rtnl_lock would sleep and
> thus let any rcu grace period happen but shrug.
>
> It looks like something is holding up the rcu grace period, and causing
> this. Although it is possible that something is causing cleanup_net
> to run slowly and we are just seeing that slowness show up in
> rcu_barrier as that is one of the slower bits. With a single trace I
> can't definitely say that the rcu barrier is getting stuck but it
> certainly looks that way.
Don't get me wrong -- the fact that this kthread appears to have
blocked within rcu_barrier() for 120 seconds means that something is
most definitely wrong here. I am surprised that there are no RCU CPU
stall warnings, but perhaps the blockage is in the callback execution
rather than grace-period completion. Or something is preventing this
kthread from starting up after the wake-up callback executes. Or...
Is this thing reproducible?
Thanx, Paul
^ permalink raw reply
* [RFC] tcp md5 use of alloc_percpu
From: Crestez Dan Leonard @ 2014-10-22 18:55 UTC (permalink / raw)
To: netdev
Hello,
It seems that the TCP MD5 feature allocates a percpu struct tcp_md5sig_pool and uses part of that memory for a scratch buffer to do crypto on. Here is the relevant code:
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
__be32 daddr, __be32 saddr, int nbytes)
{
struct tcp4_pseudohdr *bp;
struct scatterlist sg;
bp = &hp->md5_blk.ip4;
/*
* 1. the TCP pseudo-header (in the order: source IP address,
* destination IP address, zero-padded protocol number, and
* segment length)
*/
bp->saddr = saddr;
bp->daddr = daddr;
bp->pad = 0;
bp->protocol = IPPROTO_TCP;
bp->len = cpu_to_be16(nbytes);
sg_init_one(&sg, bp, sizeof(*bp));
return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}
sg_init_one does virt_addr on the pointer which assumes it is directly accessible. But the tcp_md5sig_pool pointer comes from alloc_percpu which can return memory from the vmalloc area after the pcpu_first_chunk is exhausted. This looks wrong to me. I am getting crashes on mips and I believe this to be the cause.
Allocating a scratch buffer this way is very peculiar. The tcp4_pseudohdr struct is only 12 bytes in length. Similar code in tcp_v6_md5_hash_pseudoheader uses a 40 byte tcp6_pseudohdr. I think it is perfectly reasonable to allocate this kind of stuff on the stack, right? These pseudohdr structs are not used at all outside these two static functions and it would simplify the code.
The whole notion of struct tcp_md5sig_pool seems dubious. This is a very tiny struct already and after removing the pseudohdr it shrinks to a percpu hash_desc for md5 (8 or 16 bytes). Wouldn't DEFINE_PERCPU be more appropriate? Before commit 71cea17ed39fdf1c0634f530ddc6a2c2fc601c2b the struct tcp_md5sig_pool structs were freed when all users were gone, but that functionality seems to have been dropped.
I'm not familiar with the linux crypto API. Isn't there an easier way to get a temporary md5 hasher?
Here's what I mean by allocating tcp{4,6}_pseudohdr on the stack:
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4062b4f..beabd7b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1266,33 +1266,9 @@ struct tcp_md5sig_info {
struct rcu_head rcu;
};
-/* - pseudo header */
-struct tcp4_pseudohdr {
- __be32 saddr;
- __be32 daddr;
- __u8 pad;
- __u8 protocol;
- __be16 len;
-};
-
-struct tcp6_pseudohdr {
- struct in6_addr saddr;
- struct in6_addr daddr;
- __be32 len;
- __be32 protocol; /* including padding */
-};
-
-union tcp_md5sum_block {
- struct tcp4_pseudohdr ip4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct tcp6_pseudohdr ip6;
-#endif
-};
-
/* - pool: digest algorithm, hash description and scratch buffer */
struct tcp_md5sig_pool {
struct hash_desc md5_desc;
- union tcp_md5sum_block md5_blk;
};
/* - functions */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 94d1a77..e716a67 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1041,27 +1041,33 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
GFP_KERNEL);
}
+struct tcp4_pseudohdr {
+ __be32 saddr;
+ __be32 daddr;
+ __u8 pad;
+ __u8 protocol;
+ __be16 len;
+};
+
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
__be32 daddr, __be32 saddr, int nbytes)
{
- struct tcp4_pseudohdr *bp;
+ struct tcp4_pseudohdr bp;
struct scatterlist sg;
- bp = &hp->md5_blk.ip4;
-
/*
* 1. the TCP pseudo-header (in the order: source IP address,
* destination IP address, zero-padded protocol number, and
* segment length)
*/
- bp->saddr = saddr;
- bp->daddr = daddr;
- bp->pad = 0;
- bp->protocol = IPPROTO_TCP;
- bp->len = cpu_to_be16(nbytes);
-
- sg_init_one(&sg, bp, sizeof(*bp));
- return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+ bp.saddr = saddr;
+ bp.daddr = daddr;
+ bp.pad = 0;
+ bp.protocol = IPPROTO_TCP;
+ bp.len = cpu_to_be16(nbytes);
+
+ sg_init_one(&sg, &bp, sizeof(bp));
+ return crypto_hash_update(&hp->md5_desc, &sg, sizeof(bp));
}
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8314955..87a9126 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -568,22 +568,28 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
+struct tcp6_pseudohdr {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ __be32 len;
+ __be32 protocol; /* including padding */
+};
+
static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
const struct in6_addr *daddr,
const struct in6_addr *saddr, int nbytes)
{
- struct tcp6_pseudohdr *bp;
+ struct tcp6_pseudohdr bp;
struct scatterlist sg;
- bp = &hp->md5_blk.ip6;
/* 1. TCP pseudo-header (RFC2460) */
- bp->saddr = *saddr;
- bp->daddr = *daddr;
- bp->protocol = cpu_to_be32(IPPROTO_TCP);
- bp->len = cpu_to_be32(nbytes);
+ bp.saddr = *saddr;
+ bp.daddr = *daddr;
+ bp.protocol = cpu_to_be32(IPPROTO_TCP);
+ bp.len = cpu_to_be32(nbytes);
- sg_init_one(&sg, bp, sizeof(*bp));
- return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+ sg_init_one(&sg, &bp, sizeof(bp));
+ return crypto_hash_update(&hp->md5_desc, &sg, sizeof(bp));
}
static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
^ permalink raw reply related
* Re: [PATCHv3 RFC net-next 0/4] sunvnet: NAPIfy sunvnet
From: Sowmini Varadhan @ 2014-10-22 18:57 UTC (permalink / raw)
To: davem, bob.picco, dwight.engen, raghuram.kothakota; +Cc: netdev, sparclinux
In-Reply-To: <20141015164234.GA11840@oracle.com>
Please hold off on this patch-set for the moment.
I just discovered a subtle bug that can be triggered when multiple
ldc events (e.g., UP and DATA_READY) are passed up from
ldc_rx -> send_events in one shot- we need to treat the vnet_port's
(new field) rx_event as a bit mask, and need to unroll send_events()
into vnet_event_napi.
the result of this bug is that inter-vnet handshake can sometimes
fail incorrectly causing things to needlessly go through the
switchport. I'm working on the fix for this, I'll send out
PATCHv5 for this series shortly.
Apologies for the confusion.
--Sowmini
^ permalink raw reply
* [PATCH 1/1 net-next] lapb: move EXPORT_SYMBOL after functions.
From: Fabian Frederick @ 2014-10-22 19:01 UTC (permalink / raw)
To: linux-kernel; +Cc: Fabian Frederick, David S. Miller, linux-x25, netdev
See Documentation/CodingStyle Chapter 6
Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
net/lapb/lapb_iface.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 3cdaa04..fc60d9d 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -73,6 +73,7 @@ static void __lapb_remove_cb(struct lapb_cb *lapb)
lapb_put(lapb);
}
}
+EXPORT_SYMBOL(lapb_register);
/*
* Add a socket to the bound sockets list.
@@ -195,6 +196,7 @@ out:
write_unlock_bh(&lapb_list_lock);
return rc;
}
+EXPORT_SYMBOL(lapb_unregister);
int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms)
{
@@ -227,6 +229,7 @@ int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms)
out:
return rc;
}
+EXPORT_SYMBOL(lapb_getparms);
int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms)
{
@@ -262,6 +265,7 @@ out_put:
out:
return rc;
}
+EXPORT_SYMBOL(lapb_setparms);
int lapb_connect_request(struct net_device *dev)
{
@@ -290,6 +294,7 @@ out_put:
out:
return rc;
}
+EXPORT_SYMBOL(lapb_connect_request);
int lapb_disconnect_request(struct net_device *dev)
{
@@ -334,6 +339,7 @@ out_put:
out:
return rc;
}
+EXPORT_SYMBOL(lapb_disconnect_request);
int lapb_data_request(struct net_device *dev, struct sk_buff *skb)
{
@@ -355,6 +361,7 @@ out_put:
out:
return rc;
}
+EXPORT_SYMBOL(lapb_data_request);
int lapb_data_received(struct net_device *dev, struct sk_buff *skb)
{
@@ -369,6 +376,7 @@ int lapb_data_received(struct net_device *dev, struct sk_buff *skb)
return rc;
}
+EXPORT_SYMBOL(lapb_data_received);
void lapb_connect_confirmation(struct lapb_cb *lapb, int reason)
{
@@ -415,15 +423,6 @@ int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb)
return used;
}
-EXPORT_SYMBOL(lapb_register);
-EXPORT_SYMBOL(lapb_unregister);
-EXPORT_SYMBOL(lapb_getparms);
-EXPORT_SYMBOL(lapb_setparms);
-EXPORT_SYMBOL(lapb_connect_request);
-EXPORT_SYMBOL(lapb_disconnect_request);
-EXPORT_SYMBOL(lapb_data_request);
-EXPORT_SYMBOL(lapb_data_received);
-
static int __init lapb_init(void)
{
return 0;
--
1.9.1
^ permalink raw reply related
* [PATCH 1/1 net-next] net: llc: include linux/errno.h instead of asm/errno.h
From: Fabian Frederick @ 2014-10-22 19:06 UTC (permalink / raw)
To: linux-kernel
Cc: Fabian Frederick, Arnaldo Carvalho de Melo, David S. Miller,
netdev
Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
net/llc/llc_if.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index 25c31c0..6daf391 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -15,7 +15,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
-#include <asm/errno.h>
+#include <linux/errno.h>
#include <net/llc_if.h>
#include <net/llc_sap.h>
#include <net/llc_s_ev.h>
--
1.9.1
^ permalink raw reply related
* Re: [PATCH] net: fix saving TX flow hash in sock for outgoing connections
From: Eric Dumazet @ 2014-10-22 19:09 UTC (permalink / raw)
To: Sathya Perla; +Cc: netdev@vger.kernel.org, therbert@google.com
In-Reply-To: <CF9D1877D81D214CB0CA0669EFAE020C68CA2FAA@CMEXMB1.ad.emulex.com>
On Wed, 2014-10-22 at 18:35 +0000, Sathya Perla wrote:
> > -----Original Message-----
> > From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> >
> > Are you really using the socket/flow hash to select a TXQ ?
> Yes, as I don't have XPS configured on my setup.
> netdev_pick_tx() uses the socket/flow hash when XPS is not used.
Yes, this is the (poor) fallback
>
> >
> > Even with this patch, you have a good probability of multiple
> > cpus hitting same TXQ.
> Agree. Are you suggesting that drivers should automatically
> register an XPS configuration? I thought it was up to the user
> to enable it...
Yep, search for netif_set_xps_queue()
(commit 537c00de1c9ba9876b9)
Look at commit d03a68f8217ea0349 for an example of how it can be done,
if the user does not override this later.
^ permalink raw reply
* Re: [RFC] tcp md5 use of alloc_percpu
From: Eric Dumazet @ 2014-10-22 19:12 UTC (permalink / raw)
To: Crestez Dan Leonard; +Cc: netdev
In-Reply-To: <5447FDB2.2010906@gmail.com>
On Wed, 2014-10-22 at 21:55 +0300, Crestez Dan Leonard wrote:
> Hello,
>
> It seems that the TCP MD5 feature allocates a percpu struct
> tcp_md5sig_pool and uses part of that memory for a scratch buffer to
> do crypto on. Here is the relevant code:
>
> static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
> __be32 daddr, __be32 saddr,
> int nbytes)
> {
> struct tcp4_pseudohdr *bp;
> struct scatterlist sg;
>
> bp = &hp->md5_blk.ip4;
>
> /*
> * 1. the TCP pseudo-header (in the order: source IP address,
> * destination IP address, zero-padded protocol number, and
> * segment length)
> */
> bp->saddr = saddr;
> bp->daddr = daddr;
> bp->pad = 0;
> bp->protocol = IPPROTO_TCP;
> bp->len = cpu_to_be16(nbytes);
>
> sg_init_one(&sg, bp, sizeof(*bp));
> return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
> }
>
> sg_init_one does virt_addr on the pointer which assumes it is directly
> accessible. But the tcp_md5sig_pool pointer comes from alloc_percpu
> which can return memory from the vmalloc area after the
> pcpu_first_chunk is exhausted. This looks wrong to me. I am getting
> crashes on mips and I believe this to be the cause.
Then just remove the alloc_percpu() from __tcp_alloc_md5sig_pool() and
make this a static per cpu definition (not a dynamic allocation)
>
> Allocating a scratch buffer this way is very peculiar. The
> tcp4_pseudohdr struct is only 12 bytes in length. Similar code in
> tcp_v6_md5_hash_pseudoheader uses a 40 byte tcp6_pseudohdr. I think it
> is perfectly reasonable to allocate this kind of stuff on the stack,
> right? These pseudohdr structs are not used at all outside these two
> static functions and it would simplify the code.
>
Yep, but the sg stuff does not allow for stack variables. Because of
possible offloading and DMA, I don't know...
> The whole notion of struct tcp_md5sig_pool seems dubious. This is a
> very tiny struct already and after removing the pseudohdr it shrinks
> to a percpu hash_desc for md5 (8 or 16 bytes). Wouldn't DEFINE_PERCPU
> be more appropriate?
Sure, this would be the more appropriate fix IMO.
> Before commit 71cea17ed39fdf1c0634f530ddc6a2c2fc601c2b the struct
> tcp_md5sig_pool structs were freed when all users were gone, but that
> functionality seems to have been dropped.
>
> I'm not familiar with the linux crypto API. Isn't there an easier way
> to get a temporary md5 hasher?
You should CC crypto guys maybe ...
>
> Here's what I mean by allocating tcp{4,6}_pseudohdr on the stack:
Your patch is quite invasive, you should do something simpler to ease
backports.
Thanks
^ permalink raw reply
* Re: localed stuck in recent 3.18 git in copy_net_ns?
From: Josh Boyer @ 2014-10-22 19:33 UTC (permalink / raw)
To: Paul McKenney
Cc: Eric W. Biederman, Cong Wang, Kevin Fenzi, netdev,
Linux-Kernel@Vger. Kernel. Org, yaneti
In-Reply-To: <20141022185511.GI4977@linux.vnet.ibm.com>
On Wed, Oct 22, 2014 at 2:55 PM, Paul E. McKenney
<paulmck@linux.vnet.ibm.com> wrote:
> On Wed, Oct 22, 2014 at 01:25:37PM -0500, Eric W. Biederman wrote:
>> "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> writes:
>>
>> > On Wed, Oct 22, 2014 at 12:53:24PM -0500, Eric W. Biederman wrote:
>> >> Cong Wang <cwang@twopensource.com> writes:
>> >>
>> >> > (Adding Paul and Eric in Cc)
>> >> >
>> >> >
>> >> > On Wed, Oct 22, 2014 at 10:12 AM, Josh Boyer <jwboyer@fedoraproject.org> wrote:
>> >> >>
>> >> >> Someone else is seeing this when they try and modprobe ppp_generic:
>> >> >>
>> >> >> [ 240.599195] INFO: task kworker/u16:5:100 blocked for more than 120 seconds.
>> >> >> [ 240.599338] Not tainted 3.18.0-0.rc1.git2.1.fc22.x86_64 #1
>> >> >> [ 240.599446] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
>> >> >> disables this message.
>> >> >> [ 240.599583] kworker/u16:5 D ffff8802202db480 12400 100 2 0x00000000
>> >> >> [ 240.599744] Workqueue: netns cleanup_net
>> >> >> [ 240.599823] ffff8802202eb9e8 0000000000000096 ffff8802202db480
>> >> >> 00000000001d5f00
>> >> >> [ 240.600066] ffff8802202ebfd8 00000000001d5f00 ffff8800368c3480
>> >> >> ffff8802202db480
>> >> >> [ 240.600228] ffffffff81ee2690 7fffffffffffffff ffffffff81ee2698
>> >> >> ffffffff81ee2690
>> >> >> [ 240.600386] Call Trace:
>> >> >> [ 240.600445] [<ffffffff8185e239>] schedule+0x29/0x70
>> >> >> [ 240.600541] [<ffffffff8186345c>] schedule_timeout+0x26c/0x410
>> >> >> [ 240.600651] [<ffffffff81865ef7>] ? retint_restore_args+0x13/0x13
>> >> >> [ 240.600765] [<ffffffff818644e4>] ? _raw_spin_unlock_irq+0x34/0x50
>> >> >> [ 240.600879] [<ffffffff8185fc6c>] wait_for_completion+0x10c/0x150
>> >> >> [ 240.601025] [<ffffffff810e53e0>] ? wake_up_state+0x20/0x20
>> >> >> [ 240.601133] [<ffffffff8112a749>] _rcu_barrier+0x159/0x200
>> >> >> [ 240.601237] [<ffffffff8112a845>] rcu_barrier+0x15/0x20
>> >> >> [ 240.601335] [<ffffffff81718ebf>] netdev_run_todo+0x6f/0x310
>> >> >> [ 240.601442] [<ffffffff8170da85>] ? rollback_registered_many+0x265/0x2e0
>> >> >> [ 240.601564] [<ffffffff81725f2e>] rtnl_unlock+0xe/0x10
>> >> >> [ 240.601660] [<ffffffff8170f8e6>] default_device_exit_batch+0x156/0x180
>> >> >> [ 240.601781] [<ffffffff810fd8a0>] ? abort_exclusive_wait+0xb0/0xb0
>> >> >> [ 240.601895] [<ffffffff81707993>] ops_exit_list.isra.1+0x53/0x60
>> >> >> [ 240.602028] [<ffffffff81708540>] cleanup_net+0x100/0x1f0
>> >> >> [ 240.602131] [<ffffffff810ccfa8>] process_one_work+0x218/0x850
>> >> >> [ 240.602241] [<ffffffff810ccf0f>] ? process_one_work+0x17f/0x850
>> >> >> [ 240.602350] [<ffffffff810cd6c7>] ? worker_thread+0xe7/0x4a0
>> >> >> [ 240.602454] [<ffffffff810cd64b>] worker_thread+0x6b/0x4a0
>> >> >> [ 240.602555] [<ffffffff810cd5e0>] ? process_one_work+0x850/0x850
>> >> >> [ 240.602665] [<ffffffff810d399b>] kthread+0x10b/0x130
>> >> >> [ 240.602762] [<ffffffff81028cc9>] ? sched_clock+0x9/0x10
>> >> >> [ 240.602862] [<ffffffff810d3890>] ? kthread_create_on_node+0x250/0x250
>> >> >> [ 240.603004] [<ffffffff818651fc>] ret_from_fork+0x7c/0xb0
>> >> >> [ 240.603106] [<ffffffff810d3890>] ? kthread_create_on_node+0x250/0x250
>> >> >> [ 240.603224] 4 locks held by kworker/u16:5/100:
>> >> >> [ 240.603304] #0: ("%s""netns"){.+.+.+}, at: [<ffffffff810ccf0f>]
>> >> >> process_one_work+0x17f/0x850
>> >> >> [ 240.603495] #1: (net_cleanup_work){+.+.+.}, at:
>> >> >> [<ffffffff810ccf0f>] process_one_work+0x17f/0x850
>> >> >> [ 240.603691] #2: (net_mutex){+.+.+.}, at: [<ffffffff817084cc>]
>> >> >> cleanup_net+0x8c/0x1f0
>> >> >> [ 240.603869] #3: (rcu_sched_state.barrier_mutex){+.+...}, at:
>> >> >> [<ffffffff8112a625>] _rcu_barrier+0x35/0x200
>> >> >> [ 240.604211] INFO: task modprobe:1387 blocked for more than 120 seconds.
>> >> >> [ 240.604329] Not tainted 3.18.0-0.rc1.git2.1.fc22.x86_64 #1
>> >> >> [ 240.604434] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
>> >> >> disables this message.
>> >> >> [ 240.604570] modprobe D ffff8800cb4f1a40 13112 1387 1386 0x00000080
>> >> >> [ 240.604719] ffff8800cafbbbe8 0000000000000096 ffff8800cb4f1a40
>> >> >> 00000000001d5f00
>> >> >> [ 240.604878] ffff8800cafbbfd8 00000000001d5f00 ffff880223280000
>> >> >> ffff8800cb4f1a40
>> >> >> [ 240.605068] ffff8800cb4f1a40 ffffffff81f8fb48 0000000000000246
>> >> >> ffff8800cb4f1a40
>> >> >> [ 240.605228] Call Trace:
>> >> >> [ 240.605283] [<ffffffff8185e7e1>] schedule_preempt_disabled+0x31/0x80
>> >> >> [ 240.605400] [<ffffffff81860033>] mutex_lock_nested+0x183/0x440
>> >> >> [ 240.605510] [<ffffffff8170835f>] ? register_pernet_subsys+0x1f/0x50
>> >> >> [ 240.605626] [<ffffffff8170835f>] ? register_pernet_subsys+0x1f/0x50
>> >> >> [ 240.605757] [<ffffffffa0701000>] ? 0xffffffffa0701000
>> >> >> [ 240.605854] [<ffffffff8170835f>] register_pernet_subsys+0x1f/0x50
>> >> >> [ 240.606005] [<ffffffffa0701048>] br_init+0x48/0xd3 [bridge]
>> >> >> [ 240.606112] [<ffffffff81002148>] do_one_initcall+0xd8/0x210
>> >> >> [ 240.606224] [<ffffffff81153c02>] load_module+0x20c2/0x2870
>> >> >> [ 240.606327] [<ffffffff8114ebe0>] ? store_uevent+0x70/0x70
>> >> >> [ 240.606433] [<ffffffff8110ac26>] ? lock_release_non_nested+0x3c6/0x3d0
>> >> >> [ 240.606557] [<ffffffff81154497>] SyS_init_module+0xe7/0x140
>> >> >> [ 240.606664] [<ffffffff818652a9>] system_call_fastpath+0x12/0x17
>> >> >> [ 240.606773] 1 lock held by modprobe/1387:
>> >> >> [ 240.606845] #0: (net_mutex){+.+.+.}, at: [<ffffffff8170835f>]
>> >> >> register_pernet_subsys+0x1f/0x50
>> >> >> [ 240.607114] INFO: task modprobe:1466 blocked for more than 120 seconds.
>> >> >> [ 240.607231] Not tainted 3.18.0-0.rc1.git2.1.fc22.x86_64 #1
>> >> >> [ 240.607337] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs"
>> >> >> disables this message.
>> >> >> [ 240.607473] modprobe D ffff88020fbab480 13096 1466 1399 0x00000084
>> >> >> [ 240.607622] ffff88020d1bbbe8 0000000000000096 ffff88020fbab480
>> >> >> 00000000001d5f00
>> >> >> [ 240.607791] ffff88020d1bbfd8 00000000001d5f00 ffffffff81e1b580
>> >> >> ffff88020fbab480
>> >> >> [ 240.607949] ffff88020fbab480 ffffffff81f8fb48 0000000000000246
>> >> >> ffff88020fbab480
>> >> >> [ 240.608138] Call Trace:
>> >> >> [ 240.608193] [<ffffffff8185e7e1>] schedule_preempt_disabled+0x31/0x80
>> >> >> [ 240.608316] [<ffffffff81860033>] mutex_lock_nested+0x183/0x440
>> >> >> [ 240.608425] [<ffffffff817083ad>] ? register_pernet_device+0x1d/0x70
>> >> >> [ 240.608542] [<ffffffff817083ad>] ? register_pernet_device+0x1d/0x70
>> >> >> [ 240.608662] [<ffffffffa071d000>] ? 0xffffffffa071d000
>> >> >> [ 240.608759] [<ffffffff817083ad>] register_pernet_device+0x1d/0x70
>> >> >> [ 240.608881] [<ffffffffa071d020>] ppp_init+0x20/0x1000 [ppp_generic]
>> >> >> [ 240.609021] [<ffffffff81002148>] do_one_initcall+0xd8/0x210
>> >> >> [ 240.609131] [<ffffffff81153c02>] load_module+0x20c2/0x2870
>> >> >> [ 240.609235] [<ffffffff8114ebe0>] ? store_uevent+0x70/0x70
>> >> >> [ 240.609339] [<ffffffff8110ac26>] ? lock_release_non_nested+0x3c6/0x3d0
>> >> >> [ 240.609462] [<ffffffff81154497>] SyS_init_module+0xe7/0x140
>> >> >> [ 240.609568] [<ffffffff818652a9>] system_call_fastpath+0x12/0x17
>> >> >> [ 240.609677] 1 lock held by modprobe/1466:
>> >> >> [ 240.609749] #0: (net_mutex){+.+.+.}, at: [<ffffffff817083ad>]
>> >> >> register_pernet_device+0x1d/0x70
>> >> >>
>> >> >> Looks like contention on net_mutex or something, but I honestly have
>> >> >> no idea yet. I can't recreate it myself at the moment or I would
>> >> >> bisect.
>> >> >>
>> >> >> Has nobody else run into this with the pre-3.18 kernels? Fedora isn't
>> >> >> carrying any patches in this area.
>> >>
>> >> > I am not aware of any change in net/core/dev.c related here,
>> >> > so I guess it's a bug in rcu_barrier().
>> >>
>> >> From the limited trace data I see in this email I have to agree.
>> >>
>> >> It looks like for some reason rcu_barrier is taking forever
>> >> while the rtnl_lock is held in cleanup_net. Because the
>> >> rtnl_lock is held modprobe of the ppp driver is getting stuck.
>> >>
>> >> Is it possible we have an AB BA deadlock between the rtnl_lock
>> >> and rcu. With something the module loading code assumes?
>> >
>> > I am not aware of RCU ever acquiring rtnl_lock, not directly, anyway.
>>
>> Does the module loading code do something strange with rcu? Perhaps
>> blocking an rcu grace period until the module loading completes?
>>
>> If the module loading somehow blocks an rcu grace period that would
>> create an AB deadlock because loading the ppp module grabs the
>> rtnl_lock. And elsewhere we have the rtnl_lock waiting for an rcu grace
>> period.
>>
>> I would think trying and failing to get the rtnl_lock would sleep and
>> thus let any rcu grace period happen but shrug.
>>
>> It looks like something is holding up the rcu grace period, and causing
>> this. Although it is possible that something is causing cleanup_net
>> to run slowly and we are just seeing that slowness show up in
>> rcu_barrier as that is one of the slower bits. With a single trace I
>> can't definitely say that the rcu barrier is getting stuck but it
>> certainly looks that way.
>
> Don't get me wrong -- the fact that this kthread appears to have
> blocked within rcu_barrier() for 120 seconds means that something is
> most definitely wrong here. I am surprised that there are no RCU CPU
> stall warnings, but perhaps the blockage is in the callback execution
> rather than grace-period completion. Or something is preventing this
> kthread from starting up after the wake-up callback executes. Or...
>
> Is this thing reproducible?
I've added Yanko on CC, who reported the backtrace above and can
recreate it reliably. Apparently reverting the RCU merge commit
(d6dd50e) and rebuilding the latest after that does not show the
issue. I'll let Yanko explain more and answer any questions you have.
josh
^ permalink raw reply
* Re: [PATCH] net: fs_enet: set back promiscuity mode after restart
From: David Miller @ 2014-10-22 19:33 UTC (permalink / raw)
To: christophe.leroy
Cc: pantelis.antoniou, vbordug, linux-kernel, linuxppc-dev, netdev,
germain.montoies
In-Reply-To: <20141022070547.83AAB1A5E63@localhost.localdomain>
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Wed, 22 Oct 2014 09:05:47 +0200 (CEST)
> After interface restart (eg: after link disconnection/reconnection), the bridge
> function doesn't work anymore. This is due to the promiscuous mode being cleared
> by the restart.
>
> The mac-fcc already includes code to set the promiscuous mode back during the restart.
> This patch adds the same handling to mac-fec and mac-scc.
>
> Tested with bridge function on MPC885 with FEC.
>
> Reported-by: Germain Montoies <germain.montoies@c-s.fr>
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH v2] drivers: net: xgene: Rewrite loop in xgene_enet_ecc_init()
From: David Miller @ 2014-10-22 19:34 UTC (permalink / raw)
To: geert; +Cc: isubramanian, kchudgar, netdev, linux-kernel
In-Reply-To: <1413963581-24019-1-git-send-email-geert@linux-m68k.org>
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 22 Oct 2014 09:39:41 +0200
> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c: In function ‘xgene_enet_ecc_init’:
> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c:126: warning: ‘data’ may be used uninitialized in this function
>
> Depending on the arbitrary value on the stack, the loop may terminate
> too early, and cause a bogus -ENODEV failure.
>
> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
> ---
> v2: Rewrite the loop instead of pre-initializing data.
I hate to be a pest, but like the other patch of yours I think
a do { } while() works best here because the intent is clearly
to run the loop at least once, right?
^ permalink raw reply
* [PATCH net] ptp: restore the makefile for building the test program.
From: Richard Cochran @ 2014-10-22 19:35 UTC (permalink / raw)
To: netdev; +Cc: David Miller, Peter Foley
This patch brings back the makefile called testptp.mk which was removed
in commit adb19fb66eee (Documentation: add makefiles for more targets).
While the idea of that commit was to improve build coverage of the
examples, the new Makefile is unable to cross compile the testptp program.
In contrast, the deleted makefile was able to do this just fine.
This patch fixes the regression by restoring the original makefile.
Signed-off-by: Richard Cochran <richardcochran@gmail.com>
---
Documentation/ptp/testptp.mk | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
create mode 100644 Documentation/ptp/testptp.mk
diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk
new file mode 100644
index 0000000..4ef2d97
--- /dev/null
+++ b/Documentation/ptp/testptp.mk
@@ -0,0 +1,33 @@
+# PTP 1588 clock support - User space test program
+#
+# Copyright (C) 2010 OMICRON electronics GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CC = $(CROSS_COMPILE)gcc
+INC = -I$(KBUILD_OUTPUT)/usr/include
+CFLAGS = -Wall $(INC)
+LDLIBS = -lrt
+PROGS = testptp
+
+all: $(PROGS)
+
+testptp: testptp.o
+
+clean:
+ rm -f testptp.o
+
+distclean: clean
+ rm -f $(PROGS)
--
1.7.10.4
^ permalink raw reply related
* Re: [PATCH 1/2 v2] xfrm: fix a potential use after free in xfrm4_policy.c
From: David Miller @ 2014-10-22 19:35 UTC (permalink / raw)
To: roy.qing.li; +Cc: netdev
In-Reply-To: <1413968993-13528-1-git-send-email-roy.qing.li@gmail.com>
From: roy.qing.li@gmail.com
Date: Wed, 22 Oct 2014 17:09:52 +0800
> From: Li RongQing <roy.qing.li@gmail.com>
>
> pskb_may_pull() may change skb->data and make the xprth pointer obsolete,
> so recompute the xprth
>
> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
> ---
> Diff with v1: NEXT_HEAD add a length parameter, which hide on v1
>
> net/ipv4/xfrm4_policy.c | 21 +++++++++++++--------
> 1 file changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
> index 6156f68..d7b33c5 100644
> --- a/net/ipv4/xfrm4_policy.c
> +++ b/net/ipv4/xfrm4_policy.c
> @@ -98,11 +98,14 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
> return 0;
> }
>
> +#define NEXT_HEAD(skb, length) (skb_network_header(skb) + length)
> +
If you're going to properly have arguments, use an inline function
rather than a macro so you get type checking.
Thanks.
^ permalink raw reply
* Re: [PATCH 2/2 v2] xfrm6: fix a potential use after free in xfrm6_policy.c
From: David Miller @ 2014-10-22 19:39 UTC (permalink / raw)
To: roy.qing.li; +Cc: netdev
In-Reply-To: <1413968993-13528-2-git-send-email-roy.qing.li@gmail.com>
From: roy.qing.li@gmail.com
Date: Wed, 22 Oct 2014 17:09:53 +0800
> From: Li RongQing <roy.qing.li@gmail.com>
>
> pskb_may_pull() may change skb->data and make the nh and exthdr pointers
> obsolete, so recompute nh and exthdr
>
> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
> ---
> Diff v1: add space between the data type and *, like (__be16 *)
Applied, thanks.
^ permalink raw reply
* Re: [PATCHv3 RFC net-next 0/4] sunvnet: NAPIfy sunvnet
From: David Miller @ 2014-10-22 19:39 UTC (permalink / raw)
To: sowmini.varadhan
Cc: bob.picco, dwight.engen, raghuram.kothakota, netdev, sparclinux
In-Reply-To: <20141022185757.GA8134@oracle.com>
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Wed, 22 Oct 2014 14:57:57 -0400
> Please hold off on this patch-set for the moment.
Ok, no problem.
^ permalink raw reply
* Re: [PATCH net] ptp: restore the makefile for building the test program.
From: Peter Foley @ 2014-10-22 19:43 UTC (permalink / raw)
To: Richard Cochran; +Cc: netdev, David Miller
In-Reply-To: <edb4b32ca0326b648d905620d95297cef5ed9391.1414006463.git.rcochran@linutronix.de>
On Wed, Oct 22, 2014 at 3:35 PM, Richard Cochran
<richardcochran@gmail.com> wrote:
> This patch brings back the makefile called testptp.mk which was removed
> in commit adb19fb66eee (Documentation: add makefiles for more targets).
>
> While the idea of that commit was to improve build coverage of the
> examples, the new Makefile is unable to cross compile the testptp program.
> In contrast, the deleted makefile was able to do this just fine.
>
> This patch fixes the regression by restoring the original makefile.
>
> Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Acked-by: Peter Foley <pefoley2@pefoley.com>
^ permalink raw reply
* Re: [PATCH v2] drivers: net: xgene: Rewrite loop in xgene_enet_ecc_init()
From: Geert Uytterhoeven @ 2014-10-22 19:50 UTC (permalink / raw)
To: David Miller
Cc: isubramanian, kchudgar, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <20141022.153436.93475594748933547.davem@davemloft.net>
On Wed, Oct 22, 2014 at 9:34 PM, David Miller <davem@davemloft.net> wrote:
> From: Geert Uytterhoeven <geert@linux-m68k.org>
> Date: Wed, 22 Oct 2014 09:39:41 +0200
>
>> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c: In function ‘xgene_enet_ecc_init’:
>> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c:126: warning: ‘data’ may be used uninitialized in this function
>>
>> Depending on the arbitrary value on the stack, the loop may terminate
>> too early, and cause a bogus -ENODEV failure.
>>
>> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
>> ---
>> v2: Rewrite the loop instead of pre-initializing data.
>
> I hate to be a pest, but like the other patch of your's I think
> a do { } while() works best here because the intent is clearly
> to run the loop at least once, right?
I wanted to avoid checking for "data != ~0U" twice: once to abort the loop,
and once to check if a timeout happened.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH net-next] tcp: Add TCP_FREEZE socket option
From: Hagen Paul Pfeifer @ 2014-10-22 19:50 UTC (permalink / raw)
To: Kristian Evensen; +Cc: David Miller, Network Development
In-Reply-To: <CAKfDRXgARYe3hw3vgpgup1X4bNjH0cpwYOfKUZK51HcOf2jeJA@mail.gmail.com>
On 22 October 2014 19:08, Kristian Evensen <kristian.evensen@gmail.com> wrote:
> Another approach I designed was to have a separate TCP Freeze module
> and trigger the freeze/unfreeze through genetlink-messages. A user
> space application will be responsible for monitoring the devices and
> decide when to trigger the ZWAs. Would a design like that be
> acceptable?
At least better. But what userspace daemon would configure this?
Likely NetworkManager and friends. But at what conditions?
- When the WIFI signal strength is below some threshold?
- When switched to another AP?
- When switched from 802.11 to 802.3
- ...
In a NATed scenario there is no gain because IP addresses change and
the connection is lost anyway. For the signal strength thing there
might be an advantage but it has costs:
a) how long do you freeze the connection? What if NetworkManager
stops? The connection hangs forever
b) is it not better to inform the upper layer - the application - that
something happen with the link?
I mean when the application experiences disruptions, the application
can decide what to do: reconnect, reconnect and resend or inform the
user. This possibility is now lost/hidden. Maybe it is no problem -
maybe it is for some applications.
I have no fundamental problems with TCP Freeze, but what is missing is
a complete story line: the use cases where it makes sense and whether it is
safe.
Have you considered bringing this to the IETF (TCPM WG)?
Hagen
^ permalink raw reply
* Re: [PATCH v2] drivers: net: xgene: Rewrite loop in xgene_enet_ecc_init()
From: David Miller @ 2014-10-22 20:12 UTC (permalink / raw)
To: geert; +Cc: isubramanian, kchudgar, netdev, linux-kernel
In-Reply-To: <CAMuHMdXJs=+SpB07t0a+LRWX_TGUnUmQi3P-aExFzmbm_j0fow@mail.gmail.com>
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 22 Oct 2014 21:50:06 +0200
> On Wed, Oct 22, 2014 at 9:34 PM, David Miller <davem@davemloft.net> wrote:
>> From: Geert Uytterhoeven <geert@linux-m68k.org>
>> Date: Wed, 22 Oct 2014 09:39:41 +0200
>>
>>> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c: In function ‘xgene_enet_ecc_init’:
>>> drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c:126: warning: ‘data’ may be used uninitialized in this function
>>>
>>> Depending on the arbitrary value on the stack, the loop may terminate
>>> too early, and cause a bogus -ENODEV failure.
>>>
>>> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
>>> ---
>>> v2: Rewrite the loop instead of pre-initializing data.
>>
>> I hate to be a pest, but like the other patch of your's I think
>> a do { } while() works best here because the intent is clearly
>> to run the loop at least once, right?
>
> I wanted to avoid checking for "data != ~0U" twice: once to abort the loop,
> and once to check if a timeout happened.
Hmmm:
do {
usleep_range(...);
data = ...();
if (data == ~0)
return 0;
} while (++i < 10);
netdev_err(...);
return -ENODEV;
Why would you have to check data twice?
^ permalink raw reply
* Re: [PATCH] net: fix saving TX flow hash in sock for outgoing connections
From: David Miller @ 2014-10-22 20:14 UTC (permalink / raw)
To: eric.dumazet; +Cc: Sathya.Perla, netdev, therbert
In-Reply-To: <1414004996.9031.20.camel@edumazet-glaptop2.roam.corp.google.com>
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 22 Oct 2014 12:09:56 -0700
> On Wed, 2014-10-22 at 18:35 +0000, Sathya Perla wrote:
>> Agree. Are you suggesting that drivers should automatically
>> register an XPS configuration? I thought it was up to the user
>> to enable it...
>
> Yep, search for netif_set_xps_queue()
>
> (commit 537c00de1c9ba9876b9)
>
> Look at commit d03a68f8217ea0349 for an example of how it can be done,
> if user do not override this later.
Very few people know about this :-/
I only see 4 drivers adjusted to do this, it would be nice if this
was more widespread.
^ permalink raw reply
* Re: [PATCH] net: fix saving TX flow hash in sock for outgoing connections
From: David Miller @ 2014-10-22 20:15 UTC (permalink / raw)
To: sathya.perla; +Cc: netdev, therbert
In-Reply-To: <1413994321-20435-1-git-send-email-sathya.perla@emulex.com>
From: Sathya Perla <sathya.perla@emulex.com>
Date: Wed, 22 Oct 2014 21:42:01 +0530
> The commit "net: Save TX flow hash in sock and set in skbuf on xmit"
> introduced the inet_set_txhash() and ip6_set_txhash() routines to calculate
> and record flow hash(sk_txhash) in the socket structure. sk_txhash is used
> to set skb->hash which is used to spread flows across multiple TXQs.
>
> But, the above routines are invoked before the source port of the connection
> is created. Because of this all outgoing connections that just differ in the
> source port get hashed into the same TXQ.
>
> This patch fixes this problem for IPv4/6 by invoking the above routines
> after the source port is available for the socket.
>
> Fixes: b73c3d0e4("net: Save TX flow hash in sock and set in skbuf on xmit")
>
> Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
Applied and queued up for -stable, thanks.
^ permalink raw reply
* Re: [PATCH net-next] tcp: Add TCP_FREEZE socket option
From: Kristian Evensen @ 2014-10-22 20:33 UTC (permalink / raw)
To: Hagen Paul Pfeifer; +Cc: David Miller, Network Development
In-Reply-To: <CAPh34mechU=aUS422OBFhHUt68Nk2PZBsMiLPw8eLwS_4PnRtw@mail.gmail.com>
Hi,
I am very sorry for not explaining the scenario/use-case properly.
Freeze-TCP is mostly targeted at TCP connections established through
mobile broadband networks. One example scenario is that of when a user
moves outside of an area with LTE coverage. The mobile broadband
connection will then be downgraded to 2G/3G and this process takes
10-15 seconds in the networks I have been able to measure. During this
handover, the modem/device will in most cases report that it is still
connected to LTE. So just looking at the state of the link is not good
enough, as it will appear to be working fine (except for no data
coming through it). The device does not change IP address, so TCP
connections will resume normal operation as soon as the network
connection is re-established and packet is retransmitted. However,
because of the large "idle" period, this can take another 10-15
seconds.
On Wed, Oct 22, 2014 at 9:50 PM, Hagen Paul Pfeifer <hagen@jauu.net> wrote:
> At least better. But what userspace daemon would configure this?
> Likely NetworkManager and friends. But at what conditions?
Yes, that would be my suggestion for tools too. The conditions would
depend on the kind of network, available information and so on.
> In a NATed scenario there is no gain because IP addreses change and
> the connection is lost anyway. For the signal strength thing there
> might be an advantage but it has costs:
>
> a) how long did you freeze the connection? What if NetworkManager
> stops? The connection hang \infty
> b) is it not better to inform the upper layer - the application - that
> something happen with the link?
>
> I mean when the application experience disruptions, the application
> can decide what it do: reconnect, reconnect and resend or inform the
> user. This possibility is now lost/hidden. Maybe it is no problem -
> maybe it is for some applications.
This is the main reason why I went with a socket option. While I
worked on this patch I wrote a small daemon for testing purposes. This
daemon analyses data exported from a mobile broadband modem (QMI),
looks at total interface throughput and then multicasts a netlink
message when it determines that a handover might happen. This message
is only a hint and then it is up to the application developer to
decide what to do. Another solution would be a hybrid: the module will
work as I described and the socket option will be used as an opt-in
for Freeze-TCP.
>
> Do you have considered to bring this to the IETF (TCPM WG)?
>
Yes, I am currently considering it, or if I should look into different
solutions before bringing it up for discussion. The ideal solution
would be if there was a way to force a retransmit when the handover
period is over, but that opens a whole new set of problems, potential
security problems and changes TCP semantics a bit. An advantage of
Freeze-TCP is that it works fine with what we have today.
Thanks for your detailed comments!
Kristian
^ permalink raw reply
* [PATCH net] hyperv: Fix the total_data_buflen in send path
From: Haiyang Zhang @ 2014-10-22 20:47 UTC (permalink / raw)
To: davem, netdev; +Cc: olaf, jasowang, driverdev-devel, linux-kernel, haiyangz
total_data_buflen is used by netvsc_send() to decide if a packet can be put
into send buffer. It should also include the size of RNDIS message before the
Ethernet frame. Otherwise, a message with total size bigger than send_section_size
may be copied into the send buffer, and cause data corruption.
[Request to include this patch to the Stable branches]
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: K. Y. Srinivasan <kys@microsoft.com>
---
drivers/net/hyperv/netvsc_drv.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 9e17d1a..78ec33f 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -550,6 +550,7 @@ do_lso:
do_send:
/* Start filling in the page buffers with the rndis hdr */
rndis_msg->msg_len += rndis_msg_size;
+ packet->total_data_buflen = rndis_msg->msg_len;
packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size,
skb, &packet->page_buf[0]);
--
1.7.1
^ permalink raw reply related
* Re: [RFC] tcp md5 use of alloc_percpu
From: Jonathan Toppins @ 2014-10-22 21:35 UTC (permalink / raw)
To: Eric Dumazet, Crestez Dan Leonard; +Cc: netdev
In-Reply-To: <1414005158.9031.22.camel@edumazet-glaptop2.roam.corp.google.com>
On 10/22/14, 3:12 PM, Eric Dumazet wrote:
> On Wed, 2014-10-22 at 21:55 +0300, Crestez Dan Leonard wrote:
>> Hello,
>>
>> It seems that the TCP MD5 feature allocates a percpu struct
>> tcp_md5sig_pool and uses part of that memory for a scratch buffer to
>> do crypto on. Here is the relevant code:
>>
>> static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
>> __be32 daddr, __be32 saddr,
>> int nbytes)
>> {
>> struct tcp4_pseudohdr *bp;
>> struct scatterlist sg;
>>
>> bp = &hp->md5_blk.ip4;
>>
>> /*
>> * 1. the TCP pseudo-header (in the order: source IP address,
>> * destination IP address, zero-padded protocol number, and
>> * segment length)
>> */
>> bp->saddr = saddr;
>> bp->daddr = daddr;
>> bp->pad = 0;
>> bp->protocol = IPPROTO_TCP;
>> bp->len = cpu_to_be16(nbytes);
>>
>> sg_init_one(&sg, bp, sizeof(*bp));
>> return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
>> }
>>
>> sg_init_one does virt_addr on the pointer which assumes it is directly
>> accessible. But the tcp_md5sig_pool pointer comes from alloc_percpu
>> which can return memory from the vmalloc area after the
>> pcpu_first_chunk is exhausted. This looks wrong to me. I am getting
>> crashes on mips and I believe this to be the cause.
I can confirm this created an issue on our powerpc based switches. My
solution in our 3.2 kernel was to allocate the buffer on the stack. I
like this solution better.
^ permalink raw reply
* Re: [PATCH] net: fec: ptp: fix NULL pointer dereference if ptp_clock is not set
From: David Miller @ 2014-10-22 21:48 UTC (permalink / raw)
To: p.zabel; +Cc: b45643, netdev
In-Reply-To: <1413988475-6565-1-git-send-email-p.zabel@pengutronix.de>
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 22 Oct 2014 16:34:35 +0200
> Since commit 278d24047891 (net: fec: ptp: Enable PPS output based on ptp clock)
> fec_enet_interrupt calls fec_ptp_check_pps_event unconditionally, which calls
> into ptp_clock_event. If fep->ptp_clock is NULL, ptp_clock_event tries to
> dereference the NULL pointer.
> Since on i.MX53 fep->bufdesc_ex is not set, fec_ptp_init is never called,
> and fep->ptp_clock is NULL, which reliably causes a kernel panic.
>
> This patch adds a check for fep->ptp_clock == NULL in fec_enet_interrupt.
>
> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Applied, thank you.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox