Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net v2 2/2] ipv6: properly check return value in inet6_dump_all()
From: Alexey Kodanev @ 2018-11-02 16:11 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern, David Miller, Alexey Kodanev
In-Reply-To: <1541175065-25931-1-git-send-email-alexey.kodanev@oracle.com>

Make sure we call fib6_dump_end() if it happens that skb->len
is zero. rtnl_dump_all() can reset cb->args on the next loop
iteration there.

Fixes: 08e814c9e8eb ("net/ipv6: Bail early if user only wants cloned entries")
Fixes: ae677bbb4441 ("net: Don't return invalid table id error when dumping all families")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
---

v2: a new patch in v2

 net/ipv6/ip6_fib.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1b8bc00..ae37861 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -591,7 +591,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 
 	/* fib entries are never clones */
 	if (arg.filter.flags & RTM_F_CLONED)
-		return skb->len;
+		goto out;
 
 	w = (void *)cb->args[2];
 	if (!w) {
@@ -621,7 +621,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 		tb = fib6_get_table(net, arg.filter.table_id);
 		if (!tb) {
 			if (arg.filter.dump_all_families)
-				return skb->len;
+				goto out;
 
 			NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
 			return -ENOENT;
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net v2 1/2] rtnetlink: restore handling of dumpit return value in rtnl_dump_all()
From: Alexey Kodanev @ 2018-11-02 16:11 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern, David Miller, Alexey Kodanev

For non-zero return from dumpit() we should break the loop
in rtnl_dump_all() and return the result. Otherwise, e.g.,
we could get the memory leak in inet6_dump_fib() [1]. The
pointer to the allocated struct fib6_walker there (saved
in cb->args) can be lost, reset on the next iteration.

Fix it by partially restoring the previous behavior before
commit c63586dc9b3e ("net: rtnl_dump_all needs to propagate
error from dumpit function"). The returned error from
dumpit() is still passed further.

[1]:
unreferenced object 0xffff88001322a200 (size 96):
  comm "sshd", pid 1484, jiffies 4296032768 (age 1432.542s)
  hex dump (first 32 bytes):
    00 01 00 00 00 00 ad de 00 02 00 00 00 00 ad de  ................
    18 09 41 36 00 88 ff ff 18 09 41 36 00 88 ff ff  ..A6......A6....
  backtrace:
    [<0000000095846b39>] kmem_cache_alloc_trace+0x151/0x220
    [<000000007d12709f>] inet6_dump_fib+0x68d/0x940
    [<000000002775a316>] rtnl_dump_all+0x1d9/0x2d0
    [<00000000d7cd302b>] netlink_dump+0x945/0x11a0
    [<000000002f43485f>] __netlink_dump_start+0x55d/0x800
    [<00000000f76bbeec>] rtnetlink_rcv_msg+0x4fa/0xa00
    [<000000009b5761f3>] netlink_rcv_skb+0x29c/0x420
    [<0000000087a1dae1>] rtnetlink_rcv+0x15/0x20
    [<00000000691b703b>] netlink_unicast+0x4e3/0x6c0
    [<00000000b5be0204>] netlink_sendmsg+0x7f2/0xba0
    [<0000000096d2aa60>] sock_sendmsg+0xba/0xf0
    [<000000008c1b786f>] __sys_sendto+0x1e4/0x330
    [<0000000019587b3f>] __x64_sys_sendto+0xe1/0x1a0
    [<00000000071f4d56>] do_syscall_64+0x9f/0x300
    [<000000002737577f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
    [<0000000057587684>] 0xffffffffffffffff

Fixes: c63586dc9b3e ("net: rtnl_dump_all needs to propagate error from dumpit function")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
---

v2: * fix it by restoring the previous behavior as suggested by David

    * adjust subject and commit description

 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e01274b..33d9227 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3367,7 +3367,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 			cb->seq = 0;
 		}
 		ret = dumpit(skb, cb);
-		if (ret < 0)
+		if (ret)
 			break;
 	}
 	cb->family = idx;
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH 1/1] vhost: add per-vq worker thread
From: Vitaly Mayatskikh @ 2018-11-02 16:07 UTC (permalink / raw)
  To: Michael S . Tsirkin
  Cc: Jason Wang, kvm, virtualization, netdev, linux-kernel,
	Vitaly Mayatskikh
In-Reply-To: <20181102160710.3741-1-v.mayatskih@gmail.com>

This enables a near linear scaling in multiqueue cases.

First virtqueue still gets the worker created unconditionally,
the rest is postponed until the actual poll starts on the queue.

Signed-off-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
---
 drivers/vhost/vhost.c | 123 +++++++++++++++++++++++++++++++-----------
 drivers/vhost/vhost.h |  11 +++-
 2 files changed, 100 insertions(+), 34 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 3a5f81a66d34..523dcfac4541 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -185,18 +185,27 @@ void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
 }
 EXPORT_SYMBOL_GPL(vhost_work_init);
 
-/* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
-		     __poll_t mask, struct vhost_dev *dev)
+
+static void vhost_vq_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+			       __poll_t mask, struct vhost_virtqueue *vq)
 {
 	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
 	init_poll_funcptr(&poll->table, vhost_poll_func);
 	poll->mask = mask;
-	poll->dev = dev;
+	poll->dev = vq->dev;
+	poll->vq = vq;
 	poll->wqh = NULL;
 
 	vhost_work_init(&poll->work, fn);
 }
+EXPORT_SYMBOL_GPL(vhost_vq_poll_init);
+
+/* Init poll structure */
+void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+		     __poll_t mask, struct vhost_dev *dev)
+{
+	vhost_vq_poll_init(poll, fn, mask, dev->vqs[0]);
+}
 EXPORT_SYMBOL_GPL(vhost_poll_init);
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
@@ -232,31 +241,74 @@ void vhost_poll_stop(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_stop);
 
-void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
+
+static void vhost_vq_poll_start_work(struct vhost_work *w)
+{
+	struct vhost_virtqueue *vq = container_of(w, struct vhost_virtqueue,
+						  work);
+
+	vhost_poll_start(&vq->poll, vq->kick);
+}
+
+static int vhost_vq_worker(void *data);
+
+static int vhost_vq_poll_start(struct vhost_virtqueue *vq)
+{
+	if (!vq->worker) {
+		vq->worker = kthread_create(vhost_vq_worker, vq, "vhost-%d/%i",
+					    vq->dev->pid, vq->index);
+		if (IS_ERR(vq->worker)) {
+			int ret = PTR_ERR(vq->worker);
+
+			pr_err("%s: can't create vq worker: %d\n", __func__,
+			       ret);
+			vq->worker = NULL;
+			return ret;
+		}
+	}
+	vhost_work_init(&vq->work, vhost_vq_poll_start_work);
+	vhost_vq_work_queue(vq, &vq->work);
+	return 0;
+}
+
+static void vhost_vq_work_flush(struct vhost_virtqueue *vq,
+				struct vhost_work *work)
 {
 	struct vhost_flush_struct flush;
 
-	if (dev->worker) {
+	if (vq->worker) {
 		init_completion(&flush.wait_event);
 		vhost_work_init(&flush.work, vhost_flush_work);
 
-		vhost_work_queue(dev, &flush.work);
+		vhost_vq_work_queue(vq, &flush.work);
 		wait_for_completion(&flush.wait_event);
 	}
 }
+EXPORT_SYMBOL_GPL(vhost_vq_work_flush);
+
+void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
+{
+	vhost_vq_work_flush(dev->vqs[0], work);
+}
 EXPORT_SYMBOL_GPL(vhost_work_flush);
 
 /* Flush any work that has been scheduled. When calling this, don't hold any
  * locks that are also used by the callback. */
 void vhost_poll_flush(struct vhost_poll *poll)
 {
-	vhost_work_flush(poll->dev, &poll->work);
+	vhost_vq_work_flush(poll->vq, &poll->work);
 }
 EXPORT_SYMBOL_GPL(vhost_poll_flush);
 
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 {
-	if (!dev->worker)
+	return vhost_vq_work_queue(dev->vqs[0], work);
+}
+EXPORT_SYMBOL_GPL(vhost_work_queue);
+
+void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
+{
+	if (!vq->worker)
 		return;
 
 	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
@@ -264,22 +316,22 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 		 * sure it was not in the list.
 		 * test_and_set_bit() implies a memory barrier.
 		 */
-		llist_add(&work->node, &dev->work_list);
-		wake_up_process(dev->worker);
+		llist_add(&work->node, &vq->work_list);
+		wake_up_process(vq->worker);
 	}
 }
-EXPORT_SYMBOL_GPL(vhost_work_queue);
+EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
 
 /* A lockless hint for busy polling code to exit the loop */
 bool vhost_has_work(struct vhost_dev *dev)
 {
-	return !llist_empty(&dev->work_list);
+	return !llist_empty(&dev->vqs[0]->work_list);
 }
 EXPORT_SYMBOL_GPL(vhost_has_work);
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
-	vhost_work_queue(poll->dev, &poll->work);
+	vhost_vq_work_queue(poll->vq, &poll->work);
 }
 EXPORT_SYMBOL_GPL(vhost_poll_queue);
 
@@ -333,9 +385,10 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	__vhost_vq_meta_reset(vq);
 }
 
-static int vhost_worker(void *data)
+static int vhost_vq_worker(void *data)
 {
-	struct vhost_dev *dev = data;
+	struct vhost_virtqueue *vq = data;
+	struct vhost_dev *dev = vq->dev;
 	struct vhost_work *work, *work_next;
 	struct llist_node *node;
 	mm_segment_t oldfs = get_fs();
@@ -351,8 +404,7 @@ static int vhost_worker(void *data)
 			__set_current_state(TASK_RUNNING);
 			break;
 		}
-
-		node = llist_del_all(&dev->work_list);
+		node = llist_del_all(&vq->work_list);
 		if (!node)
 			schedule();
 
@@ -429,25 +481,26 @@ void vhost_dev_init(struct vhost_dev *dev,
 	dev->umem = NULL;
 	dev->iotlb = NULL;
 	dev->mm = NULL;
-	dev->worker = NULL;
-	init_llist_head(&dev->work_list);
 	init_waitqueue_head(&dev->wait);
 	INIT_LIST_HEAD(&dev->read_list);
 	INIT_LIST_HEAD(&dev->pending_list);
 	spin_lock_init(&dev->iotlb_lock);
 
-
 	for (i = 0; i < dev->nvqs; ++i) {
 		vq = dev->vqs[i];
+		vq->index = i;
 		vq->log = NULL;
 		vq->indirect = NULL;
 		vq->heads = NULL;
 		vq->dev = dev;
+		vq->worker = NULL;
 		mutex_init(&vq->mutex);
 		vhost_vq_reset(dev, vq);
+		init_llist_head(&vq->work_list);
+		vq->worker = NULL;
 		if (vq->handle_kick)
-			vhost_poll_init(&vq->poll, vq->handle_kick,
-					EPOLLIN, dev);
+			vhost_vq_poll_init(&vq->poll, vq->handle_kick,
+					   EPOLLIN, vq);
 	}
 }
 EXPORT_SYMBOL_GPL(vhost_dev_init);
@@ -506,14 +559,16 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 
 	/* No owner, become one */
 	dev->mm = get_task_mm(current);
-	worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
+	dev->pid = current->pid;
+	worker = kthread_create(vhost_vq_worker, dev->vqs[0], "vhost-%d/0",
+				current->pid);
 	if (IS_ERR(worker)) {
 		err = PTR_ERR(worker);
 		goto err_worker;
 	}
 
-	dev->worker = worker;
-	wake_up_process(worker);	/* avoid contributing to loadavg */
+	dev->vqs[0]->worker = worker;
+	wake_up_process(worker);        /* avoid contributing to loadavg */
 
 	err = vhost_attach_cgroups(dev);
 	if (err)
@@ -526,7 +581,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 	return 0;
 err_cgroup:
 	kthread_stop(worker);
-	dev->worker = NULL;
+	dev->vqs[0]->worker = NULL;
 err_worker:
 	if (dev->mm)
 		mmput(dev->mm);
@@ -638,11 +693,15 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 	dev->iotlb = NULL;
 	vhost_clear_msg(dev);
 	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
-	WARN_ON(!llist_empty(&dev->work_list));
-	if (dev->worker) {
-		kthread_stop(dev->worker);
-		dev->worker = NULL;
+
+	for (i = 0; i < dev->nvqs; ++i) {
+		WARN_ON(!llist_empty(&dev->vqs[i]->work_list));
+		if (dev->vqs[i]->worker) {
+			kthread_stop(dev->vqs[i]->worker);
+			dev->vqs[i]->worker = NULL;
+		}
 	}
+
 	if (dev->mm)
 		mmput(dev->mm);
 	dev->mm = NULL;
@@ -1564,7 +1623,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 		fput(filep);
 
 	if (pollstart && vq->handle_kick)
-		r = vhost_poll_start(&vq->poll, vq->kick);
+		r = vhost_vq_poll_start(vq);
 
 	mutex_unlock(&vq->mutex);
 
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 466ef7542291..c00733fac49f 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -32,6 +32,7 @@ struct vhost_poll {
 	struct vhost_work	  work;
 	__poll_t		  mask;
 	struct vhost_dev	 *dev;
+	struct vhost_virtqueue   *vq;
 };
 
 void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
@@ -86,6 +87,7 @@ struct vhost_virtqueue {
 
 	/* The actual ring of buffers. */
 	struct mutex mutex;
+	unsigned int index;
 	unsigned int num;
 	struct vring_desc __user *desc;
 	struct vring_avail __user *avail;
@@ -145,6 +147,10 @@ struct vhost_virtqueue {
 	bool user_be;
 #endif
 	u32 busyloop_timeout;
+
+	struct llist_head work_list;
+	struct task_struct *worker;
+	struct vhost_work work;
 };
 
 struct vhost_msg_node {
@@ -158,12 +164,11 @@ struct vhost_msg_node {
 
 struct vhost_dev {
 	struct mm_struct *mm;
+	pid_t pid;
 	struct mutex mutex;
 	struct vhost_virtqueue **vqs;
 	int nvqs;
 	struct eventfd_ctx *log_ctx;
-	struct llist_head work_list;
-	struct task_struct *worker;
 	struct vhost_umem *umem;
 	struct vhost_umem *iotlb;
 	spinlock_t iotlb_lock;
@@ -185,6 +190,8 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
 bool vhost_log_access_ok(struct vhost_dev *);
 
+void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work);
+
 int vhost_get_vq_desc(struct vhost_virtqueue *,
 		      struct iovec iov[], unsigned int iov_count,
 		      unsigned int *out_num, unsigned int *in_num,
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH bpf 2/3] bpf: show real jited address in bpf_prog_info->jited_ksyms
From: Song Liu @ 2018-11-02 16:07 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Netdev, Kernel Team, ast@kernel.org, sandipan@linux.vnet.ibm.com
In-Reply-To: <a35ece7a-d958-dbe3-4f03-6fe7eab42e06@iogearbox.net>



> On Nov 2, 2018, at 3:19 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> 
> On 11/02/2018 11:09 AM, Daniel Borkmann wrote:
>> On 11/01/2018 08:00 AM, Song Liu wrote:
>>> Currently, jited_ksyms in bpf_prog_info shows page addresses of jited
>>> bpf program. This is not ideal for detailed profiling (find hot
>>> instructions from stack traces). This patch replaces the page address
>>> with real prog start address.
>>> 
>>> Signed-off-by: Song Liu <songliubraving@fb.com>
>>> ---
>>> kernel/bpf/syscall.c | 1 -
>>> 1 file changed, 1 deletion(-)
>>> 
>>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>>> index ccb93277aae2..34a9eef5992c 100644
>>> --- a/kernel/bpf/syscall.c
>>> +++ b/kernel/bpf/syscall.c
>>> @@ -2172,7 +2172,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
>>> 			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
>>> 			for (i = 0; i < ulen; i++) {
>>> 				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
>>> -				ksym_addr &= PAGE_MASK;
>> 
>> Note that the masking was done on purpose here and in patch 1/3 in order to
>> not expose randomized start address to kallsyms at least. I suppose it's
>> okay to change it here and for kallsyms given bpf_prog_get_info_by_fd() dump
>> is for root only, and in each of the two cases we additionally apply
>> kallsyms_show_value() logic, so for unpriv this is zeroed out plus only root
>> loaded programs are added under kallsyms (capable(CAP_SYS_ADMIN)) anyway.
> 
> (Btw, something like above should have been in changelog to provide some more
> historical context of why we used to do it like that and explaining why it is
> okay to change it this way.)

Thanks Daniel!

I will send v2 with these fixes. 

Song

^ permalink raw reply

* Re: [PATCH iproute2-next v1 1/4] rdma: Update kernel include file to support IB device renaming
From: David Ahern @ 2018-11-02 16:43 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Leon Romanovsky, netdev, RDMA mailing list, Stephen Hemminger,
	Steve Wise
In-Reply-To: <20181031071758.6178-2-leon@kernel.org>

On 10/31/18 1:17 AM, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
> 
> Bring kernel header file changes upto commit 05d940d3a3ec
> ("RDMA/nldev: Allow IB device rename through RDMA netlink")
> 
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> Reviewed-by: Steve Wise <swise@opengridcomputing.com>
> ---
>  rdma/include/uapi/rdma/rdma_netlink.h | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 

applied to iproute2-next. Thanks

^ permalink raw reply

* Re: [PATCH iproute2-next v1 2/4] rdma: Introduce command execution helper with required device name
From: David Ahern @ 2018-11-02 16:43 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Leon Romanovsky, netdev, RDMA mailing list, Stephen Hemminger,
	Steve Wise
In-Reply-To: <20181031071758.6178-3-leon@kernel.org>

On 10/31/18 1:17 AM, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
> 
> In contradiction to various show commands, the set command explicitly
> requires to use device name as an argument. Provide new command
> execution helper which enforces it.
> 
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> Reviewed-by: Steve Wise <swise@opengridcomputing.com>
> ---
>  rdma/rdma.h  |  1 +
>  rdma/utils.c | 10 ++++++++++
>  2 files changed, 11 insertions(+)
> 

applied to iproute2-next. Thanks

^ permalink raw reply

* Re: [PATCH iproute2-next v1 3/4] rdma: Add an option to rename IB device interface
From: David Ahern @ 2018-11-02 16:43 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Leon Romanovsky, netdev, RDMA mailing list, Stephen Hemminger,
	Steve Wise
In-Reply-To: <20181031071758.6178-4-leon@kernel.org>

On 10/31/18 1:17 AM, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@mellanox.com>
> 
> Enrich rdmatool with an option to rename IB devices,
> the command interface follows Iproute2 convention:
> "rdma dev set [OLD-DEVNAME] name NEW-DEVNAME"
> 
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> Reviewed-by: Steve Wise <swise@opengridcomputing.com>
> ---
>  rdma/dev.c | 35 +++++++++++++++++++++++++++++++++++
>  1 file changed, 35 insertions(+)
> 

applied to iproute2-next. Thanks

^ permalink raw reply

* Re: [PATCH iproute2-next v1 4/4] rdma: Document IB device renaming option
From: David Ahern @ 2018-11-02 16:47 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Leon Romanovsky, netdev, RDMA mailing list, Stephen Hemminger,
	Steve Wise
In-Reply-To: <20181031071758.6178-5-leon@kernel.org>

On 10/31/18 1:17 AM, Leon Romanovsky wrote:
> diff --git a/man/man8/rdma-dev.8 b/man/man8/rdma-dev.8
> index 461681b6..b2f9964a 100644
> --- a/man/man8/rdma-dev.8
> +++ b/man/man8/rdma-dev.8
> @@ -22,6 +22,12 @@ rdmak-dev \- RDMA device configuration
>  .B rdma dev show
>  .RI "[ " DEV " ]"
>  
> +.ti -8
> +.B rdma dev set
> +.RI "[ " DEV " ]"
> +.BR name
> +.BR NEWNAME
> +
>  .ti -8
>  .B rdma dev help
>  
> @@ -33,6 +39,8 @@ rdmak-dev \- RDMA device configuration
>  - specifies the RDMA device to show.
>  If this argument is omitted all devices are listed.
>  
> +.SS rdma dev set - rename rdma device
> +

'nroff -u0 -Tlp -man man/man8/rdma-dev.8' shows that you lost the
spacing between that line and the Examples line.

   rdma dev set - rename rdma device
EXAMPLES
       rdma dev
           Shows the state of all RDMA devices on the system.

...


>  .SH "EXAMPLES"
>  .PP
>  rdma dev
> @@ -45,6 +53,11 @@ rdma dev show mlx5_3
>  Shows the state of specified RDMA device.
>  .RE
>  .PP
> +rdma dev set mlx5_3 name rdma_0
> +.RS 4
> +Renames the mlx5_3 device to be named rdma_0.

That does not read well. I suggest dropping the 'be named' to make it
"Renames the mlx5_3 device to rdma_0."

> +.RE
> +.PP
>  
>  .SH SEE ALSO
>  .BR rdma (8),
> 

^ permalink raw reply

* [PATCH  v2 bpf 0/3] show more accurrate bpf program address
From: Song Liu @ 2018-11-02 17:16 UTC (permalink / raw)
  To: netdev; +Cc: kernel-team, Song Liu, ast, daniel, sandipan

Changes v1 -> v2:
1. Added main program length to bpf_prog_info->jited_fun_lens (3/3).
2. Updated commit message of 1/3 and 2/3 with more background about the
   address masking, and why it is still save after the changes.
3. Replace "ulong" with "unsigned long".

This set improves bpf program address showed in /proc/kallsyms and in
bpf_prog_info. First, real program address is showed instead of page
address. Second, when there is no subprogram, bpf_prog_info->jited_ksyms
and bpf_prog_info->jited_fun_lens returns the main prog address and
length.

Song Liu (3):
  bpf: show real jited prog address in /proc/kallsyms
  bpf: show real jited address in bpf_prog_info->jited_ksyms
  bpf: show main program address and length in bpf_prog_info

 kernel/bpf/core.c    |  4 +---
 kernel/bpf/syscall.c | 34 ++++++++++++++++++++++++----------
 2 files changed, 25 insertions(+), 13 deletions(-)

^ permalink raw reply

* [PATCH  v2 bpf 2/3] bpf: show real jited address in bpf_prog_info->jited_ksyms
From: Song Liu @ 2018-11-02 17:16 UTC (permalink / raw)
  To: netdev; +Cc: kernel-team, Song Liu, ast, daniel, sandipan
In-Reply-To: <20181102171617.310178-1-songliubraving@fb.com>

Currently, jited_ksyms in bpf_prog_info shows page addresses of jited
bpf program. The main reason here is to not expose randomized start
address. However, this is not ideal for detailed profiling (find hot
instructions from stack traces). This patch replaces the page address
with real prog start address.

This change is OK because bpf_prog_get_info_by_fd() is only available
to root.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 kernel/bpf/syscall.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ccb93277aae2..34a9eef5992c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2172,7 +2172,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
 			for (i = 0; i < ulen; i++) {
 				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
-				ksym_addr &= PAGE_MASK;
 				if (put_user((u64) ksym_addr, &user_ksyms[i]))
 					return -EFAULT;
 			}
-- 
2.17.1

^ permalink raw reply related

* [PATCH  v2 bpf 1/3] bpf: show real jited prog address in /proc/kallsyms
From: Song Liu @ 2018-11-02 17:16 UTC (permalink / raw)
  To: netdev; +Cc: kernel-team, Song Liu, ast, daniel, sandipan
In-Reply-To: <20181102171617.310178-1-songliubraving@fb.com>

Currently, /proc/kallsyms shows page address of jited bpf program. The
main reason here is to not expose randomized start address. However,
This is not ideal for detailed profiling (find hot instructions from
stack traces). This patch replaces the page address with real prog start
address.

This change is OK because these addresses are still protected by sysctl
kptr_restrict (see kallsyms_show_value()), and only programs loaded by
root are added to kallsyms (see bpf_prog_kallsyms_add()).

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 kernel/bpf/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 6377225b2082..1a796e0799ec 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -553,7 +553,6 @@ bool is_bpf_text_address(unsigned long addr)
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		    char *sym)
 {
-	unsigned long symbol_start, symbol_end;
 	struct bpf_prog_aux *aux;
 	unsigned int it = 0;
 	int ret = -ERANGE;
@@ -566,10 +565,9 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		if (it++ != symnum)
 			continue;
 
-		bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
 		bpf_get_prog_name(aux->prog, sym);
 
-		*value = symbol_start;
+		*value = (unsigned long)aux->prog->bpf_func;
 		*type  = BPF_SYM_ELF_TYPE;
 
 		ret = 0;
-- 
2.17.1

^ permalink raw reply related

* [PATCH  v2 bpf 3/3] bpf: show main program address and length in bpf_prog_info
From: Song Liu @ 2018-11-02 17:16 UTC (permalink / raw)
  To: netdev; +Cc: kernel-team, Song Liu, ast, daniel, sandipan
In-Reply-To: <20181102171617.310178-1-songliubraving@fb.com>

Currently, when there is no subprog (prog->aux->func_cnt == 0),
bpf_prog_info does not return any jited_ksyms or jited_func_lens. This
patch adds main program address (prog->bpf_func) and main program
length (prog->jited_len) to bpf_prog_info.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 kernel/bpf/syscall.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 34a9eef5992c..9418174c276c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2158,11 +2158,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	}
 
 	ulen = info.nr_jited_ksyms;
-	info.nr_jited_ksyms = prog->aux->func_cnt;
+	info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
 	if (info.nr_jited_ksyms && ulen) {
 		if (bpf_dump_raw_ok()) {
+			unsigned long ksym_addr;
 			u64 __user *user_ksyms;
-			ulong ksym_addr;
 			u32 i;
 
 			/* copy the address of the kernel symbol
@@ -2170,9 +2170,17 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			 */
 			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
 			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
-			for (i = 0; i < ulen; i++) {
-				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
-				if (put_user((u64) ksym_addr, &user_ksyms[i]))
+			if (prog->aux->func_cnt) {
+				for (i = 0; i < ulen; i++) {
+					ksym_addr = (unsigned long)
+						prog->aux->func[i]->bpf_func;
+					if (put_user((u64) ksym_addr,
+						     &user_ksyms[i]))
+						return -EFAULT;
+				}
+			} else {
+				ksym_addr = (unsigned long) prog->bpf_func;
+				if (put_user((u64) ksym_addr, &user_ksyms[0]))
 					return -EFAULT;
 			}
 		} else {
@@ -2181,7 +2189,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	}
 
 	ulen = info.nr_jited_func_lens;
-	info.nr_jited_func_lens = prog->aux->func_cnt;
+	info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
 	if (info.nr_jited_func_lens && ulen) {
 		if (bpf_dump_raw_ok()) {
 			u32 __user *user_lens;
@@ -2190,9 +2198,16 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			/* copy the JITed image lengths for each function */
 			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
 			user_lens = u64_to_user_ptr(info.jited_func_lens);
-			for (i = 0; i < ulen; i++) {
-				func_len = prog->aux->func[i]->jited_len;
-				if (put_user(func_len, &user_lens[i]))
+			if (prog->aux->func_cnt) {
+				for (i = 0; i < ulen; i++) {
+					func_len =
+						prog->aux->func[i]->jited_len;
+					if (put_user(func_len, &user_lens[i]))
+						return -EFAULT;
+				}
+			} else {
+				func_len = prog->jited_len;
+				if (put_user(func_len, &user_lens[0]))
 					return -EFAULT;
 			}
 		} else {
-- 
2.17.1

^ permalink raw reply related

* Re: [PULL] vhost: cleanups and fixes
From: Linus Torvalds @ 2018-11-02 18:02 UTC (permalink / raw)
  To: mst
  Cc: mark.rutland, Kees Cook, kvm, virtualization, netdev,
	Linux Kernel Mailing List, Andrew Morton, bijan.mottahedeh,
	gedwards, joe, lenaic, liang.z.li, mhocko, mhocko, stefanha,
	wei.w.wang, jasowang
In-Reply-To: <20181102131334-mutt-send-email-mst@kernel.org>

On Fri, Nov 2, 2018 at 10:21 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> it seems that it depends on current not on the active mm.

Yes, see my other suggestion to just fix "use_mm()" to update the address range.

Would you mind testing that?

Because that would seem to be the *much* less error-prone model..

           Linus

^ permalink raw reply

* [PATCH net 0/6] s390/qeth: fixes 2018-11-02
From: Julian Wiedmann @ 2018-11-02 18:04 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann

Hi Dave,

please apply one round of qeth fixes for -net.

Patch 1 is rather large and removes a use-after-free hazard from many of our
debug trace entries.
Patch 2 is yet another fix-up for the L3 subdriver's new IP address management
code.
Patch 3 and 4 resolve some fallout from the recent changes wrt how/when qeth
allocates its net_device.
Patch 5 makes sure we don't set reserved bits when building HW commands from
user-provided data.
And finally, patch 6 allows ethtool to play nice with new HW.

Thanks,
Julian

Julian Wiedmann (6):
  s390/qeth: sanitize strings in debug messages
  s390/qeth: fix HiperSockets sniffer
  s390/qeth: unregister netdevice only when registered
  s390/qeth: fix initial operstate
  s390/qeth: sanitize ARP requests
  s390/qeth: report 25Gbit link speed

 drivers/s390/net/qeth_core.h      |  27 +++--
 drivers/s390/net/qeth_core_main.c | 172 ++++++++++++++++---------------
 drivers/s390/net/qeth_core_mpc.h  |   4 +-
 drivers/s390/net/qeth_l2_main.c   |  39 +++----
 drivers/s390/net/qeth_l3_main.c   | 207 +++++++++++++-------------------------
 5 files changed, 207 insertions(+), 242 deletions(-)

-- 
2.16.4

^ permalink raw reply

* [PATCH net 2/6] s390/qeth: fix HiperSockets sniffer
From: Julian Wiedmann @ 2018-11-02 18:04 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20181102180413.53020-1-jwi@linux.ibm.com>

Sniffing mode for L3 HiperSockets requires that no IP addresses are
registered with the HW. The preferred way to achieve this is for
userspace to delete all the IPs on the interface. But qeth is expected
to also tolerate a configuration where that is not the case, by skipping
the IP registration when in sniffer mode.
Since commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
reworked the IP registration logic in the L3 subdriver, this no longer
works. When the qeth device is set online, qeth_l3_recover_ip() now
unconditionally registers all unicast addresses from our internal
IP table.

While we could fix this particular problem by skipping
qeth_l3_recover_ip() on a sniffer device, the more future-proof change
is to skip the IP address registration at the lowest level. This way we
a) catch any future code path that attempts to register an IP address
   without considering the sniffer scenario, and
b) continue to build up our internal IP table, so that if sniffer mode
   is switched off later we can operate just like normal.

Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/qeth_l3_main.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index ffa2aa1dd4c5..968e344a240b 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -278,9 +278,6 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)

 	QETH_CARD_TEXT(card, 4, "clearip");

-	if (recover && card->options.sniffer)
-		return;
-
 	spin_lock_bh(&card->ip_lock);

 	hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
@@ -661,6 +658,8 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card,
 	int rc = 0;
 	int cnt = 3;

+	if (card->options.sniffer)
+		return 0;

 	if (addr->proto == QETH_PROT_IPV4) {
 		QETH_CARD_TEXT(card, 2, "setaddr4");
@@ -695,6 +694,9 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card,
 {
 	int rc = 0;

+	if (card->options.sniffer)
+		return 0;
+
 	if (addr->proto == QETH_PROT_IPV4) {
 		QETH_CARD_TEXT(card, 2, "deladdr4");
 		QETH_CARD_HEX(card, 3, &addr->u.a4.addr, sizeof(int));
-- 
2.16.4

^ permalink raw reply related

* [PATCH net 1/6] s390/qeth: sanitize strings in debug messages
From: Julian Wiedmann @ 2018-11-02 18:04 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20181102180413.53020-1-jwi@linux.ibm.com>

As Documentation/s390/s390dbf.txt states quite clearly, using any
pointer in sprinf-formatted s390dbf debug entries is dangerous.
The pointers are dereferenced whenever the trace file is read from.
So if the referenced data has a shorter life-time than the trace file,
any read operation can result in a use-after-free.

So rip out all hazardous use of indirect data, and replace any usage of
dev_name() and such by the Bus ID number.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/qeth_core.h      |  15 ++++-
 drivers/s390/net/qeth_core_main.c | 127 +++++++++++++++++---------------------
 drivers/s390/net/qeth_l2_main.c   |  24 +++----
 drivers/s390/net/qeth_l3_main.c   | 104 +++++++++++--------------------
 4 files changed, 119 insertions(+), 151 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 6843bc7ee9f2..884ba9dfb341 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -87,6 +87,18 @@ struct qeth_dbf_info {
 #define SENSE_RESETTING_EVENT_BYTE 1
 #define SENSE_RESETTING_EVENT_FLAG 0x80
 
+static inline u32 qeth_get_device_id(struct ccw_device *cdev)
+{
+	struct ccw_dev_id dev_id;
+	u32 id;
+
+	ccw_device_get_id(cdev, &dev_id);
+	id = dev_id.devno;
+	id |= (u32) (dev_id.ssid << 16);
+
+	return id;
+}
+
 /*
  * Common IO related definitions
  */
@@ -97,7 +109,8 @@ struct qeth_dbf_info {
 #define CARD_RDEV_ID(card) dev_name(&card->read.ccwdev->dev)
 #define CARD_WDEV_ID(card) dev_name(&card->write.ccwdev->dev)
 #define CARD_DDEV_ID(card) dev_name(&card->data.ccwdev->dev)
-#define CHANNEL_ID(channel) dev_name(&channel->ccwdev->dev)
+#define CCW_DEVID(cdev)		(qeth_get_device_id(cdev))
+#define CARD_DEVID(card)	(CCW_DEVID(CARD_RDEV(card)))
 
 /**
  * card stuff
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 3274f13aad57..639ac0aca1e9 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -554,8 +554,8 @@ static int __qeth_issue_next_read(struct qeth_card *card)
 	if (!iob) {
 		dev_warn(&card->gdev->dev, "The qeth device driver "
 			"failed to recover an error on the device\n");
-		QETH_DBF_MESSAGE(2, "%s issue_next_read failed: no iob "
-			"available\n", dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(2, "issue_next_read on device %x failed: no iob available\n",
+				 CARD_DEVID(card));
 		return -ENOMEM;
 	}
 	qeth_setup_ccw(channel->ccw, CCW_CMD_READ, QETH_BUFSIZE, iob->data);
@@ -563,8 +563,8 @@ static int __qeth_issue_next_read(struct qeth_card *card)
 	rc = ccw_device_start(channel->ccwdev, channel->ccw,
 			      (addr_t) iob, 0, 0);
 	if (rc) {
-		QETH_DBF_MESSAGE(2, "%s error in starting next read ccw! "
-			"rc=%i\n", dev_name(&card->gdev->dev), rc);
+		QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n",
+				 rc, CARD_DEVID(card));
 		atomic_set(&channel->irq_pending, 0);
 		card->read_or_write_problem = 1;
 		qeth_schedule_recovery(card);
@@ -613,16 +613,14 @@ static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc,
 	const char *ipa_name;
 	int com = cmd->hdr.command;
 	ipa_name = qeth_get_ipa_cmd_name(com);
+
 	if (rc)
-		QETH_DBF_MESSAGE(2, "IPA: %s(x%X) for %s/%s returned "
-				"x%X \"%s\"\n",
-				ipa_name, com, dev_name(&card->gdev->dev),
-				QETH_CARD_IFNAME(card), rc,
-				qeth_get_ipa_msg(rc));
+		QETH_DBF_MESSAGE(2, "IPA: %s(%#x) for device %x returned %#x \"%s\"\n",
+				 ipa_name, com, CARD_DEVID(card), rc,
+				 qeth_get_ipa_msg(rc));
 	else
-		QETH_DBF_MESSAGE(5, "IPA: %s(x%X) for %s/%s succeeded\n",
-				ipa_name, com, dev_name(&card->gdev->dev),
-				QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(5, "IPA: %s(%#x) for device %x succeeded\n",
+				 ipa_name, com, CARD_DEVID(card));
 }
 
 static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
@@ -711,7 +709,7 @@ static int qeth_check_idx_response(struct qeth_card *card,
 
 	QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN);
 	if ((buffer[2] & 0xc0) == 0xc0) {
-		QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#02x\n",
+		QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#04x\n",
 				 buffer[4]);
 		QETH_CARD_TEXT(card, 2, "ckidxres");
 		QETH_CARD_TEXT(card, 2, " idxterm");
@@ -972,8 +970,8 @@ static int qeth_get_problem(struct qeth_card *card, struct ccw_device *cdev,
 		QETH_CARD_TEXT(card, 2, "CGENCHK");
 		dev_warn(&cdev->dev, "The qeth device driver "
 			"failed to recover an error on the device\n");
-		QETH_DBF_MESSAGE(2, "%s check on device dstat=x%x, cstat=x%x\n",
-			dev_name(&cdev->dev), dstat, cstat);
+		QETH_DBF_MESSAGE(2, "check on channel %x with dstat=%#x, cstat=%#x\n",
+				 CCW_DEVID(cdev), dstat, cstat);
 		print_hex_dump(KERN_WARNING, "qeth: irb ", DUMP_PREFIX_OFFSET,
 				16, 1, irb, 64, 1);
 		return 1;
@@ -1013,8 +1011,8 @@ static long qeth_check_irb_error(struct qeth_card *card,
 
 	switch (PTR_ERR(irb)) {
 	case -EIO:
-		QETH_DBF_MESSAGE(2, "%s i/o-error on device\n",
-			dev_name(&cdev->dev));
+		QETH_DBF_MESSAGE(2, "i/o-error on channel %x\n",
+				 CCW_DEVID(cdev));
 		QETH_CARD_TEXT(card, 2, "ckirberr");
 		QETH_CARD_TEXT_(card, 2, "  rc%d", -EIO);
 		break;
@@ -1031,8 +1029,8 @@ static long qeth_check_irb_error(struct qeth_card *card,
 		}
 		break;
 	default:
-		QETH_DBF_MESSAGE(2, "%s unknown error %ld on device\n",
-			dev_name(&cdev->dev), PTR_ERR(irb));
+		QETH_DBF_MESSAGE(2, "unknown error %ld on channel %x\n",
+				 PTR_ERR(irb), CCW_DEVID(cdev));
 		QETH_CARD_TEXT(card, 2, "ckirberr");
 		QETH_CARD_TEXT(card, 2, "  rc???");
 	}
@@ -1114,9 +1112,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
 			dev_warn(&channel->ccwdev->dev,
 				"The qeth device driver failed to recover "
 				"an error on the device\n");
-			QETH_DBF_MESSAGE(2, "%s sense data available. cstat "
-				"0x%X dstat 0x%X\n",
-				dev_name(&channel->ccwdev->dev), cstat, dstat);
+			QETH_DBF_MESSAGE(2, "sense data available on channel %x: cstat %#X dstat %#X\n",
+					 CCW_DEVID(channel->ccwdev), cstat,
+					 dstat);
 			print_hex_dump(KERN_WARNING, "qeth: irb ",
 				DUMP_PREFIX_OFFSET, 16, 1, irb, 32, 1);
 			print_hex_dump(KERN_WARNING, "qeth: sense data ",
@@ -1890,8 +1888,8 @@ static int qeth_idx_activate_channel(struct qeth_card *card,
 	if (channel->state != CH_STATE_ACTIVATING) {
 		dev_warn(&channel->ccwdev->dev, "The qeth device driver"
 			" failed to recover an error on the device\n");
-		QETH_DBF_MESSAGE(2, "%s IDX activate timed out\n",
-			dev_name(&channel->ccwdev->dev));
+		QETH_DBF_MESSAGE(2, "IDX activate timed out on channel %x\n",
+				 CCW_DEVID(channel->ccwdev));
 		QETH_DBF_TEXT_(SETUP, 2, "2err%d", -ETIME);
 		return -ETIME;
 	}
@@ -1926,17 +1924,15 @@ static void qeth_idx_write_cb(struct qeth_card *card,
 				"The adapter is used exclusively by another "
 				"host\n");
 		else
-			QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on write channel:"
-				" negative reply\n",
-				dev_name(&channel->ccwdev->dev));
+			QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: negative reply\n",
+					 CCW_DEVID(channel->ccwdev));
 		goto out;
 	}
 	memcpy(&temp, QETH_IDX_ACT_FUNC_LEVEL(iob->data), 2);
 	if ((temp & ~0x0100) != qeth_peer_func_level(card->info.func_level)) {
-		QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on write channel: "
-			"function level mismatch (sent: 0x%x, received: "
-			"0x%x)\n", dev_name(&channel->ccwdev->dev),
-			card->info.func_level, temp);
+		QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: function level mismatch (sent: %#x, received: %#x)\n",
+				 CCW_DEVID(channel->ccwdev),
+				 card->info.func_level, temp);
 		goto out;
 	}
 	channel->state = CH_STATE_UP;
@@ -1973,9 +1969,8 @@ static void qeth_idx_read_cb(struct qeth_card *card,
 				"insufficient authorization\n");
 			break;
 		default:
-			QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on read channel:"
-				" negative reply\n",
-				dev_name(&channel->ccwdev->dev));
+			QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: negative reply\n",
+					 CCW_DEVID(channel->ccwdev));
 		}
 		QETH_CARD_TEXT_(card, 2, "idxread%c",
 			QETH_IDX_ACT_CAUSE_CODE(iob->data));
@@ -1984,10 +1979,9 @@ static void qeth_idx_read_cb(struct qeth_card *card,
 
 	memcpy(&temp, QETH_IDX_ACT_FUNC_LEVEL(iob->data), 2);
 	if (temp != qeth_peer_func_level(card->info.func_level)) {
-		QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on read channel: function "
-			"level mismatch (sent: 0x%x, received: 0x%x)\n",
-			dev_name(&channel->ccwdev->dev),
-			card->info.func_level, temp);
+		QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: function level mismatch (sent: %#x, received: %#x)\n",
+				 CCW_DEVID(channel->ccwdev),
+				 card->info.func_level, temp);
 		goto out;
 	}
 	memcpy(&card->token.issuer_rm_r,
@@ -2096,9 +2090,8 @@ int qeth_send_control_data(struct qeth_card *card, int len,
 				      (addr_t) iob, 0, 0, event_timeout);
 	spin_unlock_irq(get_ccwdev_lock(channel->ccwdev));
 	if (rc) {
-		QETH_DBF_MESSAGE(2, "%s qeth_send_control_data: "
-			"ccw_device_start rc = %i\n",
-			dev_name(&channel->ccwdev->dev), rc);
+		QETH_DBF_MESSAGE(2, "qeth_send_control_data on device %x: ccw_device_start rc = %i\n",
+				 CARD_DEVID(card), rc);
 		QETH_CARD_TEXT_(card, 2, " err%d", rc);
 		spin_lock_irq(&card->lock);
 		list_del_init(&reply->list);
@@ -2853,8 +2846,8 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
 	} else {
 		dev_warn(&card->gdev->dev,
 			 "The qeth driver ran out of channel command buffers\n");
-		QETH_DBF_MESSAGE(1, "%s The qeth driver ran out of channel command buffers",
-				 dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(1, "device %x ran out of channel command buffers",
+				 CARD_DEVID(card));
 	}
 
 	return iob;
@@ -2989,10 +2982,9 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
 		return 0;
 	default:
 		if (cmd->hdr.return_code) {
-			QETH_DBF_MESSAGE(1, "%s IPA_CMD_QIPASSIST: Unhandled "
-						"rc=%d\n",
-						dev_name(&card->gdev->dev),
-						cmd->hdr.return_code);
+			QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n",
+					 CARD_DEVID(card),
+					 cmd->hdr.return_code);
 			return 0;
 		}
 	}
@@ -3004,8 +2996,8 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
 		card->options.ipa6.supported_funcs = cmd->hdr.ipa_supported;
 		card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled;
 	} else
-		QETH_DBF_MESSAGE(1, "%s IPA_CMD_QIPASSIST: Flawed LIC detected"
-					"\n", dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Flawed LIC detected\n",
+				 CARD_DEVID(card));
 	return 0;
 }
 
@@ -4297,10 +4289,9 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
 		cmd->data.setadapterparms.hdr.return_code);
 	if (cmd->data.setadapterparms.hdr.return_code !=
 						SET_ACCESS_CTRL_RC_SUCCESS)
-		QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%s,%d)==%d\n",
-				card->gdev->dev.kobj.name,
-				access_ctrl_req->subcmd_code,
-				cmd->data.setadapterparms.hdr.return_code);
+		QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%#x) on device %x: %#x\n",
+				 access_ctrl_req->subcmd_code, CARD_DEVID(card),
+				 cmd->data.setadapterparms.hdr.return_code);
 	switch (cmd->data.setadapterparms.hdr.return_code) {
 	case SET_ACCESS_CTRL_RC_SUCCESS:
 		if (card->options.isolation == ISOLATION_MODE_NONE) {
@@ -4312,14 +4303,14 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
 		}
 		break;
 	case SET_ACCESS_CTRL_RC_ALREADY_NOT_ISOLATED:
-		QETH_DBF_MESSAGE(2, "%s QDIO data connection isolation already "
-				"deactivated\n", dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(2, "QDIO data connection isolation on device %x already deactivated\n",
+				 CARD_DEVID(card));
 		if (fallback)
 			card->options.isolation = card->options.prev_isolation;
 		break;
 	case SET_ACCESS_CTRL_RC_ALREADY_ISOLATED:
-		QETH_DBF_MESSAGE(2, "%s QDIO data connection isolation already"
-				" activated\n", dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(2, "QDIO data connection isolation on device %x already activated\n",
+				 CARD_DEVID(card));
 		if (fallback)
 			card->options.isolation = card->options.prev_isolation;
 		break;
@@ -4405,10 +4396,8 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback)
 		rc = qeth_setadpparms_set_access_ctrl(card,
 			card->options.isolation, fallback);
 		if (rc) {
-			QETH_DBF_MESSAGE(3,
-				"IPA(SET_ACCESS_CTRL,%s,%d) sent failed\n",
-				card->gdev->dev.kobj.name,
-				rc);
+			QETH_DBF_MESSAGE(3, "IPA(SET_ACCESS_CTRL(%d) on device %x: sent failed\n",
+					 rc, CARD_DEVID(card));
 			rc = -EOPNOTSUPP;
 		}
 	} else if (card->options.isolation != ISOLATION_MODE_NONE) {
@@ -4634,8 +4623,8 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
 	rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len,
 				    qeth_snmp_command_cb, (void *)&qinfo);
 	if (rc)
-		QETH_DBF_MESSAGE(2, "SNMP command failed on %s: (0x%x)\n",
-			   QETH_CARD_IFNAME(card), rc);
+		QETH_DBF_MESSAGE(2, "SNMP command failed on device %x: (%#x)\n",
+				 CARD_DEVID(card), rc);
 	else {
 		if (copy_to_user(udata, qinfo.udata, qinfo.udata_len))
 			rc = -EFAULT;
@@ -4869,8 +4858,8 @@ static void qeth_determine_capabilities(struct qeth_card *card)
 
 	rc = qeth_read_conf_data(card, (void **) &prcd, &length);
 	if (rc) {
-		QETH_DBF_MESSAGE(2, "%s qeth_read_conf_data returned %i\n",
-			dev_name(&card->gdev->dev), rc);
+		QETH_DBF_MESSAGE(2, "qeth_read_conf_data on device %x returned %i\n",
+				 CARD_DEVID(card), rc);
 		QETH_DBF_TEXT_(SETUP, 2, "5err%d", rc);
 		goto out_offline;
 	}
@@ -5096,8 +5085,8 @@ int qeth_core_hardsetup_card(struct qeth_card *card)
 	qeth_update_from_chp_desc(card);
 retry:
 	if (retries < 3)
-		QETH_DBF_MESSAGE(2, "%s Retrying to do IDX activates.\n",
-			dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(2, "Retrying to do IDX activates on device %x.\n",
+				 CARD_DEVID(card));
 	rc = qeth_qdio_clear_card(card, card->info.type != QETH_CARD_TYPE_IQD);
 	ccw_device_set_offline(CARD_DDEV(card));
 	ccw_device_set_offline(CARD_WDEV(card));
@@ -5201,8 +5190,8 @@ int qeth_core_hardsetup_card(struct qeth_card *card)
 out:
 	dev_warn(&card->gdev->dev, "The qeth device driver failed to recover "
 		"an error on the device\n");
-	QETH_DBF_MESSAGE(2, "%s Initialization in hardsetup failed! rc=%d\n",
-		dev_name(&card->gdev->dev), rc);
+	QETH_DBF_MESSAGE(2, "Initialization for device %x failed in hardsetup! rc=%d\n",
+			 CARD_DEVID(card), rc);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 23aaf373f631..5b67fd1f2b77 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -146,11 +146,11 @@ static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
 	QETH_CARD_TEXT(card, 2, "L2Wmac");
 	rc = qeth_l2_send_setdelmac(card, mac, cmd);
 	if (rc == -EEXIST)
-		QETH_DBF_MESSAGE(2, "MAC %pM already registered on %s\n",
-				 mac, QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(2, "MAC already registered on device %x\n",
+				 CARD_DEVID(card));
 	else if (rc)
-		QETH_DBF_MESSAGE(2, "Failed to register MAC %pM on %s: %d\n",
-				 mac, QETH_CARD_IFNAME(card), rc);
+		QETH_DBF_MESSAGE(2, "Failed to register MAC on device %x: %d\n",
+				 CARD_DEVID(card), rc);
 	return rc;
 }
 
@@ -163,8 +163,8 @@ static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac)
 	QETH_CARD_TEXT(card, 2, "L2Rmac");
 	rc = qeth_l2_send_setdelmac(card, mac, cmd);
 	if (rc)
-		QETH_DBF_MESSAGE(2, "Failed to delete MAC %pM on %s: %d\n",
-				 mac, QETH_CARD_IFNAME(card), rc);
+		QETH_DBF_MESSAGE(2, "Failed to delete MAC on device %u: %d\n",
+				 CARD_DEVID(card), rc);
 	return rc;
 }
 
@@ -260,9 +260,9 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card,
 
 	QETH_CARD_TEXT(card, 2, "L2sdvcb");
 	if (cmd->hdr.return_code) {
-		QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x.\n",
+		QETH_DBF_MESSAGE(2, "Error in processing VLAN %u on device %x: %#x.\n",
 				 cmd->data.setdelvlan.vlan_id,
-				 QETH_CARD_IFNAME(card), cmd->hdr.return_code);
+				 CARD_DEVID(card), cmd->hdr.return_code);
 		QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command);
 		QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code);
 	}
@@ -455,8 +455,8 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
 		rc = qeth_vm_request_mac(card);
 		if (!rc)
 			goto out;
-		QETH_DBF_MESSAGE(2, "z/VM MAC Service failed on device %s: x%x\n",
-				 CARD_BUS_ID(card), rc);
+		QETH_DBF_MESSAGE(2, "z/VM MAC Service failed on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
 		QETH_DBF_TEXT_(SETUP, 2, "err%04x", rc);
 		/* fall back to alternative mechanism: */
 	}
@@ -468,8 +468,8 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
 		rc = qeth_setadpparms_change_macaddr(card);
 		if (!rc)
 			goto out;
-		QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %s: x%x\n",
-				 CARD_BUS_ID(card), rc);
+		QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
 		QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc);
 		/* fall back once more: */
 	}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 0b161cc1fd2e..ffa2aa1dd4c5 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -494,9 +494,8 @@ int qeth_l3_setrouting_v4(struct qeth_card *card)
 				  QETH_PROT_IPV4);
 	if (rc) {
 		card->options.route4.type = NO_ROUTER;
-		QETH_DBF_MESSAGE(2, "Error (0x%04x) while setting routing type"
-			" on %s. Type set to 'no router'.\n", rc,
-			QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(2, "Error (%#06x) while setting routing type on device %x. Type set to 'no router'.\n",
+				 rc, CARD_DEVID(card));
 	}
 	return rc;
 }
@@ -518,9 +517,8 @@ int qeth_l3_setrouting_v6(struct qeth_card *card)
 				  QETH_PROT_IPV6);
 	if (rc) {
 		card->options.route6.type = NO_ROUTER;
-		QETH_DBF_MESSAGE(2, "Error (0x%04x) while setting routing type"
-			" on %s. Type set to 'no router'.\n", rc,
-			QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(2, "Error (%#06x) while setting routing type on device %x. Type set to 'no router'.\n",
+				 rc, CARD_DEVID(card));
 	}
 	return rc;
 }
@@ -1070,8 +1068,8 @@ qeth_diags_trace_cb(struct qeth_card *card, struct qeth_reply *reply,
 		}
 		break;
 	default:
-		QETH_DBF_MESSAGE(2, "Unknown sniffer action (0x%04x) on %s\n",
-			cmd->data.diagass.action, QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(2, "Unknown sniffer action (%#06x) on device %x\n",
+				 cmd->data.diagass.action, CARD_DEVID(card));
 	}
 
 	return 0;
@@ -1517,32 +1515,25 @@ static void qeth_l3_set_rx_mode(struct net_device *dev)
 	qeth_l3_handle_promisc_mode(card);
 }
 
-static const char *qeth_l3_arp_get_error_cause(int *rc)
+static int qeth_l3_arp_makerc(int rc)
 {
-	switch (*rc) {
-	case QETH_IPA_ARP_RC_FAILED:
-		*rc = -EIO;
-		return "operation failed";
+	switch (rc) {
+	case IPA_RC_SUCCESS:
+		return 0;
 	case QETH_IPA_ARP_RC_NOTSUPP:
-		*rc = -EOPNOTSUPP;
-		return "operation not supported";
-	case QETH_IPA_ARP_RC_OUT_OF_RANGE:
-		*rc = -EINVAL;
-		return "argument out of range";
 	case QETH_IPA_ARP_RC_Q_NOTSUPP:
-		*rc = -EOPNOTSUPP;
-		return "query operation not supported";
+		return -EOPNOTSUPP;
+	case QETH_IPA_ARP_RC_OUT_OF_RANGE:
+		return -EINVAL;
 	case QETH_IPA_ARP_RC_Q_NO_DATA:
-		*rc = -ENOENT;
-		return "no query data available";
+		return -ENOENT;
 	default:
-		return "unknown error";
+		return -EIO;
 	}
 }
 
 static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries)
 {
-	int tmp;
 	int rc;
 
 	QETH_CARD_TEXT(card, 3, "arpstnoe");
@@ -1560,13 +1551,10 @@ static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries)
 	rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING,
 					  IPA_CMD_ASS_ARP_SET_NO_ENTRIES,
 					  no_entries);
-	if (rc) {
-		tmp = rc;
-		QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on "
-			"%s: %s (0x%x/%d)\n", QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static __u32 get_arp_entry_size(struct qeth_card *card,
@@ -1716,7 +1704,6 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
 {
 	struct qeth_cmd_buffer *iob;
 	struct qeth_ipa_cmd *cmd;
-	int tmp;
 	int rc;
 
 	QETH_CARD_TEXT_(card, 3, "qarpipv%i", prot);
@@ -1735,15 +1722,10 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
 	rc = qeth_l3_send_ipa_arp_cmd(card, iob,
 			   QETH_SETASS_BASE_LEN+QETH_ARP_CMD_LEN,
 			   qeth_l3_arp_query_cb, (void *)qinfo);
-	if (rc) {
-		tmp = rc;
-		QETH_DBF_MESSAGE(2,
-			"Error while querying ARP cache on %s: %s "
-			"(0x%x/%d)\n", QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Error while querying ARP cache on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata)
@@ -1797,8 +1779,6 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card,
 				struct qeth_arp_cache_entry *entry)
 {
 	struct qeth_cmd_buffer *iob;
-	char buf[16];
-	int tmp;
 	int rc;
 
 	QETH_CARD_TEXT(card, 3, "arpadent");
@@ -1824,14 +1804,10 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card,
 				   sizeof(struct qeth_arp_cache_entry),
 				   (unsigned long) entry,
 				   qeth_setassparms_cb, NULL);
-	if (rc) {
-		tmp = rc;
-		qeth_l3_ipaddr4_to_string((u8 *)entry->ipaddr, buf);
-		QETH_DBF_MESSAGE(2, "Could not add ARP entry for address %s "
-			"on %s: %s (0x%x/%d)\n", buf, QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Could not add ARP entry on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_arp_remove_entry(struct qeth_card *card,
@@ -1839,7 +1815,6 @@ static int qeth_l3_arp_remove_entry(struct qeth_card *card,
 {
 	struct qeth_cmd_buffer *iob;
 	char buf[16] = {0, };
-	int tmp;
 	int rc;
 
 	QETH_CARD_TEXT(card, 3, "arprment");
@@ -1864,21 +1839,15 @@ static int qeth_l3_arp_remove_entry(struct qeth_card *card,
 	rc = qeth_send_setassparms(card, iob,
 				   12, (unsigned long)buf,
 				   qeth_setassparms_cb, NULL);
-	if (rc) {
-		tmp = rc;
-		memset(buf, 0, 16);
-		qeth_l3_ipaddr4_to_string((u8 *)entry->ipaddr, buf);
-		QETH_DBF_MESSAGE(2, "Could not delete ARP entry for address %s"
-			" on %s: %s (0x%x/%d)\n", buf, QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Could not delete ARP entry on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_arp_flush_cache(struct qeth_card *card)
 {
 	int rc;
-	int tmp;
 
 	QETH_CARD_TEXT(card, 3, "arpflush");
 
@@ -1894,13 +1863,10 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card)
 	}
 	rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING,
 					  IPA_CMD_ASS_ARP_FLUSH_CACHE, 0);
-	if (rc) {
-		tmp = rc;
-		QETH_DBF_MESSAGE(2, "Could not flush ARP cache on %s: %s "
-			"(0x%x/%d)\n", QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Could not flush ARP cache on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-- 
2.16.4

^ permalink raw reply related

* [PATCH net 4/6] s390/qeth: fix initial operstate
From: Julian Wiedmann @ 2018-11-02 18:04 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20181102180413.53020-1-jwi@linux.ibm.com>

Setting the carrier 'on' for an unregistered netdevice doesn't update
its operstate. Fix this by delaying the update until the netdevice has
been registered.

Fixes: 91cc98f51e3d ("s390/qeth: remove duplicated carrier state tracking")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/qeth_core.h      |  2 +-
 drivers/s390/net/qeth_core_main.c | 13 ++++++++++---
 drivers/s390/net/qeth_l2_main.c   | 10 +++++++---
 drivers/s390/net/qeth_l3_main.c   | 10 +++++++---
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index b3a0b8838d2f..90cb213b0d55 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -991,7 +991,7 @@ int qeth_wait_for_threads(struct qeth_card *, unsigned long);
 int qeth_do_run_thread(struct qeth_card *, unsigned long);
 void qeth_clear_thread_start_bit(struct qeth_card *, unsigned long);
 void qeth_clear_thread_running_bit(struct qeth_card *, unsigned long);
-int qeth_core_hardsetup_card(struct qeth_card *);
+int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok);
 void qeth_print_status_message(struct qeth_card *);
 int qeth_init_qdio_queues(struct qeth_card *);
 int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 639ac0aca1e9..aed1a7961553 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5075,7 +5075,7 @@ static struct ccw_driver qeth_ccw_driver = {
 	.remove = ccwgroup_remove_ccwdev,
 };
 
-int qeth_core_hardsetup_card(struct qeth_card *card)
+int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok)
 {
 	int retries = 3;
 	int rc;
@@ -5150,13 +5150,20 @@ int qeth_core_hardsetup_card(struct qeth_card *card)
 		if (rc == IPA_RC_LAN_OFFLINE) {
 			dev_warn(&card->gdev->dev,
 				"The LAN is offline\n");
-			netif_carrier_off(card->dev);
+			*carrier_ok = false;
 		} else {
 			rc = -ENODEV;
 			goto out;
 		}
 	} else {
-		netif_carrier_on(card->dev);
+		*carrier_ok = true;
+	}
+
+	if (qeth_netdev_is_registered(card->dev)) {
+		if (*carrier_ok)
+			netif_carrier_on(card->dev);
+		else
+			netif_carrier_off(card->dev);
 	}
 
 	card->options.ipa4.supported_funcs = 0;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 2b978eba7e30..2914a1a69f83 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -863,7 +863,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
 	.ndo_set_features	= qeth_set_features
 };
 
-static int qeth_l2_setup_netdev(struct qeth_card *card)
+static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok)
 {
 	int rc;
 
@@ -920,6 +920,9 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	qeth_l2_request_initial_mac(card);
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
 	rc = register_netdev(card->dev);
+	if (!rc && carrier_ok)
+		netif_carrier_on(card->dev);
+
 	if (rc)
 		card->dev->netdev_ops = NULL;
 	return rc;
@@ -950,6 +953,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
 	int rc = 0;
 	enum qeth_card_states recover_flag;
+	bool carrier_ok;
 
 	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
@@ -957,7 +961,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
 
 	recover_flag = card->state;
-	rc = qeth_core_hardsetup_card(card);
+	rc = qeth_core_hardsetup_card(card, &carrier_ok);
 	if (rc) {
 		QETH_DBF_TEXT_(SETUP, 2, "2err%04x", rc);
 		rc = -ENODEV;
@@ -968,7 +972,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 		dev_info(&card->gdev->dev,
 		"The device represents a Bridge Capable Port\n");
 
-	rc = qeth_l2_setup_netdev(card);
+	rc = qeth_l2_setup_netdev(card, carrier_ok);
 	if (rc)
 		goto out_remove;
 
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index a719c5ec4171..b26f7d7a2ca0 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2351,7 +2351,7 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
 	.ndo_neigh_setup	= qeth_l3_neigh_setup,
 };
 
-static int qeth_l3_setup_netdev(struct qeth_card *card)
+static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok)
 {
 	unsigned int headroom;
 	int rc;
@@ -2425,6 +2425,9 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
 	rc = register_netdev(card->dev);
+	if (!rc && carrier_ok)
+		netif_carrier_on(card->dev);
+
 out:
 	if (rc)
 		card->dev->netdev_ops = NULL;
@@ -2476,6 +2479,7 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
 	int rc = 0;
 	enum qeth_card_states recover_flag;
+	bool carrier_ok;
 
 	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
@@ -2483,14 +2487,14 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
 
 	recover_flag = card->state;
-	rc = qeth_core_hardsetup_card(card);
+	rc = qeth_core_hardsetup_card(card, &carrier_ok);
 	if (rc) {
 		QETH_DBF_TEXT_(SETUP, 2, "2err%04x", rc);
 		rc = -ENODEV;
 		goto out_remove;
 	}
 
-	rc = qeth_l3_setup_netdev(card);
+	rc = qeth_l3_setup_netdev(card, carrier_ok);
 	if (rc)
 		goto out_remove;
 
-- 
2.16.4

^ permalink raw reply related

* [PATCH net 3/6] s390/qeth: unregister netdevice only when registered
From: Julian Wiedmann @ 2018-11-02 18:04 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20181102180413.53020-1-jwi@linux.ibm.com>

qeth only registers its netdevice when the qeth device is first set
online. Thus a device that has never been set online will trigger
a WARN ("network todo 'hsi%d' but state 0") in unregister_netdev() when
removed.

Fix this by protecting the unregister step, just like we already protect
against repeated registering of the netdevice.

Fixes: d3d1b205e89f ("s390/qeth: allocate netdevice early")
Reported-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/qeth_core.h    | 5 +++++
 drivers/s390/net/qeth_l2_main.c | 5 +++--
 drivers/s390/net/qeth_l3_main.c | 5 +++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 884ba9dfb341..b3a0b8838d2f 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -843,6 +843,11 @@ struct qeth_trap_id {
 /*some helper functions*/
 #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "")
 
+static inline bool qeth_netdev_is_registered(struct net_device *dev)
+{
+	return dev->netdev_ops != NULL;
+}
+
 static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf,
 					  unsigned int elements)
 {
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 5b67fd1f2b77..2b978eba7e30 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -826,7 +826,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 
 	if (cgdev->state == CCWGROUP_ONLINE)
 		qeth_l2_set_offline(cgdev);
-	unregister_netdev(card->dev);
+	if (qeth_netdev_is_registered(card->dev))
+		unregister_netdev(card->dev);
 }
 
 static const struct ethtool_ops qeth_l2_ethtool_ops = {
@@ -866,7 +867,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 {
 	int rc;
 
-	if (card->dev->netdev_ops)
+	if (qeth_netdev_is_registered(card->dev))
 		return 0;
 
 	card->dev->priv_flags |= IFF_UNICAST_FLT;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 968e344a240b..a719c5ec4171 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2356,7 +2356,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 	unsigned int headroom;
 	int rc;
 
-	if (card->dev->netdev_ops)
+	if (qeth_netdev_is_registered(card->dev))
 		return 0;
 
 	if (card->info.type == QETH_CARD_TYPE_OSD ||
@@ -2465,7 +2465,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
 	if (cgdev->state == CCWGROUP_ONLINE)
 		qeth_l3_set_offline(cgdev);
 
-	unregister_netdev(card->dev);
+	if (qeth_netdev_is_registered(card->dev))
+		unregister_netdev(card->dev);
 	qeth_l3_clear_ip_htable(card, 0);
 	qeth_l3_clear_ipato_list(card);
 }
-- 
2.16.4

^ permalink raw reply related

* [PATCH net 5/6] s390/qeth: sanitize ARP requests
From: Julian Wiedmann @ 2018-11-02 18:04 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20181102180413.53020-1-jwi@linux.ibm.com>

The ARP_{ADD,REMOVE}_ENTRY cmd structs contain reserved fields.
Introduce a common helper that doesn't raw-copy the user-provided data
into the cmd, but only sets those fields that are strictly needed for
the command.

This also sets the correct command length for ARP_REMOVE_ENTRY.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/qeth_core.h      |  5 ---
 drivers/s390/net/qeth_core_main.c | 12 ++---
 drivers/s390/net/qeth_core_mpc.h  |  2 +-
 drivers/s390/net/qeth_l3_main.c   | 94 +++++++++++----------------------------
 4 files changed, 34 insertions(+), 79 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 90cb213b0d55..04e294d1d16d 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1046,11 +1046,6 @@ int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
 void qeth_trace_features(struct qeth_card *);
 void qeth_close_dev(struct qeth_card *);
-int qeth_send_setassparms(struct qeth_card *, struct qeth_cmd_buffer *, __u16,
-			  long,
-			  int (*reply_cb)(struct qeth_card *,
-					  struct qeth_reply *, unsigned long),
-			  void *);
 int qeth_setassparms_cb(struct qeth_card *, struct qeth_reply *, unsigned long);
 struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
 						 enum qeth_ipa_funcs,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index aed1a7961553..82282b2092d8 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5477,11 +5477,12 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
 }
 EXPORT_SYMBOL_GPL(qeth_get_setassparms_cmd);
 
-int qeth_send_setassparms(struct qeth_card *card,
-			  struct qeth_cmd_buffer *iob, __u16 len, long data,
-			  int (*reply_cb)(struct qeth_card *,
-					  struct qeth_reply *, unsigned long),
-			  void *reply_param)
+static int qeth_send_setassparms(struct qeth_card *card,
+				 struct qeth_cmd_buffer *iob, u16 len,
+				 long data, int (*reply_cb)(struct qeth_card *,
+							    struct qeth_reply *,
+							    unsigned long),
+				 void *reply_param)
 {
 	int rc;
 	struct qeth_ipa_cmd *cmd;
@@ -5497,7 +5498,6 @@ int qeth_send_setassparms(struct qeth_card *card,
 	rc = qeth_send_ipa_cmd(card, iob, reply_cb, reply_param);
 	return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_send_setassparms);
 
 int qeth_send_simple_setassparms_prot(struct qeth_card *card,
 				      enum qeth_ipa_funcs ipa_func,
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index e85090467afe..80c036acf563 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -436,7 +436,7 @@ struct qeth_ipacmd_setassparms {
 		__u32 flags_32bit;
 		struct qeth_ipa_caps caps;
 		struct qeth_checksum_cmd chksum;
-		struct qeth_arp_cache_entry add_arp_entry;
+		struct qeth_arp_cache_entry arp_entry;
 		struct qeth_arp_query_data query_arp;
 		struct qeth_tso_start_data tso;
 		__u8 ip[16];
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index b26f7d7a2ca0..f08b745c2007 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1777,13 +1777,18 @@ static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata)
 	return rc;
 }
 
-static int qeth_l3_arp_add_entry(struct qeth_card *card,
-				struct qeth_arp_cache_entry *entry)
+static int qeth_l3_arp_modify_entry(struct qeth_card *card,
+				    struct qeth_arp_cache_entry *entry,
+				    enum qeth_arp_process_subcmds arp_cmd)
 {
+	struct qeth_arp_cache_entry *cmd_entry;
 	struct qeth_cmd_buffer *iob;
 	int rc;
 
-	QETH_CARD_TEXT(card, 3, "arpadent");
+	if (arp_cmd == IPA_CMD_ASS_ARP_ADD_ENTRY)
+		QETH_CARD_TEXT(card, 3, "arpadd");
+	else
+		QETH_CARD_TEXT(card, 3, "arpdel");
 
 	/*
 	 * currently GuestLAN only supports the ARP assist function
@@ -1796,54 +1801,19 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card,
 		return -EOPNOTSUPP;
 	}
 
-	iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
-				       IPA_CMD_ASS_ARP_ADD_ENTRY,
-				       sizeof(struct qeth_arp_cache_entry),
-				       QETH_PROT_IPV4);
+	iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, arp_cmd,
+				       sizeof(*cmd_entry), QETH_PROT_IPV4);
 	if (!iob)
 		return -ENOMEM;
-	rc = qeth_send_setassparms(card, iob,
-				   sizeof(struct qeth_arp_cache_entry),
-				   (unsigned long) entry,
-				   qeth_setassparms_cb, NULL);
-	if (rc)
-		QETH_DBF_MESSAGE(2, "Could not add ARP entry on device %x: %#x\n",
-				 CARD_DEVID(card), rc);
-	return qeth_l3_arp_makerc(rc);
-}
-
-static int qeth_l3_arp_remove_entry(struct qeth_card *card,
-				struct qeth_arp_cache_entry *entry)
-{
-	struct qeth_cmd_buffer *iob;
-	char buf[16] = {0, };
-	int rc;
 
-	QETH_CARD_TEXT(card, 3, "arprment");
-
-	/*
-	 * currently GuestLAN only supports the ARP assist function
-	 * IPA_CMD_ASS_ARP_QUERY_INFO, but not IPA_CMD_ASS_ARP_REMOVE_ENTRY;
-	 * thus we say EOPNOTSUPP for this ARP function
-	 */
-	if (card->info.guestlan)
-		return -EOPNOTSUPP;
-	if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) {
-		return -EOPNOTSUPP;
-	}
-	memcpy(buf, entry, 12);
-	iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
-				       IPA_CMD_ASS_ARP_REMOVE_ENTRY,
-				       12,
-				       QETH_PROT_IPV4);
-	if (!iob)
-		return -ENOMEM;
-	rc = qeth_send_setassparms(card, iob,
-				   12, (unsigned long)buf,
-				   qeth_setassparms_cb, NULL);
+	cmd_entry = &__ipa_cmd(iob)->data.setassparms.data.arp_entry;
+	ether_addr_copy(cmd_entry->macaddr, entry->macaddr);
+	memcpy(cmd_entry->ipaddr, entry->ipaddr, 4);
+	rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_cb, NULL);
 	if (rc)
-		QETH_DBF_MESSAGE(2, "Could not delete ARP entry on device %x: %#x\n",
-				 CARD_DEVID(card), rc);
+		QETH_DBF_MESSAGE(2, "Could not modify (cmd: %#x) ARP entry on device %x: %#x\n",
+				 arp_cmd, CARD_DEVID(card), rc);
+
 	return qeth_l3_arp_makerc(rc);
 }
 
@@ -1875,6 +1845,7 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	struct qeth_card *card = dev->ml_priv;
 	struct qeth_arp_cache_entry arp_entry;
+	enum qeth_arp_process_subcmds arp_cmd;
 	int rc = 0;
 
 	switch (cmd) {
@@ -1893,27 +1864,16 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		rc = qeth_l3_arp_query(card, rq->ifr_ifru.ifru_data);
 		break;
 	case SIOC_QETH_ARP_ADD_ENTRY:
-		if (!capable(CAP_NET_ADMIN)) {
-			rc = -EPERM;
-			break;
-		}
-		if (copy_from_user(&arp_entry, rq->ifr_ifru.ifru_data,
-				   sizeof(struct qeth_arp_cache_entry)))
-			rc = -EFAULT;
-		else
-			rc = qeth_l3_arp_add_entry(card, &arp_entry);
-		break;
 	case SIOC_QETH_ARP_REMOVE_ENTRY:
-		if (!capable(CAP_NET_ADMIN)) {
-			rc = -EPERM;
-			break;
-		}
-		if (copy_from_user(&arp_entry, rq->ifr_ifru.ifru_data,
-				   sizeof(struct qeth_arp_cache_entry)))
-			rc = -EFAULT;
-		else
-			rc = qeth_l3_arp_remove_entry(card, &arp_entry);
-		break;
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&arp_entry, rq->ifr_data, sizeof(arp_entry)))
+			return -EFAULT;
+
+		arp_cmd = (cmd == SIOC_QETH_ARP_ADD_ENTRY) ?
+				IPA_CMD_ASS_ARP_ADD_ENTRY :
+				IPA_CMD_ASS_ARP_REMOVE_ENTRY;
+		return qeth_l3_arp_modify_entry(card, &arp_entry, arp_cmd);
 	case SIOC_QETH_ARP_FLUSH_CACHE:
 		if (!capable(CAP_NET_ADMIN)) {
 			rc = -EPERM;
-- 
2.16.4

^ permalink raw reply related

* [PATCH net 6/6] s390/qeth: report 25Gbit link speed
From: Julian Wiedmann @ 2018-11-02 18:04 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, linux-s390, Martin Schwidefsky, Heiko Carstens,
	Stefan Raspl, Ursula Braun, Julian Wiedmann
In-Reply-To: <20181102180413.53020-1-jwi@linux.ibm.com>

This adds the various identifiers for 25Gbit cards, and wires them up
into sysfs and ethtool.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 drivers/s390/net/qeth_core_main.c | 20 ++++++++++++++++++--
 drivers/s390/net/qeth_core_mpc.h  |  2 ++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 82282b2092d8..4bce5ae65a55 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -167,6 +167,8 @@ const char *qeth_get_cardname_short(struct qeth_card *card)
 				return "OSD_1000";
 			case QETH_LINK_TYPE_10GBIT_ETH:
 				return "OSD_10GIG";
+			case QETH_LINK_TYPE_25GBIT_ETH:
+				return "OSD_25GIG";
 			case QETH_LINK_TYPE_LANE_ETH100:
 				return "OSD_FE_LANE";
 			case QETH_LINK_TYPE_LANE_TR:
@@ -4432,7 +4434,8 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum)
 		rc = BMCR_FULLDPLX;
 		if ((card->info.link_type != QETH_LINK_TYPE_GBIT_ETH) &&
 		    (card->info.link_type != QETH_LINK_TYPE_OSN) &&
-		    (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH))
+		    (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH) &&
+		    (card->info.link_type != QETH_LINK_TYPE_25GBIT_ETH))
 			rc |= BMCR_SPEED100;
 		break;
 	case MII_BMSR: /* Basic mode status register */
@@ -6166,8 +6169,14 @@ static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd,
 		WARN_ON_ONCE(1);
 	}
 
-	/* fallthrough from high to low, to select all legal speeds: */
+	/* partially does fall through, to also select lower speeds */
 	switch (maxspeed) {
+	case SPEED_25000:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     25000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     25000baseSR_Full);
+		break;
 	case SPEED_10000:
 		ethtool_link_ksettings_add_link_mode(cmd, supported,
 						     10000baseT_Full);
@@ -6250,6 +6259,10 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
 		cmd->base.speed = SPEED_10000;
 		cmd->base.port = PORT_FIBRE;
 		break;
+	case QETH_LINK_TYPE_25GBIT_ETH:
+		cmd->base.speed = SPEED_25000;
+		cmd->base.port = PORT_FIBRE;
+		break;
 	default:
 		cmd->base.speed = SPEED_10;
 		cmd->base.port = PORT_TP;
@@ -6316,6 +6329,9 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
 	case CARD_INFO_PORTS_10G:
 		cmd->base.speed = SPEED_10000;
 		break;
+	case CARD_INFO_PORTS_25G:
+		cmd->base.speed = SPEED_25000;
+		break;
 	}
 
 	return 0;
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index 80c036acf563..3e54be201b27 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -90,6 +90,7 @@ enum qeth_link_types {
 	QETH_LINK_TYPE_GBIT_ETH     = 0x03,
 	QETH_LINK_TYPE_OSN          = 0x04,
 	QETH_LINK_TYPE_10GBIT_ETH   = 0x10,
+	QETH_LINK_TYPE_25GBIT_ETH   = 0x12,
 	QETH_LINK_TYPE_LANE_ETH100  = 0x81,
 	QETH_LINK_TYPE_LANE_TR      = 0x82,
 	QETH_LINK_TYPE_LANE_ETH1000 = 0x83,
@@ -347,6 +348,7 @@ enum qeth_card_info_port_speed {
 	CARD_INFO_PORTS_100M		= 0x00000006,
 	CARD_INFO_PORTS_1G		= 0x00000007,
 	CARD_INFO_PORTS_10G		= 0x00000008,
+	CARD_INFO_PORTS_25G		= 0x0000000A,
 };
 
 /* (SET)DELIP(M) IPA stuff ***************************************************/
-- 
2.16.4

^ permalink raw reply related

* [PATCH net] net-sysfs: fix formatting of tx_timeout attribute
From: Julian Wiedmann @ 2018-11-02 18:33 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Add a missing newline.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
---
 net/core/net-sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bd67c4d0fcfd..ef06409d768e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1039,7 +1039,7 @@ static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf)
 	trans_timeout = queue->trans_timeout;
 	spin_unlock_irq(&queue->_xmit_lock);
 
-	return sprintf(buf, "%lu", trans_timeout);
+	return sprintf(buf, "%lu\n", trans_timeout);
 }
 
 static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
-- 
2.16.4

^ permalink raw reply related

* Re: Kernel 4.19 network performance - forwarding/routing normal users traffic
From: Paweł Staszewski @ 2018-11-02 19:02 UTC (permalink / raw)
  To: Aaron Lu, Jesper Dangaard Brouer
  Cc: Saeed Mahameed, eric.dumazet@gmail.com, netdev@vger.kernel.org,
	Tariq Toukan, ilias.apalodimas@linaro.org, yoel@kviknet.dk,
	mgorman@techsingularity.net
In-Reply-To: <20181102142024.GA18343@intel.com>



W dniu 02.11.2018 o 15:20, Aaron Lu pisze:
> On Fri, Nov 02, 2018 at 12:40:37PM +0100, Jesper Dangaard Brouer wrote:
>> On Fri, 2 Nov 2018 13:23:56 +0800
>> Aaron Lu <aaron.lu@intel.com> wrote:
>>
>>> On Thu, Nov 01, 2018 at 08:23:19PM +0000, Saeed Mahameed wrote:
>>>> On Thu, 2018-11-01 at 23:27 +0800, Aaron Lu wrote:
>>>>> On Thu, Nov 01, 2018 at 10:22:13AM +0100, Jesper Dangaard Brouer
>>>>> wrote:
>>>>> ... ...
>>>>>> Section copied out:
>>>>>>
>>>>>>    mlx5e_poll_tx_cq
>>>>>>    |
>>>>>>     --16.34%--napi_consume_skb
>>>>>>               |
>>>>>>               |--12.65%--__free_pages_ok
>>>>>>               |          |
>>>>>>               |           --11.86%--free_one_page
>>>>>>               |                     |
>>>>>>               |                     |--10.10%
>>>>>> --queued_spin_lock_slowpath
>>>>>>               |                     |
>>>>>>               |                      --0.65%--_raw_spin_lock
>>>>> This callchain looks like it is freeing higher order pages than order
>>>>> 0:
>>>>> __free_pages_ok is only called for pages whose order are bigger than
>>>>> 0.
>>>> mlx5 rx uses only order 0 pages, so i don't know where these high order
>>>> tx SKBs are coming from..
>>> Perhaps here:
>>> __netdev_alloc_skb(), __napi_alloc_skb(), __netdev_alloc_frag() and
>>> __napi_alloc_frag() will all call page_frag_alloc(), which will use
>>> __page_frag_cache_refill() to get an order 3 page if possible, or fall
>>> back to an order 0 page if order 3 page is not available.
>>>
>>> I'm not sure if your workload will use the above code path though.
>> TL;DR: this is order-0 pages (code-walk trough proof below)
>>
>> To Aaron, the network stack *can* call __free_pages_ok() with order-0
>> pages, via:
>>
>> static void skb_free_head(struct sk_buff *skb)
>> {
>> 	unsigned char *head = skb->head;
>>
>> 	if (skb->head_frag)
>> 		skb_free_frag(head);
>> 	else
>> 		kfree(head);
>> }
>>
>> static inline void skb_free_frag(void *addr)
>> {
>> 	page_frag_free(addr);
>> }
>>
>> /*
>>   * Frees a page fragment allocated out of either a compound or order 0 page.
>>   */
>> void page_frag_free(void *addr)
>> {
>> 	struct page *page = virt_to_head_page(addr);
>>
>> 	if (unlikely(put_page_testzero(page)))
>> 		__free_pages_ok(page, compound_order(page));
>> }
>> EXPORT_SYMBOL(page_frag_free);
> I think here is a problem - order 0 pages are freed directly to buddy,
> bypassing per-cpu-pages. This might be the reason lock contention
> appeared on free path. Can someone apply below diff and see if lock
> contention is gone?
Will test it tonight



>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index e2ef1c17942f..65c0ae13215a 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4554,8 +4554,14 @@ void page_frag_free(void *addr)
>   {
>   	struct page *page = virt_to_head_page(addr);
>   
> -	if (unlikely(put_page_testzero(page)))
> -		__free_pages_ok(page, compound_order(page));
> +	if (unlikely(put_page_testzero(page))) {
> +		unsigned int order = compound_order(page);
> +
> +		if (order == 0)
> +			free_unref_page(page);
> +		else
> +			__free_pages_ok(page, order);
> +	}
>   }
>   EXPORT_SYMBOL(page_frag_free);
>   
>> Notice for the mlx5 driver it support several RX-memory models, so it
>> can be hard to follow, but from the perf report output we can see that
>> is uses mlx5e_skb_from_cqe_linear, which use build_skb.
>>
>> --13.63%--mlx5e_skb_from_cqe_linear
>>            |
>>             --5.02%--build_skb
>>                       |
>>                        --1.85%--__build_skb
>>                                  |
>>                                   --1.00%--kmem_cache_alloc
>>
>> /* build_skb() is wrapper over __build_skb(), that specifically
>>   * takes care of skb->head and skb->pfmemalloc
>>   * This means that if @frag_size is not zero, then @data must be backed
>>   * by a page fragment, not kmalloc() or vmalloc()
>>   */
>> struct sk_buff *build_skb(void *data, unsigned int frag_size)
>> {
>> 	struct sk_buff *skb = __build_skb(data, frag_size);
>>
>> 	if (skb && frag_size) {
>> 		skb->head_frag = 1;
>> 		if (page_is_pfmemalloc(virt_to_head_page(data)))
>> 			skb->pfmemalloc = 1;
>> 	}
>> 	return skb;
>> }
>> EXPORT_SYMBOL(build_skb);
>>
>> It still doesn't prove, that the @data is backed by by a order-0 page.
>> For the mlx5 driver is uses mlx5e_page_alloc_mapped ->
>> page_pool_dev_alloc_pages(), and I can see perf report using
>> __page_pool_alloc_pages_slow().
>>
>> The setup for page_pool in mlx5 uses order=0.
>>
>> 	/* Create a page_pool and register it with rxq */
>> 	pp_params.order     = 0;
>> 	pp_params.flags     = 0; /* No-internal DMA mapping in page_pool */
>> 	pp_params.pool_size = pool_size;
>> 	pp_params.nid       = cpu_to_node(c->cpu);
>> 	pp_params.dev       = c->pdev;
>> 	pp_params.dma_dir   = rq->buff.map_dir;
>>
>> 	/* page_pool can be used even when there is no rq->xdp_prog,
>> 	 * given page_pool does not handle DMA mapping there is no
>> 	 * required state to clear. And page_pool gracefully handle
>> 	 * elevated refcnt.
>> 	 */
>> 	rq->page_pool = page_pool_create(&pp_params);
>> 	if (IS_ERR(rq->page_pool)) {
>> 		err = PTR_ERR(rq->page_pool);
>> 		rq->page_pool = NULL;
>> 		goto err_free;
>> 	}
>> 	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
>> 					 MEM_TYPE_PAGE_POOL, rq->page_pool);
> Thanks for the detailed analysis, I'll need more time to understand the
> whole picture :-)
>

^ permalink raw reply

* [PATCH net-next v4 1/9] net: allow binding socket in a VRF when there's an unbound socket
From: Mike Manning @ 2018-11-02 19:10 UTC (permalink / raw)
  To: netdev; +Cc: Robert Shearman
In-Reply-To: <20181102191020.14170-1-mmanning@vyatta.att-mail.com>

From: Robert Shearman <rshearma@vyatta.att-mail.com>

Change the inet socket lookup to avoid packets arriving on a device
enslaved to an l3mdev from matching unbound sockets by removing the
wildcard for non sk_bound_dev_if and instead relying on check against
the secondary device index, which will be 0 when the input device is
not enslaved to an l3mdev and so match against an unbound socket and
not match when the input device is enslaved.

Change the socket binding to take the l3mdev into account to allow an
unbound socket to not conflict sockets bound to an l3mdev given the
datapath isolation now guaranteed.

Signed-off-by: Robert Shearman <rshearma@vyatta.att-mail.com>
Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
---
 Documentation/networking/vrf.txt |  9 +++++----
 include/net/inet6_hashtables.h   |  5 ++---
 include/net/inet_hashtables.h    | 13 ++++++-------
 include/net/inet_sock.h          | 13 +++++++++++++
 net/ipv4/inet_connection_sock.c  | 13 ++++++++++---
 net/ipv4/inet_hashtables.c       | 20 +++++++++++++++-----
 6 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt
index 8ff7b4c8f91b..d4b129402d57 100644
--- a/Documentation/networking/vrf.txt
+++ b/Documentation/networking/vrf.txt
@@ -103,6 +103,11 @@ VRF device:
 
 or to specify the output device using cmsg and IP_PKTINFO.
 
+By default the scope of the port bindings for unbound sockets is
+limited to the default VRF. That is, it will not be matched by packets
+arriving on interfaces enslaved to an l3mdev and processes may bind to
+the same port if they bind to an l3mdev.
+
 TCP & UDP services running in the default VRF context (ie., not bound
 to any VRF device) can work across all VRF domains by enabling the
 tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:
@@ -112,10 +117,6 @@ tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:
 netfilter rules on the VRF device can be used to limit access to services
 running in the default VRF context as well.
 
-The default VRF does not have limited scope with respect to port bindings.
-That is, if a process does a wildcard bind to a port in the default VRF it
-owns the port across all VRF domains within the network namespace.
-
 ################################################################################
 
 Using iproute2 for VRFs
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index 6e91e38a31da..9db98af46985 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -115,9 +115,8 @@ int inet6_hash(struct sock *sk);
 	 ((__sk)->sk_family == AF_INET6)			&&	\
 	 ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr))		&&	\
 	 ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr))	&&	\
-	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif))	||			\
-	   ((__sk)->sk_bound_dev_if == (__sdif)))		&&	\
+	 (((__sk)->sk_bound_dev_if == (__dif))	||			\
+	  ((__sk)->sk_bound_dev_if == (__sdif)))		&&	\
 	 net_eq(sock_net(__sk), (__net)))
 
 #endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 9141e95529e7..4ae060b4bac2 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -79,6 +79,7 @@ struct inet_ehash_bucket {
 
 struct inet_bind_bucket {
 	possible_net_t		ib_net;
+	int			l3mdev;
 	unsigned short		port;
 	signed char		fastreuse;
 	signed char		fastreuseport;
@@ -191,7 +192,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
 struct inet_bind_bucket *
 inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
 			struct inet_bind_hashbucket *head,
-			const unsigned short snum);
+			const unsigned short snum, int l3mdev);
 void inet_bind_bucket_destroy(struct kmem_cache *cachep,
 			      struct inet_bind_bucket *tb);
 
@@ -282,9 +283,8 @@ static inline struct sock *inet_lookup_listener(struct net *net,
 #define INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif, __sdif) \
 	(((__sk)->sk_portpair == (__ports))			&&	\
 	 ((__sk)->sk_addrpair == (__cookie))			&&	\
-	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif))			||	\
-	   ((__sk)->sk_bound_dev_if == (__sdif)))		&&	\
+	 (((__sk)->sk_bound_dev_if == (__dif))			||	\
+	  ((__sk)->sk_bound_dev_if == (__sdif)))		&&	\
 	 net_eq(sock_net(__sk), (__net)))
 #else /* 32-bit arch */
 #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
@@ -294,9 +294,8 @@ static inline struct sock *inet_lookup_listener(struct net *net,
 	(((__sk)->sk_portpair == (__ports))		&&		\
 	 ((__sk)->sk_daddr	== (__saddr))		&&		\
 	 ((__sk)->sk_rcv_saddr	== (__daddr))		&&		\
-	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif))		||		\
-	   ((__sk)->sk_bound_dev_if == (__sdif)))	&&		\
+	 (((__sk)->sk_bound_dev_if == (__dif))		||		\
+	  ((__sk)->sk_bound_dev_if == (__sdif)))	&&		\
 	 net_eq(sock_net(__sk), (__net)))
 #endif /* 64-bit arch */
 
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a80fd0ac4563..ed3f723af00b 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -130,6 +130,19 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
 	return sk->sk_bound_dev_if;
 }
 
+static inline int inet_sk_bound_l3mdev(const struct sock *sk)
+{
+#ifdef CONFIG_NET_L3_MASTER_DEV
+	struct net *net = sock_net(sk);
+
+	if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+		return l3mdev_master_ifindex_by_index(net,
+						      sk->sk_bound_dev_if);
+#endif
+
+	return 0;
+}
+
 struct inet_cork {
 	unsigned int		flags;
 	__be32			addr;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 15e7f7915a21..5c63449130d9 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -183,7 +183,9 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *
 	int i, low, high, attempt_half;
 	struct inet_bind_bucket *tb;
 	u32 remaining, offset;
+	int l3mdev;
 
+	l3mdev = inet_sk_bound_l3mdev(sk);
 	attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
 other_half_scan:
 	inet_get_local_port_range(net, &low, &high);
@@ -219,7 +221,8 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *
 						  hinfo->bhash_size)];
 		spin_lock_bh(&head->lock);
 		inet_bind_bucket_for_each(tb, &head->chain)
-			if (net_eq(ib_net(tb), net) && tb->port == port) {
+			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+			    tb->port == port) {
 				if (!inet_csk_bind_conflict(sk, tb, false, false))
 					goto success;
 				goto next_port;
@@ -293,6 +296,9 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 	struct net *net = sock_net(sk);
 	struct inet_bind_bucket *tb = NULL;
 	kuid_t uid = sock_i_uid(sk);
+	int l3mdev;
+
+	l3mdev = inet_sk_bound_l3mdev(sk);
 
 	if (!port) {
 		head = inet_csk_find_open_port(sk, &tb, &port);
@@ -306,11 +312,12 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
 					  hinfo->bhash_size)];
 	spin_lock_bh(&head->lock);
 	inet_bind_bucket_for_each(tb, &head->chain)
-		if (net_eq(ib_net(tb), net) && tb->port == port)
+		if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+		    tb->port == port)
 			goto tb_found;
 tb_not_found:
 	tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-				     net, head, port);
+				     net, head, port, l3mdev);
 	if (!tb)
 		goto fail_unlock;
 tb_found:
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 411dd7a90046..40d722ab1738 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -65,12 +65,14 @@ static u32 sk_ehashfn(const struct sock *sk)
 struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 						 struct net *net,
 						 struct inet_bind_hashbucket *head,
-						 const unsigned short snum)
+						 const unsigned short snum,
+						 int l3mdev)
 {
 	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
 
 	if (tb) {
 		write_pnet(&tb->ib_net, net);
+		tb->l3mdev    = l3mdev;
 		tb->port      = snum;
 		tb->fastreuse = 0;
 		tb->fastreuseport = 0;
@@ -135,6 +137,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
 			table->bhash_size);
 	struct inet_bind_hashbucket *head = &table->bhash[bhash];
 	struct inet_bind_bucket *tb;
+	int l3mdev;
 
 	spin_lock(&head->lock);
 	tb = inet_csk(sk)->icsk_bind_hash;
@@ -143,6 +146,8 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
 		return -ENOENT;
 	}
 	if (tb->port != port) {
+		l3mdev = inet_sk_bound_l3mdev(sk);
+
 		/* NOTE: using tproxy and redirecting skbs to a proxy
 		 * on a different listener port breaks the assumption
 		 * that the listener socket's icsk_bind_hash is the same
@@ -150,12 +155,13 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
 		 * create a new bind bucket for the child here. */
 		inet_bind_bucket_for_each(tb, &head->chain) {
 			if (net_eq(ib_net(tb), sock_net(sk)) &&
-			    tb->port == port)
+			    tb->l3mdev == l3mdev && tb->port == port)
 				break;
 		}
 		if (!tb) {
 			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
-						     sock_net(sk), head, port);
+						     sock_net(sk), head, port,
+						     l3mdev);
 			if (!tb) {
 				spin_unlock(&head->lock);
 				return -ENOMEM;
@@ -675,6 +681,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 	u32 remaining, offset;
 	int ret, i, low, high;
 	static u32 hint;
+	int l3mdev;
 
 	if (port) {
 		head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -693,6 +700,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		return ret;
 	}
 
+	l3mdev = inet_sk_bound_l3mdev(sk);
+
 	inet_get_local_port_range(net, &low, &high);
 	high++; /* [32768, 60999] -> [32768, 61000[ */
 	remaining = high - low;
@@ -719,7 +728,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		 * the established check is already unique enough.
 		 */
 		inet_bind_bucket_for_each(tb, &head->chain) {
-			if (net_eq(ib_net(tb), net) && tb->port == port) {
+			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+			    tb->port == port) {
 				if (tb->fastreuse >= 0 ||
 				    tb->fastreuseport >= 0)
 					goto next_port;
@@ -732,7 +742,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		}
 
 		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
-					     net, head, port);
+					     net, head, port, l3mdev);
 		if (!tb) {
 			spin_unlock_bh(&head->lock);
 			return -ENOMEM;
-- 
2.11.0

^ permalink raw reply related

* [PATCH net-next v4 0/9] vrf: allow simultaneous service instances in default and other VRFs
From: Mike Manning @ 2018-11-02 19:10 UTC (permalink / raw)
  To: netdev

Services currently have to be VRF-aware if they are using an unbound
socket. One cannot have multiple service instances running in the
default and other VRFs for services that are not VRF-aware and listen
on an unbound socket. This is because there is no easy way of isolating
packets received in the default VRF from those arriving in other VRFs.

This series provides this isolation for stream sockets subject to the
existing kernel parameter net.ipv4.tcp_l3mdev_accept not being set,
given that this is documented as allowing a single service instance to
work across all VRF domains. Similarly, net.ipv4.udp_l3mdev_accept is
checked for datagram sockets, and net.ipv4.raw_l3mdev_accept is
introduced for raw sockets. The functionality applies to UDP & TCP
services as well as those using raw sockets, and is for IPv4 and IPv6.

Example of running ssh instances in default and blue VRF:

$ /usr/sbin/sshd -D
$ ip vrf exec vrf-blue /usr/sbin/sshd
$ ss -ta | egrep 'State|ssh'
State   Recv-Q   Send-Q           Local Address:Port       Peer Address:Port
LISTEN  0        128           0.0.0.0%vrf-blue:ssh             0.0.0.0:*
LISTEN  0        128                    0.0.0.0:ssh             0.0.0.0:*
ESTAB   0        0              192.168.122.220:ssh       192.168.122.1:50282
LISTEN  0        128              [::]%vrf-blue:ssh                [::]:*
LISTEN  0        128                       [::]:ssh                [::]:*
ESTAB   0        0           [3000::2]%vrf-blue:ssh           [3000::9]:45896
ESTAB   0        0                    [2000::2]:ssh           [2000::9]:46398

v1:
   - Address Paolo Abeni's comments (patch 4/5)
   - Fix build when CONFIG_NET_L3_MASTER_DEV not defined (patch 1/5)
v2:
   - Address David Aherns' comments (patches 4/5 and 5/5)
   - Remove patches 3/5 and 5/5 from series for individual submissions
   - Include a sysctl for raw sockets as recommended by David Ahern
   - Expand series into 10 patches and provide improved descriptions
v3:
   - Update description for patch 1/10 and remove patch 6/10
v4:
   - Set default to enabled for raw socket sysctl as recommended by David Ahern

Dewi Morgan (1):
  ipv6: do not drop vrf udp multicast packets

Duncan Eastoe (1):
  net: fix raw socket lookup device bind matching with VRFs

Mike Manning (6):
  net: ensure unbound stream socket to be chosen when not in a VRF
  net: ensure unbound datagram socket to be chosen when not in a VRF
  net: provide a sysctl raw_l3mdev_accept for raw socket lookup with
    VRFs
  vrf: mark skb for multicast or link-local as enslaved to VRF
  ipv6: allow ping to link-local address in VRF
  ipv6: handling of multicast packets received in VRF

Robert Shearman (1):
  net: allow binding socket in a VRF when there's an unbound socket

 Documentation/networking/ip-sysctl.txt | 12 ++++++++++++
 Documentation/networking/vrf.txt       | 22 +++++++++++++++++----
 drivers/net/vrf.c                      | 19 +++++++++---------
 include/net/inet6_hashtables.h         |  5 ++---
 include/net/inet_hashtables.h          | 24 ++++++++++++++++-------
 include/net/inet_sock.h                | 21 ++++++++++++++++++++
 include/net/netns/ipv4.h               |  3 +++
 include/net/raw.h                      | 13 +++++++++++++
 include/net/udp.h                      | 11 +++++++++++
 net/core/sock.c                        |  2 ++
 net/ipv4/af_inet.c                     |  2 ++
 net/ipv4/inet_connection_sock.c        | 13 ++++++++++---
 net/ipv4/inet_hashtables.c             | 34 ++++++++++++++++++++-------------
 net/ipv4/raw.c                         | 19 ++++++++++++++----
 net/ipv4/sysctl_net_ipv4.c             | 11 +++++++++++
 net/ipv4/udp.c                         | 15 ++++++---------
 net/ipv6/datagram.c                    |  5 ++++-
 net/ipv6/inet6_hashtables.c            | 14 ++++++--------
 net/ipv6/ip6_input.c                   | 35 +++++++++++++++++++++++++++++++---
 net/ipv6/ipv6_sockglue.c               |  2 +-
 net/ipv6/raw.c                         |  5 ++---
 net/ipv6/udp.c                         | 22 ++++++++++-----------
 22 files changed, 228 insertions(+), 81 deletions(-)

-- 
2.11.0

^ permalink raw reply

* [PATCH net-next v4 2/9] net: ensure unbound stream socket to be chosen when not in a VRF
From: Mike Manning @ 2018-11-02 19:10 UTC (permalink / raw)
  To: netdev
In-Reply-To: <20181102191020.14170-1-mmanning@vyatta.att-mail.com>

The commit a04a480d4392 ("net: Require exact match for TCP socket
lookups if dif is l3mdev") only ensures that the correct socket is
selected for packets in a VRF. However, there is no guarantee that
the unbound socket will be selected for packets when not in a VRF.
By checking for a device match in compute_score() also for the case
when there is no bound device and attaching a score to this, the
unbound socket is selected. And if a failure is returned when there
is no device match, this ensures that bound sockets are never selected,
even if there is no unbound socket.

Signed-off-by: Mike Manning <mmanning@vyatta.att-mail.com>
---
 include/net/inet_hashtables.h | 11 +++++++++++
 include/net/inet_sock.h       |  8 ++++++++
 net/ipv4/inet_hashtables.c    | 14 ++++++--------
 net/ipv6/inet6_hashtables.c   | 14 ++++++--------
 4 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 4ae060b4bac2..5de2d9f24c05 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -189,6 +189,17 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
 	hashinfo->ehash_locks = NULL;
 }
 
+static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if,
+					int dif, int sdif)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+	return inet_bound_dev_eq(net->ipv4.sysctl_tcp_l3mdev_accept,
+				    bound_dev_if, dif, sdif);
+#else
+	return inet_bound_dev_eq(1, bound_dev_if, dif, sdif);
+#endif
+}
+
 struct inet_bind_bucket *
 inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
 			struct inet_bind_hashbucket *head,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index ed3f723af00b..e8eef85006aa 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -143,6 +143,14 @@ static inline int inet_sk_bound_l3mdev(const struct sock *sk)
 	return 0;
 }
 
+static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if,
+				     int dif, int sdif)
+{
+	if (!bound_dev_if)
+		return !sdif || l3mdev_accept;
+	return bound_dev_if == dif || bound_dev_if == sdif;
+}
+
 struct inet_cork {
 	unsigned int		flags;
 	__be32			addr;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 40d722ab1738..13890d5bfc34 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -235,6 +235,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
 {
 	int score = -1;
 	struct inet_sock *inet = inet_sk(sk);
+	bool dev_match;
 
 	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
 			!ipv6_only_sock(sk)) {
@@ -245,15 +246,12 @@ static inline int compute_score(struct sock *sk, struct net *net,
 				return -1;
 			score += 4;
 		}
-		if (sk->sk_bound_dev_if || exact_dif) {
-			bool dev_match = (sk->sk_bound_dev_if == dif ||
-					  sk->sk_bound_dev_if == sdif);
+		dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
+						 dif, sdif);
+		if (!dev_match)
+			return -1;
+		score += 4;
 
-			if (!dev_match)
-				return -1;
-			if (sk->sk_bound_dev_if)
-				score += 4;
-		}
 		if (sk->sk_incoming_cpu == raw_smp_processor_id())
 			score++;
 	}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 3d7c7460a0c5..5eeeba7181a1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -99,6 +99,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
 				const int dif, const int sdif, bool exact_dif)
 {
 	int score = -1;
+	bool dev_match;
 
 	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
 	    sk->sk_family == PF_INET6) {
@@ -109,15 +110,12 @@ static inline int compute_score(struct sock *sk, struct net *net,
 				return -1;
 			score++;
 		}
-		if (sk->sk_bound_dev_if || exact_dif) {
-			bool dev_match = (sk->sk_bound_dev_if == dif ||
-					  sk->sk_bound_dev_if == sdif);
+		dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
+						 dif, sdif);
+		if (!dev_match)
+			return -1;
+		score++;
 
-			if (!dev_match)
-				return -1;
-			if (sk->sk_bound_dev_if)
-				score++;
-		}
 		if (sk->sk_incoming_cpu == raw_smp_processor_id())
 			score++;
 	}
-- 
2.11.0

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox