* [PATCH bpf] bpf: fix BTF verifier size resolution logic
From: Andrii Nakryiko @ 2019-07-10 8:08 UTC (permalink / raw)
To: andrii.nakryiko, ast, daniel, bpf, netdev, kernel-team
Cc: Andrii Nakryiko, Martin KaFai Lau
BTF verifier has different logic depending on whether we are following
a PTR or STRUCT/ARRAY (or something else). This is an optimization to
stop early in DFS traversal while resolving BTF types. But it also
results in a size resolution bug, when there is a chain, e.g., of PTR ->
TYPEDEF -> ARRAY, in which case due to being in pointer context ARRAY
size won't be resolved, as it is considered to be a sink for pointer,
leading to TYPEDEF being in RESOLVED state with zero size, which is
completely wrong.
Optimization is doubtful, though, as btf_check_all_types() will iterate
over all BTF types anyways, so the only saving is a potentially slightly
shorter stack. But correctness is more important than tiny savings.
This bug manifests itself in rejecting BTF-defined maps that use array
typedef as a value type:
typedef int array_t[16];
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(value, array_t); /* i.e., array_t *value; */
} test_map SEC(".maps");
Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
kernel/bpf/btf.c | 42 +++---------------------------------------
1 file changed, 3 insertions(+), 39 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index cad09858a5f2..c68c7e73b0d1 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -231,14 +231,6 @@ enum visit_state {
RESOLVED,
};
-enum resolve_mode {
- RESOLVE_TBD, /* To Be Determined */
- RESOLVE_PTR, /* Resolving for Pointer */
- RESOLVE_STRUCT_OR_ARRAY, /* Resolving for struct/union
- * or array
- */
-};
-
#define MAX_RESOLVE_DEPTH 32
struct btf_sec_info {
@@ -254,7 +246,6 @@ struct btf_verifier_env {
u32 log_type_id;
u32 top_stack;
enum verifier_phase phase;
- enum resolve_mode resolve_mode;
};
static const char * const btf_kind_str[NR_BTF_KINDS] = {
@@ -964,26 +955,7 @@ static void btf_verifier_env_free(struct btf_verifier_env *env)
static bool env_type_is_resolve_sink(const struct btf_verifier_env *env,
const struct btf_type *next_type)
{
- switch (env->resolve_mode) {
- case RESOLVE_TBD:
- /* int, enum or void is a sink */
- return !btf_type_needs_resolve(next_type);
- case RESOLVE_PTR:
- /* int, enum, void, struct, array, func or func_proto is a sink
- * for ptr
- */
- return !btf_type_is_modifier(next_type) &&
- !btf_type_is_ptr(next_type);
- case RESOLVE_STRUCT_OR_ARRAY:
- /* int, enum, void, ptr, func or func_proto is a sink
- * for struct and array
- */
- return !btf_type_is_modifier(next_type) &&
- !btf_type_is_array(next_type) &&
- !btf_type_is_struct(next_type);
- default:
- BUG();
- }
+ return !btf_type_needs_resolve(next_type);
}
static bool env_type_is_resolved(const struct btf_verifier_env *env,
@@ -1010,13 +982,6 @@ static int env_stack_push(struct btf_verifier_env *env,
v->type_id = type_id;
v->next_member = 0;
- if (env->resolve_mode == RESOLVE_TBD) {
- if (btf_type_is_ptr(t))
- env->resolve_mode = RESOLVE_PTR;
- else if (btf_type_is_struct(t) || btf_type_is_array(t))
- env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY;
- }
-
return 0;
}
@@ -1038,7 +1003,7 @@ static void env_stack_pop_resolved(struct btf_verifier_env *env,
env->visit_states[type_id] = RESOLVED;
}
-static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
+static const struct resolve_vertex *env_stack_peek(struct btf_verifier_env *env)
{
return env->top_stack ? &env->stack[env->top_stack - 1] : NULL;
}
@@ -3030,9 +2995,8 @@ static int btf_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v;
int err = 0;
- env->resolve_mode = RESOLVE_TBD;
env_stack_push(env, t, type_id);
- while (!err && (v = env_stack_peak(env))) {
+ while (!err && (v = env_stack_peek(env))) {
env->log_type_id = v->type_id;
err = btf_type_ops(v->t)->resolve(env, v);
}
--
2.17.1
^ permalink raw reply related
* Re: Question about nf_conntrack_proto for IPsec
From: Florian Westphal @ 2019-07-10 8:07 UTC (permalink / raw)
To: Naruto Nguyen; +Cc: Florian Westphal, netfilter-devel, netdev, netfilter
In-Reply-To: <CANpxKHGa6DpV-9n8La7wh6r7MbEZpzGTWOO1AhmhWv072b4LAg@mail.gmail.com>
Naruto Nguyen <narutonguyen2018@gmail.com> wrote:
> Could you please elaborate more on how generic tracker tracks ESP connection?
All protocols that do not have a more specific l4 tracker are tracked
based on l3 protocol + l4 proto number.
IOW, any ESP packet sent between the same endpoint addresses is seen
as matching a single esp flow.
We could easily add the ESP SPI as additional distinction marker if needed.
^ permalink raw reply
* Re: [PATCH] ipvs: remove unnecessary space
From: Simon Horman @ 2019-07-10 8:06 UTC (permalink / raw)
To: yangxingwu, Pablo Neira Ayuso
Cc: wensong, ja, pablo, kadlec, fw, davem, netdev, lvs-devel,
netfilter-devel, coreteam, linux-kernel
In-Reply-To: <20190710074552.74394-1-xingwu.yang@gmail.com>
On Wed, Jul 10, 2019 at 03:45:52PM +0800, yangxingwu wrote:
> this patch removes the extra space.
>
> Signed-off-by: yangxingwu <xingwu.yang@gmail.com>
Thanks, this looks good to me.
Acked-by: Simon Horman <horms@verge.net.au>
Pablo, please consider including this in nf-next.
> ---
> net/netfilter/ipvs/ip_vs_mh.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
> index 94d9d34..98e358e 100644
> --- a/net/netfilter/ipvs/ip_vs_mh.c
> +++ b/net/netfilter/ipvs/ip_vs_mh.c
> @@ -174,8 +174,8 @@ static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
> return 0;
> }
>
> - table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
> - sizeof(unsigned long), GFP_KERNEL);
> + table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
> + sizeof(unsigned long), GFP_KERNEL);
> if (!table)
> return -ENOMEM;
>
> --
> 1.8.3.1
>
^ permalink raw reply
* Re: [PATCH net-next,v4 08/12] drivers: net: use flow block API
From: Jiri Pirko @ 2019-07-10 8:01 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: netdev, davem, thomas.lendacky, f.fainelli, ariel.elior,
michael.chan, madalin.bucur, yisen.zhuang, salil.mehta,
jeffrey.t.kirsher, tariqt, saeedm, jiri, idosch, jakub.kicinski,
peppe.cavallaro, grygorii.strashko, andrew, vivien.didelot,
alexandre.torgue, joabreu, linux-net-drivers, ogerlitz,
Manish.Chopra, marcelo.leitner, mkubecek, venkatkumar.duvvuru,
maxime.chevallier, cphealy, phil, netfilter-devel
In-Reply-To: <20190709205550.3160-9-pablo@netfilter.org>
Tue, Jul 09, 2019 at 10:55:46PM CEST, pablo@netfilter.org wrote:
[...]
> static int
> mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
>- struct tcf_block *block, bool ingress,
>- struct netlink_ext_ack *extack)
>+ struct flow_block_offload *f, bool ingress)
> {
> struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
> struct mlxsw_sp_acl_block *acl_block;
>- struct tcf_block_cb *block_cb;
>+ struct flow_block_cb *block_cb;
>+ bool register_block = false;
> int err;
>
>- block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower,
>- mlxsw_sp);
>+ block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower,
>+ mlxsw_sp);
> if (!block_cb) {
>- acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, block->net);
>+ acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net);
> if (!acl_block)
> return -ENOMEM;
>- block_cb = __tcf_block_cb_register(block,
>- mlxsw_sp_setup_tc_block_cb_flower,
>- mlxsw_sp, acl_block, extack);
>+ block_cb = flow_block_cb_alloc(f->net,
>+ mlxsw_sp_setup_tc_block_cb_flower,
>+ mlxsw_sp, acl_block,
>+ mlxsw_sp_tc_block_flower_release);
> if (IS_ERR(block_cb)) {
>+ mlxsw_sp_acl_block_destroy(acl_block);
> err = PTR_ERR(block_cb);
> goto err_cb_register;
> }
>+ register_block = true;
> } else {
>- acl_block = tcf_block_cb_priv(block_cb);
>+ acl_block = flow_block_cb_priv(block_cb);
> }
>- tcf_block_cb_incref(block_cb);
>+ flow_block_cb_incref(block_cb);
> err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block,
> mlxsw_sp_port, ingress);
> if (err)
>@@ -1622,28 +1634,31 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
> else
> mlxsw_sp_port->eg_acl_block = acl_block;
>
>+ if (register_block) {
>+ flow_block_cb_add(block_cb, f);
>+ list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list);
>+ }
What prevents you from doing these 2 above right after
flow_block_cb_alloc?
More than that, what prevents you from maintaining the same flow as was there
originally? You just need struct flow_block as a replacement of
struct tcf_block and have it contained in both struct nft_base_chain
and struct tcf_block.
And you would push pointer to struct flow_block down to drivers
in struct flow_block_offload.
[...]
^ permalink raw reply
* [PATCH] [net-next] davinci_cpdma: don't cast dma_addr_t to pointer
From: Arnd Bergmann @ 2019-07-10 8:00 UTC (permalink / raw)
To: David S. Miller
Cc: Arnd Bergmann, Ivan Khoronzhuk, Grygorii Strashko, Andrew Lunn,
Ilias Apalodimas, linux-omap, netdev, linux-kernel
dma_addr_t may be 64-bit wide on 32-bit architectures, so it is not
valid to cast between it and a pointer:
drivers/net/ethernet/ti/davinci_cpdma.c: In function 'cpdma_chan_submit_si':
drivers/net/ethernet/ti/davinci_cpdma.c:1047:12: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast]
drivers/net/ethernet/ti/davinci_cpdma.c: In function 'cpdma_chan_idle_submit_mapped':
drivers/net/ethernet/ti/davinci_cpdma.c:1114:12: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast]
drivers/net/ethernet/ti/davinci_cpdma.c: In function 'cpdma_chan_submit_mapped':
drivers/net/ethernet/ti/davinci_cpdma.c:1164:12: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast]
Solve this by using two separate members in 'struct submit_info'.
Since this avoids the use of the 'flags' member, the structure does
not even grow in typical configurations.
Fixes: 6670acacd59e ("net: ethernet: ti: davinci_cpdma: add dma mapped submit")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
drivers/net/ethernet/ti/davinci_cpdma.c | 26 ++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 0ca2a1a254de..a65edd2770e6 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -138,8 +138,8 @@ struct submit_info {
struct cpdma_chan *chan;
int directed;
void *token;
- void *data;
- int flags;
+ void *data_virt;
+ dma_addr_t data_dma;
int len;
};
@@ -1043,12 +1043,12 @@ static int cpdma_chan_submit_si(struct submit_info *si)
mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP;
cpdma_desc_to_port(chan, mode, si->directed);
- if (si->flags & CPDMA_DMA_EXT_MAP) {
- buffer = (dma_addr_t)si->data;
+ if (si->data_dma) {
+ buffer = si->data_dma;
dma_sync_single_for_device(ctlr->dev, buffer, len, chan->dir);
swlen |= CPDMA_DMA_EXT_MAP;
} else {
- buffer = dma_map_single(ctlr->dev, si->data, len, chan->dir);
+ buffer = dma_map_single(ctlr->dev, si->data_virt, len, chan->dir);
ret = dma_mapping_error(ctlr->dev, buffer);
if (ret) {
cpdma_desc_free(ctlr->pool, desc, 1);
@@ -1086,10 +1086,10 @@ int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data,
si.chan = chan;
si.token = token;
- si.data = data;
+ si.data_virt = data;
+ si.data_dma = 0;
si.len = len;
si.directed = directed;
- si.flags = 0;
spin_lock_irqsave(&chan->lock, flags);
if (chan->state == CPDMA_STATE_TEARDOWN) {
@@ -1111,10 +1111,10 @@ int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token,
si.chan = chan;
si.token = token;
- si.data = (void *)data;
+ si.data_virt = NULL;
+ si.data_dma = data;
si.len = len;
si.directed = directed;
- si.flags = CPDMA_DMA_EXT_MAP;
spin_lock_irqsave(&chan->lock, flags);
if (chan->state == CPDMA_STATE_TEARDOWN) {
@@ -1136,10 +1136,10 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
si.chan = chan;
si.token = token;
- si.data = data;
+ si.data_virt = data;
+ si.data_dma = 0;
si.len = len;
si.directed = directed;
- si.flags = 0;
spin_lock_irqsave(&chan->lock, flags);
if (chan->state != CPDMA_STATE_ACTIVE) {
@@ -1161,10 +1161,10 @@ int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token,
si.chan = chan;
si.token = token;
- si.data = (void *)data;
+ si.data_virt = NULL;
+ si.data_dma = data;
si.len = len;
si.directed = directed;
- si.flags = CPDMA_DMA_EXT_MAP;
spin_lock_irqsave(&chan->lock, flags);
if (chan->state != CPDMA_STATE_ACTIVE) {
--
2.20.0
^ permalink raw reply related
* Re: [PATCH] tipc: ensure skb->lock is initialised
From: Eric Dumazet @ 2019-07-10 8:00 UTC (permalink / raw)
To: Jon Maloy, Eric Dumazet, Chris Packham, ying.xue@windriver.com,
davem@davemloft.net
Cc: netdev@vger.kernel.org, tipc-discussion@lists.sourceforge.net,
linux-kernel@vger.kernel.org
In-Reply-To: <MN2PR15MB35813EA3ADE7E5E83A657D3F9AF10@MN2PR15MB3581.namprd15.prod.outlook.com>
On 7/9/19 10:15 PM, Jon Maloy wrote:
>
> It is not only for lockdep purposes, -it is essential. But please provide details about where you see that more fixes are needed.
>
The simple fact that you detect a problem only when skb_queue_purge() is called should speak for itself.
As I stated, there are many places where the list is manipulated _without_ its spinlock being held.
You want consistency, then
- grab the spinlock all the time.
- Or do not ever use it.
Do not initialize the spinlock just in case a path will use skb_queue_purge() (instead of using __skb_queue_purge())
^ permalink raw reply
* general protection fault in rcu_core
From: syzbot @ 2019-07-10 7:57 UTC (permalink / raw)
To: ast, bp, daniel, drake, hpa, jacob.jun.pan, john.fastabend,
linux-kernel, mingo, netdev, puwen, rppt, syzkaller-bugs, tglx,
x86
Hello,
syzbot found the following crash on:
HEAD commit: 4608a726 Add linux-next specific files for 20190709
git tree: linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=14458387a00000
kernel config: https://syzkaller.appspot.com/x/.config?x=7a02e36d356a9a17
dashboard link: https://syzkaller.appspot.com/bug?extid=73ac69a8f7a5e5c126f1
compiler: gcc (GCC) 9.0.0 20181231 (experimental)
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=128e3217a00000
The bug was bisected to:
commit e9db4ef6bf4ca9894bb324c76e01b8f1a16b2650
Author: John Fastabend <john.fastabend@gmail.com>
Date: Sat Jun 30 13:17:47 2018 +0000
bpf: sockhash fix omitted bucket lock in sock_close
bisection log: https://syzkaller.appspot.com/x/bisect.txt?x=1155a96fa00000
final crash: https://syzkaller.appspot.com/x/report.txt?x=1355a96fa00000
console output: https://syzkaller.appspot.com/x/log.txt?x=1555a96fa00000
IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+73ac69a8f7a5e5c126f1@syzkaller.appspotmail.com
Fixes: e9db4ef6bf4c ("bpf: sockhash fix omitted bucket lock in sock_close")
kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] PREEMPT SMP KASAN
CPU: 0 PID: 9817 Comm: blkid Not tainted 5.2.0-next-20190709 #34
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
RIP: 0010:lookup_object lib/debugobjects.c:193 [inline]
RIP: 0010:debug_object_active_state lib/debugobjects.c:900 [inline]
RIP: 0010:debug_object_active_state+0x16e/0x350 lib/debugobjects.c:885
Code: 1a 4c 89 e0 48 c1 e8 03 80 3c 08 00 0f 85 6c 01 00 00 4d 8b 24 24 4d
85 e4 74 6a 4d 8d 44 24 18 83 c3 01 4c 89 c7 48 c1 ef 03 <80> 3c 0f 00 0f
85 17 01 00 00 4d 3b 7c 24 18 75 c6 49 8d 7c 24 10
RSP: 0018:ffff8880ae809d00 EFLAGS: 00010802
RAX: 1ffffffff0c4eadc RBX: 0000000000000005 RCX: dffffc0000000000
RDX: 0000000000000001 RSI: 0000000000000286 RDI: 1dc43d1fffffc920
RBP: ffff8880ae809de8 R08: ee21e8fffffe4901 R09: ffffed1015d0138d
R10: ffffffff8a9b1768 R11: 0000000000000003 R12: ee21e8fffffe48e9
R13: 1ffff11015d013a4 R14: ffffffff88dab220 R15: ffff8880a21f7f58
FS: 00007f3fd76fc740(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f3fd72daa20 CR3: 000000008fe3c000 CR4: 00000000001406f0
Call Trace:
<IRQ>
debug_rcu_head_unqueue kernel/rcu/rcu.h:185 [inline]
rcu_do_batch kernel/rcu/tree.c:2113 [inline]
rcu_core+0x745/0x1580 kernel/rcu/tree.c:2314
rcu_core_si+0x9/0x10 kernel/rcu/tree.c:2323
__do_softirq+0x262/0x98c kernel/softirq.c:292
invoke_softirq kernel/softirq.c:373 [inline]
irq_exit+0x19b/0x1e0 kernel/softirq.c:413
exiting_irq arch/x86/include/asm/apic.h:537 [inline]
smp_apic_timer_interrupt+0x1a3/0x610 arch/x86/kernel/apic/apic.c:1095
apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:828
</IRQ>
RIP: 0010:arch_local_irq_restore arch/x86/include/asm/paravirt.h:767
[inline]
RIP: 0010:console_unlock+0xdab/0xf10 kernel/printk/printk.c:2467
Code: 88 48 ba 00 00 00 00 00 fc ff df 48 c1 e8 03 80 3c 10 00 75 30 48 83
3d 42 4a 77 07 00 74 1f e8 7b ab 16 00 48 8b 7d 98 57 9d <0f> 1f 44 00 00
e9 64 fa ff ff e8 c6 ea 50 00 e9 0e f5 ff ff e8 5c
RSP: 0018:ffff8880991bf2b8 EFLAGS: 00000293 ORIG_RAX: ffffffffffffff13
RAX: ffff8880a7e1e000 RBX: 0000000000000200 RCX: 1ffffffff134a59e
RDX: 0000000000000000 RSI: ffffffff815b9995 RDI: 0000000000000293
RBP: ffff8880991bf340 R08: ffff8880a7e1e000 R09: fffffbfff1349f60
R10: fffffbfff1349f5f R11: ffffffff89a4faff R12: 0000000000000001
R13: ffffffff843434b0 R14: dffffc0000000000 R15: ffffffff893cb710
vprintk_emit+0x2a0/0x700 kernel/printk/printk.c:1986
vprintk_default+0x28/0x30 kernel/printk/printk.c:2013
vprintk_func+0x7e/0x189 kernel/printk/printk_safe.c:386
printk+0xba/0xed kernel/printk/printk.c:2046
__warn_printk+0x9b/0xf3 kernel/panic.c:630
debug_mutex_wake_waiter+0x1d7/0x330 kernel/locking/mutex-debug.c:41
__mutex_unlock_slowpath+0x3d5/0x6b0 kernel/locking/mutex.c:1241
mutex_unlock+0xd/0x10 kernel/locking/mutex.c:714
kobj_lookup+0x250/0x460 drivers/base/map.c:123
get_gendisk+0x4d/0x390 block/genhd.c:869
bdev_get_gendisk fs/block_dev.c:1100 [inline]
__blkdev_get+0x457/0x1660 fs/block_dev.c:1493
blkdev_get+0xc4/0x990 fs/block_dev.c:1652
blkdev_open+0x205/0x290 fs/block_dev.c:1810
do_dentry_open+0x4df/0x1250 fs/open.c:778
vfs_open+0xa0/0xd0 fs/open.c:887
do_last fs/namei.c:3416 [inline]
path_openat+0x10e9/0x4630 fs/namei.c:3533
do_filp_open+0x1a1/0x280 fs/namei.c:3563
do_sys_open+0x3fe/0x5d0 fs/open.c:1070
__do_sys_open fs/open.c:1088 [inline]
__se_sys_open fs/open.c:1083 [inline]
__x64_sys_open+0x7e/0xc0 fs/open.c:1083
do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f3fd7004120
Code: 48 8b 15 1b 4d 2b 00 f7 d8 64 89 02 83 c8 ff c3 90 90 90 90 90 90 90
90 90 90 83 3d d5 a4 2b 00 00 75 10 b8 02 00 00 00 0f 05 <48> 3d 01 f0 ff
ff 73 31 c3 48 83 ec 08 e8 5e 8c 01 00 48 89 04 24
RSP: 002b:00007ffc2fe70dc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000002
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3fd7004120
RDX: 00007ffc2fe72f33 RSI: 0000000000000000 RDI: 00007ffc2fe72f33
RBP: 0000000000000000 R08: 0000000000000078 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000001882030
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000005
Modules linked in:
---[ end trace d75a73838ffe8a54 ]---
RIP: 0010:lookup_object lib/debugobjects.c:193 [inline]
RIP: 0010:debug_object_active_state lib/debugobjects.c:900 [inline]
RIP: 0010:debug_object_active_state+0x16e/0x350 lib/debugobjects.c:885
Code: 1a 4c 89 e0 48 c1 e8 03 80 3c 08 00 0f 85 6c 01 00 00 4d 8b 24 24 4d
85 e4 74 6a 4d 8d 44 24 18 83 c3 01 4c 89 c7 48 c1 ef 03 <80> 3c 0f 00 0f
85 17 01 00 00 4d 3b 7c 24 18 75 c6 49 8d 7c 24 10
RSP: 0018:ffff8880ae809d00 EFLAGS: 00010802
RAX: 1ffffffff0c4eadc RBX: 0000000000000005 RCX: dffffc0000000000
RDX: 0000000000000001 RSI: 0000000000000286 RDI: 1dc43d1fffffc920
RBP: ffff8880ae809de8 R08: ee21e8fffffe4901 R09: ffffed1015d0138d
R10: ffffffff8a9b1768 R11: 0000000000000003 R12: ee21e8fffffe48e9
R13: 1ffff11015d013a4 R14: ffffffff88dab220 R15: ffff8880a21f7f58
FS: 00007f3fd76fc740(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f3fd72daa20 CR3: 000000008fe3c000 CR4: 00000000001406f0
---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.
syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
For information about bisection process see: https://goo.gl/tpsmEJ#bisection
syzbot can test patches for this bug, for details see:
https://goo.gl/tpsmEJ#testing-patches
^ permalink raw reply
* Re: [PATCH] vhost: fix null pointer dereference in vhost_del_umem_range
From: Denis Kirjanov @ 2019-07-10 7:56 UTC (permalink / raw)
To: David Miller; +Cc: mst, jasowang, kvm, netdev
In-Reply-To: <20190709.125850.2133620086434576103.davem@davemloft.net>
On 7/9/19, David Miller <davem@davemloft.net> wrote:
> From: Denis Kirjanov <kda@linux-powerpc.org>
> Date: Tue, 9 Jul 2019 13:42:51 +0200
>
>> @@ -962,7 +962,8 @@ static void vhost_del_umem_range(struct vhost_umem
>> *umem,
>>
>> while ((node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
>> start, end)))
>> - vhost_umem_free(umem, node);
>> + if (node)
>> + vhost_umem_free(umem, node);
>
> If 'node' is NULL we will not be in the body of the loop as per
> the while() condition.
The patch is incorrect, please ignore
>
> How did you test this?
>
^ permalink raw reply
* Re: [PATCH net-next,v4 12/12] netfilter: nf_tables: add hardware offload support
From: Jiri Pirko @ 2019-07-10 7:52 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: netdev, davem, thomas.lendacky, f.fainelli, ariel.elior,
michael.chan, madalin.bucur, yisen.zhuang, salil.mehta,
jeffrey.t.kirsher, tariqt, saeedm, jiri, idosch, jakub.kicinski,
peppe.cavallaro, grygorii.strashko, andrew, vivien.didelot,
alexandre.torgue, joabreu, linux-net-drivers, ogerlitz,
Manish.Chopra, marcelo.leitner, mkubecek, venkatkumar.duvvuru,
maxime.chevallier, cphealy, phil, netfilter-devel
In-Reply-To: <20190709205550.3160-13-pablo@netfilter.org>
Tue, Jul 09, 2019 at 10:55:50PM CEST, pablo@netfilter.org wrote:
[...]
>+ if (!dev || !dev->netdev_ops->ndo_setup_tc)
Why didn't you rename ndo_setup_tc? I put a comment about it in the
previous version thread. I expect that you can at least write why it is
a wrong idea.
[...]
^ permalink raw reply
* [PATCH] ipvs: remove unnecessary space
From: yangxingwu @ 2019-07-10 7:45 UTC (permalink / raw)
To: wensong
Cc: horms, ja, pablo, kadlec, fw, davem, netdev, lvs-devel,
netfilter-devel, coreteam, linux-kernel, yangxingwu
this patch removes the extra space.
Signed-off-by: yangxingwu <xingwu.yang@gmail.com>
---
net/netfilter/ipvs/ip_vs_mh.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c
index 94d9d34..98e358e 100644
--- a/net/netfilter/ipvs/ip_vs_mh.c
+++ b/net/netfilter/ipvs/ip_vs_mh.c
@@ -174,8 +174,8 @@ static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
return 0;
}
- table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
- sizeof(unsigned long), GFP_KERNEL);
+ table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
+ sizeof(unsigned long), GFP_KERNEL);
if (!table)
return -ENOMEM;
--
1.8.3.1
^ permalink raw reply related
* [PATCH ipsec 2/2] xfrm interface: ifname may be wrong in logs
From: Nicolas Dichtel @ 2019-07-10 7:45 UTC (permalink / raw)
To: steffen.klassert, davem; +Cc: netdev, Nicolas Dichtel
In-Reply-To: <20190710074536.7505-1-nicolas.dichtel@6wind.com>
The ifname is copied when the interface is created, but is never updated
later. In fact, this property is used only in one error message, where the
netdevice pointer is available, thus let's use it.
Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
include/net/xfrm.h | 1 -
net/xfrm/xfrm_interface.c | 10 +---------
2 files changed, 1 insertion(+), 10 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a2907873ed56..287e39753d94 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -988,7 +988,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
struct xfrm_if_parms {
- char name[IFNAMSIZ]; /* name of XFRM device */
int link; /* ifindex of underlying L2 interface */
u32 if_id; /* interface identifyer */
};
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index dfa5aebdec57..a60d391f7ebe 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -145,8 +145,6 @@ static int xfrmi_create(struct net_device *dev)
if (err < 0)
goto out;
- strcpy(xi->p.name, dev->name);
-
dev_hold(dev);
xfrmi_link(xfrmn, xi);
@@ -294,7 +292,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (tdev == dev) {
stats->collisions++;
net_warn_ratelimited("%s: Local routing loop detected!\n",
- xi->p.name);
+ dev->name);
goto tx_err_dst_release;
}
@@ -638,12 +636,6 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
int err;
xfrmi_netlink_parms(data, &p);
-
- if (!tb[IFLA_IFNAME])
- return -EINVAL;
-
- nla_strlcpy(p.name, tb[IFLA_IFNAME], IFNAMSIZ);
-
xi = xfrmi_locate(net, &p);
if (xi)
return -EEXIST;
--
2.21.0
^ permalink raw reply related
* [PATCH ipsec 1/2] xfrm interface: avoid corruption on changelink
From: Nicolas Dichtel @ 2019-07-10 7:45 UTC (permalink / raw)
To: steffen.klassert, davem; +Cc: netdev, Nicolas Dichtel
In-Reply-To: <20190710074536.7505-1-nicolas.dichtel@6wind.com>
The new parameters must not be stored in the netdev_priv() before
validation, as this may corrupt the interface. Note also that if data is NULL,
only a memset() is done.
$ ip link add xfrm1 type xfrm dev lo if_id 1
$ ip link add xfrm2 type xfrm dev lo if_id 2
$ ip link set xfrm1 type xfrm dev lo if_id 2
RTNETLINK answers: File exists
$ ip -d link list dev xfrm1
5: xfrm1@lo: <NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/none 00:00:00:00:00:00 brd 00:00:00:00:00:00 promiscuity 0 minmtu 68 maxmtu 1500
xfrm if_id 0x2 addrgenmode eui64 numtxqueues 1 numrxqueues 1 gso_max_size 65536 gso_max_segs 65535
=> "if_id 0x2"
Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
net/xfrm/xfrm_interface.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 7dbe0c608df5..dfa5aebdec57 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -671,12 +671,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct xfrm_if *xi = netdev_priv(dev);
struct net *net = dev_net(dev);
+ struct xfrm_if_parms p;
+ struct xfrm_if *xi;
- xfrmi_netlink_parms(data, &xi->p);
-
- xi = xfrmi_locate(net, &xi->p);
+ xfrmi_netlink_parms(data, &p);
+ xi = xfrmi_locate(net, &p);
if (!xi) {
xi = netdev_priv(dev);
} else {
@@ -684,7 +684,7 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
return -EEXIST;
}
- return xfrmi_update(xi, &xi->p);
+ return xfrmi_update(xi, &p);
}
static size_t xfrmi_get_size(const struct net_device *dev)
--
2.21.0
^ permalink raw reply related
* [PATCH ipsec 0/2] xfrm interface: bug fix on changelink
From: Nicolas Dichtel @ 2019-07-10 7:45 UTC (permalink / raw)
To: steffen.klassert, davem; +Cc: netdev
Here are two bug fixes found by code review. The first one avoids a corruption of
existing xfrm interfaces and the second is a minor fix of an error message.
include/net/xfrm.h | 1 -
net/xfrm/xfrm_interface.c | 20 ++++++--------------
2 files changed, 6 insertions(+), 15 deletions(-)
Regards,
Nicolas
^ permalink raw reply
* Re: [PATCH net-next,v4 05/12] net: flow_offload: add list handling functions
From: Jiri Pirko @ 2019-07-10 7:36 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: netdev, davem, thomas.lendacky, f.fainelli, ariel.elior,
michael.chan, madalin.bucur, yisen.zhuang, salil.mehta,
jeffrey.t.kirsher, tariqt, saeedm, jiri, idosch, jakub.kicinski,
peppe.cavallaro, grygorii.strashko, andrew, vivien.didelot,
alexandre.torgue, joabreu, linux-net-drivers, ogerlitz,
Manish.Chopra, marcelo.leitner, mkubecek, venkatkumar.duvvuru,
maxime.chevallier, cphealy, phil, netfilter-devel
In-Reply-To: <20190709205550.3160-6-pablo@netfilter.org>
Tue, Jul 09, 2019 at 10:55:43PM CEST, pablo@netfilter.org wrote:
[...]
>@@ -176,6 +176,7 @@ struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
> if (!block_cb)
> return ERR_PTR(-ENOMEM);
>
>+ block_cb->net = net;
> block_cb->cb = cb;
> block_cb->cb_ident = cb_ident;
> block_cb->cb_priv = cb_priv;
>@@ -194,6 +195,22 @@ void flow_block_cb_free(struct flow_block_cb *block_cb)
> }
> EXPORT_SYMBOL(flow_block_cb_free);
>
>+struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f,
>+ tc_setup_cb_t *cb, void *cb_ident)
>+{
>+ struct flow_block_cb *block_cb;
>+
>+ list_for_each_entry(block_cb, f->driver_block_list, driver_list) {
>+ if (block_cb->net == f->net &&
I don't understand why you need net for this. You should have a list of
cbs per subsystem (tc/nft) and go over it here.
The clash of 2 subsystems is prevented later on by
flow_block_cb_is_busy().
Am I missing something?
If not, could you please remove use of net from flow_block_cb_alloc()
and from here and replace it by some shared flow structure holding the
cb list that would be used by both tc and nft?
>+ block_cb->cb == cb &&
>+ block_cb->cb_ident == cb_ident)
>+ return block_cb;
>+ }
>+
>+ return NULL;
>+}
>+EXPORT_SYMBOL(flow_block_cb_lookup);
>+
[...]
^ permalink raw reply
* Re: [PATCH v6 rdma-next 0/6] RDMA/qedr: Use the doorbell overflow recovery mechanism for RDMA
From: Gal Pressman @ 2019-07-10 7:32 UTC (permalink / raw)
To: Michal Kalderon, ariel.elior, jgg, dledford
Cc: linux-rdma, davem, netdev, sleybo
In-Reply-To: <20190709141735.19193-1-michal.kalderon@marvell.com>
On 09/07/2019 17:17, Michal Kalderon wrote:
> This patch series uses the doorbell overflow recovery mechanism
> introduced in
> commit 36907cd5cd72 ("qed: Add doorbell overflow recovery mechanism")
> for rdma ( RoCE and iWARP )
>
> The first three patches modify the core code to contain helper
> functions for managing mmap_xa inserting, getting and freeing
> entries. The code was taken almost as is from the efa driver.
> There is still an open discussion on whether we should take
> this even further and make the entire mmap generic. Until a
> decision is made, I only created the database API and modified
> the efa and qedr driver to use it. The doorbell recovery code will be based
> on the common code.
>
> Efa driver was compile tested only.
For the whole series:
Tested-by: Gal Pressman <galpress@amazon.com>
^ permalink raw reply
* Re: [PATCH] ipvs: Delete some unused space characters in Kconfig
From: Simon Horman @ 2019-07-10 7:29 UTC (permalink / raw)
To: xianfengting221, Pablo Neira Ayuso
Cc: wensong, ja, pablo, kadlec, fw, davem, netdev, lvs-devel,
linux-kernel
In-Reply-To: <1562473009-29726-1-git-send-email-xianfengting221@163.com>
On Sun, Jul 07, 2019 at 12:16:49PM +0800, xianfengting221@163.com wrote:
> From: Hu Haowen <xianfengting221@163.com>
>
> The space characters at the end of lines are always unused and
> not easy to find. This patch deleted some of them I have found
> in Kconfig.
>
> Signed-off-by: Hu Haowen <xianfengting221@163.com>
> ---
>
> This is my first patch to the Linux kernel, so please forgive
> me if anything went wrong.
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Thanks Hu,
this looks good to me.
Pablo, please consider this for inclusion in nf-next.
>
> net/netfilter/ipvs/Kconfig | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
> index f6f1a0d..54afad5 100644
> --- a/net/netfilter/ipvs/Kconfig
> +++ b/net/netfilter/ipvs/Kconfig
> @@ -120,7 +120,7 @@ config IP_VS_RR
>
> If you want to compile it in kernel, say Y. To compile it as a
> module, choose M here. If unsure, say N.
> -
> +
> config IP_VS_WRR
> tristate "weighted round-robin scheduling"
> ---help---
> @@ -138,7 +138,7 @@ config IP_VS_LC
> tristate "least-connection scheduling"
> ---help---
> The least-connection scheduling algorithm directs network
> - connections to the server with the least number of active
> + connections to the server with the least number of active
> connections.
>
> If you want to compile it in kernel, say Y. To compile it as a
> @@ -193,7 +193,7 @@ config IP_VS_LBLCR
> tristate "locality-based least-connection with replication scheduling"
> ---help---
> The locality-based least-connection with replication scheduling
> - algorithm is also for destination IP load balancing. It is
> + algorithm is also for destination IP load balancing. It is
> usually used in cache cluster. It differs from the LBLC scheduling
> as follows: the load balancer maintains mappings from a target
> to a set of server nodes that can serve the target. Requests for
> @@ -250,8 +250,8 @@ config IP_VS_SED
> tristate "shortest expected delay scheduling"
> ---help---
> The shortest expected delay scheduling algorithm assigns network
> - connections to the server with the shortest expected delay. The
> - expected delay that the job will experience is (Ci + 1) / Ui if
> + connections to the server with the shortest expected delay. The
> + expected delay that the job will experience is (Ci + 1) / Ui if
> sent to the ith server, in which Ci is the number of connections
> on the ith server and Ui is the fixed service rate (weight)
> of the ith server.
> --
> 2.7.4
>
>
^ permalink raw reply
* Re: [RFC PATCH net-next 0/3] net: batched receive in GRO path
From: Paolo Abeni @ 2019-07-10 7:27 UTC (permalink / raw)
To: Edward Cree, David Miller; +Cc: netdev, Eric Dumazet
In-Reply-To: <7920e85c-439e-0622-46f8-0602cf37e306@solarflare.com>
Hi,
On Tue, 2019-07-09 at 20:27 +0100, Edward Cree wrote:
> Where not specified (as batch=), net.core.gro_normal_batch was set to 8.
> The net-next baseline used for these tests was commit 7d30a7f6424e.
> TCP 4 streams, GRO on: all results line rate (9.415Gbps)
> net-next: 210.3% cpu
> after #1: 181.5% cpu (-13.7%, p=0.031 vs net-next)
> after #3: 191.7% cpu (- 8.9%, p=0.102 vs net-next)
> TCP 4 streams, GRO off:
> after #1: 7.785 Gbps
> after #3: 8.387 Gbps (+ 7.7%, p=0.215 vs #1, but note *)
> TCP 1 stream, GRO on: all results line rate & ~200% cpu.
> TCP 1 stream, GRO off:
> after #1: 6.444 Gbps
> after #3: 7.363 Gbps (+14.3%, p=0.003 vs #1)
> batch=16: 7.199 Gbps
> batch= 4: 7.354 Gbps
> batch= 0: 5.899 Gbps
> TCP 100 RR, GRO off:
> net-next: 995.083 us
> after #1: 969.167 us (- 2.6%, p=0.204 vs net-next)
> after #3: 976.433 us (- 1.9%, p=0.254 vs net-next)
>
> (*) These tests produced a mixture of line-rate and below-line-rate results,
> meaning that statistically speaking the results were 'censored' by the
> upper bound, and were thus not normally distributed, making a Welch t-test
> mathematically invalid. I therefore also calculated estimators according
> to [2], which gave the following:
> after #1: 8.155 Gbps
> after #3: 8.716 Gbps (+ 6.9%, p=0.291 vs #1)
> (though my procedure for determining ν wasn't mathematically well-founded
> either, so take that p-value with a grain of salt).
I'm toying with a patch similar to your 3/3 (most relevant difference
being the lack of a limit to the batch size), on top of ixgbe (which
sends all the pkts to the GRO engine), and I'm observing more
controversial results (UDP only):
* when a single rx queue is running, I see a just-above-noise
performance delta
* when multiple rx queues are running, I observe measurable regressions
(note: I use small pkts, still well under line rate even with multiple
rx queues)
I'll try to test your patch in the following days.
Side note: I think that in patch 3/3, it's necessary to add a call to
gro_normal_list() also inside napi_busy_loop().
Cheers,
Paolo
^ permalink raw reply
* [PATCH iproute2-rc 4/8] rdma: Add rdma statistic counter per-port auto mode support
From: Leon Romanovsky @ 2019-07-10 7:24 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Leon Romanovsky, netdev, David Ahern, Mark Zhang,
RDMA mailing list
In-Reply-To: <20190710072455.9125-1-leon@kernel.org>
From: Mark Zhang <markz@mellanox.com>
With per-QP statistic counter support, a user is allowed to monitor
specific QP categories, which are bound to/unbound from counters
dynamically allocated/deallocated.
In per-port "auto" mode, QPs are bound to counters automatically
according to common criteria. For example, in a per-"type" (qp type)
scheme, all QPs in a process that have the same qp type are bound
automatically to a single counter.
Currently only "type" (qp type) is supported. Examples:
$ rdma statistic qp set link mlx5_2/1 auto type on
$ rdma statistic qp set link mlx5_2/1 auto off
Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
rdma/stat.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++
rdma/utils.c | 1 +
2 files changed, 88 insertions(+)
diff --git a/rdma/stat.c b/rdma/stat.c
index 0c239851..ad1cc063 100644
--- a/rdma/stat.c
+++ b/rdma/stat.c
@@ -14,12 +14,17 @@ static int stat_help(struct rd *rd)
pr_out(" %s statistic OBJECT show\n", rd->filename);
pr_out(" %s statistic OBJECT show link [ DEV/PORT_INDEX ] [ FILTER-NAME FILTER-VALUE ]\n", rd->filename);
pr_out(" %s statistic OBJECT mode\n", rd->filename);
+ pr_out(" %s statistic OBJECT set COUNTER_SCOPE [DEV/PORT_INDEX] auto {CRITERIA | off}\n", rd->filename);
pr_out("where OBJECT: = { qp }\n");
+ pr_out(" CRITERIA : = { type }\n");
+ pr_out(" COUNTER_SCOPE: = { link | dev }\n");
pr_out("Examples:\n");
pr_out(" %s statistic qp show\n", rd->filename);
pr_out(" %s statistic qp show link mlx5_2/1\n", rd->filename);
pr_out(" %s statistic qp mode\n", rd->filename);
pr_out(" %s statistic qp mode link mlx5_0\n", rd->filename);
+ pr_out(" %s statistic qp set link mlx5_2/1 auto type on\n", rd->filename);
+ pr_out(" %s statistic qp set link mlx5_2/1 auto off\n", rd->filename);
return 0;
}
@@ -381,6 +386,87 @@ static int stat_qp_show(struct rd *rd)
return rd_exec_cmd(rd, cmds, "parameter");
}
+static int stat_qp_set_link_auto_sendmsg(struct rd *rd, uint32_t mask)
+{
+ uint32_t seq;
+
+ rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_SET,
+ &seq, (NLM_F_REQUEST | NLM_F_ACK));
+
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_MODE,
+ RDMA_COUNTER_MODE_AUTO);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask);
+
+ return rd_sendrecv_msg(rd, seq);
+}
+
+static int stat_one_qp_set_link_auto_off(struct rd *rd)
+{
+ return stat_qp_set_link_auto_sendmsg(rd, 0);
+}
+
+static int stat_one_qp_set_auto_type_on(struct rd *rd)
+{
+ return stat_qp_set_link_auto_sendmsg(rd, RDMA_COUNTER_MASK_QP_TYPE);
+}
+
+static int stat_one_qp_set_link_auto_type(struct rd *rd)
+{
+ const struct rd_cmd cmds[] = {
+ { NULL, stat_help },
+ { "on", stat_one_qp_set_auto_type_on },
+ { 0 }
+ };
+
+ return rd_exec_cmd(rd, cmds, "parameter");
+}
+
+static int stat_one_qp_set_link_auto(struct rd *rd)
+{
+ const struct rd_cmd cmds[] = {
+ { NULL, stat_one_qp_link_get_mode },
+ { "off", stat_one_qp_set_link_auto_off },
+ { "type", stat_one_qp_set_link_auto_type },
+ { 0 }
+ };
+
+ return rd_exec_cmd(rd, cmds, "parameter");
+}
+
+static int stat_one_qp_set_link(struct rd *rd)
+{
+ const struct rd_cmd cmds[] = {
+ { NULL, stat_one_qp_link_get_mode },
+ { "auto", stat_one_qp_set_link_auto },
+ { 0 }
+ };
+
+ if (!rd->port_idx)
+ return 0;
+
+ return rd_exec_cmd(rd, cmds, "parameter");
+}
+
+static int stat_qp_set_link(struct rd *rd)
+{
+ return rd_exec_link(rd, stat_one_qp_set_link, false);
+}
+
+static int stat_qp_set(struct rd *rd)
+{
+ const struct rd_cmd cmds[] = {
+ { NULL, stat_help },
+ { "link", stat_qp_set_link },
+ { "help", stat_help },
+ { 0 }
+ };
+
+ return rd_exec_cmd(rd, cmds, "parameter");
+}
+
static int stat_qp(struct rd *rd)
{
const struct rd_cmd cmds[] = {
@@ -388,6 +474,7 @@ static int stat_qp(struct rd *rd)
{ "show", stat_qp_show },
{ "list", stat_qp_show },
{ "mode", stat_qp_get_mode },
+ { "set", stat_qp_set },
{ "help", stat_help },
{ 0 }
};
diff --git a/rdma/utils.c b/rdma/utils.c
index 9c885ad7..aed1a3d0 100644
--- a/rdma/utils.c
+++ b/rdma/utils.c
@@ -445,6 +445,7 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = MNL_TYPE_U64,
[RDMA_NLDEV_ATTR_STAT_MODE] = MNL_TYPE_U32,
[RDMA_NLDEV_ATTR_STAT_RES] = MNL_TYPE_U32,
+ [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = MNL_TYPE_U32,
};
int rd_attr_check(const struct nlattr *attr, int *typep)
--
2.20.1
^ permalink raw reply related
* [PATCH iproute2-rc 8/8] rdma: Document counter statistic
From: Leon Romanovsky @ 2019-07-10 7:24 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Leon Romanovsky, netdev, David Ahern, Mark Zhang,
RDMA mailing list
In-Reply-To: <20190710072455.9125-1-leon@kernel.org>
From: Mark Zhang <markz@mellanox.com>
Add documentation on accessing the QP counter, including binding/unbinding a QP
to a counter manually or automatically, and dumping counter statistics.
Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
man/man8/rdma-dev.8 | 1 +
man/man8/rdma-link.8 | 1 +
man/man8/rdma-resource.8 | 1 +
man/man8/rdma-statistic.8 | 167 ++++++++++++++++++++++++++++++++++++++
man/man8/rdma.8 | 7 +-
5 files changed, 176 insertions(+), 1 deletion(-)
create mode 100644 man/man8/rdma-statistic.8
diff --git a/man/man8/rdma-dev.8 b/man/man8/rdma-dev.8
index 38e34b3b..e77e7cd0 100644
--- a/man/man8/rdma-dev.8
+++ b/man/man8/rdma-dev.8
@@ -77,6 +77,7 @@ previously created using iproute2 ip command.
.BR rdma-link (8),
.BR rdma-resource (8),
.BR rdma-system (8),
+.BR rdma-statistic (8),
.br
.SH AUTHOR
diff --git a/man/man8/rdma-link.8 b/man/man8/rdma-link.8
index b3b40de7..32f80228 100644
--- a/man/man8/rdma-link.8
+++ b/man/man8/rdma-link.8
@@ -97,6 +97,7 @@ Removes RXE link rxe_eth0
.BR rdma (8),
.BR rdma-dev (8),
.BR rdma-resource (8),
+.BR rdma-statistic (8),
.br
.SH AUTHOR
diff --git a/man/man8/rdma-resource.8 b/man/man8/rdma-resource.8
index 40b073db..05030d0a 100644
--- a/man/man8/rdma-resource.8
+++ b/man/man8/rdma-resource.8
@@ -103,6 +103,7 @@ Show CQs belonging to pid 30489
.BR rdma (8),
.BR rdma-dev (8),
.BR rdma-link (8),
+.BR rdma-statistic (8),
.br
.SH AUTHOR
diff --git a/man/man8/rdma-statistic.8 b/man/man8/rdma-statistic.8
new file mode 100644
index 00000000..2c31b08a
--- /dev/null
+++ b/man/man8/rdma-statistic.8
@@ -0,0 +1,167 @@
+.TH RDMA\-STATISTIC 8 "17 Mar 2019" "iproute2" "Linux"
+.SH NAME
+rdma-statistic \- RDMA statistic counter configuration
+.SH SYNOPSIS
+.sp
+.ad l
+.in +8
+.ti -8
+.B rdma
+.RI "[ " OPTIONS " ]"
+.B statistic
+.RI " { " COMMAND " | "
+.BR help " }"
+.sp
+
+.ti -8
+.B rdma statistic
+.RI "[ " OBJECT " ]"
+.B show
+
+.ti -8
+.B rdma statistic
+.RI "[ " OBJECT " ]"
+.B show link
+.RI "[ " DEV/PORT_INDEX " ]"
+
+.ti -8
+.B rdma statistic
+.IR OBJECT
+.B mode
+
+.ti -8
+.B rdma statistic
+.IR OBJECT
+.B set
+.IR COUNTER_SCOPE
+.RI "[ " DEV/PORT_INDEX "]"
+.B auto
+.RI "{ " CRITERIA " | "
+.BR off " }"
+
+.ti -8
+.B rdma statistic
+.IR OBJECT
+.B bind
+.IR COUNTER_SCOPE
+.RI "[ " DEV/PORT_INDEX "]"
+.RI "[ " OBJECT-ID " ]"
+.RI "[ " COUNTER-ID " ]"
+
+.ti -8
+.B rdma statistic
+.IR OBJECT
+.B unbind
+.IR COUNTER_SCOPE
+.RI "[ " DEV/PORT_INDEX "]"
+.RI "[ " COUNTER-ID " ]"
+.RI "[ " OBJECT-ID " ]"
+
+.ti -8
+.IR COUNTER_SCOPE " := "
+.RB "{ " link " | " dev " }"
+
+.ti -8
+.IR OBJECT " := "
+.RB "{ " qp " }"
+
+.ti -8
+.IR CRITERIA " := "
+.RB "{ " type " }"
+
+.SH "DESCRIPTION"
+.SS rdma statistic [object] show - Queries the specified RDMA device for RDMA and driver-specific statistics. Show the default hw counters if object is not specified
+
+.PP
+.I "DEV"
+- specifies counters on this RDMA device to show.
+
+.I "PORT_INDEX"
+- specifies counters on this RDMA port to show.
+
+.SS rdma statistic <object> set - configure counter statistic auto-mode for a specific device/port
+In auto mode all objects belonging to one category are bound automatically to a single counter set.
+
+.SS rdma statistic <object> bind - manually bind an object (e.g., a qp) with a counter
+When bound the statistics of this object are available in this counter.
+
+.SS rdma statistic <object> unbind - manually unbind an object (e.g., a qp) from the counter previously bound
+When unbound the statistics of this object are no longer available in this counter; And if object id is not specified then all objects on this counter will be unbound.
+
+.I "COUNTER-ID"
+- specifies the id of the counter to be bound.
+If this argument is omitted then a new counter will be allocated.
+
+.SH "EXAMPLES"
+.PP
+rdma statistic show
+.RS 4
+Shows the state of the default counter of all RDMA devices on the system.
+.RE
+.PP
+rdma statistic show link mlx5_2/1
+.RS 4
+Shows the state of the default counter of specified RDMA port
+.RE
+.PP
+rdma statistic qp show
+.RS 4
+Shows the state of all qp counters of all RDMA devices on the system.
+.RE
+.PP
+rdma statistic qp show link mlx5_2/1
+.RS 4
+Shows the state of all qp counters of specified RDMA port.
+.RE
+.PP
+rdma statistic qp show link mlx5_2 pid 30489
+.RS 4
+Shows the state of all qp counters of specified RDMA port and belonging to pid 30489
+.RE
+.PP
+rdma statistic qp mode
+.RS 4
+List current counter mode on all devices
+.RE
+.PP
+rdma statistic qp mode link mlx5_2/1
+.RS 4
+List current counter mode of device mlx5_2 port 1
+.RE
+.PP
+rdma statistic qp set link mlx5_2/1 auto type on
+.RS 4
+On device mlx5_2 port 1, for each new QP bind it with a counter automatically. Per counter for QPs with same qp type in each process. Currently only "type" is supported.
+.RE
+.PP
+rdma statistic qp set link mlx5_2/1 auto off
+.RS 4
+Turn-off auto mode on device mlx5_2 port 1. The allocated counters can be manually accessed.
+.RE
+.PP
+rdma statistic qp bind link mlx5_2/1 lqpn 178
+.RS 4
+On device mlx5_2 port 1, allocate a counter and bind the specified qp on it
+.RE
+.PP
+rdma statistic qp unbind link mlx5_2/1 cntn 4 lqpn 178
+.RS 4
+On device mlx5_2 port 1, unbind the specified qp from the specified counter
+.RE
+.PP
+rdma statistic qp unbind link mlx5_2/1 cntn 4
+.RS 4
+On device mlx5_2 port 1, unbind all QPs on the specified counter. After that this counter will be released automatically by the kernel.
+
+.RE
+.PP
+
+.SH SEE ALSO
+.BR rdma (8),
+.BR rdma-dev (8),
+.BR rdma-link (8),
+.BR rdma-resource (8),
+.br
+
+.SH AUTHOR
+Mark Zhang <markz@mellanox.com>
diff --git a/man/man8/rdma.8 b/man/man8/rdma.8
index 3ae33987..ef29b1c6 100644
--- a/man/man8/rdma.8
+++ b/man/man8/rdma.8
@@ -19,7 +19,7 @@ rdma \- RDMA tool
.ti -8
.IR OBJECT " := { "
-.BR dev " | " link " | " system " }"
+.BR dev " | " link " | " system " | " statistic " }"
.sp
.ti -8
@@ -74,6 +74,10 @@ Generate JSON output.
.B sys
- RDMA subsystem related.
+.TP
+.B statistic
+- RDMA counter statistic related.
+
.PP
The names of all objects may be written in full or
abbreviated form, for example
@@ -112,6 +116,7 @@ Exit status is 0 if command was successful or a positive integer upon failure.
.BR rdma-link (8),
.BR rdma-resource (8),
.BR rdma-system (8),
+.BR rdma-statistic (8),
.br
.SH REPORTING BUGS
--
2.20.1
^ permalink raw reply related
* [PATCH iproute2-rc 7/8] rdma: Add default counter show support
From: Leon Romanovsky @ 2019-07-10 7:24 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Leon Romanovsky, netdev, David Ahern, Mark Zhang,
RDMA mailing list
In-Reply-To: <20190710072455.9125-1-leon@kernel.org>
From: Mark Zhang <markz@mellanox.com>
Show default counter statistics, which are the same as those exposed through the sysfs
interface: /sys/class/infiniband/<dev>/ports/<port>/hw_counters/
Example:
$ rdma stat show link mlx5_2/1
link mlx5_2/1 rx_write_requests 8 rx_read_requests 4 rx_atomic_requests 0
out_of_buffer 0 out_of_sequence 0 duplicate_request 0 rnr_nak_retry_err 0
packet_seq_err 0 implied_nak_seq_err 0 local_ack_timeout_err 0
resp_local_length_error 0 resp_cqe_error 0 req_cqe_error 0
req_remote_invalid_request 0 req_remote_access_errors 0
resp_remote_access_errors 0 resp_cqe_flush_error 0 req_cqe_flush_error 0
rp_cnp_ignored 0 rp_cnp_handled 0 np_ecn_marked_roce_packets 0
np_cnp_sent 0 rx_icrc_encapsulated 0
Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
rdma/stat.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 73 insertions(+), 1 deletion(-)
diff --git a/rdma/stat.c b/rdma/stat.c
index 942c1ac3..ef0bbcf1 100644
--- a/rdma/stat.c
+++ b/rdma/stat.c
@@ -17,6 +17,8 @@ static int stat_help(struct rd *rd)
pr_out(" %s statistic OBJECT set COUNTER_SCOPE [DEV/PORT_INDEX] auto {CRITERIA | off}\n", rd->filename);
pr_out(" %s statistic OBJECT bind COUNTER_SCOPE [DEV/PORT_INDEX] [OBJECT-ID] [COUNTER-ID]\n", rd->filename);
pr_out(" %s statistic OBJECT unbind COUNTER_SCOPE [DEV/PORT_INDEX] [COUNTER-ID]\n", rd->filename);
+ pr_out(" %s statistic show\n", rd->filename);
+ pr_out(" %s statistic show link [ DEV/PORT_INDEX ]\n", rd->filename);
pr_out("where OBJECT: = { qp }\n");
pr_out(" CRITERIA : = { type }\n");
pr_out(" COUNTER_SCOPE: = { link | dev }\n");
@@ -31,6 +33,8 @@ static int stat_help(struct rd *rd)
pr_out(" %s statistic qp bind link mlx5_2/1 lqpn 178 cntn 4\n", rd->filename);
pr_out(" %s statistic qp unbind link mlx5_2/1 cntn 4\n", rd->filename);
pr_out(" %s statistic qp unbind link mlx5_2/1 cntn 4 lqpn 178\n", rd->filename);
+ pr_out(" %s statistic show\n", rd->filename);
+ pr_out(" %s statistic show link mlx5_2/1\n", rd->filename);
return 0;
}
@@ -674,10 +678,78 @@ static int stat_qp(struct rd *rd)
return rd_exec_cmd(rd, cmds, "parameter");
}
+static int stat_show_parse_cb(const struct nlmsghdr *nlh, void *data)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {};
+ struct rd *rd = data;
+ const char *name;
+ uint32_t port;
+ int ret;
+
+ mnl_attr_parse(nlh, 0, rd_attr_cb, tb);
+ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS])
+ return MNL_CB_ERROR;
+
+ name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]);
+ port = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (rd->json_output) {
+ jsonw_string_field(rd->jw, "ifname", name);
+ jsonw_uint_field(rd->jw, "port", port);
+ } else {
+ pr_out("link %s/%u ", name, port);
+ }
+
+ ret = res_get_hwcounters(rd, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], true);
+
+ if (!rd->json_output)
+ pr_out("\n");
+ return ret;
+}
+
+static int stat_show_one_link(struct rd *rd)
+{
+ int flags = NLM_F_REQUEST | NLM_F_ACK;
+ uint32_t seq;
+ int ret;
+
+ if (!rd->port_idx)
+ return 0;
+
+ rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_GET, &seq, flags);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx);
+ ret = rd_send_msg(rd);
+ if (ret)
+ return ret;
+
+ return rd_recv_msg(rd, stat_show_parse_cb, rd, seq);
+}
+
+static int stat_show_link(struct rd *rd)
+{
+ return rd_exec_link(rd, stat_show_one_link, false);
+}
+
+static int stat_show(struct rd *rd)
+{
+ const struct rd_cmd cmds[] = {
+ { NULL, stat_show_link },
+ { "link", stat_show_link },
+ { "help", stat_help },
+ { 0 }
+ };
+
+ return rd_exec_cmd(rd, cmds, "parameter");
+}
+
int cmd_stat(struct rd *rd)
{
const struct rd_cmd cmds[] = {
- { NULL, stat_help },
+ { NULL, stat_show },
+ { "show", stat_show },
+ { "list", stat_show },
{ "help", stat_help },
{ "qp", stat_qp },
{ 0 }
--
2.20.1
^ permalink raw reply related
* [PATCH iproute2-rc 6/8] rdma: Add stat manual mode support
From: Leon Romanovsky @ 2019-07-10 7:24 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Leon Romanovsky, netdev, David Ahern, Mark Zhang,
RDMA mailing list
In-Reply-To: <20190710072455.9125-1-leon@kernel.org>
From: Mark Zhang <markz@mellanox.com>
In manual mode a QP can be manually bound to a counter. If the counter
id (cntn) is not specified then the kernel will allocate one. After a
successful bind, the cntn can be seen through "rdma statistic qp show".
And in unbind if lqpn is not specified then all QPs on this counter will
be unbound.
The manual and auto modes are mutually exclusive.
Examples:
$ rdma statistic qp bind link mlx5_2/1 lqpn 178
$ rdma statistic qp bind link mlx5_2/1 lqpn 178 cntn 4
$ rdma statistic qp unbind link mlx5_2/1 cntn 4
$ rdma statistic qp unbind link mlx5_2/1 cntn 4 lqpn 178
Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
rdma/stat.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 192 insertions(+)
diff --git a/rdma/stat.c b/rdma/stat.c
index ad1cc063..942c1ac3 100644
--- a/rdma/stat.c
+++ b/rdma/stat.c
@@ -15,6 +15,8 @@ static int stat_help(struct rd *rd)
pr_out(" %s statistic OBJECT show link [ DEV/PORT_INDEX ] [ FILTER-NAME FILTER-VALUE ]\n", rd->filename);
pr_out(" %s statistic OBJECT mode\n", rd->filename);
pr_out(" %s statistic OBJECT set COUNTER_SCOPE [DEV/PORT_INDEX] auto {CRITERIA | off}\n", rd->filename);
+ pr_out(" %s statistic OBJECT bind COUNTER_SCOPE [DEV/PORT_INDEX] [OBJECT-ID] [COUNTER-ID]\n", rd->filename);
+ pr_out(" %s statistic OBJECT unbind COUNTER_SCOPE [DEV/PORT_INDEX] [COUNTER-ID]\n", rd->filename);
pr_out("where OBJECT: = { qp }\n");
pr_out(" CRITERIA : = { type }\n");
pr_out(" COUNTER_SCOPE: = { link | dev }\n");
@@ -25,6 +27,10 @@ static int stat_help(struct rd *rd)
pr_out(" %s statistic qp mode link mlx5_0\n", rd->filename);
pr_out(" %s statistic qp set link mlx5_2/1 auto type on\n", rd->filename);
pr_out(" %s statistic qp set link mlx5_2/1 auto off\n", rd->filename);
+ pr_out(" %s statistic qp bind link mlx5_2/1 lqpn 178\n", rd->filename);
+ pr_out(" %s statistic qp bind link mlx5_2/1 lqpn 178 cntn 4\n", rd->filename);
+ pr_out(" %s statistic qp unbind link mlx5_2/1 cntn 4\n", rd->filename);
+ pr_out(" %s statistic qp unbind link mlx5_2/1 cntn 4 lqpn 178\n", rd->filename);
return 0;
}
@@ -467,6 +473,190 @@ static int stat_qp_set(struct rd *rd)
return rd_exec_cmd(rd, cmds, "parameter");
}
+static int stat_get_arg(struct rd *rd, const char *arg)
+{
+ int value = 0;
+ char *endp;
+
+ if (strcmpx(rd_argv(rd), arg) != 0)
+ return -EINVAL;
+
+ rd_arg_inc(rd);
+ value = strtol(rd_argv(rd), &endp, 10);
+ rd_arg_inc(rd);
+
+ return value;
+}
+
+static int stat_one_qp_bind(struct rd *rd)
+{
+ int lqpn = 0, cntn = 0, ret;
+ uint32_t seq;
+
+ if (rd_no_arg(rd)) {
+ stat_help(rd);
+ return -EINVAL;
+ }
+
+ ret = rd_build_filter(rd, stat_valid_filters);
+ if (ret)
+ return ret;
+
+ lqpn = stat_get_arg(rd, "lqpn");
+
+ rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_SET,
+ &seq, (NLM_F_REQUEST | NLM_F_ACK));
+
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_MODE,
+ RDMA_COUNTER_MODE_MANUAL);
+
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_RES_LQPN, lqpn);
+
+ if (rd_argc(rd)) {
+ cntn = stat_get_arg(rd, "cntn");
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
+ cntn);
+ }
+
+ return rd_sendrecv_msg(rd, seq);
+}
+
+static int do_stat_qp_unbind_lqpn(struct rd *rd, uint32_t cntn, uint32_t lqpn)
+{
+ uint32_t seq;
+
+ rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_DEL,
+ &seq, (NLM_F_REQUEST | NLM_F_ACK));
+
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_MODE,
+ RDMA_COUNTER_MODE_MANUAL);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_RES_LQPN, lqpn);
+
+ return rd_sendrecv_msg(rd, seq);
+}
+
+static int stat_get_counter_parse_cb(const struct nlmsghdr *nlh, void *data)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {};
+ struct nlattr *nla_table, *nla_entry;
+ struct rd *rd = data;
+ uint32_t lqpn, cntn;
+ int err;
+
+ mnl_attr_parse(nlh, 0, rd_attr_cb, tb);
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
+ return MNL_CB_ERROR;
+ cntn = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+
+ nla_table = tb[RDMA_NLDEV_ATTR_RES_QP];
+ if (!nla_table)
+ return MNL_CB_ERROR;
+
+ mnl_attr_for_each_nested(nla_entry, nla_table) {
+ struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {};
+
+ err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line);
+ if (err != MNL_CB_OK)
+ return -EINVAL;
+
+ if (!nla_line[RDMA_NLDEV_ATTR_RES_LQPN])
+ return -EINVAL;
+
+ lqpn = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_LQPN]);
+ err = do_stat_qp_unbind_lqpn(rd, cntn, lqpn);
+ if (err)
+ return MNL_CB_ERROR;
+ }
+
+ return MNL_CB_OK;
+}
+
+static int stat_one_qp_unbind(struct rd *rd)
+{
+ int flags = NLM_F_REQUEST | NLM_F_ACK, ret;
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ int lqpn = 0, cntn = 0;
+ unsigned int portid;
+ uint32_t seq;
+
+ ret = rd_build_filter(rd, stat_valid_filters);
+ if (ret)
+ return ret;
+
+ cntn = stat_get_arg(rd, "cntn");
+ if (rd_argc(rd)) {
+ lqpn = stat_get_arg(rd, "lqpn");
+ return do_stat_qp_unbind_lqpn(rd, cntn, lqpn);
+ }
+
+ rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_GET, &seq, flags);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP);
+ mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn);
+ ret = rd_send_msg(rd);
+ if (ret)
+ return ret;
+
+
+ /* Can't use rd_recv_msg() since the callback also calls it (recursively),
+ * then rd_recv_msg() always return -1 here
+ */
+ portid = mnl_socket_get_portid(rd->nl);
+ ret = mnl_socket_recvfrom(rd->nl, buf, sizeof(buf));
+ if (ret <= 0)
+ return ret;
+
+ ret = mnl_cb_run(buf, ret, seq, portid, stat_get_counter_parse_cb, rd);
+ mnl_socket_close(rd->nl);
+ if (ret != MNL_CB_OK)
+ return ret;
+
+ return 0;
+}
+
+static int stat_qp_bind_link(struct rd *rd)
+{
+ return rd_exec_link(rd, stat_one_qp_bind, true);
+}
+
+static int stat_qp_bind(struct rd *rd)
+{
+ const struct rd_cmd cmds[] = {
+ { NULL, stat_help },
+ { "link", stat_qp_bind_link },
+ { "help", stat_help },
+ { 0 },
+ };
+
+ return rd_exec_cmd(rd, cmds, "parameter");
+}
+
+static int stat_qp_unbind_link(struct rd *rd)
+{
+ return rd_exec_link(rd, stat_one_qp_unbind, true);
+}
+
+static int stat_qp_unbind(struct rd *rd)
+{
+ const struct rd_cmd cmds[] = {
+ { NULL, stat_help },
+ { "link", stat_qp_unbind_link },
+ { "help", stat_help },
+ { 0 },
+ };
+
+ return rd_exec_cmd(rd, cmds, "parameter");
+}
+
static int stat_qp(struct rd *rd)
{
const struct rd_cmd cmds[] = {
@@ -475,6 +665,8 @@ static int stat_qp(struct rd *rd)
{ "list", stat_qp_show },
{ "mode", stat_qp_get_mode },
{ "set", stat_qp_set },
+ { "bind", stat_qp_bind },
+ { "unbind", stat_qp_unbind },
{ "help", stat_help },
{ 0 }
};
--
2.20.1
^ permalink raw reply related
* [PATCH iproute2-rc 5/8] rdma: Make get_port_from_argv() returns valid port in strict port mode
From: Leon Romanovsky @ 2019-07-10 7:24 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Leon Romanovsky, netdev, David Ahern, Mark Zhang,
RDMA mailing list
In-Reply-To: <20190710072455.9125-1-leon@kernel.org>
From: Mark Zhang <markz@mellanox.com>
When strict_port is set, make get_port_from_argv() return failure if
no valid port is specified.
Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
rdma/utils.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/rdma/utils.c b/rdma/utils.c
index aed1a3d0..95b669f3 100644
--- a/rdma/utils.c
+++ b/rdma/utils.c
@@ -56,7 +56,7 @@ bool rd_no_arg(struct rd *rd)
* mlx5_1/1 | 1 | false
* mlx5_1/- | 0 | false
*
- * In strict mode, /- will return error.
+ * In strict port mode, a non-0 port must be provided
*/
static int get_port_from_argv(struct rd *rd, uint32_t *port,
bool *is_dump_all, bool strict_port)
@@ -64,7 +64,7 @@ static int get_port_from_argv(struct rd *rd, uint32_t *port,
char *slash;
*port = 0;
- *is_dump_all = true;
+ *is_dump_all = strict_port ? false : true;
slash = strchr(rd_argv(rd), '/');
/* if no port found, return 0 */
@@ -83,6 +83,9 @@ static int get_port_from_argv(struct rd *rd, uint32_t *port,
if (!*port && strlen(slash))
return -EINVAL;
}
+ if (strict_port && (*port == 0))
+ return -EINVAL;
+
return 0;
}
--
2.20.1
^ permalink raw reply related
* [PATCH iproute2-rc 3/8] rdma: Add get per-port counter mode support
From: Leon Romanovsky @ 2019-07-10 7:24 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Leon Romanovsky, netdev, David Ahern, Mark Zhang,
RDMA mailing list
In-Reply-To: <20190710072455.9125-1-leon@kernel.org>
From: Mark Zhang <markz@mellanox.com>
Add an interface to show which mode is active. Two modes are supported:
- "auto": In this mode all QPs belonging to one category are bound
automatically to a single counter set. Currently only "qp type" is supported;
- "manual": In this mode QPs are bound to a counter manually.
Examples:
$ rdma statistic qp mode
0/1: mlx5_0/1: qp auto off
1/1: mlx5_1/1: qp auto off
2/1: mlx5_2/1: qp auto type on
3/1: mlx5_3/1: qp auto off
$ rdma statistic qp mode link mlx5_0
0/1: mlx5_0/1: qp auto off
Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
rdma/stat.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++
rdma/utils.c | 2 +
2 files changed, 142 insertions(+)
diff --git a/rdma/stat.c b/rdma/stat.c
index da35ef7d..0c239851 100644
--- a/rdma/stat.c
+++ b/rdma/stat.c
@@ -13,13 +13,152 @@ static int stat_help(struct rd *rd)
pr_out("Usage: %s [ OPTIONS ] statistic { COMMAND | help }\n", rd->filename);
pr_out(" %s statistic OBJECT show\n", rd->filename);
pr_out(" %s statistic OBJECT show link [ DEV/PORT_INDEX ] [ FILTER-NAME FILTER-VALUE ]\n", rd->filename);
+ pr_out(" %s statistic OBJECT mode\n", rd->filename);
+ pr_out("where OBJECT: = { qp }\n");
pr_out("Examples:\n");
pr_out(" %s statistic qp show\n", rd->filename);
pr_out(" %s statistic qp show link mlx5_2/1\n", rd->filename);
+ pr_out(" %s statistic qp mode\n", rd->filename);
+ pr_out(" %s statistic qp mode link mlx5_0\n", rd->filename);
return 0;
}
+/* One auto-mode criterion: its command-line keyword and its mask bit. */
+struct counter_param {
+	const char *name;	/* keyword; points at a string literal */
+	uint32_t attr;		/* matching RDMA_COUNTER_MASK_* bit */
+};
+
+/*
+ * Auto-mode criteria known to the tool, terminated by an entry whose name
+ * is NULL. The table is only ever read, hence const.
+ */
+static const struct counter_param auto_params[] = {
+	{ "type", RDMA_COUNTER_MASK_QP_TYPE, },
+	{ NULL },
+};
+
+/*
+ * Render the auto-mode description for @mask into @output.
+ *
+ * The text is "qp auto", followed by the name of every auto_params[] entry
+ * whose bit is set in @mask, followed by " on". A zero @mask yields
+ * "qp auto off".
+ *
+ * @tb:     unused; kept so the signature stays unchanged
+ * @mask:   bitmask of auto-mode criteria
+ * @output: destination buffer
+ * @len:    size of @output in bytes
+ *
+ * Returns 0 on success, or -EINVAL when @output is NULL, @len is not
+ * positive, or the text does not fit. On overflow @output holds the
+ * NUL-terminated text built so far.
+ */
+static int prepare_auto_mode_str(struct nlattr **tb, uint32_t mask,
+				 char *output, int len)
+{
+	const char *suffix = mask ? " on" : " off";
+	int used, n, i;
+
+	(void)tb;
+
+	/* A non-positive length would become a huge size_t further down. */
+	if (!output || len <= 0)
+		return -EINVAL;
+
+	used = snprintf(output, len, "%s", "qp auto");
+	if (used < 0 || used >= len)
+		return -EINVAL;
+
+	for (i = 0; mask && auto_params[i].name != NULL; i++) {
+		if (!(mask & auto_params[i].attr))
+			continue;
+
+		n = snprintf(output + used, len - used, " %s",
+			     auto_params[i].name);
+		if (n < 0 || n >= len - used) {
+			/* Drop the partially written word. */
+			output[used] = '\0';
+			return -EINVAL;
+		}
+		used += n;
+	}
+
+	n = snprintf(output + used, len - used, "%s", suffix);
+	if (n < 0 || n >= len - used) {
+		output[used] = '\0';
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Netlink reply callback for "statistic qp mode": prints the counter mode
+ * of one device/port, either as JSON fields or as a text line.
+ *
+ * @nlh:  received netlink message
+ * @data: the struct rd context
+ *
+ * Returns MNL_CB_OK on success. Returns MNL_CB_ERROR when a required
+ * attribute is missing or the mode string cannot be built.
+ */
+static int qp_link_get_mode_parse_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {};
+	uint32_t mode = 0, mask = 0;
+	char output[128] = {};
+	struct rd *rd = data;
+	uint32_t idx, port;
+	const char *name;
+
+	mnl_attr_parse(nlh, 0, rd_attr_cb, tb);
+	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME])
+		return MNL_CB_ERROR;
+
+	if (!tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+		pr_err("This tool doesn't support switches yet\n");
+		return MNL_CB_ERROR;
+	}
+
+	idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	port = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+	name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]);
+	if (tb[RDMA_NLDEV_ATTR_STAT_MODE])
+		mode = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
+
+	if (mode == RDMA_COUNTER_MODE_AUTO) {
+		if (!tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
+			return MNL_CB_ERROR;
+		mask = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
+		/* Never print a string the helper could not fully build. */
+		if (prepare_auto_mode_str(tb, mask, output, sizeof(output)))
+			return MNL_CB_ERROR;
+	} else {
+		/* Every mode other than auto is reported as auto being off. */
+		snprintf(output, sizeof(output), "qp auto off");
+	}
+
+	if (rd->json_output) {
+		jsonw_uint_field(rd->jw, "ifindex", idx);
+		jsonw_uint_field(rd->jw, "port", port);
+		jsonw_string_field(rd->jw, "mode", output);
+	} else {
+		pr_out("%u/%u: %s/%u: %s\n", idx, port, name, port, output);
+	}
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Ask the kernel for the counter mode of the selected device/port and print
+ * the answer through qp_link_get_mode_parse_cb().
+ *
+ * Returns 0 without sending anything when rd->port_idx is 0; otherwise
+ * returns the result of sending the request or receiving the reply.
+ */
+static int stat_one_qp_link_get_mode(struct rd *rd)
+{
+	const int flags = NLM_F_REQUEST | NLM_F_ACK;
+	uint32_t seq;
+	int rc;
+
+	if (rd->port_idx == 0)
+		return 0;
+
+	rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_GET, &seq, flags);
+
+	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
+	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx);
+	/*
+	 * Supplying a valid RDMA_NLDEV_ATTR_STAT_MODE tells the kernel to
+	 * return only the mode instead of all counters.
+	 */
+	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_MODE,
+			 RDMA_COUNTER_MODE_MANUAL);
+	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP);
+
+	rc = rd_send_msg(rd);
+	if (rc != 0)
+		return rc;
+
+	/* In JSON mode the reply fields are wrapped in one object. */
+	if (rd->json_output)
+		jsonw_start_object(rd->jw);
+
+	rc = rd_recv_msg(rd, qp_link_get_mode_parse_cb, rd, seq);
+
+	if (rd->json_output)
+		jsonw_end_object(rd->jw);
+
+	return rc;
+}
+
+/*
+ * "mode link" handler: runs stat_one_qp_link_get_mode() through
+ * rd_exec_link(), passing false as the last argument (presumably a port is
+ * optional - confirm against rd_exec_link()).
+ */
+static int stat_qp_link_get_mode(struct rd *rd)
+{
+	int rc = rd_exec_link(rd, stat_one_qp_link_get_mode, false);
+
+	return rc;
+}
+
+/*
+ * Sub-command table for "statistic qp mode". Both the NULL entry and "link"
+ * show the mode; "help" prints the usage text.
+ */
+static int stat_qp_get_mode(struct rd *rd)
+{
+	const struct rd_cmd mode_cmds[] = {
+		{ NULL,		stat_qp_link_get_mode },
+		{ "link",	stat_qp_link_get_mode },
+		{ "help",	stat_help },
+		{ 0 }
+	};
+
+	return rd_exec_cmd(rd, mode_cmds, "parameter");
+}
+
static int res_get_hwcounters(struct rd *rd, struct nlattr *hwc_table, bool print)
{
struct nlattr *nla_entry;
@@ -248,6 +387,7 @@ static int stat_qp(struct rd *rd)
{ NULL, stat_qp_show },
{ "show", stat_qp_show },
{ "list", stat_qp_show },
+ { "mode", stat_qp_get_mode },
{ "help", stat_help },
{ 0 }
};
diff --git a/rdma/utils.c b/rdma/utils.c
index 7bc0439a..9c885ad7 100644
--- a/rdma/utils.c
+++ b/rdma/utils.c
@@ -443,6 +443,8 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = MNL_TYPE_NESTED,
[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = MNL_TYPE_NUL_STRING,
[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = MNL_TYPE_U64,
+ [RDMA_NLDEV_ATTR_STAT_MODE] = MNL_TYPE_U32,
+ [RDMA_NLDEV_ATTR_STAT_RES] = MNL_TYPE_U32,
};
int rd_attr_check(const struct nlattr *attr, int *typep)
--
2.20.1
^ permalink raw reply related
* [PATCH iproute2-rc 2/8] rdma: Add "stat qp show" support
From: Leon Romanovsky @ 2019-07-10 7:24 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Leon Romanovsky, netdev, David Ahern, Mark Zhang,
RDMA mailing list
In-Reply-To: <20190710072455.9125-1-leon@kernel.org>
From: Mark Zhang <markz@mellanox.com>
This patch presents link, id, task name, lqpn, as well as all sub
counters of a QP counter.
A QP counter is a dynamically allocated statistic counter that is
bound with one or more QPs. It has several sub-counters, each is
used for a different purpose.
Examples:
$ rdma stat qp show
link mlx5_2/1 cntn 5 pid 31609 comm client.1 rx_write_requests 0
rx_read_requests 0 rx_atomic_requests 0 out_of_buffer 0 out_of_sequence 0
duplicate_request 0 rnr_nak_retry_err 0 packet_seq_err 0
implied_nak_seq_err 0 local_ack_timeout_err 0 resp_local_length_error 0
resp_cqe_error 0 req_cqe_error 0 req_remote_invalid_request 0
req_remote_access_errors 0 resp_remote_access_errors 0
resp_cqe_flush_error 0 req_cqe_flush_error 0
LQPN: <178>
$ rdma stat show link rocep1s0f5/1
link rocep1s0f5/1 rx_write_requests 0 rx_read_requests 0 rx_atomic_requests 0 out_of_buffer 0 duplicate_request 0
rnr_nak_retry_err 0 packet_seq_err 0 implied_nak_seq_err 0 local_ack_timeout_err 0 resp_local_length_error 0 resp_cqe_error 0
req_cqe_error 0 req_remote_invalid_request 0 req_remote_access_errors 0 resp_remote_access_errors 0 resp_cqe_flush_error 0
req_cqe_flush_error 0 rp_cnp_ignored 0 rp_cnp_handled 0 np_ecn_marked_roce_packets 0 np_cnp_sent 0
$ rdma stat show link rocep1s0f5/1 -p
link rocep1s0f5/1
rx_write_requests 0
rx_read_requests 0
rx_atomic_requests 0
out_of_buffer 0
duplicate_request 0
rnr_nak_retry_err 0
packet_seq_err 0
implied_nak_seq_err 0
local_ack_timeout_err 0
resp_local_length_error 0
resp_cqe_error 0
req_cqe_error 0
req_remote_invalid_request 0
req_remote_access_errors 0
resp_remote_access_errors 0
resp_cqe_flush_error 0
req_cqe_flush_error 0
rp_cnp_ignored 0
rp_cnp_handled 0
np_ecn_marked_roce_packets 0
np_cnp_sent 0
Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
rdma/Makefile | 2 +-
rdma/rdma.c | 3 +-
rdma/rdma.h | 1 +
rdma/stat.c | 268 ++++++++++++++++++++++++++++++++++++++++++++++++++
rdma/utils.c | 7 ++
5 files changed, 279 insertions(+), 2 deletions(-)
create mode 100644 rdma/stat.c
diff --git a/rdma/Makefile b/rdma/Makefile
index 4847f27e..e3f550bf 100644
--- a/rdma/Makefile
+++ b/rdma/Makefile
@@ -7,7 +7,7 @@ ifeq ($(HAVE_MNL),y)
CFLAGS += -I./include/uapi/
RDMA_OBJ = rdma.o utils.o dev.o link.o res.o res-pd.o res-mr.o res-cq.o \
- res-cmid.o res-qp.o sys.o
+ res-cmid.o res-qp.o sys.o stat.o
TARGETS += rdma
endif
diff --git a/rdma/rdma.c b/rdma/rdma.c
index e9f1b4bb..4e34da92 100644
--- a/rdma/rdma.c
+++ b/rdma/rdma.c
@@ -11,7 +11,7 @@ static void help(char *name)
{
pr_out("Usage: %s [ OPTIONS ] OBJECT { COMMAND | help }\n"
" %s [ -f[orce] ] -b[atch] filename\n"
- "where OBJECT := { dev | link | resource | system | help }\n"
+ "where OBJECT := { dev | link | resource | system | statistic | help }\n"
" OPTIONS := { -V[ersion] | -d[etails] | -j[son] | -p[retty]}\n", name, name);
}
@@ -30,6 +30,7 @@ static int rd_cmd(struct rd *rd, int argc, char **argv)
{ "link", cmd_link },
{ "resource", cmd_res },
{ "system", cmd_sys },
+ { "statistic", cmd_stat },
{ 0 }
};
diff --git a/rdma/rdma.h b/rdma/rdma.h
index 885a751e..23157743 100644
--- a/rdma/rdma.h
+++ b/rdma/rdma.h
@@ -94,6 +94,7 @@ int cmd_dev(struct rd *rd);
int cmd_link(struct rd *rd);
int cmd_res(struct rd *rd);
int cmd_sys(struct rd *rd);
+int cmd_stat(struct rd *rd);
int rd_exec_cmd(struct rd *rd, const struct rd_cmd *c, const char *str);
int rd_exec_dev(struct rd *rd, int (*cb)(struct rd *rd));
int rd_exec_require_dev(struct rd *rd, int (*cb)(struct rd *rd));
diff --git a/rdma/stat.c b/rdma/stat.c
new file mode 100644
index 00000000..da35ef7d
--- /dev/null
+++ b/rdma/stat.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * stat.c RDMA tool
+ * Authors: Mark Zhang <markz@mellanox.com>
+ */
+
+#include "rdma.h"
+#include "res.h"
+#include <inttypes.h>
+
+/*
+ * Print the usage text and examples for the "statistic" object.
+ * rd->filename is substituted as the program name. Always returns 0.
+ */
+static int stat_help(struct rd *rd)
+{
+ pr_out("Usage: %s [ OPTIONS ] statistic { COMMAND | help }\n", rd->filename);
+ pr_out(" %s statistic OBJECT show\n", rd->filename);
+ pr_out(" %s statistic OBJECT show link [ DEV/PORT_INDEX ] [ FILTER-NAME FILTER-VALUE ]\n", rd->filename);
+ pr_out("Examples:\n");
+ pr_out(" %s statistic qp show\n", rd->filename);
+ pr_out(" %s statistic qp show link mlx5_2/1\n", rd->filename);
+
+ return 0;
+}
+
+/*
+ * Walk the nested hardware-counter table and check that every entry carries
+ * both a name and a value attribute. When @print is true each counter is
+ * also printed with res_print_uint().
+ *
+ * Returns MNL_CB_OK when every entry is well formed, -EINVAL otherwise.
+ */
+static int res_get_hwcounters(struct rd *rd, struct nlattr *hwc_table, bool print)
+{
+	struct nlattr *entry;
+
+	mnl_attr_for_each_nested(entry, hwc_table) {
+		struct nlattr *attrs[RDMA_NLDEV_ATTR_MAX] = {};
+		struct nlattr *name_attr, *value_attr;
+
+		if (mnl_attr_parse_nested(entry, rd_attr_cb, attrs) != MNL_CB_OK)
+			return -EINVAL;
+
+		name_attr = attrs[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME];
+		value_attr = attrs[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE];
+		if (!(name_attr && value_attr))
+			return -EINVAL;
+
+		if (print) {
+			const char *counter = mnl_attr_get_str(name_attr);
+			uint64_t value = mnl_attr_get_u64(value_attr);
+
+			/* Pretty text output puts each counter on its own line. */
+			if (rd->pretty_output && !rd->json_output)
+				newline_indent(rd);
+			res_print_uint(rd, counter, value, name_attr);
+		}
+	}
+
+	return MNL_CB_OK;
+}
+
+/*
+ * Print one counter entry: link (or device), counter id, pid, task name,
+ * all hardware counters and the list of bound QP numbers.
+ *
+ * The entry is validated and filtered in full before anything is printed,
+ * so a filtered-out or malformed entry produces no partial output.
+ *
+ * @rd:       tool context
+ * @name:     device name
+ * @index:    device index (currently unused)
+ * @nla_line: parsed attributes of the counter entry
+ *
+ * Returns MNL_CB_OK when the entry was printed or filtered out,
+ * MNL_CB_ERROR when a mandatory attribute is missing, or -EINVAL when a
+ * nested entry is malformed.
+ */
+static int res_counter_line(struct rd *rd, const char *name, int index,
+			    struct nlattr **nla_line)
+{
+	uint32_t cntn, port = 0, pid = 0, qpn;
+	struct nlattr *hwc_table, *qp_table;
+	struct nlattr *nla_entry;
+	const char *comm = NULL;
+	bool isfirst;
+	int err;
+
+	if (nla_line[RDMA_NLDEV_ATTR_PORT_INDEX])
+		port = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_PORT_INDEX]);
+
+	hwc_table = nla_line[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS];
+	qp_table = nla_line[RDMA_NLDEV_ATTR_RES_QP];
+	if (!hwc_table || !qp_table ||
+	    !nla_line[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
+		return MNL_CB_ERROR;
+
+	cntn = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+	if (rd_is_filtered_attr(rd, "cntn", cntn,
+				nla_line[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]))
+		return MNL_CB_OK;
+
+	if (nla_line[RDMA_NLDEV_ATTR_RES_PID]) {
+		pid = mnl_attr_get_u32(nla_line[RDMA_NLDEV_ATTR_RES_PID]);
+		/*
+		 * NOTE(review): if get_task_name() returns allocated memory
+		 * it is never released here - confirm against its definition.
+		 */
+		comm = get_task_name(pid);
+	}
+	if (rd_is_filtered_attr(rd, "pid", pid,
+				nla_line[RDMA_NLDEV_ATTR_RES_PID]))
+		return MNL_CB_OK;
+
+	if (nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME])
+		comm = (char *)mnl_attr_get_str(
+			nla_line[RDMA_NLDEV_ATTR_RES_KERN_NAME]);
+
+	/* First pass over the QPs: validate and apply the "lqpn" filter. */
+	mnl_attr_for_each_nested(nla_entry, qp_table) {
+		struct nlattr *qp_line[RDMA_NLDEV_ATTR_MAX] = {};
+
+		err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, qp_line);
+		if (err != MNL_CB_OK)
+			return -EINVAL;
+
+		if (!qp_line[RDMA_NLDEV_ATTR_RES_LQPN])
+			return -EINVAL;
+
+		qpn = mnl_attr_get_u32(qp_line[RDMA_NLDEV_ATTR_RES_LQPN]);
+		if (rd_is_filtered_attr(rd, "lqpn", qpn,
+					qp_line[RDMA_NLDEV_ATTR_RES_LQPN]))
+			return MNL_CB_OK;
+	}
+
+	/* Validate the hardware counters without printing them yet. */
+	err = res_get_hwcounters(rd, hwc_table, false);
+	if (err != MNL_CB_OK)
+		return err;
+
+	if (rd->json_output) {
+		jsonw_string_field(rd->jw, "ifname", name);
+		if (port)
+			jsonw_uint_field(rd->jw, "port", port);
+		jsonw_uint_field(rd->jw, "cntn", cntn);
+	} else {
+		if (port)
+			pr_out("link %s/%u cntn %u ", name, port, cntn);
+		else
+			pr_out("dev %s cntn %u ", name, cntn);
+	}
+
+	res_print_uint(rd, "pid", pid, nla_line[RDMA_NLDEV_ATTR_RES_PID]);
+	print_comm(rd, comm, nla_line);
+
+	/* The table was validated above, so this pass only prints. */
+	res_get_hwcounters(rd, hwc_table, true);
+
+	/*
+	 * Open the QP list before the loop so that the closing ">" below is
+	 * always matched, even when the QP table is empty.
+	 */
+	if (!rd->json_output)
+		pr_out("\n LQPN: <");
+
+	isfirst = true;
+	mnl_attr_for_each_nested(nla_entry, qp_table) {
+		struct nlattr *qp_line[RDMA_NLDEV_ATTR_MAX] = {};
+
+		err = mnl_attr_parse_nested(nla_entry, rd_attr_cb, qp_line);
+		if (err != MNL_CB_OK)
+			return -EINVAL;
+
+		if (!qp_line[RDMA_NLDEV_ATTR_RES_LQPN])
+			return -EINVAL;
+
+		qpn = mnl_attr_get_u32(qp_line[RDMA_NLDEV_ATTR_RES_LQPN]);
+		if (rd->json_output) {
+			jsonw_uint_field(rd->jw, "lqpn", qpn);
+		} else {
+			/* qpn is unsigned, so it is printed with %u. */
+			if (isfirst)
+				pr_out("%u", qpn);
+			else
+				pr_out(", %u", qpn);
+		}
+		isfirst = false;
+	}
+
+	if (!rd->json_output)
+		pr_out(">\n");
+	return MNL_CB_OK;
+}
+
+/*
+ * Netlink reply callback for "statistic qp show": prints every counter
+ * entry of the nested RDMA_NLDEV_ATTR_STAT_COUNTER table through
+ * res_counter_line().
+ *
+ * @nlh:  received netlink message
+ * @data: the struct rd context
+ *
+ * Returns MNL_CB_ERROR when a mandatory attribute is missing. Otherwise
+ * returns the first non-MNL_CB_OK result met while walking the table, or
+ * MNL_CB_OK, which is also the result for an empty table.
+ */
+static int stat_qp_show_parse_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX] = {};
+	struct nlattr *nla_table, *nla_entry;
+	struct rd *rd = data;
+	const char *name;
+	uint32_t idx;
+	/* Initialised so that an empty table returns a defined value. */
+	int ret = MNL_CB_OK;
+
+	mnl_attr_parse(nlh, 0, rd_attr_cb, tb);
+	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
+	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER])
+		return MNL_CB_ERROR;
+
+	name = mnl_attr_get_str(tb[RDMA_NLDEV_ATTR_DEV_NAME]);
+	idx = mnl_attr_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	nla_table = tb[RDMA_NLDEV_ATTR_STAT_COUNTER];
+
+	mnl_attr_for_each_nested(nla_entry, nla_table) {
+		struct nlattr *nla_line[RDMA_NLDEV_ATTR_MAX] = {};
+
+		ret = mnl_attr_parse_nested(nla_entry, rd_attr_cb, nla_line);
+		if (ret != MNL_CB_OK)
+			break;
+
+		ret = res_counter_line(rd, name, idx, nla_line);
+		if (ret != MNL_CB_OK)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Filters accepted by the statistic commands, passed to rd_build_filter().
+ * All three are flagged as numeric.
+ */
+static const struct filters stat_valid_filters[MAX_NUMBER_OF_FILTERS] = {
+ { .name = "cntn", .is_number = true },
+ { .name = "lqpn", .is_number = true },
+ { .name = "pid", .is_number = true },
+};
+
+/*
+ * Dump the QP counters of the selected device/port and print them through
+ * stat_qp_show_parse_cb().
+ *
+ * Returns 0 without sending anything when rd->port_idx is 0; otherwise
+ * returns the result of building the filter, sending the request or
+ * receiving the reply.
+ */
+static int stat_qp_show_one_link(struct rd *rd)
+{
+	const int dump_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP;
+	uint32_t seq;
+	int rc;
+
+	if (rd->port_idx == 0)
+		return 0;
+
+	rc = rd_build_filter(rd, stat_valid_filters);
+	if (rc != 0)
+		return rc;
+
+	rd_prepare_msg(rd, RDMA_NLDEV_CMD_STAT_GET, &seq, dump_flags);
+	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
+	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_PORT_INDEX, rd->port_idx);
+	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_STAT_RES, RDMA_NLDEV_ATTR_RES_QP);
+
+	rc = rd_send_msg(rd);
+	if (rc != 0)
+		return rc;
+
+	/* In JSON mode the reply fields are wrapped in one object. */
+	if (rd->json_output)
+		jsonw_start_object(rd->jw);
+
+	rc = rd_recv_msg(rd, stat_qp_show_parse_cb, rd, seq);
+
+	if (rd->json_output)
+		jsonw_end_object(rd->jw);
+
+	return rc;
+}
+
+/*
+ * "show link" handler: runs stat_qp_show_one_link() through rd_exec_link(),
+ * passing false as the last argument (presumably a port is optional -
+ * confirm against rd_exec_link()).
+ */
+static int stat_qp_show_link(struct rd *rd)
+{
+	int rc = rd_exec_link(rd, stat_qp_show_one_link, false);
+
+	return rc;
+}
+
+/*
+ * Sub-command table for "statistic qp show". Both the NULL entry and "link"
+ * show the counters; "help" prints the usage text.
+ */
+static int stat_qp_show(struct rd *rd)
+{
+	const struct rd_cmd show_cmds[] = {
+		{ NULL,		stat_qp_show_link },
+		{ "link",	stat_qp_show_link },
+		{ "help",	stat_help },
+		{ 0 }
+	};
+
+	return rd_exec_cmd(rd, show_cmds, "parameter");
+}
+
+/*
+ * Sub-command table for "statistic qp". The NULL entry, "show" and "list"
+ * all go to stat_qp_show(); "help" prints the usage text.
+ */
+static int stat_qp(struct rd *rd)
+{
+	const struct rd_cmd qp_cmds[] = {
+		{ NULL,		stat_qp_show },
+		{ "show",	stat_qp_show },
+		{ "list",	stat_qp_show },
+		{ "help",	stat_help },
+		{ 0 }
+	};
+
+	return rd_exec_cmd(rd, qp_cmds, "parameter");
+}
+
+/*
+ * Entry point of the "statistic" object. "qp" goes to stat_qp(); the NULL
+ * entry and "help" print the usage text.
+ */
+int cmd_stat(struct rd *rd)
+{
+	const struct rd_cmd stat_cmds[] = {
+		{ NULL,		stat_help },
+		{ "help",	stat_help },
+		{ "qp",		stat_qp },
+		{ 0 }
+	};
+
+	return rd_exec_cmd(rd, stat_cmds, "statistic command");
+}
diff --git a/rdma/utils.c b/rdma/utils.c
index 558d1c29..7bc0439a 100644
--- a/rdma/utils.c
+++ b/rdma/utils.c
@@ -436,6 +436,13 @@ static const enum mnl_attr_data_type nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_DRIVER_S64] = MNL_TYPE_U64,
[RDMA_NLDEV_ATTR_DRIVER_U64] = MNL_TYPE_U64,
[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = MNL_TYPE_U8,
+ [RDMA_NLDEV_ATTR_STAT_COUNTER] = MNL_TYPE_NESTED,
+ [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = MNL_TYPE_NESTED,
+ [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = MNL_TYPE_U32,
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = MNL_TYPE_NESTED,
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = MNL_TYPE_NESTED,
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = MNL_TYPE_NUL_STRING,
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = MNL_TYPE_U64,
};
int rd_attr_check(const struct nlattr *attr, int *typep)
--
2.20.1
^ permalink raw reply related
* [PATCH iproute2-rc 1/8] rdma: Update uapi headers to add statistic counter support
From: Leon Romanovsky @ 2019-07-10 7:24 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Leon Romanovsky, netdev, David Ahern, Mark Zhang,
RDMA mailing list
In-Reply-To: <20190710072455.9125-1-leon@kernel.org>
From: Mark Zhang <markz@mellanox.com>
Update rdma_netlink.h to kernel commit 6e7be47a5345 ("RDMA/nldev:
Allow get default counter statistics through RDMA netlink").
Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
rdma/include/uapi/rdma/rdma_netlink.h | 82 +++++++++++++++++++++++++--
1 file changed, 78 insertions(+), 4 deletions(-)
diff --git a/rdma/include/uapi/rdma/rdma_netlink.h b/rdma/include/uapi/rdma/rdma_netlink.h
index 41cfa84c..d42d6fb2 100644
--- a/rdma/include/uapi/rdma/rdma_netlink.h
+++ b/rdma/include/uapi/rdma/rdma_netlink.h
@@ -147,6 +147,18 @@ enum {
IWPM_NLA_HELLO_MAX
};
+/* For RDMA_NLDEV_ATTR_DEV_NODE_TYPE */
+enum {
+ /* IB values map to NodeInfo:NodeType. */
+ RDMA_NODE_IB_CA = 1,
+ RDMA_NODE_IB_SWITCH,
+ RDMA_NODE_IB_ROUTER,
+ RDMA_NODE_RNIC,
+ RDMA_NODE_USNIC,
+ RDMA_NODE_USNIC_UDP,
+ RDMA_NODE_UNSPECIFIED,
+};
+
/*
* Local service operations:
* RESOLVE - The client requests the local service to resolve a path.
@@ -267,11 +279,15 @@ enum rdma_nldev_command {
RDMA_NLDEV_CMD_RES_PD_GET, /* can dump */
- RDMA_NLDEV_NUM_OPS
-};
+ RDMA_NLDEV_CMD_GET_CHARDEV,
-enum {
- RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
+ RDMA_NLDEV_CMD_STAT_SET,
+
+ RDMA_NLDEV_CMD_STAT_GET, /* can dump */
+
+ RDMA_NLDEV_CMD_STAT_DEL,
+
+ RDMA_NLDEV_NUM_OPS
};
enum rdma_nldev_print_type {
@@ -478,10 +494,68 @@ enum rdma_nldev_attr {
* File descriptor handle of the net namespace object
*/
RDMA_NLDEV_NET_NS_FD, /* u32 */
+ /*
+ * Information about a chardev.
+ * CHARDEV_TYPE is the name of the chardev ABI (ie uverbs, umad, etc)
+ * CHARDEV_ABI signals the ABI revision (historical)
+ * CHARDEV_NAME is the kernel name for the /dev/ file (no directory)
+ * CHARDEV is the 64 bit dev_t for the inode
+ */
+ RDMA_NLDEV_ATTR_CHARDEV_TYPE, /* string */
+ RDMA_NLDEV_ATTR_CHARDEV_NAME, /* string */
+ RDMA_NLDEV_ATTR_CHARDEV_ABI, /* u64 */
+ RDMA_NLDEV_ATTR_CHARDEV, /* u64 */
+ RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID, /* u64 */
+ /*
+ * Counter-specific attributes.
+ */
+ RDMA_NLDEV_ATTR_STAT_MODE, /* u32 */
+ RDMA_NLDEV_ATTR_STAT_RES, /* u32 */
+ RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, /* u32 */
+ RDMA_NLDEV_ATTR_STAT_COUNTER, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_COUNTER_ID, /* u32 */
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTERS, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY, /* nested table */
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, /* string */
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE, /* u64 */
/*
* Always the end
*/
RDMA_NLDEV_ATTR_MAX
};
+
+/*
+ * Supported counter bind modes. All modes are mutual-exclusive.
+ */
+enum rdma_nl_counter_mode {
+ RDMA_COUNTER_MODE_NONE,
+
+ /*
+ * A qp is bound with a counter automatically during initialization
+ * based on the auto mode (e.g., qp type, ...)
+ */
+ RDMA_COUNTER_MODE_AUTO,
+
+ /*
+ * Which qp are bound with which counter is explicitly specified
+ * by the user
+ */
+ RDMA_COUNTER_MODE_MANUAL,
+
+ /*
+ * Always the end
+ */
+ RDMA_COUNTER_MODE_MAX,
+};
+
+/*
+ * Supported criteria in counter auto mode.
+ * Currently only "qp type" is supported
+ */
+enum rdma_nl_counter_mask {
+ RDMA_COUNTER_MASK_QP_TYPE = 1,
+};
+
#endif /* _RDMA_NETLINK_H */
--
2.20.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox