* [PATCH bpf-next v3 08/11] samples/bpf: add unaligned chunks mode support to xdpsock
From: Kevin Laatz @ 2019-07-24 5:10 UTC (permalink / raw)
To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
jonathan.lemon, saeedm, maximmi, stephen
Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190724051043.14348-1-kevin.laatz@intel.com>
This patch adds support for the unaligned chunks mode. The addition of the
unaligned chunks option will allow users to run the application with more
relaxed chunk placement in the XDP umem.
Unaligned chunks mode can be used with the '-u' or '--unaligned' command
line options.
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
samples/bpf/xdpsock_user.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 93eaaf7239b2..26ba1a1fd582 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -67,6 +67,8 @@ static int opt_ifindex;
static int opt_queue;
static int opt_poll;
static int opt_interval = 1;
+static u32 opt_umem_flags;
+static int opt_unaligned_chunks;
static u32 opt_xdp_bind_flags;
static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
static __u32 prog_id;
@@ -282,7 +284,9 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = opt_xsk_frame_size,
.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+ .flags = opt_umem_flags
};
+
int ret;
umem = calloc(1, sizeof(*umem));
@@ -291,6 +295,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
&cfg);
+
if (ret)
exit_with_error(-ret);
@@ -352,6 +357,7 @@ static struct option long_options[] = {
{"zero-copy", no_argument, 0, 'z'},
{"copy", no_argument, 0, 'c'},
{"frame-size", required_argument, 0, 'f'},
+ {"unaligned", no_argument, 0, 'u'},
{0, 0, 0, 0}
};
@@ -372,6 +378,7 @@ static void usage(const char *prog)
" -z, --zero-copy Force zero-copy mode.\n"
" -c, --copy Force copy mode.\n"
" -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n"
+ " -u, --unaligned Enable unaligned chunk placement\n"
"\n";
fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
exit(EXIT_FAILURE);
@@ -384,7 +391,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options,
+ c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:u", long_options,
&option_index);
if (c == -1)
break;
@@ -424,12 +431,17 @@ static void parse_command_line(int argc, char **argv)
case 'c':
opt_xdp_bind_flags |= XDP_COPY;
break;
+ case 'u':
+ opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNKS;
+ opt_unaligned_chunks = 1;
+ break;
case 'F':
opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
case 'f':
opt_xsk_frame_size = atoi(optarg);
break;
+
default:
usage(basename(argv[0]));
}
@@ -442,7 +454,8 @@ static void parse_command_line(int argc, char **argv)
usage(basename(argv[0]));
}
- if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) {
+ if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
+ !opt_unaligned_chunks) {
fprintf(stderr, "--frame-size=%d is not a power of two\n",
opt_xsk_frame_size);
usage(basename(argv[0]));
--
2.17.1
^ permalink raw reply related
* [PATCH bpf-next v3 07/11] libbpf: add flags to umem config
From: Kevin Laatz @ 2019-07-24 5:10 UTC (permalink / raw)
To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
jonathan.lemon, saeedm, maximmi, stephen
Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190724051043.14348-1-kevin.laatz@intel.com>
This patch adds a 'flags' field to the umem_config and umem_reg structs.
This will allow for more options to be added for configuring umems.
The first use for the flags field is to add a flag for unaligned chunks
mode. These flags can either be user-provided or filled with a default.
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
v2:
- Removed the headroom check from this patch. It has moved to the
previous patch.
---
tools/include/uapi/linux/if_xdp.h | 4 ++++
tools/lib/bpf/xsk.c | 3 +++
tools/lib/bpf/xsk.h | 2 ++
3 files changed, 9 insertions(+)
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index faaa5ca2a117..594bcebb189c 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -17,6 +17,9 @@
#define XDP_COPY (1 << 1) /* Force copy-mode */
#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNKS (1 << 0)
+
struct sockaddr_xdp {
__u16 sxdp_family;
__u16 sxdp_flags;
@@ -53,6 +56,7 @@ struct xdp_umem_reg {
__u64 len; /* Length of packet data area */
__u32 chunk_size;
__u32 headroom;
+ __u32 flags;
};
struct xdp_statistics {
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 5007b5d4fd2c..5e7e4d420ee0 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -116,6 +116,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+ cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
return;
}
@@ -123,6 +124,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg,
cfg->comp_size = usr_cfg->comp_size;
cfg->frame_size = usr_cfg->frame_size;
cfg->frame_headroom = usr_cfg->frame_headroom;
+ cfg->flags = usr_cfg->flags;
}
static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
@@ -182,6 +184,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
mr.len = size;
mr.chunk_size = umem->config.frame_size;
mr.headroom = umem->config.frame_headroom;
+ mr.flags = umem->config.flags;
err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
if (err) {
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 833a6e60d065..44a03d8c34b9 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -170,12 +170,14 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */
#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
struct xsk_umem_config {
__u32 fill_size;
__u32 comp_size;
__u32 frame_size;
__u32 frame_headroom;
+ __u32 flags;
};
/* Flags for the libbpf_flags field. */
--
2.17.1
^ permalink raw reply related
* [PATCH bpf-next v3 06/11] mlx5e: modify driver for handling offsets
From: Kevin Laatz @ 2019-07-24 5:10 UTC (permalink / raw)
To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
jonathan.lemon, saeedm, maximmi, stephen
Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190724051043.14348-1-kevin.laatz@intel.com>
With the addition of the unaligned chunks option, we need to make sure we
handle the offsets accordingly based on the mode we are currently running
in. This patch modifies the driver to appropriately mask the address for
each case.
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
---
v3:
- Use new helper function to handle offset
---
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 8 ++++++--
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 9 +++++++--
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index b0b982cf69bb..d5245893d2c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -122,6 +122,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len, bool xsk)
{
struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+ struct xdp_umem *umem = rq->umem;
struct xdp_buff xdp;
u32 act;
int err;
@@ -138,8 +139,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(prog, &xdp);
- if (xsk)
- xdp.handle += xdp.data - xdp.data_hard_start;
+ if (xsk) {
+ u64 off = xdp.data - xdp.data_hard_start;
+
+ xdp.handle = xsk_umem_handle_offset(umem, xdp.handle, off);
+ }
switch (act) {
case XDP_PASS:
*rx_headroom = xdp.data - xdp.data_hard_start;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 35e188cf4ea4..f596e63cba00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -61,6 +61,7 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
struct mlx5e_xdp_xmit_data xdptxd;
bool work_done = true;
bool flush = false;
+ u64 addr, offset;
xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK;
@@ -82,8 +83,12 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
break;
}
- xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
- xdptxd.data = xdp_umem_get_data(umem, desc.addr);
+ /* for unaligned chunks need to take offset from upper bits */
+ offset = (desc.addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+ addr = (desc.addr & XSK_UNALIGNED_BUF_ADDR_MASK);
+
+ xdptxd.dma_addr = xdp_umem_get_dma(umem, addr + offset);
+ xdptxd.data = xdp_umem_get_data(umem, addr + offset);
xdptxd.len = desc.len;
dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
--
2.17.1
^ permalink raw reply related
* [PATCH bpf-next v3 05/11] ixgbe: modify driver for handling offsets
From: Kevin Laatz @ 2019-07-24 5:10 UTC (permalink / raw)
To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
jonathan.lemon, saeedm, maximmi, stephen
Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190724051043.14348-1-kevin.laatz@intel.com>
With the addition of the unaligned chunks option, we need to make sure we
handle the offsets accordingly based on the mode we are currently running
in. This patch modifies the driver to appropriately mask the address for
each case.
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
---
v3:
- Use new helper function to handle offset
---
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index bc86057628c8..4481916563a8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -143,7 +143,9 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
struct ixgbe_ring *rx_ring,
struct xdp_buff *xdp)
{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
int err, result = IXGBE_XDP_PASS;
+ u64 offset = umem->headroom;
struct bpf_prog *xdp_prog;
struct xdp_frame *xdpf;
u32 act;
@@ -151,7 +153,10 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
- xdp->handle += xdp->data - xdp->data_hard_start;
+ offset += xdp->data - xdp->data_hard_start;
+
+ xdp->handle = xsk_umem_handle_offset(umem, xdp->handle, offset);
+
switch (act) {
case XDP_PASS:
break;
@@ -243,7 +248,7 @@ void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
bi->addr += hr;
- bi->handle = (u64)handle + rx_ring->xsk_umem->headroom;
+ bi->handle = (u64)handle;
}
static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
@@ -269,7 +274,7 @@ static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;
- bi->handle = handle + umem->headroom;
+ bi->handle = handle;
xsk_umem_discard_addr(umem);
return true;
@@ -296,7 +301,7 @@ static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;
- bi->handle = handle + umem->headroom;
+ bi->handle = handle;
xsk_umem_discard_addr_rq(umem);
return true;
@@ -565,6 +570,7 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
struct ixgbe_tx_buffer *tx_bi;
bool work_done = true;
struct xdp_desc desc;
+ u64 addr, offset;
dma_addr_t dma;
u32 cmd_type;
@@ -578,7 +584,11 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
break;
- dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+ /* for unaligned chunks need to take offset from upper bits */
+ offset = (desc.addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+ addr = (desc.addr & XSK_UNALIGNED_BUF_ADDR_MASK);
+
+ dma = xdp_umem_get_dma(xdp_ring->xsk_umem, addr + offset);
dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
DMA_BIDIRECTIONAL);
--
2.17.1
^ permalink raw reply related
* [PATCH bpf-next v3 04/11] i40e: modify driver for handling offsets
From: Kevin Laatz @ 2019-07-24 5:10 UTC (permalink / raw)
To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
jonathan.lemon, saeedm, maximmi, stephen
Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190724051043.14348-1-kevin.laatz@intel.com>
With the addition of the unaligned chunks option, we need to make sure we
handle the offsets accordingly based on the mode we are currently running
in. This patch modifies the driver to appropriately mask the address for
each case.
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
---
v3:
- Use new helper function for handling the offset
---
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index dfa096db2244..599498e21e4a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -190,7 +190,9 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
**/
static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
{
+ struct xdp_umem *umem = rx_ring->xsk_umem;
int err, result = I40E_XDP_PASS;
+ u64 offset = umem->headroom;
struct i40e_ring *xdp_ring;
struct bpf_prog *xdp_prog;
u32 act;
@@ -201,7 +203,10 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
*/
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp);
- xdp->handle += xdp->data - xdp->data_hard_start;
+ offset += xdp->data - xdp->data_hard_start;
+
+ xdp->handle = xsk_umem_handle_offset(umem, xdp->handle, offset);
+
switch (act) {
case XDP_PASS:
break;
@@ -262,7 +267,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;
- bi->handle = handle + umem->headroom;
+ bi->handle = handle;
xsk_umem_discard_addr(umem);
return true;
@@ -299,7 +304,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
bi->addr = xdp_umem_get_data(umem, handle);
bi->addr += hr;
- bi->handle = handle + umem->headroom;
+ bi->handle = handle;
xsk_umem_discard_addr_rq(umem);
return true;
@@ -464,7 +469,7 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
bi->addr += hr;
- bi->handle = (u64)handle + rx_ring->xsk_umem->headroom;
+ bi->handle = (u64)handle;
}
/**
@@ -635,6 +640,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
struct i40e_tx_buffer *tx_bi;
bool work_done = true;
struct xdp_desc desc;
+ u64 addr, offset;
dma_addr_t dma;
while (budget-- > 0) {
@@ -647,7 +653,11 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
break;
- dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
+ /* for unaligned chunks need to take offset from upper bits */
+ offset = (desc.addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+ addr = (desc.addr & XSK_UNALIGNED_BUF_ADDR_MASK);
+
+ dma = xdp_umem_get_dma(xdp_ring->xsk_umem, addr + offset);
dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
DMA_BIDIRECTIONAL);
--
2.17.1
^ permalink raw reply related
* [PATCH bpf-next v3 03/11] xsk: add support to allow unaligned chunk placement
From: Kevin Laatz @ 2019-07-24 5:10 UTC (permalink / raw)
To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
jonathan.lemon, saeedm, maximmi, stephen
Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190724051043.14348-1-kevin.laatz@intel.com>
Currently, addresses are chunk size aligned. This means, we are very
restricted in terms of where we can place chunk within the umem. For
example, if we have a chunk size of 2k, then our chunks can only be placed
at 0,2k,4k,6k,8k... and so on (ie. every 2k starting from 0).
This patch introduces the ability to use unaligned chunks. With these
changes, we are no longer bound to having to place chunks at a 2k (or
whatever your chunk size is) interval. Since we are no longer dealing with
aligned chunks, they can now cross page boundaries. Checks for page
contiguity have been added in order to keep track of which pages are
followed by a physically contiguous page.
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
v2:
- Add checks for the flags coming from userspace
- Fix how we get chunk_size in xsk_diag.c
- Add defines for masking the new descriptor format
- Modified the rx functions to use new descriptor format
- Modified the tx functions to use new descriptor format
v3:
- Add helper function to do address/offset masking/addition
---
include/net/xdp_sock.h | 17 ++++++++
include/uapi/linux/if_xdp.h | 9 ++++
net/xdp/xdp_umem.c | 18 +++++---
net/xdp/xsk.c | 86 ++++++++++++++++++++++++++++++-------
net/xdp/xsk_diag.c | 2 +-
net/xdp/xsk_queue.h | 68 +++++++++++++++++++++++++----
6 files changed, 170 insertions(+), 30 deletions(-)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 69796d264f06..738996c0f995 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -19,6 +19,7 @@ struct xsk_queue;
struct xdp_umem_page {
void *addr;
dma_addr_t dma;
+ bool next_pg_contig;
};
struct xdp_umem_fq_reuse {
@@ -48,6 +49,7 @@ struct xdp_umem {
bool zc;
spinlock_t xsk_list_lock;
struct list_head xsk_list;
+ u32 flags;
};
struct xdp_sock {
@@ -144,6 +146,15 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
rq->handles[rq->length++] = addr;
}
+
+static inline u64 xsk_umem_handle_offset(struct xdp_umem *umem, u64 handle,
+ u64 offset)
+{
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
+ return handle |= (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+ else
+ return handle += offset;
+}
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
@@ -241,6 +252,12 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
{
}
+static inline u64 xsk_umem_handle_offset(struct xdp_umem *umem, u64 handle,
+ u64 offset)
+{
+ return NULL;
+}
+
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index faaa5ca2a117..f8dc68fcdf78 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -17,6 +17,9 @@
#define XDP_COPY (1 << 1) /* Force copy-mode */
#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNKS (1 << 0)
+
struct sockaddr_xdp {
__u16 sxdp_family;
__u16 sxdp_flags;
@@ -53,6 +56,7 @@ struct xdp_umem_reg {
__u64 len; /* Length of packet data area */
__u32 chunk_size;
__u32 headroom;
+ __u32 flags;
};
struct xdp_statistics {
@@ -74,6 +78,11 @@ struct xdp_options {
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
+/* Masks for unaligned chunks mode */
+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
+#define XSK_UNALIGNED_BUF_ADDR_MASK \
+ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+
/* Rx/Tx descriptor */
struct xdp_desc {
__u64 addr;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 83de74ca729a..952ca22103e9 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -299,6 +299,7 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
+ bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNKS;
u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
unsigned int chunks, chunks_per_page;
u64 addr = mr->addr, size = mr->len;
@@ -314,7 +315,10 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (!is_power_of_2(chunk_size))
+ if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNKS))
+ return -EINVAL;
+
+ if (!unaligned_chunks && !is_power_of_2(chunk_size))
return -EINVAL;
if (!PAGE_ALIGNED(addr)) {
@@ -331,9 +335,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (chunks == 0)
return -EINVAL;
- chunks_per_page = PAGE_SIZE / chunk_size;
- if (chunks < chunks_per_page || chunks % chunks_per_page)
- return -EINVAL;
+ if (!unaligned_chunks) {
+ chunks_per_page = PAGE_SIZE / chunk_size;
+ if (chunks < chunks_per_page || chunks % chunks_per_page)
+ return -EINVAL;
+ }
headroom = ALIGN(headroom, 64);
@@ -342,13 +348,15 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
umem->address = (unsigned long)addr;
- umem->chunk_mask = ~((u64)chunk_size - 1);
+ umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
+ : ~((u64)chunk_size - 1);
umem->size = size;
umem->headroom = headroom;
umem->chunk_size_nohr = chunk_size - headroom;
umem->npgs = size / PAGE_SIZE;
umem->pgs = NULL;
umem->user = NULL;
+ umem->flags = mr->flags;
INIT_LIST_HEAD(&umem->xsk_list);
spin_lock_init(&umem->xsk_list_lock);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 59b57d708697..b3ab653091c4 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs);
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
- return xskq_peek_addr(umem->fq, addr);
+ return xskq_peek_addr(umem->fq, addr, umem);
}
EXPORT_SYMBOL(xsk_umem_peek_addr);
@@ -55,21 +55,42 @@ void xsk_umem_discard_addr(struct xdp_umem *umem)
}
EXPORT_SYMBOL(xsk_umem_discard_addr);
+/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
+ * each page. This is only required in copy mode.
+ */
+static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
+ u32 len, u32 metalen)
+{
+ void *to_buf = xdp_umem_get_data(umem, addr);
+
+ if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
+ void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
+ u64 page_start = addr & (PAGE_SIZE - 1);
+ u64 first_len = PAGE_SIZE - (addr - page_start);
+
+ memcpy(to_buf, from_buf, first_len + metalen);
+ memcpy(next_pg_addr, from_buf + first_len, len - first_len);
+
+ return;
+ }
+
+ memcpy(to_buf, from_buf, len + metalen);
+}
+
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- void *to_buf, *from_buf;
+ u64 offset = xs->umem->headroom;
+ void *from_buf;
u32 metalen;
u64 addr;
int err;
- if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
}
- addr += xs->umem->headroom;
-
if (unlikely(xdp_data_meta_unsupported(xdp))) {
from_buf = xdp->data;
metalen = 0;
@@ -78,9 +99,13 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
metalen = xdp->data - xdp->data_meta;
}
- to_buf = xdp_umem_get_data(xs->umem, addr);
- memcpy(to_buf, from_buf, len + metalen);
- addr += metalen;
+ __xsk_rcv_memcpy(xs->umem, addr + offset, from_buf, len, metalen);
+
+ offset += metalen;
+ if (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
+ addr |= offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+ else
+ addr += offset;
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err) {
xskq_discard_addr(xs->umem->fq);
@@ -127,6 +152,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
u32 len = xdp->data_end - xdp->data;
void *buffer;
u64 addr;
+ u64 offset = xs->umem->headroom;
int err;
spin_lock_bh(&xs->rx_lock);
@@ -136,17 +162,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
goto out_unlock;
}
- if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
err = -ENOSPC;
goto out_drop;
}
- addr += xs->umem->headroom;
-
- buffer = xdp_umem_get_data(xs->umem, addr);
+ buffer = xdp_umem_get_data(xs->umem, addr + offset);
memcpy(buffer, xdp->data_meta, len + metalen);
- addr += metalen;
+ offset += metalen;
+
+ addr = xsk_umem_handle_offset(xs->umem, addr, offset);
err = xskq_produce_batch_desc(xs->rx, addr, len);
if (err)
goto out_drop;
@@ -190,7 +216,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
- if (!xskq_peek_desc(xs->tx, desc))
+ if (!xskq_peek_desc(xs->tx, desc, umem))
continue;
if (xskq_produce_addr_lazy(umem->cq, desc->addr))
@@ -243,7 +269,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- while (xskq_peek_desc(xs->tx, &desc)) {
+ while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
char *buffer;
u64 addr;
u32 len;
@@ -262,6 +288,10 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb_put(skb, len);
addr = desc.addr;
+ if (xs->umem->flags & XDP_UMEM_UNALIGNED_CHUNKS)
+ addr = (addr & XSK_UNALIGNED_BUF_ADDR_MASK) |
+ (addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+
buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
@@ -272,7 +302,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+ skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
@@ -412,6 +442,28 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
return sock;
}
+/* Check if umem pages are contiguous.
+ * If zero-copy mode, use the DMA address to do the page contiguity check
+ * For all other modes we use addr (kernel virtual address)
+ */
+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
+{
+ int i;
+
+ if (flags & XDP_ZEROCOPY) {
+ for (i = 0; i < umem->npgs - 1; i++)
+ umem->pages[i].next_pg_contig =
+ (umem->pages[i].dma + PAGE_SIZE ==
+ umem->pages[i + 1].dma);
+ return;
+ }
+
+ for (i = 0; i < umem->npgs - 1; i++)
+ umem->pages[i].next_pg_contig =
+ (umem->pages[i].addr + PAGE_SIZE ==
+ umem->pages[i + 1].addr);
+}
+
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -500,6 +552,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
if (err)
goto out_unlock;
+
+ xsk_check_page_contiguity(xs->umem, flags);
}
xs->dev = dev;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index d5e06c8e0cbf..9986a759fe06 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
du.id = umem->id;
du.size = umem->size;
du.num_pages = umem->npgs;
- du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+ du.chunk_size = umem->chunk_size_nohr + umem->headroom;
du.headroom = umem->headroom;
du.ifindex = umem->dev ? umem->dev->ifindex : 0;
du.queue_id = umem->queue_id;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 909c5168ed0f..0d77212367f0 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -133,6 +133,16 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
/* UMEM queue */
+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
+ u64 length)
+{
+ bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
+ bool next_pg_contig =
+ umem->pages[(addr >> PAGE_SHIFT) + 1].next_pg_contig;
+
+ return cross_pg && !next_pg_contig;
+}
+
static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
{
if (addr >= q->size) {
@@ -143,23 +153,50 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
return true;
}
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
+ u64 length,
+ struct xdp_umem *umem)
+{
+ addr += addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+ addr &= XSK_UNALIGNED_BUF_ADDR_MASK;
+ if (addr >= q->size ||
+ xskq_crosses_non_contig_pg(umem, addr, length)) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+}
+
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS) {
+ if (xskq_is_valid_addr_unaligned(q, *addr,
+ umem->chunk_size_nohr,
+ umem))
+ return addr;
+ goto out;
+ }
+
if (xskq_is_valid_addr(q, *addr))
return addr;
+out:
q->cons_tail++;
}
return NULL;
}
-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
@@ -170,7 +207,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
smp_rmb();
}
- return xskq_validate_addr(q, addr);
+ return xskq_validate_addr(q, addr, umem);
}
static inline void xskq_discard_addr(struct xsk_queue *q)
@@ -229,8 +266,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q)
/* Rx/Tx queue */
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
+ struct xdp_umem *umem)
{
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNKS) {
+ if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+ return false;
+
+ if (d->len > umem->chunk_size_nohr || d->options) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+ }
+
if (!xskq_is_valid_addr(q, d->addr))
return false;
@@ -244,14 +294,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
}
static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
- struct xdp_desc *desc)
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
{
while (q->cons_tail != q->cons_head) {
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
*desc = READ_ONCE(ring->desc[idx]);
- if (xskq_is_valid_desc(q, desc))
+ if (xskq_is_valid_desc(q, desc, umem))
return desc;
q->cons_tail++;
@@ -261,7 +312,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
}
static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
- struct xdp_desc *desc)
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
{
if (q->cons_tail == q->cons_head) {
smp_mb(); /* D, matches A */
@@ -272,7 +324,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
smp_rmb(); /* C, matches B */
}
- return xskq_validate_desc(q, desc);
+ return xskq_validate_desc(q, desc, umem);
}
static inline void xskq_discard_desc(struct xsk_queue *q)
--
2.17.1
^ permalink raw reply related
* [PATCH bpf-next v3 02/11] ixgbe: simplify Rx buffer recycle
From: Kevin Laatz @ 2019-07-24 5:10 UTC (permalink / raw)
To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
jonathan.lemon, saeedm, maximmi, stephen
Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190724051043.14348-1-kevin.laatz@intel.com>
Currently, the dma, addr and handle are modified when we reuse Rx buffers
in zero-copy mode. However, this is not required as the inputs to the
function are copies, not the original values themselves. As we use the
copies within the function, we can use the original 'obi' values
directly without having to mask and add the headroom.
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 6b609553329f..bc86057628c8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -201,8 +201,6 @@ ixgbe_rx_buffer *ixgbe_get_rx_buffer_zc(struct ixgbe_ring *rx_ring,
static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
struct ixgbe_rx_buffer *obi)
{
- unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
- u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
u16 nta = rx_ring->next_to_alloc;
struct ixgbe_rx_buffer *nbi;
@@ -212,14 +210,9 @@ static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
/* transfer page from old buffer to new buffer */
- nbi->dma = obi->dma & mask;
- nbi->dma += hr;
-
- nbi->addr = (void *)((unsigned long)obi->addr & mask);
- nbi->addr += hr;
-
- nbi->handle = obi->handle & mask;
- nbi->handle += rx_ring->xsk_umem->headroom;
+ nbi->dma = obi->dma;
+ nbi->addr = obi->addr;
+ nbi->handle = obi->handle;
obi->addr = NULL;
obi->skb = NULL;
--
2.17.1
^ permalink raw reply related
* [PATCH bpf-next v3 01/11] i40e: simplify Rx buffer recycle
From: Kevin Laatz @ 2019-07-24 5:10 UTC (permalink / raw)
To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
jonathan.lemon, saeedm, maximmi, stephen
Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190724051043.14348-1-kevin.laatz@intel.com>
Currently, the dma, addr and handle are modified when we reuse Rx buffers
in zero-copy mode. However, this is not required as the inputs to the
function are copies, not the original values themselves. As we use the
copies within the function, we can use the original 'old_bi' values
directly without having to mask and add the headroom.
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
---
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 32bad014d76c..dfa096db2244 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -420,8 +420,6 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *old_bi)
{
struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc];
- unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
- u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
u16 nta = rx_ring->next_to_alloc;
/* update, and store next to alloc */
@@ -429,14 +427,9 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
/* transfer page from old buffer to new buffer */
- new_bi->dma = old_bi->dma & mask;
- new_bi->dma += hr;
-
- new_bi->addr = (void *)((unsigned long)old_bi->addr & mask);
- new_bi->addr += hr;
-
- new_bi->handle = old_bi->handle & mask;
- new_bi->handle += rx_ring->xsk_umem->headroom;
+ new_bi->dma = old_bi->dma;
+ new_bi->addr = old_bi->addr;
+ new_bi->handle = old_bi->handle;
old_bi->addr = NULL;
}
--
2.17.1
^ permalink raw reply related
* [PATCH bpf-next v3 00/11] XDP unaligned chunk placement support
From: Kevin Laatz @ 2019-07-24 5:10 UTC (permalink / raw)
To: netdev, ast, daniel, bjorn.topel, magnus.karlsson, jakub.kicinski,
jonathan.lemon, saeedm, maximmi, stephen
Cc: bruce.richardson, ciara.loftus, bpf, intel-wired-lan, Kevin Laatz
In-Reply-To: <20190716030637.5634-1-kevin.laatz@intel.com>
This patch set adds the ability to use unaligned chunks in the XDP umem.
Currently, all chunk addresses passed to the umem are masked to be chunk
size aligned (max is PAGE_SIZE). This limits where we can place chunks
within the umem as well as limiting the packet sizes that are supported.
The changes in this patch set removes these restrictions, allowing XDP to
be more flexible in where it can place a chunk within a umem. By relaxing
where the chunks can be placed, it allows us to use an arbitrary buffer
size and place that wherever we have a free address in the umem. These
changes add the ability to support arbitrary frame sizes up to 4k
(PAGE_SIZE) and make it easy to integrate with other existing frameworks
that have their own memory management systems, such as DPDK.
In DPDK, for example, there is already support for AF_XDP with zero-copy.
However, with this patch set the integration will be much more seamless.
You can find the DPDK AF_XDP driver at:
https://git.dpdk.org/dpdk/tree/drivers/net/af_xdp
Since we are now dealing with arbitrary frame sizes, we need also need to
update how we pass around addresses. Currently, the addresses can simply be
masked to 2k to get back to the original address. This becomes less trivial
when using frame sizes that are not a 'power of 2' size. This patch set
modifies the Rx/Tx descriptor format to use the upper 16-bits of the addr
field for an offset value, leaving the lower 48-bits for the address (this
leaves us with 256 Terabytes, which should be enough!). We only need to use
the upper 16-bits to store the offset when running in unaligned mode.
Rather than adding the offset (headroom etc) to the address, we will store
it in the upper 16-bits of the address field. This way, we can easily add
the offset to the address where we need it, using some bit manipulation and
addition, and we can also easily get the original address wherever we need
it (for example in i40e_zca_free) by simply masking to get the lower
48-bits of the address field.
The numbers below were recorded with the following set up:
- Intel(R) Xeon(R) Gold 6140 CPU @ 2.30GHz
- Intel Corporation Ethernet Controller XXV710 for 25GbE SFP28 (rev 02)
- Driver: i40e
- Application: xdpsock with l2fwd (single interface)
These are solely for comparing performance with and without the patches.
The largest drop was ~1% (in zero-copy mode).
+-------------------------+------------+-----------------+-------------+
| Buffer size: 2048 | SKB mode | Zero-copy | Copy |
+-------------------------+------------+-----------------+-------------+
| Aligned (baseline) | 1.7 Mpps | 15.3 Mpps | 2.08 Mpps |
+-------------------------+------------+-----------------+-------------+
| Aligned (with patches) | 1.7 Mpps | 15.1 Mpps | 2.08 Mpps |
+-------------------------+------------+-----------------+-------------+
| Unaligned | 1.7 Mpps | 14.5 Mpps | 2.08 Mpps |
+-------------------------+------------+-----------------+-------------+
This patch set has been applied against commit 66b5f1c43984
("net-ipv6-ndisc: add support for RFC7710 RA Captive Portal Identifier")
Structure of the patch set:
Patch 1:
- Remove unnecessary masking and headroom addition during zero-copy Rx
buffer recycling in i40e. This change is required in order for the
buffer recycling to work in the unaligned chunk mode.
Patch 2:
- Remove unnecessary masking and headroom addition during
zero-copy Rx buffer recycling in ixgbe. This change is required in
order for the buffer recycling to work in the unaligned chunk mode.
Patch 3:
- Add infrastructure for unaligned chunks. Since we are dealing with
unaligned chunks that could potentially cross a physical page boundary,
we add checks to keep track of that information. We can later use this
information to correctly handle buffers that are placed at an address
where they cross a page boundary. This patch also modifies the
existing Rx and Tx functions to use the new descriptor format. To
handle addresses correctly, we need to mask appropriately based on
whether we are in aligned or unaligned mode.
Patch 4:
- This patch updates the i40e driver to make use of the new descriptor
format.
Patch 5:
- This patch updates the ixgbe driver to make use of the new descriptor
format.
Patch 6:
- This patch updates the mlx5e driver to make use of the new descriptor
format. These changes are required to handle the new descriptor format
and for unaligned chunks support.
Patch 7:
- Add flags for umem configuration to libbpf
Patch 8:
- Modify xdpsock application to add a command line option for
unaligned chunks
Patch 9:
- Since we can now run the application in unaligned chunk mode, we need
to make sure we recycle the buffers appropriately.
Patch 10:
- Adds hugepage support to the xdpsock application
Patch 11:
- Documentation update to include the unaligned chunk scenario. We need
to explicitly state that the incoming addresses are only masked in the
aligned chunk mode and not the unaligned chunk mode.
---
v2:
- fixed checkpatch issues
- fixed Rx buffer recycling for unaligned chunks in xdpsock
- removed unused defines
- fixed how chunk_size is calculated in xsk_diag.c
- added some performance numbers to cover letter
- modified descriptor format to make it easier to retrieve original
address
- removed patch adding off_t off to the zero copy allocator. This is no
longer needed with the new descriptor format.
v3:
- added patch for mlx5 driver changes needed for unaligned chunks
- moved offset handling to new helper function
- changed value used for the umem chunk_mask. Now using the new
descriptor format to save us doing the calculations in a number of
places meaning more of the code is left unchanged while adding
unaligned chunk support.
Kevin Laatz (11):
i40e: simplify Rx buffer recycle
ixgbe: simplify Rx buffer recycle
xsk: add support to allow unaligned chunk placement
i40e: modify driver for handling offsets
ixgbe: modify driver for handling offsets
mlx5e: modify driver for handling offsets
libbpf: add flags to umem config
samples/bpf: add unaligned chunks mode support to xdpsock
samples/bpf: add buffer recycling for unaligned chunks to xdpsock
samples/bpf: use hugepages in xdpsock app
doc/af_xdp: include unaligned chunk case
Documentation/networking/af_xdp.rst | 10 ++-
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 33 +++----
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 33 +++----
.../net/ethernet/mellanox/mlx5/core/en/xdp.c | 8 +-
.../ethernet/mellanox/mlx5/core/en/xsk/tx.c | 9 +-
include/net/xdp_sock.h | 17 ++++
include/uapi/linux/if_xdp.h | 9 ++
net/xdp/xdp_umem.c | 18 ++--
net/xdp/xsk.c | 86 +++++++++++++++----
net/xdp/xsk_diag.c | 2 +-
net/xdp/xsk_queue.h | 68 +++++++++++++--
samples/bpf/xdpsock_user.c | 56 ++++++++----
tools/include/uapi/linux/if_xdp.h | 4 +
tools/lib/bpf/xsk.c | 3 +
tools/lib/bpf/xsk.h | 2 +
15 files changed, 274 insertions(+), 84 deletions(-)
--
2.17.1
^ permalink raw reply
* Re: [Intel-wired-lan] [PATCH v2 00/10] XDP unaligned chunk placement support
From: Magnus Karlsson @ 2019-07-24 13:25 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Kevin Laatz, Jakub Kicinski, Daniel Borkmann, Network Development,
ciara.loftus, Alexei Starovoitov, intel-wired-lan, Jonathan Lemon,
bruce.richardson, bpf, Björn Töpel, Karlsson, Magnus
In-Reply-To: <CAADnVQLEdJwT7DRCpp2zuKWTg0uj=WKQkFc2LZ4o+1fDgnEFLg@mail.gmail.com>
On Tue, Jul 23, 2019 at 11:08 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> Johnathan, Bjorn, Jakub,
> Please review!
> The patch set has been pending for a week.
There is a v3 coming out shortly so I suggest we wait for that. It
will have Mellanox support for this feature too and some clean ups. I
refrained from posting a review on the mailing list due to the merge
window being closed last week, but maybe that was not correct. Should
I still post reviews for new features submitted during the closed
merge window period? I am happy to do it since I do not have any other
tasks during that time. It is a quite period for me. Just let me know.
/Magnus
> On Tue, Jul 16, 2019 at 4:21 AM Kevin Laatz <kevin.laatz@intel.com> wrote:
> >
> > This patch set adds the ability to use unaligned chunks in the XDP umem.
> >
> > Currently, all chunk addresses passed to the umem are masked to be chunk
> > size aligned (default is 2k, max is PAGE_SIZE). This limits where we can
> > place chunks within the umem as well as limiting the packet sizes that are
> > supported.
> >
> > The changes in this patch set removes these restrictions, allowing XDP to
> > be more flexible in where it can place a chunk within a umem. By relaxing
> > where the chunks can be placed, it allows us to use an arbitrary buffer
> > size and place that wherever we have a free address in the umem. These
> > changes add the ability to support arbitrary frame sizes up to 4k
> > (PAGE_SIZE) and make it easy to integrate with other existing frameworks
> > that have their own memory management systems, such as DPDK.
> >
> > Since we are now dealing with arbitrary frame sizes, we need also need to
> > update how we pass around addresses. Currently, the addresses can simply be
> > masked to 2k to get back to the original address. This becomes less trivial
> > when using frame sizes that are not a 'power of 2' size. This patch set
> > modifies the Rx/Tx descriptor format to use the upper 16-bits of the addr
> > field for an offset value, leaving the lower 48-bits for the address (this
> > leaves us with 256 Terabytes, which should be enough!). We only need to use
> > the upper 16-bits to store the offset when running in unaligned mode.
> > Rather than adding the offset (headroom etc) to the address, we will store
> > it in the upper 16-bits of the address field. This way, we can easily add
> > the offset to the address where we need it, using some bit manipulation and
> > addition, and we can also easily get the original address wherever we need
> > it (for example in i40e_zca_free) by simply masking to get the lower
> > 48-bits of the address field.
> >
> > The numbers below were recorded with the following set up:
> > - Intel(R) Xeon(R) Gold 6140 CPU @ 2.30GHz
> > - Intel Corporation Ethernet Controller XXV710 for 25GbE SFP28 (rev 02)
> > - Driver: i40e
> > - Application: xdpsock with l2fwd (single interface)
> >
> > These are solely for comparing performance with and without the patches.
> > The largest drop was ~1% (in zero-copy mode).
> >
> > +-------------------------+------------+-----------------+-------------+
> > | Buffer size: 2048 | SKB mode | Zero-copy | Copy |
> > +-------------------------+------------+-----------------+-------------+
> > | Aligned (baseline) | 1.7 Mpps | 15.3 Mpps | 2.08 Mpps |
> > +-------------------------+------------+-----------------+-------------+
> > | Aligned (with patches) | 1.7 Mpps | 15.1 Mpps | 2.08 Mpps |
> > +-------------------------+------------+-----------------+-------------+
> > | Unaligned | 1.7 Mpps | 14.5 Mpps | 2.08 Mpps |
> > +-------------------------+------------+-----------------+-------------+
> >
> > NOTE: We are currently working on the changes required in the Mellanox
> > driver. We will include these in the v3.
> >
> > Structure of the patchset:
> > Patch 1:
> > - Remove unnecessary masking and headroom addition during zero-copy Rx
> > buffer recycling in i40e. This change is required in order for the
> > buffer recycling to work in the unaligned chunk mode.
> >
> > Patch 2:
> > - Remove unnecessary masking and headroom addition during
> > zero-copy Rx buffer recycling in ixgbe. This change is required in
> > order for the buffer recycling to work in the unaligned chunk mode.
> >
> > Patch 3:
> > - Add infrastructure for unaligned chunks. Since we are dealing with
> > unaligned chunks that could potentially cross a physical page boundary,
> > we add checks to keep track of that information. We can later use this
> > information to correctly handle buffers that are placed at an address
> > where they cross a page boundary. This patch also modifies the
> > existing Rx and Tx functions to use the new descriptor format. To
> > handle addresses correctly, we need to mask appropriately based on
> > whether we are in aligned or unaligned mode.
> >
> > Patch 4:
> > - This patch updates the i40e driver to make use of the new descriptor
> > format. The new format is particularly useful here since we can now
> > retrieve the original address in places like i40e_zca_free with ease.
> > This saves us doing various calculations to get the original address
> > back.
> >
> > Patch 5:
> > - This patch updates the ixgbe driver to make use of the new descriptor
> > format. The new format is particularly useful here since we can now
> > retrieve the original address in places like ixgbe_zca_free with ease.
> > This saves us doing various calculations to get the original address
> > back.
> >
> > Patch 6:
> > - Add flags for umem configuration to libbpf
> >
> > Patch 7:
> > - Modify xdpsock application to add a command line option for
> > unaligned chunks
> >
> > Patch 8:
> > - Since we can now run the application in unaligned chunk mode, we need
> > to make sure we recycle the buffers appropriately.
> >
> > Patch 9:
> > - Adds hugepage support to the xdpsock application
> >
> > Patch 10:
> > - Documentation update to include the unaligned chunk scenario. We need
> > to explicitly state that the incoming addresses are only masked in the
> > aligned chunk mode and not the unaligned chunk mode.
> >
> > ---
> > v2:
> > - fixed checkpatch issues
> > - fixed Rx buffer recycling for unaligned chunks in xdpsock
> > - removed unused defines
> > - fixed how chunk_size is calculated in xsk_diag.c
> > - added some performance numbers to cover letter
> > - modified descriptor format to make it easier to retrieve original
> > address
> > - removed patch adding off_t off to the zero copy allocator. This is no
> > longer needed with the new descriptor format.
> >
> > Kevin Laatz (10):
> > i40e: simplify Rx buffer recycle
> > ixgbe: simplify Rx buffer recycle
> > xsk: add support to allow unaligned chunk placement
> > i40e: modify driver for handling offsets
> > ixgbe: modify driver for handling offsets
> > libbpf: add flags to umem config
> > samples/bpf: add unaligned chunks mode support to xdpsock
> > samples/bpf: add buffer recycling for unaligned chunks to xdpsock
> > samples/bpf: use hugepages in xdpsock app
> > doc/af_xdp: include unaligned chunk case
> >
> > Documentation/networking/af_xdp.rst | 10 ++-
> > drivers/net/ethernet/intel/i40e/i40e_xsk.c | 39 +++++----
> > drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 39 +++++----
> > include/net/xdp_sock.h | 2 +
> > include/uapi/linux/if_xdp.h | 9 ++
> > net/xdp/xdp_umem.c | 17 ++--
> > net/xdp/xsk.c | 89 ++++++++++++++++----
> > net/xdp/xsk_diag.c | 2 +-
> > net/xdp/xsk_queue.h | 70 +++++++++++++--
> > samples/bpf/xdpsock_user.c | 61 ++++++++++----
> > tools/include/uapi/linux/if_xdp.h | 4 +
> > tools/lib/bpf/xsk.c | 3 +
> > tools/lib/bpf/xsk.h | 2 +
> > 13 files changed, 266 insertions(+), 81 deletions(-)
> >
> > --
> > 2.17.1
> >
> _______________________________________________
> Intel-wired-lan mailing list
> Intel-wired-lan@osuosl.org
> https://lists.osuosl.org/mailman/listinfo/intel-wired-lan
^ permalink raw reply
* [PATCH 7/7] can: gw: Fix error path of cgw_module_init
From: Marc Kleine-Budde @ 2019-07-24 13:03 UTC (permalink / raw)
To: netdev
Cc: davem, linux-can, kernel, YueHaibing, Oliver Hartkopp,
Marc Kleine-Budde
In-Reply-To: <20190724130322.31702-1-mkl@pengutronix.de>
From: YueHaibing <yuehaibing@huawei.com>
This patch add error path for cgw_module_init to avoid possible crash if
some error occurs.
Fixes: c1aabdf379bc ("can-gw: add netlink based CAN routing")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
net/can/gw.c | 48 +++++++++++++++++++++++++++++++++---------------
1 file changed, 33 insertions(+), 15 deletions(-)
diff --git a/net/can/gw.c b/net/can/gw.c
index 5275ddf580bc..72711053ebe6 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1046,32 +1046,50 @@ static __init int cgw_module_init(void)
pr_info("can: netlink gateway (rev " CAN_GW_VERSION ") max_hops=%d\n",
max_hops);
- register_pernet_subsys(&cangw_pernet_ops);
+ ret = register_pernet_subsys(&cangw_pernet_ops);
+ if (ret)
+ return ret;
+
+ ret = -ENOMEM;
cgw_cache = kmem_cache_create("can_gw", sizeof(struct cgw_job),
0, 0, NULL);
-
if (!cgw_cache)
- return -ENOMEM;
+ goto out_cache_create;
/* set notifier */
notifier.notifier_call = cgw_notifier;
- register_netdevice_notifier(¬ifier);
+ ret = register_netdevice_notifier(¬ifier);
+ if (ret)
+ goto out_register_notifier;
ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_GETROUTE,
NULL, cgw_dump_jobs, 0);
- if (ret) {
- unregister_netdevice_notifier(¬ifier);
- kmem_cache_destroy(cgw_cache);
- return -ENOBUFS;
- }
-
- /* Only the first call to rtnl_register_module can fail */
- rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
- cgw_create_job, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
- cgw_remove_job, NULL, 0);
+ if (ret)
+ goto out_rtnl_register1;
+
+ ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_NEWROUTE,
+ cgw_create_job, NULL, 0);
+ if (ret)
+ goto out_rtnl_register2;
+ ret = rtnl_register_module(THIS_MODULE, PF_CAN, RTM_DELROUTE,
+ cgw_remove_job, NULL, 0);
+ if (ret)
+ goto out_rtnl_register3;
return 0;
+
+out_rtnl_register3:
+ rtnl_unregister(PF_CAN, RTM_NEWROUTE);
+out_rtnl_register2:
+ rtnl_unregister(PF_CAN, RTM_GETROUTE);
+out_rtnl_register1:
+ unregister_netdevice_notifier(¬ifier);
+out_register_notifier:
+ kmem_cache_destroy(cgw_cache);
+out_cache_create:
+ unregister_pernet_subsys(&cangw_pernet_ops);
+
+ return ret;
}
static __exit void cgw_module_exit(void)
--
2.20.1
^ permalink raw reply related
* [PATCH 5/7] can: flexcan: fix stop mode acknowledgment
From: Marc Kleine-Budde @ 2019-07-24 13:03 UTC (permalink / raw)
To: netdev
Cc: davem, linux-can, kernel, Joakim Zhang, Marc Kleine-Budde,
linux-stable
In-Reply-To: <20190724130322.31702-1-mkl@pengutronix.de>
From: Joakim Zhang <qiangqing.zhang@nxp.com>
To enter stop mode, the CPU should manually assert a global Stop Mode
request and check the acknowledgment asserted by FlexCAN. The CPU must
only consider the FlexCAN in stop mode when both request and
acknowledgment conditions are satisfied.
Fixes: de3578c198c6 ("can: flexcan: add self wakeup support")
Reported-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v5.0
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
drivers/net/can/flexcan.c | 31 +++++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 33ce45d51e15..fcec8bcb53d6 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -400,9 +400,10 @@ static void flexcan_enable_wakeup_irq(struct flexcan_priv *priv, bool enable)
priv->write(reg_mcr, ®s->mcr);
}
-static inline void flexcan_enter_stop_mode(struct flexcan_priv *priv)
+static inline int flexcan_enter_stop_mode(struct flexcan_priv *priv)
{
struct flexcan_regs __iomem *regs = priv->regs;
+ unsigned int ackval;
u32 reg_mcr;
reg_mcr = priv->read(®s->mcr);
@@ -412,20 +413,37 @@ static inline void flexcan_enter_stop_mode(struct flexcan_priv *priv)
/* enable stop request */
regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr,
1 << priv->stm.req_bit, 1 << priv->stm.req_bit);
+
+ /* get stop acknowledgment */
+ if (regmap_read_poll_timeout(priv->stm.gpr, priv->stm.ack_gpr,
+ ackval, ackval & (1 << priv->stm.ack_bit),
+ 0, FLEXCAN_TIMEOUT_US))
+ return -ETIMEDOUT;
+
+ return 0;
}
-static inline void flexcan_exit_stop_mode(struct flexcan_priv *priv)
+static inline int flexcan_exit_stop_mode(struct flexcan_priv *priv)
{
struct flexcan_regs __iomem *regs = priv->regs;
+ unsigned int ackval;
u32 reg_mcr;
/* remove stop request */
regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr,
1 << priv->stm.req_bit, 0);
+ /* get stop acknowledgment */
+ if (regmap_read_poll_timeout(priv->stm.gpr, priv->stm.ack_gpr,
+ ackval, !(ackval & (1 << priv->stm.ack_bit)),
+ 0, FLEXCAN_TIMEOUT_US))
+ return -ETIMEDOUT;
+
reg_mcr = priv->read(®s->mcr);
reg_mcr &= ~FLEXCAN_MCR_SLF_WAK;
priv->write(reg_mcr, ®s->mcr);
+
+ return 0;
}
static inline void flexcan_error_irq_enable(const struct flexcan_priv *priv)
@@ -1614,7 +1632,9 @@ static int __maybe_unused flexcan_suspend(struct device *device)
*/
if (device_may_wakeup(device)) {
enable_irq_wake(dev->irq);
- flexcan_enter_stop_mode(priv);
+ err = flexcan_enter_stop_mode(priv);
+ if (err)
+ return err;
} else {
err = flexcan_chip_disable(priv);
if (err)
@@ -1664,10 +1684,13 @@ static int __maybe_unused flexcan_noirq_resume(struct device *device)
{
struct net_device *dev = dev_get_drvdata(device);
struct flexcan_priv *priv = netdev_priv(dev);
+ int err;
if (netif_running(dev) && device_may_wakeup(device)) {
flexcan_enable_wakeup_irq(priv, false);
- flexcan_exit_stop_mode(priv);
+ err = flexcan_exit_stop_mode(priv);
+ if (err)
+ return err;
}
return 0;
--
2.20.1
^ permalink raw reply related
* [PATCH 6/7] can: peak_usb: fix potential double kfree_skb()
From: Marc Kleine-Budde @ 2019-07-24 13:03 UTC (permalink / raw)
To: netdev
Cc: davem, linux-can, kernel, Stephane Grosjean, linux-stable,
Marc Kleine-Budde
In-Reply-To: <20190724130322.31702-1-mkl@pengutronix.de>
From: Stephane Grosjean <s.grosjean@peak-system.com>
When closing the CAN device while tx skbs are inflight, echo skb could
be released twice. By calling close_candev() before unlinking all
pending tx urbs, then the internal echo_skb[] array is fully and
correctly cleared before the USB write callback and, therefore,
can_get_echo_skb() are called, for each aborted URB.
Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core")
Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
drivers/net/can/usb/peak_usb/pcan_usb_core.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 458154c9b482..22b9c8e6d040 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -568,16 +568,16 @@ static int peak_usb_ndo_stop(struct net_device *netdev)
dev->state &= ~PCAN_USB_STATE_STARTED;
netif_stop_queue(netdev);
+ close_candev(netdev);
+
+ dev->can.state = CAN_STATE_STOPPED;
+
/* unlink all pending urbs and free used memory */
peak_usb_unlink_all_urbs(dev);
if (dev->adapter->dev_stop)
dev->adapter->dev_stop(dev);
- close_candev(netdev);
-
- dev->can.state = CAN_STATE_STOPPED;
-
/* can set bus off now */
if (dev->adapter->dev_set_bus) {
int err = dev->adapter->dev_set_bus(dev, 0);
--
2.20.1
^ permalink raw reply related
* [PATCH 4/7] can: flexcan: fix an use-after-free in flexcan_setup_stop_mode()
From: Marc Kleine-Budde @ 2019-07-24 13:03 UTC (permalink / raw)
To: netdev; +Cc: davem, linux-can, kernel, Wen Yang, linux-stable,
Marc Kleine-Budde
In-Reply-To: <20190724130322.31702-1-mkl@pengutronix.de>
From: Wen Yang <wen.yang99@zte.com.cn>
The gpr_np variable is still being used in dev_dbg() after the
of_node_put() call, which may result in use-after-free.
Fixes: de3578c198c6 ("can: flexcan: add self wakeup support")
Signed-off-by: Wen Yang <wen.yang99@zte.com.cn>
Cc: linux-stable <stable@vger.kernel.org> # >= v5.0
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
drivers/net/can/flexcan.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index f2fe344593d5..33ce45d51e15 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1437,10 +1437,10 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev)
priv = netdev_priv(dev);
priv->stm.gpr = syscon_node_to_regmap(gpr_np);
- of_node_put(gpr_np);
if (IS_ERR(priv->stm.gpr)) {
dev_dbg(&pdev->dev, "could not find gpr regmap\n");
- return PTR_ERR(priv->stm.gpr);
+ ret = PTR_ERR(priv->stm.gpr);
+ goto out_put_node;
}
priv->stm.req_gpr = out_val[1];
@@ -1455,7 +1455,9 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev)
device_set_wakeup_capable(&pdev->dev, true);
- return 0;
+out_put_node:
+ of_node_put(gpr_np);
+ return ret;
}
static const struct of_device_id flexcan_of_match[] = {
--
2.20.1
^ permalink raw reply related
* [PATCH 2/7] can: rcar_canfd: fix possible IRQ storm on high load
From: Marc Kleine-Budde @ 2019-07-24 13:03 UTC (permalink / raw)
To: netdev
Cc: davem, linux-can, kernel, Nikita Yushchenko, linux-stable,
Marc Kleine-Budde
In-Reply-To: <20190724130322.31702-1-mkl@pengutronix.de>
From: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
We have observed rcar_canfd driver entering IRQ storm under high load,
with following scenario:
- rcar_canfd_global_interrupt() in entered due to Rx available,
- napi_schedule_prep() is called, and sets NAPIF_STATE_SCHED in state
- Rx fifo interrupts are masked,
- rcar_canfd_global_interrupt() is entered again, this time due to
error interrupt (e.g. due to overflow),
- since scheduled napi poller has not yet executed, condition for calling
napi_schedule_prep() from rcar_canfd_global_interrupt() remains true,
thus napi_schedule_prep() gets called and sets NAPIF_STATE_MISSED flag
in state,
- later, napi poller function rcar_canfd_rx_poll() gets executed, and
calls napi_complete_done(),
- due to NAPIF_STATE_MISSED flag in state, this call does not clear
NAPIF_STATE_SCHED flag from state,
- on return from napi_complete_done(), rcar_canfd_rx_poll() unmasks Rx
interrutps,
- Rx interrupt happens, rcar_canfd_global_interrupt() gets called
and calls napi_schedule_prep(),
- since NAPIF_STATE_SCHED is set in state at this time, this call
returns false,
- due to that false return, rcar_canfd_global_interrupt() returns
without masking Rx interrupt
- and this results into IRQ storm: unmasked Rx interrupt happens again
and again is misprocessed in the same way.
This patch fixes that scenario by unmasking Rx interrupts only when
napi_complete_done() returns true, which means it has cleared
NAPIF_STATE_SCHED in state.
Fixes: dd3bd23eb438 ("can: rcar_canfd: Add Renesas R-Car CAN FD driver")
Signed-off-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
drivers/net/can/rcar/rcar_canfd.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index 05410008aa6b..de34a4b82d4a 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -1508,10 +1508,11 @@ static int rcar_canfd_rx_poll(struct napi_struct *napi, int quota)
/* All packets processed */
if (num_pkts < quota) {
- napi_complete_done(napi, num_pkts);
- /* Enable Rx FIFO interrupts */
- rcar_canfd_set_bit(priv->base, RCANFD_RFCC(ridx),
- RCANFD_RFCC_RFIE);
+ if (napi_complete_done(napi, num_pkts)) {
+ /* Enable Rx FIFO interrupts */
+ rcar_canfd_set_bit(priv->base, RCANFD_RFCC(ridx),
+ RCANFD_RFCC_RFIE);
+ }
}
return num_pkts;
}
--
2.20.1
^ permalink raw reply related
* [PATCH 3/7] can: mcp251x: add error check when wq alloc failed
From: Marc Kleine-Budde @ 2019-07-24 13:03 UTC (permalink / raw)
To: netdev
Cc: davem, linux-can, kernel, Weitao Hou, Willem de Bruijn,
Sean Nyekjaer, Marc Kleine-Budde
In-Reply-To: <20190724130322.31702-1-mkl@pengutronix.de>
From: Weitao Hou <houweitaoo@gmail.com>
add error check when workqueue alloc failed, and remove redundant code
to make it clear.
Fixes: e0000163e30e ("can: Driver for the Microchip MCP251x SPI CAN controllers")
Signed-off-by: Weitao Hou <houweitaoo@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Tested-by: Sean Nyekjaer <sean@geanix.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
drivers/net/can/spi/mcp251x.c | 49 ++++++++++++++++-------------------
1 file changed, 22 insertions(+), 27 deletions(-)
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 44e99e3d7134..2aec934fab0c 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -664,17 +664,6 @@ static int mcp251x_power_enable(struct regulator *reg, int enable)
return regulator_disable(reg);
}
-static void mcp251x_open_clean(struct net_device *net)
-{
- struct mcp251x_priv *priv = netdev_priv(net);
- struct spi_device *spi = priv->spi;
-
- free_irq(spi->irq, priv);
- mcp251x_hw_sleep(spi);
- mcp251x_power_enable(priv->transceiver, 0);
- close_candev(net);
-}
-
static int mcp251x_stop(struct net_device *net)
{
struct mcp251x_priv *priv = netdev_priv(net);
@@ -940,37 +929,43 @@ static int mcp251x_open(struct net_device *net)
flags | IRQF_ONESHOT, DEVICE_NAME, priv);
if (ret) {
dev_err(&spi->dev, "failed to acquire irq %d\n", spi->irq);
- mcp251x_power_enable(priv->transceiver, 0);
- close_candev(net);
- goto open_unlock;
+ goto out_close;
}
priv->wq = alloc_workqueue("mcp251x_wq", WQ_FREEZABLE | WQ_MEM_RECLAIM,
0);
+ if (!priv->wq) {
+ ret = -ENOMEM;
+ goto out_clean;
+ }
INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler);
INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler);
ret = mcp251x_hw_reset(spi);
- if (ret) {
- mcp251x_open_clean(net);
- goto open_unlock;
- }
+ if (ret)
+ goto out_free_wq;
ret = mcp251x_setup(net, spi);
- if (ret) {
- mcp251x_open_clean(net);
- goto open_unlock;
- }
+ if (ret)
+ goto out_free_wq;
ret = mcp251x_set_normal_mode(spi);
- if (ret) {
- mcp251x_open_clean(net);
- goto open_unlock;
- }
+ if (ret)
+ goto out_free_wq;
can_led_event(net, CAN_LED_EVENT_OPEN);
netif_wake_queue(net);
+ mutex_unlock(&priv->mcp_lock);
-open_unlock:
+ return 0;
+
+out_free_wq:
+ destroy_workqueue(priv->wq);
+out_clean:
+ free_irq(spi->irq, priv);
+ mcp251x_hw_sleep(spi);
+out_close:
+ mcp251x_power_enable(priv->transceiver, 0);
+ close_candev(net);
mutex_unlock(&priv->mcp_lock);
return ret;
}
--
2.20.1
^ permalink raw reply related
* [PATCH 1/7] can: dev: call netif_carrier_off() in register_candev()
From: Marc Kleine-Budde @ 2019-07-24 13:03 UTC (permalink / raw)
To: netdev
Cc: davem, linux-can, kernel, Rasmus Villemoes, Willem de Bruijn,
Marc Kleine-Budde
In-Reply-To: <20190724130322.31702-1-mkl@pengutronix.de>
From: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
CONFIG_CAN_LEDS is deprecated. When trying to use the generic netdev
trigger as suggested, there's a small inconsistency with the link
property: The LED is on initially, stays on when the device is brought
up, and then turns off (as expected) when the device is brought down.
Make sure the LED always reflects the state of the CAN device.
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
drivers/net/can/dev.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index b6b93a2d93a5..483d270664cc 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -1249,6 +1249,8 @@ int register_candev(struct net_device *dev)
return -EINVAL;
dev->rtnl_link_ops = &can_link_ops;
+ netif_carrier_off(dev);
+
return register_netdev(dev);
}
EXPORT_SYMBOL_GPL(register_candev);
--
2.20.1
^ permalink raw reply related
* pull-request: can 2019-07-24
From: Marc Kleine-Budde @ 2019-07-24 13:03 UTC (permalink / raw)
To: netdev; +Cc: davem, linux-can, kernel
Hello David,
this is a pull reqeust of 7 patches for net/master.
The first patch is by Rasmus Villemoes add a missing netif_carrier_off() to
register_candev() so that generic netdev trigger based LEDs are initially off.
Nikita Yushchenko's patch for the rcar_canfd driver fixes a possible IRQ storm
on high load.
The patch by Weitao Hou for the mcp251x driver add missing error checking to
the work queue allocation.
Both Wen Yang's and Joakim Zhang's patch for the flexcan driver fix a problem
with the stop-mode.
Stephane Grosjean contributes a patch for the peak_usb driver to fix a
potential double kfree_skb().
The last patch is by YueHaibing and fixes the error path in can-gw's
cgw_module_init() function.
regards,
Marc
---
The following changes since commit d86afb89305de205b0d2f20c2160adf039e9508d:
net: thunderx: Use fwnode_get_mac_address() (2019-07-23 14:09:21 -0700)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git tags/linux-can-fixes-for-5.3-20190724
for you to fetch changes up to b7a14297f102b6e2ce6f16feffebbb9bde1e9b55:
can: gw: Fix error path of cgw_module_init (2019-07-24 11:19:03 +0200)
----------------------------------------------------------------
linux-can-fixes-for-5.3-20190724
----------------------------------------------------------------
Joakim Zhang (1):
can: flexcan: fix stop mode acknowledgment
Nikita Yushchenko (1):
can: rcar_canfd: fix possible IRQ storm on high load
Rasmus Villemoes (1):
can: dev: call netif_carrier_off() in register_candev()
Stephane Grosjean (1):
can: peak_usb: fix potential double kfree_skb()
Weitao Hou (1):
can: mcp251x: add error check when wq alloc failed
Wen Yang (1):
can: flexcan: fix an use-after-free in flexcan_setup_stop_mode()
YueHaibing (1):
can: gw: Fix error path of cgw_module_init
drivers/net/can/dev.c | 2 ++
drivers/net/can/flexcan.c | 39 ++++++++++++++++++----
drivers/net/can/rcar/rcar_canfd.c | 9 ++---
drivers/net/can/spi/mcp251x.c | 49 +++++++++++++---------------
drivers/net/can/usb/peak_usb/pcan_usb_core.c | 8 ++---
net/can/gw.c | 48 ++++++++++++++++++---------
6 files changed, 98 insertions(+), 57 deletions(-)
^ permalink raw reply
* [PATCH] qlge: Fix build error without CONFIG_ETHERNET
From: YueHaibing @ 2019-07-24 13:01 UTC (permalink / raw)
To: manishc, GR-Linux-NIC-Dev, gregkh, bpoirier
Cc: linux-kernel, devel, netdev, YueHaibing
Now if CONFIG_ETHERNET is not set, QLGE driver
building fails:
drivers/staging/qlge/qlge_main.o: In function `qlge_remove':
drivers/staging/qlge/qlge_main.c:4831: undefined reference to `unregister_netdev'
Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: 955315b0dc8c ("qlge: Move drivers/net/ethernet/qlogic/qlge/ to drivers/staging/qlge/")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
drivers/staging/qlge/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/qlge/Kconfig b/drivers/staging/qlge/Kconfig
index ae9ed2c..a3cb25a3 100644
--- a/drivers/staging/qlge/Kconfig
+++ b/drivers/staging/qlge/Kconfig
@@ -2,7 +2,7 @@
config QLGE
tristate "QLogic QLGE 10Gb Ethernet Driver Support"
- depends on PCI
+ depends on ETHERNET && PCI
help
This driver supports QLogic ISP8XXX 10Gb Ethernet cards.
--
2.7.4
^ permalink raw reply related
* Re: [RFC PATCH net-next 00/12] drop_monitor: Capture dropped packets and metadata
From: Jiri Pirko @ 2019-07-24 12:58 UTC (permalink / raw)
To: Ido Schimmel
Cc: Toke Høiland-Jørgensen, netdev, davem, nhorman, dsahern,
roopa, nikolay, jakub.kicinski, andy, f.fainelli, andrew,
vivien.didelot, mlxsw, Ido Schimmel
In-Reply-To: <20190723151423.GA10342@splinter>
Tue, Jul 23, 2019 at 05:14:23PM CEST, idosch@idosch.org wrote:
>On Tue, Jul 23, 2019 at 02:17:49PM +0200, Toke Høiland-Jørgensen wrote:
>> Ido Schimmel <idosch@idosch.org> writes:
>>
>> > On Mon, Jul 22, 2019 at 09:43:15PM +0200, Toke Høiland-Jørgensen wrote:
>> >> Is there a mechanism for the user to filter the packets before they are
>> >> sent to userspace? A bpf filter would be the obvious choice I guess...
>> >
>> > Hi Toke,
>> >
>> > Yes, it's on my TODO list to write an eBPF program that only lets
>> > "unique" packets to be enqueued on the netlink socket. Where "unique" is
>> > defined as {5-tuple, PC}. The rest of the copies will be counted in an
>> > eBPF map, which is just a hash table keyed by {5-tuple, PC}.
>>
>> Yeah, that's a good idea. Or even something simpler like tcpdump-style
>> filters for the packets returned by drop monitor (say if I'm just trying
>> to figure out what happens to my HTTP requests).
>
>Yep, that's a good idea. I guess different users will use different
>programs. Will look into both options.
>
>> > I think it would be good to have the program as part of the bcc
>> > repository [1]. What do you think?
>>
>> Sure. We could also add it to the XDP tutorial[2]; it could go into a
>> section on introspection and debugging (just added a TODO about that[3]).
>
>Great!
>
>> >> For integrating with XDP the trick would be to find a way to do it that
>> >> doesn't incur any overhead when it's not enabled. Are you envisioning
>> >> that this would be enabled separately for the different "modes" (kernel,
>> >> hardware, XDP, etc)?
>> >
>> > Yes. Drop monitor have commands to enable and disable tracing, but they
>> > don't carry any attributes at the moment. My plan is to add an attribute
>> > (e.g., 'NET_DM_ATTR_DROP_TYPE') that will specify the type of drops
>> > you're interested in - SW/HW/XDP. If the attribute is not specified,
>> > then current behavior is maintained and all the drop types are traced.
>> > But if you're only interested in SW drops, then overhead for the rest
>> > should be zero.
>>
>> Makes sense (although "should be" is the key here ;)).
>>
>> I'm also worried about the drop monitor getting overwhelmed; if you turn
>> it on for XDP and you're running a filtering program there, you'll
>> suddenly get *a lot* of drops.
>>
>> As I read your patch, the current code can basically queue up an
>> unbounded number of packets waiting to go out over netlink, can't it?
>
>That's a very good point. Each CPU holds a drop list. It probably makes
>sense to limit it by default (to 1000?) and allow user to change it
Shouldn't the queue len be configurable?
>later, if needed. I can expose a counter that shows how many packets
>were dropped because of this limit. It can be used as an indication to
>adjust the queue length (or flip to 'summary' mode).
^ permalink raw reply
* RE: [PATCH net-next 1/3] enetc: Add mdio bus driver for the PCIe MDIO endpoint
From: Claudiu Manoil @ 2019-07-24 12:57 UTC (permalink / raw)
To: Claudiu Manoil, Andrew Lunn
Cc: David S . Miller, devicetree@vger.kernel.org,
netdev@vger.kernel.org, Alexandru Marginean,
linux-kernel@vger.kernel.org, Leo Li, Rob Herring,
linux-arm-kernel@lists.infradead.org
In-Reply-To: <VI1PR04MB4880DAE881769F6DC845CEEF96C60@VI1PR04MB4880.eurprd04.prod.outlook.com>
>-----Original Message-----
>From: netdev-owner@vger.kernel.org <netdev-owner@vger.kernel.org> On
>Behalf Of Claudiu Manoil
>Sent: Wednesday, July 24, 2019 12:53 PM
>To: Andrew Lunn <andrew@lunn.ch>
>Cc: David S . Miller <davem@davemloft.net>; devicetree@vger.kernel.org;
>netdev@vger.kernel.org; Alexandru Marginean
><alexandru.marginean@nxp.com>; linux-kernel@vger.kernel.org; Leo Li
><leoyang.li@nxp.com>; Rob Herring <robh+dt@kernel.org>; linux-arm-
>kernel@lists.infradead.org
>Subject: RE: [PATCH net-next 1/3] enetc: Add mdio bus driver for the PCIe MDIO
>endpoint
>
>>-----Original Message-----
>>From: Andrew Lunn <andrew@lunn.ch>
>>Sent: Wednesday, July 24, 2019 1:25 AM
>>To: Claudiu Manoil <claudiu.manoil@nxp.com>
>>Cc: David S . Miller <davem@davemloft.net>; devicetree@vger.kernel.org;
>>netdev@vger.kernel.org; Alexandru Marginean
>><alexandru.marginean@nxp.com>; linux-kernel@vger.kernel.org; Leo Li
>><leoyang.li@nxp.com>; Rob Herring <robh+dt@kernel.org>; linux-arm-
>>kernel@lists.infradead.org
>>Subject: Re: [PATCH net-next 1/3] enetc: Add mdio bus driver for the
>>PCIe MDIO endpoint
>>
>>> + bus = mdiobus_alloc_size(sizeof(u32 *));
>>> + if (!bus)
>>> + return -ENOMEM;
>>> +
>>
>>> + bus->priv = pci_iomap_range(pdev, 0, ENETC_MDIO_REG_OFFSET, 0);
>>
>>This got me confused for a while. You allocate space for a u32 pointer.
>>bus->priv will point to this space. However, you are not using this
>>space, you {ab}use the pointer to directly hold the return from
>>pci_iomap_range(). This works, but sparse is probably unhappy, and you
>>are wasting the space the u32 pointer takes.
>>
>
>Thanks Andrew,
>This is not what I wanted to do, don't ask me how I got to this, it's confusing
>indeed.
>What's needed here is mdiobus_alloc() or better, devm_mdiobus_alloc().
>I've got to do some cleanup in the local mdio bus probing too.
>Will send v2.
>
This is tricky actually, mdiobus_alloc won't do it, I still need to allocate space
for the pointer.
So it's not ok to store the register space pointer directly into priv
(even if iomap returns void *), it's unusual.
Looks like I will have to use double pointers!
^ permalink raw reply
* Re: [PATCH] net: 9p: Fix possible null-pointer dereferences in p9_cm_event_handler()
From: Dominique Martinet @ 2019-07-24 12:55 UTC (permalink / raw)
To: Jia-Ju Bai; +Cc: ericvh, lucho, davem, v9fs-developer, netdev, linux-kernel
In-Reply-To: <20190724103948.5834-1-baijiaju1990@gmail.com>
Jia-Ju Bai wrote on Wed, Jul 24, 2019:
> In p9_cm_event_handler(), there is an if statement on 260 to check
> whether rdma is NULL, which indicates that rdma can be NULL.
> If so, using rdma->xxx may cause a possible null-pointer dereference.
The final dereference (complete(&rdma->cm_done) line 285) has been here
from the start, so we would have seen crashes by now if rdma could be
null at this point.
Let's do it the other way around and remove the useless "if (rdma)" that
has been here from day 1 instead ; I basically did the same with
c->status a few months ago (from a coverity report)...
That said, please note that 'rdma checked for null in this event->event
== RDMA_CM_EVENT_DISCONNECTED branch' does not mean rdma can be null in
other branches.
static analysis does not say anything more than the final complete()
should also have a check if the previous one has, but nothing about the
other cases in the switch.
Thanks,
--
Dominique
^ permalink raw reply
* Re: [RFC PATCH net-next 10/12] drop_monitor: Add packet alert mode
From: Jiri Pirko @ 2019-07-24 12:53 UTC (permalink / raw)
To: Ido Schimmel
Cc: netdev, davem, nhorman, dsahern, roopa, nikolay, jakub.kicinski,
toke, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190722183134.14516-11-idosch@idosch.org>
Mon, Jul 22, 2019 at 08:31:32PM CEST, idosch@idosch.org wrote:
>From: Ido Schimmel <idosch@mellanox.com>
>
>So far drop monitor supported only one alert mode in which a summary of
>locations in which packets were recently dropped was sent to user space.
>
>This alert mode is sufficient in order to understand that packets were
>dropped, but lacks information to perform a more detailed analysis.
>
>Add a new alert mode in which the dropped packet itself is passed to
>user space along with metadata: The drop location (as program counter
>and resolved symbol), ingress / egress netdevice and arrival / departure
>timestamp. More metadata can be added in the future.
>
>To avoid performing expensive operations in the context in which
>kfree_skb() is invoked (can be hard IRQ), the dropped skb is cloned and
>queued on per-CPU skb drop list. Then, in process context the netlink
>message is allocated, prepared and finally sent to user space.
>
>Signed-off-by: Ido Schimmel <idosch@mellanox.com>
>---
> include/uapi/linux/net_dropmon.h | 29 ++++
> net/core/drop_monitor.c | 287 ++++++++++++++++++++++++++++++-
> 2 files changed, 308 insertions(+), 8 deletions(-)
>
>diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
>index 5edbd0a675fd..7708c8a440a1 100644
>--- a/include/uapi/linux/net_dropmon.h
>+++ b/include/uapi/linux/net_dropmon.h
>@@ -53,6 +53,7 @@ enum {
> NET_DM_CMD_CONFIG,
> NET_DM_CMD_START,
> NET_DM_CMD_STOP,
>+ NET_DM_CMD_PACKET_ALERT,
> _NET_DM_CMD_MAX,
> };
>
>@@ -62,4 +63,32 @@ enum {
> * Our group identifiers
> */
> #define NET_DM_GRP_ALERT 1
>+
>+enum net_dm_attr {
>+ NET_DM_ATTR_UNSPEC,
>+
>+ NET_DM_ATTR_ALERT_MODE, /* u8 */
>+ NET_DM_ATTR_PC, /* u64 */
>+ NET_DM_ATTR_SYMBOL, /* string */
>+ NET_DM_ATTR_NETDEV_IFINDEX, /* u32 */
>+ NET_DM_ATTR_NETDEV_NAME, /* string */
>+ NET_DM_ATTR_TIMESTAMP, /* struct timespec */
>+ NET_DM_ATTR_PAYLOAD, /* binary */
>+ NET_DM_ATTR_PAD,
>+
>+ __NET_DM_ATTR_MAX,
>+ NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
>+};
>+
>+/**
>+ * enum net_dm_alert_mode - Alert mode.
>+ * @NET_DM_ALERT_MODE_SUMMARY: A summary of recent drops is sent to user space.
>+ * @NET_DM_ALERT_MODE_PACKET: Each dropped packet is sent to user space along
>+ * with metadata.
>+ */
>+enum net_dm_alert_mode {
>+ NET_DM_ALERT_MODE_SUMMARY,
>+ NET_DM_ALERT_MODE_PACKET,
>+};
>+
> #endif
>diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
>index f441fb653567..512935fc235b 100644
>--- a/net/core/drop_monitor.c
>+++ b/net/core/drop_monitor.c
>@@ -54,6 +54,7 @@ static DEFINE_MUTEX(net_dm_mutex);
> struct per_cpu_dm_data {
> spinlock_t lock; /* Protects 'skb' and 'send_timer' */
> struct sk_buff *skb;
>+ struct sk_buff_head drop_queue;
> struct work_struct dm_alert_work;
> struct timer_list send_timer;
> };
>@@ -75,6 +76,22 @@ static int dm_delay = 1;
> static unsigned long dm_hw_check_delta = 2*HZ;
> static LIST_HEAD(hw_stats_list);
>
>+static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
>+
>+struct net_dm_skb_cb {
>+ void *pc;
>+};
>+
>+#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
>+
>+struct net_dm_alert_ops {
>+ void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
>+ void *location);
>+ void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
>+ int work, int budget);
>+ void (*work_item_func)(struct work_struct *work);
>+};
>+
> static int net_dm_nl_pre_doit(const struct genl_ops *ops,
> struct sk_buff *skb, struct genl_info *info)
> {
>@@ -255,10 +272,194 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
> rcu_read_unlock();
> }
>
>+static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
>+ .kfree_skb_probe = trace_kfree_skb_hit,
>+ .napi_poll_probe = trace_napi_poll_hit,
>+ .work_item_func = send_dm_alert,
>+};
>+
>+static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
>+ struct sk_buff *skb,
>+ void *location)
>+{
>+ struct per_cpu_dm_data *data;
>+ struct sk_buff *nskb;
>+ unsigned long flags;
>+
>+ nskb = skb_clone(skb, GFP_ATOMIC);
>+ if (!nskb)
>+ return;
>+
>+ NET_DM_SKB_CB(nskb)->pc = location;
>+ if (nskb->dev)
>+ dev_hold(nskb->dev);
>+
>+ data = this_cpu_ptr(&dm_cpu_data);
>+ spin_lock_irqsave(&data->drop_queue.lock, flags);
>+
>+ __skb_queue_tail(&data->drop_queue, nskb);
>+
>+ if (!timer_pending(&data->send_timer)) {
>+ data->send_timer.expires = jiffies + dm_delay * HZ;
>+ add_timer(&data->send_timer);
>+ }
>+
>+ spin_unlock_irqrestore(&data->drop_queue.lock, flags);
>+}
>+
>+static void net_dm_packet_trace_napi_poll_hit(void *ignore,
>+ struct napi_struct *napi,
>+ int work, int budget)
>+{
>+}
>+
>+#define NET_DM_MAX_SYMBOL_LEN 40
>+
>+static size_t net_dm_packet_report_size(size_t payload_len)
>+{
>+ size_t size;
>+
>+ size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
>+
>+ return NLMSG_ALIGN(size) +
>+ /* NET_DM_ATTR_PC */
>+ nla_total_size(sizeof(u64)) +
>+ /* NET_DM_ATTR_SYMBOL */
>+ nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) +
>+ /* NET_DM_ATTR_NETDEV_IFINDEX */
>+ nla_total_size(sizeof(u32)) +
>+ /* NET_DM_ATTR_NETDEV_NAME */
>+ nla_total_size(IFNAMSIZ + 1) +
>+ /* NET_DM_ATTR_TIMESTAMP */
>+ nla_total_size(sizeof(struct timespec)) +
>+ /* NET_DM_ATTR_PAYLOAD */
>+ nla_total_size(payload_len);
>+}
>+
>+static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
>+ size_t payload_len)
>+{
>+ u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
>+ char buf[NET_DM_MAX_SYMBOL_LEN];
>+ struct nlattr *attr;
>+ struct timespec ts;
>+ void *hdr;
>+
>+ hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
>+ NET_DM_CMD_PACKET_ALERT);
>+ if (!hdr)
>+ return -EMSGSIZE;
>+
>+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
>+ goto nla_put_failure;
>+
>+ snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
>+ if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
>+ goto nla_put_failure;
>+
>+ if (skb->dev &&
>+ nla_put_u32(msg, NET_DM_ATTR_NETDEV_IFINDEX, skb->dev->ifindex))
>+ goto nla_put_failure;
>+
>+ if (skb->dev &&
>+ nla_put_string(msg, NET_DM_ATTR_NETDEV_NAME, skb->dev->name))
>+ goto nla_put_failure;
>+
>+ if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
>+ nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
>+ goto nla_put_failure;
>+
>+ if (!payload_len)
>+ goto out;
>+
>+ attr = skb_put(msg, nla_total_size(payload_len));
>+ attr->nla_type = NET_DM_ATTR_PAYLOAD;
>+ attr->nla_len = nla_attr_size(payload_len);
>+ if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
>+ goto nla_put_failure;
>+
>+out:
>+ genlmsg_end(msg, hdr);
>+
>+ return 0;
>+
>+nla_put_failure:
>+ genlmsg_cancel(msg, hdr);
>+ return -EMSGSIZE;
>+}
>+
>+#define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO)
>+
>+static void net_dm_packet_report(struct sk_buff *skb)
>+{
>+ struct sk_buff *msg;
>+ size_t payload_len;
>+ int rc;
>+
>+ /* Make sure we start copying the packet from the MAC header */
>+ if (skb->data > skb_mac_header(skb))
>+ skb_push(skb, skb->data - skb_mac_header(skb));
>+ else
>+ skb_pull(skb, skb_mac_header(skb) - skb->data);
>+
>+ /* Ensure packet fits inside a single netlink attribute */
>+ payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
>+
>+ msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
>+ if (!msg)
>+ goto out;
>+
>+ rc = net_dm_packet_report_fill(msg, skb, payload_len);
>+ if (rc) {
>+ nlmsg_free(msg);
>+ goto out;
>+ }
>+
>+ genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
>+
>+out:
>+ if (skb->dev)
>+ dev_put(skb->dev);
>+ consume_skb(skb);
>+}
>+
>+static void net_dm_packet_work(struct work_struct *work)
>+{
>+ struct per_cpu_dm_data *data;
>+ struct sk_buff_head list;
>+ struct sk_buff *skb;
>+ unsigned long flags;
>+
>+ data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
>+
>+ __skb_queue_head_init(&list);
>+
>+ spin_lock_irqsave(&data->drop_queue.lock, flags);
>+ skb_queue_splice_tail_init(&data->drop_queue, &list);
>+ spin_unlock_irqrestore(&data->drop_queue.lock, flags);
>+
>+ while ((skb = __skb_dequeue(&list)))
>+ net_dm_packet_report(skb);
>+}
>+
>+static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
>+ .kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit,
>+ .napi_poll_probe = net_dm_packet_trace_napi_poll_hit,
>+ .work_item_func = net_dm_packet_work,
>+};
>+
>+static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
>+ [NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops,
>+ [NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops,
>+};
Please split this patch into 2:
1) introducing the ops and modes (only summary)
2) introducing the packet mode
>+
> static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
> {
>+ const struct net_dm_alert_ops *ops;
> int cpu, rc;
>
>+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
>+
> if (!try_module_get(THIS_MODULE)) {
> NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
> return -ENODEV;
>@@ -267,17 +468,17 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
> for_each_possible_cpu(cpu) {
> struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
>
>- INIT_WORK(&data->dm_alert_work, send_dm_alert);
>+ INIT_WORK(&data->dm_alert_work, ops->work_item_func);
> timer_setup(&data->send_timer, sched_send_work, 0);
> }
>
>- rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
>+ rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL);
> if (rc) {
> NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
> goto err_module_put;
> }
>
>- rc = register_trace_napi_poll(trace_napi_poll_hit, NULL);
>+ rc = register_trace_napi_poll(ops->napi_poll_probe, NULL);
> if (rc) {
> NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
> goto err_unregister_trace;
>@@ -286,7 +487,7 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
> return 0;
>
> err_unregister_trace:
>- unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
>+ unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
> err_module_put:
> module_put(THIS_MODULE);
> return rc;
>@@ -295,10 +496,13 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
> static void net_dm_trace_off_set(void)
> {
> struct dm_hw_stat_delta *new_stat, *temp;
>+ const struct net_dm_alert_ops *ops;
> int cpu;
>
>- unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
>- unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
>+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
>+
>+ unregister_trace_napi_poll(ops->napi_poll_probe, NULL);
>+ unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
>
> tracepoint_synchronize_unregister();
>
>@@ -307,9 +511,18 @@ static void net_dm_trace_off_set(void)
> */
> for_each_possible_cpu(cpu) {
> struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
>+ struct sk_buff *skb;
>
> del_timer_sync(&data->send_timer);
> cancel_work_sync(&data->dm_alert_work);
>+ /* If we deactivated a pending timer, some packets are still
>+ * queued and we need to free them.
>+ */
>+ while ((skb = __skb_dequeue(&data->drop_queue))) {
>+ if (skb->dev)
>+ dev_put(skb->dev);
>+ consume_skb(skb);
>+ }
> }
>
> list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
>@@ -351,12 +564,61 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
> return rc;
> }
>
>+static int net_dm_alert_mode_get_from_info(struct genl_info *info,
>+ enum net_dm_alert_mode *p_alert_mode)
>+{
>+ u8 val;
>+
>+ val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]);
>+
>+ switch (val) {
>+ case NET_DM_ALERT_MODE_SUMMARY: /* fall-through */
>+ case NET_DM_ALERT_MODE_PACKET:
>+ *p_alert_mode = val;
>+ break;
>+ default:
>+ return -EINVAL;
>+ }
>+
>+ return 0;
>+}
>+
>+static int net_dm_alert_mode_set(struct genl_info *info)
>+{
>+ struct netlink_ext_ack *extack = info->extack;
>+ enum net_dm_alert_mode alert_mode;
>+ int rc;
>+
>+ if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
>+ return 0;
>+
>+ rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
>+ if (rc) {
>+ NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
>+ return -EINVAL;
>+ }
>+
>+ net_dm_alert_mode = alert_mode;
2 things:
1) Shouldn't you check if the tracing is on and return -EBUSY in case it is?
2) You setup the mode globally. I guess it is fine and it does not make
sense to do it otherwise, right? Like per-net or something.
>+
>+ return 0;
>+}
>+
> static int net_dm_cmd_config(struct sk_buff *skb,
> struct genl_info *info)
> {
>- NL_SET_ERR_MSG_MOD(info->extack, "Command not supported");
>+ struct netlink_ext_ack *extack = info->extack;
>+ int rc;
>
>- return -EOPNOTSUPP;
>+ if (trace_state == TRACE_ON) {
>+ NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor while tracing is on");
>+ return -EOPNOTSUPP;
>+ }
>+
>+ rc = net_dm_alert_mode_set(info);
>+ if (rc)
>+ return rc;
>+
>+ return 0;
> }
>
> static int net_dm_cmd_trace(struct sk_buff *skb,
>@@ -411,6 +673,11 @@ static int dropmon_net_event(struct notifier_block *ev_block,
> return NOTIFY_DONE;
> }
>
>+static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
>+ [NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
>+ [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
>+};
>+
> static const struct genl_ops dropmon_ops[] = {
> {
> .cmd = NET_DM_CMD_CONFIG,
>@@ -434,6 +701,8 @@ static struct genl_family net_drop_monitor_family __ro_after_init = {
> .hdrsize = 0,
> .name = "NET_DM",
> .version = 2,
>+ .maxattr = NET_DM_ATTR_MAX,
>+ .policy = net_dm_nl_policy,
> .pre_doit = net_dm_nl_pre_doit,
> .post_doit = net_dm_nl_post_doit,
> .module = THIS_MODULE,
>@@ -479,6 +748,7 @@ static int __init init_net_drop_monitor(void)
> INIT_WORK(&data->dm_alert_work, send_dm_alert);
> timer_setup(&data->send_timer, sched_send_work, 0);
> spin_lock_init(&data->lock);
>+ skb_queue_head_init(&data->drop_queue);
> reset_per_cpu_data(data);
> }
>
>@@ -509,6 +779,7 @@ static void exit_net_drop_monitor(void)
> * to this struct and can free the skb inside it
> */
> kfree_skb(data->skb);
>+ WARN_ON(!skb_queue_empty(&data->drop_queue));
> }
>
> BUG_ON(genl_unregister_family(&net_drop_monitor_family));
>--
>2.21.0
>
^ permalink raw reply
* Re: [PATCH net-next 1/4] sctp: check addr_size with sa_family_t size in __sctp_setsockopt_connectx
From: Marcelo Ricardo Leitner @ 2019-07-24 12:49 UTC (permalink / raw)
To: Neil Horman; +Cc: Xin Long, network dev, linux-sctp, davem
In-Reply-To: <20190724123650.GD6204@localhost.localdomain>
On Wed, Jul 24, 2019 at 09:36:50AM -0300, Marcelo Ricardo Leitner wrote:
> On Wed, Jul 24, 2019 at 07:22:35AM -0400, Neil Horman wrote:
> > On Wed, Jul 24, 2019 at 03:21:12PM +0800, Xin Long wrote:
> > > On Tue, Jul 23, 2019 at 11:25 PM Neil Horman <nhorman@tuxdriver.com> wrote:
> > > >
> > > > On Tue, Jul 23, 2019 at 01:37:57AM +0800, Xin Long wrote:
> > > > > Now __sctp_connect() is called by __sctp_setsockopt_connectx() and
> > > > > sctp_inet_connect(), the latter has done addr_size check with size
> > > > > of sa_family_t.
> > > > >
> > > > > In the next patch to clean up __sctp_connect(), we will remove
> > > > > addr_size check with size of sa_family_t from __sctp_connect()
> > > > > for the 1st address.
> > > > >
> > > > > So before doing that, __sctp_setsockopt_connectx() should do
> > > > > this check first, as sctp_inet_connect() does.
> > > > >
> > > > > Signed-off-by: Xin Long <lucien.xin@gmail.com>
> > > > > ---
> > > > > net/sctp/socket.c | 2 +-
> > > > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> > > > > index aa80cda..5f92e4a 100644
> > > > > --- a/net/sctp/socket.c
> > > > > +++ b/net/sctp/socket.c
> > > > > @@ -1311,7 +1311,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk,
> > > > > pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n",
> > > > > __func__, sk, addrs, addrs_size);
> > > > >
> > > > > - if (unlikely(addrs_size <= 0))
> > > > > + if (unlikely(addrs_size < sizeof(sa_family_t)))
> > > > I don't think this is what you want to check for here. sa_family_t is
> > > > an unsigned short, and addrs_size is the number of bytes in the addrs
> > > > array. The addrs array should be at least the size of one struct
> > > > sockaddr (16 bytes iirc), and, if larger, should be a multiple of
> > > > sizeof(struct sockaddr)
> > > sizeof(struct sockaddr) is not the right value to check either.
> > >
> > > The proper check will be done later in __sctp_connect():
> > >
> > > af = sctp_get_af_specific(daddr->sa.sa_family);
> > > if (!af || af->sockaddr_len > addrs_size)
> > > return -EINVAL;
> > >
> > > So the check 'addrs_size < sizeof(sa_family_t)' in this patch is
> > > just to make sure daddr->sa.sa_family is accessible. the same
> > > check is also done in sctp_inet_connect().
> > >
> > That doesn't make much sense, if the proper check is done in __sctp_connect with
> > the size of the families sockaddr_len, then we don't need this check at all, we
> > can just let memdup_user take the fault on copy_to_user and return -EFAULT. If
> > we get that from memdup_user, we know its not accessible, and can bail out.
> >
> > About the only thing we need to check for here is that addr_len isn't some
> > absurdly high value (i.e. a negative value), so that we avoid trying to kmalloc
> > upwards of 2G in memdup_user. Your change does that just fine, but its no
> > better or worse than checking for <=0
>
> One can argue that such check against absurdly high values is random
> and not effective, as 2G can be somewhat reasonable on 8GB systems but
> certainly isn't on 512MB ones. On that, kmemdup_user() will also fail
> gracefully as it uses GFP_USER and __GFP_NOWARN.
>
> The original check is more for protecting for sane usage of the
> variable, which is an int, and a negative value is questionable. We
> could cast, yes, but.. was that really the intent of the application?
> Probably not.
Though that said, I'm okay with the new check here: a quick sanity
check that can avoid expensive calls to kmalloc(), while more refined
check is done later on.
>
> >
> > Neil
> >
> > > >
> > > > Neil
> > > >
> > > > > return -EINVAL;
> > > > >
> > > > > kaddrs = memdup_user(addrs, addrs_size);
> > > > > --
> > > > > 2.1.0
> > > > >
> > > > >
> > >
^ permalink raw reply
* Re: [PATCH 1/2] net: mac80211: Fix possible null-pointer dereferences in ieee80211_setup_sdata()
From: Johannes Berg @ 2019-07-24 12:40 UTC (permalink / raw)
To: Jia-Ju Bai, davem; +Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <20190724123623.10093-1-baijiaju1990@gmail.com>
On Wed, 2019-07-24 at 20:36 +0800, Jia-Ju Bai wrote:
> In ieee80211_setup_sdata(), there is an if statement on line 1410 to
> check whether sdata->dev is NULL:
> if (sdata->dev)
>
> When sdata->dev is NULL, it is used on lines 1458 and 1459:
> sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP;
> sdata->dev->netdev_ops = &ieee80211_monitorif_ops;
>
> Thus, possible null-pointer dereferences may occur.
No, this cannot happen, monitor interfaces (NL80211_IFTYPE_MONITOR) must
have a valid ->dev, only P2P device and NAN might not.
johannes
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox