* [PATCH bpf-next 4/5] samples/bpf: adapted to new uapi
From: Björn Töpel @ 2018-06-04 11:57 UTC (permalink / raw)
To: bjorn.topel, magnus.karlsson, magnus.karlsson, alexander.h.duyck,
alexander.duyck, ast, brouer, daniel, netdev, mykyta.iziumtsev
Cc: Björn Töpel, john.fastabend, willemdebruijn.kernel, mst,
michael.lundkvist, jesse.brandeburg, anjali.singhai, qi.z.zhang,
francois.ozog, ilias.apalodimas, brian.brooks, andy, michael.chan
In-Reply-To: <20180604115715.17895-1-bjorn.topel@gmail.com>
From: Björn Töpel <bjorn.topel@intel.com>
Here, the xdpsock sample application is adjusted to the new descriptor
format.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
samples/bpf/xdpsock_user.c | 84 ++++++++++++++++++++--------------------------
1 file changed, 36 insertions(+), 48 deletions(-)
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index e379eac034ac..b71a342b9082 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -46,6 +46,7 @@
#define NUM_FRAMES 131072
#define FRAME_HEADROOM 0
+#define FRAME_SHIFT 11
#define FRAME_SIZE 2048
#define NUM_DESCS 1024
#define BATCH_SIZE 16
@@ -55,6 +56,7 @@
#define DEBUG_HEXDUMP 0
+typedef __u64 u64;
typedef __u32 u32;
static unsigned long prev_time;
@@ -81,12 +83,12 @@ struct xdp_umem_uqueue {
u32 size;
u32 *producer;
u32 *consumer;
- u32 *ring;
+ u64 *ring;
void *map;
};
struct xdp_umem {
- char (*frames)[FRAME_SIZE];
+ char *frames;
struct xdp_umem_uqueue fq;
struct xdp_umem_uqueue cq;
int fd;
@@ -214,7 +216,7 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
- fq->ring[idx] = d[i].idx;
+ fq->ring[idx] = d[i].addr;
}
u_smp_wmb();
@@ -224,7 +226,7 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
return 0;
}
-static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
+static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u64 *d,
size_t nb)
{
u32 i;
@@ -246,7 +248,7 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
}
static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
- u32 *d, size_t nb)
+ u64 *d, size_t nb)
{
u32 idx, i, entries = umem_nb_avail(cq, nb);
@@ -266,10 +268,9 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
return entries;
}
-static inline void *xq_get_data(struct xdpsock *xsk, __u32 idx, __u32 off)
+static inline void *xq_get_data(struct xdpsock *xsk, u64 addr)
{
- lassert(idx < NUM_FRAMES);
- return &xsk->umem->frames[idx][off];
+ return &xsk->umem->frames[addr];
}
static inline int xq_enq(struct xdp_uqueue *uq,
@@ -285,9 +286,8 @@ static inline int xq_enq(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
- r[idx].idx = descs[i].idx;
+ r[idx].addr = descs[i].addr;
r[idx].len = descs[i].len;
- r[idx].offset = descs[i].offset;
}
u_smp_wmb();
@@ -297,7 +297,7 @@ static inline int xq_enq(struct xdp_uqueue *uq,
}
static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
- __u32 idx, unsigned int ndescs)
+ unsigned int id, unsigned int ndescs)
{
struct xdp_desc *r = uq->ring;
unsigned int i;
@@ -308,9 +308,8 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
- r[idx].idx = idx + i;
+ r[idx].addr = (id + i) << FRAME_SHIFT;
r[idx].len = sizeof(pkt_data) - 1;
- r[idx].offset = 0;
}
u_smp_wmb();
@@ -357,17 +356,21 @@ static void swap_mac_addresses(void *data)
*dst_addr = tmp;
}
-#if DEBUG_HEXDUMP
-static void hex_dump(void *pkt, size_t length, const char *prefix)
+static void hex_dump(void *pkt, size_t length, u64 addr)
{
- int i = 0;
const unsigned char *address = (unsigned char *)pkt;
const unsigned char *line = address;
size_t line_size = 32;
unsigned char c;
+ char buf[32];
+ int i = 0;
+ if (!DEBUG_HEXDUMP)
+ return;
+
+ sprintf(buf, "addr=%llu", addr);
printf("length = %zu\n", length);
- printf("%s | ", prefix);
+ printf("%s | ", buf);
while (length-- > 0) {
printf("%02X ", *address++);
if (!(++i % line_size) || (length == 0 && i % line_size)) {
@@ -382,12 +385,11 @@ static void hex_dump(void *pkt, size_t length, const char *prefix)
}
printf("\n");
if (length > 0)
- printf("%s | ", prefix);
+ printf("%s | ", buf);
}
}
printf("\n");
}
-#endif
static size_t gen_eth_frame(char *frame)
{
@@ -412,8 +414,8 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
mr.addr = (__u64)bufs;
mr.len = NUM_FRAMES * FRAME_SIZE;
- mr.frame_size = FRAME_SIZE;
- mr.frame_headroom = FRAME_HEADROOM;
+ mr.chunk_size = FRAME_SIZE;
+ mr.headroom = FRAME_HEADROOM;
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0);
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
@@ -426,7 +428,7 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
&optlen) == 0);
umem->fq.map = mmap(0, off.fr.desc +
- FQ_NUM_DESCS * sizeof(u32),
+ FQ_NUM_DESCS * sizeof(u64),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_FILL_RING);
@@ -439,7 +441,7 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
umem->fq.ring = umem->fq.map + off.fr.desc;
umem->cq.map = mmap(0, off.cr.desc +
- CQ_NUM_DESCS * sizeof(u32),
+ CQ_NUM_DESCS * sizeof(u64),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_COMPLETION_RING);
@@ -451,14 +453,14 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
umem->cq.consumer = umem->cq.map + off.cr.consumer;
umem->cq.ring = umem->cq.map + off.cr.desc;
- umem->frames = (char (*)[FRAME_SIZE])bufs;
+ umem->frames = bufs;
umem->fd = sfd;
if (opt_bench == BENCH_TXONLY) {
int i;
- for (i = 0; i < NUM_FRAMES; i++)
- (void)gen_eth_frame(&umem->frames[i][0]);
+ for (i = 0; i < NUM_FRAMES * FRAME_SIZE; i += FRAME_SIZE)
+ (void)gen_eth_frame(&umem->frames[i]);
}
return umem;
@@ -472,7 +474,7 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
struct xdpsock *xsk;
bool shared = true;
socklen_t optlen;
- u32 i;
+ u64 i;
sfd = socket(PF_XDP, SOCK_RAW, 0);
lassert(sfd >= 0);
@@ -508,7 +510,7 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
lassert(xsk->rx.map != MAP_FAILED);
if (!shared) {
- for (i = 0; i < NUM_DESCS / 2; i++)
+ for (i = 0; i < NUM_DESCS * FRAME_SIZE; i += FRAME_SIZE)
lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1)
== 0);
}
@@ -727,7 +729,7 @@ static void kick_tx(int fd)
static inline void complete_tx_l2fwd(struct xdpsock *xsk)
{
- u32 descs[BATCH_SIZE];
+ u64 descs[BATCH_SIZE];
unsigned int rcvd;
size_t ndescs;
@@ -749,7 +751,7 @@ static inline void complete_tx_l2fwd(struct xdpsock *xsk)
static inline void complete_tx_only(struct xdpsock *xsk)
{
- u32 descs[BATCH_SIZE];
+ u64 descs[BATCH_SIZE];
unsigned int rcvd;
if (!xsk->outstanding_tx)
@@ -774,17 +776,9 @@ static void rx_drop(struct xdpsock *xsk)
return;
for (i = 0; i < rcvd; i++) {
- u32 idx = descs[i].idx;
-
- lassert(idx < NUM_FRAMES);
-#if DEBUG_HEXDUMP
- char *pkt;
- char buf[32];
+ char *pkt = xq_get_data(xsk, descs[i].addr);
- pkt = xq_get_data(xsk, idx, descs[i].offset);
- sprintf(buf, "idx=%d", idx);
- hex_dump(pkt, descs[i].len, buf);
-#endif
+ hex_dump(pkt, descs[i].len, descs[i].addr);
}
xsk->rx_npkts += rcvd;
@@ -867,17 +861,11 @@ static void l2fwd(struct xdpsock *xsk)
}
for (i = 0; i < rcvd; i++) {
- char *pkt = xq_get_data(xsk, descs[i].idx,
- descs[i].offset);
+ char *pkt = xq_get_data(xsk, descs[i].addr);
swap_mac_addresses(pkt);
-#if DEBUG_HEXDUMP
- char buf[32];
- u32 idx = descs[i].idx;
- sprintf(buf, "idx=%d", idx);
- hex_dump(pkt, descs[i].len, buf);
-#endif
+ hex_dump(pkt, descs[i].len, descs[i].addr);
}
xsk->rx_npkts += rcvd;
--
2.14.1
^ permalink raw reply related
* [PATCH bpf-next 3/5] xsk: new descriptor addressing scheme
From: Björn Töpel @ 2018-06-04 11:57 UTC (permalink / raw)
To: bjorn.topel, magnus.karlsson, magnus.karlsson, alexander.h.duyck,
alexander.duyck, ast, brouer, daniel, netdev, mykyta.iziumtsev
Cc: Björn Töpel, john.fastabend, willemdebruijn.kernel, mst,
michael.lundkvist, jesse.brandeburg, anjali.singhai, qi.z.zhang,
francois.ozog, ilias.apalodimas, brian.brooks, andy, michael.chan
In-Reply-To: <20180604115715.17895-1-bjorn.topel@gmail.com>
From: Björn Töpel <bjorn.topel@intel.com>
Currently, AF_XDP only supports a fixed frame-size memory scheme where
each frame is referenced via an index (idx). A user passes the frame
index to the kernel, and the kernel acts upon the data. Some NICs,
however, do not have a fixed frame-size model, instead they have a
model where a memory window is passed to the hardware and multiple
frames are filled into that window (referred to as the "type-writer"
model).
By changing the descriptor format from the current frame index
addressing scheme, AF_XDP can in the future be extended to support
these kinds of NICs.
In the index-based model, an idx refers to a frame of size
frame_size. Addressing a frame in the UMEM is done by offsetting the
UMEM starting address by a global offset, idx * frame_size + offset.
Communicating via the fill- and completion-rings is done by means of
idx.
In this commit, the idx is removed in favor of an address (addr),
which is a relative address ranging over the UMEM. Converting an
idx-based address to the new addr is simple: addr = idx * frame_size +
offset.
We also stop referring to the UMEM "frame" as a frame. Instead it is
simply called a chunk.
To transfer ownership of a chunk to the kernel, the addr of the chunk
is passed in the fill-ring. Note that the kernel will mask addr to
make it chunk aligned, so there is no need for userspace to do
that. E.g., for a chunk size of 2k, passing an addr of 2048, 2050 or
3000 to the fill-ring will refer to the same chunk.
On the completion-ring, the addr will match that of the Tx descriptor,
passed to the kernel.
Changing the descriptor format to use chunks/addr will allow for
future changes to move to a type-writer based model, where multiple
frames can reside in one chunk. In this model, passing one single chunk
into the fill-ring would potentially result in multiple Rx
descriptors.
This commit changes the uapi of AF_XDP sockets, and updates the
documentation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
Documentation/networking/af_xdp.rst | 101 +++++++++++++++++++++---------------
include/uapi/linux/if_xdp.h | 12 ++---
net/xdp/xdp_umem.c | 33 ++++++------
net/xdp/xdp_umem.h | 27 +++-------
net/xdp/xdp_umem_props.h | 4 +-
net/xdp/xsk.c | 30 ++++++-----
net/xdp/xsk_queue.c | 2 +-
net/xdp/xsk_queue.h | 43 +++++++--------
8 files changed, 123 insertions(+), 129 deletions(-)
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 91928d9ee4bf..ff929cfab4f4 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -12,7 +12,7 @@ packet processing.
This document assumes that the reader is familiar with BPF and XDP. If
not, the Cilium project has an excellent reference guide at
-http://cilium.readthedocs.io/en/doc-1.0/bpf/.
+http://cilium.readthedocs.io/en/latest/bpf/.
Using the XDP_REDIRECT action from an XDP program, the program can
redirect ingress frames to other XDP enabled netdevs, using the
@@ -33,22 +33,22 @@ for a while due to a possible retransmit, the descriptor that points
to that packet can be changed to point to another and reused right
away. This again avoids copying data.
-The UMEM consists of a number of equally size frames and each frame
-has a unique frame id. A descriptor in one of the rings references a
-frame by referencing its frame id. The user space allocates memory for
-this UMEM using whatever means it feels is most appropriate (malloc,
-mmap, huge pages, etc). This memory area is then registered with the
-kernel using the new setsockopt XDP_UMEM_REG. The UMEM also has two
-rings: the FILL ring and the COMPLETION ring. The fill ring is used by
-the application to send down frame ids for the kernel to fill in with
-RX packet data. References to these frames will then appear in the RX
-ring once each packet has been received. The completion ring, on the
-other hand, contains frame ids that the kernel has transmitted
-completely and can now be used again by user space, for either TX or
-RX. Thus, the frame ids appearing in the completion ring are ids that
-were previously transmitted using the TX ring. In summary, the RX and
-FILL rings are used for the RX path and the TX and COMPLETION rings
-are used for the TX path.
+The UMEM consists of a number of equally sized chunks. A descriptor in
+one of the rings references a frame by referencing its addr. The addr
+is simply an offset within the entire UMEM region. The user space
+allocates memory for this UMEM using whatever means it feels is most
+appropriate (malloc, mmap, huge pages, etc). This memory area is then
+registered with the kernel using the new setsockopt XDP_UMEM_REG. The
+UMEM also has two rings: the FILL ring and the COMPLETION ring. The
+fill ring is used by the application to send down addr for the kernel
+to fill in with RX packet data. References to these frames will then
+appear in the RX ring once each packet has been received. The
+completion ring, on the other hand, contains frame addr that the
+kernel has transmitted completely and can now be used again by user
+space, for either TX or RX. Thus, the frame addrs appearing in the
+completion ring are addrs that were previously transmitted using the
+TX ring. In summary, the RX and FILL rings are used for the RX path
+and the TX and COMPLETION rings are used for the TX path.
The socket is then finally bound with a bind() call to a device and a
specific queue id on that device, and it is not until bind is
@@ -59,13 +59,13 @@ wants to do this, it simply skips the registration of the UMEM and its
corresponding two rings, sets the XDP_SHARED_UMEM flag in the bind
call and submits the XSK of the process it would like to share UMEM
with as well as its own newly created XSK socket. The new process will
-then receive frame id references in its own RX ring that point to this
-shared UMEM. Note that since the ring structures are single-consumer /
-single-producer (for performance reasons), the new process has to
-create its own socket with associated RX and TX rings, since it cannot
-share this with the other process. This is also the reason that there
-is only one set of FILL and COMPLETION rings per UMEM. It is the
-responsibility of a single process to handle the UMEM.
+then receive frame addr references in its own RX ring that point to
+this shared UMEM. Note that since the ring structures are
+single-consumer / single-producer (for performance reasons), the new
+process has to create its own socket with associated RX and TX rings,
+since it cannot share this with the other process. This is also the
+reason that there is only one set of FILL and COMPLETION rings per
+UMEM. It is the responsibility of a single process to handle the UMEM.
How is then packets distributed from an XDP program to the XSKs? There
is a BPF map called XSKMAP (or BPF_MAP_TYPE_XSKMAP in full). The
@@ -102,10 +102,10 @@ UMEM
UMEM is a region of virtual contiguous memory, divided into
equal-sized frames. An UMEM is associated to a netdev and a specific
-queue id of that netdev. It is created and configured (frame size,
-frame headroom, start address and size) by using the XDP_UMEM_REG
-setsockopt system call. A UMEM is bound to a netdev and queue id, via
-the bind() system call.
+queue id of that netdev. It is created and configured (chunk size,
+headroom, start address and size) by using the XDP_UMEM_REG setsockopt
+system call. A UMEM is bound to a netdev and queue id, via the bind()
+system call.
An AF_XDP is socket linked to a single UMEM, but one UMEM can have
multiple AF_XDP sockets. To share an UMEM created via one socket A,
@@ -147,13 +147,17 @@ UMEM Fill Ring
~~~~~~~~~~~~~~
The Fill ring is used to transfer ownership of UMEM frames from
-user-space to kernel-space. The UMEM indicies are passed in the
-ring. As an example, if the UMEM is 64k and each frame is 4k, then the
-UMEM has 16 frames and can pass indicies between 0 and 15.
+user-space to kernel-space. The UMEM addrs are passed in the ring. As
+an example, if the UMEM is 64k and each chunk is 4k, then the UMEM has
+16 chunks and can pass addrs between 0 and 64k.
Frames passed to the kernel are used for the ingress path (RX rings).
-The user application produces UMEM indicies to this ring.
+The user application produces UMEM addrs to this ring. Note that the
+kernel will mask the incoming addr. E.g. for a chunk size of 2k, the
+log2(2048) LSB of the addr will be masked off, meaning that 2048, 2050
+and 3000 refer to the same chunk.
+
UMEM Completetion Ring
~~~~~~~~~~~~~~~~~~~~~~
@@ -165,16 +169,15 @@ used.
Frames passed from the kernel to user-space are frames that has been
sent (TX ring) and can be used by user-space again.
-The user application consumes UMEM indicies from this ring.
+The user application consumes UMEM addrs from this ring.
RX Ring
~~~~~~~
The RX ring is the receiving side of a socket. Each entry in the ring
-is a struct xdp_desc descriptor. The descriptor contains UMEM index
-(idx), the length of the data (len), the offset into the frame
-(offset).
+is a struct xdp_desc descriptor. The descriptor contains UMEM offset
+(addr) and the length of the data (len).
If no frames have been passed to kernel via the Fill ring, no
descriptors will (or can) appear on the RX ring.
@@ -221,38 +224,50 @@ side is xdpsock_user.c and the XDP side xdpsock_kern.c.
Naive ring dequeue and enqueue could look like this::
+ // struct xdp_rxtx_ring {
+ // __u32 *producer;
+ // __u32 *consumer;
+ // struct xdp_desc *desc;
+ // };
+
+ // struct xdp_umem_ring {
+ // __u32 *producer;
+ // __u32 *consumer;
+ // __u64 *desc;
+ // };
+
// typedef struct xdp_rxtx_ring RING;
// typedef struct xdp_umem_ring RING;
// typedef struct xdp_desc RING_TYPE;
- // typedef __u32 RING_TYPE;
+ // typedef __u64 RING_TYPE;
int dequeue_one(RING *ring, RING_TYPE *item)
{
- __u32 entries = ring->ptrs.producer - ring->ptrs.consumer;
+ __u32 entries = *ring->producer - *ring->consumer;
if (entries == 0)
return -1;
// read-barrier!
- *item = ring->desc[ring->ptrs.consumer & (RING_SIZE - 1)];
- ring->ptrs.consumer++;
+ *item = ring->desc[*ring->consumer & (RING_SIZE - 1)];
+ (*ring->consumer)++;
return 0;
}
int enqueue_one(RING *ring, const RING_TYPE *item)
{
- u32 free_entries = RING_SIZE - (ring->ptrs.producer - ring->ptrs.consumer);
+ u32 free_entries = RING_SIZE - (*ring->producer - *ring->consumer);
if (free_entries == 0)
return -1;
- ring->desc[ring->ptrs.producer & (RING_SIZE - 1)] = *item;
+ ring->desc[*ring->producer & (RING_SIZE - 1)] = *item;
// write-barrier!
- ring->ptrs.producer++;
+ (*ring->producer)++;
return 0;
}
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 4737cfe222f5..e411d6f9ac65 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -48,8 +48,8 @@ struct xdp_mmap_offsets {
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
__u64 len; /* Length of packet data area */
- __u32 frame_size; /* Frame size */
- __u32 frame_headroom; /* Frame head room */
+ __u32 chunk_size;
+ __u32 headroom;
};
struct xdp_statistics {
@@ -66,13 +66,11 @@ struct xdp_statistics {
/* Rx/Tx descriptor */
struct xdp_desc {
- __u32 idx;
+ __u64 addr;
__u32 len;
- __u16 offset;
- __u8 flags;
- __u8 padding[5];
+ __u32 options;
};
-/* UMEM descriptor is __u32 */
+/* UMEM descriptor is __u64 */
#endif /* _LINUX_IF_XDP_H */
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 87998818116f..9ad791ff4739 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -14,7 +14,7 @@
#include "xdp_umem.h"
-#define XDP_UMEM_MIN_FRAME_SIZE 2048
+#define XDP_UMEM_MIN_CHUNK_SIZE 2048
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
@@ -151,12 +151,12 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
- u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
+ u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
+ unsigned int chunks, chunks_per_page;
u64 addr = mr->addr, size = mr->len;
- unsigned int nframes, nfpp;
int size_chk, err;
- if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
+ if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
* - huge pages, or*
* - using an IOMMU, or
@@ -166,7 +166,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (!is_power_of_2(frame_size))
+ if (!is_power_of_2(chunk_size))
return -EINVAL;
if (!PAGE_ALIGNED(addr)) {
@@ -179,33 +179,30 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if ((addr + size) < addr)
return -EINVAL;
- nframes = (unsigned int)div_u64(size, frame_size);
- if (nframes == 0 || nframes > UINT_MAX)
+ chunks = (unsigned int)div_u64(size, chunk_size);
+ if (chunks == 0)
return -EINVAL;
- nfpp = PAGE_SIZE / frame_size;
- if (nframes < nfpp || nframes % nfpp)
+ chunks_per_page = PAGE_SIZE / chunk_size;
+ if (chunks < chunks_per_page || chunks % chunks_per_page)
return -EINVAL;
- frame_headroom = ALIGN(frame_headroom, 64);
+ headroom = ALIGN(headroom, 64);
- size_chk = frame_size - frame_headroom - XDP_PACKET_HEADROOM;
+ size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
if (size_chk < 0)
return -EINVAL;
umem->pid = get_task_pid(current, PIDTYPE_PID);
- umem->size = (size_t)size;
umem->address = (unsigned long)addr;
- umem->props.frame_size = frame_size;
- umem->props.nframes = nframes;
- umem->frame_headroom = frame_headroom;
+ umem->props.chunk_mask = ~((u64)chunk_size - 1);
+ umem->props.size = size;
+ umem->headroom = headroom;
+ umem->chunk_size_nohr = chunk_size - headroom;
umem->npgs = size / PAGE_SIZE;
umem->pgs = NULL;
umem->user = NULL;
- umem->frame_size_log2 = ilog2(frame_size);
- umem->nfpp_mask = nfpp - 1;
- umem->nfpplog2 = ilog2(nfpp);
refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem);
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index 0881cf456230..aeadd1bcb72d 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -18,35 +18,20 @@ struct xdp_umem {
struct xsk_queue *cq;
struct page **pgs;
struct xdp_umem_props props;
- u32 npgs;
- u32 frame_headroom;
- u32 nfpp_mask;
- u32 nfpplog2;
- u32 frame_size_log2;
+ u32 headroom;
+ u32 chunk_size_nohr;
struct user_struct *user;
struct pid *pid;
unsigned long address;
- size_t size;
refcount_t users;
struct work_struct work;
+ u32 npgs;
};
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u32 idx)
-{
- u64 pg, off;
- char *data;
-
- pg = idx >> umem->nfpplog2;
- off = (idx & umem->nfpp_mask) << umem->frame_size_log2;
-
- data = page_address(umem->pgs[pg]);
- return data + off;
-}
-
-static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
- u32 idx)
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
- return xdp_umem_get_data(umem, idx) + umem->frame_headroom;
+ return page_address(umem->pgs[addr >> PAGE_SHIFT]) +
+ (addr & (PAGE_SIZE - 1));
}
bool xdp_umem_validate_queues(struct xdp_umem *umem);
diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h
index 2cf8ec485fd2..40eab10dfc49 100644
--- a/net/xdp/xdp_umem_props.h
+++ b/net/xdp/xdp_umem_props.h
@@ -7,8 +7,8 @@
#define XDP_UMEM_PROPS_H_
struct xdp_umem_props {
- u32 frame_size;
- u32 nframes;
+ u64 chunk_mask;
+ u64 size;
};
#endif /* XDP_UMEM_PROPS_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 966307ce4b8e..4688c750df1d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -41,24 +41,27 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- u32 id, len = xdp->data_end - xdp->data;
+ u32 len = xdp->data_end - xdp->data;
void *buffer;
+ u64 addr;
int err;
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
- if (!xskq_peek_id(xs->umem->fq, &id)) {
+ if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+ len > xs->umem->chunk_size_nohr) {
xs->rx_dropped++;
return -ENOSPC;
}
- buffer = xdp_umem_get_data_with_headroom(xs->umem, id);
+ addr += xs->umem->headroom;
+
+ buffer = xdp_umem_get_data(xs->umem, addr);
memcpy(buffer, xdp->data, len);
- err = xskq_produce_batch_desc(xs->rx, id, len,
- xs->umem->frame_headroom);
+ err = xskq_produce_batch_desc(xs->rx, addr, len);
if (!err)
- xskq_discard_id(xs->umem->fq);
+ xskq_discard_addr(xs->umem->fq);
else
xs->rx_dropped++;
@@ -95,10 +98,10 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static void xsk_destruct_skb(struct sk_buff *skb)
{
- u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg;
+ u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
struct xdp_sock *xs = xdp_sk(skb->sk);
- WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id));
+ WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
sock_wfree(skb);
}
@@ -123,14 +126,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
while (xskq_peek_desc(xs->tx, &desc)) {
char *buffer;
- u32 id, len;
+ u64 addr;
+ u32 len;
if (max_batch-- == 0) {
err = -EAGAIN;
goto out;
}
- if (xskq_reserve_id(xs->umem->cq)) {
+ if (xskq_reserve_addr(xs->umem->cq)) {
err = -EAGAIN;
goto out;
}
@@ -153,8 +157,8 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
}
skb_put(skb, len);
- id = desc.idx;
- buffer = xdp_umem_get_data(xs->umem, id) + desc.offset;
+ addr = desc.addr;
+ buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
if (unlikely(err)) {
kfree_skb(skb);
@@ -164,7 +168,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->dev = xs->dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_shinfo(skb)->destructor_arg = (void *)(long)id;
+ skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index ebe85e59507e..6c32e92e98fc 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -17,7 +17,7 @@ void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props)
static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
{
- return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u32);
+ return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u64);
}
static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index b5924e7aeb2b..337e5ad3b10e 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -27,7 +27,7 @@ struct xdp_rxtx_ring {
/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
struct xdp_ring ptrs;
- u32 desc[0] ____cacheline_aligned_in_smp;
+ u64 desc[0] ____cacheline_aligned_in_smp;
};
struct xsk_queue {
@@ -76,24 +76,25 @@ static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
/* UMEM queue */
-static inline bool xskq_is_valid_id(struct xsk_queue *q, u32 idx)
+static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
{
- if (unlikely(idx >= q->umem_props.nframes)) {
+ if (addr >= q->umem_props.size) {
q->invalid_descs++;
return false;
}
+
return true;
}
-static inline u32 *xskq_validate_id(struct xsk_queue *q, u32 *id)
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
{
while (q->cons_tail != q->cons_head) {
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
- *id = READ_ONCE(ring->desc[idx]);
- if (xskq_is_valid_id(q, *id))
- return id;
+ *addr = READ_ONCE(ring->desc[idx]) & q->umem_props.chunk_mask;
+ if (xskq_is_valid_addr(q, *addr))
+ return addr;
q->cons_tail++;
}
@@ -101,7 +102,7 @@ static inline u32 *xskq_validate_id(struct xsk_queue *q, u32 *id)
return NULL;
}
-static inline u32 *xskq_peek_id(struct xsk_queue *q, u32 *id)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
{
if (q->cons_tail == q->cons_head) {
WRITE_ONCE(q->ring->consumer, q->cons_tail);
@@ -111,19 +112,19 @@ static inline u32 *xskq_peek_id(struct xsk_queue *q, u32 *id)
smp_rmb();
}
- return xskq_validate_id(q, id);
+ return xskq_validate_addr(q, addr);
}
-static inline void xskq_discard_id(struct xsk_queue *q)
+static inline void xskq_discard_addr(struct xsk_queue *q)
{
q->cons_tail++;
}
-static inline int xskq_produce_id(struct xsk_queue *q, u32 id)
+static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- ring->desc[q->prod_tail++ & q->ring_mask] = id;
+ ring->desc[q->prod_tail++ & q->ring_mask] = addr;
/* Order producer and data */
smp_wmb();
@@ -132,7 +133,7 @@ static inline int xskq_produce_id(struct xsk_queue *q, u32 id)
return 0;
}
-static inline int xskq_reserve_id(struct xsk_queue *q)
+static inline int xskq_reserve_addr(struct xsk_queue *q)
{
if (xskq_nb_free(q, q->prod_head, 1) == 0)
return -ENOSPC;
@@ -145,16 +146,11 @@ static inline int xskq_reserve_id(struct xsk_queue *q)
static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
{
- u32 buff_len;
-
- if (unlikely(d->idx >= q->umem_props.nframes)) {
- q->invalid_descs++;
+ if (!xskq_is_valid_addr(q, d->addr))
return false;
- }
- buff_len = q->umem_props.frame_size;
- if (unlikely(d->len > buff_len || d->len == 0 ||
- d->offset > buff_len || d->offset + d->len > buff_len)) {
+ if (((d->addr + d->len) & q->umem_props.chunk_mask) !=
+ (d->addr & q->umem_props.chunk_mask)) {
q->invalid_descs++;
return false;
}
@@ -199,7 +195,7 @@ static inline void xskq_discard_desc(struct xsk_queue *q)
}
static inline int xskq_produce_batch_desc(struct xsk_queue *q,
- u32 id, u32 len, u16 offset)
+ u64 addr, u32 len)
{
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx;
@@ -208,9 +204,8 @@ static inline int xskq_produce_batch_desc(struct xsk_queue *q,
return -ENOSPC;
idx = (q->prod_head++) & q->ring_mask;
- ring->desc[idx].idx = id;
+ ring->desc[idx].addr = addr;
ring->desc[idx].len = len;
- ring->desc[idx].offset = offset;
return 0;
}
--
2.14.1
^ permalink raw reply related
* [PATCH bpf-next 2/5] xsk: proper Rx drop statistics update
From: Björn Töpel @ 2018-06-04 11:57 UTC (permalink / raw)
To: bjorn.topel, magnus.karlsson, magnus.karlsson, alexander.h.duyck,
alexander.duyck, ast, brouer, daniel, netdev, mykyta.iziumtsev
Cc: Björn Töpel, john.fastabend, willemdebruijn.kernel, mst,
michael.lundkvist, jesse.brandeburg, anjali.singhai, qi.z.zhang,
francois.ozog, ilias.apalodimas, brian.brooks, andy, michael.chan
In-Reply-To: <20180604115715.17895-1-bjorn.topel@gmail.com>
From: Björn Töpel <bjorn.topel@intel.com>
Previously, rx_dropped could be updated incorrectly, e.g. if the XDP
program redirected the frame to a socket bound to a different queue
than where the XDP program was executing.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
net/xdp/xsk.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 43554eb56fe6..966307ce4b8e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -48,8 +48,10 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
- if (!xskq_peek_id(xs->umem->fq, &id))
+ if (!xskq_peek_id(xs->umem->fq, &id)) {
+ xs->rx_dropped++;
return -ENOSPC;
+ }
buffer = xdp_umem_get_data_with_headroom(xs->umem, id);
memcpy(buffer, xdp->data, len);
@@ -57,6 +59,8 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
xs->umem->frame_headroom);
if (!err)
xskq_discard_id(xs->umem->fq);
+ else
+ xs->rx_dropped++;
return err;
}
@@ -68,8 +72,6 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
err = __xsk_rcv(xs, xdp);
if (likely(!err))
xdp_return_buff(xdp);
- else
- xs->rx_dropped++;
return err;
}
@@ -87,8 +89,6 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
err = __xsk_rcv(xs, xdp);
if (!err)
xsk_flush(xs);
- else
- xs->rx_dropped++;
return err;
}
--
2.14.1
^ permalink raw reply related
* [PATCH bpf-next 1/5] xsk: proper fill queue descriptor validation
From: Björn Töpel @ 2018-06-04 11:57 UTC (permalink / raw)
To: bjorn.topel, magnus.karlsson, magnus.karlsson, alexander.h.duyck,
alexander.duyck, ast, brouer, daniel, netdev, mykyta.iziumtsev
Cc: Björn Töpel, john.fastabend, willemdebruijn.kernel, mst,
michael.lundkvist, jesse.brandeburg, anjali.singhai, qi.z.zhang,
francois.ozog, ilias.apalodimas, brian.brooks, andy, michael.chan
In-Reply-To: <20180604115715.17895-1-bjorn.topel@gmail.com>
From: Björn Töpel <bjorn.topel@intel.com>
Previously the fill queue descriptor was not copied to kernel space
prior to validating it, making it possible for userland to change the
descriptor post-kernel-validation.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
---
net/xdp/xsk.c | 11 +++++------
net/xdp/xsk_queue.h | 32 +++++++++-----------------------
2 files changed, 14 insertions(+), 29 deletions(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cce0e4f8a536..43554eb56fe6 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -41,20 +41,19 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- u32 *id, len = xdp->data_end - xdp->data;
+ u32 id, len = xdp->data_end - xdp->data;
void *buffer;
- int err = 0;
+ int err;
if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
return -EINVAL;
- id = xskq_peek_id(xs->umem->fq);
- if (!id)
+ if (!xskq_peek_id(xs->umem->fq, &id))
return -ENOSPC;
- buffer = xdp_umem_get_data_with_headroom(xs->umem, *id);
+ buffer = xdp_umem_get_data_with_headroom(xs->umem, id);
memcpy(buffer, xdp->data, len);
- err = xskq_produce_batch_desc(xs->rx, *id, len,
+ err = xskq_produce_batch_desc(xs->rx, id, len,
xs->umem->frame_headroom);
if (!err)
xskq_discard_id(xs->umem->fq);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index cb8e5be35110..b5924e7aeb2b 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -85,14 +85,15 @@ static inline bool xskq_is_valid_id(struct xsk_queue *q, u32 idx)
return true;
}
-static inline u32 *xskq_validate_id(struct xsk_queue *q)
+static inline u32 *xskq_validate_id(struct xsk_queue *q, u32 *id)
{
while (q->cons_tail != q->cons_head) {
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
- if (xskq_is_valid_id(q, ring->desc[idx]))
- return &ring->desc[idx];
+ *id = READ_ONCE(ring->desc[idx]);
+ if (xskq_is_valid_id(q, *id))
+ return id;
q->cons_tail++;
}
@@ -100,28 +101,22 @@ static inline u32 *xskq_validate_id(struct xsk_queue *q)
return NULL;
}
-static inline u32 *xskq_peek_id(struct xsk_queue *q)
+static inline u32 *xskq_peek_id(struct xsk_queue *q, u32 *id)
{
- struct xdp_umem_ring *ring;
-
if (q->cons_tail == q->cons_head) {
WRITE_ONCE(q->ring->consumer, q->cons_tail);
q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
/* Order consumer and data */
smp_rmb();
-
- return xskq_validate_id(q);
}
- ring = (struct xdp_umem_ring *)q->ring;
- return &ring->desc[q->cons_tail & q->ring_mask];
+ return xskq_validate_id(q, id);
}
static inline void xskq_discard_id(struct xsk_queue *q)
{
q->cons_tail++;
- (void)xskq_validate_id(q);
}
static inline int xskq_produce_id(struct xsk_queue *q, u32 id)
@@ -174,11 +169,9 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
unsigned int idx = q->cons_tail & q->ring_mask;
- if (xskq_is_valid_desc(q, &ring->desc[idx])) {
- if (desc)
- *desc = ring->desc[idx];
+ *desc = READ_ONCE(ring->desc[idx]);
+ if (xskq_is_valid_desc(q, desc))
return desc;
- }
q->cons_tail++;
}
@@ -189,27 +182,20 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
struct xdp_desc *desc)
{
- struct xdp_rxtx_ring *ring;
-
if (q->cons_tail == q->cons_head) {
WRITE_ONCE(q->ring->consumer, q->cons_tail);
q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
/* Order consumer and data */
smp_rmb();
-
- return xskq_validate_desc(q, desc);
}
- ring = (struct xdp_rxtx_ring *)q->ring;
- *desc = ring->desc[q->cons_tail & q->ring_mask];
- return desc;
+ return xskq_validate_desc(q, desc);
}
static inline void xskq_discard_desc(struct xsk_queue *q)
{
q->cons_tail++;
- (void)xskq_validate_desc(q, NULL);
}
static inline int xskq_produce_batch_desc(struct xsk_queue *q,
--
2.14.1
^ permalink raw reply related
* [PATCH bpf-next 0/5] AF_XDP: bug fixes and descriptor changes
From: Björn Töpel @ 2018-06-04 11:57 UTC (permalink / raw)
To: bjorn.topel, magnus.karlsson, magnus.karlsson, alexander.h.duyck,
alexander.duyck, ast, brouer, daniel, netdev, mykyta.iziumtsev
Cc: Björn Töpel, john.fastabend, willemdebruijn.kernel, mst,
michael.lundkvist, jesse.brandeburg, anjali.singhai, qi.z.zhang,
francois.ozog, ilias.apalodimas, brian.brooks, andy, michael.chan
From: Björn Töpel <bjorn.topel@intel.com>
An issue with the current AF_XDP uapi raised by Mykyta Iziumtsev (see
https://www.spinics.net/lists/netdev/msg503664.html) is that it does
not support NICs that have a "type-writer" model in an efficient
way. In this model, a memory window is passed to the hardware and
multiple frames might be filled into that window, instead of just one
that we have in the current fixed frame-size model.
This patch set fixes two bugs in the current implementation and then
changes the uapi so that the type-writer model can be supported
efficiently by a possible future extension of AF_XDP.
These are the uapi changes in this patch:
* Change the "u32 idx" in the descriptors to "u64 addr". The current
idx-based format does NOT work for the type-writer model (as packets
can start anywhere within a frame), whereas a relative address
pointer (the u64 addr) works well in the prototype code we have,
which supports both models. We increased it from u32 to
u64 to support umems larger than 4G. We have also removed the u16
offset when having a "u64 addr" since that information is already
carried in the least significant bits of the address.
* We want to use "u8 padding[5]" for something useful in the future
(since we are not allowed to change its name), so we now call it
just options so it can be extended for various purposes in the
future. It is a u32 as that is what is left of the 16-byte
descriptor.
* We changed the name of frame_size in the UMEM_REG setsockopt to
chunk_size since this naming also makes sense to the type-writer
model.
With these changes to the uapi, we believe the type-writer model can
be supported without having to resort to a new descriptor format. The
type-writer model could then be supported, from the uapi point of
view, by setting a flag at bind time and providing a new flag bit in
the options field of the descriptor that signals to user space that
all packets have been written in a chunk. Or with a new chunk
completion queue as suggested by Mykyta in his latest feedback mail on
the list.
We based this patch set on bpf-next commit bd3a08aaa9a3 ("bpf:
flowlabel in bpf_fib_lookup should be flowinfo")
The structure of the patch set is as follows:
Patches 1-2: Fixes two bugs in the current implementation.
Patches 3-4: Prepares the uapi for a "type-writer" model and modifies
the sample application so that it works with the new
uapi.
Patch 5: Small performance improvement patch for the sample application.
Cheers: Magnus and Björn
Björn Töpel (4):
xsk: proper fill queue descriptor validation
xsk: proper Rx drop statistics update
xsk: new descriptor addressing scheme
samples/bpf: adapted to new uapi
Magnus Karlsson (1):
samples/bpf: minor *_nb_free performance fix
Documentation/networking/af_xdp.rst | 101 +++++++++++++++++++++---------------
include/uapi/linux/if_xdp.h | 12 ++---
net/xdp/xdp_umem.c | 33 ++++++------
net/xdp/xdp_umem.h | 27 +++-------
net/xdp/xdp_umem_props.h | 4 +-
net/xdp/xsk.c | 41 ++++++++-------
net/xdp/xsk_queue.c | 2 +-
net/xdp/xsk_queue.h | 63 ++++++++--------------
samples/bpf/xdpsock_user.c | 92 +++++++++++++++-----------------
9 files changed, 172 insertions(+), 203 deletions(-)
--
2.14.1
^ permalink raw reply
* [PATCH net] ipmr: fix error path when mr_table_alloc fails
From: Sabrina Dubroca @ 2018-06-04 11:55 UTC (permalink / raw)
To: netdev
Cc: Sabrina Dubroca, Eric Dumazet, Nikolay Aleksandrov, Yuval Mintz,
Ivan Vecera
commit 0bbbf0e7d0e7 ("ipmr, ip6mr: Unite creation of new mr_table")
refactored ipmr_new_table, so that it now returns NULL when
mr_table_alloc fails. Unfortunately, all callers of ipmr_new_table
expect an ERR_PTR. commit 66fb33254f45 ("ipmr: properly check
rhltable_init() return value") followed suit.
This can result in NULL deref, when ipmr_rules_exit calls
ipmr_free_table with NULL net->ipv4.mrt in the
!CONFIG_IP_MROUTE_MULTIPLE_TABLES version.
This patch makes mr_table_alloc return errors, and changes
ip6mr_new_table and its callers to return/expect error pointers as
well. It also removes the version of mr_table_alloc defined under
!CONFIG_IP_MROUTE_COMMON, since it is never used.
Fixes: 0bbbf0e7d0e7 ("ipmr, ip6mr: Unite creation of new mr_table")
Fixes: 66fb33254f45 ("ipmr: properly check rhltable_init() return value")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
---
include/linux/mroute_base.h | 10 ----------
net/ipv4/ipmr_base.c | 8 +++++---
net/ipv6/ip6mr.c | 19 +++++++++++++------
3 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index d617fe45543e..d633f737b3c6 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -307,16 +307,6 @@ static inline void vif_device_init(struct vif_device *v,
{
}
-static inline void *
-mr_table_alloc(struct net *net, u32 id,
- struct mr_table_ops *ops,
- void (*expire_func)(struct timer_list *t),
- void (*table_set)(struct mr_table *mrt,
- struct net *net))
-{
- return NULL;
-}
-
static inline void *mr_mfc_find_parent(struct mr_table *mrt,
void *hasharg, int parent)
{
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 30221701614c..cafb0506c8c9 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -35,17 +35,19 @@ mr_table_alloc(struct net *net, u32 id,
struct net *net))
{
struct mr_table *mrt;
+ int err;
mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
if (!mrt)
- return NULL;
+ return ERR_PTR(-ENOMEM);
mrt->id = id;
write_pnet(&mrt->net, net);
mrt->ops = *ops;
- if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) {
+ err = rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ if (err) {
kfree(mrt);
- return NULL;
+ return ERR_PTR(err);
}
INIT_LIST_HEAD(&mrt->mfc_cache_list);
INIT_LIST_HEAD(&mrt->mfc_unres_queue);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 298fd8b6ed17..f9b801bd00f8 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -227,8 +227,8 @@ static int __net_init ip6mr_rules_init(struct net *net)
INIT_LIST_HEAD(&net->ipv6.mr6_tables);
mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
- if (!mrt) {
- err = -ENOMEM;
+ if (IS_ERR(mrt)) {
+ err = PTR_ERR(mrt);
goto err1;
}
@@ -301,8 +301,13 @@ static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
static int __net_init ip6mr_rules_init(struct net *net)
{
- net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
- return net->ipv6.mrt6 ? 0 : -ENOMEM;
+ struct mr_table *mrt;
+
+ mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
+ if (IS_ERR(mrt))
+ return PTR_ERR(mrt);
+ net->ipv6.mrt6 = mrt;
+ return 0;
}
static void __net_exit ip6mr_rules_exit(struct net *net)
@@ -1743,6 +1748,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
case MRT6_TABLE:
{
+ struct mr_table *mrt;
u32 v;
if (optlen != sizeof(u32))
@@ -1757,8 +1763,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
rtnl_lock();
ret = 0;
- if (!ip6mr_new_table(net, v))
- ret = -ENOMEM;
+ mrt = ip6mr_new_table(net, v);
+ if (IS_ERR(mrt))
+ ret = PTR_ERR(mrt);
raw6_sk(sk)->ip6mr_table = v;
rtnl_unlock();
return ret;
--
2.17.1
^ permalink raw reply related
* Re: [PATCH 3/6] ravb: remove custom .set_link_ksettings from ethtool ops
From: Vladimir Zapolskiy @ 2018-06-04 11:07 UTC (permalink / raw)
To: Sergei Shtylyov, David S. Miller; +Cc: netdev, linux-renesas-soc
In-Reply-To: <6029136a-0aac-31f6-ef9e-edb3b7b45cd1@cogentembedded.com>
Hello Sergei,
On 06/03/2018 06:42 PM, Sergei Shtylyov wrote:
> Hello!
>
> Sorry for the delay replying, the management keeps me busy... :-(
>
> On 05/28/2018 12:51 PM, Vladimir Zapolskiy wrote:
>
>>>> The change replaces a custom implementation of .set_link_ksettings
>>>> callback with a shared phy_ethtool_set_link_ksettings(), this fixes
>>>> sleep in atomic context bug, which is encountered every time when link
>>>> settings are changed by ethtool.
>>>
>>> Seeing it now...
>
> And to say that this is *fixed* by removing the custom method is err...
> simply misleading. The sleep in atomic context is fixed solely by the removal
> of the spinlock grabbing before the phylib call.
>
As I've already said, "the removal of the spinlock grabbing before the phylib
call" is not a valid fix, but it will introduce another regression.
>>>> Now duplex mode setting is enforced in ravb_adjust_link() only, also
>>>> now TX/RX is disabled when link is put down or modifications to E-MAC
>>>> registers ECMR and GECMR are expected for both cases of checked and
>>>> ignored link status pin state from E-MAC interrupt handler.
>>>>
>>>> Signed-off-by: Vladimir Zapolskiy <vladimir_zapolskiy@mentor.com>
>>>> ---
>>>> drivers/net/ethernet/renesas/ravb_main.c | 58 +++++++++-----------------------
>>>> 1 file changed, 15 insertions(+), 43 deletions(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
>>>> index 3d91caa44176..0d811c02ff34 100644
>>>> --- a/drivers/net/ethernet/renesas/ravb_main.c
>>>> +++ b/drivers/net/ethernet/renesas/ravb_main.c
>>>> @@ -980,6 +980,13 @@ static void ravb_adjust_link(struct net_device *ndev)
>>>> struct ravb_private *priv = netdev_priv(ndev);
>>>> struct phy_device *phydev = ndev->phydev;
>>>> bool new_state = false;
>>>> + unsigned long flags;
>>>> +
>>>> + spin_lock_irqsave(&priv->lock, flags);
>>>> +
>>>> + /* Disable TX and RX right over here, if E-MAC change is ignored */
>>>> + if (priv->no_avb_link)
>>>> + ravb_rcv_snd_disable(ndev);
>>>>
>>>> if (phydev->link) {
>>>> if (phydev->duplex != priv->duplex) {
>>>> @@ -997,18 +1004,21 @@ static void ravb_adjust_link(struct net_device *ndev)
>>>> ravb_modify(ndev, ECMR, ECMR_TXF, 0);
>>>> new_state = true;
>>>> priv->link = phydev->link;
>>>> - if (priv->no_avb_link)
>>>> - ravb_rcv_snd_enable(ndev);
>>>> }
>>>> } else if (priv->link) {
>>>> new_state = true;
>>>> priv->link = 0;
>>>> priv->speed = 0;
>>>> priv->duplex = -1;
>>>> - if (priv->no_avb_link)
>>>> - ravb_rcv_snd_disable(ndev);
>>>> }
>>>>
>>>> + /* Enable TX and RX right over here, if E-MAC change is ignored */
>>>> + if (priv->no_avb_link && phydev->link)
>>>> + ravb_rcv_snd_enable(ndev);
>>>> +
>>>> + mmiowb();
>>>> + spin_unlock_irqrestore(&priv->lock, flags);
>>>> +
>>>
>>> I like this part. :-)
>>>
>>
>> A weight off my mind :) And I hope that this change will remain the less
>> questionable one, other ones from the series are trivial.
>>
>> Anyway I hope it is understandable that this part of the change can not
>> be simply extracted from the rest one below, otherwise there'll be bugs of
>> another type introduced.
>
> I never said I'd like to apply this part alone, my idea was more like removing
> the spinlock grabbing and the duplex handling down below.
>
As I've already said, "the removal of the spinlock grabbing" is not a valid fix,
but it will introduce another regression.
Please tell me, a removal of duplex handling change should be done before
a removal of the spinlock grabbing? Or after?
> [...]
>>>> @@ -1096,44 +1106,6 @@ static int ravb_phy_start(struct net_device *ndev)
>>>> return 0;
>>>> }
>>>>
>>>> -static int ravb_set_link_ksettings(struct net_device *ndev,
>>>> - const struct ethtool_link_ksettings *cmd)
>>>> -{
>>>> - struct ravb_private *priv = netdev_priv(ndev);
>>>> - unsigned long flags;
>>>> - int error;
>>>> -
>>>> - if (!ndev->phydev)
>>>> - return -ENODEV;
>>>> -
>>>> - spin_lock_irqsave(&priv->lock, flags);
>>>> -
>>>> - /* Disable TX and RX */
>>>> - ravb_rcv_snd_disable(ndev);
>>>> -
>>>> - error = phy_ethtool_ksettings_set(ndev->phydev, cmd);
>>>> - if (error)
>>>> - goto error_exit;
>>>> -
>>>> - if (cmd->base.duplex == DUPLEX_FULL)
>>>> - priv->duplex = 1;
>>>> - else
>>>> - priv->duplex = 0;
>>>> -
>>>> - ravb_set_duplex(ndev);
>>>> -
>>>> -error_exit:
>>>> - mdelay(1);
>>>> -
>>>> - /* Enable TX and RX */
>>>> - ravb_rcv_snd_enable(ndev);
>>>> -
>>>> - mmiowb();
>>>> - spin_unlock_irqrestore(&priv->lock, flags);
>>>> -
>>>> - return error;
>>>> -}
>>>> -
>>>
>>> But this part is clearly lumping it all together...
>>
>> Please elaborate.
>
> My point is still that complete removal of the custom method was somewhat
> premature and completely unnecessary for fixing the issues we have.
>
>>> [...]
>>>> @@ -1357,7 +1329,7 @@ static const struct ethtool_ops ravb_ethtool_ops = {
>>>> .set_ringparam = ravb_set_ringparam,
>>>> .get_ts_info = ravb_get_ts_info,
>>>> .get_link_ksettings = phy_ethtool_get_link_ksettings,
>>>> - .set_link_ksettings = ravb_set_link_ksettings,
>>>> + .set_link_ksettings = phy_ethtool_set_link_ksettings,
>>>
>>> Should have been a part of the final patch in the fix/enhancement chain...
>>
>> Please elaborate.
>>
>> Do you mean that firstly I have to make erroneous ravb_set_link_ksettings()
>> to look similar to phy_ethtool_set_link_ksettings() and then remove it?
>
> Yes.
>
Then this change of "ravb_set_link_ksettings() looks similar to
phy_ethtool_set_link_ksettings()" will be a single commit, and it will be
a fix. Does it sound good?
>> As I see it in the current context (removal of ravb_set_duplex() call and
>> so on), the problem with this approach is that the actual fix change will
>> be done on top of a number of enhancement changes, thus it contradicts
>
> Now I have to ask you to elaborate. I have no idea what you mean. :-(
>
My statement is based on the next facts:
1. ravb_set_duplex() call in ravb_set_link_ksettings() is unnecessary,
however its removal is an enhancement,
2. removal of the spinlock grabbing is just a *part* of the proper fix,
and the complete proper fix includes ravb_set_duplex() call removal,
adding spinlock grabbing to ravb_adjust_link() and other modifications
to ravb_adjust_link() from this commit.
3. commits with fixes must precede commits with enhancements in the
series, because enhancements are not backported.
The question remains the same, what do you ask me to do?
> And of course, sometimes the things are broken in a so subtle way, that
> only as pile of "cleanups" fixed them, we had that situation in e.g. the
> R-Car I2C driver -- *none* of AFAIR 9 patches was good as a -stable patch...
>
>> the accepted development/maintenance model "fixes first", and most probably
>> it won't be possible to backport the real fix, however this sole change can
>> be backported.
>
> My idea was to move the [G]ECMR writes to the adjust_link() callback and
> to stop grabbing the spinlock where it *was* grabbed in the same fix patch.
> Then just a single clean up, to start using the new phylib method.
>
It will be okay iff ravb_set_duplex() call removal is added to the first
part ("fixes" part) of two changes. Please confirm that our understanding
is aligned.
^ permalink raw reply
* Re: bpf_redirect_map not working after tail call
From: Jesper Dangaard Brouer via iovisor-dev @ 2018-06-04 11:04 UTC (permalink / raw)
To: iovisor-dev, Daniel Borkmann
Cc: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <319a775a-4913-1e58-c0dd-f3d6e3f56d9e-8RLafaVCWuNeoWH0uzbU5w@public.gmane.org>
On Fri, 1 Jun 2018 14:15:58 +0200
Sebastiano Miano via iovisor-dev <iovisor-dev-9jONkmmOlFHEE9lA1F8Ukti2O/JbrIOy@public.gmane.org> wrote:
> Dear all,
>
> We have noticed that the bpf_redirect_map returns an error when it is
> called after a tail call.
> The xdp_redirect_map program (under samples/bpf) works fine, but if we
> modify it as shown in the following diff, it doesn't work anymore.
> I have debugged it with the xdp_monitor application and the error
> returned is EFAULT.
> Is this a known issue? Am I doing something wrong?
Argh, this is likely an issue/bug due to the check xdp_map_invalid(),
that was introduced in commit 7c3001313396 ("bpf: fix ri->map_owner
pointer on bpf_prog_realloc").
To Daniel, I don't know how to solve this, could you give some advice?
static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
unsigned long aux)
{
return (unsigned long)xdp_prog->aux != aux;
}
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
u32 index = ri->ifindex;
void *fwd = NULL;
int err;
[...]
if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
err = -EFAULT;
map = NULL;
goto err;
}
[...]
> P.S. I have tested the program with the latest bpf-next kernel.
>
> ------------
>
> diff --git a/samples/bpf/xdp_redirect_map_kern.c
> b/samples/bpf/xdp_redirect_map_kern.c
> index 740a529..bf1275a 100644
> --- a/samples/bpf/xdp_redirect_map_kern.c
> +++ b/samples/bpf/xdp_redirect_map_kern.c
> @@ -36,6 +36,13 @@ struct bpf_map_def SEC("maps") rxcnt = {
> .max_entries = 1,
> };
>
> +struct bpf_map_def SEC("maps") prog_table = {
> + .type = BPF_MAP_TYPE_PROG_ARRAY,
> + .key_size = sizeof(int),
> + .value_size = sizeof(int),
> + .max_entries = 32,
> +};
> +
> static void swap_src_dst_mac(void *data)
> {
> unsigned short *p = data;
> @@ -89,4 +96,15 @@ int xdp_redirect_dummy_prog(struct xdp_md *ctx)
> return XDP_PASS;
> }
>
> +/* Entry point */
> +SEC("xdp_redirect_entry_point")
> +int xdp_redirect_entry_point_prog(struct xdp_md *ctx)
> +{
> + //char fmt[] = "xdp_redirect_entry_point\n";
> + //bpf_trace_printk(fmt, sizeof(fmt));
> + bpf_tail_call(ctx, &prog_table, 0);
> + // Tail call failed
> + return XDP_DROP;
> +}
> +
> char _license[] SEC("license") = "GPL";
> diff --git a/samples/bpf/xdp_redirect_map_user.c
> b/samples/bpf/xdp_redirect_map_user.c
> index 4445e76..b2d2059 100644
> --- a/samples/bpf/xdp_redirect_map_user.c
> +++ b/samples/bpf/xdp_redirect_map_user.c
> @@ -120,7 +120,13 @@ int main(int argc, char **argv)
> return 1;
> }
>
> - if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[0], xdp_flags) < 0) {
> + ret = bpf_map_update_elem(map_fd[2], &key, &prog_fd[0], 0);
> + if (ret) {
> + perror("bpf_update_elem");
> + goto out;
> + }
> +
> + if (bpf_set_link_xdp_fd(ifindex_in, prog_fd[2], xdp_flags) < 0) {
> printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
> return 1;
> }
> _______________________________________________
> iovisor-dev mailing list
> iovisor-dev-9jONkmmOlFHEE9lA1F8Ukti2O/JbrIOy@public.gmane.org
> https://lists.iovisor.org/mailman/listinfo/iovisor-dev
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
LinkedIn: http://www.linkedin.com/in/brouer
^ permalink raw reply
* Re: [PATCH bpf-next v3 05/11] bpf: avoid retpoline for lookup/update/delete calls on maps
From: Phil Sutter @ 2018-06-04 11:02 UTC (permalink / raw)
To: Jesper Dangaard Brouer
Cc: Daniel Borkmann, alexei.starovoitov, netdev, Jakub Kicinski,
Jakub Kicinski, Quentin Monnet
In-Reply-To: <20180603190855.53447a7d@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 776 bytes --]
Hi!
On Sun, Jun 03, 2018 at 07:08:55PM +0200, Jesper Dangaard Brouer wrote:
[...]
> Secondly I personally *hate* how 'ip' does its short options
> parsing and especially order/precedence ambiguity. Phil Sutter
> (Fedora/RHEL iproute2 maintainer) has a funny quiz illustrating the
> ambiguity issues.
Hehe, yes. It's a classical case of something smart evolving into a
pain: At first there's only 'ip link', so you allow 'ip l' as a
shortcut. Then someone implements 'ip l2tp' - so what do you do?
Establish a policy of abbreviation having to be unique and break
existing behaviour or accept the mess and head on.
My suggestion would be to not get into the abbreviated subcommands
business at all but instead ship and maintain a bash-completion script.
Cheers, Phil
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* [PATCH] atmel: use memdup_user to simplify the code
From: YueHaibing @ 2018-06-04 10:32 UTC (permalink / raw)
To: davem, kvalo; +Cc: netdev, linux-kernel, linux-wireless, YueHaibing
Use the existing memdup_user() helper function instead of open-coding it.
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
drivers/net/wireless/atmel/atmel.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c
index d122386..30b479a 100644
--- a/drivers/net/wireless/atmel/atmel.c
+++ b/drivers/net/wireless/atmel/atmel.c
@@ -2657,14 +2657,9 @@ static int atmel_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
break;
}
- if (!(new_firmware = kmalloc(com.len, GFP_KERNEL))) {
- rc = -ENOMEM;
- break;
- }
-
- if (copy_from_user(new_firmware, com.data, com.len)) {
- kfree(new_firmware);
- rc = -EFAULT;
+ new_firmware = memdup_user(com.data, com.len);
+ if (IS_ERR(new_firmware)) {
+ rc = PTR_ERR(new_firmware);
break;
}
--
2.7.0
^ permalink raw reply related
* pull request: bluetooth-next 2018-06-04
From: Johan Hedberg @ 2018-06-04 10:15 UTC (permalink / raw)
To: davem; +Cc: linux-bluetooth, netdev
[-- Attachment #1: Type: text/plain, Size: 2766 bytes --]
Hi Dave,
Here's one last bluetooth-next pull request for the 4.18 kernel:
- New USB device IDs for Realtek 8822BE and 8723DE
- reset/resume fix for Dell Inspiron 5565
- Fix HCI_UART_INIT_PENDING flag behavior
- Fix patching behavior for some ATH3012 models
- A few other minor cleanups & fixes
Please let me know if there are any issues pulling. Thanks.
Johan
---
The following changes since commit 874fcf1de613ad7b3cecf8a9cfe806fe7c2e3c68:
Merge tag 'mlx5e-updates-2018-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux (2018-05-29 09:45:13 -0400)
are available in the Git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git for-upstream
for you to fetch changes up to 1cd2fabf4bdcf95eda6a1bcebc4a0a965509da36:
Bluetooth: btusb: Add additional device ID for RTL8822BE (2018-05-30 15:45:01 +0200)
----------------------------------------------------------------
Andy Shevchenko (2):
Bluetooth: Re-use kstrtobool_from_user()
Bluetooth: btmrvl: Re-use kstrtol_from_user()
Arend van Spriel (1):
Bluetooth: btmrvl: support sysfs initiated firmware coredump
Artiom Vaskov (1):
Bluetooth: btusb: Add additional device ID for RTL8822BE
Hans de Goede (4):
Bluetooth: btusb: Add Dell Inspiron 5565 to btusb_needs_reset_resume_table
Bluetooth: hci_uart: Restore hci_dev->flush callback on open()
Bluetooth: hci_serdev: Move serdev_device_close/open into common hci_serdev code
Bluetooth: hci_serdev: Fix HCI_UART_INIT_PENDING not working
Jian-Hong Pan (1):
Bluetooth: btusb: Add a new Realtek 8723DE ID 2ff8:b011
Takashi Iwai (1):
Bluetooth: btusb: Apply QCA Rome patches for some ATH3012 models
Thierry Escande (1):
Bluetooth: hci_qca: Fix "Sleep inside atomic section" warning
Vaibhav Murkute (1):
Bluetooth: hci_serdev: Removed unnecessary curly braces
drivers/bluetooth/btmrvl_debugfs.c | 55 +++-----------------------------------
drivers/bluetooth/btmrvl_drv.h | 2 --
drivers/bluetooth/btmrvl_main.c | 6 -----
drivers/bluetooth/btmrvl_sdio.c | 11 +++++---
drivers/bluetooth/btusb.c | 43 ++++++++++++++++++++++++-----
drivers/bluetooth/hci_bcm.c | 10 +------
drivers/bluetooth/hci_ldisc.c | 22 ++++++++-------
drivers/bluetooth/hci_ll.c | 3 ---
drivers/bluetooth/hci_nokia.c | 3 ---
drivers/bluetooth/hci_qca.c | 2 +-
drivers/bluetooth/hci_serdev.c | 32 ++++++++++++++--------
drivers/bluetooth/hci_uart.h | 1 +
net/bluetooth/hci_core.c | 23 +++++-----------
net/bluetooth/hci_debugfs.c | 24 ++++++-----------
net/bluetooth/smp.c | 12 +++------
15 files changed, 102 insertions(+), 147 deletions(-)
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]
^ permalink raw reply
* [PATCH] docs: networking: fix minor typos in various documentation files
From: Olivier Gayot @ 2018-06-04 10:07 UTC (permalink / raw)
To: David S . Miller; +Cc: netdev, Olivier Gayot
This patch fixes some typos/misspelling errors in the
Documentation/networking files.
Signed-off-by: Olivier Gayot <olivier.gayot@sigexec.com>
---
Documentation/networking/6lowpan.txt | 4 ++--
Documentation/networking/gtp.txt | 4 ++--
Documentation/networking/ila.txt | 2 +-
Documentation/networking/ip-sysctl.txt | 2 +-
Documentation/networking/ipsec.txt | 4 ++--
Documentation/networking/ipvlan.txt | 4 ++--
Documentation/networking/kcm.txt | 10 +++++-----
Documentation/networking/nf_conntrack-sysctl.txt | 2 +-
8 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/Documentation/networking/6lowpan.txt b/Documentation/networking/6lowpan.txt
index a7dc7e939c7a..2e5a939d7e6f 100644
--- a/Documentation/networking/6lowpan.txt
+++ b/Documentation/networking/6lowpan.txt
@@ -24,10 +24,10 @@ enum lowpan_lltypes.
Example to evaluate the private usually you can do:
-static inline sturct lowpan_priv_foobar *
+static inline struct lowpan_priv_foobar *
lowpan_foobar_priv(struct net_device *dev)
{
- return (sturct lowpan_priv_foobar *)lowpan_priv(dev)->priv;
+ return (struct lowpan_priv_foobar *)lowpan_priv(dev)->priv;
}
switch (dev->type) {
diff --git a/Documentation/networking/gtp.txt b/Documentation/networking/gtp.txt
index 0d9c18f05ec6..6966bbec1ecb 100644
--- a/Documentation/networking/gtp.txt
+++ b/Documentation/networking/gtp.txt
@@ -67,7 +67,7 @@ Don't be confused by terminology: The GTP User Plane goes through
kernel accelerated path, while the GTP Control Plane goes to
Userspace :)
-The official homepge of the module is at
+The official homepage of the module is at
https://osmocom.org/projects/linux-kernel-gtp-u/wiki
== Userspace Programs with Linux Kernel GTP-U support ==
@@ -120,7 +120,7 @@ If yo have questions regarding how to use the Kernel GTP module from
your own software, or want to contribute to the code, please use the
osmocom-net-grps mailing list for related discussion. The list can be
reached at osmocom-net-gprs@lists.osmocom.org and the mailman
-interface for managign your subscription is at
+interface for managing your subscription is at
https://lists.osmocom.org/mailman/listinfo/osmocom-net-gprs
== Issue Tracker ==
diff --git a/Documentation/networking/ila.txt b/Documentation/networking/ila.txt
index 78df879abd26..a17dac9dc915 100644
--- a/Documentation/networking/ila.txt
+++ b/Documentation/networking/ila.txt
@@ -121,7 +121,7 @@ three options to deal with this:
- checksum neutral mapping
When an address is translated the difference can be offset
- elsewhere in a part of the packet that is covered by the
+ elsewhere in a part of the packet that is covered by
the checksum. The low order sixteen bits of the identifier
are used. This method is preferred since it doesn't require
parsing a packet beyond the IP header and in most cases the
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 35ffaa281b26..92a91a025757 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -26,7 +26,7 @@ ip_no_pmtu_disc - INTEGER
discarded. Outgoing frames are handled the same as in mode 1,
implicitly setting IP_PMTUDISC_DONT on every created socket.
- Mode 3 is a hardend pmtu discover mode. The kernel will only
+ Mode 3 is a hardened pmtu discover mode. The kernel will only
accept fragmentation-needed errors if the underlying protocol
can verify them besides a plain socket lookup. Current
protocols for which pmtu events will be honored are TCP, SCTP
diff --git a/Documentation/networking/ipsec.txt b/Documentation/networking/ipsec.txt
index 8dbc08b7e431..ba794b7e51be 100644
--- a/Documentation/networking/ipsec.txt
+++ b/Documentation/networking/ipsec.txt
@@ -25,8 +25,8 @@ Quote from RFC3173:
is implementation dependent.
Current IPComp implementation is indeed by the book, while as in practice
-when sending non-compressed packet to the peer(whether or not packet len
-is smaller than the threshold or the compressed len is large than original
+when sending non-compressed packet to the peer (whether or not packet len
+is smaller than the threshold or the compressed len is larger than original
packet len), the packet is dropped when checking the policy as this packet
matches the selector but not coming from any XFRM layer, i.e., with no
security path. Such naked packet will not eventually make it to upper layer.
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt
index 812ef003e0a8..27a38e50c287 100644
--- a/Documentation/networking/ipvlan.txt
+++ b/Documentation/networking/ipvlan.txt
@@ -73,11 +73,11 @@ mode to make conn-tracking work.
This is the default option. To configure the IPvlan port in this mode,
user can choose to either add this option on the command-line or don't specify
anything. This is the traditional mode where slaves can cross-talk among
-themseleves apart from talking through the master device.
+themselves apart from talking through the master device.
5.2 private:
If this option is added to the command-line, the port is set in private
-mode. i.e. port wont allow cross communication between slaves.
+mode. i.e. port won't allow cross communication between slaves.
5.3 vepa:
If this is added to the command-line, the port is set in VEPA mode.
diff --git a/Documentation/networking/kcm.txt b/Documentation/networking/kcm.txt
index 9a513295b07c..b773a5278ac4 100644
--- a/Documentation/networking/kcm.txt
+++ b/Documentation/networking/kcm.txt
@@ -1,4 +1,4 @@
-Kernel Connection Mulitplexor
+Kernel Connection Multiplexor
-----------------------------
Kernel Connection Multiplexor (KCM) is a mechanism that provides a message based
@@ -31,7 +31,7 @@ KCM implements an NxM multiplexor in the kernel as diagrammed below:
KCM sockets
-----------
-The KCM sockets provide the user interface to the muliplexor. All the KCM sockets
+The KCM sockets provide the user interface to the multiplexor. All the KCM sockets
bound to a multiplexor are considered to have equivalent function, and I/O
operations in different sockets may be done in parallel without the need for
synchronization between threads in userspace.
@@ -199,7 +199,7 @@ while. Example use:
BFP programs for message delineation
------------------------------------
-BPF programs can be compiled using the BPF LLVM backend. For exmple,
+BPF programs can be compiled using the BPF LLVM backend. For example,
the BPF program for parsing Thrift is:
#include "bpf.h" /* for __sk_buff */
@@ -222,7 +222,7 @@ messages. The kernel provides necessary assurances that messages are sent
and received atomically. This relieves much of the burden applications have
in mapping a message based protocol onto the TCP stream. KCM also make
application layer messages a unit of work in the kernel for the purposes of
-steerng and scheduling, which in turn allows a simpler networking model in
+steering and scheduling, which in turn allows a simpler networking model in
multithreaded applications.
Configurations
@@ -272,7 +272,7 @@ on the socket thus waking up the application thread. When the application
sees the error (which may just be a disconnect) it should unattach the
socket from KCM and then close it. It is assumed that once an error is
posted on the TCP socket the data stream is unrecoverable (i.e. an error
-may have occurred in the middle of receiving a messssge).
+may have occurred in the middle of receiving a message).
TCP connection monitoring
-------------------------
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt
index 433b6724797a..1669dc2419fd 100644
--- a/Documentation/networking/nf_conntrack-sysctl.txt
+++ b/Documentation/networking/nf_conntrack-sysctl.txt
@@ -156,7 +156,7 @@ nf_conntrack_timestamp - BOOLEAN
nf_conntrack_udp_timeout - INTEGER (seconds)
default 30
-nf_conntrack_udp_timeout_stream2 - INTEGER (seconds)
+nf_conntrack_udp_timeout_stream - INTEGER (seconds)
default 180
This extended timeout will be used in case there is an UDP stream
--
2.17.1
^ permalink raw reply related
* Re: [RFC feedback] AF_XDP and non-Intel hardware
From: Mykyta Iziumtsev @ 2018-06-04 9:44 UTC (permalink / raw)
To: Björn Töpel
Cc: Netdev, Björn Töpel, Karlsson, Magnus, Zhang, Qi Z,
Francois Ozog, Ilias Apalodimas, Brian Brooks,
Jesper Dangaard Brouer, Andy Gospodarek, michael.chan,
Luke Gorrie
In-Reply-To: <CAJ+HfNjxyeOGCw0Fi70HcG_aq1c_dc6JHF3+87K8G6YqBjj54A@mail.gmail.com>
Hi Björn and Magnus,
Here is a short update on the proposed changes to AF_XDP.
During implementation phase I figured out that 'end-of-page' in RX
descriptor's flags won't work, unfortunately. This is because the
kernel driver can't know if the frame will be last on the page when RX
descriptor is being filled in. Imagine that the driver is processing a
frame on a page, and there are still 1000 bytes of unutilized memory
beyond this frame. If the next frame (which hasn't yet arrived) is
less than that -- the NIC can place it on the same page, otherwise the
NIC will skip remaining 1000 bytes and take next page. Of course, the
driver can update RX descriptor retrospectively, or use 'empty' RX
descriptors just to indicate page completion, but it's too complex or
inefficient and as such doesn't look good.
What I could propose instead is to make the 'filling' and 'page
completion' symmetric. That is, UMEM will have 1 queue to pass pages
from application to the kernel driver (fring in your terminology) and
one to indicate complete pages back to the application.
That 'page completion' queue can be added as a third queue to UMEM, or
alternatively TX completions can be decoupled from UMEM to make it a
simple 'memory area' object with two directional queues which work
with RX pages only. TX completions can be handled in a simpler manner
as part of TX queues: just make sure consumer index is advanced only
when the frame is finally sent out. The TX completion processing can
then be done by observing the consumer index advance from the application. That
would not only be more in line with best NIC ring design practices
but will improve cache locality and eliminate critical section if TX
queues are assigned to different CPUs.
With best regards,
Mykyta
On 22 May 2018 at 14:09, Björn Töpel <bjorn.topel@gmail.com> wrote:
> 2018-05-22 9:45 GMT+02:00 Mykyta Iziumtsev <mykyta.iziumtsev@linaro.org>:
>> On 21 May 2018 at 20:55, Björn Töpel <bjorn.topel@gmail.com> wrote:
>>> 2018-05-21 14:34 GMT+02:00 Mykyta Iziumtsev <mykyta.iziumtsev@linaro.org>:
>>>> Hi Björn and Magnus,
>>>>
>>>> (This thread is a follow up to private dialogue. The intention is to
>>>> let community know that AF_XDP can be enhanced further to make it
>>>> compatible with wider range of NIC vendors).
>>>>
>>>
>>> Mykyta, thanks for doing the write-up and sending it to the netdev
>>> list! The timing could not be better -- we need to settle on an uapi
>>> that works for all vendors prior to enabling it in the kernel.
>>>
>>>> There are two NIC variations which don't fit well with current AF_XDP proposal.
>>>>
>>>> The first variation is represented by some NXP and Cavium NICs. AF_XDP
>>>> expects NIC to put incoming frames into slots defined by UMEM area.
>>>> Here slot size is set in XDP_UMEM_REG xdp_umem.reg.frame_size and
>>>> slots available to NIC are communicated to the kernel via UMEM fill
>>>> queue. While Intel NICs support only one slot size, NXP and Cavium
>>>> support multiple slot sizes to optimize memory usage (e.g. { 128, 512,
>>>> 2048 }, please refer to [1] for rationale). On frame reception the NIC
>>>> pulls a slot from best fit pool based on frame size.
>>>>
>>>> The second variation is represented by e.g. Chelsio T5/T6 and Netcope
>>>> NICs. As shown above, AF_XDP expects NIC to put incoming frames at
>>>> predefined addresses. This is not the case here as the NIC is in
>>>> charge of placing frames in memory based on its own algorithm. For
>>>> example, Chelsio T5/T6 is expecting to get whole pages from the driver
>>>> and puts incoming frames on the page in a 'tape recorder' fashion.
>>>> Assuming 'base' is page address and frame_len[N] is an array of frame
>>>> lengths, the frame placement in memory will look like that:
>>>> base + 0
>>>> base + frame_len[0]
>>>> base + frame_len[0] + frame_len[1]
>>>> base + frame_len[0] + frame_len[1] + frame_len[2]
>>>> ...
>>>>
>>>> To better support these two NIC variations I suggest to abandon 'frame
>>>> size' structuring in UMEM and stick with 'pages' instead. The NIC
>>>> kernel driver is then responsible for splitting provided pages into
>>>> slots expected by underlying HW (or not splitting at all in case of
>>>> Chelsio/Netcope).
>>>>
>>>
>>> Let's call the first variation "multi-pool" and the second
>>> "type-writer" for simplicity. The type-writer model is very
>>> interesting, and makes a lot of sense when the PCIe bus is a
>>> constraint.
>>>
>>>> On XDP_UMEM_REG the application needs to specify page_size. Then the
>>>> application can pass empty pages to the kernel driver using UMEM
>>>> 'fill' queue by specifying page offset within the UMEM area. xdp_desc
>>>> format needs to be changed as well: frame location will be defined by
>>>> offset from the beginning of UMEM area instead of frame index. As
>>>> payload headroom can vary with AF_XDP we'll need to specify it in
>>>> xdp_desc as well. Beside that it could be worth to consider changing
>>>> payload length to u16 as 64k+ frames aren't very common in networking.
>>>> The resulting xdp_desc would look like that:
>>>>
>>>> struct xdp_desc {
>>>> __u64 offset;
>>>> __u16 headroom;
>>>> __u16 len;
>>>> __u8 flags;
>>>> __u8 padding[3];
>>>> };
>>>>
>>>
>>> Let's expand a bit here; Instead of passing indices to fixed sized
>>> frames to the fill ring, we now pass a memory region. For simplicity,
>>> let's say a page. The fill ring descriptor requires offset and
>>> len. The offset is a global offset from an UMEM perspective, and len
>>> is the size of the region.
>>>
>>
>> I would rather stick with region equal to page (regular or huge page,
>> defined by application). The page size can be extracted from
>> vm_area_struct in XDP_UMEM_REG (preferred) or configured by
>> application.
>>
>
> Ok, thinking more about it I prefer this as well. This means that we
> only need to grow the UMEM fring/cring descriptors to u64, and not
> care about length. As you state below, this makes the validation
> simple.
>
> We might consider exposing a "page size hint", that the user can set.
> For 4G hugepage scenario, it might make sense to have a chunk *less*
> than 4G to avoid HW Rx memory running low when the end of a chunk is
> approaching.
>
> As for THP, I need to think about proper behavior here.
>
>>> On the Rx ring the descriptor, as you wrote, must be changed as well
>>> to your suggestion above. Note, that headroom is still needed, since
>>> XDP can change the size of a packet, so the fixed headroom stated in
>>> UMEM registration is not sufficient.
>>>
>>> This model is obviously more flexible, but then also a bit more
>>> complex. E.g. a fixed-frame NIC (like ixgbe), what should the
>>> behaviour be? Should the fill ring entry be used only for *one* frame,
>>> or chopped up to multiple receive frames? Should it be configurable?
>>> Driver specific?
>>
>> I think driver-specific makes most sense here. In case of fixed-frame
>> NIC the driver shall chop the ring entry into multiple receive frames.
>>
>
> Let's start there, keeping the configuration space small.
>
>>>
>>> Also, validating the entries in the fill queue require more work
>>> (compared to the current index variant). Currently, we only skip
>>> invalid indices. What should we do when say, you pass a memory window
>>> that is too narrow (say 128B) but correct from a UMEM
>>> perspective. Going this path, we need to add pretty hard constraints
>>> so we don't end up with too complex code -- because then it'll be too
>>> slow.
>>
>> If we stick with pages -- the only possible erroneous input will be
>> 'page out of UMEM boundaries'. The validation will be essentially:
>>
>> if ((offset > umem->size) || (offset & (umem->page_size - 1)))
>> fail
>>
>> The question is what shall be done if validation fails ? Would
>> SEGFAULT be reasonable ? This is more or less equivalent to
>> dereferencing invalid pointer.
>>
>
> In the current scheme, the kernel simply skips the invalid
> fill ring entry. SIGSEGV is an interesting idea!
>
>>>
>>>> In current proposal you have a notion of 'frame ownership': 'owned by
>>>> kernel' or 'owned by application'. The ownership is transferred by
>>>> means of enqueueing frame index in UMEM 'fill' queue (from application
>>>> to kernel) or in UMEM 'tx completion' queue (from kernel to
>>>> application). If you decide to adopt 'page' approach this notion needs
>>>> to be changed a bit. This is because in case of packet forwarding one
>>>> and the same page can be used for RX (parts of it enqueued in HW 'free
>>>> lists') and TX (forwarding of previously RXed packets).
>>>>
>>>> I propose to define 'ownership' as a right to manipulate the
>>>> partitioning of the page into frames. Whenever application passes a
>>>> page to the kernel via UMEM 'fill' queue -- the ownership is
>>>> transferred to the kernel. The application can't allocate packets on
>>>> this page until kernel is done with it, but it can change payload of
>>>> RXed packets before forwarding them. The kernel can pass ownership
>>>> back by means of 'end-of-page' in xdp_desc.flags.
>>>>
>>>
>>> I like the end-of-page mechanism.
>>>
>>>> The pages are definitely supposed to be recycled sooner or later. Even
>>>> if it's not part of kernel API and the housekeeping implementation
>>>> resided completely in application I still would like to propose
>>>> possible (hopefully, cost efficient) solution to that. The recycling
>>>> could be achieved by keeping refcount on pages and recycling the page
>>>> only when it's owned by application and refcount reaches 0.
>>>>
>>>> Whenever application transfers page ownership to the kernel the
>>>> refcount shall be initialized to 0. With each incoming RX xdp_desc the
>>>> corresponding page needs to be identified (xdp_desc.offset >>
>>>> PAGE_SHIFT) and refcount incremented. When the packet gets freed the
>>>> refcount shall be decremented. If packet is forwarded in TX xdp_desc
>>>> -- the refcount gets decremented only on TX completion (again,
>>>> tx_completion.desc >> PAGE_SHIFT). For packets originating from the
>>>> application itself the payload buffers needs to be allocated from
>>>> empty page owned by the application and refcount needs to be
>>>> incremented as well.
>>>>
>>>
>>> Strictly speaking, we're not enforcing correctness in the current
>>> solution. If the userspace application passes index 1 multiple times
>>> to the fill ring, and at the same time sends index 1, things will
>>> break. So, with the existing solution the userspace application
>>> *still* needs to track the frames. With this new model, the
>>> tracking/refcounting will be more complex. That might be a concern.
>>>
>>> For the multi-pool NICs I think we can still just have one UMEM, and
>>> let the driver decide in which pool to place this certain chunk
>>> of memory. Thoughts?
>>
>> Definitely agree with that. This is HW specific and exposing it to the
>> application would only harm portability.
>>
>
> Good stuff, we're on the same page then.
>
>>>
>>> Now, how do we go forward? I think this is very important, and I will
>>> hack a copy-mode version for this new API. I'm a bit worried that the
>>> complexity/configuration space will grow and impact performance, but
>>> let's see.
>>>
>>> To prove that the API works for the NICs you mentioned, we need an
>>> actual zero-copy implementation for them. Do you think Linaro could
>>> work on a zero-copy variant for any of the NICs above?
>>>
>>
>> Linaro will definitely contribute zero-copy implementation for some
>> ARM-based NICs with 'multi-pool' variation.
>
> Very nice!
>
>> Implementation of
>> 'type-writer' variation is up to Chelsio/Netcope, we only try to come
>> up with API which (most probably) will fit them as well.
>>
>
> Let's hope we get an implementation from these vendors as well! :-)
>
>
> Björn
>
>>>
>>> Again thanks for bringing this up!
>>> Björn
>>>
>>>
>>>
>>>> [1] https://en.wikipedia.org/wiki/Internet_Mix
>>>>
>>>> With best regards,
>>>> Mykyta
^ permalink raw reply
* [PATCH net v3] ipv6: omit traffic class when calculating flow hash
From: Michal Kubecek @ 2018-06-04 9:36 UTC (permalink / raw)
To: David S. Miller
Cc: netdev, linux-kernel, Nicolas Dichtel, Tom Herbert, David Ahern,
Ido Schimmel
In-Reply-To: <687af1bc-a1f9-c0d2-dd3a-eeef3a2bf9b4@gmail.com>
Some of the code paths calculating flow hash for IPv6 use flowlabel member
of struct flowi6 which, despite its name, encodes both flow label and
traffic class. If traffic class changes within a TCP connection (as e.g.
ssh does), ECMP route can switch between paths. It's also inconsistent with
other code paths where ip6_flowlabel() (returning only flow label) is used
to feed the key.
Use only flow label everywhere, including one place where hash key is set
using ip6_flowinfo().
Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)")
Fixes: f70ea018da06 ("net: Add functions to get skb->hash based on flow structures")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
---
v2: introduce and use an inline helper as suggested by David Ahern
v3: keep the cast out of the helper to make future cleanup easier
include/net/ipv6.h | 5 +++++
net/core/flow_dissector.c | 2 +-
net/ipv6/route.c | 4 ++--
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 836f31af1369..a406f2e8680a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -906,6 +906,11 @@ static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
}
+static inline __be32 flowi6_get_flowlabel(const struct flowi6 *fl6)
+{
+ return fl6->flowlabel & IPV6_FLOWLABEL_MASK;
+}
+
/*
* Prototypes exported by ipv6
*/
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d29f09bc5ff9..0234f8d1f0ac 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1334,7 +1334,7 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
keys->ports.src = fl6->fl6_sport;
keys->ports.dst = fl6->fl6_dport;
keys->keyid.keyid = fl6->fl6_gre_key;
- keys->tags.flow_label = (__force u32)fl6->flowlabel;
+ keys->tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
keys->basic.ip_proto = fl6->flowi6_proto;
return flow_hash_from_keys(keys);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f4d61736c41a..4530a82aaa2e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1868,7 +1868,7 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
} else {
keys->addrs.v6addrs.src = key_iph->saddr;
keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->tags.flow_label = ip6_flowlabel(key_iph);
keys->basic.ip_proto = key_iph->nexthdr;
}
}
@@ -1889,7 +1889,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
} else {
hash_keys.addrs.v6addrs.src = fl6->saddr;
hash_keys.addrs.v6addrs.dst = fl6->daddr;
- hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
break;
--
2.17.1
^ permalink raw reply related
* Re: [PATCH net-next v2 1/3] bpf: implement bpf_get_current_cgroup_id() helper
From: Daniel Borkmann @ 2018-06-04 9:08 UTC (permalink / raw)
To: Yonghong Song, ast, netdev; +Cc: kernel-team
In-Reply-To: <20180603225943.2370719-2-yhs@fb.com>
On 06/04/2018 12:59 AM, Yonghong Song wrote:
> bpf has been used extensively for tracing. For example, bcc
> contains an almost full set of bpf-based tools to trace kernel
> and user functions/events. Most tracing tools are currently
> either filtered based on pid or system-wide.
>
> Containers have been used quite extensively in industry and
> cgroup is often used together to provide resource isolation
> and protection. Several processes may run inside the same
> container. It is often desirable to get container-level tracing
> results as well, e.g. syscall count, function count, I/O
> activity, etc.
>
> This patch implements a new helper, bpf_get_current_cgroup_id(),
> which will return cgroup id based on the cgroup within which
> the current task is running.
>
> The later patch will provide an example to show that
> userspace can get the same cgroup id so it could
> configure a filter or policy in the bpf program based on
> task cgroup id.
>
> The helper is currently implemented for tracing. It can
> be added to other program types as well when needed.
>
> Acked-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
> include/linux/bpf.h | 1 +
> include/uapi/linux/bpf.h | 8 +++++++-
> kernel/bpf/core.c | 1 +
> kernel/bpf/helpers.c | 15 +++++++++++++++
> kernel/trace/bpf_trace.c | 2 ++
> 5 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index bbe2974..995c3b1 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -746,6 +746,7 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
> extern const struct bpf_func_proto bpf_get_stack_proto;
> extern const struct bpf_func_proto bpf_sock_map_update_proto;
> extern const struct bpf_func_proto bpf_sock_hash_update_proto;
> +extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
>
> /* Shared helpers among cBPF and eBPF. */
> void bpf_user_rnd_init_once(void);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index f0b6608..18712b0 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2070,6 +2070,11 @@ union bpf_attr {
> * **CONFIG_SOCK_CGROUP_DATA** configuration option.
> * Return
> * The id is returned or 0 in case the id could not be retrieved.
> + *
> + * u64 bpf_get_current_cgroup_id(void)
> + * Return
> + * A 64-bit integer containing the current cgroup id based
> + * on the cgroup within which the current task is running.
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -2151,7 +2156,8 @@ union bpf_attr {
> FN(lwt_seg6_action), \
> FN(rc_repeat), \
> FN(rc_keydown), \
> - FN(skb_cgroup_id),
> + FN(skb_cgroup_id), \
> + FN(get_current_cgroup_id),
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> * function eBPF program intends to call
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 527587d..9f14937 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1765,6 +1765,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
> const struct bpf_func_proto bpf_get_current_comm_proto __weak;
> const struct bpf_func_proto bpf_sock_map_update_proto __weak;
> const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
> +const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
>
> const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
> {
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 3d24e23..73065e2 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -179,3 +179,18 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
> .arg1_type = ARG_PTR_TO_UNINIT_MEM,
> .arg2_type = ARG_CONST_SIZE,
> };
> +
> +#ifdef CONFIG_CGROUPS
> +BPF_CALL_0(bpf_get_current_cgroup_id)
> +{
> + struct cgroup *cgrp = task_dfl_cgroup(current);
> +
> + return cgrp->kn->id.id;
> +}
> +
> +const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
> + .func = bpf_get_current_cgroup_id,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> +};
> +#endif
Nit: why not move this function directly to bpf_trace.c?
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 752992c..e2ab5b7 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -564,6 +564,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> return &bpf_get_prandom_u32_proto;
> case BPF_FUNC_probe_read_str:
> return &bpf_probe_read_str_proto;
> + case BPF_FUNC_get_current_cgroup_id:
> + return &bpf_get_current_cgroup_id_proto;
When you have !CONFIG_CGROUPS, then it relies on the weak definition of the
bpf_get_current_cgroup_id_proto, which I would think at latest in fixup_bpf_calls()
bails out with 'kernel subsystem misconfigured func' due to func being NULL.
Can't we just do the #ifdef CONFIG_CGROUPS around BPF_FUNC_get_current_cgroup_id
case instead? Then we bail out normally with 'unknown func' when cgroups are
not configured?
> default:
> return NULL;
> }
>
^ permalink raw reply
* Re: [PATCH 0/3] sh_eth: fix & clean up sh_eth_soft_swap()
From: Geert Uytterhoeven @ 2018-06-04 9:01 UTC (permalink / raw)
To: Sergei Shtylyov; +Cc: netdev, David S. Miller, Linux-Renesas
In-Reply-To: <9027499a-0e19-7721-a17f-26e86885da3f@cogentembedded.com>
Hi Sergei,
On Sat, Jun 2, 2018 at 9:32 PM, Sergei Shtylyov
<sergei.shtylyov@cogentembedded.com> wrote:
> Here's a set of 3 patches against DaveM's 'net-next.git' repo. First one fixes an
> old buffer endiannes issue (luckily, the ARM SoCs are smart enough to not actually
> care) plus couple clean ups around sh_eth_soft_swap()...
>
> [1/3] sh_eth: make sh_eth_soft_swap() work on ARM
> [2/3] sh_eth: uninline sh_eth_soft_swap()
> [3/3] sh_eth: use DIV_ROUND_UP() in sh_eth_soft_swap()
Does the swapping actually work?
In sh_eth_rx(), it's called before dma_unmap_single(), without calling
dma_sync_single_for_cpu() first.
Shouldn't it be called after the unmap instead?
In addition, why is it passed the dma_addr converted to virt, while the skb
address is available?
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH 3/3] sh_eth: use DIV_ROUND_UP() in sh_eth_soft_swap()
From: Geert Uytterhoeven @ 2018-06-04 8:55 UTC (permalink / raw)
To: Sergei Shtylyov; +Cc: netdev, David S. Miller, Linux-Renesas
In-Reply-To: <995798f8-69e0-6471-8eca-eefc420dbe21@cogentembedded.com>
On Sat, Jun 2, 2018 at 9:40 PM, Sergei Shtylyov
<sergei.shtylyov@cogentembedded.com> wrote:
> When initializing 'maxp' in sh_eth_soft_swap(), the buffer length needs
> to be rounded up -- that's just asking for DIV_ROUND_UP()!
>
> Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH 2/3] sh_eth: uninline sh_eth_soft_swap()
From: Geert Uytterhoeven @ 2018-06-04 8:50 UTC (permalink / raw)
To: Sergei Shtylyov; +Cc: netdev, David S. Miller, Linux-Renesas
In-Reply-To: <a1eb21c8-ab44-3022-b239-0b5c33691d3b@cogentembedded.com>
On Sat, Jun 2, 2018 at 9:38 PM, Sergei Shtylyov
<sergei.shtylyov@cogentembedded.com> wrote:
> sh_eth_soft_swap() is called twice by the driver, remove *inline* and
> move that function from the header to the driver itself to let gcc decide
> whether to expand it inline or not...
>
> Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH 1/3] sh_eth: make sh_eth_soft_swap() work on ARM
From: Geert Uytterhoeven @ 2018-06-04 8:49 UTC (permalink / raw)
To: Sergei Shtylyov; +Cc: netdev, David S. Miller, Linux-Renesas
In-Reply-To: <CAMuHMdVb2q=S6ucYCpwnVse-COefcuUGT6DO5Ympo+BjiVN9sg@mail.gmail.com>
On Mon, Jun 4, 2018 at 10:48 AM, Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
> On Sat, Jun 2, 2018 at 9:37 PM, Sergei Shtylyov
> <sergei.shtylyov@cogentembedded.com> wrote:
>> Browsing thru the driver disassembly, I noticed that ARM gcc generated
>> no code whatsoever for sh_eth_soft_swap() while building a little-endian
>> kernel -- apparently __LITTLE_ENDIAN__ was not being #define'd, however
>> it got implicitly #define'd when building with the SH gcc (I could only
>> find the explicit #define __LITTLE_ENDIAN that was #include'd when building
>> a little-endian kernel). Luckily, the Ether controller only doing big-
>> endian DMA is encountered on the early SH771x SoCs only and all ARM SoCs
>> implement EDMR.DE and thus set 'sh_eth_cpu_data::hw_swap'. But anyway, we
>> need to fix the #ifdef inside sh_eth_soft_swap() to something that would
>> work on all architectures...
>>
>> Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
>
> Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Forgot to say: nice catch ;-)
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH 1/3] sh_eth: make sh_eth_soft_swap() work on ARM
From: Geert Uytterhoeven @ 2018-06-04 8:48 UTC (permalink / raw)
To: Sergei Shtylyov; +Cc: netdev, David S. Miller, Linux-Renesas
In-Reply-To: <c83970ff-337d-6628-6976-e1665c44dd0b@cogentembedded.com>
On Sat, Jun 2, 2018 at 9:37 PM, Sergei Shtylyov
<sergei.shtylyov@cogentembedded.com> wrote:
> Browsing thru the driver disassembly, I noticed that ARM gcc generated
> no code whatsoever for sh_eth_soft_swap() while building a little-endian
> kernel -- apparently __LITTLE_ENDIAN__ was not being #define'd, however
> it got implicitly #define'd when building with the SH gcc (I could only
> find the explicit #define __LITTLE_ENDIAN that was #include'd when building
> a little-endian kernel). Luckily, the Ether controller only doing big-
> endian DMA is encountered on the early SH771x SoCs only and all ARM SoCs
> implement EDMR.DE and thus set 'sh_eth_cpu_data::hw_swap'. But anyway, we
> need to fix the #ifdef inside sh_eth_soft_swap() to something that would
> work on all architectures...
>
> Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
^ permalink raw reply
* Re: [PATCH net] sctp: not allow to set rto_min with a value below 200 msecs
From: Dmitry Vyukov @ 2018-06-04 8:34 UTC (permalink / raw)
To: Xin Long
Cc: Marcelo Ricardo Leitner, Neal Cardwell, Michael Tuexen,
Neil Horman, Netdev, linux-sctp, David Miller, David Ahern,
Eric Dumazet, syzkaller
In-Reply-To: <CADvbK_fbKbH2wm6Xurr+ELVag-LvyQdL+peJd=wp7OL7_zMZTQ@mail.gmail.com>
On Tue, May 29, 2018 at 7:45 PM, Xin Long <lucien.xin@gmail.com> wrote:
> On Wed, May 30, 2018 at 1:06 AM, Marcelo Ricardo Leitner
> <marcelo.leitner@gmail.com> wrote:
>> On Tue, May 29, 2018 at 12:03:46PM -0400, Neal Cardwell wrote:
>>> On Tue, May 29, 2018 at 11:45 AM Marcelo Ricardo Leitner <
>>> marcelo.leitner@gmail.com> wrote:
>>> > - patch2 - fix rtx attack vector
>>> > - Add the floor value to rto_min to HZ/20 (which fits the values
>>> > that Michael shared on the other email)
>>>
>>> I would encourage allowing minimum RTO values down to 5ms, if the ACK
>>> policy in the receiver makes this feasible. Our experience is that in
>>> datacenter environments it can be advantageous to allow timer-based loss
>>> recoveries using timeout values as low as 5ms, e.g.:
>>
>> Thanks Neal. On Xin's tests, the heartbeat timer becomes an issue at
>> ~25ms already. Xin, can you share more details on the hw, which CPU
>> was used?
Hi,
Did we reach any decision on this? This continues to produce bug
reports on syzbot.
I am not sure whom you are asking, because Xin is you unless I am
missing something :)
But if you mean syzbot hardware, then it's GCE VMs with modern Intel
CPUs but an important aspect is a heavy-debug config (which you can
take from here https://syzkaller.appspot.com/bug?extid=3dcd59a1f907245f891f)
and systematic bug reporting. So if it's at all flaky in your testing, it
will produce dozens of bug emails on syzbot.
> It was on a KVM guest, "-smp 2,cores=1,threads=1,sockets=2"
> # lscpu
> Architecture: x86_64
> CPU op-mode(s): 32-bit, 64-bit
> Byte Order: Little Endian
> CPU(s): 2
> On-line CPU(s) list: 0,1
> Thread(s) per core: 1
> Core(s) per socket: 1
> Socket(s): 2
> NUMA node(s): 1
> Vendor ID: GenuineIntel
> CPU family: 6
> Model: 13
> Model name: QEMU Virtual CPU version 1.5.3
> Stepping: 3
> CPU MHz: 2397.222
> BogoMIPS: 4794.44
> Hypervisor vendor: KVM
> Virtualization type: full
> L1d cache: 32K
> L1i cache: 32K
> L2 cache: 4096K
> NUMA node0 CPU(s): 0,1
> Flags: fpu de pse tsc msr pae mce cx8 apic sep mtrr
> pge mca cmov pse36 clflush mmx fxsr sse sse2 syscall nx lm rep_good
> nopl cpuid pni cx16 hypervisor lahf_lm abm pti
>
> If we're counting on max_t to fix this stuck CPU, it should not matter
> that much if min rto < the value causing it to get stuck.
>
>>
>> Anyway, what about we add a floor to rto_max too, so that RTO can
>> actually grow into something bigger that doesn't hog the CPU? Like:
>> rto_min floor = 5ms
>> rto_max floor = 50ms
>>
>> Marcelo
^ permalink raw reply
* Re: [PATCH net-next] netfilter: fix null-ptr-deref in nf_nat_decode_session
From: Pablo Neira Ayuso @ 2018-06-04 8:01 UTC (permalink / raw)
To: Prashant Bhole
Cc: Florian Westphal, David S . Miller, Jozsef Kadlecsik, netdev,
netfilter-devel
In-Reply-To: <3c408f33-9c9c-b16f-3433-8931ea3e4bae@lab.ntt.co.jp>
On Mon, Jun 04, 2018 at 10:10:08AM +0900, Prashant Bhole wrote:
> CC netfilter-devel
>
> On 5/28/2018 7:52 PM, Florian Westphal wrote:
> > Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp> wrote:
> > > Add null check for nat_hook in nf_nat_decode_session()
> >
> > Acked-by: Florian Westphal <fw@strlen.de>
>
> Hi Pablo,
> Just pinging in case this patch was missed.
The original submission was missing Cc: netfilter-devel@vger.kernel.org,
so patchwork didn't catch it:
http://patchwork.ozlabs.org/project/netfilter-devel/list/
Will include this patch in the next batch. Thanks.
^ permalink raw reply
* Re: [PATCH net v2 0/2] ip[6] tunnels: fix mtu calculations
From: Nicolas Dichtel @ 2018-06-04 7:56 UTC (permalink / raw)
To: David Miller; +Cc: petrm, idosch, netdev
In-Reply-To: <20180601.135741.863528561800652928.davem@davemloft.net>
Le 01/06/2018 à 19:57, David Miller a écrit :
[snip]
> I think the 0xfff8 value might come from the requirement that ipv6
> fragments need to be a multiple of 8 bytes long.
>
Oh, thanks for the explanation!
^ permalink raw reply
* Re: [PATCH net-next 0/2] cls_flower: Various fixes
From: Roi Dayan @ 2018-06-04 7:35 UTC (permalink / raw)
To: Jiri Pirko, Cong Wang
Cc: Paul Blakey, Jiri Pirko, Jamal Hadi Salim, David Miller,
Linux Kernel Network Developers, Yevgeny Kliteynik, Shahar Klein,
Mark Bloch, Or Gerlitz
In-Reply-To: <20180603193911.GA3013@nanopsycho>
On 03/06/2018 22:39, Jiri Pirko wrote:
> Sun, Jun 03, 2018 at 08:33:25PM CEST, xiyou.wangcong@gmail.com wrote:
>> On Wed, May 30, 2018 at 1:17 AM, Paul Blakey <paulb@mellanox.com> wrote:
>>> Two of the fixes are for my multiple mask patch
>>>
>>> Paul Blakey (2):
>>> cls_flower: Fix missing free of rhashtable
>>> cls_flower: Fix comparing of old filter mask with new filter
>>
>> Both are bug fixes and one-line fixes, so definitely should go
>> to -net tree and -stable tree.
>
> I agree.
>
it's because the commit they fix doesn't exist in net yet.
^ permalink raw reply
* [PATCH 10/10] dpaa_eth: add the get_ts_info interface for ethtool
From: Yangbo Lu @ 2018-06-04 7:08 UTC (permalink / raw)
To: netdev, madalin.bucur, Richard Cochran, Rob Herring, Shawn Guo,
David S . Miller
Cc: devicetree, linuxppc-dev, linux-arm-kernel, linux-kernel,
Yangbo Lu
In-Reply-To: <20180604070837.19265-1-yangbo.lu@nxp.com>
Added the get_ts_info interface for ethtool to check
the timestamping capability.
Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
---
drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 44 ++++++++++++++++++++
1 files changed, 44 insertions(+), 0 deletions(-)
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 2f933b6..a20b434 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -32,6 +32,9 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/string.h>
+#include <linux/of_platform.h>
+#include <linux/net_tstamp.h>
+#include <linux/fsl/ptp_qoriq.h>
#include "dpaa_eth.h"
#include "mac.h"
@@ -515,6 +518,46 @@ static int dpaa_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
return ret;
}
+static int dpaa_get_ts_info(struct net_device *net_dev,
+ struct ethtool_ts_info *info)
+{
+ struct device *dev = net_dev->dev.parent;
+ struct device_node *mac_node = dev->of_node;
+ struct device_node *fman_node = NULL, *ptp_node = NULL;
+ struct platform_device *ptp_dev = NULL;
+ struct qoriq_ptp *ptp = NULL;
+
+ fman_node = of_get_parent(mac_node);
+ if (fman_node)
+ ptp_node = of_parse_phandle(fman_node, "ptimer-handle", 0);
+
+ if (ptp_node)
+ ptp_dev = of_find_device_by_node(ptp_node);
+
+ if (ptp_dev)
+ ptp = platform_get_drvdata(ptp_dev);
+
+ if (ptp) {
+ info->phc_index = ptp->phc_index;
+#ifdef CONFIG_FSL_DPAA_ETH_TS
+ info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+ info->tx_types = (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON);
+ info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_ALL);
+ return 0;
+#endif
+ } else {
+ info->phc_index = -1;
+ }
+
+ info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
+ return 0;
+}
+
const struct ethtool_ops dpaa_ethtool_ops = {
.get_drvinfo = dpaa_get_drvinfo,
.get_msglevel = dpaa_get_msglevel,
@@ -530,4 +573,5 @@ static int dpaa_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
.set_link_ksettings = dpaa_set_link_ksettings,
.get_rxnfc = dpaa_get_rxnfc,
.set_rxnfc = dpaa_set_rxnfc,
+ .get_ts_info = dpaa_get_ts_info,
};
--
1.7.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox