Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next] net: sched: remove duplicated include from cls_api.c
From: YueHaibing @ 2019-02-13  1:42 UTC (permalink / raw)
  To: Jamal Hadi Salim, Cong Wang, Jiri Pirko, David S . Miller
  Cc: YueHaibing, netdev, kernel-janitors

Remove duplicated include.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 net/sched/cls_api.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 02cf6d2fa0e1..774b663e07f1 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -38,7 +38,6 @@
 #include <net/tc_act/tc_csum.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_skbedit.h>
-#include <net/tc_act/tc_mirred.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];




^ permalink raw reply related

* [PATCH bpf-next 1/2] tools/bpf: replace bzero with memset
From: Andrii Nakryiko @ 2019-02-13  1:29 UTC (permalink / raw)
  To: andrii.nakryiko, netdev, kernel-team, yhs, ast, daniel,
	david.laight, acme
  Cc: Andrii Nakryiko
In-Reply-To: <20190213012941.2571769-1-andriin@fb.com>

bzero() call is deprecated and superseded by memset().

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reported-by: David Laight <david.laight@aculab.com>
---
 tools/lib/bpf/bpf.c    | 48 +++++++++++++++++++++---------------------
 tools/lib/bpf/btf.c    |  5 ++---
 tools/lib/bpf/libbpf.c |  5 ++---
 3 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index a5261f39e2bd..9cd015574e83 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -22,7 +22,7 @@
  */
 
 #include <stdlib.h>
-#include <strings.h>
+#include <string.h>
 #include <memory.h>
 #include <unistd.h>
 #include <asm/unistd.h>
@@ -228,7 +228,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 
 	name_len = load_attr->name ? strlen(load_attr->name) : 0;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = load_attr->prog_type;
 	attr.expected_attach_type = load_attr->expected_attach_type;
 	attr.insn_cnt = (__u32)load_attr->insns_cnt;
@@ -340,7 +340,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.prog_type = type;
 	attr.insn_cnt = (__u32)insns_cnt;
 	attr.insns = ptr_to_u64(insns);
@@ -360,7 +360,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
@@ -373,7 +373,7 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
@@ -385,7 +385,7 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
@@ -398,7 +398,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
@@ -410,7 +410,7 @@ int bpf_map_delete_elem(int fd, const void *key)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 
@@ -421,7 +421,7 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.next_key = ptr_to_u64(next_key);
@@ -433,7 +433,7 @@ int bpf_obj_pin(int fd, const char *pathname)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.pathname = ptr_to_u64((void *)pathname);
 	attr.bpf_fd = fd;
 
@@ -444,7 +444,7 @@ int bpf_obj_get(const char *pathname)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.pathname = ptr_to_u64((void *)pathname);
 
 	return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
@@ -455,7 +455,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.target_fd	   = target_fd;
 	attr.attach_bpf_fd = prog_fd;
 	attr.attach_type   = type;
@@ -468,7 +468,7 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.target_fd	 = target_fd;
 	attr.attach_type = type;
 
@@ -479,7 +479,7 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.target_fd	 = target_fd;
 	attr.attach_bpf_fd = prog_fd;
 	attr.attach_type = type;
@@ -493,7 +493,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
 	union bpf_attr attr;
 	int ret;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.query.target_fd	= target_fd;
 	attr.query.attach_type	= type;
 	attr.query.query_flags	= query_flags;
@@ -514,7 +514,7 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
 	union bpf_attr attr;
 	int ret;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.test.prog_fd = prog_fd;
 	attr.test.data_in = ptr_to_u64(data);
 	attr.test.data_out = ptr_to_u64(data_out);
@@ -539,7 +539,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
 	if (!test_attr->data_out && test_attr->data_size_out > 0)
 		return -EINVAL;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.test.prog_fd = test_attr->prog_fd;
 	attr.test.data_in = ptr_to_u64(test_attr->data_in);
 	attr.test.data_out = ptr_to_u64(test_attr->data_out);
@@ -559,7 +559,7 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
 	union bpf_attr attr;
 	int err;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.start_id = start_id;
 
 	err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
@@ -574,7 +574,7 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
 	union bpf_attr attr;
 	int err;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.start_id = start_id;
 
 	err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr));
@@ -588,7 +588,7 @@ int bpf_prog_get_fd_by_id(__u32 id)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.prog_id = id;
 
 	return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
@@ -598,7 +598,7 @@ int bpf_map_get_fd_by_id(__u32 id)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.map_id = id;
 
 	return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
@@ -608,7 +608,7 @@ int bpf_btf_get_fd_by_id(__u32 id)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.btf_id = id;
 
 	return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
@@ -619,7 +619,7 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
 	union bpf_attr attr;
 	int err;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.info.bpf_fd = prog_fd;
 	attr.info.info_len = *info_len;
 	attr.info.info = ptr_to_u64(info);
@@ -635,7 +635,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
 {
 	union bpf_attr attr;
 
-	bzero(&attr, sizeof(attr));
+	memset(&attr, 0, sizeof(attr));
 	attr.raw_tracepoint.name = ptr_to_u64(name);
 	attr.raw_tracepoint.prog_fd = prog_fd;
 
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 6953fedb88ff..ade1c32fb083 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -4,7 +4,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <strings.h>
 #include <unistd.h>
 #include <errno.h>
 #include <linux/err.h>
@@ -484,7 +483,7 @@ int btf__get_from_id(__u32 id, struct btf **btf)
 		goto exit_free;
 	}
 
-	bzero(ptr, last_size);
+	memset(ptr, 0, last_size);
 	btf_info.btf = ptr_to_u64(ptr);
 	err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
 
@@ -498,7 +497,7 @@ int btf__get_from_id(__u32 id, struct btf **btf)
 			goto exit_free;
 		}
 		ptr = temp_ptr;
-		bzero(ptr, last_size);
+		memset(ptr, 0, last_size);
 		btf_info.btf = ptr_to_u64(ptr);
 		err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
 	}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e3c39edfb9d3..6ef7e6e4cbd3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -18,7 +18,6 @@
 #include <libgen.h>
 #include <inttypes.h>
 #include <string.h>
-#include <strings.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <errno.h>
@@ -308,7 +307,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 		return -EINVAL;
 	}
 
-	bzero(prog, sizeof(*prog));
+	memset(prog, 0, sizeof(*prog));
 
 	prog->section_name = strdup(section_name);
 	if (!prog->section_name) {
@@ -1577,7 +1576,7 @@ bpf_program__load(struct bpf_program *prog,
 		struct bpf_prog_prep_result result;
 		bpf_program_prep_t preprocessor = prog->preprocessor;
 
-		bzero(&result, sizeof(result));
+		memset(&result, 0, sizeof(result));
 		err = preprocessor(prog, i, prog->insns,
 				   prog->insns_cnt, &result);
 		if (err) {
-- 
2.17.1


^ permalink raw reply related

* [PATCH bpf-next 0/2] tools/bpf: smaller clean ups
From: Andrii Nakryiko @ 2019-02-13  1:29 UTC (permalink / raw)
  To: andrii.nakryiko, netdev, kernel-team, yhs, ast, daniel,
	david.laight, acme
  Cc: Andrii Nakryiko

This patchset replaces bzero() with memset() and syncs if_link.h header
to suppress unsynchronized headers warning.

Andrii Nakryiko (2):
  tools/bpf: replace bzero with memset
  tools: sync uapi/linux/if_link.h header

 tools/include/uapi/linux/if_link.h |  1 +
 tools/lib/bpf/bpf.c                | 48 +++++++++++++++---------------
 tools/lib/bpf/btf.c                |  5 ++--
 tools/lib/bpf/libbpf.c             |  5 ++--
 4 files changed, 29 insertions(+), 30 deletions(-)

-- 
2.17.1


^ permalink raw reply

* [PATCH bpf-next 2/2] tools: sync uapi/linux/if_link.h header
From: Andrii Nakryiko @ 2019-02-13  1:29 UTC (permalink / raw)
  To: andrii.nakryiko, netdev, kernel-team, yhs, ast, daniel,
	david.laight, acme
  Cc: Andrii Nakryiko
In-Reply-To: <20190213012941.2571769-1-andriin@fb.com>

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
---
 tools/include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index d6533828123a..5b225ff63b48 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -925,6 +925,7 @@ enum {
 enum {
 	LINK_XSTATS_TYPE_UNSPEC,
 	LINK_XSTATS_TYPE_BRIDGE,
+	LINK_XSTATS_TYPE_BOND,
 	__LINK_XSTATS_TYPE_MAX
 };
 #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH net-next 0/2] Remove unused variables
From: David Miller @ 2019-02-13  1:32 UTC (permalink / raw)
  To: f.fainelli; +Cc: netdev
In-Reply-To: <20190212234000.15796-1-f.fainelli@gmail.com>

From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 12 Feb 2019 15:39:57 -0800

> This removes unused variables from mlxsw and ethsw after the recent
> removal of SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, build scripts are now
> fixed to take care of those warnings :).

Series applied, thanks Florian.

^ permalink raw reply

* [PATCH] libertas_tf: remove set but not used variable 'flags'
From: YueHaibing @ 2019-02-13  1:49 UTC (permalink / raw)
  To: Kalle Valo, Colin Ian King
  Cc: YueHaibing, linux-wireless, kernel-janitors, netdev

Fixes gcc '-Wunused-but-set-variable' warning:

drivers/net/wireless/marvell/libertas_tf/main.c: In function 'lbtf_rx':
drivers/net/wireless/marvell/libertas_tf/main.c:554:15: warning:
 variable 'flags' set but not used [-Wunused-but-set-variable]

It never used and can be removed.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 drivers/net/wireless/marvell/libertas_tf/main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas_tf/main.c b/drivers/net/wireless/marvell/libertas_tf/main.c
index a7cb7d06e5e6..d36962528838 100644
--- a/drivers/net/wireless/marvell/libertas_tf/main.c
+++ b/drivers/net/wireless/marvell/libertas_tf/main.c
@@ -551,7 +551,6 @@ int lbtf_rx(struct lbtf_private *priv, struct sk_buff *skb)
 	struct ieee80211_rx_status stats;
 	struct rxpd *prxpd;
 	int need_padding;
-	unsigned int flags;
 	struct ieee80211_hdr *hdr;
 
 	lbtf_deb_enter(LBTF_DEB_RX);
@@ -572,7 +571,6 @@ int lbtf_rx(struct lbtf_private *priv, struct sk_buff *skb)
 	skb_pull(skb, sizeof(struct rxpd));
 
 	hdr = (struct ieee80211_hdr *)skb->data;
-	flags = le32_to_cpu(*(__le32 *)(skb->data + 4));
 
 	need_padding = ieee80211_is_data_qos(hdr->frame_control);
 	need_padding ^= ieee80211_has_a4(hdr->frame_control);




^ permalink raw reply related

* [net-next PATCH V3 0/3] Fix page_pool API and dma address storage
From: Jesper Dangaard Brouer @ 2019-02-13  1:55 UTC (permalink / raw)
  To: netdev, linux-mm
  Cc: Toke Høiland-Jørgensen, Ilias Apalodimas, willy,
	Saeed Mahameed, Alexander Duyck, Jesper Dangaard Brouer,
	Andrew Morton, mgorman, David S. Miller, Tariq Toukan

As pointed out by David Miller in [1] the current page_pool implementation
stores dma_addr_t in page->private. This won't work on 32-bit platforms with
64-bit DMA addresses since the page->private is an unsigned long and the
dma_addr_t a u64.

Since no driver is yet using the DMA mapping capabilities of the API let's
fix this by storing the information in 'struct page' and use that to store
and retrieve DMA addresses from network drivers.

As long as the addresses returned from dma_map_page() are aligned the first
bit, used by the compound pages code should not be set.

Ilias tested the first two patches on Espressobin driver mvneta, for which
we have patches for using the DMA API of page_pool.

[1]: https://lore.kernel.org/netdev/20181207.230655.1261252486319967024.davem@davemloft.net/

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
---

Ilias Apalodimas (1):
      net: page_pool: don't use page->private to store dma_addr_t

Jesper Dangaard Brouer (2):
      mm: add dma_addr_t to struct page
      page_pool: use DMA_ATTR_SKIP_CPU_SYNC for DMA mappings


 include/linux/mm_types.h |    7 +++++++
 net/core/page_pool.c     |   22 ++++++++++++++--------
 2 files changed, 21 insertions(+), 8 deletions(-)

--

^ permalink raw reply

* [net-next PATCH V3 1/3] mm: add dma_addr_t to struct page
From: Jesper Dangaard Brouer @ 2019-02-13  1:55 UTC (permalink / raw)
  To: netdev, linux-mm
  Cc: Toke Høiland-Jørgensen, Ilias Apalodimas, willy,
	Saeed Mahameed, Alexander Duyck, Jesper Dangaard Brouer,
	Andrew Morton, mgorman, David S. Miller, Tariq Toukan
In-Reply-To: <155002290134.5597.6544755780651689517.stgit@firesoul>

The page_pool API is using page->private to store DMA addresses.
As pointed out by David Miller we can't use that on 32-bit architectures
with 64-bit DMA

This patch adds a new dma_addr_t struct to allow storing DMA addresses

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h |    7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2c471a2c43fa..0a36a22228e7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -95,6 +95,13 @@ struct page {
 			 */
 			unsigned long private;
 		};
+		struct {	/* page_pool used by netstack */
+			/**
+			 * @dma_addr: might require a 64-bit value even on
+			 * 32-bit architectures.
+			 */
+			dma_addr_t dma_addr;
+		};
 		struct {	/* slab, slob and slub */
 			union {
 				struct list_head slab_list;	/* uses lru */


^ permalink raw reply related

* [net-next PATCH V3 2/3] net: page_pool: don't use page->private to store dma_addr_t
From: Jesper Dangaard Brouer @ 2019-02-13  1:55 UTC (permalink / raw)
  To: netdev, linux-mm
  Cc: Toke Høiland-Jørgensen, Ilias Apalodimas, willy,
	Saeed Mahameed, Alexander Duyck, Jesper Dangaard Brouer,
	Andrew Morton, mgorman, David S. Miller, Tariq Toukan
In-Reply-To: <155002290134.5597.6544755780651689517.stgit@firesoul>

From: Ilias Apalodimas <ilias.apalodimas@linaro.org>

As pointed out by David Miller the current page_pool implementation
stores dma_addr_t in page->private.
This won't work on 32-bit platforms with 64-bit DMA addresses since the
page->private is an unsigned long and the dma_addr_t a u64.

A previous patch is adding dma_addr_t on struct page to accommodate this.
This patch adapts the page_pool related functions to use the newly added
struct for storing and retrieving DMA addresses from network drivers.

Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 net/core/page_pool.c |   13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 43a932cb609b..897a69a1477e 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -136,7 +136,9 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
 		goto skip_dma_map;
 
-	/* Setup DMA mapping: use page->private for DMA-addr
+	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
+	 * since dma_addr_t can be either 32 or 64 bits and does not always fit
+	 * into page private data (i.e 32bit cpu with 64bit DMA caps)
 	 * This mapping is kept for lifetime of page, until leaving pool.
 	 */
 	dma = dma_map_page(pool->p.dev, page, 0,
@@ -146,7 +148,7 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 		put_page(page);
 		return NULL;
 	}
-	set_page_private(page, dma); /* page->private = dma; */
+	page->dma_addr = dma;
 
 skip_dma_map:
 	/* When page just alloc'ed is should/must have refcnt 1. */
@@ -175,13 +177,16 @@ EXPORT_SYMBOL(page_pool_alloc_pages);
 static void __page_pool_clean_page(struct page_pool *pool,
 				   struct page *page)
 {
+	dma_addr_t dma;
+
 	if (!(pool->p.flags & PP_FLAG_DMA_MAP))
 		return;
 
+	dma = page->dma_addr;
 	/* DMA unmap */
-	dma_unmap_page(pool->p.dev, page_private(page),
+	dma_unmap_page(pool->p.dev, dma,
 		       PAGE_SIZE << pool->p.order, pool->p.dma_dir);
-	set_page_private(page, 0);
+	page->dma_addr = 0;
 }
 
 /* Return a page to the page allocator, cleaning up our state */


^ permalink raw reply related

* [net-next PATCH V3 3/3] page_pool: use DMA_ATTR_SKIP_CPU_SYNC for DMA mappings
From: Jesper Dangaard Brouer @ 2019-02-13  1:55 UTC (permalink / raw)
  To: netdev, linux-mm
  Cc: Toke Høiland-Jørgensen, Ilias Apalodimas, willy,
	Saeed Mahameed, Alexander Duyck, Jesper Dangaard Brouer,
	Andrew Morton, mgorman, David S. Miller, Tariq Toukan
In-Reply-To: <155002290134.5597.6544755780651689517.stgit@firesoul>

As pointed out by Alexander Duyck, the DMA mapping done in page_pool needs
to use the DMA attribute DMA_ATTR_SKIP_CPU_SYNC.

As the principle behind page_pool keeping the pages mapped is that the
driver takes over the DMA-sync steps.

Reported-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
---
 net/core/page_pool.c |   11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 897a69a1477e..5b2252c6d49b 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -141,9 +141,9 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 	 * into page private data (i.e 32bit cpu with 64bit DMA caps)
 	 * This mapping is kept for lifetime of page, until leaving pool.
 	 */
-	dma = dma_map_page(pool->p.dev, page, 0,
-			   (PAGE_SIZE << pool->p.order),
-			   pool->p.dma_dir);
+	dma = dma_map_page_attrs(pool->p.dev, page, 0,
+				 (PAGE_SIZE << pool->p.order),
+				 pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
 	if (dma_mapping_error(pool->p.dev, dma)) {
 		put_page(page);
 		return NULL;
@@ -184,8 +184,9 @@ static void __page_pool_clean_page(struct page_pool *pool,
 
 	dma = page->dma_addr;
 	/* DMA unmap */
-	dma_unmap_page(pool->p.dev, dma,
-		       PAGE_SIZE << pool->p.order, pool->p.dma_dir);
+	dma_unmap_page_attrs(pool->p.dev, dma,
+			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+			     DMA_ATTR_SKIP_CPU_SYNC);
 	page->dma_addr = 0;
 }
 


^ permalink raw reply related

* [PATCH iproute2] lib/libnetlink: ensure a minimum of 32KB for the buffer used in rtnl_recvmsg()
From: Eric Dumazet @ 2019-02-13  1:58 UTC (permalink / raw)
  To: David Ahern, Stephen Hemminger
  Cc: netdev, Eric Dumazet, Eric Dumazet, Hangbin Liu, Phil Sutter

In the past, we tried to increase the buffer size up to 32 KB in order
to reduce number of syscalls per dump.

Commit 2d34851cd341 ("lib/libnetlink: re malloc buff if size is not enough")
brought the size back to 4KB because the kernel can not know the application
is ready to receive bigger requests.

See kernel commits 9063e21fb026 ("netlink: autosize skb lengthes") and
d35c99ff77ec ("netlink: do not enter direct reclaim from netlink_dump()")
for more details.

Fixes: 2d34851cd341 ("lib/libnetlink: re malloc buff if size is not enough")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Hangbin Liu <liuhangbin@gmail.com>
Cc: Phil Sutter <phil@nwl.cc>
---
 lib/libnetlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/libnetlink.c b/lib/libnetlink.c
index 1892a02ab5d0d73776c9882ffc77edcd2c663d01..0d48a3d43cf03065dacbd419578ab10af56431a4 100644
--- a/lib/libnetlink.c
+++ b/lib/libnetlink.c
@@ -718,6 +718,8 @@ static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
 	if (len < 0)
 		return len;
 
+	if (len < 32768)
+		len = 32768;
 	buf = malloc(len);
 	if (!buf) {
 		fprintf(stderr, "malloc error: not enough buffer\n");
-- 
2.21.0.rc0.258.g878e2cd30e-goog


^ permalink raw reply related

* [PATCH -next] net: ipvlan_l3s: fix kconfig unmet dependency warning
From: Randy Dunlap @ 2019-02-13  2:02 UTC (permalink / raw)
  To: LKML, netdev@vger.kernel.org, Mahesh Bandewar, David Miller; +Cc: Andrew Morton

From: Randy Dunlap <rdunlap@infradead.org>

Fix the kconfig warning in IPVLAN_L3S when neither INET nor IPV6
is enabled:

WARNING: unmet direct dependencies detected for NET_L3_MASTER_DEV
  Depends on [n]: NET [=y] && (INET [=n] || IPV6 [=n])
  Selected by [y]:
  - IPVLAN_L3S [=y] && NETDEVICES [=y] && NET_CORE [=y] && NETFILTER [=y]

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Mahesh Bandewar <maheshb@google.com>
---
Seen in mmotm but applies to linux-next.

 drivers/net/Kconfig |    1 +
 1 file changed, 1 insertion(+)

--- mmotm-2019-0212-1537.orig/drivers/net/Kconfig
+++ mmotm-2019-0212-1537/drivers/net/Kconfig
@@ -147,6 +147,7 @@ config MACVTAP
 
 config IPVLAN_L3S
 	depends on NETFILTER
+	depends on INET || IPV6 # same as NET_L3_MASTER_DEV
 	def_bool y
 	select NET_L3_MASTER_DEV
 



^ permalink raw reply

* Re: [PATCH iproute2] lib/libnetlink: ensure a minimum of 32KB for the buffer used in rtnl_recvmsg()
From: David Ahern @ 2019-02-13  2:04 UTC (permalink / raw)
  To: Eric Dumazet, Stephen Hemminger
  Cc: netdev, Eric Dumazet, Hangbin Liu, Phil Sutter
In-Reply-To: <20190213015841.140383-1-edumazet@google.com>

On 2/12/19 6:58 PM, Eric Dumazet wrote:
> In the past, we tried to increase the buffer size up to 32 KB in order
> to reduce number of syscalls per dump.
> 
> Commit 2d34851cd341 ("lib/libnetlink: re malloc buff if size is not enough")
> brought the size back to 4KB because the kernel can not know the application
> is ready to receive bigger requests.
> 
> See kernel commits 9063e21fb026 ("netlink: autosize skb lengthes") and
> d35c99ff77ec ("netlink: do not enter direct reclaim from netlink_dump()")
> for more details.
> 
> Fixes: 2d34851cd341 ("lib/libnetlink: re malloc buff if size is not enough")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Hangbin Liu <liuhangbin@gmail.com>
> Cc: Phil Sutter <phil@nwl.cc>
> ---
>  lib/libnetlink.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/lib/libnetlink.c b/lib/libnetlink.c
> index 1892a02ab5d0d73776c9882ffc77edcd2c663d01..0d48a3d43cf03065dacbd419578ab10af56431a4 100644
> --- a/lib/libnetlink.c
> +++ b/lib/libnetlink.c
> @@ -718,6 +718,8 @@ static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
>  	if (len < 0)
>  		return len;
>  
> +	if (len < 32768)
> +		len = 32768;
>  	buf = malloc(len);
>  	if (!buf) {
>  		fprintf(stderr, "malloc error: not enough buffer\n");
> 

I believe that negates the whole point of 2d34851cd341 - which I have no
problem with. 2 recvmsg calls per message is overkill.

Do we know of any single message sizes > 32k? 2d34851cd341 cites
increasing VF's but at some point there is a limit. If not, the whole
PEEK thing should go away and we just malloc 32k (or 64k) buffers for
each recvmsg.

^ permalink raw reply

* Re: [bpf-next 1/2] tcp: replace SOCK_DEBUG() with tcp_stats()
From: Yafang Shao @ 2019-02-13  2:07 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Daniel Borkmann, ast, Yonghong Song, brakmo, Eric Dumazet,
	David Miller, netdev, LKML, shaoyafang
In-Reply-To: <38d07cb3-b767-bfc4-9ae5-48367971d839@gmail.com>

On Tue, Feb 12, 2019 at 11:07 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 02/12/2019 03:31 AM, Yafang Shao wrote:
> > SOCK_DEBUG is a very ancient debugging interface, and it's not very useful
> > for debugging.
> > So this patch removes the SOCK_DEBUG() and introduce a new function
> > tcp_stats() to trace this kind of events.
> > Some MIBs are added for these events.
> >
> > Regarding the SO_DEBUG in sock_{s,g}etsockopt, I think it is better to
> > keep as-is, because if we return an errno to tell the application that
> > this optname isn't supported for TCP, it may break the application.
> > The application still can use this option but don't take any effect for
> > TCP.
> >
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > ---
> >  include/uapi/linux/snmp.h |  3 +++
> >  net/ipv4/proc.c           |  3 +++
> >  net/ipv4/tcp_input.c      | 26 +++++++++++---------------
> >  net/ipv6/tcp_ipv6.c       |  2 --
> >  4 files changed, 17 insertions(+), 17 deletions(-)
> >
> > diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
> > index 86dc24a..fd5c09c 100644
> > --- a/include/uapi/linux/snmp.h
> > +++ b/include/uapi/linux/snmp.h
> > @@ -283,6 +283,9 @@ enum
> >       LINUX_MIB_TCPACKCOMPRESSED,             /* TCPAckCompressed */
> >       LINUX_MIB_TCPZEROWINDOWDROP,            /* TCPZeroWindowDrop */
> >       LINUX_MIB_TCPRCVQDROP,                  /* TCPRcvQDrop */
> > +     LINUX_MIB_TCPINVALIDACK,                /* TCPInvalidAck */
> > +     LINUX_MIB_TCPOLDACK,                    /* TCPOldAck */
> > +     LINUX_MIB_TCPPARTIALPACKET,             /* TCPPartialPacket */
> >       __LINUX_MIB_MAX
> >  };
> >
> > diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> > index c3610b3..1b0320a 100644
> > --- a/net/ipv4/proc.c
> > +++ b/net/ipv4/proc.c
> > @@ -291,6 +291,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
> >       SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
> >       SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
> >       SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
> > +     SNMP_MIB_ITEM("TCPInvalidAck", LINUX_MIB_TCPINVALIDACK),
> > +     SNMP_MIB_ITEM("TCPOldAck", LINUX_MIB_TCPOLDACK),
> > +     SNMP_MIB_ITEM("TCPPartialPacket", LINUX_MIB_TCPPARTIALPACKET),
> >       SNMP_MIB_SENTINEL
> >  };
> >
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 7a027dec..88deb1f 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -3554,6 +3554,11 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
> >       return delivered;
> >  }
> >
> > +static void tcp_stats(struct sock *sk, int mib_idx)
> > +{
> > +     NET_INC_STATS(sock_net(sk), mib_idx);
> > +}
>
> This is not a very descriptive name.
>
> Why is it static, and in net/ipv4/tcp_input.c ???
>

Because it is only called in net/ipv4/tcp_input.c currently, so I
define it as static in this file,
the reseaon I don't define it as 'static inline' is that I think the
compiler can make a better decision than me.

In the future it may be called in other files, then we can put it into
a more proper file.

> > +
> >  /* This routine deals with incoming acks, but not outgoing ones. */
> >  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
> >  {
> > @@ -3715,7 +3720,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
> >       return 1;
> >
> >  invalid_ack:
> > -     SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
> > +     tcp_stats(sk, LINUX_MIB_TCPINVALIDACK);
> >       return -1;
> >
> >  old_ack:
> > @@ -3731,7 +3736,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
> >               tcp_xmit_recovery(sk, rexmit);
> >       }
> >
> > -     SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
> > +     tcp_stats(sk, LINUX_MIB_TCPOLDACK);
> >       return 0;
> >  }
> >
>
>
> These counters will add noise to an already crowded MIB space.
>

I have another idea that we can define some tcp bpf events to replace
these MIB counters somehing like,
    #define BPF_EVENT_TCP_OLDACK 1
    #define BPF_EVENT_TCP_PARTIALPACKET 2
    ...
Maybe we could also cleanup some MIBs to make it less crowded.

> What bug do you expect to track and fix with these ?
>

Let me explain the background for you.
I want to track some TCP abnormal  behavior in TCP/IP stack. But I
find there's no good way to do it.
The current MIBs are per net, other than per socket, that makes it not
very powerful.
And the ancient SOCK_DEBUG is not good as well.
So we think why not cleanup this ancient SOCK_DEBUG() and introduce a
more powerful method.

> I see many TCP patches coming adding icache pressure, enabling companies to build their own modified
> TCP stack, but no real meat.
>

Thanks
Yafang

^ permalink raw reply

* Re: [PATCH iproute2] lib/libnetlink: ensure a minimum of 32KB for the buffer used in rtnl_recvmsg()
From: Eric Dumazet @ 2019-02-13  2:08 UTC (permalink / raw)
  To: David Ahern, Eric Dumazet, Stephen Hemminger
  Cc: netdev, Hangbin Liu, Phil Sutter
In-Reply-To: <b42f0dcb-3c8c-9797-a9f1-da71642e26cc@gmail.com>



On 02/12/2019 06:04 PM, David Ahern wrote:
> On 2/12/19 6:58 PM, Eric Dumazet wrote:
>> In the past, we tried to increase the buffer size up to 32 KB in order
>> to reduce number of syscalls per dump.
>>
>> Commit 2d34851cd341 ("lib/libnetlink: re malloc buff if size is not enough")
>> brought the size back to 4KB because the kernel can not know the application
>> is ready to receive bigger requests.
>>
>> See kernel commits 9063e21fb026 ("netlink: autosize skb lengthes") and
>> d35c99ff77ec ("netlink: do not enter direct reclaim from netlink_dump()")
>> for more details.
>>
>> Fixes: 2d34851cd341 ("lib/libnetlink: re malloc buff if size is not enough")
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> Cc: Hangbin Liu <liuhangbin@gmail.com>
>> Cc: Phil Sutter <phil@nwl.cc>
>> ---
>>  lib/libnetlink.c | 2 ++
>>  1 file changed, 2 insertions(+)
>>
>> diff --git a/lib/libnetlink.c b/lib/libnetlink.c
>> index 1892a02ab5d0d73776c9882ffc77edcd2c663d01..0d48a3d43cf03065dacbd419578ab10af56431a4 100644
>> --- a/lib/libnetlink.c
>> +++ b/lib/libnetlink.c
>> @@ -718,6 +718,8 @@ static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
>>  	if (len < 0)
>>  		return len;
>>  
>> +	if (len < 32768)
>> +		len = 32768;
>>  	buf = malloc(len);
>>  	if (!buf) {
>>  		fprintf(stderr, "malloc error: not enough buffer\n");
>>
> 
> I believe that negates the whole point of 2d34851cd341 - which I have no
> problem with. 2 recvmsg calls per message is overkill.
> 

It does not negates the point at all.

The main point was to eventually be able to allocate more than 32KB.

We need to have a minimum size of 32KB so that the kernel can cook reasonably sized skbs

Because trying to allocate 4KB only in 2019 is kind of stupid...

( Especially considering ss currently buffers the whole thing before calling render() !!! )

> Do we know of any single message sizes > 32k? 2d34851cd341 cites
> increasing VF's but at some point there is a limit. If not, the whole
> PEEK thing should go away and we just malloc 32k (or 64k) buffers for
> each recvmsg.
> 


^ permalink raw reply

* Re: [bpf-next 2/2] bpf: add BPF_SOCK_OPS_STATS_CB for tcp_stats()
From: Yafang Shao @ 2019-02-13  2:10 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Daniel Borkmann, ast, Yonghong Song, brakmo, Eric Dumazet,
	David Miller, netdev, LKML, shaoyafang
In-Reply-To: <e5fabb00-4fad-183a-5c2d-e22ed58006c7@gmail.com>

On Tue, Feb 12, 2019 at 11:11 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
>
> On 02/12/2019 03:31 AM, Yafang Shao wrote:
> > Introuce this new op BPF_SOCK_OPS_STATS_CB for tcp_stats() such that it
> > can be traced via BPF on a per socket basis.
> > There's one argument in BPF_SOCK_OPS_STATS_CB, which is Linux MIB index
> > LINUX_MIB_* to indicate the TCP event.
> > All these Linux MIBs are defined in include/uapi/linux/snmp.h.
> >
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > ---
> >  include/uapi/linux/bpf.h | 5 +++++
> >  net/ipv4/tcp_input.c     | 1 +
> >  2 files changed, 6 insertions(+)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 1777fa0..0314ddd 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2894,6 +2894,11 @@ enum {
> >       BPF_SOCK_OPS_TCP_LISTEN_CB,     /* Called on listen(2), right after
> >                                        * socket transition to LISTEN state.
> >                                        */
> > +     BPF_SOCK_OPS_STATS_CB,          /*
> > +                                      * Called on tcp_stats().
> > +                                      * Arg1: Linux MIB index
> > +                                      *       LINUX_MIB_*
> > +                                      */
> >  };
> >
> >  /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 88deb1f..4acf458 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -3557,6 +3557,7 @@ static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag)
> >  static void tcp_stats(struct sock *sk, int mib_idx)
> >  {
> >       NET_INC_STATS(sock_net(sk), mib_idx);
> > +     tcp_call_bpf(sk, BPF_SOCK_OPS_STATS_CB, 1, &mib_idx);
> >  }
> >
> >  /* This routine deals with incoming acks, but not outgoing ones. */
> >
>
> If the plan is to add to all NET_INC_STATS() calls in TCP an additional tcp_call_bpf()
> I will say no.
>

I have no plan to place it in fast path.
Because what I concerned is the TCP abnomal events, which should be
not in the fast path.
However if we want to place it in the fast path, the code should be as bellow,

if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATS_CB_FLAG))
    tcp_call_bpf(sk, BPF_SOCK_OPS_STATS_CB, 1, &mib_idx);

> So far, tcp_call_bpf() has not been used in fast path.
>

That's why I do it like this.

Thanks
Yafang

^ permalink raw reply

* Re: [bpf-next 1/2] tcp: replace SOCK_DEBUG() with tcp_stats()
From: Eric Dumazet @ 2019-02-13  2:15 UTC (permalink / raw)
  To: Yafang Shao
  Cc: Eric Dumazet, Daniel Borkmann, Alexei Starovoitov, Yonghong Song,
	Lawrence Brakmo, David Miller, netdev, LKML, shaoyafang
In-Reply-To: <CALOAHbAskuYhs3KQTJtT0Y7hjFcOyab-JpG4GCPENUxOk34-eA@mail.gmail.com>

On Tue, Feb 12, 2019 at 6:07 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>

> Let me explain the background for you.
> I want to track some TCP abnormal  behavior in TCP/IP stack. But I
> find there's no good way to do it.
> The current MIBs are per net, other than per socket, that makes it not
> very powerful.
> And the ancient SOCK_DEBUG is not good as well.
> So we think why not cleanup this ancient SOCK_DEBUG() and introduce a
> more powerful method.


I am all for it, but this more powerful method does nothing at all in
the current patches.

I can not accept patches just because they seem to be harmless,
knowing that  the next patches
will be pushed later changing more stuff, just because the new
infrastructure is there "and can be used"

Just remove all SOCK_DEBUG() calls, there are leftovers of very ancient times.

Do not add more debugging stuff unless you can demonstrate
they actually allowed you to find a real bug and that you sent a
public fix for it.

Just adding "cool stuff" in TCP stack does not please me, it is only
more complexity for unproven gain.

Otherwise, I am tempted to think that these BPF hooks are there only
so that a company can more
easily build a private variant of TCP, yet letting the community
maintaining the hard part of TCP stack.

Thank you.

^ permalink raw reply

* [PATCH AUTOSEL 4.20 032/105] net/mlx4: Get rid of page operation after dma_alloc_coherent
From: Sasha Levin @ 2019-02-13  2:32 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Stephen Warren, David S . Miller, Sasha Levin, netdev, linux-rdma
In-Reply-To: <20190213023336.19019-1-sashal@kernel.org>

From: Stephen Warren <swarren@nvidia.com>

[ Upstream commit f65e192af35058e5c82da9e90871b472d24912bc ]

This patch solves a crash at the time of mlx4 driver unload or system
shutdown. The crash occurs because dma_alloc_coherent() returns one
value in mlx4_alloc_icm_coherent(), but a different value is passed to
dma_free_coherent() in mlx4_free_icm_coherent(). In turn this is because
when allocated, that pointer is passed to sg_set_buf() to record it,
then when freed it is re-calculated by calling
lowmem_page_address(sg_page()) which returns a different value. Solve
this by recording the value that dma_alloc_coherent() returns, and
passing this to dma_free_coherent().

This patch is roughly equivalent to commit 378efe798ecf ("RDMA/hns: Get
rid of page operation after dma_alloc_coherent").

Based-on-code-from: Christoph Hellwig <hch@lst.de>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx4/icm.c | 92 ++++++++++++++----------
 drivers/net/ethernet/mellanox/mlx4/icm.h | 22 +++++-
 2 files changed, 75 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 4b4351141b94..76b84d08a058 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -57,12 +57,12 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu
 	int i;
 
 	if (chunk->nsg > 0)
-		pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages,
+		pci_unmap_sg(dev->persist->pdev, chunk->sg, chunk->npages,
 			     PCI_DMA_BIDIRECTIONAL);
 
 	for (i = 0; i < chunk->npages; ++i)
-		__free_pages(sg_page(&chunk->mem[i]),
-			     get_order(chunk->mem[i].length));
+		__free_pages(sg_page(&chunk->sg[i]),
+			     get_order(chunk->sg[i].length));
 }
 
 static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
@@ -71,9 +71,9 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *
 
 	for (i = 0; i < chunk->npages; ++i)
 		dma_free_coherent(&dev->persist->pdev->dev,
-				  chunk->mem[i].length,
-				  lowmem_page_address(sg_page(&chunk->mem[i])),
-				  sg_dma_address(&chunk->mem[i]));
+				  chunk->buf[i].size,
+				  chunk->buf[i].addr,
+				  chunk->buf[i].dma_addr);
 }
 
 void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
@@ -111,22 +111,21 @@ static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order,
 	return 0;
 }
 
-static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
-				    int order, gfp_t gfp_mask)
+static int mlx4_alloc_icm_coherent(struct device *dev, struct mlx4_icm_buf *buf,
+				   int order, gfp_t gfp_mask)
 {
-	void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order,
-				       &sg_dma_address(mem), gfp_mask);
-	if (!buf)
+	buf->addr = dma_alloc_coherent(dev, PAGE_SIZE << order,
+				       &buf->dma_addr, gfp_mask);
+	if (!buf->addr)
 		return -ENOMEM;
 
-	if (offset_in_page(buf)) {
-		dma_free_coherent(dev, PAGE_SIZE << order,
-				  buf, sg_dma_address(mem));
+	if (offset_in_page(buf->addr)) {
+		dma_free_coherent(dev, PAGE_SIZE << order, buf->addr,
+				  buf->dma_addr);
 		return -ENOMEM;
 	}
 
-	sg_set_buf(mem, buf, PAGE_SIZE << order);
-	sg_dma_len(mem) = PAGE_SIZE << order;
+	buf->size = PAGE_SIZE << order;
 	return 0;
 }
 
@@ -159,21 +158,21 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 
 	while (npages > 0) {
 		if (!chunk) {
-			chunk = kmalloc_node(sizeof(*chunk),
+			chunk = kzalloc_node(sizeof(*chunk),
 					     gfp_mask & ~(__GFP_HIGHMEM |
 							  __GFP_NOWARN),
 					     dev->numa_node);
 			if (!chunk) {
-				chunk = kmalloc(sizeof(*chunk),
+				chunk = kzalloc(sizeof(*chunk),
 						gfp_mask & ~(__GFP_HIGHMEM |
 							     __GFP_NOWARN));
 				if (!chunk)
 					goto fail;
 			}
+			chunk->coherent = coherent;
 
-			sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
-			chunk->npages = 0;
-			chunk->nsg    = 0;
+			if (!coherent)
+				sg_init_table(chunk->sg, MLX4_ICM_CHUNK_LEN);
 			list_add_tail(&chunk->list, &icm->chunk_list);
 		}
 
@@ -186,10 +185,10 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 
 		if (coherent)
 			ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev,
-						      &chunk->mem[chunk->npages],
-						      cur_order, mask);
+						&chunk->buf[chunk->npages],
+						cur_order, mask);
 		else
-			ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
+			ret = mlx4_alloc_icm_pages(&chunk->sg[chunk->npages],
 						   cur_order, mask,
 						   dev->numa_node);
 
@@ -205,7 +204,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 		if (coherent)
 			++chunk->nsg;
 		else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
-			chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem,
+			chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->sg,
 						chunk->npages,
 						PCI_DMA_BIDIRECTIONAL);
 
@@ -220,7 +219,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 	}
 
 	if (!coherent && chunk) {
-		chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem,
+		chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->sg,
 					chunk->npages,
 					PCI_DMA_BIDIRECTIONAL);
 
@@ -320,7 +319,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
 	u64 idx;
 	struct mlx4_icm_chunk *chunk;
 	struct mlx4_icm *icm;
-	struct page *page = NULL;
+	void *addr = NULL;
 
 	if (!table->lowmem)
 		return NULL;
@@ -336,28 +335,49 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
 
 	list_for_each_entry(chunk, &icm->chunk_list, list) {
 		for (i = 0; i < chunk->npages; ++i) {
+			dma_addr_t dma_addr;
+			size_t len;
+
+			if (table->coherent) {
+				len = chunk->buf[i].size;
+				dma_addr = chunk->buf[i].dma_addr;
+				addr = chunk->buf[i].addr;
+			} else {
+				struct page *page;
+
+				len = sg_dma_len(&chunk->sg[i]);
+				dma_addr = sg_dma_address(&chunk->sg[i]);
+
+				/* XXX: we should never do this for highmem
+				 * allocation.  This function either needs
+				 * to be split, or the kernel virtual address
+				 * return needs to be made optional.
+				 */
+				page = sg_page(&chunk->sg[i]);
+				addr = lowmem_page_address(page);
+			}
+
 			if (dma_handle && dma_offset >= 0) {
-				if (sg_dma_len(&chunk->mem[i]) > dma_offset)
-					*dma_handle = sg_dma_address(&chunk->mem[i]) +
-						dma_offset;
-				dma_offset -= sg_dma_len(&chunk->mem[i]);
+				if (len > dma_offset)
+					*dma_handle = dma_addr + dma_offset;
+				dma_offset -= len;
 			}
+
 			/*
 			 * DMA mapping can merge pages but not split them,
 			 * so if we found the page, dma_handle has already
 			 * been assigned to.
 			 */
-			if (chunk->mem[i].length > offset) {
-				page = sg_page(&chunk->mem[i]);
+			if (len > offset)
 				goto out;
-			}
-			offset -= chunk->mem[i].length;
+			offset -= len;
 		}
 	}
 
+	addr = NULL;
 out:
 	mutex_unlock(&table->mutex);
-	return page ? lowmem_page_address(page) + offset : NULL;
+	return addr ? addr + offset : NULL;
 }
 
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
index c9169a490557..d199874b1c07 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.h
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h
@@ -47,11 +47,21 @@ enum {
 	MLX4_ICM_PAGE_SIZE	= 1 << MLX4_ICM_PAGE_SHIFT,
 };
 
+struct mlx4_icm_buf {
+	void			*addr;
+	size_t			size;
+	dma_addr_t		dma_addr;
+};
+
 struct mlx4_icm_chunk {
 	struct list_head	list;
 	int			npages;
 	int			nsg;
-	struct scatterlist	mem[MLX4_ICM_CHUNK_LEN];
+	bool			coherent;
+	union {
+		struct scatterlist	sg[MLX4_ICM_CHUNK_LEN];
+		struct mlx4_icm_buf	buf[MLX4_ICM_CHUNK_LEN];
+	};
 };
 
 struct mlx4_icm {
@@ -114,12 +124,18 @@ static inline void mlx4_icm_next(struct mlx4_icm_iter *iter)
 
 static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter)
 {
-	return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
+	if (iter->chunk->coherent)
+		return iter->chunk->buf[iter->page_idx].dma_addr;
+	else
+		return sg_dma_address(&iter->chunk->sg[iter->page_idx]);
 }
 
 static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter)
 {
-	return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
+	if (iter->chunk->coherent)
+		return iter->chunk->buf[iter->page_idx].size;
+	else
+		return sg_dma_len(&iter->chunk->sg[iter->page_idx]);
 }
 
 int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
-- 
2.19.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.20 034/105] xprtrdma: Double free in rpcrdma_sendctxs_create()
From: Sasha Levin @ 2019-02-13  2:32 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Dan Carpenter, Anna Schumaker, Sasha Levin, linux-nfs, netdev
In-Reply-To: <20190213023336.19019-1-sashal@kernel.org>

From: Dan Carpenter <dan.carpenter@oracle.com>

[ Upstream commit 6e17f58c486d9554341f70aa5b63b8fbed07b3fa ]

The clean up is handled by the caller, rpcrdma_buffer_create(), so this
call to rpcrdma_sendctxs_destroy() leads to a double free.

Fixes: ae72950abf99 ("xprtrdma: Add data structure to manage RDMA Send arguments")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/sunrpc/xprtrdma/verbs.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b9bc7f9f6bb9..4ffa6db804bb 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -913,17 +913,13 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
 	for (i = 0; i <= buf->rb_sc_last; i++) {
 		sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);
 		if (!sc)
-			goto out_destroy;
+			return -ENOMEM;
 
 		sc->sc_xprt = r_xprt;
 		buf->rb_sc_ctxs[i] = sc;
 	}
 
 	return 0;
-
-out_destroy:
-	rpcrdma_sendctxs_destroy(buf);
-	return -ENOMEM;
 }
 
 /* The sendctx queue is not guaranteed to have a size that is a
-- 
2.19.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.20 043/105] netfilter: nft_flow_offload: Fix reverse route lookup
From: Sasha Levin @ 2019-02-13  2:32 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: wenxu, Pablo Neira Ayuso, Sasha Levin, netfilter-devel, coreteam,
	netdev
In-Reply-To: <20190213023336.19019-1-sashal@kernel.org>

From: wenxu <wenxu@ucloud.cn>

[ Upstream commit a799aea0988ea0d1b1f263e996fdad2f6133c680 ]

Using the following example:

	client 1.1.1.7 ---> 2.2.2.7 which dnat to 10.0.0.7 server

The first reply packet (ie. syn+ack) uses an incorrect destination
address for the reverse route lookup since it uses:

	daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;

which is 2.2.2.7 in the scenario that is described above, while this
should be:

	daddr = ct->tuplehash[dir].tuple.src.u3.ip;

that is 10.0.0.7.

Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/netfilter/nft_flow_offload.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 974525eb92df..ccdb8f5ababb 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -29,10 +29,10 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
 	memset(&fl, 0, sizeof(fl));
 	switch (nft_pf(pkt)) {
 	case NFPROTO_IPV4:
-		fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+		fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
 		break;
 	case NFPROTO_IPV6:
-		fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
+		fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
 		break;
 	}
 
-- 
2.19.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.20 057/105] net: stmmac: Prevent RX starvation in stmmac_napi_poll()
From: Sasha Levin @ 2019-02-13  2:32 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Jose Abreu, Joao Pinto, David S . Miller, Giuseppe Cavallaro,
	Alexandre Torgue, Jose Abreu, Sasha Levin, netdev
In-Reply-To: <20190213023336.19019-1-sashal@kernel.org>

From: Jose Abreu <jose.abreu@synopsys.com>

[ Upstream commit fa0be0a43f101888ac677dba31b590963eafeaa1 ]

Currently, TX is given a budget which is consumed by stmmac_tx_clean()
and stmmac_rx() is given the remaining non-consumed budget.

This is wrong and in case we are sending a large number of packets this
can starve RX because remaining budget will be low.

Let's give always the same budget for RX and TX clean.

While at it, check if we missed any interrupts while we were in NAPI
callback by looking at DMA interrupt status.

Cc: Joao Pinto <jpinto@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 27 ++++++++++---------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index fe9240e15aea..5d83d6a7694b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3525,27 +3525,28 @@ static int stmmac_napi_poll(struct napi_struct *napi, int budget)
 	struct stmmac_channel *ch =
 		container_of(napi, struct stmmac_channel, napi);
 	struct stmmac_priv *priv = ch->priv_data;
-	int work_done = 0, work_rem = budget;
+	int work_done, rx_done = 0, tx_done = 0;
 	u32 chan = ch->index;
 
 	priv->xstats.napi_poll++;
 
-	if (ch->has_tx) {
-		int done = stmmac_tx_clean(priv, work_rem, chan);
+	if (ch->has_tx)
+		tx_done = stmmac_tx_clean(priv, budget, chan);
+	if (ch->has_rx)
+		rx_done = stmmac_rx(priv, budget, chan);
 
-		work_done += done;
-		work_rem -= done;
-	}
-
-	if (ch->has_rx) {
-		int done = stmmac_rx(priv, work_rem, chan);
+	work_done = max(rx_done, tx_done);
+	work_done = min(work_done, budget);
 
-		work_done += done;
-		work_rem -= done;
-	}
+	if (work_done < budget && napi_complete_done(napi, work_done)) {
+		int stat;
 
-	if (work_done < budget && napi_complete_done(napi, work_done))
 		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+		stat = stmmac_dma_interrupt_status(priv, priv->ioaddr,
+						   &priv->xstats, chan);
+		if (stat && napi_reschedule(napi))
+			stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+	}
 
 	return work_done;
 }
-- 
2.19.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.20 075/105] SUNRPC: Ensure rq_bytes_sent is reset before request transmission
From: Sasha Levin @ 2019-02-13  2:33 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Trond Myklebust, Trond Myklebust, Anna Schumaker, Sasha Levin,
	linux-nfs, netdev
In-Reply-To: <20190213023336.19019-1-sashal@kernel.org>

From: Trond Myklebust <trondmy@gmail.com>

[ Upstream commit e66721f0436396f779291a29616858b76bfd9415 ]

When we resend a request, ensure that the 'rq_bytes_sent' is reset
to zero.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 net/sunrpc/clnt.c | 1 -
 net/sunrpc/xprt.c | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 24cbddc44c88..2189fbc4c570 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1738,7 +1738,6 @@ rpc_xdr_encode(struct rpc_task *task)
 	xdr_buf_init(&req->rq_rcv_buf,
 		     req->rq_rbuffer,
 		     req->rq_rcvsize);
-	req->rq_bytes_sent = 0;
 
 	p = rpc_encode_header(task);
 	if (p == NULL) {
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 943f08be7c38..f1ec2110efeb 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1151,6 +1151,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
 	struct rpc_xprt *xprt = req->rq_xprt;
 
 	if (xprt_request_need_enqueue_transmit(task, req)) {
+		req->rq_bytes_sent = 0;
 		spin_lock(&xprt->queue_lock);
 		/*
 		 * Requests that carry congestion control credits are added
-- 
2.19.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.20 074/105] samples/bpf: workaround clang asm goto compilation errors
From: Sasha Levin @ 2019-02-13  2:33 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Yonghong Song, Daniel Borkmann, Sasha Levin, netdev, xdp-newbies
In-Reply-To: <20190213023336.19019-1-sashal@kernel.org>

From: Yonghong Song <yhs@fb.com>

[ Upstream commit 6bf3bbe1f4d4cf405e3c2bf07bbdff56d3223ec8 ]

x86 compilation has required asm goto support since 4.17.
Since clang does not support asm goto, at 4.17,
Commit b1ae32dbab50 ("x86/cpufeature: Guard asm_volatile_goto usage
for BPF compilation") worked around the issue by permitting an
alternative implementation without asm goto for clang.

At 5.0, more asm goto usages appeared.
  [yhs@148 x86]$ egrep -r asm_volatile_goto
  include/asm/cpufeature.h:     asm_volatile_goto("1: jmp 6f\n"
  include/asm/jump_label.h:     asm_volatile_goto("1:"
  include/asm/jump_label.h:     asm_volatile_goto("1:"
  include/asm/rmwcc.h:  asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"     \
  include/asm/uaccess.h:        asm_volatile_goto("\n"                          \
  include/asm/uaccess.h:        asm_volatile_goto("\n"                          \
  [yhs@148 x86]$

Compiling samples/bpf directories, most bpf programs failed
compilation with error messages like:
  In file included from /home/yhs/work/bpf-next/samples/bpf/xdp_sample_pkts_kern.c:2:
  In file included from /home/yhs/work/bpf-next/include/linux/ptrace.h:6:
  In file included from /home/yhs/work/bpf-next/include/linux/sched.h:15:
  In file included from /home/yhs/work/bpf-next/include/linux/sem.h:5:
  In file included from /home/yhs/work/bpf-next/include/uapi/linux/sem.h:5:
  In file included from /home/yhs/work/bpf-next/include/linux/ipc.h:9:
  In file included from /home/yhs/work/bpf-next/include/linux/refcount.h:72:
  /home/yhs/work/bpf-next/arch/x86/include/asm/refcount.h:70:9: error: 'asm goto' constructs are not supported yet
        return GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
               ^
  /home/yhs/work/bpf-next/arch/x86/include/asm/rmwcc.h:67:2: note: expanded from macro 'GEN_BINARY_SUFFIXED_RMWcc'
        __GEN_RMWcc(op " %[val], %[var]\n\t" suffix, var, cc,           \
        ^
  /home/yhs/work/bpf-next/arch/x86/include/asm/rmwcc.h:21:2: note: expanded from macro '__GEN_RMWcc'
        asm_volatile_goto (fullop "; j" #cc " %l[cc_label]"             \
        ^
  /home/yhs/work/bpf-next/include/linux/compiler_types.h:188:37: note: expanded from macro 'asm_volatile_goto'
  #define asm_volatile_goto(x...) asm goto(x)

Most implementation does not even provide an alternative
implementation. And it is also not practical to make changes
for each call site.

This patch workarounded the asm goto issue by redefining the macro like below:
  #define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")

If asm_volatile_goto is not used by bpf programs, which is typically the case, nothing bad
will happen. If asm_volatile_goto is used by bpf programs, which is incorrect, the compiler
will issue an error since "invalid use of asm_volatile_goto" is not valid assembly codes.

With this patch, all bpf programs under samples/bpf can pass compilation.

Note that bpf programs under tools/testing/selftests/bpf/ compiled fine as
they do not access kernel internal headers.

Fixes: e769742d3584 ("Revert "x86/jump-labels: Macrofy inline assembly code to work around GCC inlining bugs"")
Fixes: 18fe58229d80 ("x86, asm: change the GEN_*_RMWcc() macros to not quote the condition")
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 samples/bpf/Makefile              |  1 +
 samples/bpf/asm_goto_workaround.h | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 samples/bpf/asm_goto_workaround.h

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index be0a961450bc..f5ce993c78e4 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -273,6 +273,7 @@ $(obj)/%.o: $(src)/%.c
 		-Wno-gnu-variable-sized-type-not-at-end \
 		-Wno-address-of-packed-member -Wno-tautological-compare \
 		-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
+		-I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \
 		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
 ifeq ($(DWARF2BTF),y)
 	$(BTF_PAHOLE) -J $@
diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h
new file mode 100644
index 000000000000..5cd7c1d1a5d5
--- /dev/null
+++ b/samples/bpf/asm_goto_workaround.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 Facebook */
+#ifndef __ASM_GOTO_WORKAROUND_H
+#define __ASM_GOTO_WORKAROUND_H
+
+/* this will bring in asm_volatile_goto macro definition
+ * if enabled by compiler and config options.
+ */
+#include <linux/types.h>
+
+#ifdef asm_volatile_goto
+#undef asm_volatile_goto
+#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
+#endif
+
+#endif
-- 
2.19.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.20 092/105] atm: he: fix sign-extension overflow on large shift
From: Sasha Levin @ 2019-02-13  2:33 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Colin Ian King, David S . Miller, Sasha Levin, netdev
In-Reply-To: <20190213023336.19019-1-sashal@kernel.org>

From: Colin Ian King <colin.king@canonical.com>

[ Upstream commit cb12d72b27a6f41325ae23a11033cf5fedfa1b97 ]

Shifting the 1 by exp by an int can lead to sign-extension overlow when
exp is 31 since 1 is an signed int and sign-extending this result to an
unsigned long long will set the upper 32 bits.  Fix this by shifting an
unsigned long.

Detected by cppcheck:
(warning) Shifting signed 32-bit value by 31 bits is undefined behaviour

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/atm/he.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 29f102dcfec4..329ce9072ee9 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -717,7 +717,7 @@ static int he_init_cs_block_rcm(struct he_dev *he_dev)
 			instead of '/ 512', use '>> 9' to prevent a call
 			to divdu3 on x86 platforms
 		*/
-		rate_cps = (unsigned long long) (1 << exp) * (man + 512) >> 9;
+		rate_cps = (unsigned long long) (1UL << exp) * (man + 512) >> 9;
 
 		if (rate_cps < 10)
 			rate_cps = 10;	/* 2.2.1 minimum payload rate is 10 cps */
-- 
2.19.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.20 100/105] mlxsw: spectrum_switchdev: Do not treat static FDB entries as sticky
From: Sasha Levin @ 2019-02-13  2:33 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Ido Schimmel, David S . Miller, Sasha Levin, netdev
In-Reply-To: <20190213023336.19019-1-sashal@kernel.org>

From: Ido Schimmel <idosch@mellanox.com>

[ Upstream commit 64254a2054611205798e6bde634639bc704573ac ]

The driver currently treats static FDB entries as both static and
sticky. This is incorrect and prevents such entries from being roamed to
a different port via learning.

Fix this by configuring static entries with ageing disabled and roaming
enabled.

In net-next we can add proper support for the newly introduced 'sticky'
flag.

Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Alexander Petrovskiy <alexpe@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 69f556ddb934..9fc6b609834b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1244,7 +1244,7 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp,
 static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic)
 {
 	return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS :
-			 MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY;
+			 MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG;
 }
 
 static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
@@ -1301,7 +1301,7 @@ out:
 static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 				     const char *mac, u16 fid, bool adding,
 				     enum mlxsw_reg_sfd_rec_action action,
-				     bool dynamic)
+				     enum mlxsw_reg_sfd_rec_policy policy)
 {
 	char *sfd_pl;
 	u8 num_rec;
@@ -1312,8 +1312,7 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 		return -ENOMEM;
 
 	mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
-	mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic),
-			      mac, fid, action, local_port);
+	mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, mac, fid, action, local_port);
 	num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
 	if (err)
@@ -1332,7 +1331,8 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 				   bool dynamic)
 {
 	return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding,
-					 MLXSW_REG_SFD_REC_ACTION_NOP, dynamic);
+					 MLXSW_REG_SFD_REC_ACTION_NOP,
+					 mlxsw_sp_sfd_rec_policy(dynamic));
 }
 
 int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
@@ -1340,7 +1340,7 @@ int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
 {
 	return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding,
 					 MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER,
-					 false);
+					 MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY);
 }
 
 static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id,
-- 
2.19.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox