* [PATCH bpf-next v2 3/4] selftests/bpf: test_progs: remove asserts from subtests
From: Stanislav Fomichev @ 2019-08-19 19:17 UTC (permalink / raw)
To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev, Andrii Nakryiko
In-Reply-To: <20190819191752.241637-1-sdf@google.com>
Otherwise they can bring the whole process down.
Cc: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
.../selftests/bpf/prog_tests/bpf_obj_id.c | 19 +++++++++++-------
.../selftests/bpf/prog_tests/map_lock.c | 20 +++++++++++--------
.../selftests/bpf/prog_tests/spinlock.c | 12 ++++++-----
.../bpf/prog_tests/stacktrace_build_id.c | 7 ++++---
.../bpf/prog_tests/stacktrace_build_id_nmi.c | 7 ++++---
5 files changed, 39 insertions(+), 26 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index b9d0cd312839..acc2fc046b01 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -48,15 +48,17 @@ void test_bpf_obj_id(void)
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
*/
- QCHECK(err);
- assert(!err);
+ if (QCHECK(err))
+ continue;
/* Insert a magic value to the map */
map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
- assert(map_fds[i] >= 0);
+ if (QCHECK(map_fds[i] < 0))
+ goto done;
err = bpf_map_update_elem(map_fds[i], &array_key,
&array_magic_value, 0);
- assert(!err);
+ if (QCHECK(err))
+ goto done;
/* Check getting map info */
info_len = sizeof(struct bpf_map_info) * 2;
@@ -95,9 +97,11 @@ void test_bpf_obj_id(void)
prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
prog_infos[i].nr_map_ids = 2;
err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
- assert(!err);
+ if (QCHECK(err))
+ goto done;
err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
- assert(!err);
+ if (QCHECK(err))
+ goto done;
err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
&info_len);
load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
@@ -223,7 +227,8 @@ void test_bpf_obj_id(void)
nr_id_found++;
err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
- assert(!err);
+ if (QCHECK(err))
+ goto done;
err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
CHECK(err || info_len != sizeof(struct bpf_map_info) ||
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index c1bddc433a5a..7a12129def9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -54,17 +54,21 @@ void test_map_lock(void)
bpf_map_update_elem(map_fd[0], &key, vars, BPF_F_LOCK);
for (i = 0; i < 4; i++)
- assert(pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd) == 0);
+ if (QCHECK(pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd)))
+ goto close_prog;
for (i = 4; i < 6; i++)
- assert(pthread_create(&thread_id[i], NULL,
- ¶llel_map_access, &map_fd[i - 4]) == 0);
+ if (QCHECK(pthread_create(&thread_id[i], NULL,
+ ¶llel_map_access, &map_fd[i - 4])))
+ goto close_prog;
for (i = 0; i < 4; i++)
- assert(pthread_join(thread_id[i], &ret) == 0 &&
- ret == (void *)&prog_fd);
+ if (QCHECK(pthread_join(thread_id[i], &ret) ||
+ ret != (void *)&prog_fd))
+ goto close_prog;
for (i = 4; i < 6; i++)
- assert(pthread_join(thread_id[i], &ret) == 0 &&
- ret == (void *)&map_fd[i - 4]);
+ if (QCHECK(pthread_join(thread_id[i], &ret) ||
+ ret != (void *)&map_fd[i - 4]))
+ goto close_prog;
close_prog:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index e4294a7fdf1a..00b4ed1734e0 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -16,12 +16,14 @@ void test_spinlock(void)
goto close_prog;
}
for (i = 0; i < 4; i++)
- assert(pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd) == 0);
- for (i = 0; i < 4; i++)
- assert(pthread_join(thread_id[i], &ret) == 0 &&
- ret == (void *)&prog_fd);
+ if (QCHECK(pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd)))
+ goto close_prog;
+ for (i = 0; i < 4; i++)
+ if (QCHECK(pthread_join(thread_id[i], &ret) ||
+ ret != (void *)&prog_fd))
+ goto close_prog;
close_prog:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index ac44fda84833..552e07e7800c 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -51,9 +51,10 @@ void test_stacktrace_build_id(void)
"err %d errno %d\n", err, errno))
goto disable_pmu;
- assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
- == 0);
- assert(system("./urandom_read") == 0);
+ if (QCHECK(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
+ goto disable_pmu;
+ if (QCHECK(system("./urandom_read")))
+ goto disable_pmu;
/* disable stack trace collection */
key = 0;
val = 1;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 9557b7dfb782..1553c848edc5 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -82,9 +82,10 @@ void test_stacktrace_build_id_nmi(void)
"err %d errno %d\n", err, errno))
goto disable_pmu;
- assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
- == 0);
- assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+ if (QCHECK(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
+ goto disable_pmu;
+ if (QCHECK(system("taskset 0x1 ./urandom_read 100000")))
+ goto disable_pmu;
/* disable stack trace collection */
key = 0;
val = 1;
--
2.23.0.rc1.153.gdeed80330f-goog
^ permalink raw reply related
* [PATCH bpf-next v2 4/4] selftests/bpf: test_progs: remove unused ret
From: Stanislav Fomichev @ 2019-08-19 19:17 UTC (permalink / raw)
To: netdev, bpf; +Cc: davem, ast, daniel, Stanislav Fomichev
In-Reply-To: <20190819191752.241637-1-sdf@google.com>
send_signal test returns static codes from the subtests which
nobody looks at, let's rely on the CHECK macros instead.
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
.../selftests/bpf/prog_tests/send_signal.c | 42 +++++++++----------
1 file changed, 19 insertions(+), 23 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 40c2c5efdd3e..b607112c64e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -8,7 +8,7 @@ static void sigusr1_handler(int signum)
sigusr1_received++;
}
-static int test_send_signal_common(struct perf_event_attr *attr,
+static void test_send_signal_common(struct perf_event_attr *attr,
int prog_type,
const char *test_name)
{
@@ -23,13 +23,13 @@ static int test_send_signal_common(struct perf_event_attr *attr,
if (CHECK(pipe(pipe_c2p), test_name,
"pipe pipe_c2p error: %s\n", strerror(errno)))
- goto no_fork_done;
+ return;
if (CHECK(pipe(pipe_p2c), test_name,
"pipe pipe_p2c error: %s\n", strerror(errno))) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
- goto no_fork_done;
+ return;
}
pid = fork();
@@ -38,7 +38,7 @@ static int test_send_signal_common(struct perf_event_attr *attr,
close(pipe_c2p[1]);
close(pipe_p2c[0]);
close(pipe_p2c[1]);
- goto no_fork_done;
+ return;
}
if (pid == 0) {
@@ -125,7 +125,7 @@ static int test_send_signal_common(struct perf_event_attr *attr,
goto disable_pmu;
}
- err = CHECK(buf[0] != '2', test_name, "incorrect result\n");
+ CHECK(buf[0] != '2', test_name, "incorrect result\n");
/* notify child safe to exit */
write(pipe_p2c[1], buf, 1);
@@ -138,11 +138,9 @@ static int test_send_signal_common(struct perf_event_attr *attr,
close(pipe_c2p[0]);
close(pipe_p2c[1]);
wait(NULL);
-no_fork_done:
- return err;
}
-static int test_send_signal_tracepoint(void)
+static void test_send_signal_tracepoint(void)
{
const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
struct perf_event_attr attr = {
@@ -159,21 +157,21 @@ static int test_send_signal_tracepoint(void)
if (CHECK(efd < 0, "tracepoint",
"open syscalls/sys_enter_nanosleep/id failure: %s\n",
strerror(errno)))
- return -1;
+ return;
bytes = read(efd, buf, sizeof(buf));
close(efd);
if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
"read syscalls/sys_enter_nanosleep/id failure: %s\n",
strerror(errno)))
- return -1;
+ return;
attr.config = strtol(buf, NULL, 0);
- return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
+ test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
}
-static int test_send_signal_perf(void)
+static void test_send_signal_perf(void)
{
struct perf_event_attr attr = {
.sample_period = 1,
@@ -181,11 +179,11 @@ static int test_send_signal_perf(void)
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
- "perf_sw_event");
+ test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
+ "perf_sw_event");
}
-static int test_send_signal_nmi(void)
+static void test_send_signal_nmi(void)
{
struct perf_event_attr attr = {
.sample_freq = 50,
@@ -205,25 +203,23 @@ static int test_send_signal_nmi(void)
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
__func__);
test__skip();
- return 0;
+ return;
}
/* Let the test fail with a more informative message */
} else {
close(pmu_fd);
}
- return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
- "perf_hw_event");
+ test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT,
+ "perf_hw_event");
}
void test_send_signal(void)
{
- int ret = 0;
-
if (test__start_subtest("send_signal_tracepoint"))
- ret |= test_send_signal_tracepoint();
+ test_send_signal_tracepoint();
if (test__start_subtest("send_signal_perf"))
- ret |= test_send_signal_perf();
+ test_send_signal_perf();
if (test__start_subtest("send_signal_nmi"))
- ret |= test_send_signal_nmi();
+ test_send_signal_nmi();
}
--
2.23.0.rc1.153.gdeed80330f-goog
^ permalink raw reply related
* [PATCH net-next,v2 0/6] Add software backchannel and mlx5e HV VHCA stats
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
This patch set adds paravirtual backchannel in software in pci_hyperv,
which is required by the mlx5e driver HV VHCA stats agent.
The stats agent is responsible on running a periodic rx/tx packets/bytes
stats update.
Dexuan Cui (1):
PCI: hv: Add a paravirtual backchannel in software
Haiyang Zhang (5):
PCI: hv: Add a Hyper-V PCI interface driver for software backchannel
interface
net/mlx5: Add wrappers for HyperV PCIe operations
net/mlx5: Add HV VHCA infrastructure
net/mlx5: Add HV VHCA control agent
net/mlx5e: Add mlx5e HV VHCA stats agent
MAINTAINERS | 1 +
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +
drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 +
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 162 +++++++++
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h | 25 ++
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 371 +++++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 104 ++++++
drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +
drivers/pci/Kconfig | 1 +
drivers/pci/controller/Kconfig | 7 +
drivers/pci/controller/Makefile | 1 +
drivers/pci/controller/pci-hyperv-intf.c | 70 ++++
drivers/pci/controller/pci-hyperv.c | 308 +++++++++++++++++
include/linux/hyperv.h | 29 ++
include/linux/mlx5/driver.h | 2 +
18 files changed, 1192 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
create mode 100644 drivers/pci/controller/pci-hyperv-intf.c
--
1.8.3.1
^ permalink raw reply
* [PATCH net-next,v2 1/6] PCI: hv: Add a paravirtual backchannel in software
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org, Dexuan Cui, Jake Oshins
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>
From: Dexuan Cui <decui@microsoft.com>
Windows SR-IOV provides a backchannel mechanism in software for communication
between a VF driver and a PF driver. These "configuration blocks" are
similar in concept to PCI configuration space, but instead of doing reads and
writes in 32-bit chunks through a very slow path, packets of up to 128 bytes
can be sent or received asynchronously.
Nearly every SR-IOV device contains just such a communications channel in
hardware, so using this one in software is usually optional. Using the
software channel, however, allows driver implementers to leverage software
tools that fuzz the communications channel looking for vulnerabilities.
The usage model for these packets puts the responsibility for reading or
writing on the VF driver. The VF driver sends a read or a write packet,
indicating which "block" is being referred to by number.
If the PF driver wishes to initiate communication, it can "invalidate" one or
more of the first 64 blocks. This invalidation is delivered via a callback
supplied by the VF driver by this driver.
No protocol is implied, except that supplied by the PF and VF drivers.
Signed-off-by: Jake Oshins <jakeo@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
drivers/pci/controller/pci-hyperv.c | 302 ++++++++++++++++++++++++++++++++++++
include/linux/hyperv.h | 15 ++
2 files changed, 317 insertions(+)
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 40b6254..57adeca 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -365,6 +365,39 @@ struct pci_delete_interrupt {
struct tran_int_desc int_desc;
} __packed;
+/*
+ * Note: the VM must pass a valid block id, wslot and bytes_requested.
+ */
+struct pci_read_block {
+ struct pci_message message_type;
+ u32 block_id;
+ union win_slot_encoding wslot;
+ u32 bytes_requested;
+} __packed;
+
+struct pci_read_block_response {
+ struct vmpacket_descriptor hdr;
+ u32 status;
+ u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+/*
+ * Note: the VM must pass a valid block id, wslot and byte_count.
+ */
+struct pci_write_block {
+ struct pci_message message_type;
+ u32 block_id;
+ union win_slot_encoding wslot;
+ u32 byte_count;
+ u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+struct pci_dev_inval_block {
+ struct pci_incoming_message incoming;
+ union win_slot_encoding wslot;
+ u64 block_mask;
+} __packed;
+
struct pci_dev_incoming {
struct pci_incoming_message incoming;
union win_slot_encoding wslot;
@@ -499,6 +532,9 @@ struct hv_pci_dev {
struct hv_pcibus_device *hbus;
struct work_struct wrk;
+ void (*block_invalidate)(void *context, u64 block_mask);
+ void *invalidate_context;
+
/*
* What would be observed if one wrote 0xFFFFFFFF to a BAR and then
* read it back, for each of the BAR offsets within config space.
@@ -817,6 +853,256 @@ static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
.write = hv_pcifront_write_config,
};
+/*
+ * Paravirtual backchannel
+ *
+ * Hyper-V SR-IOV provides a backchannel mechanism in software for
+ * communication between a VF driver and a PF driver. These
+ * "configuration blocks" are similar in concept to PCI configuration space,
+ * but instead of doing reads and writes in 32-bit chunks through a very slow
+ * path, packets of up to 128 bytes can be sent or received asynchronously.
+ *
+ * Nearly every SR-IOV device contains just such a communications channel in
+ * hardware, so using this one in software is usually optional. Using the
+ * software channel, however, allows driver implementers to leverage software
+ * tools that fuzz the communications channel looking for vulnerabilities.
+ *
+ * The usage model for these packets puts the responsibility for reading or
+ * writing on the VF driver. The VF driver sends a read or a write packet,
+ * indicating which "block" is being referred to by number.
+ *
+ * If the PF driver wishes to initiate communication, it can "invalidate" one or
+ * more of the first 64 blocks. This invalidation is delivered via a callback
+ * supplied by the VF driver by this driver.
+ *
+ * No protocol is implied, except that supplied by the PF and VF drivers.
+ */
+
+struct hv_read_config_compl {
+ struct hv_pci_compl comp_pkt;
+ void *buf;
+ unsigned int len;
+ unsigned int bytes_returned;
+};
+
+/**
+ * hv_pci_read_config_compl() - Invoked when a response packet
+ * for a read config block operation arrives.
+ * @context: Identifies the read config operation
+ * @resp: The response packet itself
+ * @resp_packet_size: Size in bytes of the response packet
+ */
+static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
+ int resp_packet_size)
+{
+ struct hv_read_config_compl *comp = context;
+ struct pci_read_block_response *read_resp =
+ (struct pci_read_block_response *)resp;
+ unsigned int data_len, hdr_len;
+
+ hdr_len = offsetof(struct pci_read_block_response, bytes);
+ if (resp_packet_size < hdr_len) {
+ comp->comp_pkt.completion_status = -1;
+ goto out;
+ }
+
+ data_len = resp_packet_size - hdr_len;
+ if (data_len > 0 && read_resp->status == 0) {
+ comp->bytes_returned = min(comp->len, data_len);
+ memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
+ } else {
+ comp->bytes_returned = 0;
+ }
+
+ comp->comp_pkt.completion_status = read_resp->status;
+out:
+ complete(&comp->comp_pkt.host_event);
+}
+
+/**
+ * hv_read_config_block() - Sends a read config block request to
+ * the back-end driver running in the Hyper-V parent partition.
+ * @pdev: The PCI driver's representation for this device.
+ * @buf: Buffer into which the config block will be copied.
+ * @len: Size in bytes of buf.
+ * @block_id: Identifies the config block which has been requested.
+ * @bytes_returned: Size which came back from the back-end driver.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+ unsigned int block_id, unsigned int *bytes_returned)
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct {
+ struct pci_packet pkt;
+ char buf[sizeof(struct pci_read_block)];
+ } pkt;
+ struct hv_read_config_compl comp_pkt;
+ struct pci_read_block *read_blk;
+ int ret;
+
+ if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ init_completion(&comp_pkt.comp_pkt.host_event);
+ comp_pkt.buf = buf;
+ comp_pkt.len = len;
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.pkt.completion_func = hv_pci_read_config_compl;
+ pkt.pkt.compl_ctxt = &comp_pkt;
+ read_blk = (struct pci_read_block *)&pkt.pkt.message;
+ read_blk->message_type.type = PCI_READ_BLOCK;
+ read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+ read_blk->block_id = block_id;
+ read_blk->bytes_requested = len;
+
+ ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
+ sizeof(*read_blk), (unsigned long)&pkt.pkt,
+ VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ return ret;
+
+ ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
+ if (ret)
+ return ret;
+
+ if (comp_pkt.comp_pkt.completion_status != 0 ||
+ comp_pkt.bytes_returned == 0) {
+ dev_err(&hbus->hdev->device,
+ "Read Config Block failed: 0x%x, bytes_returned=%d\n",
+ comp_pkt.comp_pkt.completion_status,
+ comp_pkt.bytes_returned);
+ return -EIO;
+ }
+
+ *bytes_returned = comp_pkt.bytes_returned;
+ return 0;
+}
+EXPORT_SYMBOL(hv_read_config_block);
+
+/**
+ * hv_pci_write_config_compl() - Invoked when a response packet for a write
+ * config block operation arrives.
+ * @context: Identifies the write config operation
+ * @resp: The response packet itself
+ * @resp_packet_size: Size in bytes of the response packet
+ */
+static void hv_pci_write_config_compl(void *context, struct pci_response *resp,
+ int resp_packet_size)
+{
+ struct hv_pci_compl *comp_pkt = context;
+
+ comp_pkt->completion_status = resp->status;
+ complete(&comp_pkt->host_event);
+}
+
+/**
+ * hv_write_config_block() - Sends a write config block request to the
+ * back-end driver running in the Hyper-V parent partition.
+ * @pdev: The PCI driver's representation for this device.
+ * @buf: Buffer from which the config block will be copied.
+ * @len: Size in bytes of buf.
+ * @block_id: Identifies the config block which is being written.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+ unsigned int block_id)
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct {
+ struct pci_packet pkt;
+ char buf[sizeof(struct pci_write_block)];
+ u32 reserved;
+ } pkt;
+ struct hv_pci_compl comp_pkt;
+ struct pci_write_block *write_blk;
+ u32 pkt_size;
+ int ret;
+
+ if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ init_completion(&comp_pkt.host_event);
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.pkt.completion_func = hv_pci_write_config_compl;
+ pkt.pkt.compl_ctxt = &comp_pkt;
+ write_blk = (struct pci_write_block *)&pkt.pkt.message;
+ write_blk->message_type.type = PCI_WRITE_BLOCK;
+ write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+ write_blk->block_id = block_id;
+ write_blk->byte_count = len;
+ memcpy(write_blk->bytes, buf, len);
+ pkt_size = offsetof(struct pci_write_block, bytes) + len;
+ /*
+ * This quirk is required on some hosts shipped around 2018, because
+ * these hosts don't check the pkt_size correctly (new hosts have been
+ * fixed since early 2019). The quirk is also safe on very old hosts
+ * and new hosts, because, on them, what really matters is the length
+ * specified in write_blk->byte_count.
+ */
+ pkt_size += sizeof(pkt.reserved);
+
+ ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
+ (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret)
+ return ret;
+
+ ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
+ if (ret)
+ return ret;
+
+ if (comp_pkt.completion_status != 0) {
+ dev_err(&hbus->hdev->device,
+ "Write Config Block failed: 0x%x\n",
+ comp_pkt.completion_status);
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(hv_write_config_block);
+
+/**
+ * hv_register_block_invalidate() - Invoked when a config block invalidation
+ * arrives from the back-end driver.
+ * @pdev: The PCI driver's representation for this device.
+ * @context: Identifies the device.
+ * @block_invalidate: Identifies all of the blocks being invalidated.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ struct hv_pcibus_device *hbus =
+ container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+ sysdata);
+ struct hv_pci_dev *hpdev;
+
+ hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
+ if (!hpdev)
+ return -ENODEV;
+
+ hpdev->block_invalidate = block_invalidate;
+ hpdev->invalidate_context = context;
+
+ put_pcichild(hpdev);
+ return 0;
+
+}
+EXPORT_SYMBOL(hv_register_block_invalidate);
+
/* Interrupt management hooks */
static void hv_int_desc_free(struct hv_pci_dev *hpdev,
struct tran_int_desc *int_desc)
@@ -1968,6 +2254,7 @@ static void hv_pci_onchannelcallback(void *context)
struct pci_response *response;
struct pci_incoming_message *new_message;
struct pci_bus_relations *bus_rel;
+ struct pci_dev_inval_block *inval;
struct pci_dev_incoming *dev_message;
struct hv_pci_dev *hpdev;
@@ -2045,6 +2332,21 @@ static void hv_pci_onchannelcallback(void *context)
}
break;
+ case PCI_INVALIDATE_BLOCK:
+
+ inval = (struct pci_dev_inval_block *)buffer;
+ hpdev = get_pcichild_wslot(hbus,
+ inval->wslot.slot);
+ if (hpdev) {
+ if (hpdev->block_invalidate) {
+ hpdev->block_invalidate(
+ hpdev->invalidate_context,
+ inval->block_mask);
+ }
+ put_pcichild(hpdev);
+ }
+ break;
+
default:
dev_warn(&hbus->hdev->device,
"Unimplemented protocol message %x\n",
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc3..9d37f8c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1578,4 +1578,19 @@ struct vmpacket_descriptor *
for (pkt = hv_pkt_iter_first(channel); pkt; \
pkt = hv_pkt_iter_next(channel, pkt))
+/*
+ * Functions for passing data between SR-IOV PF and VF drivers. The VF driver
+ * sends requests to read and write blocks. Each block must be 128 bytes or
+ * smaller. Optionally, the VF driver can register a callback function which
+ * will be invoked when the host says that one or more of the first 64 block
+ * IDs is "invalid" which means that the VF driver should reread them.
+ */
+#define HV_CONFIG_BLOCK_SIZE_MAX 128
+int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+int hv_register_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
#endif /* _HYPERV_H */
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v2 2/6] PCI: hv: Add a Hyper-V PCI interface driver for software backchannel interface
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>
This interface driver is a helper driver allows other drivers to
have a common interface with the Hyper-V PCI frontend driver.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
MAINTAINERS | 1 +
drivers/pci/Kconfig | 1 +
drivers/pci/controller/Kconfig | 7 ++++
drivers/pci/controller/Makefile | 1 +
drivers/pci/controller/pci-hyperv-intf.c | 70 ++++++++++++++++++++++++++++++++
drivers/pci/controller/pci-hyperv.c | 12 ++++--
include/linux/hyperv.h | 30 ++++++++++----
7 files changed, 111 insertions(+), 11 deletions(-)
create mode 100644 drivers/pci/controller/pci-hyperv-intf.c
diff --git a/MAINTAINERS b/MAINTAINERS
index e352550..866ae88 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7453,6 +7453,7 @@ F: drivers/hid/hid-hyperv.c
F: drivers/hv/
F: drivers/input/serio/hyperv-keyboard.c
F: drivers/pci/controller/pci-hyperv.c
+F: drivers/pci/controller/pci-hyperv-intf.c
F: drivers/net/hyperv/
F: drivers/scsi/storvsc_drv.c
F: drivers/uio/uio_hv_generic.c
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2ab9240..c313de9 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -182,6 +182,7 @@ config PCI_LABEL
config PCI_HYPERV
tristate "Hyper-V PCI Frontend"
depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+ select PCI_HYPERV_INTERFACE
help
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index fe9f9f1..70e0782 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -281,5 +281,12 @@ config VMD
To compile this driver as a module, choose M here: the
module will be called vmd.
+config PCI_HYPERV_INTERFACE
+ tristate "Hyper-V PCI Interface"
+ depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+ help
+ The Hyper-V PCI Interface is a helper driver allows other drivers to
+ have a common interface with the Hyper-V PCI frontend driver.
+
source "drivers/pci/controller/dwc/Kconfig"
endmenu
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
index d56a507..a2a22c9 100644
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
+obj-$(CONFIG_PCI_HYPERV_INTERFACE) += pci-hyperv-intf.o
obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c
new file mode 100644
index 0000000..087b7eb
--- /dev/null
+++ b/drivers/pci/controller/pci-hyperv-intf.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Author:
+ * Haiyang Zhang <haiyangz@microsoft.com>
+ *
+ * This small module is a helper driver allows other drivers to
+ * have a common interface with the Hyper-V PCI frontend driver.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hyperv.h>
+
+struct hyperv_pci_block_ops hvpci_block_ops;
+EXPORT_SYMBOL(hvpci_block_ops);
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned)
+{
+ if (!hvpci_block_ops.read_block)
+ return -EOPNOTSUPP;
+
+ return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
+ bytes_returned);
+}
+EXPORT_SYMBOL(hyperv_read_cfg_blk);
+
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id)
+{
+ if (!hvpci_block_ops.write_block)
+ return -EOPNOTSUPP;
+
+ return hvpci_block_ops.write_block(dev, buf, len, block_id);
+}
+EXPORT_SYMBOL(hyperv_write_cfg_blk);
+
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ if (!hvpci_block_ops.reg_blk_invalidate)
+ return -EOPNOTSUPP;
+
+ return hvpci_block_ops.reg_blk_invalidate(dev, context,
+ block_invalidate);
+}
+EXPORT_SYMBOL(hyperv_reg_block_invalidate);
+
+static void __exit exit_hv_pci_intf(void)
+{
+ pr_info("unloaded\n");
+}
+
+static int __init init_hv_pci_intf(void)
+{
+ pr_info("loaded\n");
+
+ return 0;
+}
+
+module_init(init_hv_pci_intf);
+module_exit(exit_hv_pci_intf);
+
+MODULE_DESCRIPTION("Hyper-V PCI Interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 57adeca..9c93ac2 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -983,7 +983,6 @@ int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
*bytes_returned = comp_pkt.bytes_returned;
return 0;
}
-EXPORT_SYMBOL(hv_read_config_block);
/**
* hv_pci_write_config_compl() - Invoked when a response packet for a write
@@ -1070,7 +1069,6 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
return 0;
}
-EXPORT_SYMBOL(hv_write_config_block);
/**
* hv_register_block_invalidate() - Invoked when a config block invalidation
@@ -1101,7 +1099,6 @@ int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
return 0;
}
-EXPORT_SYMBOL(hv_register_block_invalidate);
/* Interrupt management hooks */
static void hv_int_desc_free(struct hv_pci_dev *hpdev,
@@ -3045,10 +3042,19 @@ static int hv_pci_remove(struct hv_device *hdev)
static void __exit exit_hv_pci_drv(void)
{
vmbus_driver_unregister(&hv_pci_drv);
+
+ hvpci_block_ops.read_block = NULL;
+ hvpci_block_ops.write_block = NULL;
+ hvpci_block_ops.reg_blk_invalidate = NULL;
}
static int __init init_hv_pci_drv(void)
{
+ /* Initialize PCI block r/w interface */
+ hvpci_block_ops.read_block = hv_read_config_block;
+ hvpci_block_ops.write_block = hv_write_config_block;
+ hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
+
return vmbus_driver_register(&hv_pci_drv);
}
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 9d37f8c..2afe6fd 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1579,18 +1579,32 @@ struct vmpacket_descriptor *
pkt = hv_pkt_iter_next(channel, pkt))
/*
- * Functions for passing data between SR-IOV PF and VF drivers. The VF driver
+ * Interface for passing data between SR-IOV PF and VF drivers. The VF driver
* sends requests to read and write blocks. Each block must be 128 bytes or
* smaller. Optionally, the VF driver can register a callback function which
* will be invoked when the host says that one or more of the first 64 block
* IDs is "invalid" which means that the VF driver should reread them.
*/
#define HV_CONFIG_BLOCK_SIZE_MAX 128
-int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
- unsigned int block_id, unsigned int *bytes_returned);
-int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
- unsigned int block_id);
-int hv_register_block_invalidate(struct pci_dev *dev, void *context,
- void (*block_invalidate)(void *context,
- u64 block_mask));
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+
+struct hyperv_pci_block_ops {
+ int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
+ unsigned int block_id, unsigned int *bytes_returned);
+ int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len,
+ unsigned int block_id);
+ int (*reg_blk_invalidate)(struct pci_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+};
+
+extern struct hyperv_pci_block_ops hvpci_block_ops;
+
#endif /* _HYPERV_H */
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v2 3/6] net/mlx5: Add wrappers for HyperV PCIe operations
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
Add wrapper functions for HyperV PCIe read / write /
block_invalidate_register operations. This will be used as an
infrastructure in the downstream patch for software communication.
This will be enabled by default if CONFIG_PCI_HYPERV_INTERFACE is set.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c | 64 ++++++++++++++++++++++++
drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h | 22 ++++++++
3 files changed, 87 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8b7edaa..247295b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,6 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
#
# Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
new file mode 100644
index 0000000..cf08d02
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+
+static int mlx5_hv_config_common(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset, bool read)
+{
+ int rc = -EOPNOTSUPP;
+ int bytes_returned;
+ int block_id;
+
+ if (offset % HV_CONFIG_BLOCK_SIZE_MAX || len % HV_CONFIG_BLOCK_SIZE_MAX)
+ return -EINVAL;
+
+ block_id = offset / HV_CONFIG_BLOCK_SIZE_MAX;
+
+ rc = read ?
+ hyperv_read_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id,
+ &bytes_returned) :
+ hyperv_write_cfg_blk(dev->pdev, buf,
+ HV_CONFIG_BLOCK_SIZE_MAX, block_id);
+
+ /* Make sure len bytes were read successfully */
+ if (read)
+ rc |= !(len == bytes_returned);
+
+ if (rc) {
+ mlx5_core_err(dev, "Failed to %s hv config, err = %d, len = %d, offset = %d\n",
+ read ? "read" : "write", rc, len,
+ offset);
+ return rc;
+ }
+
+ return 0;
+}
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, true);
+}
+
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset)
+{
+ return mlx5_hv_config_common(dev, buf, len, offset, false);
+}
+
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask))
+{
+ return hyperv_reg_block_invalidate(dev->pdev, context,
+ block_invalidate);
+}
+
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev)
+{
+ hyperv_reg_block_invalidate(dev->pdev, NULL, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
new file mode 100644
index 0000000..f9a4557
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_H__
+#define __LIB_HV_H__
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+#include <linux/hyperv.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_hv_read_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_write_config(struct mlx5_core_dev *dev, void *buf, int len,
+ int offset);
+int mlx5_hv_register_invalidate(struct mlx5_core_dev *dev, void *context,
+ void (*block_invalidate)(void *context,
+ u64 block_mask));
+void mlx5_hv_unregister_invalidate(struct mlx5_core_dev *dev);
+#endif
+
+#endif /* __LIB_HV_H__ */
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v2 6/6] net/mlx5e: Add mlx5e HV VHCA stats agent
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
HV VHCA stats agent is responsible on running a preiodic rx/tx
packets/bytes stats update. Currently the supported format is version
MLX5_HV_VHCA_STATS_VERSION. Block ID 1 is dedicated for statistics data
transfer from the VF to the PF.
The reporter fetch the statistics data from all opened channels, fill it
in a buffer and send it to mlx5_hv_vhca_write_agent.
As the stats layer should include some metadata per block (sequence and
offset), the HV VHCA layer shall modify the buffer before actually send it
over block 1.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 1 +
drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 ++
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c | 162 +++++++++++++++++++++
.../ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h | 25 ++++
drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 1 +
6 files changed, 205 insertions(+)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index fc59a40..a889a38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -36,6 +36,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
en/tc_tun_geneve.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
#
# Core extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8fc5107..4a8008b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -54,6 +54,7 @@
#include "mlx5_core.h"
#include "en_stats.h"
#include "en/fs.h"
+#include "lib/hv_vhca.h"
extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
@@ -777,6 +778,15 @@ struct mlx5e_modify_sq_param {
int rl_index;
};
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+struct mlx5e_hv_vhca_stats_agent {
+ struct mlx5_hv_vhca_agent *agent;
+ struct delayed_work work;
+ u16 delay;
+ void *buf;
+};
+#endif
+
struct mlx5e_xsk {
/* UMEMs are stored separately from channels, because we don't want to
* lose them when channels are recreated. The kernel also stores UMEMs,
@@ -848,6 +858,9 @@ struct mlx5e_priv {
struct devlink_health_reporter *tx_reporter;
struct devlink_health_reporter *rx_reporter;
struct mlx5e_xsk xsk;
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+ struct mlx5e_hv_vhca_stats_agent stats_agent;
+#endif
};
struct mlx5e_profile {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
new file mode 100644
index 0000000..c37b4ac
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include "en.h"
+#include "en/hv_vhca_stats.h"
+#include "lib/hv_vhca.h"
+#include "lib/hv.h"
+
+struct mlx5e_hv_vhca_per_ring_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+};
+
+static void
+mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
+ struct mlx5e_hv_vhca_per_ring_stats *data)
+{
+ struct mlx5e_channel_stats *stats;
+ int tc;
+
+ stats = &priv->channel_stats[ch];
+ data->rx_packets = stats->rq.packets;
+ data->rx_bytes = stats->rq.bytes;
+
+ for (tc = 0; tc < priv->max_opened_tc; tc++) {
+ data->tx_packets += stats->sq[tc].packets;
+ data->tx_bytes += stats->sq[tc].bytes;
+ }
+}
+
+static void mlx5e_hv_vhca_fill_stats(struct mlx5e_priv *priv, u64 *data,
+ int buf_len)
+{
+ int ch, i = 0;
+
+ for (ch = 0; ch < priv->max_nch; ch++) {
+ u64 *buf = data + i;
+
+ if (WARN_ON_ONCE(buf +
+ sizeof(struct mlx5e_hv_vhca_per_ring_stats) >
+ data + buf_len))
+ return;
+
+ mlx5e_hv_vhca_fill_ring_stats(priv, ch,
+ (struct mlx5e_hv_vhca_per_ring_stats *)buf);
+ i += sizeof(struct mlx5e_hv_vhca_per_ring_stats) / sizeof(u64);
+ }
+}
+
+static int mlx5e_hv_vhca_stats_buf_size(struct mlx5e_priv *priv)
+{
+ return (sizeof(struct mlx5e_hv_vhca_per_ring_stats) *
+ priv->max_nch);
+}
+
+static void mlx5e_hv_vhca_stats_work(struct work_struct *work)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5_hv_vhca_agent *agent;
+ struct delayed_work *dwork;
+ struct mlx5e_priv *priv;
+ int buf_len, rc;
+ void *buf;
+
+ dwork = to_delayed_work(work);
+ sagent = container_of(dwork, struct mlx5e_hv_vhca_stats_agent, work);
+ priv = container_of(sagent, struct mlx5e_priv, stats_agent);
+ buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ agent = sagent->agent;
+ buf = sagent->buf;
+
+ memset(buf, 0, buf_len);
+ mlx5e_hv_vhca_fill_stats(priv, buf, buf_len);
+
+ rc = mlx5_hv_vhca_agent_write(agent, buf, buf_len);
+ if (rc) {
+ mlx5_core_err(priv->mdev,
+ "%s: Failed to write stats, err = %d\n",
+ __func__, rc);
+ return;
+ }
+
+ if (sagent->delay)
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+enum {
+ MLX5_HV_VHCA_STATS_VERSION = 1,
+ MLX5_HV_VHCA_STATS_UPDATE_ONCE = 0xFFFF,
+};
+
+static void mlx5e_hv_vhca_stats_control(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_control_block *block)
+{
+ struct mlx5e_hv_vhca_stats_agent *sagent;
+ struct mlx5e_priv *priv;
+
+ priv = mlx5_hv_vhca_agent_priv(agent);
+ sagent = &priv->stats_agent;
+
+ block->version = MLX5_HV_VHCA_STATS_VERSION;
+ block->rings = priv->max_nch;
+
+ if (!block->command) {
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+ return;
+ }
+
+ sagent->delay = block->command == MLX5_HV_VHCA_STATS_UPDATE_ONCE ? 0 :
+ msecs_to_jiffies(block->command * 100);
+
+ queue_delayed_work(priv->wq, &sagent->work, sagent->delay);
+}
+
+static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
+{
+ struct mlx5e_priv *priv = mlx5_hv_vhca_agent_priv(agent);
+
+ cancel_delayed_work_sync(&priv->stats_agent.work);
+}
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+ int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
+ struct mlx5_hv_vhca_agent *agent;
+
+ priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
+ if (!priv->stats_agent.buf)
+ return -ENOMEM;
+
+ agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
+ MLX5_HV_VHCA_AGENT_STATS,
+ mlx5e_hv_vhca_stats_control, NULL,
+ mlx5e_hv_vhca_stats_cleanup,
+ priv);
+
+ if (IS_ERR_OR_NULL(agent)) {
+ if (IS_ERR(agent))
+ netdev_warn(priv->netdev,
+ "Failed to create hv vhca stats agent, err = %ld\n",
+ PTR_ERR(agent));
+
+ kfree(priv->stats_agent.buf);
+ return IS_ERR_OR_NULL(agent);
+ }
+
+ priv->stats_agent.agent = agent;
+ INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
+
+ return 0;
+}
+
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+ if (IS_ERR_OR_NULL(priv->stats_agent.agent))
+ return;
+
+ mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
+ kfree(priv->stats_agent.buf);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
new file mode 100644
index 0000000..664463f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_STATS_VHCA_H__
+#define __MLX5_EN_STATS_VHCA_H__
+#include "en.h"
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
+
+#else
+
+static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+{
+ return 0;
+}
+
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
+{
+}
+#endif
+
+#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5721d3d..fac8455 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -64,6 +64,7 @@
#include "en/xsk/setup.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
+#include "en/hv_vhca_stats.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
@@ -5103,6 +5104,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_init(priv);
+ mlx5e_hv_vhca_stats_create(priv);
if (netdev->reg_state != NETREG_REGISTERED)
return;
#ifdef CONFIG_MLX5_CORE_EN_DCB
@@ -5135,6 +5137,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->set_rx_mode_work);
+ mlx5e_hv_vhca_stats_destroy(priv);
if (mlx5e_monitor_counter_supported(priv))
mlx5e_monitor_counter_cleanup(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index 984e7ad..4bad6a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -13,6 +13,7 @@
enum mlx5_hv_vhca_agent_type {
MLX5_HV_VHCA_AGENT_CONTROL = 0,
+ MLX5_HV_VHCA_AGENT_STATS = 1,
MLX5_HV_VHCA_AGENT_MAX = 32,
};
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v2 5/6] net/mlx5: Add HV VHCA control agent
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
Control agent is responsible over of the control block (ID 0). It should
update the PF via this block about every capability change. In addition,
upon block 0 invalidate, it should activate all other supported agents
with data requests from the PF.
Upon agent create/destroy, the invalidate callback of the control agent
is being called in order to update the PF driver about this change.
The control agent is an integral part of HV VHCA and will be created
and destroy as part of the HV VHCA init/cleanup flow.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 122 ++++++++++++++++++++-
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 1 +
2 files changed, 121 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
index 84d1d75..4047629 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -109,22 +109,131 @@ void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
queue_work(hv_vhca->work_queue, &work->invalidate_work);
}
+#define AGENT_MASK(type) (type ? BIT(type - 1) : 0 /* control */)
+
+static void mlx5_hv_vhca_agents_control(struct mlx5_hv_vhca *hv_vhca,
+ struct mlx5_hv_vhca_control_block *block)
+{
+ int i;
+
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (!agent || !agent->control)
+ continue;
+
+ if (!(AGENT_MASK(agent->type) & block->control))
+ continue;
+
+ agent->control(agent, block);
+ }
+}
+
+static void mlx5_hv_vhca_capabilities(struct mlx5_hv_vhca *hv_vhca,
+ u32 *capabilities)
+{
+ int i;
+
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (agent)
+ *capabilities |= AGENT_MASK(agent->type);
+ }
+}
+
+static void
+mlx5_hv_vhca_control_agent_invalidate(struct mlx5_hv_vhca_agent *agent,
+ u64 block_mask)
+{
+ struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+ struct mlx5_core_dev *dev = hv_vhca->dev;
+ struct mlx5_hv_vhca_control_block *block;
+ u32 capabilities = 0;
+ int err;
+
+ block = kzalloc(sizeof(*block), GFP_KERNEL);
+ if (!block)
+ return;
+
+ err = mlx5_hv_read_config(dev, block, sizeof(*block), 0);
+ if (err)
+ goto free_block;
+
+ mlx5_hv_vhca_capabilities(hv_vhca, &capabilities);
+
+ /* In case no capabilities, send empty block in return */
+ if (!capabilities) {
+ memset(block, 0, sizeof(*block));
+ goto write;
+ }
+
+ if (block->capabilities != capabilities)
+ block->capabilities = capabilities;
+
+ if (block->control & ~capabilities)
+ goto free_block;
+
+ mlx5_hv_vhca_agents_control(hv_vhca, block);
+ block->command_ack = block->command;
+
+write:
+ mlx5_hv_write_config(dev, block, sizeof(*block), 0);
+
+free_block:
+ kfree(block);
+}
+
+static struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_control_agent_create(struct mlx5_hv_vhca *hv_vhca)
+{
+ return mlx5_hv_vhca_agent_create(hv_vhca, MLX5_HV_VHCA_AGENT_CONTROL,
+ NULL,
+ mlx5_hv_vhca_control_agent_invalidate,
+ NULL, NULL);
+}
+
+static void mlx5_hv_vhca_control_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+ mlx5_hv_vhca_agent_destroy(agent);
+}
+
int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
{
+ struct mlx5_hv_vhca_agent *agent;
+ int err;
+
if (IS_ERR_OR_NULL(hv_vhca))
return IS_ERR_OR_NULL(hv_vhca);
- return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
- mlx5_hv_vhca_invalidate);
+ err = mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+ mlx5_hv_vhca_invalidate);
+ if (err)
+ return err;
+
+ agent = mlx5_hv_vhca_control_agent_create(hv_vhca);
+ if (IS_ERR_OR_NULL(agent)) {
+ mlx5_hv_unregister_invalidate(hv_vhca->dev);
+ return IS_ERR_OR_NULL(agent);
+ }
+
+ hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL] = agent;
+
+ return 0;
}
void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
{
+ struct mlx5_hv_vhca_agent *agent;
int i;
if (IS_ERR_OR_NULL(hv_vhca))
return;
+ agent = hv_vhca->agents[MLX5_HV_VHCA_AGENT_CONTROL];
+ if (agent)
+ mlx5_hv_vhca_control_agent_destroy(agent);
+
mutex_lock(&hv_vhca->agents_lock);
for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
WARN_ON(hv_vhca->agents[i]);
@@ -134,6 +243,11 @@ void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
mlx5_hv_unregister_invalidate(hv_vhca->dev);
}
+static void mlx5_hv_vhca_agents_update(struct mlx5_hv_vhca *hv_vhca)
+{
+ mlx5_hv_vhca_invalidate(hv_vhca, BIT(MLX5_HV_VHCA_AGENT_CONTROL));
+}
+
struct mlx5_hv_vhca_agent *
mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
enum mlx5_hv_vhca_agent_type type,
@@ -174,6 +288,8 @@ struct mlx5_hv_vhca_agent *
hv_vhca->agents[type] = agent;
mutex_unlock(&hv_vhca->agents_lock);
+ mlx5_hv_vhca_agents_update(hv_vhca);
+
return agent;
}
@@ -195,6 +311,8 @@ void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
agent->cleanup(agent);
kfree(agent);
+
+ mlx5_hv_vhca_agents_update(hv_vhca);
}
static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
index cdf1303..984e7ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -12,6 +12,7 @@
struct mlx5_hv_vhca_control_block;
enum mlx5_hv_vhca_agent_type {
+ MLX5_HV_VHCA_AGENT_CONTROL = 0,
MLX5_HV_VHCA_AGENT_MAX = 32,
};
--
1.8.3.1
^ permalink raw reply related
* [PATCH net-next,v2 4/6] net/mlx5: Add HV VHCA infrastructure
From: Haiyang Zhang @ 2019-08-19 19:30 UTC (permalink / raw)
To: sashal@kernel.org, davem@davemloft.net, saeedm@mellanox.com,
leon@kernel.org, eranbe@mellanox.com, lorenzo.pieralisi@arm.com,
bhelgaas@google.com, linux-pci@vger.kernel.org,
linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: Haiyang Zhang, KY Srinivasan, Stephen Hemminger,
linux-kernel@vger.kernel.org
In-Reply-To: <1566242976-108801-1-git-send-email-haiyangz@microsoft.com>
From: Eran Ben Elisha <eranbe@mellanox.com>
HV VHCA is a layer which provides PF to VF communication channel based on
HyperV PCI config channel. It implements Mellanox's Inter VHCA control
communication protocol. The protocol contains control block in order to
pass messages between the PF and VF drivers, and data blocks in order to
pass actual data.
The infrastructure is agent based. Each agent will be responsible of
contiguous buffer blocks in the VHCA config space. This infrastructure will
bind agents to their blocks, and those agents can only access read/write
the buffer blocks assigned to them. Each agent will provide three
callbacks (control, invalidate, cleanup). Control will be invoked when
block-0 is invalidated with a command that concerns this agent. Invalidate
callback will be invoked if one of the blocks assigned to this agent was
invalidated. Cleanup will be invoked before the agent is being freed in
order to clean all of its open resources or deferred works.
Block-0 serves as the control block. All execution commands from the PF
will be written by the PF over this block. VF will ack on those by
writing on block-0 as well. Its format is described by struct
mlx5_hv_vhca_control_block layout.
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +-
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 253 +++++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h | 102 +++++++++
drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +
include/linux/mlx5/driver.h | 2 +
5 files changed, 365 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 247295b..fc59a40 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offlo
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o
mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
-mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
#
# Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
new file mode 100644
index 0000000..84d1d75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+#include "lib/hv_vhca.h"
+
+struct mlx5_hv_vhca {
+ struct mlx5_core_dev *dev;
+ struct workqueue_struct *work_queue;
+ struct mlx5_hv_vhca_agent *agents[MLX5_HV_VHCA_AGENT_MAX];
+ struct mutex agents_lock; /* Protect agents array */
+};
+
+struct mlx5_hv_vhca_work {
+ struct work_struct invalidate_work;
+ struct mlx5_hv_vhca *hv_vhca;
+ u64 block_mask;
+};
+
+struct mlx5_hv_vhca_data_block {
+ u16 sequence;
+ u16 offset;
+ u8 reserved[4];
+ u64 data[15];
+};
+
+struct mlx5_hv_vhca_agent {
+ enum mlx5_hv_vhca_agent_type type;
+ struct mlx5_hv_vhca *hv_vhca;
+ void *priv;
+ u16 seq;
+ void (*control)(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_control_block *block);
+ void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
+ u64 block_mask);
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_hv_vhca *hv_vhca = NULL;
+
+ hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
+ if (!hv_vhca)
+ return ERR_PTR(-ENOMEM);
+
+ hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
+ if (!hv_vhca->work_queue) {
+ kfree(hv_vhca);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ hv_vhca->dev = dev;
+ mutex_init(&hv_vhca->agents_lock);
+
+ return hv_vhca;
+}
+
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return;
+
+ destroy_workqueue(hv_vhca->work_queue);
+ kfree(hv_vhca);
+}
+
+static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
+{
+ struct mlx5_hv_vhca_work *hwork;
+ struct mlx5_hv_vhca *hv_vhca;
+ int i;
+
+ hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
+ hv_vhca = hwork->hv_vhca;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+ struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+ if (!agent || !agent->invalidate)
+ continue;
+
+ if (!(BIT(agent->type) & hwork->block_mask))
+ continue;
+
+ agent->invalidate(agent, hwork->block_mask);
+ }
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ kfree(hwork);
+}
+
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
+{
+ struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
+ struct mlx5_hv_vhca_work *work;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
+ work->hv_vhca = hv_vhca;
+ work->block_mask = block_mask;
+
+ queue_work(hv_vhca->work_queue, &work->invalidate_work);
+}
+
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return IS_ERR_OR_NULL(hv_vhca);
+
+ return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+ mlx5_hv_vhca_invalidate);
+}
+
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+ int i;
+
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
+ WARN_ON(hv_vhca->agents[i]);
+
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ mlx5_hv_unregister_invalidate(hv_vhca->dev);
+}
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
+ void *priv)
+{
+ struct mlx5_hv_vhca_agent *agent;
+
+ if (IS_ERR_OR_NULL(hv_vhca))
+ return ERR_PTR(-ENOMEM);
+
+ if (type >= MLX5_HV_VHCA_AGENT_MAX)
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&hv_vhca->agents_lock);
+ if (hv_vhca->agents[type]) {
+ mutex_unlock(&hv_vhca->agents_lock);
+ return ERR_PTR(-EINVAL);
+ }
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ agent = kzalloc(sizeof(*agent), GFP_KERNEL);
+ if (!agent)
+ return ERR_PTR(-ENOMEM);
+
+ agent->type = type;
+ agent->hv_vhca = hv_vhca;
+ agent->priv = priv;
+ agent->control = control;
+ agent->invalidate = invalidate;
+ agent->cleanup = cleaup;
+
+ mutex_lock(&hv_vhca->agents_lock);
+ hv_vhca->agents[type] = agent;
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ return agent;
+}
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+ struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+
+ mutex_lock(&hv_vhca->agents_lock);
+
+ if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
+ mutex_unlock(&hv_vhca->agents_lock);
+ return;
+ }
+
+ hv_vhca->agents[agent->type] = NULL;
+ mutex_unlock(&hv_vhca->agents_lock);
+
+ if (agent->cleanup)
+ agent->cleanup(agent);
+
+ kfree(agent);
+}
+
+static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
+ struct mlx5_hv_vhca_data_block *data_block,
+ void *src, int len, int *offset)
+{
+ int bytes = min_t(int, (int)sizeof(data_block->data), len);
+
+ data_block->sequence = agent->seq;
+ data_block->offset = (*offset)++;
+ memcpy(data_block->data, src, bytes);
+
+ return bytes;
+}
+
+static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
+{
+ agent->seq++;
+}
+
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len)
+{
+ int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
+ int block_offset = 0;
+ int total = 0;
+ int err;
+
+ while (len) {
+ struct mlx5_hv_vhca_data_block data_block = {0};
+ int bytes;
+
+ bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
+ buf + total,
+ len, &block_offset);
+ if (!bytes)
+ return -ENOMEM;
+
+ err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
+ sizeof(data_block), offset);
+ if (err)
+ return err;
+
+ total += bytes;
+ len -= bytes;
+ }
+
+ mlx5_hv_vhca_agent_seq_update(agent);
+
+ return 0;
+}
+
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
+{
+ return agent->priv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
new file mode 100644
index 0000000..cdf1303
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_VHCA_H__
+#define __LIB_HV_VHCA_H__
+
+#include "en.h"
+#include "lib/hv.h"
+
+struct mlx5_hv_vhca_agent;
+struct mlx5_hv_vhca;
+struct mlx5_hv_vhca_control_block;
+
+enum mlx5_hv_vhca_agent_type {
+ MLX5_HV_VHCA_AGENT_MAX = 32,
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+struct mlx5_hv_vhca_control_block {
+ u32 capabilities;
+ u32 control;
+ u16 command;
+ u16 command_ack;
+ u16 version;
+ u16 rings;
+ u32 reserved1[28];
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+ void *context);
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len);
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
+
+#else
+
+static inline struct mlx5_hv_vhca *
+mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+ return NULL;
+}
+
+static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+ return 0;
+}
+
+static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline void mlx5_hv_vhca_invalidate(void *context,
+ u64 block_mask)
+{
+}
+
+static inline struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+ enum mlx5_hv_vhca_agent_type type,
+ void (*control)(struct mlx5_hv_vhca_agent*,
+ struct mlx5_hv_vhca_control_block *block),
+ void (*invalidate)(struct mlx5_hv_vhca_agent*,
+ u64 block_mask),
+ void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+ void *context)
+{
+ return NULL;
+}
+
+static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+}
+
+static inline int
+mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
+ void *buf, int len)
+{
+ return 0;
+}
+#endif
+
+#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 80437d4..4b74315 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -69,6 +69,7 @@
#include "lib/pci_vsc.h"
#include "diag/fw_tracer.h"
#include "ecpf.h"
+#include "lib/hv_vhca.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -868,6 +869,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
}
dev->tracer = mlx5_fw_tracer_create(dev);
+ dev->hv_vhca = mlx5_hv_vhca_create(dev);
return 0;
@@ -897,6 +899,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
+ mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_fpga_cleanup(dev);
mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -1063,6 +1066,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_fw_tracer;
}
+ mlx5_hv_vhca_init(dev->hv_vhca);
+
err = mlx5_fpga_device_start(dev);
if (err) {
mlx5_core_err(dev, "fpga device start failed %d\n", err);
@@ -1118,6 +1123,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
err_ipsec_start:
mlx5_fpga_device_stop(dev);
err_fpga_start:
+ mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_tracer_cleanup(dev->tracer);
err_fw_tracer:
mlx5_eq_table_destroy(dev);
@@ -1138,6 +1144,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
+ mlx5_hv_vhca_cleanup(dev->hv_vhca);
mlx5_fw_tracer_cleanup(dev->tracer);
mlx5_eq_table_destroy(dev);
mlx5_irq_table_destroy(dev);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 467de34..0e00cbc 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -638,6 +638,7 @@ struct mlx5_clock {
struct mlx5_fw_tracer;
struct mlx5_vxlan;
struct mlx5_geneve;
+struct mlx5_hv_vhca;
struct mlx5_core_dev {
struct device *device;
@@ -685,6 +686,7 @@ struct mlx5_core_dev {
struct mlx5_ib_clock_info *clock_info;
struct mlx5_fw_tracer *tracer;
u32 vsc_addr;
+ struct mlx5_hv_vhca *hv_vhca;
};
struct mlx5_db {
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH net-next 1/1] Add genphy_c45_config_aneg() function to phy-c45.c
From: Heiner Kallweit @ 2019-08-19 19:51 UTC (permalink / raw)
To: Marco Hartmann, Christian Herber, andrew@lunn.ch,
f.fainelli@gmail.com, davem@davemloft.net, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <1566237157-9054-2-git-send-email-marco.hartmann@nxp.com>
On 19.08.2019 19:52, Marco Hartmann wrote:
> and call it from phy_config_aneg().
>
Here something went wrong.
> commit 34786005eca3 ("net: phy: prevent PHYs w/o Clause 22 regs from
> calling genphy_config_aneg") introduced a check that aborts
> phy_config_aneg() if the phy is a C45 phy.
> This causes phy_state_machine() to call phy_error() so that the phy
> ends up in PHY_HALTED state.
>
> Instead of returning -EOPNOTSUPP, call genphy_c45_config_aneg()
> (analogous to the C22 case) so that the state machine can run
> correctly.
>
> genphy_c45_config_aneg() closely resembles mv3310_config_aneg()
> in drivers/net/phy/marvell10g.c, excluding vendor specific
> configurations for 1000BaseT.
>
> Fixes: 34786005eca3 ("net: phy: prevent PHYs w/o Clause 22 regs from
> calling genphy_config_aneg")
>
This tag seems to be the wrong one. This change was done before
genphy_c45_driver was added. Most likely tag should be:
22b56e827093 ("net: phy: replace genphy_10g_driver with genphy_c45_driver")
And because it's a fix applying to previous kernel versions it should
be annotated "net", not "net-next".
> Signed-off-by: Marco Hartmann <marco.hartmann@nxp.com>
> ---
> drivers/net/phy/phy-c45.c | 26 ++++++++++++++++++++++++++
> drivers/net/phy/phy.c | 2 +-
> include/linux/phy.h | 1 +
> 3 files changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
> index b9d4145781ca..fa9062fd9122 100644
> --- a/drivers/net/phy/phy-c45.c
> +++ b/drivers/net/phy/phy-c45.c
> @@ -509,6 +509,32 @@ int genphy_c45_read_status(struct phy_device *phydev)
> }
> EXPORT_SYMBOL_GPL(genphy_c45_read_status);
>
> +/**
> + * genphy_c45_config_aneg - restart auto-negotiation or forced setup
> + * @phydev: target phy_device struct
> + *
> + * Description: If auto-negotiation is enabled, we configure the
> + * advertising, and then restart auto-negotiation. If it is not
> + * enabled, then we force a configuration.
> + */
> +int genphy_c45_config_aneg(struct phy_device *phydev)
> +{
> + int ret;
> + bool changed = false;
Reverse xmas tree please.
> [...]
Overall looks good to me. For a single patch you don't have to provide
a cover letter.
^ permalink raw reply
* [PATCH net-next v2 0/6] net: dsa: enable and disable all ports
From: Vivien Didelot @ 2019-08-19 20:00 UTC (permalink / raw)
To: netdev; +Cc: marek.behun, davem, f.fainelli, andrew, Vivien Didelot
The DSA stack currently calls the .port_enable and .port_disable switch
callbacks for slave ports only. However, it is useful to call them for all
port types. For example this allows some drivers to delay the optimization
of power consumption after the switch is setup. This can also help reducing
the setup code of drivers a bit.
The first DSA core patches enable and disable all ports of a switch, regardless
their type. The last mv88e6xxx patches remove redundant code from the driver
setup and the said callbacks, now that they handle SERDES power for all ports.
Changes in v2: do not guard .port_disable for broadcom switches.
Vivien Didelot (6):
net: dsa: use a single switch statement for port setup
net: dsa: do not enable or disable non user ports
net: dsa: enable and disable all ports
net: dsa: mv88e6xxx: do not change STP state on port disabling
net: dsa: mv88e6xxx: enable SERDES after setup
net: dsa: mv88e6xxx: wrap SERDES IRQ in power function
drivers/net/dsa/b53/b53_common.c | 7 +-
drivers/net/dsa/bcm_sf2.c | 3 +
drivers/net/dsa/lan9303-core.c | 6 ++
drivers/net/dsa/lantiq_gswip.c | 6 ++
drivers/net/dsa/microchip/ksz_common.c | 6 ++
drivers/net/dsa/mt7530.c | 6 ++
drivers/net/dsa/mv88e6xxx/chip.c | 64 ++++++-----------
net/dsa/dsa2.c | 98 +++++++++++++-------------
8 files changed, 106 insertions(+), 90 deletions(-)
--
2.22.0
^ permalink raw reply
* [PATCH net-next v2 2/6] net: dsa: do not enable or disable non user ports
From: Vivien Didelot @ 2019-08-19 20:00 UTC (permalink / raw)
To: netdev; +Cc: marek.behun, davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190819200053.21637-1-vivien.didelot@gmail.com>
The .port_enable and .port_disable operations are currently only
called for user ports, hence assuming they have a slave device. In
preparation for using these operations for other port types as well,
simply guard all implementations against non user ports and return
directly in such case.
Note that bcm_sf2_sw_suspend() currently calls bcm_sf2_port_disable()
(and thus b53_disable_port()) against the user and CPU ports, so do
not guards those functions. They will be called for unused ports in
the future, but that was expected by those drivers anyway.
Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
drivers/net/dsa/b53/b53_common.c | 7 ++++++-
drivers/net/dsa/bcm_sf2.c | 3 +++
drivers/net/dsa/lan9303-core.c | 6 ++++++
drivers/net/dsa/lantiq_gswip.c | 6 ++++++
drivers/net/dsa/microchip/ksz_common.c | 6 ++++++
drivers/net/dsa/mt7530.c | 6 ++++++
drivers/net/dsa/mv88e6xxx/chip.c | 6 ++++++
7 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 907af62846ba..7d328a5f0161 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -510,10 +510,15 @@ EXPORT_SYMBOL(b53_imp_vlan_setup);
int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
{
struct b53_device *dev = ds->priv;
- unsigned int cpu_port = ds->ports[port].cpu_dp->index;
+ unsigned int cpu_port;
int ret = 0;
u16 pvlan;
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
+ cpu_port = ds->ports[port].cpu_dp->index;
+
if (dev->ops->irq_enable)
ret = dev->ops->irq_enable(dev, port);
if (ret)
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 49f99436018a..4f839348011d 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -157,6 +157,9 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
unsigned int i;
u32 reg;
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
/* Clear the memory power down */
reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
reg &= ~P_TXQ_PSM_VDD(port);
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index 7a2063e7737a..bbec86b9418e 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1079,6 +1079,9 @@ static int lan9303_port_enable(struct dsa_switch *ds, int port,
{
struct lan9303 *chip = ds->priv;
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
return lan9303_enable_processing_port(chip, port);
}
@@ -1086,6 +1089,9 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port)
{
struct lan9303 *chip = ds->priv;
+ if (!dsa_is_user_port(ds, port))
+ return;
+
lan9303_disable_processing_port(chip, port);
lan9303_phy_write(ds, chip->phy_addr_base + port, MII_BMCR, BMCR_PDOWN);
}
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 2175ec13bb2c..a69c9b9878b7 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -642,6 +642,9 @@ static int gswip_port_enable(struct dsa_switch *ds, int port,
struct gswip_priv *priv = ds->priv;
int err;
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
if (!dsa_is_cpu_port(ds, port)) {
err = gswip_add_single_port_br(priv, port, true);
if (err)
@@ -678,6 +681,9 @@ static void gswip_port_disable(struct dsa_switch *ds, int port)
{
struct gswip_priv *priv = ds->priv;
+ if (!dsa_is_user_port(ds, port))
+ return;
+
if (!dsa_is_cpu_port(ds, port)) {
gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_DOWN,
GSWIP_MDIO_PHY_LINK_MASK,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index b45c7b972cec..b0b870f0c252 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -361,6 +361,9 @@ int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
{
struct ksz_device *dev = ds->priv;
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
/* setup slave port */
dev->dev_ops->port_setup(dev, port, false);
if (dev->dev_ops->phy_setup)
@@ -378,6 +381,9 @@ void ksz_disable_port(struct dsa_switch *ds, int port)
{
struct ksz_device *dev = ds->priv;
+ if (!dsa_is_user_port(ds, port))
+ return;
+
dev->on_ports &= ~(1 << port);
dev->live_ports &= ~(1 << port);
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 3181e95586d6..c48e29486b10 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -726,6 +726,9 @@ mt7530_port_enable(struct dsa_switch *ds, int port,
{
struct mt7530_priv *priv = ds->priv;
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
mutex_lock(&priv->reg_mutex);
/* Setup the MAC for the user port */
@@ -751,6 +754,9 @@ mt7530_port_disable(struct dsa_switch *ds, int port)
{
struct mt7530_priv *priv = ds->priv;
+ if (!dsa_is_user_port(ds, port))
+ return;
+
mutex_lock(&priv->reg_mutex);
/* Clear up all port matrix which could be restored in the next
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 9b3ad22a5b98..5e557545df6d 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2267,6 +2267,9 @@ static int mv88e6xxx_port_enable(struct dsa_switch *ds, int port,
struct mv88e6xxx_chip *chip = ds->priv;
int err;
+ if (!dsa_is_user_port(ds, port))
+ return 0;
+
mv88e6xxx_reg_lock(chip);
err = mv88e6xxx_serdes_power(chip, port, true);
@@ -2283,6 +2286,9 @@ static void mv88e6xxx_port_disable(struct dsa_switch *ds, int port)
{
struct mv88e6xxx_chip *chip = ds->priv;
+ if (!dsa_is_user_port(ds, port))
+ return;
+
mv88e6xxx_reg_lock(chip);
if (mv88e6xxx_port_set_state(chip, port, BR_STATE_DISABLED))
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 3/6] net: dsa: enable and disable all ports
From: Vivien Didelot @ 2019-08-19 20:00 UTC (permalink / raw)
To: netdev; +Cc: marek.behun, davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190819200053.21637-1-vivien.didelot@gmail.com>
Call the .port_enable and .port_disable functions for all ports,
not only the user ports, so that drivers may optimize the power
consumption of all ports after a successful setup.
Unused ports are now disabled on setup. CPU and DSA ports are now
enabled on setup and disabled on teardown. User ports were already
enabled at slave creation and disabled at slave destruction.
Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
---
net/dsa/dsa2.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 405552ac4c08..8c4eccb0cfe6 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -264,6 +264,7 @@ static int dsa_port_setup(struct dsa_port *dp)
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
+ dsa_port_disable(dp);
break;
case DSA_PORT_TYPE_CPU:
memset(dlp, 0, sizeof(*dlp));
@@ -274,6 +275,10 @@ static int dsa_port_setup(struct dsa_port *dp)
return err;
err = dsa_port_link_register_of(dp);
+ if (err)
+ return err;
+
+ err = dsa_port_enable(dp, NULL);
if (err)
return err;
break;
@@ -286,6 +291,10 @@ static int dsa_port_setup(struct dsa_port *dp)
return err;
err = dsa_port_link_register_of(dp);
+ if (err)
+ return err;
+
+ err = dsa_port_enable(dp, NULL);
if (err)
return err;
break;
@@ -317,11 +326,13 @@ static void dsa_port_teardown(struct dsa_port *dp)
case DSA_PORT_TYPE_UNUSED:
break;
case DSA_PORT_TYPE_CPU:
+ dsa_port_disable(dp);
dsa_tag_driver_put(dp->tag_ops);
devlink_port_unregister(dlp);
dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_DSA:
+ dsa_port_disable(dp);
devlink_port_unregister(dlp);
dsa_port_link_unregister_of(dp);
break;
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 4/6] net: dsa: mv88e6xxx: do not change STP state on port disabling
From: Vivien Didelot @ 2019-08-19 20:00 UTC (permalink / raw)
To: netdev; +Cc: marek.behun, davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190819200053.21637-1-vivien.didelot@gmail.com>
When disabling a port, that is not for the driver to decide what to
do with the STP state. This is already handled by the DSA layer.
Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
---
drivers/net/dsa/mv88e6xxx/chip.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 5e557545df6d..27e1622bb03b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2291,9 +2291,6 @@ static void mv88e6xxx_port_disable(struct dsa_switch *ds, int port)
mv88e6xxx_reg_lock(chip);
- if (mv88e6xxx_port_set_state(chip, port, BR_STATE_DISABLED))
- dev_err(chip->dev, "failed to disable port\n");
-
if (chip->info->ops->serdes_irq_free)
chip->info->ops->serdes_irq_free(chip, port);
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 5/6] net: dsa: mv88e6xxx: enable SERDES after setup
From: Vivien Didelot @ 2019-08-19 20:00 UTC (permalink / raw)
To: netdev; +Cc: marek.behun, davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190819200053.21637-1-vivien.didelot@gmail.com>
SERDES is powered on for CPU and DSA ports and powered down for unused
ports at setup time. But now that DSA calls mv88e6xxx_port_enable
and mv88e6xxx_port_disable for all ports, the SERDES power can now
be handled after setup inconditionally for all ports.
Using the port enable and disable callbacks also have the benefit to
handle the SERDES IRQ for non user ports as well.
Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
drivers/net/dsa/mv88e6xxx/chip.c | 35 ++++----------------------------
1 file changed, 4 insertions(+), 31 deletions(-)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 27e1622bb03b..c72b3db75c54 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2151,16 +2151,6 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
if (err)
return err;
- /* Enable the SERDES interface for DSA and CPU ports. Normal
- * ports SERDES are enabled when the port is enabled, thus
- * saving a bit of power.
- */
- if ((dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) {
- err = mv88e6xxx_serdes_power(chip, port, true);
- if (err)
- return err;
- }
-
/* Port Control 2: don't force a good FCS, set the maximum frame size to
* 10240 bytes, disable 802.1q tags checking, don't discard tagged or
* untagged frames on this port, do a destination address lookup on all
@@ -2267,9 +2257,6 @@ static int mv88e6xxx_port_enable(struct dsa_switch *ds, int port,
struct mv88e6xxx_chip *chip = ds->priv;
int err;
- if (!dsa_is_user_port(ds, port))
- return 0;
-
mv88e6xxx_reg_lock(chip);
err = mv88e6xxx_serdes_power(chip, port, true);
@@ -2286,9 +2273,6 @@ static void mv88e6xxx_port_disable(struct dsa_switch *ds, int port)
{
struct mv88e6xxx_chip *chip = ds->priv;
- if (!dsa_is_user_port(ds, port))
- return;
-
mv88e6xxx_reg_lock(chip);
if (chip->info->ops->serdes_irq_free)
@@ -2461,27 +2445,16 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
/* Setup Switch Port Registers */
for (i = 0; i < mv88e6xxx_num_ports(chip); i++) {
+ if (dsa_is_unused_port(ds, i))
+ continue;
+
/* Prevent the use of an invalid port. */
- if (mv88e6xxx_is_invalid_port(chip, i) &&
- !dsa_is_unused_port(ds, i)) {
+ if (mv88e6xxx_is_invalid_port(chip, i)) {
dev_err(chip->dev, "port %d is invalid\n", i);
err = -EINVAL;
goto unlock;
}
- if (dsa_is_unused_port(ds, i)) {
- err = mv88e6xxx_port_set_state(chip, i,
- BR_STATE_DISABLED);
- if (err)
- goto unlock;
-
- err = mv88e6xxx_serdes_power(chip, i, false);
- if (err)
- goto unlock;
-
- continue;
- }
-
err = mv88e6xxx_setup_port(chip, i);
if (err)
goto unlock;
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 6/6] net: dsa: mv88e6xxx: wrap SERDES IRQ in power function
From: Vivien Didelot @ 2019-08-19 20:00 UTC (permalink / raw)
To: netdev; +Cc: marek.behun, davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190819200053.21637-1-vivien.didelot@gmail.com>
Now that mv88e6xxx_serdes_power is only called after driver setup,
we can wrap the SERDES IRQ code directly within it for clarity.
Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
drivers/net/dsa/mv88e6xxx/chip.c | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index c72b3db75c54..d0bf98c10b2b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2057,10 +2057,26 @@ static int mv88e6xxx_setup_egress_floods(struct mv88e6xxx_chip *chip, int port)
static int mv88e6xxx_serdes_power(struct mv88e6xxx_chip *chip, int port,
bool on)
{
- if (chip->info->ops->serdes_power)
- return chip->info->ops->serdes_power(chip, port, on);
+ int err;
- return 0;
+ if (!chip->info->ops->serdes_power)
+ return 0;
+
+ if (on) {
+ err = chip->info->ops->serdes_power(chip, port, true);
+ if (err)
+ return err;
+
+ if (chip->info->ops->serdes_irq_setup)
+ err = chip->info->ops->serdes_irq_setup(chip, port);
+ } else {
+ if (chip->info->ops->serdes_irq_free)
+ chip->info->ops->serdes_irq_free(chip, port);
+
+ err = chip->info->ops->serdes_power(chip, port, false);
+ }
+
+ return err;
}
static int mv88e6xxx_setup_upstream_port(struct mv88e6xxx_chip *chip, int port)
@@ -2258,12 +2274,7 @@ static int mv88e6xxx_port_enable(struct dsa_switch *ds, int port,
int err;
mv88e6xxx_reg_lock(chip);
-
err = mv88e6xxx_serdes_power(chip, port, true);
-
- if (!err && chip->info->ops->serdes_irq_setup)
- err = chip->info->ops->serdes_irq_setup(chip, port);
-
mv88e6xxx_reg_unlock(chip);
return err;
@@ -2274,13 +2285,8 @@ static void mv88e6xxx_port_disable(struct dsa_switch *ds, int port)
struct mv88e6xxx_chip *chip = ds->priv;
mv88e6xxx_reg_lock(chip);
-
- if (chip->info->ops->serdes_irq_free)
- chip->info->ops->serdes_irq_free(chip, port);
-
if (mv88e6xxx_serdes_power(chip, port, false))
dev_err(chip->dev, "failed to power off SERDES\n");
-
mv88e6xxx_reg_unlock(chip);
}
--
2.22.0
^ permalink raw reply related
* [PATCH net-next v2 1/6] net: dsa: use a single switch statement for port setup
From: Vivien Didelot @ 2019-08-19 20:00 UTC (permalink / raw)
To: netdev; +Cc: marek.behun, davem, f.fainelli, andrew, Vivien Didelot
In-Reply-To: <20190819200053.21637-1-vivien.didelot@gmail.com>
It is currently difficult to read the different steps involved in the
setup and teardown of ports in the DSA code. Keep it simple with a
single switch statement for each port type: UNUSED, CPU, DSA, or USER.
Also no need to call devlink_port_unregister from within dsa_port_setup
as this step is inconditionally handled by dsa_port_teardown on error.
Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
---
net/dsa/dsa2.c | 87 ++++++++++++++++++++++----------------------------
1 file changed, 39 insertions(+), 48 deletions(-)
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 3abd173ebacb..405552ac4c08 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -254,88 +254,79 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
static int dsa_port_setup(struct dsa_port *dp)
{
- enum devlink_port_flavour flavour;
struct dsa_switch *ds = dp->ds;
struct dsa_switch_tree *dst = ds->dst;
- int err = 0;
-
- if (dp->type == DSA_PORT_TYPE_UNUSED)
- return 0;
-
- memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
- dp->mac = of_get_mac_address(dp->dn);
-
- switch (dp->type) {
- case DSA_PORT_TYPE_CPU:
- flavour = DEVLINK_PORT_FLAVOUR_CPU;
- break;
- case DSA_PORT_TYPE_DSA:
- flavour = DEVLINK_PORT_FLAVOUR_DSA;
- break;
- case DSA_PORT_TYPE_USER: /* fall-through */
- default:
- flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
- break;
- }
-
- /* dp->index is used now as port_number. However
- * CPU and DSA ports should have separate numbering
- * independent from front panel port numbers.
- */
- devlink_port_attrs_set(&dp->devlink_port, flavour,
- dp->index, false, 0,
- (const char *) &dst->index, sizeof(dst->index));
- err = devlink_port_register(ds->devlink, &dp->devlink_port,
- dp->index);
- if (err)
- return err;
+ const unsigned char *id = (const unsigned char *)&dst->index;
+ const unsigned char len = sizeof(dst->index);
+ struct devlink_port *dlp = &dp->devlink_port;
+ struct devlink *dl = ds->devlink;
+ int err;
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
break;
case DSA_PORT_TYPE_CPU:
+ memset(dlp, 0, sizeof(*dlp));
+ devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_CPU,
+ dp->index, false, 0, id, len);
+ err = devlink_port_register(dl, dlp, dp->index);
+ if (err)
+ return err;
+
err = dsa_port_link_register_of(dp);
if (err)
- dev_err(ds->dev, "failed to setup link for port %d.%d\n",
- ds->index, dp->index);
+ return err;
break;
case DSA_PORT_TYPE_DSA:
+ memset(dlp, 0, sizeof(*dlp));
+ devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_DSA,
+ dp->index, false, 0, id, len);
+ err = devlink_port_register(dl, dlp, dp->index);
+ if (err)
+ return err;
+
err = dsa_port_link_register_of(dp);
if (err)
- dev_err(ds->dev, "failed to setup link for port %d.%d\n",
- ds->index, dp->index);
+ return err;
break;
case DSA_PORT_TYPE_USER:
+ memset(dlp, 0, sizeof(*dlp));
+ devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_PHYSICAL,
+ dp->index, false, 0, id, len);
+ err = devlink_port_register(dl, dlp, dp->index);
+ if (err)
+ return err;
+
+ dp->mac = of_get_mac_address(dp->dn);
err = dsa_slave_create(dp);
if (err)
- dev_err(ds->dev, "failed to create slave for port %d.%d\n",
- ds->index, dp->index);
- else
- devlink_port_type_eth_set(&dp->devlink_port, dp->slave);
+ return err;
+
+ devlink_port_type_eth_set(dlp, dp->slave);
break;
}
- if (err)
- devlink_port_unregister(&dp->devlink_port);
-
- return err;
+ return 0;
}
static void dsa_port_teardown(struct dsa_port *dp)
{
- if (dp->type != DSA_PORT_TYPE_UNUSED)
- devlink_port_unregister(&dp->devlink_port);
+ struct devlink_port *dlp = &dp->devlink_port;
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
break;
case DSA_PORT_TYPE_CPU:
dsa_tag_driver_put(dp->tag_ops);
- /* fall-through */
+ devlink_port_unregister(dlp);
+ dsa_port_link_unregister_of(dp);
+ break;
case DSA_PORT_TYPE_DSA:
+ devlink_port_unregister(dlp);
dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
+ devlink_port_unregister(dlp);
if (dp->slave) {
dsa_slave_destroy(dp->slave);
dp->slave = NULL;
--
2.22.0
^ permalink raw reply related
* Re: [PATCH net-next] net: flow_offload: convert block_ing_cb_list to regular list type
From: David Miller @ 2019-08-19 20:03 UTC (permalink / raw)
To: vladbu; +Cc: netdev, jhs, xiyou.wangcong, jiri, wenxu, pablo
In-Reply-To: <20190816150654.22106-1-vladbu@mellanox.com>
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 16 Aug 2019 18:06:54 +0300
> RCU list block_ing_cb_list is protected by rcu read lock in
> flow_block_ing_cmd() and with flow_indr_block_ing_cb_lock mutex in all
> functions that use it. However, flow_block_ing_cmd() needs to call blocking
> functions while iterating block_ing_cb_list which leads to following
> suspicious RCU usage warning:
...
> To fix the described incorrect RCU usage, convert block_ing_cb_list from
> RCU list to regular list and protect it with flow_indr_block_ing_cb_lock
> mutex in flow_block_ing_cmd().
>
> Fixes: 1150ab0f1b33 ("flow_offload: support get multi-subsystem block")
> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Applied.
^ permalink raw reply
* Re: [PATCH net-next 0/2] net: phy: realtek: support NBase-T MMD EEE registers on RTL8125
From: David Miller @ 2019-08-19 20:05 UTC (permalink / raw)
To: hkallweit1; +Cc: andrew, f.fainelli, netdev
In-Reply-To: <d2669c95-9861-df53-2e37-6ebfde11c4c9@gmail.com>
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 16 Aug 2019 21:55:40 +0200
> Add missing EEE-related constants, including the new MMD EEE registers
> for NBase-T / 802.3bz. Based on that emulate the new 802.3bz MMD EEE
> registers for 2.5Gbps EEE on RTL8125.
Series applied.
^ permalink raw reply
* Re: [PATCH net] tcp: make sure EPOLLOUT wont be missed
From: David Miller @ 2019-08-19 20:08 UTC (permalink / raw)
To: edumazet; +Cc: netdev, soheil, ncardwell, eric.dumazet, jbaron, rutsky
In-Reply-To: <20190817042622.91497-1-edumazet@google.com>
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 16 Aug 2019 21:26:22 -0700
> As Jason Baron explained in commit 790ba4566c1a ("tcp: set SOCK_NOSPACE
> under memory pressure"), it is crucial we properly set SOCK_NOSPACE
> when needed.
>
> However, Jason patch had a bug, because the 'nonblocking' status
> as far as sk_stream_wait_memory() is concerned is governed
> by MSG_DONTWAIT flag passed at sendmsg() time :
>
> long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
>
> So it is very possible that tcp sendmsg() calls sk_stream_wait_memory(),
> and that sk_stream_wait_memory() returns -EAGAIN with SOCK_NOSPACE
> cleared, if sk->sk_sndtimeo has been set to a small (but not zero)
> value.
>
> This patch removes the 'noblock' variable since we must always
> set SOCK_NOSPACE if -EAGAIN is returned.
>
> It also renames the do_nonblock label since we might reach this
> code path even if we were in blocking mode.
>
> Fixes: 790ba4566c1a ("tcp: set SOCK_NOSPACE under memory pressure")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Jason Baron <jbaron@akamai.com>
> Reported-by: Vladimir Rutsky <rutsky@google.com>
Applied and queued up for -stable.
^ permalink raw reply
* Re: [PATCH net-next 0/6] net: dsa: enable and disable all ports
From: David Miller @ 2019-08-19 20:11 UTC (permalink / raw)
To: vivien.didelot; +Cc: netdev, marek.behun, f.fainelli, andrew
In-Reply-To: <20190818173548.19631-1-vivien.didelot@gmail.com>
From: Vivien Didelot <vivien.didelot@gmail.com>
Date: Sun, 18 Aug 2019 13:35:42 -0400
> The DSA stack currently calls the .port_enable and .port_disable switch
> callbacks for slave ports only. However, it is useful to call them for all
> port types. For example this allows some drivers to delay the optimization
> of power consumption after the switch is setup. This can also help reducing
> the setup code of drivers a bit.
>
> The first DSA core patches enable and disable all ports of a switch, regardless
> their type. The last mv88e6xxx patches remove redundant code from the driver
> setup and the said callbacks, now that they handle SERDES power for all ports.
Looks like the bcm_sf2 parts need some adjustments.
^ permalink raw reply
* Re: What to do when a bridge port gets its pvid deleted?
From: Ido Schimmel @ 2019-08-19 20:15 UTC (permalink / raw)
To: Vladimir Oltean
Cc: Florian Fainelli, roopa, nikolay, netdev, Andrew Lunn,
Vivien Didelot, stephen
In-Reply-To: <4756358f-6717-0fbc-3fe8-9f6359583367@gmail.com>
On Mon, Aug 19, 2019 at 08:15:03PM +0300, Vladimir Oltean wrote:
> On 6/28/19 7:45 PM, Florian Fainelli wrote:
> > On 6/28/19 5:37 AM, Vladimir Oltean wrote:
> > > On Fri, 28 Jun 2019 at 15:30, Ido Schimmel <idosch@idosch.org> wrote:
> > > >
> > > > On Tue, Jun 25, 2019 at 11:49:29PM +0300, Vladimir Oltean wrote:
> > > > > A number of DSA drivers (BCM53XX, Microchip KSZ94XX, Mediatek MT7530
> > > > > at the very least), as well as Mellanox Spectrum (I didn't look at all
> > > > > the pure switchdev drivers) try to restore the pvid to a default value
> > > > > on .port_vlan_del.
> > > >
> > > > I don't know about DSA drivers, but that's not what mlxsw is doing. If
> > > > the VLAN that is configured as PVID is deleted from the bridge port, the
> > > > driver instructs the port to discard untagged and prio-tagged packets.
> > > > This is consistent with the bridge driver's behavior.
> > > >
> > > > We do have a flow the "restores" the PVID, but that's when a port is
> > > > unlinked from its bridge master. The PVID we set is 4095 which cannot be
> > > > configured by the 8021q / bridge driver. This is due to the way the
> > > > underlying hardware works. Even if a port is not bridged and used purely
> > > > for routing, packets still do L2 lookup first which sends them directly
> > > > to the router block. If PVID is not configured, untagged packets could
> > > > not be routed. Obviously, at egress we strip this VLAN.
> > > >
> > > > > Sure, the port stops receiving traffic when its pvid is a VLAN ID that
> > > > > is not installed in its hw filter, but as far as the bridge core is
> > > > > concerned, this is to be expected:
> > > > >
> > > > > # bridge vlan add dev swp2 vid 100 pvid untagged
> > > > > # bridge vlan
> > > > > port vlan ids
> > > > > swp5 1 PVID Egress Untagged
> > > > >
> > > > > swp2 1 Egress Untagged
> > > > > 100 PVID Egress Untagged
> > > > >
> > > > > swp3 1 PVID Egress Untagged
> > > > >
> > > > > swp4 1 PVID Egress Untagged
> > > > >
> > > > > br0 1 PVID Egress Untagged
> > > > > # ping 10.0.0.1
> > > > > PING 10.0.0.1 (10.0.0.1) 56(84) bytes of data.
> > > > > 64 bytes from 10.0.0.1: icmp_seq=1 ttl=64 time=0.682 ms
> > > > > 64 bytes from 10.0.0.1: icmp_seq=2 ttl=64 time=0.299 ms
> > > > > 64 bytes from 10.0.0.1: icmp_seq=3 ttl=64 time=0.251 ms
> > > > > 64 bytes from 10.0.0.1: icmp_seq=4 ttl=64 time=0.324 ms
> > > > > 64 bytes from 10.0.0.1: icmp_seq=5 ttl=64 time=0.257 ms
> > > > > ^C
> > > > > --- 10.0.0.1 ping statistics ---
> > > > > 5 packets transmitted, 5 received, 0% packet loss, time 4188ms
> > > > > rtt min/avg/max/mdev = 0.251/0.362/0.682/0.163 ms
> > > > > # bridge vlan del dev swp2 vid 100
> > > > > # bridge vlan
> > > > > port vlan ids
> > > > > swp5 1 PVID Egress Untagged
> > > > >
> > > > > swp2 1 Egress Untagged
> > > > >
> > > > > swp3 1 PVID Egress Untagged
> > > > >
> > > > > swp4 1 PVID Egress Untagged
> > > > >
> > > > > br0 1 PVID Egress Untagged
> > > > >
> > > > > # ping 10.0.0.1
> > > > > PING 10.0.0.1 (10.0.0.1) 56(84) bytes of data.
> > > > > ^C
> > > > > --- 10.0.0.1 ping statistics ---
> > > > > 8 packets transmitted, 0 received, 100% packet loss, time 7267ms
> > > > >
> > > > > What is the consensus here? Is there a reason why the bridge driver
> > > > > doesn't take care of this?
> > > >
> > > > Take care of what? :) Always maintaining a PVID on the bridge port? It's
> > > > completely OK not to have a PVID.
> > > >
> > >
> > > Yes, I didn't think it through during the first email. I came to the
> > > same conclusion in the second one.
> > >
> > > > > Do switchdev drivers have to restore the pvid to always be
> > > > > operational, even if their state becomes inconsistent with the upper
> > > > > dev? Is it just 'nice to have'? What if VID 1 isn't in the hw filter
> > > > > either (perfectly legal)?
> > > >
> > > > Are you saying that DSA drivers always maintain a PVID on the bridge
> > > > port and allow untagged traffic to ingress regardless of the bridge
> > > > driver's configuration? If so, I think this needs to be fixed.
> > >
> > > Well, not at the DSA core level.
> > > But for Microchip:
> > > https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/tree/drivers/net/dsa/microchip/ksz9477.c#n576
> > > For Broadcom:
> > > https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/tree/drivers/net/dsa/b53/b53_common.c#n1376
> > > For Mediatek:
> > > https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/tree/drivers/net/dsa/mt7530.c#n1196
> > >
> > > There might be others as well.
> >
> > That sounds bogus indeed, and I bet that the two other drivers just
> > followed the b53 driver there. I will have to test this again and come
> > up with a patch eventually.
> >
> > When the port leaves the bridge we do bring it back into a default PVID
> > (which is different than the bridge's default PVID) so that part should
> > be okay.
> >
>
> Adding a few more networking people.
> So my flow is something like this:
> - Boot a board with a DSA switch
> - Bring all interfaces up
> - Enslave all interfaces to br0
> - Enable vlan_filtering on br0
>
> What VIDs should be installed into the ports' hw filters, and what should
> the pvid be at this point?
> Should the switch ports pass any traffic?
> At this point, 'bridge vlan' shows a confusing:
> port vlan ids
> eth0 1 PVID Egress Untagged
>
> swp5 1 PVID Egress Untagged
>
> swp2 1 PVID Egress Untagged
>
> swp3 1 PVID Egress Untagged
>
> swp4 1 PVID Egress Untagged
>
> br0 1 PVID Egress Untagged
> for all ports, but the .port_vlan_add callback is nowhere to be found.
The bridge adds a PVID on the port when it is enslaved to the bridge.
The configuration only takes effect when VLAN filtering is enabled. I'm
looking at dsa_port_vlan_add() and it seems that it does not propagate
the VLAN call when VLAN filtering is disabled. This explains why you
never see the callback.
I assume that if you configure the bridge with VLAN filtering enabled
and then enslave a port, then everything works fine.
mlxsw avoids the situation by forbidding the toggling of VLAN filtering
on the bridge when its ports are enslaved.
>
> Whose responsibility is it for the switch to pass traffic without any
> further 'bridge vlan' command? What is the mechanism through which this
> should work?
>
> What if I do:
> sudo bridge vlan add vid 100 dev swp2 pvid untagged
> echo 0 | sudo tee /sys/class/net/br0/bridge/vlan_filtering
> echo 1 | sudo tee /sys/class/net/br0/bridge/vlan_filtering
> What pvid should there be on swp2 now?
> 'bridge vlan' shows:
> port vlan ids
> eth0 1 PVID Egress Untagged
>
> swp5 1 PVID Egress Untagged
>
> swp2 1 Egress Untagged
> 100 PVID Egress Untagged
>
> swp3 1 PVID Egress Untagged
>
> swp4 1 PVID Egress Untagged
>
> br0 1 PVID Egress Untagged
> If the 'bridge vlan' output is correct, whose responsibility is it to
> restore this pvid?
I suggest to follow mlxsw and avoid this mess. You can support both VLAN
filtering enable / disable without supporting dynamically toggling the
option.
>
> More context: the sja1105 driver is somewhat similar to the mlxsw in that
> VLAN awareness cannot be truly disabled. Arid details aside, in both cases,
> achieving "VLAN-unaware"-like behavior involves manipulating the pvid in
> both cases. But it appears that the bridge core does expect:
> (1) that the driver performs a default VLAN initialization which matches its
> own, without them ever communicating. But because switchdev/DSA drivers
> start off in standalone mode, vlan_filtering=0 comes first, hence the
> non-standard pvid. Through what mechanism is the bridge-expected pvid
> supposed to get restored upon flipping vlan_filtering?
> (2) that toggling VLAN filtering off and on has no other state upon the
> underlying driver than enabling and disabling VLAN awareness. The VLAN hw
> filter table is assumed to be unchanged. Is this a correct assumption?
>
> Thanks,
> -Vladimir
^ permalink raw reply
* Re: [PATCH 0/5] Netfilter fixes for net
From: David Miller @ 2019-08-19 20:16 UTC (permalink / raw)
To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <20190819184911.15263-1-pablo@netfilter.org>
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 19 Aug 2019 20:49:06 +0200
> The following patchset contains Netfilter fixes for net:
>
> 1) Remove IP MASQUERADING record in MAINTAINERS file,
> from Denis Efremov.
>
> 2) Counter arguments are swapped in ebtables, from
> Todd Seidelmann.
>
> 3) Missing netlink attribute validation in flow_offload
> extension.
>
> 4) Incorrect alignment in xt_nfacct that breaks 32-bits
> userspace / 64-bits kernels, from Juliana Rodrigueiro.
>
> 5) Missing include guard in nf_conntrack_h323_types.h,
> from Masahiro Yamada.
>
> You can pull these changes from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git
Pulled, thanks.
^ permalink raw reply
* RE: [PATCH] net/ncsi: add control packet payload to NC-SI commands from netlink
From: Justin.Lee1 @ 2019-08-19 20:17 UTC (permalink / raw)
To: benwei; +Cc: netdev, openbmc, linux-kernel, sam, davem
In-Reply-To: <CH2PR15MB368691D280F882864A6D356DA3A80@CH2PR15MB3686.namprd15.prod.outlook.com>
Hi Ben,
I have similar fix locally with different approach as the command handler may have some expectation for those byes.
We can use NCSI_PKT_CMD_OEM handler as it only copies data based on the payload length.
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 5c3fad8..3b01f65 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -309,14 +309,19 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca)
{
+ struct ncsi_cmd_handler *nch = NULL;
struct ncsi_request *nr;
+ unsigned char type;
struct ethhdr *eh;
- struct ncsi_cmd_handler *nch = NULL;
int i, ret;
+ if (nca->req_flags == NCSI_REQ_FLAG_NETLINK_DRIVEN)
+ type = NCSI_PKT_CMD_OEM;
+ else
+ type = nca->type;
/* Search for the handler */
for (i = 0; i < ARRAY_SIZE(ncsi_cmd_handlers); i++) {
- if (ncsi_cmd_handlers[i].type == nca->type) {
+ if (ncsi_cmd_handlers[i].type == type) {
if (ncsi_cmd_handlers[i].handler)
nch = &ncsi_cmd_handlers[i];
else
Thanks,
Justin
> This patch adds support for NCSI_CMD_SEND_CMD netlink command to load packet data payload (up to 16 bytes) to standard NC-SI commands.
>
> Packet data will be loaded from NCSI_ATTR_DATA attribute similar to NC-SI OEM commands
>
> Signed-off-by: Ben Wei <benwei@fb.com>
> ---
> net/ncsi/internal.h | 7 ++++---
> net/ncsi/ncsi-netlink.c | 9 +++++++++
> 2 files changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 0b3f0673e1a2..4ff442faf5dc 100644
> --- a/net/ncsi/internal.h
> +++ b/net/ncsi/internal.h
> @@ -328,9 +328,10 @@ struct ncsi_cmd_arg {
> unsigned short payload; /* Command packet payload length */
> unsigned int req_flags; /* NCSI request properties */
> union {
> - unsigned char bytes[16]; /* Command packet specific data */
> - unsigned short words[8];
> - unsigned int dwords[4];
> +#define NCSI_MAX_DATA_BYTES 16
> + unsigned char bytes[NCSI_MAX_DATA_BYTES]; /* Command packet specific data */
> + unsigned short words[NCSI_MAX_DATA_BYTES / sizeof(unsigned short)];
> + unsigned int dwords[NCSI_MAX_DATA_BYTES / sizeof(unsigned int)];
> };
> unsigned char *data; /* NCSI OEM data */
> struct genl_info *info; /* Netlink information */
> diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 8b386d766e7d..7d2a43f30eb1 100644
> --- a/net/ncsi/ncsi-netlink.c
> +++ b/net/ncsi/ncsi-netlink.c
> @@ -459,6 +459,15 @@ static int ncsi_send_cmd_nl(struct sk_buff *msg, struct genl_info *info)
> nca.payload = ntohs(hdr->length);
> nca.data = data + sizeof(*hdr);
>
> + if (nca.payload <= NCSI_MAX_DATA_BYTES) {
> + memcpy(nca.bytes, nca.data, nca.payload);
> + } else {
> + netdev_info(ndp->ndev.dev, "NCSI:payload size %u exceeds max %u\n",
> + nca.payload, NCSI_MAX_DATA_BYTES);
> + ret = -EINVAL;
> + goto out_netlink;
> + }
> +
> ret = ncsi_xmit_cmd(&nca);
> out_netlink:
> if (ret != 0) {
> --
> 2.17.1
^ permalink raw reply related
* Re: [PATCH net-next v7 5/6] flow_offload: support get multi-subsystem block
From: Jakub Kicinski @ 2019-08-19 20:27 UTC (permalink / raw)
To: Vlad Buslov
Cc: wenxu, David Miller, Jiri Pirko, pablo@netfilter.org,
netfilter-devel@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <vbftvady5tg.fsf@mellanox.com>
On Mon, 19 Aug 2019 07:26:07 +0000, Vlad Buslov wrote:
> On Fri 16 Aug 2019 at 20:56, Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> > Hi Vlad!
> >
> > While looking into this, would you mind also add the missing
> > flow_block_cb_is_busy() calls in the indirect handlers in the drivers?
> >
> > LMK if you're too busy, I don't want this to get forgotten :)
>
> Hi Jakub,
>
> I've checked the code and it looks like only nfp driver is affected:
>
> - I added check in nfp to lookup cb_priv with
> nfp_flower_indr_block_cb_priv_lookup() and call
> flow_block_cb_is_busy() if cb_priv exists.
>
> - In mlx5 en_rep.c there is already a check that indr_priv exists, so
> trying to lookup block_cb->cb_indent==indr_priv is redundant.
>
> - Switch drivers (mlxsw and ocelot) take reference to block_cb on
> FLOW_BLOCK_BIND, so they should not require any modifications.
>
> Tell me if I missed anything. Sending the patch for nfp.
Ah, that sounds plausible, I've only checked the nfp driver.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox