* Re: [PATCH bpf-next v2 4/6] bpf, arm64: Impelment bpf_arch_text_poke() for arm64
From: Jakub Sitnicki @ 2022-04-22 10:54 UTC (permalink / raw)
To: Xu Kuohai
Cc: bpf, linux-arm-kernel, linux-kernel, netdev, linux-kselftest,
Catalin Marinas, Will Deacon, Steven Rostedt, Ingo Molnar,
Daniel Borkmann, Alexei Starovoitov, Zi Shen Lim, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, David S . Miller, Hideaki YOSHIFUJI, David Ahern,
Thomas Gleixner, Borislav Petkov, Dave Hansen, x86, hpa,
Shuah Khan, Mark Rutland, Ard Biesheuvel, Pasha Tatashin,
Peter Collingbourne, Daniel Kiss, Sudeep Holla, Steven Price,
Marc Zyngier, Mark Brown, Kumar Kartikeya Dwivedi,
Delyan Kratunov, kernel-team
In-Reply-To: <20220414162220.1985095-5-xukuohai@huawei.com>
Hi Xu,
Thanks for working on this.
We are also looking forward to using fentry hooks on arm64.
In particular, attaching to entry/exit into/from XDP progs.
On Thu, Apr 14, 2022 at 12:22 PM -04, Xu Kuohai wrote:
> Impelment bpf_arch_text_poke() for arm64, so bpf trampoline code can use
> it to replace nop with jump, or replace jump with nop.
>
> Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
> Acked-by: Song Liu <songliubraving@fb.com>
> ---
> arch/arm64/net/bpf_jit_comp.c | 52 +++++++++++++++++++++++++++++++++++
> 1 file changed, 52 insertions(+)
>
> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> index 8ab4035dea27..1a1c3ea75ee2 100644
> --- a/arch/arm64/net/bpf_jit_comp.c
> +++ b/arch/arm64/net/bpf_jit_comp.c
> @@ -9,6 +9,7 @@
>
> #include <linux/bitfield.h>
> #include <linux/bpf.h>
> +#include <linux/memory.h>
> #include <linux/filter.h>
> #include <linux/printk.h>
> #include <linux/slab.h>
> @@ -18,6 +19,7 @@
> #include <asm/cacheflush.h>
> #include <asm/debug-monitors.h>
> #include <asm/insn.h>
> +#include <asm/patching.h>
> #include <asm/set_memory.h>
>
> #include "bpf_jit.h"
> @@ -1529,3 +1531,53 @@ void bpf_jit_free_exec(void *addr)
> {
> return vfree(addr);
> }
> +
> +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
> + void *addr, u32 *insn)
> +{
> + if (!addr)
> + *insn = aarch64_insn_gen_nop();
> + else
> + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
> + (unsigned long)addr,
> + type);
> +
> + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
> +}
> +
> +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
> + void *old_addr, void *new_addr)
> +{
> + int ret;
> + u32 old_insn;
> + u32 new_insn;
> + u32 replaced;
> + enum aarch64_insn_branch_type branch_type;
> +
> + if (poke_type == BPF_MOD_CALL)
> + branch_type = AARCH64_INSN_BRANCH_LINK;
This path, bpf_arch_text_poke(<ip>, BPF_MOD_CALL, ...), is what we hit
when attaching a BPF program entry. It is exercised by selftest #232
xdp_bpf2bpf.
However, with this patchset alone it will not work because we don't
emit, yet, the ftrace patch (MOV X9, LR; NOP) as a part of BPF prog
prologue, like ftrace_init_nop() does. So patching attempt will fail.
I think that is what you referred to in your reply to Hou [1].
So my question is - is support for attaching to BPF progs in scope for
this patchset?
If no, then perhaps it would be better for now to fail early with
something like -EOPNOTSUPP when poke_type is BPF_MOD_CALL, rather than
attempt to patch the code.
If you plan to enable it as a part of this patchset, then I've given it
a quick try, and it seems that not a lot is needed to get fentry to BPF
attachment to work.
I'm including the diff for my quick and dirty attempt below. With that
patch on top, the xdp_bpf2bpf tests pass:
#232 xdp_bpf2bpf:OK
[1] https://lore.kernel.org/bpf/d8c4f1fb-a020-9457-44e2-dc63982a9213@huawei.com/
> + else
> + branch_type = AARCH64_INSN_BRANCH_NOLINK;
> +
> + if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0)
> + return -EFAULT;
> +
> + if (gen_branch_or_nop(branch_type, ip, new_addr, &new_insn) < 0)
> + return -EFAULT;
> +
> + mutex_lock(&text_mutex);
> + if (aarch64_insn_read(ip, &replaced)) {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + if (replaced != old_insn) {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + ret = aarch64_insn_patch_text_nosync((void *)ip, new_insn);
> +out:
> + mutex_unlock(&text_mutex);
The body of this critical section is identical to that of ftrace_modify_code().
Perhaps we could export it and reuse?
> + return ret;
> +}
---
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 5f6bd755050f..94d8251500ab 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -240,9 +240,9 @@ static bool is_lsi_offset(int offset, int scale)
/* Tail call offset to jump into */
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \
IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)
-#define PROLOGUE_OFFSET 9
+#define PROLOGUE_OFFSET 11
#else
-#define PROLOGUE_OFFSET 8
+#define PROLOGUE_OFFSET 10
#endif
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
@@ -281,6 +281,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
*
*/
+ /* Set up ftrace patch (initially in disabled state) */
+ emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
+ emit(A64_NOP, ctx);
+
/* Sign lr */
if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
emit(A64_PACIASP, ctx);
@@ -1888,10 +1892,16 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
u32 replaced;
enum aarch64_insn_branch_type branch_type;
- if (poke_type == BPF_MOD_CALL)
+ if (poke_type == BPF_MOD_CALL) {
branch_type = AARCH64_INSN_BRANCH_LINK;
- else
+ /*
+ * Adjust addr to point at the BL in the callsite.
+ * See ftrace_init_nop() for the callsite sequence.
+ */
+ ip = (void *)((unsigned long)ip + AARCH64_INSN_SIZE);
+ } else {
branch_type = AARCH64_INSN_BRANCH_NOLINK;
+ }
if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0)
return -EFAULT;
^ permalink raw reply related
* [PATCH v0] mctp: defer the kfree of object mdev->addrs
From: Lin Ma @ 2022-04-22 11:43 UTC (permalink / raw)
To: jk, matt, davem, kuba, pabeni, netdev, linux-kernel; +Cc: Lin Ma
The function mctp_unregister() reclaims the device's relevant resource
when a netcard detaches. However, a running routine may be unaware of
this and cause the use-after-free of the mdev->addrs object.
The race condition can be demonstrated below
cleanup thread another thread
|
unregister_netdev() | mctp_sendmsg()
... | ...
mctp_unregister() | rt = mctp_route_lookup()
... | mctp_local_output()
kfree(mdev->addrs) | ...
| saddr = rt->dev->addrs[0];
|
An attacker can adopt the (recently provided) mctp-serial driver with pty
to fake the device detaching and use the userfaultfd to increase the
race success chance (in mctp_sendmsg). The KASan report for such a POC
is shown below:
[ 86.051955] ==================================================================
[ 86.051955] BUG: KASAN: use-after-free in mctp_local_output+0x4e9/0xb7d
[ 86.051955] Read of size 1 at addr ffff888005f298c0 by task poc/295
[ 86.051955]
[ 86.051955] Call Trace:
[ 86.051955] <TASK>
[ 86.051955] dump_stack_lvl+0x33/0x42
[ 86.051955] print_report.cold.13+0xb2/0x6b3
[ 86.051955] ? preempt_schedule_irq+0x57/0x80
[ 86.051955] ? mctp_local_output+0x4e9/0xb7d
[ 86.051955] kasan_report+0xa5/0x120
[ 86.051955] ? mctp_local_output+0x4e9/0xb7d
[ 86.051955] mctp_local_output+0x4e9/0xb7d
[ 86.051955] ? mctp_dev_set_key+0x79/0x79
[ 86.051955] ? copyin+0x38/0x50
[ 86.051955] ? _copy_from_iter+0x1b6/0xf20
[ 86.051955] ? sysvec_apic_timer_interrupt+0x97/0xb0
[ 86.051955] ? asm_sysvec_apic_timer_interrupt+0x12/0x20
[ 86.051955] ? mctp_local_output+0x1/0xb7d
[ 86.051955] mctp_sendmsg+0x64d/0xdb0
[ 86.051955] ? mctp_sk_close+0x20/0x20
[ 86.051955] ? __fget_light+0x2fd/0x4f0
[ 86.051955] ? mctp_sk_close+0x20/0x20
[ 86.051955] sock_sendmsg+0xdd/0x110
[ 86.051955] __sys_sendto+0x1cc/0x2a0
[ 86.051955] ? __ia32_sys_getpeername+0xa0/0xa0
[ 86.051955] ? new_sync_write+0x335/0x550
[ 86.051955] ? alloc_file+0x22f/0x500
[ 86.051955] ? __ip_do_redirect+0x820/0x1820
[ 86.051955] ? vfs_write+0x44d/0x7b0
[ 86.051955] ? vfs_write+0x44d/0x7b0
[ 86.051955] ? fput_many+0x15/0x120
[ 86.051955] ? ksys_write+0x155/0x1b0
[ 86.051955] ? __ia32_sys_read+0xa0/0xa0
[ 86.051955] __x64_sys_sendto+0xd8/0x1b0
[ 86.051955] ? exit_to_user_mode_prepare+0x2f/0x120
[ 86.051955] ? syscall_exit_to_user_mode+0x12/0x20
[ 86.051955] do_syscall_64+0x3a/0x80
[ 86.051955] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 86.051955] RIP: 0033:0x7f82118a56b3
[ 86.051955] RSP: 002b:00007ffdb154b110 EFLAGS: 00000293 ORIG_RAX: 000000000000002c
[ 86.051955] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f82118a56b3
[ 86.051955] RDX: 0000000000000010 RSI: 00007f8211cd4000 RDI: 0000000000000007
[ 86.051955] RBP: 00007ffdb154c1d0 R08: 00007ffdb154b164 R09: 000000000000000c
[ 86.051955] R10: 0000000000000000 R11: 0000000000000293 R12: 000055d779800db0
[ 86.051955] R13: 00007ffdb154c2b0 R14: 0000000000000000 R15: 0000000000000000
[ 86.051955] </TASK>
[ 86.051955]
[ 86.051955] Allocated by task 295:
[ 86.051955] kasan_save_stack+0x1c/0x40
[ 86.051955] __kasan_kmalloc+0x84/0xa0
[ 86.051955] mctp_rtm_newaddr+0x242/0x610
[ 86.051955] rtnetlink_rcv_msg+0x2fd/0x8b0
[ 86.051955] netlink_rcv_skb+0x11c/0x340
[ 86.051955] netlink_unicast+0x439/0x630
[ 86.051955] netlink_sendmsg+0x752/0xc00
[ 86.051955] sock_sendmsg+0xdd/0x110
[ 86.051955] __sys_sendto+0x1cc/0x2a0
[ 86.051955] __x64_sys_sendto+0xd8/0x1b0
[ 86.051955] do_syscall_64+0x3a/0x80
[ 86.051955] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 86.051955]
[ 86.051955] Freed by task 301:
[ 86.051955] kasan_save_stack+0x1c/0x40
[ 86.051955] kasan_set_track+0x21/0x30
[ 86.051955] kasan_set_free_info+0x20/0x30
[ 86.051955] __kasan_slab_free+0x104/0x170
[ 86.051955] kfree+0x8c/0x290
[ 86.051955] mctp_dev_notify+0x161/0x2c0
[ 86.051955] raw_notifier_call_chain+0x8b/0xc0
[ 86.051955] unregister_netdevice_many+0x299/0x1180
[ 86.051955] unregister_netdevice_queue+0x210/0x2f0
[ 86.051955] unregister_netdev+0x13/0x20
[ 86.051955] mctp_serial_close+0x6d/0xa0
[ 86.051955] tty_ldisc_kill+0x31/0xa0
[ 86.051955] tty_ldisc_hangup+0x24f/0x560
[ 86.051955] __tty_hangup.part.28+0x2ce/0x6b0
[ 86.051955] tty_release+0x327/0xc70
[ 86.051955] __fput+0x1df/0x8b0
[ 86.051955] task_work_run+0xca/0x150
[ 86.051955] exit_to_user_mode_prepare+0x114/0x120
[ 86.051955] syscall_exit_to_user_mode+0x12/0x20
[ 86.051955] do_syscall_64+0x46/0x80
[ 86.051955] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 86.051955]
[ 86.051955] The buggy address belongs to the object at ffff888005f298c0
[ 86.051955] which belongs to the cache kmalloc-8 of size 8
[ 86.051955] The buggy address is located 0 bytes inside of
[ 86.051955] 8-byte region [ffff888005f298c0, ffff888005f298c8)
[ 86.051955]
[ 86.051955] The buggy address belongs to the physical page:
[ 86.051955] flags: 0x100000000000200(slab|node=0|zone=1)
[ 86.051955] raw: 0100000000000200 dead000000000100 dead000000000122 ffff888005c42280
[ 86.051955] raw: 0000000000000000 0000000080660066 00000001ffffffff 0000000000000000
[ 86.051955] page dumped because: kasan: bad access detected
[ 86.051955]
[ 86.051955] Memory state around the buggy address:
[ 86.051955] ffff888005f29780: 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc 00
[ 86.051955] ffff888005f29800: fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc
[ 86.051955] >ffff888005f29880: fc fc fc fb fc fc fc fc fa fc fc fc fc fa fc fc
[ 86.051955] ^
[ 86.051955] ffff888005f29900: fc fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc
[ 86.051955] ffff888005f29980: fc 00 fc fc fc fc 00 fc fc fc fc 00 fc fc fc fc
[ 86.051955] ==================================================================
To this end, just like the commit e04480920d1e ("Bluetooth: defer
cleanup of resources in hci_unregister_dev()") this patch defers the
destructive kfree(mdev->addrs) in mctp_unregister to the mctp_dev_put,
where the refcount of mdev is zero and the entire device is reclaimed.
This prevents the use-after-free because the sendmsg thread holds the
reference of mdev in the mctp_route object.
Fixes: 583be982d934 ("mctp: Add device handling and netlink interface")
Signed-off-by: Lin Ma <linma@zju.edu.cn>
---
net/mctp/device.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mctp/device.c b/net/mctp/device.c
index f49be882e98e..99a3bda8852f 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -313,6 +313,7 @@ void mctp_dev_hold(struct mctp_dev *mdev)
void mctp_dev_put(struct mctp_dev *mdev)
{
if (mdev && refcount_dec_and_test(&mdev->refs)) {
+ kfree(mdev->addrs);
dev_put(mdev->dev);
kfree_rcu(mdev, rcu);
}
@@ -441,7 +442,6 @@ static void mctp_unregister(struct net_device *dev)
mctp_route_remove_dev(mdev);
mctp_neigh_remove_dev(mdev);
- kfree(mdev->addrs);
mctp_dev_put(mdev);
}
--
2.35.1
^ permalink raw reply related
* Re: [PATCH net-next v1] net: Use csum_replace_... and csum_sub() helpers instead of opencoding
From: kernel test robot @ 2022-04-22 11:48 UTC (permalink / raw)
To: Christophe Leroy, David S. Miller, Jakub Kicinski,
Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal
Cc: kbuild-all, netdev, Christophe Leroy, linux-kernel,
netfilter-devel, coreteam
In-Reply-To: <fe60030b6f674d9bf41f56426a4b0a8a9db0d20f.1645112415.git.christophe.leroy@csgroup.eu>
Hi Christophe,
I love your patch! Perhaps something to improve:
[auto build test WARNING on net-next/master]
url: https://github.com/intel-lab-lkp/linux/commits/Christophe-Leroy/net-Use-csum_replace_-and-csum_sub-helpers-instead-of-opencoding/20220217-234555
base: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git c8b441d2fbd0e005541c7363fd5346971b6febcb
config: x86_64-rhel-8.3-kselftests (https://download.01.org/0day-ci/archive/20220422/202204221937.SbSpkzXW-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.4-dirty
# https://github.com/intel-lab-lkp/linux/commit/cec9ed7cf59fe6dafcec0a30811024d22fad8cbd
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Christophe-Leroy/net-Use-csum_replace_-and-csum_sub-helpers-instead-of-opencoding/20220217-234555
git checkout cec9ed7cf59fe6dafcec0a30811024d22fad8cbd
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=x86_64 SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
sparse warnings: (new ones prefixed by >>)
net/netfilter/nft_payload.c: note: in included file (through include/net/sctp/sctp.h, include/net/sctp/checksum.h):
include/net/sctp/structs.h:335:41: sparse: sparse: array of flexible structures
>> net/netfilter/nft_payload.c:560:28: sparse: sparse: incorrect type in argument 2 (different base types) @@ expected restricted __be32 [usertype] from @@ got restricted __wsum [usertype] fsum @@
net/netfilter/nft_payload.c:560:28: sparse: expected restricted __be32 [usertype] from
net/netfilter/nft_payload.c:560:28: sparse: got restricted __wsum [usertype] fsum
>> net/netfilter/nft_payload.c:560:34: sparse: sparse: incorrect type in argument 3 (different base types) @@ expected restricted __be32 [usertype] to @@ got restricted __wsum [usertype] tsum @@
net/netfilter/nft_payload.c:560:34: sparse: expected restricted __be32 [usertype] to
net/netfilter/nft_payload.c:560:34: sparse: got restricted __wsum [usertype] tsum
>> net/netfilter/nft_payload.c:560:28: sparse: sparse: incorrect type in argument 2 (different base types) @@ expected restricted __be32 [usertype] from @@ got restricted __wsum [usertype] fsum @@
net/netfilter/nft_payload.c:560:28: sparse: expected restricted __be32 [usertype] from
net/netfilter/nft_payload.c:560:28: sparse: got restricted __wsum [usertype] fsum
>> net/netfilter/nft_payload.c:560:34: sparse: sparse: incorrect type in argument 3 (different base types) @@ expected restricted __be32 [usertype] to @@ got restricted __wsum [usertype] tsum @@
net/netfilter/nft_payload.c:560:34: sparse: expected restricted __be32 [usertype] to
net/netfilter/nft_payload.c:560:34: sparse: got restricted __wsum [usertype] tsum
vim +560 net/netfilter/nft_payload.c
557
558 static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum)
559 {
> 560 csum_replace4(sum, fsum, tsum);
561 if (*sum == 0)
562 *sum = CSUM_MANGLED_0;
563 }
564
--
0-DAY CI Kernel Test Service
https://01.org/lkp
^ permalink raw reply
* Re: [PATCH] USB2NET : SR9800 : change SR9800_BULKIN_SIZE from global to static
From: patchwork-bot+netdevbpf @ 2022-04-22 11:50 UTC (permalink / raw)
To: Tom Rix; +Cc: davem, kuba, pabeni, linux-usb, netdev, linux-kernel
In-Reply-To: <20220419140625.2886328-1-trix@redhat.com>
Hello:
This patch was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:
On Tue, 19 Apr 2022 10:06:25 -0400 you wrote:
> Smatch reports this issue
> sr9800.h:166:53: warning: symbol 'SR9800_BULKIN_SIZE' was not declared. Should it be static?
>
> Global variables should not be defined in header files.
> This only works because sr9800.h is only included by sr9800.c
> Change the storage-class specifier to static.
> And since it does not change add type qualifier const.
>
> [...]
Here is the summary with links:
- USB2NET : SR9800 : change SR9800_BULKIN_SIZE from global to static
https://git.kernel.org/netdev/net-next/c/0844d36f771d
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCH net-next 0/2] ipv6: Use ipv6_only_sock helper function.
From: patchwork-bot+netdevbpf @ 2022-04-22 12:00 UTC (permalink / raw)
To: Kuniyuki Iwashima; +Cc: davem, kuba, dsahern, kuni1840, netdev
In-Reply-To: <20220420015851.50237-1-kuniyu@amazon.co.jp>
Hello:
This series was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:
On Wed, 20 Apr 2022 10:58:49 +0900 you wrote:
> The first patch removes __ipv6_only_sock(), and the second replaces
> ipv6only tests with ipv6_only_sock().
>
>
> Kuniyuki Iwashima (2):
> ipv6: Remove __ipv6_only_sock().
> ipv6: Use ipv6_only_sock() helper in condition.
>
> [...]
Here is the summary with links:
- [net-next,1/2] ipv6: Remove __ipv6_only_sock().
https://git.kernel.org/netdev/net-next/c/89e9c7280075
- [net-next,2/2] ipv6: Use ipv6_only_sock() helper in condition.
https://git.kernel.org/netdev/net-next/c/81ee0eb6c0fe
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCH 0/2] net: macb: Make ZynqMP SGMII phy configuration optional
From: patchwork-bot+netdevbpf @ 2022-04-22 12:00 UTC (permalink / raw)
To: Radhey Shyam Pandey
Cc: davem, kuba, pabeni, robh+dt, krzk+dt, nicolas.ferre,
claudiu.beznea, netdev, devicetree, linux-kernel, michals,
harinik, git
In-Reply-To: <1650452590-32948-1-git-send-email-radhey.shyam.pandey@xilinx.com>
Hello:
This series was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:
On Wed, 20 Apr 2022 16:33:08 +0530 you wrote:
> This patchset drop phy-names property from MACB node and also make
> SGMII Phy configuration optional. The motivation for this change
> is to support traditional usescase in which first stage bootloader
> does PS-GT configuration, and should still be supported in macb
> driver.
>
>
> [...]
Here is the summary with links:
- [1/2] dt-bindings: net: cdns,macb: Drop phy-names property for ZynqMP SGMII PHY
https://git.kernel.org/netdev/net-next/c/3ac8316e09b0
- [2/2] net: macb: In ZynqMP initialization make SGMII phy configuration optional
https://git.kernel.org/netdev/net-next/c/29e96fe9e0ec
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCH] net: dsa: Add missing of_node_put() in dsa_port_link_register_of
From: patchwork-bot+netdevbpf @ 2022-04-22 12:10 UTC (permalink / raw)
To: Miaoqian Lin
Cc: andrew, vivien.didelot, f.fainelli, olteanv, davem, kuba, pabeni,
linux, netdev, linux-kernel
In-Reply-To: <20220420110413.17828-1-linmq006@gmail.com>
Hello:
This patch was applied to netdev/net.git (master)
by David S. Miller <davem@davemloft.net>:
On Wed, 20 Apr 2022 19:04:08 +0800 you wrote:
> The device_node pointer is returned by of_parse_phandle() with refcount
> incremented. We should use of_node_put() on it when done.
> of_node_put() will check for NULL value.
>
> Fixes: a20f997010c4 ("net: dsa: Don't instantiate phylink for CPU/DSA ports unless needed")
> Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
>
> [...]
Here is the summary with links:
- net: dsa: Add missing of_node_put() in dsa_port_link_register_of
https://git.kernel.org/netdev/net/c/fc06b2867f4c
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCH] staging: qlge: Fix line wrapping
From: Greg Kroah-Hartman @ 2022-04-22 12:11 UTC (permalink / raw)
To: Lungash
Cc: Manish Chopra, GR-Linux-NIC-Dev, Coiby Xu, netdev, linux-staging,
linux-kernel, outreachy
In-Reply-To: <YmJseHLyoAJWOGpc@kali-h6>
On Fri, Apr 22, 2022 at 11:51:04AM +0300, Lungash wrote:
> This patch fixes line wrapping following kernel coding style.
>
> Task on TODO list
>
> * fix weird line wrapping (all over, ex. the ql_set_routing_reg() calls in
> qlge_set_multicast_list()).
>
> Signed-off-by: Lungash <denzlungash@gmail.com>
We need a "full" name here, whatever you sign legal documents with.
> ---
> drivers/staging/qlge/qlge_main.c | 235 ++++++++++++++-----------------
> 1 file changed, 107 insertions(+), 128 deletions(-)
>
> diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c
> index 113a3efd12e9..309db00e0b22 100644
> --- a/drivers/staging/qlge/qlge_main.c
> +++ b/drivers/staging/qlge/qlge_main.c
> @@ -499,77 +499,57 @@ static int qlge_set_routing_reg(struct qlge_adapter *qdev, u32 index, u32 mask,
>
> switch (mask) {
> case RT_IDX_CAM_HIT:
> - {
> - value = RT_IDX_DST_CAM_Q | /* dest */
> - RT_IDX_TYPE_NICQ | /* type */
> - (RT_IDX_CAM_HIT_SLOT << RT_IDX_IDX_SHIFT);/* index */
> - break;
> - }
> + value = RT_IDX_DST_CAM_Q | /* dest */
> + RT_IDX_TYPE_NICQ | /* type */
> + (RT_IDX_CAM_HIT_SLOT << RT_IDX_IDX_SHIFT);/* index */
> + break;
The original was fine, but yes, the {} can be removed, but that does not
have to do with the TODO item here. Please only do one type of fixup at
a time.
>
> -static int qlge_validate_flash(struct qlge_adapter *qdev, u32 size, const char *str)
> +static int qlge_validate_flash(struct qlge_adapter *qdev, u32 size,
> + const char *str)
You just made this look worse, why?
> -static int qlge_read_flash_word(struct qlge_adapter *qdev, int offset, __le32 *data)
> +static int qlge_read_flash_word(struct qlge_adapter *qdev, int offset,
> + __le32 *data)
Same here, why change the original?
> @@ -2952,8 +2936,8 @@ static int qlge_start_rx_ring(struct qlge_adapter *qdev, struct rx_ring *rx_ring
> (rx_ring->cq_id * RX_RING_SHADOW_SPACE);
> u64 shadow_reg_dma = qdev->rx_ring_shadow_reg_dma +
> (rx_ring->cq_id * RX_RING_SHADOW_SPACE);
> - void __iomem *doorbell_area =
> - qdev->doorbell_area + (DB_PAGE_SIZE * (128 + rx_ring->cq_id));
> + void __iomem *doorbell_area = qdev->doorbell_area +
> + (DB_PAGE_SIZE * (128 + rx_ring->cq_id));
This does not look better, why not put it all on one line?
thanks,
greg k-h
^ permalink raw reply
* Re: [PATCH v6 1/7] ethtool: Add 10base-T1L link mode entry
From: Oleksij Rempel @ 2022-04-22 12:27 UTC (permalink / raw)
To: alexandru.tachici
Cc: andrew, davem, devicetree, hkallweit1, kuba, linux-kernel, linux,
netdev, robh+dt
In-Reply-To: <20220412130706.36767-2-alexandru.tachici@analog.com>
Hi Alexandru,
on top of kernel v5.18-rcX you will need following changes:
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8406ac739def..7e18d1571f78 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -90,8 +90,9 @@ const int phy_10_100_features_array[4] = {
};
EXPORT_SYMBOL_GPL(phy_10_100_features_array);
-const int phy_basic_t1_features_array[2] = {
+const int phy_basic_t1_features_array[3] = {
ETHTOOL_LINK_MODE_TP_BIT,
+ ETHTOOL_LINK_MODE_10baseT1L_Full_BIT,
ETHTOOL_LINK_MODE_100baseT1_Full_BIT,
};
EXPORT_SYMBOL_GPL(phy_basic_t1_features_array);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 06943889d747..2c5e45e2b1f3 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -168,8 +168,10 @@ static void phylink_caps_to_linkmodes(unsigned long *linkmodes,
if (caps & MAC_10HD)
__set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, linkmodes);
- if (caps & MAC_10FD)
+ if (caps & MAC_10FD) {
__set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, linkmodes);
+ __set_bit(ETHTOOL_LINK_MODE_10baseT1L_Full_BIT, linkmodes);
+ }
if (caps & MAC_100HD) {
__set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, linkmodes);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 36ca2b5c2253..b12af9e2f389 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -65,7 +65,7 @@ extern const int phy_basic_ports_array[3];
extern const int phy_fibre_port_array[1];
extern const int phy_all_ports_features_array[7];
extern const int phy_10_100_features_array[4];
-extern const int phy_basic_t1_features_array[2];
+extern const int phy_basic_t1_features_array[3];
extern const int phy_gbit_features_array[2];
extern const int phy_10gbit_features_array[1];
On Tue, Apr 12, 2022 at 04:07:00PM +0300, alexandru.tachici@analog.com wrote:
> From: Alexandru Tachici <alexandru.tachici@analog.com>
>
> Add entry for the 10base-T1L full duplex mode.
>
> Reviewed-by: Andrew Lunn <andrew@lunn.ch>
> Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>
> Signed-off-by: Alexandru Tachici <alexandru.tachici@analog.com>
> ---
> drivers/net/phy/phy-core.c | 3 ++-
> include/uapi/linux/ethtool.h | 1 +
> net/ethtool/common.c | 3 +++
> 3 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
> index 2001f3329133..1f2531a1a876 100644
> --- a/drivers/net/phy/phy-core.c
> +++ b/drivers/net/phy/phy-core.c
> @@ -13,7 +13,7 @@
> */
> const char *phy_speed_to_str(int speed)
> {
> - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 92,
> + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 93,
> "Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
> "If a speed or mode has been added please update phy_speed_to_str "
> "and the PHY settings array.\n");
> @@ -176,6 +176,7 @@ static const struct phy_setting settings[] = {
> /* 10M */
> PHY_SETTING( 10, FULL, 10baseT_Full ),
> PHY_SETTING( 10, HALF, 10baseT_Half ),
> + PHY_SETTING( 10, FULL, 10baseT1L_Full ),
> };
> #undef PHY_SETTING
>
> diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
> index 7bc4b8def12c..e0f0ee9bc89e 100644
> --- a/include/uapi/linux/ethtool.h
> +++ b/include/uapi/linux/ethtool.h
> @@ -1691,6 +1691,7 @@ enum ethtool_link_mode_bit_indices {
> ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT = 89,
> ETHTOOL_LINK_MODE_100baseFX_Half_BIT = 90,
> ETHTOOL_LINK_MODE_100baseFX_Full_BIT = 91,
> + ETHTOOL_LINK_MODE_10baseT1L_Full_BIT = 92,
> /* must be last entry */
> __ETHTOOL_LINK_MODE_MASK_NBITS
> };
> diff --git a/net/ethtool/common.c b/net/ethtool/common.c
> index 0c5210015911..566adf85e658 100644
> --- a/net/ethtool/common.c
> +++ b/net/ethtool/common.c
> @@ -201,6 +201,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
> __DEFINE_LINK_MODE_NAME(400000, CR4, Full),
> __DEFINE_LINK_MODE_NAME(100, FX, Half),
> __DEFINE_LINK_MODE_NAME(100, FX, Full),
> + __DEFINE_LINK_MODE_NAME(10, T1L, Full),
> };
> static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
>
> @@ -236,6 +237,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
> #define __LINK_MODE_LANES_T1 1
> #define __LINK_MODE_LANES_X 1
> #define __LINK_MODE_LANES_FX 1
> +#define __LINK_MODE_LANES_T1L 1
>
> #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \
> [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \
> @@ -349,6 +351,7 @@ const struct link_mode_info link_mode_params[] = {
> __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full),
> __DEFINE_LINK_MODE_PARAMS(100, FX, Half),
> __DEFINE_LINK_MODE_PARAMS(100, FX, Full),
> + __DEFINE_LINK_MODE_PARAMS(10, T1L, Full),
> };
> static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
>
> --
> 2.25.1
>
>
--
Pengutronix e.K. | |
Steuerwalder Str. 21 | http://www.pengutronix.de/ |
31137 Hildesheim, Germany | Phone: +49-5121-206917-0 |
Amtsgericht Hildesheim, HRA 2686 | Fax: +49-5121-206917-5555 |
^ permalink raw reply related
* Re: [PATCH v3 net-next] dt-bindings: net: mediatek,net: convert to the json-schema
From: patchwork-bot+netdevbpf @ 2022-04-22 12:50 UTC (permalink / raw)
To: Lorenzo Bianconi
Cc: netdev, nbd, lorenzo.bianconi, devicetree, robh, davem, kuba,
pabeni, john
In-Reply-To: <6b417ab35163bd8a4bef4bd38cf46d777925bd26.1650463289.git.lorenzo@kernel.org>
Hello:
This patch was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:
On Wed, 20 Apr 2022 16:07:07 +0200 you wrote:
> This patch converts the existing mediatek-net.txt binding file
> in yaml format.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
> Changes since v2:
> - remove additionalItems for clock-names properties
> - move mediatek,sgmiisys definition out of the if block
>
> [...]
Here is the summary with links:
- [v3,net-next] dt-bindings: net: mediatek,net: convert to the json-schema
https://git.kernel.org/netdev/net-next/c/c78c5a660439
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCH net-next 0/3] ipv4: First steps toward removing RTO_ONLINK
From: patchwork-bot+netdevbpf @ 2022-04-22 12:50 UTC (permalink / raw)
To: Guillaume Nault; +Cc: davem, kuba, pabeni, netdev, yoshfuji, dsahern, dccp
In-Reply-To: <cover.1650470610.git.gnault@redhat.com>
Hello:
This series was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:
On Thu, 21 Apr 2022 01:21:19 +0200 you wrote:
> RTO_ONLINK is a flag that allows to reduce the scope of route lookups.
> It's stored in a normally unused bit of the ->flowi4_tos field, in
> struct flowi4. However it has several problems:
>
> * This bit is also used by ECN. Although ECN bits are supposed to be
> cleared before doing a route lookup, it happened that some code
> paths didn't properly sanitise their ->flowi4_tos. So this mechanism
> is fragile and we had bugs in the past where ECN bits slipped in and
> could end up being erroneously interpreted as RTO_ONLINK.
>
> [...]
Here is the summary with links:
- [net-next,1/3] ipv4: Don't reset ->flowi4_scope in ip_rt_fix_tos().
https://git.kernel.org/netdev/net-next/c/16a28267774c
- [net-next,2/3] ipv4: Avoid using RTO_ONLINK with ip_route_connect().
https://git.kernel.org/netdev/net-next/c/67e1e2f4854b
- [net-next,3/3] ipv4: Initialise ->flowi4_scope properly in ICMP handlers.
https://git.kernel.org/netdev/net-next/c/b1ad41384866
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply
* Re: [PATCH] mediatek/mt7601u: add debugfs exit function
From: Jakub Kicinski @ 2022-04-22 12:49 UTC (permalink / raw)
To: Kalle Valo
Cc: Bernard Zhao, David S. Miller, Paolo Abeni, Matthias Brugger,
linux-wireless, netdev, linux-arm-kernel, linux-mediatek,
linux-kernel, bernard
In-Reply-To: <87k0bhmuh6.fsf@kernel.org>
On Fri, 22 Apr 2022 10:45:57 +0300 Kalle Valo wrote:
> > When mt7601u loaded, there are two cases:
> > First when mt7601u is loaded, in function mt7601u_probe, if
> > function mt7601u_probe runs into error label err_hw,
> > mt7601u_cleanup didn`t cleanup the debugfs node.
> > Second when the module disconnect, in function mt7601u_disconnect,
> > mt7601u_cleanup didn`t cleanup the debugfs node.
> > This patch add debugfs exit function and try to cleanup debugfs
> > node when mt7601u loaded fail or unloaded.
Is this actually needed? Looks like wireless has a wiphy debugfs dir
now, so the entire thing should get removed recursively when probe
fails. The driver is not doing anything special.
> > diff --git a/drivers/net/wireless/mediatek/mt7601u/debugfs.c b/drivers/net/wireless/mediatek/mt7601u/debugfs.c
> > index 20669eacb66e..1ae3d75d3c9b 100644
> > --- a/drivers/net/wireless/mediatek/mt7601u/debugfs.c
> > +++ b/drivers/net/wireless/mediatek/mt7601u/debugfs.c
> > @@ -124,17 +124,22 @@ DEFINE_SHOW_ATTRIBUTE(mt7601u_eeprom_param);
> >
> > void mt7601u_init_debugfs(struct mt7601u_dev *dev)
> > {
> > - struct dentry *dir;
> > -
> > - dir = debugfs_create_dir("mt7601u", dev->hw->wiphy->debugfsdir);
> > - if (!dir)
> > + dev->root_dir = debugfs_create_dir("mt7601u", dev->hw->wiphy->debugfsdir);
> > + if (!dev->root_dir)
> > return;
^ permalink raw reply
* Re: [PATCH net-next 2/4] net: stmmac: introduce PHY-less setup support
From: Andrew Lunn @ 2022-04-22 12:58 UTC (permalink / raw)
To: Ong Boon Leong
Cc: Alexandre Torgue, Jose Abreu, Heiner Kallweit, Russell King,
Paolo Abeni, David S . Miller, Jakub Kicinski, Maxime Coquelin,
Alexandre Torgue, Giuseppe Cavallaro, netdev, linux-stm32,
linux-arm-kernel, linux-kernel
In-Reply-To: <20220422073505.810084-3-boon.leong.ong@intel.com>
On Fri, Apr 22, 2022 at 03:35:03PM +0800, Ong Boon Leong wrote:
> Certain platform uses PHY-less configuration whereby the MAC controller
> is connected to network switch chip directly over SGMII or 1000BASE-X.
>
> This patch prepares the stmmac driver to support PHY-less configuration
> described above.
The normal way to do a PHY less setup is to use a fixed-PHY. It offers
the same API to the MAC as a real PHY but is fixed speed, duplex
etc. The MAC sees a PHY as usual, and you don't need anything special
in the MAC.
What you need to do is extend your DSD to list the fixed-link. See
https://www.kernel.org/doc/html/latest/firmware-guide/acpi/dsd/phy.html#mac-node-example-with-a-fixed-link-subnode
Andrew
^ permalink raw reply
* Re: 9p EBADF with cache enabled (Was: 9p fs-cache tests/benchmark (was: 9p fscache Duplicate cookie detected))
From: asmadeus @ 2022-04-22 13:13 UTC (permalink / raw)
To: Christian Schoenebeck
Cc: David Howells, David Kahurani, davem, ericvh, kuba, linux-kernel,
lucho, netdev, v9fs-developer, Greg Kurz
In-Reply-To: <1817268.LulUJvKFVv@silver>
Christian Schoenebeck wrote on Thu, Apr 21, 2022 at 01:36:14PM +0200:
> I hope this does not sound harsh, wouldn't it make sense to revert
> eb497943fa215897f2f60fd28aa6fe52da27ca6c for now until those issues are sorted
> out? My concern is that it might take a long time to address them, and these
> are not minor issues.
I'm not sure that's possible at all, the related old fscache code has
been ripped out since and just reverting won't work.
I'm also curious why that behavior changed though, I don't think the
old code had any special handling of partially written pages either...
Understanding that might give a key to a small quick fix.
It is quite a bad bug though and really wish I could give it the
attention it deserves, early next month has a few holidays here
hopefully I'll be able to look at it closer then :/
--
Dominique
^ permalink raw reply
* [PATCH net-next] nfp: VF rate limit support
From: Simon Horman @ 2022-04-22 13:19 UTC (permalink / raw)
To: David Miller, Jakub Kicinski; +Cc: netdev, oss-drivers, Bin Chen
From: Bin Chen <bin.chen@corigine.com>
This patch enhances the NFP driver to support assignment of
both max_tx_rate and min_tx_rate to VFs.
The following configurations are all supported:
# ip link set $DEV vf $VF_NUM max_tx_rate $RATE_VALUE
# ip link set $DEV vf $VF_NUM min_tx_rate $RATE_VALUE
# ip link set $DEV vf $VF_NUM max_tx_rate $RATE_VALUE \
min_tx_rate $RATE_VALUE
# ip link set $DEV vf $VF_NUM min_tx_rate $RATE_VALUE \
max_tx_rate $RATE_VALUE
The max RATE_VALUE is limited to 0xFFFF which is about
63Gbps (using 1024 for 1G).
Signed-off-by: Bin Chen <bin.chen@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: Baowen Zheng <baowen.zheng@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
---
.../ethernet/netronome/nfp/nfp_net_common.c | 1 +
.../ethernet/netronome/nfp/nfp_net_sriov.c | 50 ++++++++++++++++++-
.../ethernet/netronome/nfp/nfp_net_sriov.h | 9 ++++
3 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index b412670d89b2..4340b69cc919 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1903,6 +1903,7 @@ const struct net_device_ops nfp_nfd3_netdev_ops = {
.ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid,
.ndo_set_vf_mac = nfp_app_set_vf_mac,
.ndo_set_vf_vlan = nfp_app_set_vf_vlan,
+ .ndo_set_vf_rate = nfp_app_set_vf_rate,
.ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk,
.ndo_set_vf_trust = nfp_app_set_vf_trust,
.ndo_get_vf_config = nfp_app_get_vf_config,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
index 4627715a5e32..bca0a864cb44 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c
@@ -142,6 +142,40 @@ int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
return nfp_net_sriov_update(app, vf, update, "vlan");
}
+int nfp_app_set_vf_rate(struct net_device *netdev, int vf,
+ int min_tx_rate, int max_tx_rate)
+{
+ struct nfp_app *app = nfp_app_from_netdev(netdev);
+ u32 vf_offset, ratevalue;
+ int err;
+
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate");
+ if (err)
+ return err;
+
+ if (max_tx_rate > 0 || min_tx_rate > 0) {
+ if (max_tx_rate > 0 && max_tx_rate < min_tx_rate) {
+ nfp_warn(app->cpp, "min-tx-rate exceeds max_tx_rate.\n");
+ return -EINVAL;
+ }
+
+ if (max_tx_rate > NFP_NET_VF_RATE_MAX || min_tx_rate > NFP_NET_VF_RATE_MAX) {
+ nfp_warn(app->cpp, "tx-rate exceeds 0x%x.\n", NFP_NET_VF_RATE_MAX);
+ return -EINVAL;
+ }
+ }
+
+ vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ;
+ ratevalue = FIELD_PREP(NFP_NET_VF_CFG_MAX_RATE,
+ max_tx_rate ? max_tx_rate : NFP_NET_VF_RATE_MAX) |
+ FIELD_PREP(NFP_NET_VF_CFG_MIN_RATE, min_tx_rate);
+
+ writel(ratevalue, app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_RATE);
+
+ return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_RATE,
+ "rate");
+}
+
int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
{
struct nfp_app *app = nfp_app_from_netdev(netdev);
@@ -228,9 +262,8 @@ int nfp_app_get_vf_config(struct net_device *netdev, int vf,
struct ifla_vf_info *ivi)
{
struct nfp_app *app = nfp_app_from_netdev(netdev);
- unsigned int vf_offset;
+ u32 vf_offset, mac_hi, rate;
u32 vlan_tag;
- u32 mac_hi;
u16 mac_lo;
u8 flags;
int err;
@@ -261,5 +294,18 @@ int nfp_app_get_vf_config(struct net_device *netdev, int vf,
ivi->trusted = FIELD_GET(NFP_NET_VF_CFG_CTRL_TRUST, flags);
ivi->linkstate = FIELD_GET(NFP_NET_VF_CFG_CTRL_LINK_STATE, flags);
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate");
+ if (!err) {
+ rate = readl(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_RATE);
+
+ ivi->max_tx_rate = FIELD_GET(NFP_NET_VF_CFG_MAX_RATE, rate);
+ ivi->min_tx_rate = FIELD_GET(NFP_NET_VF_CFG_MIN_RATE, rate);
+
+ if (ivi->max_tx_rate == NFP_NET_VF_RATE_MAX)
+ ivi->max_tx_rate = 0;
+ if (ivi->min_tx_rate == NFP_NET_VF_RATE_MAX)
+ ivi->min_tx_rate = 0;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
index 7b72cc083476..2d445fa199dc 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h
@@ -20,6 +20,7 @@
#define NFP_NET_VF_CFG_MB_CAP_LINK_STATE (0x1 << 3)
#define NFP_NET_VF_CFG_MB_CAP_TRUST (0x1 << 4)
#define NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO (0x1 << 5)
+#define NFP_NET_VF_CFG_MB_CAP_RATE (0x1 << 6)
#define NFP_NET_VF_CFG_MB_RET 0x2
#define NFP_NET_VF_CFG_MB_UPD 0x4
#define NFP_NET_VF_CFG_MB_UPD_MAC (0x1 << 0)
@@ -28,6 +29,7 @@
#define NFP_NET_VF_CFG_MB_UPD_LINK_STATE (0x1 << 3)
#define NFP_NET_VF_CFG_MB_UPD_TRUST (0x1 << 4)
#define NFP_NET_VF_CFG_MB_UPD_VLAN_PROTO (0x1 << 5)
+#define NFP_NET_VF_CFG_MB_UPD_RATE (0x1 << 6)
#define NFP_NET_VF_CFG_MB_VF_NUM 0x7
/* VF config entry
@@ -48,10 +50,17 @@
#define NFP_NET_VF_CFG_VLAN_PROT 0xffff0000
#define NFP_NET_VF_CFG_VLAN_QOS 0xe000
#define NFP_NET_VF_CFG_VLAN_VID 0x0fff
+#define NFP_NET_VF_CFG_RATE 0xc
+#define NFP_NET_VF_CFG_MIN_RATE 0x0000ffff
+#define NFP_NET_VF_CFG_MAX_RATE 0xffff0000
+
+#define NFP_NET_VF_RATE_MAX 0xffff
int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto);
+int nfp_app_set_vf_rate(struct net_device *netdev, int vf, int min_tx_rate,
+ int max_tx_rate);
int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
int nfp_app_set_vf_trust(struct net_device *netdev, int vf, bool setting);
int nfp_app_set_vf_link_state(struct net_device *netdev, int vf,
--
2.30.2
^ permalink raw reply related
* Re: [PATCH net] net: sched: act_mirred: Reset ct info when mirror/redirect skb
From: Marcelo Ricardo Leitner @ 2022-04-22 13:41 UTC (permalink / raw)
To: Hangbin Liu
Cc: Eyal Birger, netdev, jhs, xiyou.wangcong, jiri, davem, kuba,
ahleihel, dcaratti, aconole, roid, Shmulik Ladkani
In-Reply-To: <YmE5N0aNisKVLAyt@Laptop-X1>
On Thu, Apr 21, 2022 at 07:00:07PM +0800, Hangbin Liu wrote:
> Hi Eyal,
> On Tue, Apr 19, 2022 at 09:14:38PM +0300, Eyal Birger wrote:
> > > > > On Mon, 9 Aug 2021 15:04:55 +0800 you wrote:
> > > > > > When mirror/redirect a skb to a different port, the ct info should be reset
> > > > > > for reclassification. Or the pkts will match unexpected rules. For example,
> > > > > > with following topology and commands:
> > > > > >
> > > > > > -----------
> > > > > > |
> > > > > > veth0 -+-------
> > > > > > |
> > > > > > veth1 -+-------
> > > > > > |
> > > > > >
> > > > > > [...]
> > > > >
> > > > > Here is the summary with links:
> > > > > - [net] net: sched: act_mirred: Reset ct info when mirror/redirect skb
> > > > > https://git.kernel.org/netdev/net/c/d09c548dbf3b
> > > >
> > > > Unfortunately this commit breaks DNAT when performed before going via mirred
> > > > egress->ingress.
> > > >
> > > > The reason is that connection tracking is lost and therefore a new state
> > > > is created on ingress.
> > > >
> > > > This breaks existing setups.
> > > >
> > > > See below a simplified script reproducing this issue.
>
> I think we come in to a paradox state. Some user don't want to have previous
> ct info after mirror, while others would like to keep. In my understanding,
> when we receive a pkt from a interface, the skb should be clean and no ct info
> at first. But I may wrong.
Makes sense to me. Moreover, there were a couple of fixes on this on
mirred around that time frame/area (like f799ada6bf23 ("net: sched:
act_mirred: drop dst for the direction from egress to ingress")). That's
because we are seeing that mirred xmit action when switching to
ingress direction should be as close skb_scrub_packet. OVS needs this
scrubbing as well, btw. This ct information could be easily stale if
there were other packet changes after it.
Point being, if we really need the knob for backwards compatibility
here, it may have to be a broader one.
>
> Jamal, Wang Cong, Jiri, do you have any comments?
>
> > >
> > > I guess I can understand why the reproducer triggers it, but I fail to
> > > see the actual use case you have behind it. Can you please elaborate
> > > on it?
> >
> > One use case we use mirred egress->ingress redirect for is when we want to
> > reroute a packet after applying some change to the packet which would affect
> > its routing. for example consider a bpf program running on tc ingress (after
> > mirred) setting the skb->mark based on some criteria.
> >
> > So you have something like:
> >
> > packet routed to dummy device based on some criteria ->
> > mirred redirect to ingress ->
> > classification by ebpf logic at tc ingress ->
> > packet routed again
> >
> > We have a setup where DNAT is performed before this flow in that case the
> > ebpf logic needs to see the packet after the NAT.
>
> Is it possible to check whether it's need to set the skb->mark before DNAT?
> So we can update it before egress and no need to re-route.
>
> Thanks
> Hangbin
>
^ permalink raw reply
* [PATCH net-next] 1588 support on bcm54210pe
From: Lasse Johnsen @ 2022-04-22 14:21 UTC (permalink / raw)
To: Andrew Lunn
Cc: netdev, richardcochran, Gordon Hollingworth, Ahmad Byagowi,
Heiner Kallweit, Russell King, bcm-kernel-feedback-list,
Florian Fainelli
In-Reply-To: <YmBc2E2eCPHMA7lR@lunn.ch>
Hi Andrew,
> On 20 Apr 2022, at 20:19, Andrew Lunn <andrew@lunn.ch> wrote:
>
> On Wed, Apr 20, 2022 at 03:03:26PM +0100, Lasse Johnsen wrote:
>> Hello,
>>
>>
>> The attached set of patches adds support for the IEEE1588 functionality on the BCM54210PE PHY using the Linux Kernel mii_timestamper interface. The BCM54210PE PHY can be found in the Raspberry PI Compute Module 4 and the work has been undertaken by Timebeat.app on behalf of Raspberry PI with help and support from the nice engineers at Broadcom.
>
> Hi Lasse
>
> There are a few process issues you should address.
>
> Please wrap your email at about 80 characters.
>
> Please take a read of
>
> https://www.kernel.org/doc/html/latest/process/submitting-patches.html
>
> and
>
> https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html#netdev-faq
>
> It is a bit of a learning curve getting patches accepted, and you have
> to follow the processes defined in these documents.
I have read the documents, I understand about 10% of them and I am considering jumping off a tall building :-)
I’ve changed the subject of the email. How did I do?
>
>> arch/arm/configs/bcm2711_defconfig | 1 +
>> arch/arm64/configs/bcm2711_defconfig | 1 +
>
> You will need to split these changes up. defconfg changes go via the
> Broadcom maintainers. PHY driver changes go via netdev. You can
> initially post them as a series, but in the end you might need to send
> them to different people/lists.
>
Ok. I was asked by Florian to put the Broadcom maintainers in Cc so I will do this to begin with.
>> +obj-$(CONFIG_BCM54120PE_PHY) += bcm54210pe_ptp.o
>
> How specific is this code to the bcm54210pe? Should it work for any
> bcm54xxx PHY? You might want to name this file broadcom_ptp.c if it
> will work with any PHY supported by broadcom.c.
I am confident that this code is relevant exclusively to the BCM54210PE. It will not even work with the BCM54210, BCM54210S and BCM54210SE PHYs.
>
>> +static bool bcm54210pe_fetch_timestamp(u8 txrx, u8 message_type, u16 seq_id, struct bcm54210pe_private *private, u64 *timestamp)
>> +{
>> + struct bcm54210pe_circular_buffer_item *item;
>> + struct list_head *this, *next;
>> +
>> + u8 index = (txrx * 4) + message_type;
>> +
>> + if(index >= CIRCULAR_BUFFER_COUNT)
>> + {
>> + return false;
>> + }
>
> Please run your code through ./scripts/checkpatch.pl. You will find
> the code has a number of code style issues which need cleaning up.
I am about to respond to Richard's mail with an amended set of patches which is much cleaner. checkpatch now complains only about a Signed-off line and asks if Maintainers needs updating because I’ve added a file (I guess it probably does).
>
>> +#if IS_ENABLED (CONFIG_BCM54120PE_PHY)
>> +{
>> + .phy_id = PHY_ID_BCM54213PE,
>> + .phy_id_mask = 0xffffffff,
>> + .name = "Broadcom BCM54210PE",
>> + /* PHY_GBIT_FEATURES */
>> + .config_init = bcm54xx_config_init,
>> + .ack_interrupt = bcm_phy_ack_intr,
>> + .config_intr = bcm_phy_config_intr,
>> + .probe = bcm54210pe_probe,
>> +#elif
>> +{
>> .phy_id = PHY_ID_BCM54213PE,
>> .phy_id_mask = 0xffffffff,
>> .name = "Broadcom BCM54213PE",
>> @@ -786,6 +804,7 @@ static struct phy_driver broadcom_drivers[] = {
>> .config_init = bcm54xx_config_init,
>> .ack_interrupt = bcm_phy_ack_intr,
>> .config_intr = bcm_phy_config_intr,
>> +#endif
>
> Don't replace the existing entry, extend it with your new
> functionality.
Is what you propose possible? Isn’t the issue here that the two PHYs (54213PE and 54210PE) present themselves with the same phy ID? If there is a way to separate them then I will need your instruction on how to do it.
>
> Andrew
All the best,
Lasse
^ permalink raw reply
* Re: [PATCH net-next 1/3] ipv4: Don't reset ->flowi4_scope in ip_rt_fix_tos().
From: David Ahern @ 2022-04-22 14:40 UTC (permalink / raw)
To: Guillaume Nault
Cc: David Miller, Jakub Kicinski, Paolo Abeni, netdev,
Hideaki YOSHIFUJI, dccp
In-Reply-To: <20220422105345.GA15621@debian.home>
On 4/22/22 4:53 AM, Guillaume Nault wrote:
> On Thu, Apr 21, 2022 at 08:30:52PM -0600, David Ahern wrote:
>> On 4/20/22 5:21 PM, Guillaume Nault wrote:
>>> All callers already initialise ->flowi4_scope with RT_SCOPE_UNIVERSE,
>>> either by manual field assignment, memset(0) of the whole structure or
>>> implicit structure initialisation of on-stack variables
>>> (RT_SCOPE_UNIVERSE actually equals 0).
>>>
>>> Therefore, we don't need to always initialise ->flowi4_scope in
>>> ip_rt_fix_tos(). We only need to reduce the scope to RT_SCOPE_LINK when
>>> the special RTO_ONLINK flag is present in the tos.
>>>
>>> This will allow some code simplification, like removing
>>> ip_rt_fix_tos(). Also, the long term idea is to remove RTO_ONLINK
>>> entirely by properly initialising ->flowi4_scope, instead of
>>> overloading ->flowi4_tos with a special flag. Eventually, this will
>>> allow to convert ->flowi4_tos to dscp_t.
>>>
>>> Signed-off-by: Guillaume Nault <gnault@redhat.com>
>>> ---
>>> It's important for the correctness of this patch that all callers
>>> initialise ->flowi4_scope to 0 (in one way or another). Auditing all of
>>> them is long, although each case is pretty trivial.
>>>
>>> If it helps, I can send a patch series that converts implicit
>>> initialisation of ->flowi4_scope with an explicit assignment to
>>> RT_SCOPE_UNIVERSE. This would also have the advantage of making it
>>> clear to future readers that ->flowi4_scope _has_ to be initialised. I
>>> haven't sent such patch series to not overwhelm reviewers with trivial
>>> and not technically-required changes (there are 40+ places to modify,
>>> scattered over 30+ different files). But if anyone prefers explicit
>>> initialisation everywhere, then just let me know and I'll send such
>>> patches.
>>
>> There are a handful of places that open code the initialization of the
>> flow struct. I *think* I found all of them in 40867d74c374.
>
> By open code, do you mean "doesn't use flowi4_init_output() nor
> ip_tunnel_init_flow()"? If so, I think there are many more.
>
no, you made a comment about flow struct being initialized to 0 which
implicitly initializes scope. My comment is that there are only a few
places that do not use either `memset(flow, 0, sizeof())` or `struct
flowi4 fl4 = {}` to fully initialize the struct.
^ permalink raw reply
* Re: [syzbot] KASAN: use-after-free Read in tcp_retransmit_timer (5)
From: Tetsuo Handa @ 2022-04-22 14:40 UTC (permalink / raw)
To: Santosh Shilimkar, OFED mailing list
Cc: syzbot, andrii, andriin, ast, daniel, davem, dsahern, edumazet,
john.fastabend, kafai, kpsingh, kuba, kuznet, netdev,
songliubraving, syzkaller-bugs, tpa, yhs, yoshfuji, bpf
In-Reply-To: <c389e47f-8f82-fd62-8c1d-d9481d2f71ff@I-love.SAKURA.ne.jp>
Hello, RDS developers.
I was thinking that BPF program is relevant with the TCP/IPv6 socket triggering
use-after-free access. But disassembling syzkaller-generated BPF program concluded
that what "char program[2053]" is doing is not important
( https://lkml.kernel.org/r/d21e278f-a3ff-8603-f6ba-b51a8cddafa8@I-love.SAKURA.ne.jp ).
Then, I realized that TCP/IPv6 port 16385 (which the reproducer is accessing) is
used by kernel RDS server, which can explain
"It seems that a socket with sk->sk_net_refcnt=0 is created by unshare(CLONE_NEWNET)"
at https://lkml.kernel.org/r/fa445f0e-32b7-5e0d-9326-94bc5adba4c1@I-love.SAKURA.ne.jp
because the kernel RDS server starts during boot procedure.
------------------------------------------------------------
root@fuzz:~# unshare -n netstat -tanpe
Active Internet connections (servers and established)
Proto Recv-Q Send-Q Local Address Foreign Address State User Inode PID/Program name
tcp6 0 0 :::16385 :::* LISTEN 0 19627 -
------------------------------------------------------------
With the debug printk() patch shown below,
------------------------------------------------------------
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 0ec2f5906a27..20b3c42b4140 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -429,7 +429,8 @@ static void net_free(struct net *net)
{
if (refcount_dec_and_test(&net->passive)) {
kfree(rcu_access_pointer(net->gen));
- kmem_cache_free(net_cachep, net);
+ memset(net, POISON_FREE, sizeof(struct net));
+ //kmem_cache_free(net_cachep, net);
}
}
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 09cadd556d1e..5792fe3df8ac 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -146,10 +146,9 @@ int rds_tcp_accept_one(struct socket *sock)
my_addr = &saddr;
peer_addr = &daddr;
#endif
- rdsdebug("accepted family %d tcp %pI6c:%u -> %pI6c:%u\n",
- sock->sk->sk_family,
- my_addr, ntohs(inet->inet_sport),
- peer_addr, ntohs(inet->inet_dport));
+ pr_info("accepted family %d tcp %pI6c:%u -> %pI6c:%u refcnt=%d sock_net=%px init_net=%px\n",
+ sock->sk->sk_family, my_addr, ntohs(inet->inet_sport), peer_addr,
+ ntohs(inet->inet_dport), sock->sk->sk_net_refcnt, sock_net(sock->sk), &init_net);
#if IS_ENABLED(CONFIG_IPV6)
/* sk_bound_dev_if is not set if the peer address is not link local
------------------------------------------------------------
I get
accepted family 10 tcp ::ffff:127.0.0.1:16385 -> ::ffff:127.0.0.1:33086 refcnt=0 sock_net=ffffffff860d89c0 init_net=ffffffff860d89c0
if I do
# echo > /dev/tcp/127.0.0.1/16385
from init_net namespace, and I get
accepted family 10 tcp ::ffff:127.0.0.1:16385 -> ::ffff:127.0.0.1:33088 refcnt=0 sock_net=ffff88810a208000 init_net=ffffffff860d89c0
if I do
# echo > /dev/tcp/127.0.0.1/16385
from non-init_net namespace. Note that sock->sk->sk_net_refcnt is 0 in both cases.
Like commit 2303f994b3e18709 ("mptcp: Associate MPTCP context with TCP socket") says
/* kernel sockets do not by default acquire net ref, but TCP timer
* needs it.
*/
, I came to feel that e.g. rds_tcp_accept_one() is accessing sock_net(sock->sk) on
accepted sockets with sock->sk->sk_net_refcnt=0 (because the listening socket was
created by kernel) is causing this problem. Why not rds kernel server does
sock->sk->sk_net_refcnt = 1;
get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
on accepted sockets like mptcp_subflow_create_socket() does?
For your testing, below is the latest reproducer.
You can try this reproducer with keep-memory-poisoned patch shown above.
------------------------------------------------------------
// https://syzkaller.appspot.com/bug?id=8f0e04b2beffcd42f044d46879cc224f6eb71a99
// autogenerated by syzkaller (https://github.com/google/syzkaller)
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#ifndef MSG_PROBE
#define MSG_PROBE 0x10
#endif
/*
 * Scratch state for assembling a single netlink request in place.
 * The request is built inside buf; the same buffer is later reused to
 * receive the kernel's reply.
 */
struct nlmsg {
    char *pos;                 /* write cursor: next free byte inside buf */
    int nesting;               /* must be 0 when the message is sent; presumably
                                * the number of still-open nested attributes */
    struct nlattr *nested[8];  /* presumably the open nested attributes; not
                                * used by the helpers visible in this program */
    char buf[4096];            /* raw message bytes, header first */
};
/*
 * Start a fresh netlink request in nlmsg.
 *
 * Zeroes the whole scratch structure, fills in the message type and flags
 * (NLM_F_REQUEST | NLM_F_ACK are always set, `flags` is OR-ed in), copies
 * the `size`-byte family header `data` right behind the nlmsghdr and leaves
 * the write cursor at the aligned end of that header.
 */
static void netlink_init(struct nlmsg* nlmsg, int typ, int flags,
                         const void* data, int size)
{
    struct nlmsghdr* header;
    char* payload;

    memset(nlmsg, 0, sizeof(*nlmsg));

    header = (struct nlmsghdr*)nlmsg->buf;
    header->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
    header->nlmsg_type = typ;

    /* The family-specific header starts right after the nlmsghdr. */
    payload = (char*)(header + 1);
    memcpy(payload, data, size);
    nlmsg->pos = payload + NLMSG_ALIGN(size);
}
/*
 * Append one attribute of type `typ` carrying `size` bytes of `data` at the
 * current write cursor, then advance the cursor by the aligned attribute
 * length. No bounds check is done here; netlink_send_ext() verifies that the
 * cursor has not run past the buffer before the message is sent.
 */
static void netlink_attr(struct nlmsg* nlmsg, int typ, const void* data,
                         int size)
{
    struct nlattr* const attr = (struct nlattr*)nlmsg->pos;

    attr->nla_type = typ;
    attr->nla_len = sizeof(*attr) + size;

    /* A zero-length attribute has a header only. */
    if (size > 0) {
        void* const payload = attr + 1;
        memcpy(payload, data, size);
    }

    nlmsg->pos = nlmsg->pos + NLMSG_ALIGN(attr->nla_len);
}
static int netlink_send_ext(struct nlmsg* nlmsg, int sock, uint16_t reply_type,
int* reply_len, bool dofail)
{
if (nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf) || nlmsg->nesting)
exit(1);
struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf;
hdr->nlmsg_len = nlmsg->pos - nlmsg->buf;
struct sockaddr_nl addr;
memset(&addr, 0, sizeof(addr));
addr.nl_family = AF_NETLINK;
ssize_t n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0,
(struct sockaddr*)&addr, sizeof(addr));
if (n != (ssize_t)hdr->nlmsg_len) {
if (dofail)
exit(1);
return -1;
}
n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0);
if (reply_len)
*reply_len = 0;
if (n < 0) {
if (dofail)
exit(1);
return -1;
}
if (n < (ssize_t)sizeof(struct nlmsghdr)) {
errno = EINVAL;
if (dofail)
exit(1);
return -1;
}
if (hdr->nlmsg_type == NLMSG_DONE)
return 0;
if (reply_len && hdr->nlmsg_type == reply_type) {
*reply_len = n;
return 0;
}
if (n < (ssize_t)(sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))) {
errno = EINVAL;
if (dofail)
exit(1);
return -1;
}
if (hdr->nlmsg_type != NLMSG_ERROR) {
errno = EINVAL;
if (dofail)
exit(1);
return -1;
}
errno = -((struct nlmsgerr*)(hdr + 1))->error;
return -errno;
}
/*
 * Convenience wrapper around netlink_send_ext(): no reply type is expected,
 * no reply length is reported, and any failure terminates the program
 * (dofail = true).
 */
static int netlink_send(struct nlmsg* nlmsg, int sock)
{
    const int rc = netlink_send_ext(nlmsg, sock, 0, NULL, true);

    return rc;
}
static void netlink_device_change(int sock, const char* name, const void* mac, int macsize)
{
struct nlmsg nlmsg;
struct ifinfomsg hdr;
memset(&hdr, 0, sizeof(hdr));
hdr.ifi_flags = hdr.ifi_change = IFF_UP;
hdr.ifi_index = if_nametoindex(name);
netlink_init(&nlmsg, RTM_NEWLINK, 0, &hdr, sizeof(hdr));
netlink_attr(&nlmsg, IFLA_ADDRESS, mac, macsize);
netlink_send(&nlmsg, sock);
}
static void netlink_add_addr(int sock, const char* dev, const void* addr, int addrsize)
{
struct nlmsg nlmsg;
struct ifaddrmsg hdr;
memset(&hdr, 0, sizeof(hdr));
hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6;
hdr.ifa_prefixlen = addrsize == 4 ? 24 : 120;
hdr.ifa_scope = RT_SCOPE_UNIVERSE;
hdr.ifa_index = if_nametoindex(dev);
netlink_init(&nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr,
sizeof(hdr));
netlink_attr(&nlmsg, IFA_LOCAL, addr, addrsize);
netlink_attr(&nlmsg, IFA_ADDRESS, addr, addrsize);
netlink_send(&nlmsg, sock);
}
/*
 * Parse the dotted-quad string `addr` and assign it to interface `dev`.
 * The inet_pton() result is not checked.
 */
static void netlink_add_addr4(int sock, const char* dev, const char* addr)
{
    struct in_addr parsed;

    inet_pton(AF_INET, addr, &parsed);
    netlink_add_addr(sock, dev, &parsed, sizeof(parsed));
}
/*
 * Parse the IPv6 text address `addr` and assign it to interface `dev`.
 * The inet_pton() result is not checked.
 */
static void netlink_add_addr6(int sock, const char* dev, const char* addr)
{
    struct in6_addr parsed;

    inet_pton(AF_INET6, addr, &parsed);
    netlink_add_addr(sock, dev, &parsed, sizeof(parsed));
}
/*
 * Configure the loopback device of the current network namespace over a
 * temporary NETLINK_ROUTE socket: add 127.0.0.1 and ::1 to "lo", then set
 * its hardware address and bring it up. Exits with status 1 if the netlink
 * socket cannot be created; the helpers exit on any netlink failure.
 */
static void initialize_netdevices(void)
{
    uint64_t macaddr = 0x00aaaaaaaaaa;
    int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

    /*
     * socket() signals failure with a negative return value. EOF is a stdio
     * sentinel that is only guaranteed to be some negative int, so it is not
     * the right thing to compare a file descriptor against.
     */
    if (fd < 0)
        exit(1);

    netlink_add_addr4(fd, "lo", "127.0.0.1");
    netlink_add_addr6(fd, "lo", "::1");
    /* Only the first ETH_ALEN bytes of macaddr are sent as the address. */
    netlink_device_change(fd, "lo", &macaddr, ETH_ALEN);
    close(fd);
}
#ifndef __NR_bpf
#define __NR_bpf 321
#endif
/*
 * Core of the reproducer. Loads a two-instruction socket-filter BPF program,
 * opens a TCP socket, and then loops forever doing a sendmsg() with
 * MSG_FASTOPEN toward 127.0.0.1:0x4001 followed by attaching the BPF program
 * to that socket. The loop has no exit; alarm(3) schedules a SIGALRM after
 * three seconds, and no handler is installed anywhere in this program, so
 * presumably the default action ends the process.
 * None of the system call results are checked.
 */
static void execute_one(void)
{
const union bpf_attr attr = {
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
.insn_cnt = 2,
/* Two 8-byte instructions, opcodes 0xb7 and 0x95 - presumably "r0 = 0; exit". */
.insns = (unsigned long long) "\xb7\x00\x00\x00\x00\x00\x00\x00\x95\x00\x00\x00\x00\x00\x00\x00",
.license = (unsigned long long) "GPL",
};
struct sockaddr_in addr = {
.sin_family = AF_INET,
.sin_port = htons(0x4001), /* where kernel RDS TCPv6 socket is listening */
.sin_addr.s_addr = inet_addr("127.0.0.1")
};
/* No payload: only the destination address is supplied to sendmsg(). */
const struct msghdr msg = {
.msg_name = &addr,
.msg_namelen = sizeof(addr),
};
/* NOTE(review): 72 is the attr size handed to bpf(2); presumably the size syzkaller recorded - confirm. */
const int bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, 72);
const int sock_fd = socket(PF_INET, SOCK_STREAM, 0);
/* Bound the run time of the endless loop below. */
alarm(3);
while (1) {
sendmsg(sock_fd, &msg, MSG_OOB | MSG_PROBE | MSG_CONFIRM | MSG_FASTOPEN);
setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_BPF, &bpf_fd, sizeof(bpf_fd));
}
}
/*
 * Entry point: move into a new network namespace, configure its loopback
 * device, then run the reproducer loop. Returns 1 if the namespace cannot
 * be created. The command-line arguments are unused.
 */
int main(int argc, char *argv[])
{
    if (unshare(CLONE_NEWNET) != 0)
        return 1;

    initialize_netdevices();
    execute_one();
    return 0;
}
------------------------------------------------------------
^ permalink raw reply related
* Re: [net-next v4 0/3] use standard sysctl macro
From: Luis Chamberlain @ 2022-04-22 14:44 UTC (permalink / raw)
To: xiangxia.m.yue
Cc: netdev, linux-fsdevel, Kees Cook, Iurii Zaikin, David S. Miller,
Jakub Kicinski, Paolo Abeni, Hideaki YOSHIFUJI, David Ahern,
Simon Horman, Julian Anastasov, Pablo Neira Ayuso,
Jozsef Kadlecsik, Florian Westphal, Shuah Khan, Andrew Morton,
Alexei Starovoitov, Eric Dumazet, Lorenz Bauer, Akhmat Karakotov
In-Reply-To: <20220422070141.39397-1-xiangxia.m.yue@gmail.com>
On Fri, Apr 22, 2022 at 03:01:38PM +0800, xiangxia.m.yue@gmail.com wrote:
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> This patchset introduce sysctl macro or replace var
> with macro.
>
> Tonghao Zhang (3):
> net: sysctl: use shared sysctl macro
> net: sysctl: introduce sysctl SYSCTL_THREE
> selftests/sysctl: add sysctl macro test
I see these are based on net-next, to avoid conflicts with
sysctl development this may be best based on sysctl-next
though. Jakub?
Luis
^ permalink raw reply
* Re: [PATCH net] tls: Skip tls_append_frag on zero copy size
From: Jakub Kicinski @ 2022-04-22 14:55 UTC (permalink / raw)
To: Maxim Mikityanskiy
Cc: Boris Pismenny, John Fastabend, Daniel Borkmann, David S. Miller,
Paolo Abeni, Tariq Toukan, Aviad Yehezkel, Ilya Lesokhin, netdev
In-Reply-To: <da984a08-1730-1b0c-d845-cf7ec732ba4c@nvidia.com>
On Thu, 21 Apr 2022 12:47:18 +0300 Maxim Mikityanskiy wrote:
> On 2022-04-18 17:56, Maxim Mikityanskiy wrote:
> > On 2022-04-14 13:28, Jakub Kicinski wrote:
> >> I appreciate you're likely trying to keep the fix minimal but Greg
> >> always says "fix it right, worry about backports later".
> >>
> >> I think we should skip more, we can reorder the mins and if
> >> min(size, rec space) == 0 then we can skip the allocation as well.
> >
> > Sorry, I didn't get the idea. Could you elaborate?
> >
> > Reordering the mins:
> >
> > copy = min_t(size_t, size, max_open_record_len - record->len);
> > copy = min_t(size_t, copy, pfrag->size - pfrag->offset);
> >
> > I assume by skipping the allocation you mean skipping
> > tls_do_allocation(), right? Do you suggest to skip it if the result of
> > the first min_t() is 0?
> >
> > record->len used in the first min_t() comes from ctx->open_record, which
> > either exists or is allocated by tls_do_allocation(). If we move the
> > copy == 0 check above the tls_do_allocation() call, first we'll have to
> > check whether ctx->open_record is NULL, which is currently checked by
> > tls_do_allocation() itself.
> >
> > If open_record is not NULL, there isn't much to skip in
> > tls_do_allocation on copy == 0, the main part is already skipped,
> > regardless of the value of copy. If open_record is NULL, we can't skip
> > tls_do_allocation, and copy won't be 0 afterwards.
> >
> > To compare, before (pseudocode):
> >
> > tls_do_allocation {
> > if (!ctx->open_record)
> > ALLOCATE RECORD
> > Now ctx->open_record is not NULL
> > if (!sk_page_frag_refill(sk, pfrag))
> > return -ENOMEM
> > }
> > handle errors from tls_do_allocation
> > copy = min(size, pfrag->size - pfrag->offset)
> > copy = min(copy, max_open_record_len - ctx->open_record->len)
> > if (copy)
> > copy data and append frag
> >
> > After:
> >
> > if (ctx->open_record) {
> > copy = min(size, max_open_record_len - ctx->open_record->len)
> > if (copy) {
> > // You want to put this part of tls_do_allocation under if (copy)?
> > if (!sk_page_frag_refill(sk, pfrag))
> > handle errors
> > copy = min(copy, pfrag->size - pfrag->offset)
> > if (copy)
> > copy data and append frag
> > }
> > } else {
> > ALLOCATE RECORD
> > if (!sk_page_frag_refill(sk, pfrag))
> > handle errors
> > // Have to do this after the allocation anyway.
> > copy = min(size, max_open_record_len - ctx->open_record->len)
> > copy = min(copy, pfrag->size - pfrag->offset)
> > if (copy)
> > copy data and append frag
> > }
> >
> > Either I totally don't get what you suggested, or it doesn't make sense
> > to me, because we have +1 branch in the common path when a record is
> > open and copy is not 0, no changes when there is no record, and more
> > repeating code hard to compress.
> >
> > If I missed your idea, please explain in more details.
>
> Jakub, is your comment still relevant after my response? If not, can the
> patch be merged?
I'd prefer if you refactored the code so tls_push_data() looks more
natural. But the patch is correct so if you don't want to you can
repost.
Sorry for the delay.
^ permalink raw reply
* [PATCH linx-net 00/28]: Move Siena into a separate subdirectory
From: Martin Habets @ 2022-04-22 14:57 UTC (permalink / raw)
To: kuba, pabeni, davem; +Cc: netdev, ecree.xilinx
The Siena NICs (SFN5000 and SFN6000 series) went EOL in November 2021.
Most of these adapters have been removed from our test labs, and testing
has been reduced to a minimum.
This patch series creates a separate kernel module for the Siena architecture,
analogous to what was done for Falcon some years ago.
This reduces our maintenance for the sfc.ko module, and allows us to
enhance the EF10 and EF100 drivers without the risk of breaking Siena NICs.
After this series Siena code can be removed from sfc.ko. That will be posted
as a separate (small) series.
The Siena module is not built by default, but can be enabled
using Kconfig option SFC_SIENA. This will create module sfc-siena.ko.
Patches
Patch 1 disables the Siena code in the sfc.ko module.
Patches 2-4 establish the code base for the Siena driver.
Patches 5-20 ensure the allyesconfig build succeeds.
Patches 21-28 make changes specific to the Siena module.
I do not expect patches 2 and 3 to be reviewed; they are FYI only.
No checkpatch issues were resolved as part of these 2, but they
were fixed in the subsequent patches.
Testing
Various build tests were done such as allyesconfig, W=1 and sparse.
The new sfc-siena.ko and sfc.ko modules were tested on a machine with both
these NICs in them, and several tests were run on both drivers.
Inserting the updated sfc.ko and the new sfc-siena.ko modules at the same
time works, so no external functions exist with the same name.
Martin Habets <habetsm.xilinx@gmail.com>
---
Martin Habets (28):
sfc: Disable Siena support
sfc: Move Siena specific files
sfc: Copy shared files needed for Siena
sfc: Remove build references to missing functionality
sfc/siena: Rename functions in efx_common.h to avoid conflicts with sfc
sfc/siena: Rename functions in efx.h to avoid conflicts with sfc
sfc/siena: Rename functions in efx_channels.h to avoid conflicts with sfc
sfc/siena: Update nic.h to avoid conflicts with sfc
sfc/siena: Remove unused functions in tx.h to avoid conflicts with sfc
sfc/siena: Rename functions in rx_common.h to avoid conflicts with sfc
sfc/siena: Rename functions in tx_common.h to avoid conflicts with sfc
sfc/siena: Rename functions in selftest.h to avoid conflicts with sfc
sfc/siena: Rename functions in ethtool_common.h to avoid conflicts with sfc
sfc/siena: Rename functions in ptp.h to avoid conflicts with sfc
sfc/siena: Rename functions in mcdi.h to avoid conflicts with sfc
sfc/siena: Rename functions in mcdi_port.h to avoid conflicts with sfc
sfc/siena: Rename functions in mcdi_port_common.h to avoid conflicts with sfc
sfc/siena: Rename loopback_mode in net_driver.h to avoid a conflict with sfc
sfc/siena: Rename functions in nic_common.h to avoid conflicts with sfc
sfc/siena: Inline functions in sriov.h to avoid conflicts with sfc
sfc: Add a basic Siena module
siena: Make the (un)load message more specific
siena: Make MTD support specific for Siena
siena: Make SRIOV support specific for Siena
siena: Make HWMON support specific for Siena
sfc/siena: Make MCDI logging support specific for Siena
sfc/siena: Make PTP and reset support specific for Siena
sfc/siena: Reinstate SRIOV init/fini function calls
drivers/net/ethernet/sfc/Kconfig | 15
drivers/net/ethernet/sfc/Makefile | 5
drivers/net/ethernet/sfc/efx.c | 17
drivers/net/ethernet/sfc/farch.c | 2988 ---
drivers/net/ethernet/sfc/nic.h | 4
drivers/net/ethernet/sfc/siena.c | 1109 -
drivers/net/ethernet/sfc/siena/Kconfig | 45
drivers/net/ethernet/sfc/siena/Makefile | 11
drivers/net/ethernet/sfc/siena/bitfield.h | 614 +
drivers/net/ethernet/sfc/siena/efx.c | 1325 +
drivers/net/ethernet/sfc/siena/efx.h | 218
drivers/net/ethernet/sfc/siena/efx_channels.c | 1360 +
drivers/net/ethernet/sfc/siena/efx_channels.h | 45
drivers/net/ethernet/sfc/siena/efx_common.c | 1408 +
drivers/net/ethernet/sfc/siena/efx_common.h | 118
drivers/net/ethernet/sfc/siena/enum.h | 176
drivers/net/ethernet/sfc/siena/ethtool.c | 282
drivers/net/ethernet/sfc/siena/ethtool_common.c | 1340 +
drivers/net/ethernet/sfc/siena/ethtool_common.h | 60
drivers/net/ethernet/sfc/siena/farch.c | 2990 +++
drivers/net/ethernet/sfc/siena/farch_regs.h | 2929 +++
drivers/net/ethernet/sfc/siena/filter.h | 309
drivers/net/ethernet/sfc/siena/io.h | 310
drivers/net/ethernet/sfc/siena/mcdi.c | 2260 ++
drivers/net/ethernet/sfc/siena/mcdi.h | 386
drivers/net/ethernet/sfc/siena/mcdi_mon.c | 531 +
drivers/net/ethernet/sfc/siena/mcdi_pcol.h |21968 +++++++++++++++++++++
drivers/net/ethernet/sfc/siena/mcdi_port.c | 110
drivers/net/ethernet/sfc/siena/mcdi_port.h | 17
drivers/net/ethernet/sfc/siena/mcdi_port_common.c | 1282 +
drivers/net/ethernet/sfc/siena/mcdi_port_common.h | 58
drivers/net/ethernet/sfc/siena/mtd.c | 124
drivers/net/ethernet/sfc/siena/net_driver.h | 1715 ++
drivers/net/ethernet/sfc/siena/nic.c | 530 +
drivers/net/ethernet/sfc/siena/nic.h | 206
drivers/net/ethernet/sfc/siena/nic_common.h | 251
drivers/net/ethernet/sfc/siena/ptp.c | 2201 ++
drivers/net/ethernet/sfc/siena/ptp.h | 45
drivers/net/ethernet/sfc/siena/rx.c | 400
drivers/net/ethernet/sfc/siena/rx_common.c | 1091 +
drivers/net/ethernet/sfc/siena/rx_common.h | 110
drivers/net/ethernet/sfc/siena/selftest.c | 807 +
drivers/net/ethernet/sfc/siena/selftest.h | 52
drivers/net/ethernet/sfc/siena/siena.c | 1113 +
drivers/net/ethernet/sfc/siena/siena_sriov.c | 1687 ++
drivers/net/ethernet/sfc/siena/siena_sriov.h | 79
drivers/net/ethernet/sfc/siena/sriov.h | 83
drivers/net/ethernet/sfc/siena/tx.c | 395
drivers/net/ethernet/sfc/siena/tx.h | 40
drivers/net/ethernet/sfc/siena/tx_common.c | 448
drivers/net/ethernet/sfc/siena/tx_common.h | 39
drivers/net/ethernet/sfc/siena/vfdi.h | 252
drivers/net/ethernet/sfc/siena/workarounds.h | 28
drivers/net/ethernet/sfc/siena_sriov.c | 1686 --
drivers/net/ethernet/sfc/siena_sriov.h | 76
55 files changed, 51859 insertions(+), 5889 deletions(-)
delete mode 100644 drivers/net/ethernet/sfc/farch.c
delete mode 100644 drivers/net/ethernet/sfc/siena.c
create mode 100644 drivers/net/ethernet/sfc/siena/Kconfig
create mode 100644 drivers/net/ethernet/sfc/siena/Makefile
create mode 100644 drivers/net/ethernet/sfc/siena/bitfield.h
create mode 100644 drivers/net/ethernet/sfc/siena/efx.c
create mode 100644 drivers/net/ethernet/sfc/siena/efx.h
create mode 100644 drivers/net/ethernet/sfc/siena/efx_channels.c
create mode 100644 drivers/net/ethernet/sfc/siena/efx_channels.h
create mode 100644 drivers/net/ethernet/sfc/siena/efx_common.c
create mode 100644 drivers/net/ethernet/sfc/siena/efx_common.h
create mode 100644 drivers/net/ethernet/sfc/siena/enum.h
create mode 100644 drivers/net/ethernet/sfc/siena/ethtool.c
create mode 100644 drivers/net/ethernet/sfc/siena/ethtool_common.c
create mode 100644 drivers/net/ethernet/sfc/siena/ethtool_common.h
create mode 100644 drivers/net/ethernet/sfc/siena/farch.c
create mode 100644 drivers/net/ethernet/sfc/siena/farch_regs.h
create mode 100644 drivers/net/ethernet/sfc/siena/filter.h
create mode 100644 drivers/net/ethernet/sfc/siena/io.h
create mode 100644 drivers/net/ethernet/sfc/siena/mcdi.c
create mode 100644 drivers/net/ethernet/sfc/siena/mcdi.h
create mode 100644 drivers/net/ethernet/sfc/siena/mcdi_mon.c
create mode 100644 drivers/net/ethernet/sfc/siena/mcdi_pcol.h
create mode 100644 drivers/net/ethernet/sfc/siena/mcdi_port.c
create mode 100644 drivers/net/ethernet/sfc/siena/mcdi_port.h
create mode 100644 drivers/net/ethernet/sfc/siena/mcdi_port_common.c
create mode 100644 drivers/net/ethernet/sfc/siena/mcdi_port_common.h
create mode 100644 drivers/net/ethernet/sfc/siena/mtd.c
create mode 100644 drivers/net/ethernet/sfc/siena/net_driver.h
create mode 100644 drivers/net/ethernet/sfc/siena/nic.c
create mode 100644 drivers/net/ethernet/sfc/siena/nic.h
create mode 100644 drivers/net/ethernet/sfc/siena/nic_common.h
create mode 100644 drivers/net/ethernet/sfc/siena/ptp.c
create mode 100644 drivers/net/ethernet/sfc/siena/ptp.h
create mode 100644 drivers/net/ethernet/sfc/siena/rx.c
create mode 100644 drivers/net/ethernet/sfc/siena/rx_common.c
create mode 100644 drivers/net/ethernet/sfc/siena/rx_common.h
create mode 100644 drivers/net/ethernet/sfc/siena/selftest.c
create mode 100644 drivers/net/ethernet/sfc/siena/selftest.h
create mode 100644 drivers/net/ethernet/sfc/siena/siena.c
create mode 100644 drivers/net/ethernet/sfc/siena/siena_sriov.c
create mode 100644 drivers/net/ethernet/sfc/siena/siena_sriov.h
create mode 100644 drivers/net/ethernet/sfc/siena/sriov.h
create mode 100644 drivers/net/ethernet/sfc/siena/tx.c
create mode 100644 drivers/net/ethernet/sfc/siena/tx.h
create mode 100644 drivers/net/ethernet/sfc/siena/tx_common.c
create mode 100644 drivers/net/ethernet/sfc/siena/tx_common.h
create mode 100644 drivers/net/ethernet/sfc/siena/vfdi.h
create mode 100644 drivers/net/ethernet/sfc/siena/workarounds.h
delete mode 100644 drivers/net/ethernet/sfc/siena_sriov.c
delete mode 100644 drivers/net/ethernet/sfc/siena_sriov.h
--
Martin Habets <habetsm.xilinx@gmail.com>
^ permalink raw reply
* [PATCH net-next 01/28] sfc: Disable Siena support
From: Martin Habets @ 2022-04-22 14:57 UTC (permalink / raw)
To: kuba, pabeni, davem; +Cc: netdev, ecree.xilinx
In-Reply-To: <165063937837.27138.6911229584057659609.stgit@palantir17.mph.net>
From: Martin Habets <martinh@xilinx.com>
Disable the build of Siena code until later in this patch series.
Prevent sfc.ko from binding to Siena NICs.
efx_init_sriov/efx_fini_sriov is only used for Siena. Remove calls
to those.
Signed-off-by: Martin Habets <habetsm.xilinx@gmail.com>
---
drivers/net/ethernet/sfc/Kconfig | 8 ++++----
drivers/net/ethernet/sfc/Makefile | 4 ++--
drivers/net/ethernet/sfc/efx.c | 17 -----------------
drivers/net/ethernet/sfc/nic.h | 4 ----
4 files changed, 6 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 97ce64079855..846fff16fa48 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -17,14 +17,14 @@ config NET_VENDOR_SOLARFLARE
if NET_VENDOR_SOLARFLARE
config SFC
- tristate "Solarflare SFC9000/SFC9100/EF100-family support"
+ tristate "Solarflare SFC9100/EF100-family support"
depends on PCI
depends on PTP_1588_CLOCK_OPTIONAL
select MDIO
select CRC32
help
This driver supports 10/40-gigabit Ethernet cards based on
- the Solarflare SFC9000-family and SFC9100-family controllers.
+ the Solarflare SFC9100-family controllers.
It also supports 10/25/40/100-gigabit Ethernet cards based
on the Solarflare EF100 networking IP in Xilinx FPGAs.
@@ -47,11 +47,11 @@ config SFC_MCDI_MON
This exposes the on-board firmware-managed sensors as a
hardware monitor device.
config SFC_SRIOV
- bool "Solarflare SFC9000-family SR-IOV support"
+ bool "Solarflare SFC9000/SFC9100-family SR-IOV support"
depends on SFC && PCI_IOV
default y
help
- This enables support for the SFC9000 I/O Virtualization
+ This enables support for the Single Root I/O Virtualization
features, allowing accelerated network performance in
virtualized environments.
config SFC_MCDI_LOGGING
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index 8bd01c429f91..838ee3cdc229 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
sfc-y += efx.o efx_common.o efx_channels.o nic.o \
- farch.o siena.o ef10.o \
+ ef10.o \
tx.o tx_common.o tx_tso.o rx.o rx_common.o \
selftest.o ethtool.o ethtool_common.o ptp.o \
mcdi.o mcdi_port.o mcdi_port_common.o \
@@ -8,7 +8,7 @@ sfc-y += efx.o efx_common.o efx_channels.o nic.o \
ef100.o ef100_nic.o ef100_netdev.o \
ef100_ethtool.o ef100_rx.o ef100_tx.o
sfc-$(CONFIG_SFC_MTD) += mtd.o
-sfc-$(CONFIG_SFC_SRIOV) += sriov.o siena_sriov.o ef10_sriov.o
+sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o
obj-$(CONFIG_SFC) += sfc.o
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 302dc835ac3d..5e7fe75cb1d4 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -795,10 +795,6 @@ static void efx_unregister_netdev(struct efx_nic *efx)
/* PCI device ID table */
static const struct pci_device_id efx_pci_table[] = {
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803), /* SFC9020 */
- .driver_data = (unsigned long) &siena_a0_nic_type},
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813), /* SFL9021 */
- .driver_data = (unsigned long) &siena_a0_nic_type},
{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903), /* SFC9120 PF */
.driver_data = (unsigned long) &efx_hunt_a0_nic_type},
{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903), /* SFC9120 VF */
@@ -1294,12 +1290,6 @@ static int __init efx_init_module(void)
if (rc)
goto err_notifier;
-#ifdef CONFIG_SFC_SRIOV
- rc = efx_init_sriov();
- if (rc)
- goto err_sriov;
-#endif
-
rc = efx_create_reset_workqueue();
if (rc)
goto err_reset;
@@ -1319,10 +1309,6 @@ static int __init efx_init_module(void)
err_pci:
efx_destroy_reset_workqueue();
err_reset:
-#ifdef CONFIG_SFC_SRIOV
- efx_fini_sriov();
- err_sriov:
-#endif
unregister_netdevice_notifier(&efx_netdev_notifier);
err_notifier:
return rc;
@@ -1335,9 +1321,6 @@ static void __exit efx_exit_module(void)
pci_unregister_driver(&ef100_pci_driver);
pci_unregister_driver(&efx_pci_driver);
efx_destroy_reset_workqueue();
-#ifdef CONFIG_SFC_SRIOV
- efx_fini_sriov();
-#endif
unregister_netdevice_notifier(&efx_netdev_notifier);
}
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 5c2fe3ce3f4d..251868235ae4 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -301,10 +301,6 @@ struct efx_ef10_nic_data {
int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
bool *data_mapped);
-int efx_init_sriov(void);
-void efx_fini_sriov(void);
-
-extern const struct efx_nic_type siena_a0_nic_type;
extern const struct efx_nic_type efx_hunt_a0_nic_type;
extern const struct efx_nic_type efx_hunt_a0_vf_nic_type;
^ permalink raw reply related
* [PATCH net-next 02/28] sfc: Move Siena specific files
From: Martin Habets @ 2022-04-22 14:57 UTC (permalink / raw)
To: kuba, pabeni, davem; +Cc: netdev, ecree.xilinx
In-Reply-To: <165063937837.27138.6911229584057659609.stgit@palantir17.mph.net>
From: Martin Habets <martinh@xilinx.com>
Files are only moved, no changes are made.
Signed-off-by: Martin Habets <habetsm.xilinx@gmail.com>
---
drivers/net/ethernet/sfc/farch.c | 2988 --------------------------
drivers/net/ethernet/sfc/siena.c | 1109 ----------
drivers/net/ethernet/sfc/siena/farch.c | 2988 ++++++++++++++++++++++++++
drivers/net/ethernet/sfc/siena/siena.c | 1109 ++++++++++
drivers/net/ethernet/sfc/siena/siena_sriov.c | 1686 +++++++++++++++
drivers/net/ethernet/sfc/siena/siena_sriov.h | 76 +
drivers/net/ethernet/sfc/siena_sriov.c | 1686 ---------------
drivers/net/ethernet/sfc/siena_sriov.h | 76 -
8 files changed, 5859 insertions(+), 5859 deletions(-)
delete mode 100644 drivers/net/ethernet/sfc/farch.c
delete mode 100644 drivers/net/ethernet/sfc/siena.c
create mode 100644 drivers/net/ethernet/sfc/siena/farch.c
create mode 100644 drivers/net/ethernet/sfc/siena/siena.c
create mode 100644 drivers/net/ethernet/sfc/siena/siena_sriov.c
create mode 100644 drivers/net/ethernet/sfc/siena/siena_sriov.h
delete mode 100644 drivers/net/ethernet/sfc/siena_sriov.c
delete mode 100644 drivers/net/ethernet/sfc/siena_sriov.h
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
deleted file mode 100644
index 9599123bc28d..000000000000
--- a/drivers/net/ethernet/sfc/farch.c
+++ /dev/null
@@ -1,2988 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2005-2006 Fen Systems Ltd.
- * Copyright 2006-2013 Solarflare Communications Inc.
- */
-
-#include <linux/bitops.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <linux/crc32.h>
-#include "net_driver.h"
-#include "bitfield.h"
-#include "efx.h"
-#include "rx_common.h"
-#include "tx_common.h"
-#include "nic.h"
-#include "farch_regs.h"
-#include "sriov.h"
-#include "siena_sriov.h"
-#include "io.h"
-#include "workarounds.h"
-
-/* Falcon-architecture (SFC9000-family) support */
-
-/**************************************************************************
- *
- * Configurable values
- *
- **************************************************************************
- */
-
-/* This is set to 16 for a good reason. In summary, if larger than
- * 16, the descriptor cache holds more than a default socket
- * buffer's worth of packets (for UDP we can only have at most one
- * socket buffer's worth outstanding). This combined with the fact
- * that we only get 1 TX event per descriptor cache means the NIC
- * goes idle.
- */
-#define TX_DC_ENTRIES 16
-#define TX_DC_ENTRIES_ORDER 1
-
-#define RX_DC_ENTRIES 64
-#define RX_DC_ENTRIES_ORDER 3
-
-/* If EFX_MAX_INT_ERRORS internal errors occur within
- * EFX_INT_ERROR_EXPIRE seconds, we consider the NIC broken and
- * disable it.
- */
-#define EFX_INT_ERROR_EXPIRE 3600
-#define EFX_MAX_INT_ERRORS 5
-
-/* Depth of RX flush request fifo */
-#define EFX_RX_FLUSH_COUNT 4
-
-/* Driver generated events */
-#define _EFX_CHANNEL_MAGIC_TEST 0x000101
-#define _EFX_CHANNEL_MAGIC_FILL 0x000102
-#define _EFX_CHANNEL_MAGIC_RX_DRAIN 0x000103
-#define _EFX_CHANNEL_MAGIC_TX_DRAIN 0x000104
-
-#define _EFX_CHANNEL_MAGIC(_code, _data) ((_code) << 8 | (_data))
-#define _EFX_CHANNEL_MAGIC_CODE(_magic) ((_magic) >> 8)
-
-#define EFX_CHANNEL_MAGIC_TEST(_channel) \
- _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_TEST, (_channel)->channel)
-#define EFX_CHANNEL_MAGIC_FILL(_rx_queue) \
- _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_FILL, \
- efx_rx_queue_index(_rx_queue))
-#define EFX_CHANNEL_MAGIC_RX_DRAIN(_rx_queue) \
- _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_RX_DRAIN, \
- efx_rx_queue_index(_rx_queue))
-#define EFX_CHANNEL_MAGIC_TX_DRAIN(_tx_queue) \
- _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_TX_DRAIN, \
- (_tx_queue)->queue)
-
-static void efx_farch_magic_event(struct efx_channel *channel, u32 magic);
-
-/**************************************************************************
- *
- * Hardware access
- *
- **************************************************************************/
-
-static inline void efx_write_buf_tbl(struct efx_nic *efx, efx_qword_t *value,
- unsigned int index)
-{
- efx_sram_writeq(efx, efx->membase + efx->type->buf_tbl_base,
- value, index);
-}
-
-static bool efx_masked_compare_oword(const efx_oword_t *a, const efx_oword_t *b,
- const efx_oword_t *mask)
-{
- return ((a->u64[0] ^ b->u64[0]) & mask->u64[0]) ||
- ((a->u64[1] ^ b->u64[1]) & mask->u64[1]);
-}
-
-int efx_farch_test_registers(struct efx_nic *efx,
- const struct efx_farch_register_test *regs,
- size_t n_regs)
-{
- unsigned address = 0;
- int i, j;
- efx_oword_t mask, imask, original, reg, buf;
-
- for (i = 0; i < n_regs; ++i) {
- address = regs[i].address;
- mask = imask = regs[i].mask;
- EFX_INVERT_OWORD(imask);
-
- efx_reado(efx, &original, address);
-
- /* bit sweep on and off */
- for (j = 0; j < 128; j++) {
- if (!EFX_EXTRACT_OWORD32(mask, j, j))
- continue;
-
- /* Test this testable bit can be set in isolation */
- EFX_AND_OWORD(reg, original, mask);
- EFX_SET_OWORD32(reg, j, j, 1);
-
- efx_writeo(efx, &reg, address);
- efx_reado(efx, &buf, address);
-
- if (efx_masked_compare_oword(&reg, &buf, &mask))
- goto fail;
-
- /* Test this testable bit can be cleared in isolation */
- EFX_OR_OWORD(reg, original, mask);
- EFX_SET_OWORD32(reg, j, j, 0);
-
- efx_writeo(efx, &reg, address);
- efx_reado(efx, &buf, address);
-
- if (efx_masked_compare_oword(&reg, &buf, &mask))
- goto fail;
- }
-
- efx_writeo(efx, &original, address);
- }
-
- return 0;
-
-fail:
- netif_err(efx, hw, efx->net_dev,
- "wrote "EFX_OWORD_FMT" read "EFX_OWORD_FMT
- " at address 0x%x mask "EFX_OWORD_FMT"\n", EFX_OWORD_VAL(reg),
- EFX_OWORD_VAL(buf), address, EFX_OWORD_VAL(mask));
- return -EIO;
-}
-
-/**************************************************************************
- *
- * Special buffer handling
- * Special buffers are used for event queues and the TX and RX
- * descriptor rings.
- *
- *************************************************************************/
-
-/*
- * Initialise a special buffer
- *
- * This will define a buffer (previously allocated via
- * efx_alloc_special_buffer()) in the buffer table, allowing
- * it to be used for event queues, descriptor rings etc.
- */
-static void
-efx_init_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
-{
- efx_qword_t buf_desc;
- unsigned int index;
- dma_addr_t dma_addr;
- int i;
-
- EFX_WARN_ON_PARANOID(!buffer->buf.addr);
-
- /* Write buffer descriptors to NIC */
- for (i = 0; i < buffer->entries; i++) {
- index = buffer->index + i;
- dma_addr = buffer->buf.dma_addr + (i * EFX_BUF_SIZE);
- netif_dbg(efx, probe, efx->net_dev,
- "mapping special buffer %d at %llx\n",
- index, (unsigned long long)dma_addr);
- EFX_POPULATE_QWORD_3(buf_desc,
- FRF_AZ_BUF_ADR_REGION, 0,
- FRF_AZ_BUF_ADR_FBUF, dma_addr >> 12,
- FRF_AZ_BUF_OWNER_ID_FBUF, 0);
- efx_write_buf_tbl(efx, &buf_desc, index);
- }
-}
-
-/* Unmaps a buffer and clears the buffer table entries */
-static void
-efx_fini_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
-{
- efx_oword_t buf_tbl_upd;
- unsigned int start = buffer->index;
- unsigned int end = (buffer->index + buffer->entries - 1);
-
- if (!buffer->entries)
- return;
-
- netif_dbg(efx, hw, efx->net_dev, "unmapping special buffers %d-%d\n",
- buffer->index, buffer->index + buffer->entries - 1);
-
- EFX_POPULATE_OWORD_4(buf_tbl_upd,
- FRF_AZ_BUF_UPD_CMD, 0,
- FRF_AZ_BUF_CLR_CMD, 1,
- FRF_AZ_BUF_CLR_END_ID, end,
- FRF_AZ_BUF_CLR_START_ID, start);
- efx_writeo(efx, &buf_tbl_upd, FR_AZ_BUF_TBL_UPD);
-}
-
-/*
- * Allocate a new special buffer
- *
- * This allocates memory for a new buffer, clears it and allocates a
- * new buffer ID range. It does not write into the buffer table.
- *
- * This call will allocate 4KB buffers, since 8KB buffers can't be
- * used for event queues and descriptor rings.
- */
-static int efx_alloc_special_buffer(struct efx_nic *efx,
- struct efx_special_buffer *buffer,
- unsigned int len)
-{
-#ifdef CONFIG_SFC_SRIOV
- struct siena_nic_data *nic_data = efx->nic_data;
-#endif
- len = ALIGN(len, EFX_BUF_SIZE);
-
- if (efx_nic_alloc_buffer(efx, &buffer->buf, len, GFP_KERNEL))
- return -ENOMEM;
- buffer->entries = len / EFX_BUF_SIZE;
- BUG_ON(buffer->buf.dma_addr & (EFX_BUF_SIZE - 1));
-
- /* Select new buffer ID */
- buffer->index = efx->next_buffer_table;
- efx->next_buffer_table += buffer->entries;
-#ifdef CONFIG_SFC_SRIOV
- BUG_ON(efx_siena_sriov_enabled(efx) &&
- nic_data->vf_buftbl_base < efx->next_buffer_table);
-#endif
-
- netif_dbg(efx, probe, efx->net_dev,
- "allocating special buffers %d-%d at %llx+%x "
- "(virt %p phys %llx)\n", buffer->index,
- buffer->index + buffer->entries - 1,
- (u64)buffer->buf.dma_addr, len,
- buffer->buf.addr, (u64)virt_to_phys(buffer->buf.addr));
-
- return 0;
-}
-
-static void
-efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
-{
- if (!buffer->buf.addr)
- return;
-
- netif_dbg(efx, hw, efx->net_dev,
- "deallocating special buffers %d-%d at %llx+%x "
- "(virt %p phys %llx)\n", buffer->index,
- buffer->index + buffer->entries - 1,
- (u64)buffer->buf.dma_addr, buffer->buf.len,
- buffer->buf.addr, (u64)virt_to_phys(buffer->buf.addr));
-
- efx_nic_free_buffer(efx, &buffer->buf);
- buffer->entries = 0;
-}
-
-/**************************************************************************
- *
- * TX path
- *
- **************************************************************************/
-
-/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */
-static inline void efx_farch_notify_tx_desc(struct efx_tx_queue *tx_queue)
-{
- unsigned write_ptr;
- efx_dword_t reg;
-
- write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
- EFX_POPULATE_DWORD_1(reg, FRF_AZ_TX_DESC_WPTR_DWORD, write_ptr);
- efx_writed_page(tx_queue->efx, &reg,
- FR_AZ_TX_DESC_UPD_DWORD_P0, tx_queue->queue);
-}
-
-/* Write pointer and first descriptor for TX descriptor ring */
-static inline void efx_farch_push_tx_desc(struct efx_tx_queue *tx_queue,
- const efx_qword_t *txd)
-{
- unsigned write_ptr;
- efx_oword_t reg;
-
- BUILD_BUG_ON(FRF_AZ_TX_DESC_LBN != 0);
- BUILD_BUG_ON(FR_AA_TX_DESC_UPD_KER != FR_BZ_TX_DESC_UPD_P0);
-
- write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
- EFX_POPULATE_OWORD_2(reg, FRF_AZ_TX_DESC_PUSH_CMD, true,
- FRF_AZ_TX_DESC_WPTR, write_ptr);
- reg.qword[0] = *txd;
- efx_writeo_page(tx_queue->efx, &reg,
- FR_BZ_TX_DESC_UPD_P0, tx_queue->queue);
-}
-
-
-/* For each entry inserted into the software descriptor ring, create a
- * descriptor in the hardware TX descriptor ring (in host memory), and
- * write a doorbell.
- */
-void efx_farch_tx_write(struct efx_tx_queue *tx_queue)
-{
- struct efx_tx_buffer *buffer;
- efx_qword_t *txd;
- unsigned write_ptr;
- unsigned old_write_count = tx_queue->write_count;
-
- tx_queue->xmit_pending = false;
- if (unlikely(tx_queue->write_count == tx_queue->insert_count))
- return;
-
- do {
- write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
- buffer = &tx_queue->buffer[write_ptr];
- txd = efx_tx_desc(tx_queue, write_ptr);
- ++tx_queue->write_count;
-
- EFX_WARN_ON_ONCE_PARANOID(buffer->flags & EFX_TX_BUF_OPTION);
-
- /* Create TX descriptor ring entry */
- BUILD_BUG_ON(EFX_TX_BUF_CONT != 1);
- EFX_POPULATE_QWORD_4(*txd,
- FSF_AZ_TX_KER_CONT,
- buffer->flags & EFX_TX_BUF_CONT,
- FSF_AZ_TX_KER_BYTE_COUNT, buffer->len,
- FSF_AZ_TX_KER_BUF_REGION, 0,
- FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr);
- } while (tx_queue->write_count != tx_queue->insert_count);
-
- wmb(); /* Ensure descriptors are written before they are fetched */
-
- if (efx_nic_may_push_tx_desc(tx_queue, old_write_count)) {
- txd = efx_tx_desc(tx_queue,
- old_write_count & tx_queue->ptr_mask);
- efx_farch_push_tx_desc(tx_queue, txd);
- ++tx_queue->pushes;
- } else {
- efx_farch_notify_tx_desc(tx_queue);
- }
-}
-
-unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue,
- dma_addr_t dma_addr, unsigned int len)
-{
- /* Don't cross 4K boundaries with descriptors. */
- unsigned int limit = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1;
-
- len = min(limit, len);
-
- return len;
-}
-
-
-/* Allocate hardware resources for a TX queue */
-int efx_farch_tx_probe(struct efx_tx_queue *tx_queue)
-{
- struct efx_nic *efx = tx_queue->efx;
- unsigned entries;
-
- tx_queue->type = ((tx_queue->label & 1) ? EFX_TXQ_TYPE_OUTER_CSUM : 0) |
- ((tx_queue->label & 2) ? EFX_TXQ_TYPE_HIGHPRI : 0);
- entries = tx_queue->ptr_mask + 1;
- return efx_alloc_special_buffer(efx, &tx_queue->txd,
- entries * sizeof(efx_qword_t));
-}
-
-void efx_farch_tx_init(struct efx_tx_queue *tx_queue)
-{
- int csum = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM;
- struct efx_nic *efx = tx_queue->efx;
- efx_oword_t reg;
-
- /* Pin TX descriptor ring */
- efx_init_special_buffer(efx, &tx_queue->txd);
-
- /* Push TX descriptor ring to card */
- EFX_POPULATE_OWORD_10(reg,
- FRF_AZ_TX_DESCQ_EN, 1,
- FRF_AZ_TX_ISCSI_DDIG_EN, 0,
- FRF_AZ_TX_ISCSI_HDIG_EN, 0,
- FRF_AZ_TX_DESCQ_BUF_BASE_ID, tx_queue->txd.index,
- FRF_AZ_TX_DESCQ_EVQ_ID,
- tx_queue->channel->channel,
- FRF_AZ_TX_DESCQ_OWNER_ID, 0,
- FRF_AZ_TX_DESCQ_LABEL, tx_queue->label,
- FRF_AZ_TX_DESCQ_SIZE,
- __ffs(tx_queue->txd.entries),
- FRF_AZ_TX_DESCQ_TYPE, 0,
- FRF_BZ_TX_NON_IP_DROP_DIS, 1);
-
- EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_IP_CHKSM_DIS, !csum);
- EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_TCP_CHKSM_DIS, !csum);
-
- efx_writeo_table(efx, &reg, efx->type->txd_ptr_tbl_base,
- tx_queue->queue);
-
- EFX_POPULATE_OWORD_1(reg,
- FRF_BZ_TX_PACE,
- (tx_queue->type & EFX_TXQ_TYPE_HIGHPRI) ?
- FFE_BZ_TX_PACE_OFF :
- FFE_BZ_TX_PACE_RESERVED);
- efx_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL, tx_queue->queue);
-
- tx_queue->tso_version = 1;
-}
-
-static void efx_farch_flush_tx_queue(struct efx_tx_queue *tx_queue)
-{
- struct efx_nic *efx = tx_queue->efx;
- efx_oword_t tx_flush_descq;
-
- WARN_ON(atomic_read(&tx_queue->flush_outstanding));
- atomic_set(&tx_queue->flush_outstanding, 1);
-
- EFX_POPULATE_OWORD_2(tx_flush_descq,
- FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
- FRF_AZ_TX_FLUSH_DESCQ, tx_queue->queue);
- efx_writeo(efx, &tx_flush_descq, FR_AZ_TX_FLUSH_DESCQ);
-}
-
-void efx_farch_tx_fini(struct efx_tx_queue *tx_queue)
-{
- struct efx_nic *efx = tx_queue->efx;
- efx_oword_t tx_desc_ptr;
-
- /* Remove TX descriptor ring from card */
- EFX_ZERO_OWORD(tx_desc_ptr);
- efx_writeo_table(efx, &tx_desc_ptr, efx->type->txd_ptr_tbl_base,
- tx_queue->queue);
-
- /* Unpin TX descriptor ring */
- efx_fini_special_buffer(efx, &tx_queue->txd);
-}
-
-/* Free buffers backing TX queue */
-void efx_farch_tx_remove(struct efx_tx_queue *tx_queue)
-{
- efx_free_special_buffer(tx_queue->efx, &tx_queue->txd);
-}
-
-/**************************************************************************
- *
- * RX path
- *
- **************************************************************************/
-
-/* This creates an entry in the RX descriptor queue */
-static inline void
-efx_farch_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned index)
-{
- struct efx_rx_buffer *rx_buf;
- efx_qword_t *rxd;
-
- rxd = efx_rx_desc(rx_queue, index);
- rx_buf = efx_rx_buffer(rx_queue, index);
- EFX_POPULATE_QWORD_3(*rxd,
- FSF_AZ_RX_KER_BUF_SIZE,
- rx_buf->len -
- rx_queue->efx->type->rx_buffer_padding,
- FSF_AZ_RX_KER_BUF_REGION, 0,
- FSF_AZ_RX_KER_BUF_ADDR, rx_buf->dma_addr);
-}
-
-/* This writes to the RX_DESC_WPTR register for the specified receive
- * descriptor ring.
- */
-void efx_farch_rx_write(struct efx_rx_queue *rx_queue)
-{
- struct efx_nic *efx = rx_queue->efx;
- efx_dword_t reg;
- unsigned write_ptr;
-
- while (rx_queue->notified_count != rx_queue->added_count) {
- efx_farch_build_rx_desc(
- rx_queue,
- rx_queue->notified_count & rx_queue->ptr_mask);
- ++rx_queue->notified_count;
- }
-
- wmb();
- write_ptr = rx_queue->added_count & rx_queue->ptr_mask;
- EFX_POPULATE_DWORD_1(reg, FRF_AZ_RX_DESC_WPTR_DWORD, write_ptr);
- efx_writed_page(efx, &reg, FR_AZ_RX_DESC_UPD_DWORD_P0,
- efx_rx_queue_index(rx_queue));
-}
-
-int efx_farch_rx_probe(struct efx_rx_queue *rx_queue)
-{
- struct efx_nic *efx = rx_queue->efx;
- unsigned entries;
-
- entries = rx_queue->ptr_mask + 1;
- return efx_alloc_special_buffer(efx, &rx_queue->rxd,
- entries * sizeof(efx_qword_t));
-}
-
-void efx_farch_rx_init(struct efx_rx_queue *rx_queue)
-{
- efx_oword_t rx_desc_ptr;
- struct efx_nic *efx = rx_queue->efx;
- bool jumbo_en;
-
- /* For kernel-mode queues in Siena, the JUMBO flag enables scatter. */
- jumbo_en = efx->rx_scatter;
-
- netif_dbg(efx, hw, efx->net_dev,
- "RX queue %d ring in special buffers %d-%d\n",
- efx_rx_queue_index(rx_queue), rx_queue->rxd.index,
- rx_queue->rxd.index + rx_queue->rxd.entries - 1);
-
- rx_queue->scatter_n = 0;
-
- /* Pin RX descriptor ring */
- efx_init_special_buffer(efx, &rx_queue->rxd);
-
- /* Push RX descriptor ring to card */
- EFX_POPULATE_OWORD_10(rx_desc_ptr,
- FRF_AZ_RX_ISCSI_DDIG_EN, true,
- FRF_AZ_RX_ISCSI_HDIG_EN, true,
- FRF_AZ_RX_DESCQ_BUF_BASE_ID, rx_queue->rxd.index,
- FRF_AZ_RX_DESCQ_EVQ_ID,
- efx_rx_queue_channel(rx_queue)->channel,
- FRF_AZ_RX_DESCQ_OWNER_ID, 0,
- FRF_AZ_RX_DESCQ_LABEL,
- efx_rx_queue_index(rx_queue),
- FRF_AZ_RX_DESCQ_SIZE,
- __ffs(rx_queue->rxd.entries),
- FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ ,
- FRF_AZ_RX_DESCQ_JUMBO, jumbo_en,
- FRF_AZ_RX_DESCQ_EN, 1);
- efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base,
- efx_rx_queue_index(rx_queue));
-}
-
-static void efx_farch_flush_rx_queue(struct efx_rx_queue *rx_queue)
-{
- struct efx_nic *efx = rx_queue->efx;
- efx_oword_t rx_flush_descq;
-
- EFX_POPULATE_OWORD_2(rx_flush_descq,
- FRF_AZ_RX_FLUSH_DESCQ_CMD, 1,
- FRF_AZ_RX_FLUSH_DESCQ,
- efx_rx_queue_index(rx_queue));
- efx_writeo(efx, &rx_flush_descq, FR_AZ_RX_FLUSH_DESCQ);
-}
-
-void efx_farch_rx_fini(struct efx_rx_queue *rx_queue)
-{
- efx_oword_t rx_desc_ptr;
- struct efx_nic *efx = rx_queue->efx;
-
- /* Remove RX descriptor ring from card */
- EFX_ZERO_OWORD(rx_desc_ptr);
- efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base,
- efx_rx_queue_index(rx_queue));
-
- /* Unpin RX descriptor ring */
- efx_fini_special_buffer(efx, &rx_queue->rxd);
-}
-
-/* Free buffers backing RX queue */
-void efx_farch_rx_remove(struct efx_rx_queue *rx_queue)
-{
- efx_free_special_buffer(rx_queue->efx, &rx_queue->rxd);
-}
-
-/**************************************************************************
- *
- * Flush handling
- *
- **************************************************************************/
-
-/* efx_farch_flush_queues() must be woken up when all flushes are completed,
- * or more RX flushes can be kicked off.
- */
-static bool efx_farch_flush_wake(struct efx_nic *efx)
-{
- /* Ensure that all updates are visible to efx_farch_flush_queues() */
- smp_mb();
-
- return (atomic_read(&efx->active_queues) == 0 ||
- (atomic_read(&efx->rxq_flush_outstanding) < EFX_RX_FLUSH_COUNT
- && atomic_read(&efx->rxq_flush_pending) > 0));
-}
-
-static bool efx_check_tx_flush_complete(struct efx_nic *efx)
-{
- bool i = true;
- efx_oword_t txd_ptr_tbl;
- struct efx_channel *channel;
- struct efx_tx_queue *tx_queue;
-
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- efx_reado_table(efx, &txd_ptr_tbl,
- FR_BZ_TX_DESC_PTR_TBL, tx_queue->queue);
- if (EFX_OWORD_FIELD(txd_ptr_tbl,
- FRF_AZ_TX_DESCQ_FLUSH) ||
- EFX_OWORD_FIELD(txd_ptr_tbl,
- FRF_AZ_TX_DESCQ_EN)) {
- netif_dbg(efx, hw, efx->net_dev,
- "flush did not complete on TXQ %d\n",
- tx_queue->queue);
- i = false;
- } else if (atomic_cmpxchg(&tx_queue->flush_outstanding,
- 1, 0)) {
- /* The flush is complete, but we didn't
- * receive a flush completion event
- */
- netif_dbg(efx, hw, efx->net_dev,
- "flush complete on TXQ %d, so drain "
- "the queue\n", tx_queue->queue);
- /* Don't need to increment active_queues as it
- * has already been incremented for the queues
- * which did not drain
- */
- efx_farch_magic_event(channel,
- EFX_CHANNEL_MAGIC_TX_DRAIN(
- tx_queue));
- }
- }
- }
-
- return i;
-}
-
-/* Flush all the transmit queues, and continue flushing receive queues until
- * they're all flushed. Wait for the DRAIN events to be received so that there
- * are no more RX and TX events left on any channel. */
-static int efx_farch_do_flush(struct efx_nic *efx)
-{
- unsigned timeout = msecs_to_jiffies(5000); /* 5s for all flushes and drains */
- struct efx_channel *channel;
- struct efx_rx_queue *rx_queue;
- struct efx_tx_queue *tx_queue;
- int rc = 0;
-
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- efx_farch_flush_tx_queue(tx_queue);
- }
- efx_for_each_channel_rx_queue(rx_queue, channel) {
- rx_queue->flush_pending = true;
- atomic_inc(&efx->rxq_flush_pending);
- }
- }
-
- while (timeout && atomic_read(&efx->active_queues) > 0) {
- /* If SRIOV is enabled, then offload receive queue flushing to
- * the firmware (though we will still have to poll for
- * completion). If that fails, fall back to the old scheme.
- */
- if (efx_siena_sriov_enabled(efx)) {
- rc = efx_mcdi_flush_rxqs(efx);
- if (!rc)
- goto wait;
- }
-
- /* The hardware supports four concurrent rx flushes, each of
- * which may need to be retried if there is an outstanding
- * descriptor fetch
- */
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_rx_queue(rx_queue, channel) {
- if (atomic_read(&efx->rxq_flush_outstanding) >=
- EFX_RX_FLUSH_COUNT)
- break;
-
- if (rx_queue->flush_pending) {
- rx_queue->flush_pending = false;
- atomic_dec(&efx->rxq_flush_pending);
- atomic_inc(&efx->rxq_flush_outstanding);
- efx_farch_flush_rx_queue(rx_queue);
- }
- }
- }
-
- wait:
- timeout = wait_event_timeout(efx->flush_wq,
- efx_farch_flush_wake(efx),
- timeout);
- }
-
- if (atomic_read(&efx->active_queues) &&
- !efx_check_tx_flush_complete(efx)) {
- netif_err(efx, hw, efx->net_dev, "failed to flush %d queues "
- "(rx %d+%d)\n", atomic_read(&efx->active_queues),
- atomic_read(&efx->rxq_flush_outstanding),
- atomic_read(&efx->rxq_flush_pending));
- rc = -ETIMEDOUT;
-
- atomic_set(&efx->active_queues, 0);
- atomic_set(&efx->rxq_flush_pending, 0);
- atomic_set(&efx->rxq_flush_outstanding, 0);
- }
-
- return rc;
-}
-
-int efx_farch_fini_dmaq(struct efx_nic *efx)
-{
- struct efx_channel *channel;
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
- int rc = 0;
-
- /* Do not attempt to write to the NIC during EEH recovery */
- if (efx->state != STATE_RECOVERY) {
- /* Only perform flush if DMA is enabled */
- if (efx->pci_dev->is_busmaster) {
- efx->type->prepare_flush(efx);
- rc = efx_farch_do_flush(efx);
- efx->type->finish_flush(efx);
- }
-
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_rx_queue(rx_queue, channel)
- efx_farch_rx_fini(rx_queue);
- efx_for_each_channel_tx_queue(tx_queue, channel)
- efx_farch_tx_fini(tx_queue);
- }
- }
-
- return rc;
-}
-
-/* Reset queue and flush accounting after FLR
- *
- * One possible cause of FLR recovery is that DMA may be failing (eg. if bus
- * mastering was disabled), in which case we don't receive (RXQ) flush
- * completion events. This means that efx->rxq_flush_outstanding remained at 4
- * after the FLR; also, efx->active_queues was non-zero (as no flush completion
- * events were received, and we didn't go through efx_check_tx_flush_complete())
- * If we don't fix this up, on the next call to efx_realloc_channels() we won't
- * flush any RX queues because efx->rxq_flush_outstanding is at the limit of 4
- * for batched flush requests; and the efx->active_queues gets messed up because
- * we keep incrementing for the newly initialised queues, but it never went to
- * zero previously. Then we get a timeout every time we try to restart the
- * queues, as it doesn't go back to zero when we should be flushing the queues.
- */
-void efx_farch_finish_flr(struct efx_nic *efx)
-{
- atomic_set(&efx->rxq_flush_pending, 0);
- atomic_set(&efx->rxq_flush_outstanding, 0);
- atomic_set(&efx->active_queues, 0);
-}
-
-
-/**************************************************************************
- *
- * Event queue processing
- * Event queues are processed by per-channel tasklets.
- *
- **************************************************************************/
-
-/* Update a channel's event queue's read pointer (RPTR) register
- *
- * This writes the EVQ_RPTR_REG register for the specified channel's
- * event queue.
- */
-void efx_farch_ev_read_ack(struct efx_channel *channel)
-{
- efx_dword_t reg;
- struct efx_nic *efx = channel->efx;
-
- EFX_POPULATE_DWORD_1(reg, FRF_AZ_EVQ_RPTR,
- channel->eventq_read_ptr & channel->eventq_mask);
-
- /* For Falcon A1, EVQ_RPTR_KER is documented as having a step size
- * of 4 bytes, but it is really 16 bytes just like later revisions.
- */
- efx_writed(efx, &reg,
- efx->type->evq_rptr_tbl_base +
- FR_BZ_EVQ_RPTR_STEP * channel->channel);
-}
-
-/* Use HW to insert a SW defined event */
-void efx_farch_generate_event(struct efx_nic *efx, unsigned int evq,
- efx_qword_t *event)
-{
- efx_oword_t drv_ev_reg;
-
- BUILD_BUG_ON(FRF_AZ_DRV_EV_DATA_LBN != 0 ||
- FRF_AZ_DRV_EV_DATA_WIDTH != 64);
- drv_ev_reg.u32[0] = event->u32[0];
- drv_ev_reg.u32[1] = event->u32[1];
- drv_ev_reg.u32[2] = 0;
- drv_ev_reg.u32[3] = 0;
- EFX_SET_OWORD_FIELD(drv_ev_reg, FRF_AZ_DRV_EV_QID, evq);
- efx_writeo(efx, &drv_ev_reg, FR_AZ_DRV_EV);
-}
-
-static void efx_farch_magic_event(struct efx_channel *channel, u32 magic)
-{
- efx_qword_t event;
-
- EFX_POPULATE_QWORD_2(event, FSF_AZ_EV_CODE,
- FSE_AZ_EV_CODE_DRV_GEN_EV,
- FSF_AZ_DRV_GEN_EV_MAGIC, magic);
- efx_farch_generate_event(channel->efx, channel->channel, &event);
-}
-
-/* Handle a transmit completion event
- *
- * The NIC batches TX completion events; the message we receive is of
- * the form "complete all TX events up to this index".
- */
-static void
-efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
-{
- unsigned int tx_ev_desc_ptr;
- unsigned int tx_ev_q_label;
- struct efx_tx_queue *tx_queue;
- struct efx_nic *efx = channel->efx;
-
- if (unlikely(READ_ONCE(efx->reset_pending)))
- return;
-
- if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) {
- /* Transmit completion */
- tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR);
- tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
- tx_queue = channel->tx_queue +
- (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
- efx_xmit_done(tx_queue, tx_ev_desc_ptr);
- } else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
- /* Rewrite the FIFO write pointer */
- tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
- tx_queue = channel->tx_queue +
- (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);
-
- netif_tx_lock(efx->net_dev);
- efx_farch_notify_tx_desc(tx_queue);
- netif_tx_unlock(efx->net_dev);
- } else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_PKT_ERR)) {
- efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
- } else {
- netif_err(efx, tx_err, efx->net_dev,
- "channel %d unexpected TX event "
- EFX_QWORD_FMT"\n", channel->channel,
- EFX_QWORD_VAL(*event));
- }
-}
-
-/* Detect errors included in the rx_evt_pkt_ok bit. */
-static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
- const efx_qword_t *event)
-{
- struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
- struct efx_nic *efx = rx_queue->efx;
- bool rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err;
- bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err;
- bool rx_ev_frm_trunc, rx_ev_tobe_disc;
- bool rx_ev_other_err, rx_ev_pause_frm;
-
- rx_ev_tobe_disc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_TOBE_DISC);
- rx_ev_buf_owner_id_err = EFX_QWORD_FIELD(*event,
- FSF_AZ_RX_EV_BUF_OWNER_ID_ERR);
- rx_ev_ip_hdr_chksum_err = EFX_QWORD_FIELD(*event,
- FSF_AZ_RX_EV_IP_HDR_CHKSUM_ERR);
- rx_ev_tcp_udp_chksum_err = EFX_QWORD_FIELD(*event,
- FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR);
- rx_ev_eth_crc_err = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_ETH_CRC_ERR);
- rx_ev_frm_trunc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_FRM_TRUNC);
- rx_ev_pause_frm = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PAUSE_FRM_ERR);
-
- /* Every error apart from tobe_disc and pause_frm */
- rx_ev_other_err = (rx_ev_tcp_udp_chksum_err |
- rx_ev_buf_owner_id_err | rx_ev_eth_crc_err |
- rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err);
-
- /* Count errors that are not in MAC stats. Ignore expected
- * checksum errors during self-test. */
- if (rx_ev_frm_trunc)
- ++channel->n_rx_frm_trunc;
- else if (rx_ev_tobe_disc)
- ++channel->n_rx_tobe_disc;
- else if (!efx->loopback_selftest) {
- if (rx_ev_ip_hdr_chksum_err)
- ++channel->n_rx_ip_hdr_chksum_err;
- else if (rx_ev_tcp_udp_chksum_err)
- ++channel->n_rx_tcp_udp_chksum_err;
- }
-
- /* TOBE_DISC is expected on unicast mismatches; don't print out an
- * error message. FRM_TRUNC indicates RXDP dropped the packet due
- * to a FIFO overflow.
- */
-#ifdef DEBUG
- if (rx_ev_other_err && net_ratelimit()) {
- netif_dbg(efx, rx_err, efx->net_dev,
- " RX queue %d unexpected RX event "
- EFX_QWORD_FMT "%s%s%s%s%s%s%s\n",
- efx_rx_queue_index(rx_queue), EFX_QWORD_VAL(*event),
- rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "",
- rx_ev_ip_hdr_chksum_err ?
- " [IP_HDR_CHKSUM_ERR]" : "",
- rx_ev_tcp_udp_chksum_err ?
- " [TCP_UDP_CHKSUM_ERR]" : "",
- rx_ev_eth_crc_err ? " [ETH_CRC_ERR]" : "",
- rx_ev_frm_trunc ? " [FRM_TRUNC]" : "",
- rx_ev_tobe_disc ? " [TOBE_DISC]" : "",
- rx_ev_pause_frm ? " [PAUSE]" : "");
- }
-#else
- (void) rx_ev_other_err;
-#endif
-
- if (efx->net_dev->features & NETIF_F_RXALL)
- /* don't discard frame for CRC error */
- rx_ev_eth_crc_err = false;
-
- /* The frame must be discarded if any of these are true. */
- return (rx_ev_eth_crc_err | rx_ev_frm_trunc |
- rx_ev_tobe_disc | rx_ev_pause_frm) ?
- EFX_RX_PKT_DISCARD : 0;
-}
-
-/* Handle receive events that are not in-order. Return true if this
- * can be handled as a partial packet discard, false if it's more
- * serious.
- */
-static bool
-efx_farch_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
-{
- struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
- struct efx_nic *efx = rx_queue->efx;
- unsigned expected, dropped;
-
- if (rx_queue->scatter_n &&
- index == ((rx_queue->removed_count + rx_queue->scatter_n - 1) &
- rx_queue->ptr_mask)) {
- ++channel->n_rx_nodesc_trunc;
- return true;
- }
-
- expected = rx_queue->removed_count & rx_queue->ptr_mask;
- dropped = (index - expected) & rx_queue->ptr_mask;
- netif_info(efx, rx_err, efx->net_dev,
- "dropped %d events (index=%d expected=%d)\n",
- dropped, index, expected);
-
- efx_schedule_reset(efx, RESET_TYPE_DISABLE);
- return false;
-}
-
-/* Handle a packet received event
- *
- * The NIC gives a "discard" flag if it's a unicast packet with the
- * wrong destination address
- * Also "is multicast" and "matches multicast filter" flags can be used to
- * discard non-matching multicast packets.
- */
-static void
-efx_farch_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
-{
- unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt;
- unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt;
- unsigned expected_ptr;
- bool rx_ev_pkt_ok, rx_ev_sop, rx_ev_cont;
- u16 flags;
- struct efx_rx_queue *rx_queue;
- struct efx_nic *efx = channel->efx;
-
- if (unlikely(READ_ONCE(efx->reset_pending)))
- return;
-
- rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
- rx_ev_sop = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP);
- WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) !=
- channel->channel);
-
- rx_queue = efx_channel_get_rx_queue(channel);
-
- rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR);
- expected_ptr = ((rx_queue->removed_count + rx_queue->scatter_n) &
- rx_queue->ptr_mask);
-
- /* Check for partial drops and other errors */
- if (unlikely(rx_ev_desc_ptr != expected_ptr) ||
- unlikely(rx_ev_sop != (rx_queue->scatter_n == 0))) {
- if (rx_ev_desc_ptr != expected_ptr &&
- !efx_farch_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr))
- return;
-
- /* Discard all pending fragments */
- if (rx_queue->scatter_n) {
- efx_rx_packet(
- rx_queue,
- rx_queue->removed_count & rx_queue->ptr_mask,
- rx_queue->scatter_n, 0, EFX_RX_PKT_DISCARD);
- rx_queue->removed_count += rx_queue->scatter_n;
- rx_queue->scatter_n = 0;
- }
-
- /* Return if there is no new fragment */
- if (rx_ev_desc_ptr != expected_ptr)
- return;
-
- /* Discard new fragment if not SOP */
- if (!rx_ev_sop) {
- efx_rx_packet(
- rx_queue,
- rx_queue->removed_count & rx_queue->ptr_mask,
- 1, 0, EFX_RX_PKT_DISCARD);
- ++rx_queue->removed_count;
- return;
- }
- }
-
- ++rx_queue->scatter_n;
- if (rx_ev_cont)
- return;
-
- rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT);
- rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
- rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
-
- if (likely(rx_ev_pkt_ok)) {
- /* If packet is marked as OK then we can rely on the
- * hardware checksum and classification.
- */
- flags = 0;
- switch (rx_ev_hdr_type) {
- case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP:
- flags |= EFX_RX_PKT_TCP;
- fallthrough;
- case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP:
- flags |= EFX_RX_PKT_CSUMMED;
- fallthrough;
- case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_OTHER:
- case FSE_AZ_RX_EV_HDR_TYPE_OTHER:
- break;
- }
- } else {
- flags = efx_farch_handle_rx_not_ok(rx_queue, event);
- }
-
- /* Detect multicast packets that didn't match the filter */
- rx_ev_mcast_pkt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_PKT);
- if (rx_ev_mcast_pkt) {
- unsigned int rx_ev_mcast_hash_match =
- EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_HASH_MATCH);
-
- if (unlikely(!rx_ev_mcast_hash_match)) {
- ++channel->n_rx_mcast_mismatch;
- flags |= EFX_RX_PKT_DISCARD;
- }
- }
-
- channel->irq_mod_score += 2;
-
- /* Handle received packet */
- efx_rx_packet(rx_queue,
- rx_queue->removed_count & rx_queue->ptr_mask,
- rx_queue->scatter_n, rx_ev_byte_cnt, flags);
- rx_queue->removed_count += rx_queue->scatter_n;
- rx_queue->scatter_n = 0;
-}
-
-/* If this flush done event corresponds to a &struct efx_tx_queue, then
- * send an %EFX_CHANNEL_MAGIC_TX_DRAIN event to drain the event queue
- * of all transmit completions.
- */
-static void
-efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
-{
- struct efx_tx_queue *tx_queue;
- struct efx_channel *channel;
- int qid;
-
- qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
- if (qid < EFX_MAX_TXQ_PER_CHANNEL * (efx->n_tx_channels + efx->n_extra_tx_channels)) {
- channel = efx_get_tx_channel(efx, qid / EFX_MAX_TXQ_PER_CHANNEL);
- tx_queue = channel->tx_queue + (qid % EFX_MAX_TXQ_PER_CHANNEL);
- if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0))
- efx_farch_magic_event(tx_queue->channel,
- EFX_CHANNEL_MAGIC_TX_DRAIN(tx_queue));
- }
-}
-
-/* If this flush done event corresponds to a &struct efx_rx_queue: If the flush
- * was successful then send an %EFX_CHANNEL_MAGIC_RX_DRAIN, otherwise add
- * the RX queue back to the mask of RX queues in need of flushing.
- */
-static void
-efx_farch_handle_rx_flush_done(struct efx_nic *efx, efx_qword_t *event)
-{
- struct efx_channel *channel;
- struct efx_rx_queue *rx_queue;
- int qid;
- bool failed;
-
- qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
- failed = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
- if (qid >= efx->n_channels)
- return;
- channel = efx_get_channel(efx, qid);
- if (!efx_channel_has_rx_queue(channel))
- return;
- rx_queue = efx_channel_get_rx_queue(channel);
-
- if (failed) {
- netif_info(efx, hw, efx->net_dev,
- "RXQ %d flush retry\n", qid);
- rx_queue->flush_pending = true;
- atomic_inc(&efx->rxq_flush_pending);
- } else {
- efx_farch_magic_event(efx_rx_queue_channel(rx_queue),
- EFX_CHANNEL_MAGIC_RX_DRAIN(rx_queue));
- }
- atomic_dec(&efx->rxq_flush_outstanding);
- if (efx_farch_flush_wake(efx))
- wake_up(&efx->flush_wq);
-}
-
-static void
-efx_farch_handle_drain_event(struct efx_channel *channel)
-{
- struct efx_nic *efx = channel->efx;
-
- WARN_ON(atomic_read(&efx->active_queues) == 0);
- atomic_dec(&efx->active_queues);
- if (efx_farch_flush_wake(efx))
- wake_up(&efx->flush_wq);
-}
-
-static void efx_farch_handle_generated_event(struct efx_channel *channel,
- efx_qword_t *event)
-{
- struct efx_nic *efx = channel->efx;
- struct efx_rx_queue *rx_queue =
- efx_channel_has_rx_queue(channel) ?
- efx_channel_get_rx_queue(channel) : NULL;
- unsigned magic, code;
-
- magic = EFX_QWORD_FIELD(*event, FSF_AZ_DRV_GEN_EV_MAGIC);
- code = _EFX_CHANNEL_MAGIC_CODE(magic);
-
- if (magic == EFX_CHANNEL_MAGIC_TEST(channel)) {
- channel->event_test_cpu = raw_smp_processor_id();
- } else if (rx_queue && magic == EFX_CHANNEL_MAGIC_FILL(rx_queue)) {
- /* The queue must be empty, so we won't receive any rx
- * events, so efx_process_channel() won't refill the
- * queue. Refill it here */
- efx_fast_push_rx_descriptors(rx_queue, true);
- } else if (rx_queue && magic == EFX_CHANNEL_MAGIC_RX_DRAIN(rx_queue)) {
- efx_farch_handle_drain_event(channel);
- } else if (code == _EFX_CHANNEL_MAGIC_TX_DRAIN) {
- efx_farch_handle_drain_event(channel);
- } else {
- netif_dbg(efx, hw, efx->net_dev, "channel %d received "
- "generated event "EFX_QWORD_FMT"\n",
- channel->channel, EFX_QWORD_VAL(*event));
- }
-}
-
-static void
-efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
-{
- struct efx_nic *efx = channel->efx;
- unsigned int ev_sub_code;
- unsigned int ev_sub_data;
-
- ev_sub_code = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBCODE);
- ev_sub_data = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
-
- switch (ev_sub_code) {
- case FSE_AZ_TX_DESCQ_FLS_DONE_EV:
- netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n",
- channel->channel, ev_sub_data);
- efx_farch_handle_tx_flush_done(efx, event);
-#ifdef CONFIG_SFC_SRIOV
- efx_siena_sriov_tx_flush_done(efx, event);
-#endif
- break;
- case FSE_AZ_RX_DESCQ_FLS_DONE_EV:
- netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n",
- channel->channel, ev_sub_data);
- efx_farch_handle_rx_flush_done(efx, event);
-#ifdef CONFIG_SFC_SRIOV
- efx_siena_sriov_rx_flush_done(efx, event);
-#endif
- break;
- case FSE_AZ_EVQ_INIT_DONE_EV:
- netif_dbg(efx, hw, efx->net_dev,
- "channel %d EVQ %d initialised\n",
- channel->channel, ev_sub_data);
- break;
- case FSE_AZ_SRM_UPD_DONE_EV:
- netif_vdbg(efx, hw, efx->net_dev,
- "channel %d SRAM update done\n", channel->channel);
- break;
- case FSE_AZ_WAKE_UP_EV:
- netif_vdbg(efx, hw, efx->net_dev,
- "channel %d RXQ %d wakeup event\n",
- channel->channel, ev_sub_data);
- break;
- case FSE_AZ_TIMER_EV:
- netif_vdbg(efx, hw, efx->net_dev,
- "channel %d RX queue %d timer expired\n",
- channel->channel, ev_sub_data);
- break;
- case FSE_AA_RX_RECOVER_EV:
- netif_err(efx, rx_err, efx->net_dev,
- "channel %d seen DRIVER RX_RESET event. "
- "Resetting.\n", channel->channel);
- atomic_inc(&efx->rx_reset);
- efx_schedule_reset(efx, RESET_TYPE_DISABLE);
- break;
- case FSE_BZ_RX_DSC_ERROR_EV:
- if (ev_sub_data < EFX_VI_BASE) {
- netif_err(efx, rx_err, efx->net_dev,
- "RX DMA Q %d reports descriptor fetch error."
- " RX Q %d is disabled.\n", ev_sub_data,
- ev_sub_data);
- efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
- }
-#ifdef CONFIG_SFC_SRIOV
- else
- efx_siena_sriov_desc_fetch_err(efx, ev_sub_data);
-#endif
- break;
- case FSE_BZ_TX_DSC_ERROR_EV:
- if (ev_sub_data < EFX_VI_BASE) {
- netif_err(efx, tx_err, efx->net_dev,
- "TX DMA Q %d reports descriptor fetch error."
- " TX Q %d is disabled.\n", ev_sub_data,
- ev_sub_data);
- efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
- }
-#ifdef CONFIG_SFC_SRIOV
- else
- efx_siena_sriov_desc_fetch_err(efx, ev_sub_data);
-#endif
- break;
- default:
- netif_vdbg(efx, hw, efx->net_dev,
- "channel %d unknown driver event code %d "
- "data %04x\n", channel->channel, ev_sub_code,
- ev_sub_data);
- break;
- }
-}
-
-int efx_farch_ev_process(struct efx_channel *channel, int budget)
-{
- struct efx_nic *efx = channel->efx;
- unsigned int read_ptr;
- efx_qword_t event, *p_event;
- int ev_code;
- int spent = 0;
-
- if (budget <= 0)
- return spent;
-
- read_ptr = channel->eventq_read_ptr;
-
- for (;;) {
- p_event = efx_event(channel, read_ptr);
- event = *p_event;
-
- if (!efx_event_present(&event))
- /* End of events */
- break;
-
- netif_vdbg(channel->efx, intr, channel->efx->net_dev,
- "channel %d event is "EFX_QWORD_FMT"\n",
- channel->channel, EFX_QWORD_VAL(event));
-
- /* Clear this event by marking it all ones */
- EFX_SET_QWORD(*p_event);
-
- ++read_ptr;
-
- ev_code = EFX_QWORD_FIELD(event, FSF_AZ_EV_CODE);
-
- switch (ev_code) {
- case FSE_AZ_EV_CODE_RX_EV:
- efx_farch_handle_rx_event(channel, &event);
- if (++spent == budget)
- goto out;
- break;
- case FSE_AZ_EV_CODE_TX_EV:
- efx_farch_handle_tx_event(channel, &event);
- break;
- case FSE_AZ_EV_CODE_DRV_GEN_EV:
- efx_farch_handle_generated_event(channel, &event);
- break;
- case FSE_AZ_EV_CODE_DRIVER_EV:
- efx_farch_handle_driver_event(channel, &event);
- break;
-#ifdef CONFIG_SFC_SRIOV
- case FSE_CZ_EV_CODE_USER_EV:
- efx_siena_sriov_event(channel, &event);
- break;
-#endif
- case FSE_CZ_EV_CODE_MCDI_EV:
- efx_mcdi_process_event(channel, &event);
- break;
- case FSE_AZ_EV_CODE_GLOBAL_EV:
- if (efx->type->handle_global_event &&
- efx->type->handle_global_event(channel, &event))
- break;
- fallthrough;
- default:
- netif_err(channel->efx, hw, channel->efx->net_dev,
- "channel %d unknown event type %d (data "
- EFX_QWORD_FMT ")\n", channel->channel,
- ev_code, EFX_QWORD_VAL(event));
- }
- }
-
-out:
- channel->eventq_read_ptr = read_ptr;
- return spent;
-}
-
-/* Allocate buffer table entries for event queue */
-int efx_farch_ev_probe(struct efx_channel *channel)
-{
- struct efx_nic *efx = channel->efx;
- unsigned entries;
-
- entries = channel->eventq_mask + 1;
- return efx_alloc_special_buffer(efx, &channel->eventq,
- entries * sizeof(efx_qword_t));
-}
-
-int efx_farch_ev_init(struct efx_channel *channel)
-{
- efx_oword_t reg;
- struct efx_nic *efx = channel->efx;
-
- netif_dbg(efx, hw, efx->net_dev,
- "channel %d event queue in special buffers %d-%d\n",
- channel->channel, channel->eventq.index,
- channel->eventq.index + channel->eventq.entries - 1);
-
- EFX_POPULATE_OWORD_3(reg,
- FRF_CZ_TIMER_Q_EN, 1,
- FRF_CZ_HOST_NOTIFY_MODE, 0,
- FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
- efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
-
- /* Pin event queue buffer */
- efx_init_special_buffer(efx, &channel->eventq);
-
- /* Fill event queue with all ones (i.e. empty events) */
- memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
-
- /* Push event queue to card */
- EFX_POPULATE_OWORD_3(reg,
- FRF_AZ_EVQ_EN, 1,
- FRF_AZ_EVQ_SIZE, __ffs(channel->eventq.entries),
- FRF_AZ_EVQ_BUF_BASE_ID, channel->eventq.index);
- efx_writeo_table(efx, &reg, efx->type->evq_ptr_tbl_base,
- channel->channel);
-
- return 0;
-}
-
-void efx_farch_ev_fini(struct efx_channel *channel)
-{
- efx_oword_t reg;
- struct efx_nic *efx = channel->efx;
-
- /* Remove event queue from card */
- EFX_ZERO_OWORD(reg);
- efx_writeo_table(efx, &reg, efx->type->evq_ptr_tbl_base,
- channel->channel);
- efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
-
- /* Unpin event queue */
- efx_fini_special_buffer(efx, &channel->eventq);
-}
-
-/* Free buffers backing event queue */
-void efx_farch_ev_remove(struct efx_channel *channel)
-{
- efx_free_special_buffer(channel->efx, &channel->eventq);
-}
-
-
-void efx_farch_ev_test_generate(struct efx_channel *channel)
-{
- efx_farch_magic_event(channel, EFX_CHANNEL_MAGIC_TEST(channel));
-}
-
-void efx_farch_rx_defer_refill(struct efx_rx_queue *rx_queue)
-{
- efx_farch_magic_event(efx_rx_queue_channel(rx_queue),
- EFX_CHANNEL_MAGIC_FILL(rx_queue));
-}
-
-/**************************************************************************
- *
- * Hardware interrupts
- * The hardware interrupt handler does very little work; all the event
- * queue processing is carried out by per-channel tasklets.
- *
- **************************************************************************/
-
-/* Enable/disable/generate interrupts */
-static inline void efx_farch_interrupts(struct efx_nic *efx,
- bool enabled, bool force)
-{
- efx_oword_t int_en_reg_ker;
-
- EFX_POPULATE_OWORD_3(int_en_reg_ker,
- FRF_AZ_KER_INT_LEVE_SEL, efx->irq_level,
- FRF_AZ_KER_INT_KER, force,
- FRF_AZ_DRV_INT_EN_KER, enabled);
- efx_writeo(efx, &int_en_reg_ker, FR_AZ_INT_EN_KER);
-}
-
-void efx_farch_irq_enable_master(struct efx_nic *efx)
-{
- EFX_ZERO_OWORD(*((efx_oword_t *) efx->irq_status.addr));
- wmb(); /* Ensure interrupt vector is clear before interrupts enabled */
-
- efx_farch_interrupts(efx, true, false);
-}
-
-void efx_farch_irq_disable_master(struct efx_nic *efx)
-{
- /* Disable interrupts */
- efx_farch_interrupts(efx, false, false);
-}
-
-/* Generate a test interrupt
- * Interrupt must already have been enabled, otherwise nasty things
- * may happen.
- */
-int efx_farch_irq_test_generate(struct efx_nic *efx)
-{
- efx_farch_interrupts(efx, true, true);
- return 0;
-}
-
-/* Process a fatal interrupt
- * Disable bus mastering ASAP and schedule a reset
- */
-irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx)
-{
- efx_oword_t *int_ker = efx->irq_status.addr;
- efx_oword_t fatal_intr;
- int error, mem_perr;
-
- efx_reado(efx, &fatal_intr, FR_AZ_FATAL_INTR_KER);
- error = EFX_OWORD_FIELD(fatal_intr, FRF_AZ_FATAL_INTR);
-
- netif_err(efx, hw, efx->net_dev, "SYSTEM ERROR "EFX_OWORD_FMT" status "
- EFX_OWORD_FMT ": %s\n", EFX_OWORD_VAL(*int_ker),
- EFX_OWORD_VAL(fatal_intr),
- error ? "disabling bus mastering" : "no recognised error");
-
- /* If this is a memory parity error dump which blocks are offending */
- mem_perr = (EFX_OWORD_FIELD(fatal_intr, FRF_AZ_MEM_PERR_INT_KER) ||
- EFX_OWORD_FIELD(fatal_intr, FRF_AZ_SRM_PERR_INT_KER));
- if (mem_perr) {
- efx_oword_t reg;
- efx_reado(efx, &reg, FR_AZ_MEM_STAT);
- netif_err(efx, hw, efx->net_dev,
- "SYSTEM ERROR: memory parity error "EFX_OWORD_FMT"\n",
- EFX_OWORD_VAL(reg));
- }
-
- /* Disable both devices */
- pci_clear_master(efx->pci_dev);
- efx_farch_irq_disable_master(efx);
-
- /* Count errors and reset or disable the NIC accordingly */
- if (efx->int_error_count == 0 ||
- time_after(jiffies, efx->int_error_expire)) {
- efx->int_error_count = 0;
- efx->int_error_expire =
- jiffies + EFX_INT_ERROR_EXPIRE * HZ;
- }
- if (++efx->int_error_count < EFX_MAX_INT_ERRORS) {
- netif_err(efx, hw, efx->net_dev,
- "SYSTEM ERROR - reset scheduled\n");
- efx_schedule_reset(efx, RESET_TYPE_INT_ERROR);
- } else {
- netif_err(efx, hw, efx->net_dev,
- "SYSTEM ERROR - max number of errors seen."
- "NIC will be disabled\n");
- efx_schedule_reset(efx, RESET_TYPE_DISABLE);
- }
-
- return IRQ_HANDLED;
-}
-
-/* Handle a legacy interrupt
- * Acknowledges the interrupt and schedule event queue processing.
- */
-irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id)
-{
- struct efx_nic *efx = dev_id;
- bool soft_enabled = READ_ONCE(efx->irq_soft_enabled);
- efx_oword_t *int_ker = efx->irq_status.addr;
- irqreturn_t result = IRQ_NONE;
- struct efx_channel *channel;
- efx_dword_t reg;
- u32 queues;
- int syserr;
-
- /* Read the ISR which also ACKs the interrupts */
- efx_readd(efx, &reg, FR_BZ_INT_ISR0);
- queues = EFX_EXTRACT_DWORD(reg, 0, 31);
-
- /* Legacy interrupts are disabled too late by the EEH kernel
- * code. Disable them earlier.
- * If an EEH error occurred, the read will have returned all ones.
- */
- if (EFX_DWORD_IS_ALL_ONES(reg) && efx_try_recovery(efx) &&
- !efx->eeh_disabled_legacy_irq) {
- disable_irq_nosync(efx->legacy_irq);
- efx->eeh_disabled_legacy_irq = true;
- }
-
- /* Handle non-event-queue sources */
- if (queues & (1U << efx->irq_level) && soft_enabled) {
- syserr = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
- if (unlikely(syserr))
- return efx_farch_fatal_interrupt(efx);
- efx->last_irq_cpu = raw_smp_processor_id();
- }
-
- if (queues != 0) {
- efx->irq_zero_count = 0;
-
- /* Schedule processing of any interrupting queues */
- if (likely(soft_enabled)) {
- efx_for_each_channel(channel, efx) {
- if (queues & 1)
- efx_schedule_channel_irq(channel);
- queues >>= 1;
- }
- }
- result = IRQ_HANDLED;
-
- } else {
- efx_qword_t *event;
-
- /* Legacy ISR read can return zero once (SF bug 15783) */
-
- /* We can't return IRQ_HANDLED more than once on seeing ISR=0
- * because this might be a shared interrupt. */
- if (efx->irq_zero_count++ == 0)
- result = IRQ_HANDLED;
-
- /* Ensure we schedule or rearm all event queues */
- if (likely(soft_enabled)) {
- efx_for_each_channel(channel, efx) {
- event = efx_event(channel,
- channel->eventq_read_ptr);
- if (efx_event_present(event))
- efx_schedule_channel_irq(channel);
- else
- efx_farch_ev_read_ack(channel);
- }
- }
- }
-
- if (result == IRQ_HANDLED)
- netif_vdbg(efx, intr, efx->net_dev,
- "IRQ %d on CPU %d status " EFX_DWORD_FMT "\n",
- irq, raw_smp_processor_id(), EFX_DWORD_VAL(reg));
-
- return result;
-}
-
-/* Handle an MSI interrupt
- *
- * Handle an MSI hardware interrupt. This routine schedules event
- * queue processing. No interrupt acknowledgement cycle is necessary.
- * Also, we never need to check that the interrupt is for us, since
- * MSI interrupts cannot be shared.
- */
-irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id)
-{
- struct efx_msi_context *context = dev_id;
- struct efx_nic *efx = context->efx;
- efx_oword_t *int_ker = efx->irq_status.addr;
- int syserr;
-
- netif_vdbg(efx, intr, efx->net_dev,
- "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n",
- irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker));
-
- if (!likely(READ_ONCE(efx->irq_soft_enabled)))
- return IRQ_HANDLED;
-
- /* Handle non-event-queue sources */
- if (context->index == efx->irq_level) {
- syserr = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
- if (unlikely(syserr))
- return efx_farch_fatal_interrupt(efx);
- efx->last_irq_cpu = raw_smp_processor_id();
- }
-
- /* Schedule processing of the channel */
- efx_schedule_channel_irq(efx->channel[context->index]);
-
- return IRQ_HANDLED;
-}
-
-/* Push the software RSS indirection table to the hardware.
- * Each row maps a packet hash value to an RX queue.
- */
-void efx_farch_rx_push_indir_table(struct efx_nic *efx)
-{
-	efx_dword_t entry;
-	size_t row;
-
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
-		     FR_BZ_RX_INDIRECTION_TBL_ROWS);
-
-	/* One dword register per row, FR_BZ_RX_INDIRECTION_TBL_STEP apart */
-	for (row = 0; row < FR_BZ_RX_INDIRECTION_TBL_ROWS; row++) {
-		EFX_POPULATE_DWORD_1(entry, FRF_BZ_IT_QUEUE,
-				     efx->rss_context.rx_indir_table[row]);
-		efx_writed(efx, &entry,
-			   FR_BZ_RX_INDIRECTION_TBL +
-			   FR_BZ_RX_INDIRECTION_TBL_STEP * row);
-	}
-}
-
-/* Read the hardware RSS indirection table back into
- * efx->rss_context.rx_indir_table, one entry per table row.
- */
-void efx_farch_rx_pull_indir_table(struct efx_nic *efx)
-{
-	efx_dword_t entry;
-	size_t row;
-
-	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
-		     FR_BZ_RX_INDIRECTION_TBL_ROWS);
-
-	for (row = 0; row < FR_BZ_RX_INDIRECTION_TBL_ROWS; row++) {
-		efx_readd(efx, &entry,
-			  FR_BZ_RX_INDIRECTION_TBL +
-			  FR_BZ_RX_INDIRECTION_TBL_STEP * row);
-		efx->rss_context.rx_indir_table[row] =
-			EFX_DWORD_FIELD(entry, FRF_BZ_IT_QUEUE);
-	}
-}
-
-/* Looks at available SRAM resources and works out how many queues we
- * can support, and where things like descriptor caches should live.
- *
- * SRAM is split up as follows:
- * 0                             buftbl entries for channels
- * nic_data->vf_buftbl_base      buftbl entries for SR-IOV
- * efx->rx_dc_base               RX descriptor caches
- * efx->tx_dc_base               TX descriptor caches
- *
- * @efx: NIC whose rx_dc_base/tx_dc_base are computed here; with
- *	CONFIG_SFC_SRIOV, vf_count may also be reduced and
- *	nic_data->vf_buftbl_base is set
- * @sram_lim_qw: SRAM limit; the TX descriptor caches end at this offset
- */
-void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
-{
-	unsigned vi_count, total_tx_channels;
-#ifdef CONFIG_SFC_SRIOV
-	struct siena_nic_data *nic_data;
-	unsigned buftbl_min;
-#endif
-
-	total_tx_channels = efx->n_tx_channels + efx->n_extra_tx_channels;
-	vi_count = max(efx->n_channels, total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL);
-
-#ifdef CONFIG_SFC_SRIOV
-	nic_data = efx->nic_data;
-	/* Account for the buffer table entries backing the datapath channels
-	 * and the descriptor caches for those channels.
-	 */
-	buftbl_min = ((efx->n_rx_channels * EFX_MAX_DMAQ_SIZE +
-		       total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL * EFX_MAX_DMAQ_SIZE +
-		       efx->n_channels * EFX_MAX_EVQ_SIZE)
-		      * sizeof(efx_qword_t) / EFX_BUF_SIZE);
-	/* The sriov_wanted hook is optional; only size for VFs if it says so */
-	if (efx->type->sriov_wanted && efx->type->sriov_wanted(efx)) {
-		unsigned vi_dc_entries, buftbl_free;
-		unsigned entries_per_vf, vf_limit;
-
-		nic_data->vf_buftbl_base = buftbl_min;
-
-		vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES;
-		vi_count = max(vi_count, EFX_VI_BASE);
-		buftbl_free = (sram_lim_qw - buftbl_min -
-			       vi_count * vi_dc_entries);
-
-		entries_per_vf = ((vi_dc_entries +
-				   EFX_VF_BUFTBL_PER_VI) *
-				  efx_vf_size(efx));
-		/* Limited both by free SRAM and by the VI index space */
-		vf_limit = min(buftbl_free / entries_per_vf,
-			       (1024U - EFX_VI_BASE) >> efx->vi_scale);
-
-		if (efx->vf_count > vf_limit) {
-			netif_err(efx, probe, efx->net_dev,
-				  "Reducing VF count from %d to %d\n",
-				  efx->vf_count, vf_limit);
-			efx->vf_count = vf_limit;
-		}
-		vi_count += efx->vf_count * efx_vf_size(efx);
-	}
-#endif
-
-	/* Descriptor caches sit at the top of SRAM: RX below TX */
-	efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES;
-	efx->rx_dc_base = efx->tx_dc_base - vi_count * RX_DC_ENTRIES;
-}
-
-/* Read FR_AZ_ALTERA_BUILD and return its FRF_AZ_ALTERA_BUILD_VER field. */
-u32 efx_farch_fpga_ver(struct efx_nic *efx)
-{
-	efx_oword_t build_reg;
-	u32 version;
-
-	efx_reado(efx, &build_reg, FR_AZ_ALTERA_BUILD);
-	version = EFX_OWORD_FIELD(build_reg, FRF_AZ_ALTERA_BUILD_VER);
-
-	return version;
-}
-/* Program the register settings handled by this function: descriptor
- * cache positions and sizes, the INT_KER address, the fatal interrupt
- * enables, and the TX_RESERVED and TX_PACE registers. It also chooses
- * efx->irq_level.
- */
-void efx_farch_init_common(struct efx_nic *efx)
-{
- efx_oword_t temp;
-
- /* Set positions of descriptor caches in SRAM, using the bases held
- * in efx->tx_dc_base and efx->rx_dc_base.
- */
- EFX_POPULATE_OWORD_1(temp, FRF_AZ_SRM_TX_DC_BASE_ADR, efx->tx_dc_base);
- efx_writeo(efx, &temp, FR_AZ_SRM_TX_DC_CFG);
- EFX_POPULATE_OWORD_1(temp, FRF_AZ_SRM_RX_DC_BASE_ADR, efx->rx_dc_base);
- efx_writeo(efx, &temp, FR_AZ_SRM_RX_DC_CFG);
-
- /* Set TX descriptor cache size. */
- BUILD_BUG_ON(TX_DC_ENTRIES != (8 << TX_DC_ENTRIES_ORDER));
- EFX_POPULATE_OWORD_1(temp, FRF_AZ_TX_DC_SIZE, TX_DC_ENTRIES_ORDER);
- efx_writeo(efx, &temp, FR_AZ_TX_DC_CFG);
-
- /* Set RX descriptor cache size. Set low watermark to size-8, as
- * this allows most efficient prefetching.
- */
- BUILD_BUG_ON(RX_DC_ENTRIES != (8 << RX_DC_ENTRIES_ORDER));
- EFX_POPULATE_OWORD_1(temp, FRF_AZ_RX_DC_SIZE, RX_DC_ENTRIES_ORDER);
- efx_writeo(efx, &temp, FR_AZ_RX_DC_CFG);
- EFX_POPULATE_OWORD_1(temp, FRF_AZ_RX_DC_PF_LWM, RX_DC_ENTRIES - 8);
- efx_writeo(efx, &temp, FR_AZ_RX_DC_PF_WM);
-
- /* Program INT_KER address */
- EFX_POPULATE_OWORD_2(temp,
- FRF_AZ_NORM_INT_VEC_DIS_KER,
- EFX_INT_MODE_USE_MSI(efx),
- FRF_AZ_INT_ADR_KER, efx->irq_status.dma_addr);
- efx_writeo(efx, &temp, FR_AZ_INT_ADR_KER);
-
- if (EFX_WORKAROUND_17213(efx) && !EFX_INT_MODE_USE_MSI(efx))
- /* Use an interrupt level unused by event queues */
- efx->irq_level = 0x1f;
- else
- /* Use a valid MSI-X vector */
- efx->irq_level = 0;
-
- /* Enable all the genuinely fatal interrupts. (They are still
- * masked by the overall interrupt mask, controlled by
- * falcon_interrupts()).
- *
- * Note: All other fatal interrupts are enabled
- *
- * The fields populated below are passed through EFX_INVERT_OWORD()
- * before the register is written.
- */
- EFX_POPULATE_OWORD_3(temp,
- FRF_AZ_ILL_ADR_INT_KER_EN, 1,
- FRF_AZ_RBUF_OWN_INT_KER_EN, 1,
- FRF_AZ_TBUF_OWN_INT_KER_EN, 1);
- EFX_SET_OWORD_FIELD(temp, FRF_CZ_SRAM_PERR_INT_P_KER_EN, 1);
- EFX_INVERT_OWORD(temp);
- efx_writeo(efx, &temp, FR_AZ_FATAL_INTR_KER);
-
- /* Disable the ugly timer-based TX DMA backoff and allow TX DMA to be
- * controlled by the RX FIFO fill level. Set arbitration to one pkt/Q.
- * This is a read-modify-write of FR_AZ_TX_RESERVED.
- */
- efx_reado(efx, &temp, FR_AZ_TX_RESERVED);
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER, 0xfe);
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER_EN, 1);
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_ONE_PKT_PER_Q, 1);
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PUSH_EN, 1);
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_DIS_NON_IP_EV, 1);
- /* Enable SW_EV to inherit in char driver - assume harmless here */
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_SOFT_EVT_EN, 1);
- /* Prefetch threshold 2 => fetch when descriptor cache half empty */
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_THRESHOLD, 2);
- /* Disable hardware watchdog which can misfire */
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_WD_TMR, 0x3fffff);
- /* Squash TX of packets of 16 bytes or less */
- EFX_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
- efx_writeo(efx, &temp, FR_AZ_TX_RESERVED);
-
- EFX_POPULATE_OWORD_4(temp,
- /* Default values */
- FRF_BZ_TX_PACE_SB_NOT_AF, 0x15,
- FRF_BZ_TX_PACE_SB_AF, 0xb,
- FRF_BZ_TX_PACE_FB_BASE, 0,
- /* Allow large pace values in the fast bin. */
- FRF_BZ_TX_PACE_BIN_TH,
- FFE_BZ_TX_PACE_RESERVED);
- efx_writeo(efx, &temp, FR_BZ_TX_PACE);
-}
-
-/**************************************************************************
- *
- * Filter tables
- *
- **************************************************************************
- */
-
-/* "Fudge factors" - difference between programmed value and actual depth.
- * Due to pipelined implementation we need to program H/W with a value that
- * is larger than the hop limit we want.
- */
-#define EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD 3
-#define EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL 1
-
-/* Hard maximum search limit. Hardware will time-out beyond 200-something.
- * We also need to avoid infinite loops in efx_farch_filter_search() when the
- * table is full.
- */
-#define EFX_FARCH_FILTER_CTL_SRCH_MAX 200
-
-/* Don't try very hard to find space for performance hints, as this is
- * counter-productive. */
-#define EFX_FARCH_FILTER_CTL_SRCH_HINT_MAX 5
-
-enum efx_farch_filter_type {
- EFX_FARCH_FILTER_TCP_FULL = 0,
- EFX_FARCH_FILTER_TCP_WILD,
- EFX_FARCH_FILTER_UDP_FULL,
- EFX_FARCH_FILTER_UDP_WILD,
- EFX_FARCH_FILTER_MAC_FULL = 4,
- EFX_FARCH_FILTER_MAC_WILD,
- EFX_FARCH_FILTER_UC_DEF = 8,
- EFX_FARCH_FILTER_MC_DEF,
- EFX_FARCH_FILTER_TYPE_COUNT, /* number of specific types */
-};
-
-enum efx_farch_filter_table_id {
- EFX_FARCH_FILTER_TABLE_RX_IP = 0,
- EFX_FARCH_FILTER_TABLE_RX_MAC,
- EFX_FARCH_FILTER_TABLE_RX_DEF,
- EFX_FARCH_FILTER_TABLE_TX_MAC,
- EFX_FARCH_FILTER_TABLE_COUNT,
-};
-
-enum efx_farch_filter_index {
- EFX_FARCH_FILTER_INDEX_UC_DEF,
- EFX_FARCH_FILTER_INDEX_MC_DEF,
- EFX_FARCH_FILTER_SIZE_RX_DEF,
-};
-
-struct efx_farch_filter_spec {
- u8 type:4; /* an enum efx_farch_filter_type value */
- u8 priority:4; /* copied from the generic spec's priority */
- u8 flags; /* EFX_FILTER_FLAG_* bits */
- u16 dmaq_id; /* queue ID written into the RXQ/TXQ ID field */
- u32 data[3]; /* packed match words; layout depends on type */
-};
-
-struct efx_farch_filter_table {
- enum efx_farch_filter_table_id id;
- u32 offset; /* address of table relative to BAR */
- unsigned size; /* number of entries */
- unsigned step; /* step between entries */
- unsigned used; /* number currently used */
- unsigned long *used_bitmap;
- struct efx_farch_filter_spec *spec;
- unsigned search_limit[EFX_FARCH_FILTER_TYPE_COUNT];
-};
-
-struct efx_farch_filter_state {
- struct rw_semaphore lock; /* Protects table contents */
- struct efx_farch_filter_table table[EFX_FARCH_FILTER_TABLE_COUNT];
-};
-
-static void
-efx_farch_filter_table_clear_entry(struct efx_nic *efx,
- struct efx_farch_filter_table *table,
- unsigned int filter_idx);
-
-/* The filter hash function is LFSR polynomial x^16 + x^3 + 1 of a 32-bit
- * key derived from the n-tuple. The initial LFSR state is 0xffff.
- *
- * The rounds are folded into two groups of shift/XOR steps: the first
- * mixes in the upper 16 bits of @key, the second the lower 16 bits.
- * NOTE(review): the 0x1fff seed below is presumably the 0xffff initial
- * state with part of the folding already applied - confirm before
- * changing it.
- *
- * Returns the 16-bit hash of @key.
- */
-static u16 efx_farch_filter_hash(u32 key)
-{
- u16 tmp;
-
- /* First 16 rounds */
- tmp = 0x1fff ^ key >> 16;
- tmp = tmp ^ tmp >> 3 ^ tmp >> 6;
- tmp = tmp ^ tmp >> 9;
- /* Last 16 rounds */
- tmp = tmp ^ tmp << 13 ^ key;
- tmp = tmp ^ tmp >> 3 ^ tmp >> 6;
- return tmp ^ tmp >> 9;
-}
-
-/* To allow for hash collisions, the filter search moves on from the
- * first candidate entry in steps of this key-derived increment.
- */
-static u16 efx_farch_filter_increment(u32 key)
-{
-	u32 step = key * 2 - 1;
-
-	/* Truncated to 16 bits by the return type */
-	return step;
-}
-
-/* Map a filter spec to the table that holds it: the type's upper bits
- * select the RX table and TX filters are offset by 2 from there. The
- * BUILD_BUG_ON()s pin the enum values this arithmetic relies on.
- */
-static enum efx_farch_filter_table_id
-efx_farch_filter_spec_table_id(const struct efx_farch_filter_spec *spec)
-{
-	int tx_offset = 0;
-
-	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_IP !=
-		     (EFX_FARCH_FILTER_TCP_FULL >> 2));
-	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_IP !=
-		     (EFX_FARCH_FILTER_TCP_WILD >> 2));
-	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_IP !=
-		     (EFX_FARCH_FILTER_UDP_FULL >> 2));
-	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_IP !=
-		     (EFX_FARCH_FILTER_UDP_WILD >> 2));
-	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_MAC !=
-		     (EFX_FARCH_FILTER_MAC_FULL >> 2));
-	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_MAC !=
-		     (EFX_FARCH_FILTER_MAC_WILD >> 2));
-	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_TX_MAC !=
-		     EFX_FARCH_FILTER_TABLE_RX_MAC + 2);
-
-	if (spec->flags & EFX_FILTER_FLAG_TX)
-		tx_offset = 2;
-
-	return (spec->type >> 2) + tx_offset;
-}
-/* Read-modify-write FR_BZ_RX_FILTER_CTL from the software filter state:
- * per-type search limits for the RX IP and RX MAC tables, the default
- * (no-match) queue and RSS settings, and the no-match RX scatter bit.
- */
-static void efx_farch_filter_push_rx_config(struct efx_nic *efx)
-{
- struct efx_farch_filter_state *state = efx->filter_state;
- struct efx_farch_filter_table *table;
- efx_oword_t filter_ctl;
-
- efx_reado(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
-
- table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
- EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_FULL_SRCH_LIMIT,
- table->search_limit[EFX_FARCH_FILTER_TCP_FULL] +
- EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
- EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_WILD_SRCH_LIMIT,
- table->search_limit[EFX_FARCH_FILTER_TCP_WILD] +
- EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
- EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_FULL_SRCH_LIMIT,
- table->search_limit[EFX_FARCH_FILTER_UDP_FULL] +
- EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
- EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_WILD_SRCH_LIMIT,
- table->search_limit[EFX_FARCH_FILTER_UDP_WILD] +
- EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
-
- table = &state->table[EFX_FARCH_FILTER_TABLE_RX_MAC];
- if (table->size) {
- EFX_SET_OWORD_FIELD(
- filter_ctl, FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT,
- table->search_limit[EFX_FARCH_FILTER_MAC_FULL] +
- EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
- EFX_SET_OWORD_FIELD(
- filter_ctl, FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT,
- table->search_limit[EFX_FARCH_FILTER_MAC_WILD] +
- EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
- }
-
- table = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
- if (table->size) {
- EFX_SET_OWORD_FIELD(
- filter_ctl, FRF_CZ_UNICAST_NOMATCH_Q_ID,
- table->spec[EFX_FARCH_FILTER_INDEX_UC_DEF].dmaq_id);
- EFX_SET_OWORD_FIELD(
- filter_ctl, FRF_CZ_UNICAST_NOMATCH_RSS_ENABLED,
- !!(table->spec[EFX_FARCH_FILTER_INDEX_UC_DEF].flags &
- EFX_FILTER_FLAG_RX_RSS));
- EFX_SET_OWORD_FIELD(
- filter_ctl, FRF_CZ_MULTICAST_NOMATCH_Q_ID,
- table->spec[EFX_FARCH_FILTER_INDEX_MC_DEF].dmaq_id);
- EFX_SET_OWORD_FIELD(
- filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED,
- !!(table->spec[EFX_FARCH_FILTER_INDEX_MC_DEF].flags &
- EFX_FILTER_FLAG_RX_RSS));
-
- /* There is a single bit to enable RX scatter for all
- * unmatched packets. Only set it if scatter is
- * enabled in both filter specs.
- */
- EFX_SET_OWORD_FIELD(
- filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
- !!(table->spec[EFX_FARCH_FILTER_INDEX_UC_DEF].flags &
- table->spec[EFX_FARCH_FILTER_INDEX_MC_DEF].flags &
- EFX_FILTER_FLAG_RX_SCATTER));
- } else {
- /* We don't expose 'default' filters because unmatched
- * packets always go to the queue number found in the
- * RSS table. But we still need to set the RX scatter
- * bit here.
- */
- EFX_SET_OWORD_FIELD(
- filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
- efx->rx_scatter);
- }
-
- efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
-}
-
-/* Read-modify-write FR_AZ_TX_CFG with the TX MAC table's search ranges.
- * The register is written back even when the TX MAC table has size 0,
- * in which case its contents are unchanged.
- */
-static void efx_farch_filter_push_tx_limits(struct efx_nic *efx)
-{
-	struct efx_farch_filter_state *filter_state = efx->filter_state;
-	struct efx_farch_filter_table *tx_mac =
-		&filter_state->table[EFX_FARCH_FILTER_TABLE_TX_MAC];
-	efx_oword_t reg;
-
-	efx_reado(efx, &reg, FR_AZ_TX_CFG);
-
-	if (tx_mac->size != 0) {
-		EFX_SET_OWORD_FIELD(
-			reg, FRF_CZ_TX_ETH_FILTER_FULL_SEARCH_RANGE,
-			tx_mac->search_limit[EFX_FARCH_FILTER_MAC_FULL] +
-			EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
-		EFX_SET_OWORD_FIELD(
-			reg, FRF_CZ_TX_ETH_FILTER_WILD_SEARCH_RANGE,
-			tx_mac->search_limit[EFX_FARCH_FILTER_MAC_WILD] +
-			EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
-	}
-
-	efx_writeo(efx, &reg, FR_AZ_TX_CFG);
-}
-/* Translate a generic filter spec into the farch representation.
- *
- * Fills @spec from @gen_spec. Returns 0 on success, -EINVAL if an RSS
- * filter names a non-zero rss_context, -EPROTONOSUPPORT for an
- * unsupported match_flags combination, ether type or IP protocol, or
- * -EADDRNOTAVAIL if a required port is zero. On the later error paths
- * @spec's priority, flags and dmaq_id have already been written.
- */
-static int
-efx_farch_filter_from_gen_spec(struct efx_farch_filter_spec *spec,
- const struct efx_filter_spec *gen_spec)
-{
- bool is_full = false;
-
- if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) && gen_spec->rss_context)
- return -EINVAL;
-
- spec->priority = gen_spec->priority;
- spec->flags = gen_spec->flags;
- spec->dmaq_id = gen_spec->dmaq_id;
-
- switch (gen_spec->match_flags) {
- case (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
- EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
- EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT):
- is_full = true;
- fallthrough;
- case (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
- EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT): {
- __be32 rhost, host1, host2;
- __be16 rport, port1, port2;
-
- EFX_WARN_ON_PARANOID(!(gen_spec->flags & EFX_FILTER_FLAG_RX));
-
- if (gen_spec->ether_type != htons(ETH_P_IP))
- return -EPROTONOSUPPORT;
- if (gen_spec->loc_port == 0 ||
- (is_full && gen_spec->rem_port == 0))
- return -EADDRNOTAVAIL;
- switch (gen_spec->ip_proto) {
- case IPPROTO_TCP:
- spec->type = (is_full ? EFX_FARCH_FILTER_TCP_FULL :
- EFX_FARCH_FILTER_TCP_WILD);
- break;
- case IPPROTO_UDP:
- spec->type = (is_full ? EFX_FARCH_FILTER_UDP_FULL :
- EFX_FARCH_FILTER_UDP_WILD);
- break;
- default:
- return -EPROTONOSUPPORT;
- }
-
- /* Filter is constructed in terms of source and destination,
- * with the odd wrinkle that the ports are swapped in a UDP
- * wildcard filter. We need to convert from local and remote
- * (= zero for wildcard) addresses.
- *
- * data[0] and data[1] carry host1 split around port1 and
- * port2; data[2] carries host2, all in host byte order.
- */
- rhost = is_full ? gen_spec->rem_host[0] : 0;
- rport = is_full ? gen_spec->rem_port : 0;
- host1 = rhost;
- host2 = gen_spec->loc_host[0];
- if (!is_full && gen_spec->ip_proto == IPPROTO_UDP) {
- port1 = gen_spec->loc_port;
- port2 = rport;
- } else {
- port1 = rport;
- port2 = gen_spec->loc_port;
- }
- spec->data[0] = ntohl(host1) << 16 | ntohs(port1);
- spec->data[1] = ntohs(port2) << 16 | ntohl(host1) >> 16;
- spec->data[2] = ntohl(host2);
-
- break;
- }
-
- case EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_OUTER_VID:
- is_full = true;
- fallthrough;
- case EFX_FILTER_MATCH_LOC_MAC:
- spec->type = (is_full ? EFX_FARCH_FILTER_MAC_FULL :
- EFX_FARCH_FILTER_MAC_WILD);
- spec->data[0] = is_full ? ntohs(gen_spec->outer_vid) : 0;
- spec->data[1] = (gen_spec->loc_mac[2] << 24 |
- gen_spec->loc_mac[3] << 16 |
- gen_spec->loc_mac[4] << 8 |
- gen_spec->loc_mac[5]);
- spec->data[2] = (gen_spec->loc_mac[0] << 8 |
- gen_spec->loc_mac[1]);
- break;
-
- case EFX_FILTER_MATCH_LOC_MAC_IG:
- spec->type = (is_multicast_ether_addr(gen_spec->loc_mac) ?
- EFX_FARCH_FILTER_MC_DEF :
- EFX_FARCH_FILTER_UC_DEF);
- memset(spec->data, 0, sizeof(spec->data)); /* ensure equality */
- break;
-
- default:
- return -EPROTONOSUPPORT;
- }
-
- return 0;
-}
-/* Translate a farch filter spec back into a generic filter spec.
- * @gen_spec is zeroed first and then filled from @spec; an unknown
- * spec->type triggers WARN_ON(1) and leaves the match fields zeroed.
- */
-static void
-efx_farch_filter_to_gen_spec(struct efx_filter_spec *gen_spec,
- const struct efx_farch_filter_spec *spec)
-{
- bool is_full = false;
-
- /* *gen_spec should be completely initialised, to be consistent
- * with efx_filter_init_{rx,tx}() and in case we want to copy
- * it back to userland.
- */
- memset(gen_spec, 0, sizeof(*gen_spec));
-
- gen_spec->priority = spec->priority;
- gen_spec->flags = spec->flags;
- gen_spec->dmaq_id = spec->dmaq_id;
-
- switch (spec->type) {
- case EFX_FARCH_FILTER_TCP_FULL:
- case EFX_FARCH_FILTER_UDP_FULL:
- is_full = true;
- fallthrough;
- case EFX_FARCH_FILTER_TCP_WILD:
- case EFX_FARCH_FILTER_UDP_WILD: {
- __be32 host1, host2;
- __be16 port1, port2;
-
- gen_spec->match_flags =
- EFX_FILTER_MATCH_ETHER_TYPE |
- EFX_FILTER_MATCH_IP_PROTO |
- EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT;
- if (is_full)
- gen_spec->match_flags |= (EFX_FILTER_MATCH_REM_HOST |
- EFX_FILTER_MATCH_REM_PORT);
- gen_spec->ether_type = htons(ETH_P_IP);
- gen_spec->ip_proto =
- (spec->type == EFX_FARCH_FILTER_TCP_FULL ||
- spec->type == EFX_FARCH_FILTER_TCP_WILD) ?
- IPPROTO_TCP : IPPROTO_UDP;
-
- host1 = htonl(spec->data[0] >> 16 | spec->data[1] << 16);
- port1 = htons(spec->data[0]);
- host2 = htonl(spec->data[2]);
- port2 = htons(spec->data[1] >> 16);
- if (spec->flags & EFX_FILTER_FLAG_TX) {
- gen_spec->loc_host[0] = host1;
- gen_spec->rem_host[0] = host2;
- } else {
- gen_spec->loc_host[0] = host2;
- gen_spec->rem_host[0] = host1;
- }
- if (!!(gen_spec->flags & EFX_FILTER_FLAG_TX) ^
- (!is_full && gen_spec->ip_proto == IPPROTO_UDP)) {
- gen_spec->loc_port = port1;
- gen_spec->rem_port = port2;
- } else {
- gen_spec->loc_port = port2;
- gen_spec->rem_port = port1;
- }
-
- break;
- }
-
- case EFX_FARCH_FILTER_MAC_FULL:
- is_full = true;
- fallthrough;
- case EFX_FARCH_FILTER_MAC_WILD:
- gen_spec->match_flags = EFX_FILTER_MATCH_LOC_MAC;
- if (is_full)
- gen_spec->match_flags |= EFX_FILTER_MATCH_OUTER_VID;
- gen_spec->loc_mac[0] = spec->data[2] >> 8;
- gen_spec->loc_mac[1] = spec->data[2];
- gen_spec->loc_mac[2] = spec->data[1] >> 24;
- gen_spec->loc_mac[3] = spec->data[1] >> 16;
- gen_spec->loc_mac[4] = spec->data[1] >> 8;
- gen_spec->loc_mac[5] = spec->data[1];
- gen_spec->outer_vid = htons(spec->data[0]);
- break;
-
- case EFX_FARCH_FILTER_UC_DEF:
- case EFX_FARCH_FILTER_MC_DEF:
- gen_spec->match_flags = EFX_FILTER_MATCH_LOC_MAC_IG;
- gen_spec->loc_mac[0] = spec->type == EFX_FARCH_FILTER_MC_DEF;
- break;
-
- default:
- WARN_ON(1);
- break;
- }
-}
-
-/* Turn @spec into an automatic RX filter for queue 0. */
-static void
-efx_farch_filter_init_rx_auto(struct efx_nic *efx,
-			      struct efx_farch_filter_spec *spec)
-{
-	u8 flags = EFX_FILTER_FLAG_RX;
-
-	/* If there's only one channel then disable RSS for non VF
-	 * traffic, thereby allowing VFs to use RSS when the PF can't.
-	 */
-	if (efx_rss_enabled(efx))
-		flags |= EFX_FILTER_FLAG_RX_RSS;
-	if (efx->rx_scatter)
-		flags |= EFX_FILTER_FLAG_RX_SCATTER;
-
-	spec->priority = EFX_FILTER_PRI_AUTO;
-	spec->flags = flags;
-	spec->dmaq_id = 0;
-}
-
-/* Build a filter entry and return its n-tuple key.
- *
- * *@filter receives the hardware table entry for @spec; the layout is
- * chosen by efx_farch_filter_spec_table_id(). The returned key is the
- * XOR of the three data words and a table-specific fourth word. Specs
- * that map to any table other than RX_IP, RX_MAC or TX_MAC hit BUG().
- */
-static u32 efx_farch_filter_build(efx_oword_t *filter,
- struct efx_farch_filter_spec *spec)
-{
- u32 data3;
-
- switch (efx_farch_filter_spec_table_id(spec)) {
- case EFX_FARCH_FILTER_TABLE_RX_IP: {
- bool is_udp = (spec->type == EFX_FARCH_FILTER_UDP_FULL ||
- spec->type == EFX_FARCH_FILTER_UDP_WILD);
- EFX_POPULATE_OWORD_7(
- *filter,
- FRF_BZ_RSS_EN,
- !!(spec->flags & EFX_FILTER_FLAG_RX_RSS),
- FRF_BZ_SCATTER_EN,
- !!(spec->flags & EFX_FILTER_FLAG_RX_SCATTER),
- FRF_BZ_TCP_UDP, is_udp,
- FRF_BZ_RXQ_ID, spec->dmaq_id,
- EFX_DWORD_2, spec->data[2],
- EFX_DWORD_1, spec->data[1],
- EFX_DWORD_0, spec->data[0]);
- data3 = is_udp;
- break;
- }
-
- case EFX_FARCH_FILTER_TABLE_RX_MAC: {
- bool is_wild = spec->type == EFX_FARCH_FILTER_MAC_WILD;
- EFX_POPULATE_OWORD_7(
- *filter,
- FRF_CZ_RMFT_RSS_EN,
- !!(spec->flags & EFX_FILTER_FLAG_RX_RSS),
- FRF_CZ_RMFT_SCATTER_EN,
- !!(spec->flags & EFX_FILTER_FLAG_RX_SCATTER),
- FRF_CZ_RMFT_RXQ_ID, spec->dmaq_id,
- FRF_CZ_RMFT_WILDCARD_MATCH, is_wild,
- FRF_CZ_RMFT_DEST_MAC_HI, spec->data[2],
- FRF_CZ_RMFT_DEST_MAC_LO, spec->data[1],
- FRF_CZ_RMFT_VLAN_ID, spec->data[0]);
- data3 = is_wild;
- break;
- }
-
- case EFX_FARCH_FILTER_TABLE_TX_MAC: {
- bool is_wild = spec->type == EFX_FARCH_FILTER_MAC_WILD;
- EFX_POPULATE_OWORD_5(*filter,
- FRF_CZ_TMFT_TXQ_ID, spec->dmaq_id,
- FRF_CZ_TMFT_WILDCARD_MATCH, is_wild,
- FRF_CZ_TMFT_SRC_MAC_HI, spec->data[2],
- FRF_CZ_TMFT_SRC_MAC_LO, spec->data[1],
- FRF_CZ_TMFT_VLAN_ID, spec->data[0]);
- data3 = is_wild | spec->dmaq_id << 1;
- break;
- }
-
- default:
- BUG();
- }
-
- return spec->data[0] ^ spec->data[1] ^ spec->data[2] ^ data3;
-}
-
-/* Two specs are equal when their type and match data agree; for a TX
- * filter on the left-hand side the queue ID must agree as well.
- */
-static bool efx_farch_filter_equal(const struct efx_farch_filter_spec *left,
-				   const struct efx_farch_filter_spec *right)
-{
-	bool tx_queue_differs;
-
-	if (left->type != right->type)
-		return false;
-	if (memcmp(left->data, right->data, sizeof(left->data)) != 0)
-		return false;
-
-	tx_queue_differs = (left->flags & EFX_FILTER_FLAG_TX) &&
-			   left->dmaq_id != right->dmaq_id;
-
-	return !tx_queue_differs;
-}
-
-/*
- * Construct/deconstruct external filter IDs. At least the RX filter
- * IDs must be ordered by matching priority, for RX NFC semantics.
- *
- * Deconstruction needs to be robust against invalid IDs so that
- * efx_filter_remove_id_safe() and efx_filter_get_filter_safe() can
- * accept user-provided IDs.
- */
-
-#define EFX_FARCH_FILTER_MATCH_PRI_COUNT 5
-
-static const u8 efx_farch_filter_type_match_pri[EFX_FARCH_FILTER_TYPE_COUNT] = {
- [EFX_FARCH_FILTER_TCP_FULL] = 0,
- [EFX_FARCH_FILTER_UDP_FULL] = 0,
- [EFX_FARCH_FILTER_TCP_WILD] = 1,
- [EFX_FARCH_FILTER_UDP_WILD] = 1,
- [EFX_FARCH_FILTER_MAC_FULL] = 2,
- [EFX_FARCH_FILTER_MAC_WILD] = 3,
- [EFX_FARCH_FILTER_UC_DEF] = 4,
- [EFX_FARCH_FILTER_MC_DEF] = 4,
-};
-
-static const enum efx_farch_filter_table_id efx_farch_filter_range_table[] = {
- EFX_FARCH_FILTER_TABLE_RX_IP, /* RX match pri 0 */
- EFX_FARCH_FILTER_TABLE_RX_IP,
- EFX_FARCH_FILTER_TABLE_RX_MAC,
- EFX_FARCH_FILTER_TABLE_RX_MAC,
- EFX_FARCH_FILTER_TABLE_RX_DEF, /* RX match pri 4 */
- EFX_FARCH_FILTER_TABLE_TX_MAC, /* TX match pri 0 */
- EFX_FARCH_FILTER_TABLE_TX_MAC, /* TX match pri 1 */
-};
-
-#define EFX_FARCH_FILTER_INDEX_WIDTH 13
-#define EFX_FARCH_FILTER_INDEX_MASK ((1 << EFX_FARCH_FILTER_INDEX_WIDTH) - 1)
-
-/* Compose an external filter ID: the match-priority range in the high
- * bits and the table index in the low EFX_FARCH_FILTER_INDEX_WIDTH bits.
- * Filters without EFX_FILTER_FLAG_RX use the ranges after the RX ones.
- */
-static inline u32
-efx_farch_filter_make_id(const struct efx_farch_filter_spec *spec,
-			 unsigned int index)
-{
-	unsigned int pri = efx_farch_filter_type_match_pri[spec->type];
-
-	if ((spec->flags & EFX_FILTER_FLAG_RX) == 0)
-		pri += EFX_FARCH_FILTER_MATCH_PRI_COUNT;
-
-	return (pri << EFX_FARCH_FILTER_INDEX_WIDTH) | index;
-}
-
-/* Extract the table from an external filter ID. An out-of-range ID
- * yields EFX_FARCH_FILTER_TABLE_COUNT, which callers treat as invalid.
- */
-static inline enum efx_farch_filter_table_id
-efx_farch_filter_id_table_id(u32 id)
-{
-	unsigned int range = id >> EFX_FARCH_FILTER_INDEX_WIDTH;
-
-	if (range >= ARRAY_SIZE(efx_farch_filter_range_table))
-		return EFX_FARCH_FILTER_TABLE_COUNT; /* invalid */
-
-	return efx_farch_filter_range_table[range];
-}
-
-/* Extract the table index (low bits) from an external filter ID. */
-static inline unsigned int efx_farch_filter_id_index(u32 id)
-{
-	unsigned int index = id & EFX_FARCH_FILTER_INDEX_MASK;
-
-	return index;
-}
-
-/* Return a limit for RX filter IDs: walk the RX match-priority ranges
- * from the last one down to 0 and, for the first whose table has a
- * non-zero size, return that range combined with the table size.
- * Returns 0 if every RX table is empty-sized.
- */
-u32 efx_farch_filter_get_rx_id_limit(struct efx_nic *efx)
-{
-	struct efx_farch_filter_state *state = efx->filter_state;
-	unsigned int range;
-
-	for (range = EFX_FARCH_FILTER_MATCH_PRI_COUNT; range-- > 0; ) {
-		enum efx_farch_filter_table_id table_id =
-			efx_farch_filter_range_table[range];
-		unsigned int size = state->table[table_id].size;
-
-		if (size != 0)
-			return (range << EFX_FARCH_FILTER_INDEX_WIDTH) | size;
-	}
-
-	return 0;
-}
-/* Insert the filter described by @gen_spec, or replace an equal one.
- *
- * Returns the new filter's ID (from efx_farch_filter_make_id()) on
- * success, or a negative error: whatever
- * efx_farch_filter_from_gen_spec() rejected, -EINVAL if the target
- * table has size 0, -EBUSY if no slot was found within the search
- * depth, -EEXIST if an equal filter of the same priority exists and
- * @replace_equal is false, or -EPERM if the existing equal filter has
- * a higher priority. Takes state->lock for writing.
- */
-s32 efx_farch_filter_insert(struct efx_nic *efx,
- struct efx_filter_spec *gen_spec,
- bool replace_equal)
-{
- struct efx_farch_filter_state *state = efx->filter_state;
- struct efx_farch_filter_table *table;
- struct efx_farch_filter_spec spec;
- efx_oword_t filter;
- int rep_index, ins_index;
- unsigned int depth = 0;
- int rc;
-
- rc = efx_farch_filter_from_gen_spec(&spec, gen_spec);
- if (rc)
- return rc;
-
- down_write(&state->lock);
-
- table = &state->table[efx_farch_filter_spec_table_id(&spec)];
- if (table->size == 0) {
- rc = -EINVAL;
- goto out_unlock;
- }
-
- netif_vdbg(efx, hw, efx->net_dev,
- "%s: type %d search_limit=%d", __func__, spec.type,
- table->search_limit[spec.type]); /* NOTE(review): format has no trailing newline */
-
- if (table->id == EFX_FARCH_FILTER_TABLE_RX_DEF) {
- /* One filter spec per type */
- BUILD_BUG_ON(EFX_FARCH_FILTER_INDEX_UC_DEF != 0);
- BUILD_BUG_ON(EFX_FARCH_FILTER_INDEX_MC_DEF !=
- EFX_FARCH_FILTER_MC_DEF - EFX_FARCH_FILTER_UC_DEF);
- rep_index = spec.type - EFX_FARCH_FILTER_UC_DEF;
- ins_index = rep_index;
- } else {
- /* Search concurrently for
- * (1) a filter to be replaced (rep_index): any filter
- * with the same match values, up to the current
- * search depth for this type, and
- * (2) the insertion point (ins_index): (1) or any
- * free slot before it or up to the maximum search
- * depth for this priority
- * We fail if we cannot find (2).
- *
- * We can stop once either
- * (a) we find (1), in which case we have definitely
- * found (2) as well; or
- * (b) we have searched exhaustively for (1), and have
- * either found (2) or searched exhaustively for it
- */
- u32 key = efx_farch_filter_build(&filter, &spec);
- unsigned int hash = efx_farch_filter_hash(key);
- unsigned int incr = efx_farch_filter_increment(key);
- unsigned int max_rep_depth = table->search_limit[spec.type];
- unsigned int max_ins_depth =
- spec.priority <= EFX_FILTER_PRI_HINT ?
- EFX_FARCH_FILTER_CTL_SRCH_HINT_MAX :
- EFX_FARCH_FILTER_CTL_SRCH_MAX;
- unsigned int i = hash & (table->size - 1);
-
- ins_index = -1;
- depth = 1;
-
- for (;;) {
- if (!test_bit(i, table->used_bitmap)) {
- if (ins_index < 0)
- ins_index = i;
- } else if (efx_farch_filter_equal(&spec,
- &table->spec[i])) {
- /* Case (a) */
- if (ins_index < 0)
- ins_index = i;
- rep_index = i;
- break;
- }
-
- if (depth >= max_rep_depth &&
- (ins_index >= 0 || depth >= max_ins_depth)) {
- /* Case (b) */
- if (ins_index < 0) {
- rc = -EBUSY;
- goto out_unlock;
- }
- rep_index = -1;
- break;
- }
-
- i = (i + incr) & (table->size - 1);
- ++depth;
- }
- }
-
- /* If we found a filter to be replaced, check whether we
- * should do so
- */
- if (rep_index >= 0) {
- struct efx_farch_filter_spec *saved_spec =
- &table->spec[rep_index];
-
- if (spec.priority == saved_spec->priority && !replace_equal) {
- rc = -EEXIST;
- goto out_unlock;
- }
- if (spec.priority < saved_spec->priority) {
- rc = -EPERM;
- goto out_unlock;
- }
- if (saved_spec->priority == EFX_FILTER_PRI_AUTO ||
- saved_spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO)
- spec.flags |= EFX_FILTER_FLAG_RX_OVER_AUTO;
- }
-
- /* Insert the filter; the used count only grows when a new slot
- * is taken rather than an existing one being overwritten
- */
- if (ins_index != rep_index) {
- __set_bit(ins_index, table->used_bitmap);
- ++table->used;
- }
- table->spec[ins_index] = spec;
-
- if (table->id == EFX_FARCH_FILTER_TABLE_RX_DEF) {
- efx_farch_filter_push_rx_config(efx);
- } else {
- if (table->search_limit[spec.type] < depth) {
- table->search_limit[spec.type] = depth;
- if (spec.flags & EFX_FILTER_FLAG_TX)
- efx_farch_filter_push_tx_limits(efx);
- else
- efx_farch_filter_push_rx_config(efx);
- }
-
- efx_writeo(efx, &filter,
- table->offset + table->step * ins_index);
-
- /* If we were able to replace a filter by inserting
- * at a lower depth, clear the replaced filter
- */
- if (ins_index != rep_index && rep_index >= 0)
- efx_farch_filter_table_clear_entry(efx, table,
- rep_index);
- }
-
- netif_vdbg(efx, hw, efx->net_dev,
- "%s: filter type %d index %d rxq %u set",
- __func__, spec.type, ins_index, spec.dmaq_id);
- rc = efx_farch_filter_make_id(&spec, ins_index);
-
-out_unlock:
- up_write(&state->lock);
- return rc;
-}
-/* Mark @filter_idx unused in @table, zero its software spec and write
- * an all-zero entry to the hardware table. If that leaves the table
- * empty, reset all of its search limits and push them to the hardware.
- */
-static void
-efx_farch_filter_table_clear_entry(struct efx_nic *efx,
- struct efx_farch_filter_table *table,
- unsigned int filter_idx)
-{
- static efx_oword_t filter; /* static and never written, so all zeroes */
-
- EFX_WARN_ON_PARANOID(!test_bit(filter_idx, table->used_bitmap));
- BUG_ON(table->offset == 0); /* can't clear MAC default filters */
-
- __clear_bit(filter_idx, table->used_bitmap);
- --table->used;
- memset(&table->spec[filter_idx], 0, sizeof(table->spec[0]));
-
- efx_writeo(efx, &filter, table->offset + table->step * filter_idx);
-
- /* If this filter required a greater search depth than
- * any other, the search limit for its type can now be
- * decreased. However, it is hard to determine that
- * unless the table has become completely empty - in
- * which case, all its search limits can be set to 0.
- */
- if (unlikely(table->used == 0)) {
- memset(table->search_limit, 0, sizeof(table->search_limit));
- if (table->id == EFX_FARCH_FILTER_TABLE_TX_MAC)
- efx_farch_filter_push_tx_limits(efx);
- else
- efx_farch_filter_push_rx_config(efx);
- }
-}
-
-/* Remove the filter at @filter_idx if it is in use and has exactly
- * @priority; otherwise return -ENOENT. A filter flagged
- * EFX_FILTER_FLAG_RX_OVER_AUTO is not cleared but re-initialised as an
- * automatic filter. Returns 0 on success.
- */
-static int efx_farch_filter_remove(struct efx_nic *efx,
-				   struct efx_farch_filter_table *table,
-				   unsigned int filter_idx,
-				   enum efx_filter_priority priority)
-{
-	struct efx_farch_filter_spec *spec = &table->spec[filter_idx];
-
-	if (!test_bit(filter_idx, table->used_bitmap))
-		return -ENOENT;
-	if (spec->priority != priority)
-		return -ENOENT;
-
-	if (!(spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO)) {
-		efx_farch_filter_table_clear_entry(efx, table, filter_idx);
-		return 0;
-	}
-
-	efx_farch_filter_init_rx_auto(efx, spec);
-	efx_farch_filter_push_rx_config(efx);
-
-	return 0;
-}
-
-/* Remove a filter by external ID. The ID is validated (table and index
- * range) before state->lock is taken for writing; an invalid ID, an
- * unused slot or a priority mismatch all yield -ENOENT.
- */
-int efx_farch_filter_remove_safe(struct efx_nic *efx,
-				 enum efx_filter_priority priority,
-				 u32 filter_id)
-{
-	struct efx_farch_filter_state *state = efx->filter_state;
-	enum efx_farch_filter_table_id table_id =
-		efx_farch_filter_id_table_id(filter_id);
-	unsigned int filter_idx = efx_farch_filter_id_index(filter_id);
-	struct efx_farch_filter_table *table;
-	int rc;
-
-	if ((unsigned int)table_id >= EFX_FARCH_FILTER_TABLE_COUNT)
-		return -ENOENT;
-
-	table = &state->table[table_id];
-	if (filter_idx >= table->size)
-		return -ENOENT;
-
-	down_write(&state->lock);
-	rc = efx_farch_filter_remove(efx, table, filter_idx, priority);
-	up_write(&state->lock);
-
-	return rc;
-}
-
-/* Look up a filter by external ID and copy it to @spec_buf in generic
- * form. Returns 0 on success, or -ENOENT if the ID is invalid, the slot
- * is unused or its priority differs from @priority. Holds state->lock
- * for reading throughout.
- */
-int efx_farch_filter_get_safe(struct efx_nic *efx,
-			      enum efx_filter_priority priority,
-			      u32 filter_id, struct efx_filter_spec *spec_buf)
-{
-	struct efx_farch_filter_state *state = efx->filter_state;
-	enum efx_farch_filter_table_id table_id;
-	struct efx_farch_filter_table *table;
-	unsigned int filter_idx;
-	int rc = -ENOENT;
-
-	down_read(&state->lock);
-
-	table_id = efx_farch_filter_id_table_id(filter_id);
-	if ((unsigned int)table_id < EFX_FARCH_FILTER_TABLE_COUNT) {
-		table = &state->table[table_id];
-		filter_idx = efx_farch_filter_id_index(filter_id);
-
-		if (filter_idx < table->size &&
-		    test_bit(filter_idx, table->used_bitmap) &&
-		    table->spec[filter_idx].priority == priority) {
-			efx_farch_filter_to_gen_spec(spec_buf,
-						     &table->spec[filter_idx]);
-			rc = 0;
-		}
-	}
-
-	up_read(&state->lock);
-	return rc;
-}
-
-/* Under the write lock, try to remove every entry of one table whose
- * stored priority is not EFX_FILTER_PRI_AUTO; efx_farch_filter_remove()
- * then only acts on in-use entries whose priority equals @priority.
- */
-static void
-efx_farch_filter_table_clear(struct efx_nic *efx,
-			     enum efx_farch_filter_table_id table_id,
-			     enum efx_filter_priority priority)
-{
-	struct efx_farch_filter_state *state = efx->filter_state;
-	struct efx_farch_filter_table *table = &state->table[table_id];
-	unsigned int idx = 0;
-
-	down_write(&state->lock);
-	while (idx < table->size) {
-		if (table->spec[idx].priority != EFX_FILTER_PRI_AUTO)
-			efx_farch_filter_remove(efx, table, idx, priority);
-		++idx;
-	}
-	up_write(&state->lock);
-}
-
-/* Clear filters of @priority from the RX IP, RX MAC and RX default
- * tables, in that order. Always returns 0.
- */
-int efx_farch_filter_clear_rx(struct efx_nic *efx,
-			      enum efx_filter_priority priority)
-{
-	static const enum efx_farch_filter_table_id rx_tables[] = {
-		EFX_FARCH_FILTER_TABLE_RX_IP,
-		EFX_FARCH_FILTER_TABLE_RX_MAC,
-		EFX_FARCH_FILTER_TABLE_RX_DEF,
-	};
-	size_t i;
-
-	for (i = 0; i < ARRAY_SIZE(rx_tables); i++)
-		efx_farch_filter_table_clear(efx, rx_tables[i], priority);
-
-	return 0;
-}
-
-/* Count the in-use entries with exactly @priority across the RX IP,
- * RX MAC and RX default tables, under the read lock.
- */
-u32 efx_farch_filter_count_rx_used(struct efx_nic *efx,
-				   enum efx_filter_priority priority)
-{
-	struct efx_farch_filter_state *state = efx->filter_state;
-	enum efx_farch_filter_table_id table_id;
-	u32 matches = 0;
-
-	down_read(&state->lock);
-
-	for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
-	     table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
-	     table_id++) {
-		struct efx_farch_filter_table *table = &state->table[table_id];
-		unsigned int idx;
-
-		for (idx = 0; idx < table->size; idx++) {
-			if (!test_bit(idx, table->used_bitmap))
-				continue;
-			if (table->spec[idx].priority == priority)
-				++matches;
-		}
-	}
-
-	up_read(&state->lock);
-
-	return matches;
-}
-
-s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
- enum efx_filter_priority priority,
- u32 *buf, u32 size)
-{
- struct efx_farch_filter_state *state = efx->filter_state;
- enum efx_farch_filter_table_id table_id;
- struct efx_farch_filter_table *table;
- unsigned int filter_idx;
- s32 count = 0;
-
- down_read(&state->lock);
-
- for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
- table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
- table_id++) {
- table = &state->table[table_id];
- for (filter_idx = 0; filter_idx < table->size; filter_idx++) {
- if (test_bit(filter_idx, table->used_bitmap) &&
- table->spec[filter_idx].priority == priority) {
- if (count == size) {
- count = -EMSGSIZE;
- goto out;
- }
- buf[count++] = efx_farch_filter_make_id(
- &table->spec[filter_idx], filter_idx);
- }
- }
- }
-out:
- up_read(&state->lock);
-
- return count;
-}
-
-/* Restore filter stater after reset */
-void efx_farch_filter_table_restore(struct efx_nic *efx)
-{
- struct efx_farch_filter_state *state = efx->filter_state;
- enum efx_farch_filter_table_id table_id;
- struct efx_farch_filter_table *table;
- efx_oword_t filter;
- unsigned int filter_idx;
-
- down_write(&state->lock);
-
- for (table_id = 0; table_id < EFX_FARCH_FILTER_TABLE_COUNT; table_id++) {
- table = &state->table[table_id];
-
- /* Check whether this is a regular register table */
- if (table->step == 0)
- continue;
-
- for (filter_idx = 0; filter_idx < table->size; filter_idx++) {
- if (!test_bit(filter_idx, table->used_bitmap))
- continue;
- efx_farch_filter_build(&filter, &table->spec[filter_idx]);
- efx_writeo(efx, &filter,
- table->offset + table->step * filter_idx);
- }
- }
-
- efx_farch_filter_push_rx_config(efx);
- efx_farch_filter_push_tx_limits(efx);
-
- up_write(&state->lock);
-}
-
-void efx_farch_filter_table_remove(struct efx_nic *efx)
-{
- struct efx_farch_filter_state *state = efx->filter_state;
- enum efx_farch_filter_table_id table_id;
-
- for (table_id = 0; table_id < EFX_FARCH_FILTER_TABLE_COUNT; table_id++) {
- kfree(state->table[table_id].used_bitmap);
- vfree(state->table[table_id].spec);
- }
- kfree(state);
-}
-
-int efx_farch_filter_table_probe(struct efx_nic *efx)
-{
- struct efx_farch_filter_state *state;
- struct efx_farch_filter_table *table;
- unsigned table_id;
-
- state = kzalloc(sizeof(struct efx_farch_filter_state), GFP_KERNEL);
- if (!state)
- return -ENOMEM;
- efx->filter_state = state;
- init_rwsem(&state->lock);
-
- table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
- table->id = EFX_FARCH_FILTER_TABLE_RX_IP;
- table->offset = FR_BZ_RX_FILTER_TBL0;
- table->size = FR_BZ_RX_FILTER_TBL0_ROWS;
- table->step = FR_BZ_RX_FILTER_TBL0_STEP;
-
- table = &state->table[EFX_FARCH_FILTER_TABLE_RX_MAC];
- table->id = EFX_FARCH_FILTER_TABLE_RX_MAC;
- table->offset = FR_CZ_RX_MAC_FILTER_TBL0;
- table->size = FR_CZ_RX_MAC_FILTER_TBL0_ROWS;
- table->step = FR_CZ_RX_MAC_FILTER_TBL0_STEP;
-
- table = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
- table->id = EFX_FARCH_FILTER_TABLE_RX_DEF;
- table->size = EFX_FARCH_FILTER_SIZE_RX_DEF;
-
- table = &state->table[EFX_FARCH_FILTER_TABLE_TX_MAC];
- table->id = EFX_FARCH_FILTER_TABLE_TX_MAC;
- table->offset = FR_CZ_TX_MAC_FILTER_TBL0;
- table->size = FR_CZ_TX_MAC_FILTER_TBL0_ROWS;
- table->step = FR_CZ_TX_MAC_FILTER_TBL0_STEP;
-
- for (table_id = 0; table_id < EFX_FARCH_FILTER_TABLE_COUNT; table_id++) {
- table = &state->table[table_id];
- if (table->size == 0)
- continue;
- table->used_bitmap = kcalloc(BITS_TO_LONGS(table->size),
- sizeof(unsigned long),
- GFP_KERNEL);
- if (!table->used_bitmap)
- goto fail;
- table->spec = vzalloc(array_size(sizeof(*table->spec),
- table->size));
- if (!table->spec)
- goto fail;
- }
-
- table = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
- if (table->size) {
- /* RX default filters must always exist */
- struct efx_farch_filter_spec *spec;
- unsigned i;
-
- for (i = 0; i < EFX_FARCH_FILTER_SIZE_RX_DEF; i++) {
- spec = &table->spec[i];
- spec->type = EFX_FARCH_FILTER_UC_DEF + i;
- efx_farch_filter_init_rx_auto(efx, spec);
- __set_bit(i, table->used_bitmap);
- }
- }
-
- efx_farch_filter_push_rx_config(efx);
-
- return 0;
-
-fail:
- efx_farch_filter_table_remove(efx);
- return -ENOMEM;
-}
-
-/* Update scatter enable flags for filters pointing to our own RX queues */
-void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
-{
- struct efx_farch_filter_state *state = efx->filter_state;
- enum efx_farch_filter_table_id table_id;
- struct efx_farch_filter_table *table;
- efx_oword_t filter;
- unsigned int filter_idx;
-
- down_write(&state->lock);
-
- for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP;
- table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF;
- table_id++) {
- table = &state->table[table_id];
-
- for (filter_idx = 0; filter_idx < table->size; filter_idx++) {
- if (!test_bit(filter_idx, table->used_bitmap) ||
- table->spec[filter_idx].dmaq_id >=
- efx->n_rx_channels)
- continue;
-
- if (efx->rx_scatter)
- table->spec[filter_idx].flags |=
- EFX_FILTER_FLAG_RX_SCATTER;
- else
- table->spec[filter_idx].flags &=
- ~EFX_FILTER_FLAG_RX_SCATTER;
-
- if (table_id == EFX_FARCH_FILTER_TABLE_RX_DEF)
- /* Pushed by efx_farch_filter_push_rx_config() */
- continue;
-
- efx_farch_filter_build(&filter, &table->spec[filter_idx]);
- efx_writeo(efx, &filter,
- table->offset + table->step * filter_idx);
- }
- }
-
- efx_farch_filter_push_rx_config(efx);
-
- up_write(&state->lock);
-}
-
-#ifdef CONFIG_RFS_ACCEL
-
-bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
- unsigned int index)
-{
- struct efx_farch_filter_state *state = efx->filter_state;
- struct efx_farch_filter_table *table;
- bool ret = false, force = false;
- u16 arfs_id;
-
- down_write(&state->lock);
- spin_lock_bh(&efx->rps_hash_lock);
- table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
- if (test_bit(index, table->used_bitmap) &&
- table->spec[index].priority == EFX_FILTER_PRI_HINT) {
- struct efx_arfs_rule *rule = NULL;
- struct efx_filter_spec spec;
-
- efx_farch_filter_to_gen_spec(&spec, &table->spec[index]);
- if (!efx->rps_hash_table) {
- /* In the absence of the table, we always returned 0 to
- * ARFS, so use the same to query it.
- */
- arfs_id = 0;
- } else {
- rule = efx_rps_hash_find(efx, &spec);
- if (!rule) {
- /* ARFS table doesn't know of this filter, remove it */
- force = true;
- } else {
- arfs_id = rule->arfs_id;
- if (!efx_rps_check_rule(rule, index, &force))
- goto out_unlock;
- }
- }
- if (force || rps_may_expire_flow(efx->net_dev, spec.dmaq_id,
- flow_id, arfs_id)) {
- if (rule)
- rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
- efx_rps_hash_del(efx, &spec);
- efx_farch_filter_table_clear_entry(efx, table, index);
- ret = true;
- }
- }
-out_unlock:
- spin_unlock_bh(&efx->rps_hash_lock);
- up_write(&state->lock);
- return ret;
-}
-
-#endif /* CONFIG_RFS_ACCEL */
-
-void efx_farch_filter_sync_rx_mode(struct efx_nic *efx)
-{
- struct net_device *net_dev = efx->net_dev;
- struct netdev_hw_addr *ha;
- union efx_multicast_hash *mc_hash = &efx->multicast_hash;
- u32 crc;
- int bit;
-
- if (!efx_dev_registered(efx))
- return;
-
- netif_addr_lock_bh(net_dev);
-
- efx->unicast_filter = !(net_dev->flags & IFF_PROMISC);
-
- /* Build multicast hash table */
- if (net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
- memset(mc_hash, 0xff, sizeof(*mc_hash));
- } else {
- memset(mc_hash, 0x00, sizeof(*mc_hash));
- netdev_for_each_mc_addr(ha, net_dev) {
- crc = ether_crc_le(ETH_ALEN, ha->addr);
- bit = crc & (EFX_MCAST_HASH_ENTRIES - 1);
- __set_bit_le(bit, mc_hash);
- }
-
- /* Broadcast packets go through the multicast hash filter.
- * ether_crc_le() of the broadcast address is 0xbe2612ff
- * so we always add bit 0xff to the mask.
- */
- __set_bit_le(0xff, mc_hash);
- }
-
- netif_addr_unlock_bh(net_dev);
-}
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
deleted file mode 100644
index ce3060e15b54..000000000000
--- a/drivers/net/ethernet/sfc/siena.c
+++ /dev/null
@@ -1,1109 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2005-2006 Fen Systems Ltd.
- * Copyright 2006-2013 Solarflare Communications Inc.
- */
-
-#include <linux/bitops.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include "net_driver.h"
-#include "bitfield.h"
-#include "efx.h"
-#include "efx_common.h"
-#include "nic.h"
-#include "farch_regs.h"
-#include "io.h"
-#include "workarounds.h"
-#include "mcdi.h"
-#include "mcdi_pcol.h"
-#include "mcdi_port.h"
-#include "mcdi_port_common.h"
-#include "selftest.h"
-#include "siena_sriov.h"
-#include "rx_common.h"
-
-/* Hardware control for SFC9000 family including SFL9021 (aka Siena). */
-
-static void siena_init_wol(struct efx_nic *efx);
-
-
-static void siena_push_irq_moderation(struct efx_channel *channel)
-{
- struct efx_nic *efx = channel->efx;
- efx_dword_t timer_cmd;
-
- if (channel->irq_moderation_us) {
- unsigned int ticks;
-
- ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
- EFX_POPULATE_DWORD_2(timer_cmd,
- FRF_CZ_TC_TIMER_MODE,
- FFE_CZ_TIMER_MODE_INT_HLDOFF,
- FRF_CZ_TC_TIMER_VAL,
- ticks - 1);
- } else {
- EFX_POPULATE_DWORD_2(timer_cmd,
- FRF_CZ_TC_TIMER_MODE,
- FFE_CZ_TIMER_MODE_DIS,
- FRF_CZ_TC_TIMER_VAL, 0);
- }
- efx_writed_page_locked(channel->efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0,
- channel->channel);
-}
-
-void siena_prepare_flush(struct efx_nic *efx)
-{
- if (efx->fc_disable++ == 0)
- efx_mcdi_set_mac(efx);
-}
-
-void siena_finish_flush(struct efx_nic *efx)
-{
- if (--efx->fc_disable == 0)
- efx_mcdi_set_mac(efx);
-}
-
-static const struct efx_farch_register_test siena_register_tests[] = {
- { FR_AZ_ADR_REGION,
- EFX_OWORD32(0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF) },
- { FR_CZ_USR_EV_CFG,
- EFX_OWORD32(0x000103FF, 0x00000000, 0x00000000, 0x00000000) },
- { FR_AZ_RX_CFG,
- EFX_OWORD32(0xFFFFFFFE, 0xFFFFFFFF, 0x0003FFFF, 0x00000000) },
- { FR_AZ_TX_CFG,
- EFX_OWORD32(0x7FFF0037, 0xFFFF8000, 0xFFFFFFFF, 0x03FFFFFF) },
- { FR_AZ_TX_RESERVED,
- EFX_OWORD32(0xFFFEFE80, 0x1FFFFFFF, 0x020000FE, 0x007FFFFF) },
- { FR_AZ_SRM_TX_DC_CFG,
- EFX_OWORD32(0x001FFFFF, 0x00000000, 0x00000000, 0x00000000) },
- { FR_AZ_RX_DC_CFG,
- EFX_OWORD32(0x00000003, 0x00000000, 0x00000000, 0x00000000) },
- { FR_AZ_RX_DC_PF_WM,
- EFX_OWORD32(0x000003FF, 0x00000000, 0x00000000, 0x00000000) },
- { FR_BZ_DP_CTRL,
- EFX_OWORD32(0x00000FFF, 0x00000000, 0x00000000, 0x00000000) },
- { FR_BZ_RX_RSS_TKEY,
- EFX_OWORD32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF) },
- { FR_CZ_RX_RSS_IPV6_REG1,
- EFX_OWORD32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF) },
- { FR_CZ_RX_RSS_IPV6_REG2,
- EFX_OWORD32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF) },
- { FR_CZ_RX_RSS_IPV6_REG3,
- EFX_OWORD32(0xFFFFFFFF, 0xFFFFFFFF, 0x00000007, 0x00000000) },
-};
-
-static int siena_test_chip(struct efx_nic *efx, struct efx_self_tests *tests)
-{
- enum reset_type reset_method = RESET_TYPE_ALL;
- int rc, rc2;
-
- efx_reset_down(efx, reset_method);
-
- /* Reset the chip immediately so that it is completely
- * quiescent regardless of what any VF driver does.
- */
- rc = efx_mcdi_reset(efx, reset_method);
- if (rc)
- goto out;
-
- tests->registers =
- efx_farch_test_registers(efx, siena_register_tests,
- ARRAY_SIZE(siena_register_tests))
- ? -1 : 1;
-
- rc = efx_mcdi_reset(efx, reset_method);
-out:
- rc2 = efx_reset_up(efx, reset_method, rc == 0);
- return rc ? rc : rc2;
-}
-
-/**************************************************************************
- *
- * PTP
- *
- **************************************************************************
- */
-
-static void siena_ptp_write_host_time(struct efx_nic *efx, u32 host_time)
-{
- _efx_writed(efx, cpu_to_le32(host_time),
- FR_CZ_MC_TREG_SMEM + MC_SMEM_P0_PTP_TIME_OFST);
-}
-
-static int siena_ptp_set_ts_config(struct efx_nic *efx,
- struct hwtstamp_config *init)
-{
- int rc;
-
- switch (init->rx_filter) {
- case HWTSTAMP_FILTER_NONE:
- /* if TX timestamping is still requested then leave PTP on */
- return efx_ptp_change_mode(efx,
- init->tx_type != HWTSTAMP_TX_OFF,
- efx_ptp_get_mode(efx));
- case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
- case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
- case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
- init->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
- return efx_ptp_change_mode(efx, true, MC_CMD_PTP_MODE_V1);
- case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
- case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
- case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
- init->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
- rc = efx_ptp_change_mode(efx, true,
- MC_CMD_PTP_MODE_V2_ENHANCED);
- /* bug 33070 - old versions of the firmware do not support the
- * improved UUID filtering option. Similarly old versions of the
- * application do not expect it to be enabled. If the firmware
- * does not accept the enhanced mode, fall back to the standard
- * PTP v2 UUID filtering. */
- if (rc != 0)
- rc = efx_ptp_change_mode(efx, true, MC_CMD_PTP_MODE_V2);
- return rc;
- default:
- return -ERANGE;
- }
-}
-
-/**************************************************************************
- *
- * Device reset
- *
- **************************************************************************
- */
-
-static int siena_map_reset_flags(u32 *flags)
-{
- enum {
- SIENA_RESET_PORT = (ETH_RESET_DMA | ETH_RESET_FILTER |
- ETH_RESET_OFFLOAD | ETH_RESET_MAC |
- ETH_RESET_PHY),
- SIENA_RESET_MC = (SIENA_RESET_PORT |
- ETH_RESET_MGMT << ETH_RESET_SHARED_SHIFT),
- };
-
- if ((*flags & SIENA_RESET_MC) == SIENA_RESET_MC) {
- *flags &= ~SIENA_RESET_MC;
- return RESET_TYPE_WORLD;
- }
-
- if ((*flags & SIENA_RESET_PORT) == SIENA_RESET_PORT) {
- *flags &= ~SIENA_RESET_PORT;
- return RESET_TYPE_ALL;
- }
-
- /* no invisible reset implemented */
-
- return -EINVAL;
-}
-
-#ifdef CONFIG_EEH
-/* When a PCI device is isolated from the bus, a subsequent MMIO read is
- * required for the kernel EEH mechanisms to notice. As the Solarflare driver
- * was written to minimise MMIO read (for latency) then a periodic call to check
- * the EEH status of the device is required so that device recovery can happen
- * in a timely fashion.
- */
-static void siena_monitor(struct efx_nic *efx)
-{
- struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
-
- eeh_dev_check_failure(eehdev);
-}
-#endif
-
-static int siena_probe_nvconfig(struct efx_nic *efx)
-{
- u32 caps = 0;
- int rc;
-
- rc = efx_mcdi_get_board_cfg(efx, efx->net_dev->perm_addr, NULL, &caps);
-
- efx->timer_quantum_ns =
- (caps & (1 << MC_CMD_CAPABILITIES_TURBO_ACTIVE_LBN)) ?
- 3072 : 6144; /* 768 cycles */
- efx->timer_max_ns = efx->type->timer_period_max *
- efx->timer_quantum_ns;
-
- return rc;
-}
-
-static int siena_dimension_resources(struct efx_nic *efx)
-{
- /* Each port has a small block of internal SRAM dedicated to
- * the buffer table and descriptor caches. In theory we can
- * map both blocks to one port, but we don't.
- */
- efx_farch_dimension_resources(efx, FR_CZ_BUF_FULL_TBL_ROWS / 2);
- return 0;
-}
-
-/* On all Falcon-architecture NICs, PFs use BAR 0 for I/O space and BAR 2(&3)
- * for memory.
- */
-static unsigned int siena_mem_bar(struct efx_nic *efx)
-{
- return 2;
-}
-
-static unsigned int siena_mem_map_size(struct efx_nic *efx)
-{
- return FR_CZ_MC_TREG_SMEM +
- FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS;
-}
-
-static int siena_probe_nic(struct efx_nic *efx)
-{
- struct siena_nic_data *nic_data;
- efx_oword_t reg;
- int rc;
-
- /* Allocate storage for hardware specific data */
- nic_data = kzalloc(sizeof(struct siena_nic_data), GFP_KERNEL);
- if (!nic_data)
- return -ENOMEM;
- nic_data->efx = efx;
- efx->nic_data = nic_data;
-
- if (efx_farch_fpga_ver(efx) != 0) {
- netif_err(efx, probe, efx->net_dev,
- "Siena FPGA not supported\n");
- rc = -ENODEV;
- goto fail1;
- }
-
- efx->max_channels = EFX_MAX_CHANNELS;
- efx->max_vis = EFX_MAX_CHANNELS;
- efx->max_tx_channels = EFX_MAX_CHANNELS;
- efx->tx_queues_per_channel = 4;
-
- efx_reado(efx, &reg, FR_AZ_CS_DEBUG);
- efx->port_num = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1;
-
- rc = efx_mcdi_init(efx);
- if (rc)
- goto fail1;
-
- /* Now we can reset the NIC */
- rc = efx_mcdi_reset(efx, RESET_TYPE_ALL);
- if (rc) {
- netif_err(efx, probe, efx->net_dev, "failed to reset NIC\n");
- goto fail3;
- }
-
- siena_init_wol(efx);
-
- /* Allocate memory for INT_KER */
- rc = efx_nic_alloc_buffer(efx, &efx->irq_status, sizeof(efx_oword_t),
- GFP_KERNEL);
- if (rc)
- goto fail4;
- BUG_ON(efx->irq_status.dma_addr & 0x0f);
-
- netif_dbg(efx, probe, efx->net_dev,
- "INT_KER at %llx (virt %p phys %llx)\n",
- (unsigned long long)efx->irq_status.dma_addr,
- efx->irq_status.addr,
- (unsigned long long)virt_to_phys(efx->irq_status.addr));
-
- /* Read in the non-volatile configuration */
- rc = siena_probe_nvconfig(efx);
- if (rc == -EINVAL) {
- netif_err(efx, probe, efx->net_dev,
- "NVRAM is invalid therefore using defaults\n");
- efx->phy_type = PHY_TYPE_NONE;
- efx->mdio.prtad = MDIO_PRTAD_NONE;
- } else if (rc) {
- goto fail5;
- }
-
- rc = efx_mcdi_mon_probe(efx);
- if (rc)
- goto fail5;
-
-#ifdef CONFIG_SFC_SRIOV
- efx_siena_sriov_probe(efx);
-#endif
- efx_ptp_defer_probe_with_channel(efx);
-
- return 0;
-
-fail5:
- efx_nic_free_buffer(efx, &efx->irq_status);
-fail4:
-fail3:
- efx_mcdi_detach(efx);
- efx_mcdi_fini(efx);
-fail1:
- kfree(efx->nic_data);
- return rc;
-}
-
-static int siena_rx_pull_rss_config(struct efx_nic *efx)
-{
- efx_oword_t temp;
-
- /* Read from IPv6 RSS key as that's longer (the IPv4 key is just the
- * first 128 bits of the same key, assuming it's been set by
- * siena_rx_push_rss_config, below)
- */
- efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
- memcpy(efx->rss_context.rx_hash_key, &temp, sizeof(temp));
- efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
- memcpy(efx->rss_context.rx_hash_key + sizeof(temp), &temp, sizeof(temp));
- efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
- memcpy(efx->rss_context.rx_hash_key + 2 * sizeof(temp), &temp,
- FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
- efx_farch_rx_pull_indir_table(efx);
- return 0;
-}
-
-static int siena_rx_push_rss_config(struct efx_nic *efx, bool user,
- const u32 *rx_indir_table, const u8 *key)
-{
- efx_oword_t temp;
-
- /* Set hash key for IPv4 */
- if (key)
- memcpy(efx->rss_context.rx_hash_key, key, sizeof(temp));
- memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
- efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY);
-
- /* Enable IPv6 RSS */
- BUILD_BUG_ON(sizeof(efx->rss_context.rx_hash_key) <
- 2 * sizeof(temp) + FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8 ||
- FRF_CZ_RX_RSS_IPV6_TKEY_HI_LBN != 0);
- memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
- efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
- memcpy(&temp, efx->rss_context.rx_hash_key + sizeof(temp), sizeof(temp));
- efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
- EFX_POPULATE_OWORD_2(temp, FRF_CZ_RX_RSS_IPV6_THASH_ENABLE, 1,
- FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE, 1);
- memcpy(&temp, efx->rss_context.rx_hash_key + 2 * sizeof(temp),
- FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
- efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
-
- memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
- sizeof(efx->rss_context.rx_indir_table));
- efx_farch_rx_push_indir_table(efx);
-
- return 0;
-}
-
-/* This call performs hardware-specific global initialisation, such as
- * defining the descriptor cache sizes and number of RSS channels.
- * It does not set up any buffers, descriptor rings or event queues.
- */
-static int siena_init_nic(struct efx_nic *efx)
-{
- efx_oword_t temp;
- int rc;
-
- /* Recover from a failed assertion post-reset */
- rc = efx_mcdi_handle_assertion(efx);
- if (rc)
- return rc;
-
- /* Squash TX of packets of 16 bytes or less */
- efx_reado(efx, &temp, FR_AZ_TX_RESERVED);
- EFX_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
- efx_writeo(efx, &temp, FR_AZ_TX_RESERVED);
-
- /* Do not enable TX_NO_EOP_DISC_EN, since it limits packets to 16
- * descriptors (which is bad).
- */
- efx_reado(efx, &temp, FR_AZ_TX_CFG);
- EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_NO_EOP_DISC_EN, 0);
- EFX_SET_OWORD_FIELD(temp, FRF_CZ_TX_FILTER_EN_BIT, 1);
- efx_writeo(efx, &temp, FR_AZ_TX_CFG);
-
- efx_reado(efx, &temp, FR_AZ_RX_CFG);
- EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_DESC_PUSH_EN, 0);
- EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_INGR_EN, 1);
- /* Enable hash insertion. This is broken for the 'Falcon' hash
- * if IPv6 hashing is also enabled, so also select Toeplitz
- * TCP/IPv4 and IPv4 hashes. */
- EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_INSRT_HDR, 1);
- EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_ALG, 1);
- EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_IP_HASH, 1);
- EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_USR_BUF_SIZE,
- EFX_RX_USR_BUF_SIZE >> 5);
- efx_writeo(efx, &temp, FR_AZ_RX_CFG);
-
- siena_rx_push_rss_config(efx, false, efx->rss_context.rx_indir_table, NULL);
- efx->rss_context.context_id = 0; /* indicates RSS is active */
-
- /* Enable event logging */
- rc = efx_mcdi_log_ctrl(efx, true, false, 0);
- if (rc)
- return rc;
-
- /* Set destination of both TX and RX Flush events */
- EFX_POPULATE_OWORD_1(temp, FRF_BZ_FLS_EVQ_ID, 0);
- efx_writeo(efx, &temp, FR_BZ_DP_CTRL);
-
- EFX_POPULATE_OWORD_1(temp, FRF_CZ_USREV_DIS, 1);
- efx_writeo(efx, &temp, FR_CZ_USR_EV_CFG);
-
- efx_farch_init_common(efx);
- return 0;
-}
-
-static void siena_remove_nic(struct efx_nic *efx)
-{
- efx_mcdi_mon_remove(efx);
-
- efx_nic_free_buffer(efx, &efx->irq_status);
-
- efx_mcdi_reset(efx, RESET_TYPE_ALL);
-
- efx_mcdi_detach(efx);
- efx_mcdi_fini(efx);
-
- /* Tear down the private nic state */
- kfree(efx->nic_data);
- efx->nic_data = NULL;
-}
-
-#define SIENA_DMA_STAT(ext_name, mcdi_name) \
- [SIENA_STAT_ ## ext_name] = \
- { #ext_name, 64, 8 * MC_CMD_MAC_ ## mcdi_name }
-#define SIENA_OTHER_STAT(ext_name) \
- [SIENA_STAT_ ## ext_name] = { #ext_name, 0, 0 }
-#define GENERIC_SW_STAT(ext_name) \
- [GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
-
-static const struct efx_hw_stat_desc siena_stat_desc[SIENA_STAT_COUNT] = {
- SIENA_DMA_STAT(tx_bytes, TX_BYTES),
- SIENA_OTHER_STAT(tx_good_bytes),
- SIENA_DMA_STAT(tx_bad_bytes, TX_BAD_BYTES),
- SIENA_DMA_STAT(tx_packets, TX_PKTS),
- SIENA_DMA_STAT(tx_bad, TX_BAD_FCS_PKTS),
- SIENA_DMA_STAT(tx_pause, TX_PAUSE_PKTS),
- SIENA_DMA_STAT(tx_control, TX_CONTROL_PKTS),
- SIENA_DMA_STAT(tx_unicast, TX_UNICAST_PKTS),
- SIENA_DMA_STAT(tx_multicast, TX_MULTICAST_PKTS),
- SIENA_DMA_STAT(tx_broadcast, TX_BROADCAST_PKTS),
- SIENA_DMA_STAT(tx_lt64, TX_LT64_PKTS),
- SIENA_DMA_STAT(tx_64, TX_64_PKTS),
- SIENA_DMA_STAT(tx_65_to_127, TX_65_TO_127_PKTS),
- SIENA_DMA_STAT(tx_128_to_255, TX_128_TO_255_PKTS),
- SIENA_DMA_STAT(tx_256_to_511, TX_256_TO_511_PKTS),
- SIENA_DMA_STAT(tx_512_to_1023, TX_512_TO_1023_PKTS),
- SIENA_DMA_STAT(tx_1024_to_15xx, TX_1024_TO_15XX_PKTS),
- SIENA_DMA_STAT(tx_15xx_to_jumbo, TX_15XX_TO_JUMBO_PKTS),
- SIENA_DMA_STAT(tx_gtjumbo, TX_GTJUMBO_PKTS),
- SIENA_OTHER_STAT(tx_collision),
- SIENA_DMA_STAT(tx_single_collision, TX_SINGLE_COLLISION_PKTS),
- SIENA_DMA_STAT(tx_multiple_collision, TX_MULTIPLE_COLLISION_PKTS),
- SIENA_DMA_STAT(tx_excessive_collision, TX_EXCESSIVE_COLLISION_PKTS),
- SIENA_DMA_STAT(tx_deferred, TX_DEFERRED_PKTS),
- SIENA_DMA_STAT(tx_late_collision, TX_LATE_COLLISION_PKTS),
- SIENA_DMA_STAT(tx_excessive_deferred, TX_EXCESSIVE_DEFERRED_PKTS),
- SIENA_DMA_STAT(tx_non_tcpudp, TX_NON_TCPUDP_PKTS),
- SIENA_DMA_STAT(tx_mac_src_error, TX_MAC_SRC_ERR_PKTS),
- SIENA_DMA_STAT(tx_ip_src_error, TX_IP_SRC_ERR_PKTS),
- SIENA_DMA_STAT(rx_bytes, RX_BYTES),
- SIENA_OTHER_STAT(rx_good_bytes),
- SIENA_DMA_STAT(rx_bad_bytes, RX_BAD_BYTES),
- SIENA_DMA_STAT(rx_packets, RX_PKTS),
- SIENA_DMA_STAT(rx_good, RX_GOOD_PKTS),
- SIENA_DMA_STAT(rx_bad, RX_BAD_FCS_PKTS),
- SIENA_DMA_STAT(rx_pause, RX_PAUSE_PKTS),
- SIENA_DMA_STAT(rx_control, RX_CONTROL_PKTS),
- SIENA_DMA_STAT(rx_unicast, RX_UNICAST_PKTS),
- SIENA_DMA_STAT(rx_multicast, RX_MULTICAST_PKTS),
- SIENA_DMA_STAT(rx_broadcast, RX_BROADCAST_PKTS),
- SIENA_DMA_STAT(rx_lt64, RX_UNDERSIZE_PKTS),
- SIENA_DMA_STAT(rx_64, RX_64_PKTS),
- SIENA_DMA_STAT(rx_65_to_127, RX_65_TO_127_PKTS),
- SIENA_DMA_STAT(rx_128_to_255, RX_128_TO_255_PKTS),
- SIENA_DMA_STAT(rx_256_to_511, RX_256_TO_511_PKTS),
- SIENA_DMA_STAT(rx_512_to_1023, RX_512_TO_1023_PKTS),
- SIENA_DMA_STAT(rx_1024_to_15xx, RX_1024_TO_15XX_PKTS),
- SIENA_DMA_STAT(rx_15xx_to_jumbo, RX_15XX_TO_JUMBO_PKTS),
- SIENA_DMA_STAT(rx_gtjumbo, RX_GTJUMBO_PKTS),
- SIENA_DMA_STAT(rx_bad_gtjumbo, RX_JABBER_PKTS),
- SIENA_DMA_STAT(rx_overflow, RX_OVERFLOW_PKTS),
- SIENA_DMA_STAT(rx_false_carrier, RX_FALSE_CARRIER_PKTS),
- SIENA_DMA_STAT(rx_symbol_error, RX_SYMBOL_ERROR_PKTS),
- SIENA_DMA_STAT(rx_align_error, RX_ALIGN_ERROR_PKTS),
- SIENA_DMA_STAT(rx_length_error, RX_LENGTH_ERROR_PKTS),
- SIENA_DMA_STAT(rx_internal_error, RX_INTERNAL_ERROR_PKTS),
- SIENA_DMA_STAT(rx_nodesc_drop_cnt, RX_NODESC_DROPS),
- GENERIC_SW_STAT(rx_nodesc_trunc),
- GENERIC_SW_STAT(rx_noskb_drops),
-};
-static const unsigned long siena_stat_mask[] = {
- [0 ... BITS_TO_LONGS(SIENA_STAT_COUNT) - 1] = ~0UL,
-};
-
-static size_t siena_describe_nic_stats(struct efx_nic *efx, u8 *names)
-{
- return efx_nic_describe_stats(siena_stat_desc, SIENA_STAT_COUNT,
- siena_stat_mask, names);
-}
-
-static int siena_try_update_nic_stats(struct efx_nic *efx)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- u64 *stats = nic_data->stats;
- __le64 *dma_stats;
- __le64 generation_start, generation_end;
-
- dma_stats = efx->stats_buffer.addr;
-
- generation_end = dma_stats[efx->num_mac_stats - 1];
- if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
- return 0;
- rmb();
- efx_nic_update_stats(siena_stat_desc, SIENA_STAT_COUNT, siena_stat_mask,
- stats, efx->stats_buffer.addr, false);
- rmb();
- generation_start = dma_stats[MC_CMD_MAC_GENERATION_START];
- if (generation_end != generation_start)
- return -EAGAIN;
-
- /* Update derived statistics */
- efx_nic_fix_nodesc_drop_stat(efx,
- &stats[SIENA_STAT_rx_nodesc_drop_cnt]);
- efx_update_diff_stat(&stats[SIENA_STAT_tx_good_bytes],
- stats[SIENA_STAT_tx_bytes] -
- stats[SIENA_STAT_tx_bad_bytes]);
- stats[SIENA_STAT_tx_collision] =
- stats[SIENA_STAT_tx_single_collision] +
- stats[SIENA_STAT_tx_multiple_collision] +
- stats[SIENA_STAT_tx_excessive_collision] +
- stats[SIENA_STAT_tx_late_collision];
- efx_update_diff_stat(&stats[SIENA_STAT_rx_good_bytes],
- stats[SIENA_STAT_rx_bytes] -
- stats[SIENA_STAT_rx_bad_bytes]);
- efx_update_sw_stats(efx, stats);
- return 0;
-}
-
-static size_t siena_update_nic_stats(struct efx_nic *efx, u64 *full_stats,
- struct rtnl_link_stats64 *core_stats)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- u64 *stats = nic_data->stats;
- int retry;
-
- /* If we're unlucky enough to read statistics wduring the DMA, wait
- * up to 10ms for it to finish (typically takes <500us) */
- for (retry = 0; retry < 100; ++retry) {
- if (siena_try_update_nic_stats(efx) == 0)
- break;
- udelay(100);
- }
-
- if (full_stats)
- memcpy(full_stats, stats, sizeof(u64) * SIENA_STAT_COUNT);
-
- if (core_stats) {
- core_stats->rx_packets = stats[SIENA_STAT_rx_packets];
- core_stats->tx_packets = stats[SIENA_STAT_tx_packets];
- core_stats->rx_bytes = stats[SIENA_STAT_rx_bytes];
- core_stats->tx_bytes = stats[SIENA_STAT_tx_bytes];
- core_stats->rx_dropped = stats[SIENA_STAT_rx_nodesc_drop_cnt] +
- stats[GENERIC_STAT_rx_nodesc_trunc] +
- stats[GENERIC_STAT_rx_noskb_drops];
- core_stats->multicast = stats[SIENA_STAT_rx_multicast];
- core_stats->collisions = stats[SIENA_STAT_tx_collision];
- core_stats->rx_length_errors =
- stats[SIENA_STAT_rx_gtjumbo] +
- stats[SIENA_STAT_rx_length_error];
- core_stats->rx_crc_errors = stats[SIENA_STAT_rx_bad];
- core_stats->rx_frame_errors = stats[SIENA_STAT_rx_align_error];
- core_stats->rx_fifo_errors = stats[SIENA_STAT_rx_overflow];
- core_stats->tx_window_errors =
- stats[SIENA_STAT_tx_late_collision];
-
- core_stats->rx_errors = (core_stats->rx_length_errors +
- core_stats->rx_crc_errors +
- core_stats->rx_frame_errors +
- stats[SIENA_STAT_rx_symbol_error]);
- core_stats->tx_errors = (core_stats->tx_window_errors +
- stats[SIENA_STAT_tx_bad]);
- }
-
- return SIENA_STAT_COUNT;
-}
-
-static int siena_mac_reconfigure(struct efx_nic *efx, bool mtu_only __always_unused)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_MCAST_HASH_IN_LEN);
- int rc;
-
- BUILD_BUG_ON(MC_CMD_SET_MCAST_HASH_IN_LEN !=
- MC_CMD_SET_MCAST_HASH_IN_HASH0_OFST +
- sizeof(efx->multicast_hash));
-
- efx_farch_filter_sync_rx_mode(efx);
-
- WARN_ON(!mutex_is_locked(&efx->mac_lock));
-
- rc = efx_mcdi_set_mac(efx);
- if (rc != 0)
- return rc;
-
- memcpy(MCDI_PTR(inbuf, SET_MCAST_HASH_IN_HASH0),
- efx->multicast_hash.byte, sizeof(efx->multicast_hash));
- return efx_mcdi_rpc(efx, MC_CMD_SET_MCAST_HASH,
- inbuf, sizeof(inbuf), NULL, 0, NULL);
-}
-
-/**************************************************************************
- *
- * Wake on LAN
- *
- **************************************************************************
- */
-
-static void siena_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
-
- wol->supported = WAKE_MAGIC;
- if (nic_data->wol_filter_id != -1)
- wol->wolopts = WAKE_MAGIC;
- else
- wol->wolopts = 0;
- memset(&wol->sopass, 0, sizeof(wol->sopass));
-}
-
-
-static int siena_set_wol(struct efx_nic *efx, u32 type)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- int rc;
-
- if (type & ~WAKE_MAGIC)
- return -EINVAL;
-
- if (type & WAKE_MAGIC) {
- if (nic_data->wol_filter_id != -1)
- efx_mcdi_wol_filter_remove(efx,
- nic_data->wol_filter_id);
- rc = efx_mcdi_wol_filter_set_magic(efx, efx->net_dev->dev_addr,
- &nic_data->wol_filter_id);
- if (rc)
- goto fail;
-
- pci_wake_from_d3(efx->pci_dev, true);
- } else {
- rc = efx_mcdi_wol_filter_reset(efx);
- nic_data->wol_filter_id = -1;
- pci_wake_from_d3(efx->pci_dev, false);
- if (rc)
- goto fail;
- }
-
- return 0;
- fail:
- netif_err(efx, hw, efx->net_dev, "%s failed: type=%d rc=%d\n",
- __func__, type, rc);
- return rc;
-}
-
-
-static void siena_init_wol(struct efx_nic *efx)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- int rc;
-
- rc = efx_mcdi_wol_filter_get_magic(efx, &nic_data->wol_filter_id);
-
- if (rc != 0) {
- /* If it failed, attempt to get into a synchronised
- * state with MC by resetting any set WoL filters */
- efx_mcdi_wol_filter_reset(efx);
- nic_data->wol_filter_id = -1;
- } else if (nic_data->wol_filter_id != -1) {
- pci_wake_from_d3(efx->pci_dev, true);
- }
-}
-
-/**************************************************************************
- *
- * MCDI
- *
- **************************************************************************
- */
-
-#define MCDI_PDU(efx) \
- (efx_port_num(efx) ? MC_SMEM_P1_PDU_OFST : MC_SMEM_P0_PDU_OFST)
-#define MCDI_DOORBELL(efx) \
- (efx_port_num(efx) ? MC_SMEM_P1_DOORBELL_OFST : MC_SMEM_P0_DOORBELL_OFST)
-#define MCDI_STATUS(efx) \
- (efx_port_num(efx) ? MC_SMEM_P1_STATUS_OFST : MC_SMEM_P0_STATUS_OFST)
-
-static void siena_mcdi_request(struct efx_nic *efx,
- const efx_dword_t *hdr, size_t hdr_len,
- const efx_dword_t *sdu, size_t sdu_len)
-{
- unsigned pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
- unsigned doorbell = FR_CZ_MC_TREG_SMEM + MCDI_DOORBELL(efx);
- unsigned int i;
- unsigned int inlen_dw = DIV_ROUND_UP(sdu_len, 4);
-
- EFX_WARN_ON_PARANOID(hdr_len != 4);
-
- efx_writed(efx, hdr, pdu);
-
- for (i = 0; i < inlen_dw; i++)
- efx_writed(efx, &sdu[i], pdu + hdr_len + 4 * i);
-
- /* Ensure the request is written out before the doorbell */
- wmb();
-
- /* ring the doorbell with a distinctive value */
- _efx_writed(efx, (__force __le32) 0x45789abc, doorbell);
-}
-
-static bool siena_mcdi_poll_response(struct efx_nic *efx)
-{
- unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
- efx_dword_t hdr;
-
- efx_readd(efx, &hdr, pdu);
-
- /* All 1's indicates that shared memory is in reset (and is
- * not a valid hdr). Wait for it to come out reset before
- * completing the command
- */
- return EFX_DWORD_FIELD(hdr, EFX_DWORD_0) != 0xffffffff &&
- EFX_DWORD_FIELD(hdr, MCDI_HEADER_RESPONSE);
-}
-
-static void siena_mcdi_read_response(struct efx_nic *efx, efx_dword_t *outbuf,
- size_t offset, size_t outlen)
-{
- unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
- unsigned int outlen_dw = DIV_ROUND_UP(outlen, 4);
- int i;
-
- for (i = 0; i < outlen_dw; i++)
- efx_readd(efx, &outbuf[i], pdu + offset + 4 * i);
-}
-
-static int siena_mcdi_poll_reboot(struct efx_nic *efx)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- unsigned int addr = FR_CZ_MC_TREG_SMEM + MCDI_STATUS(efx);
- efx_dword_t reg;
- u32 value;
-
- efx_readd(efx, &reg, addr);
- value = EFX_DWORD_FIELD(reg, EFX_DWORD_0);
-
- if (value == 0)
- return 0;
-
- EFX_ZERO_DWORD(reg);
- efx_writed(efx, &reg, addr);
-
- /* MAC statistics have been cleared on the NIC; clear the local
- * copies that we update with efx_update_diff_stat().
- */
- nic_data->stats[SIENA_STAT_tx_good_bytes] = 0;
- nic_data->stats[SIENA_STAT_rx_good_bytes] = 0;
-
- if (value == MC_STATUS_DWORD_ASSERT)
- return -EINTR;
- else
- return -EIO;
-}
-
-/**************************************************************************
- *
- * MTD
- *
- **************************************************************************
- */
-
-#ifdef CONFIG_SFC_MTD
-
-struct siena_nvram_type_info {
- int port;
- const char *name;
-};
-
-static const struct siena_nvram_type_info siena_nvram_types[] = {
- [MC_CMD_NVRAM_TYPE_DISABLED_CALLISTO] = { 0, "sfc_dummy_phy" },
- [MC_CMD_NVRAM_TYPE_MC_FW] = { 0, "sfc_mcfw" },
- [MC_CMD_NVRAM_TYPE_MC_FW_BACKUP] = { 0, "sfc_mcfw_backup" },
- [MC_CMD_NVRAM_TYPE_STATIC_CFG_PORT0] = { 0, "sfc_static_cfg" },
- [MC_CMD_NVRAM_TYPE_STATIC_CFG_PORT1] = { 1, "sfc_static_cfg" },
- [MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT0] = { 0, "sfc_dynamic_cfg" },
- [MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT1] = { 1, "sfc_dynamic_cfg" },
- [MC_CMD_NVRAM_TYPE_EXP_ROM] = { 0, "sfc_exp_rom" },
- [MC_CMD_NVRAM_TYPE_EXP_ROM_CFG_PORT0] = { 0, "sfc_exp_rom_cfg" },
- [MC_CMD_NVRAM_TYPE_EXP_ROM_CFG_PORT1] = { 1, "sfc_exp_rom_cfg" },
- [MC_CMD_NVRAM_TYPE_PHY_PORT0] = { 0, "sfc_phy_fw" },
- [MC_CMD_NVRAM_TYPE_PHY_PORT1] = { 1, "sfc_phy_fw" },
- [MC_CMD_NVRAM_TYPE_FPGA] = { 0, "sfc_fpga" },
-};
-
-static int siena_mtd_probe_partition(struct efx_nic *efx,
- struct efx_mcdi_mtd_partition *part,
- unsigned int type)
-{
- const struct siena_nvram_type_info *info;
- size_t size, erase_size;
- bool protected;
- int rc;
-
- if (type >= ARRAY_SIZE(siena_nvram_types) ||
- siena_nvram_types[type].name == NULL)
- return -ENODEV;
-
- info = &siena_nvram_types[type];
-
- if (info->port != efx_port_num(efx))
- return -ENODEV;
-
- rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected);
- if (rc)
- return rc;
- if (protected)
- return -ENODEV; /* hide it */
-
- part->nvram_type = type;
- part->common.dev_type_name = "Siena NVRAM manager";
- part->common.type_name = info->name;
-
- part->common.mtd.type = MTD_NORFLASH;
- part->common.mtd.flags = MTD_CAP_NORFLASH;
- part->common.mtd.size = size;
- part->common.mtd.erasesize = erase_size;
-
- return 0;
-}
-
-static int siena_mtd_get_fw_subtypes(struct efx_nic *efx,
- struct efx_mcdi_mtd_partition *parts,
- size_t n_parts)
-{
- uint16_t fw_subtype_list[
- MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM];
- size_t i;
- int rc;
-
- rc = efx_mcdi_get_board_cfg(efx, NULL, fw_subtype_list, NULL);
- if (rc)
- return rc;
-
- for (i = 0; i < n_parts; i++)
- parts[i].fw_subtype = fw_subtype_list[parts[i].nvram_type];
-
- return 0;
-}
-
-static int siena_mtd_probe(struct efx_nic *efx)
-{
- struct efx_mcdi_mtd_partition *parts;
- u32 nvram_types;
- unsigned int type;
- size_t n_parts;
- int rc;
-
- ASSERT_RTNL();
-
- rc = efx_mcdi_nvram_types(efx, &nvram_types);
- if (rc)
- return rc;
-
- parts = kcalloc(hweight32(nvram_types), sizeof(*parts), GFP_KERNEL);
- if (!parts)
- return -ENOMEM;
-
- type = 0;
- n_parts = 0;
-
- while (nvram_types != 0) {
- if (nvram_types & 1) {
- rc = siena_mtd_probe_partition(efx, &parts[n_parts],
- type);
- if (rc == 0)
- n_parts++;
- else if (rc != -ENODEV)
- goto fail;
- }
- type++;
- nvram_types >>= 1;
- }
-
- rc = siena_mtd_get_fw_subtypes(efx, parts, n_parts);
- if (rc)
- goto fail;
-
- rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
-fail:
- if (rc)
- kfree(parts);
- return rc;
-}
-
-#endif /* CONFIG_SFC_MTD */
-
-static unsigned int siena_check_caps(const struct efx_nic *efx,
- u8 flag, u32 offset)
-{
- /* Siena did not support MC_CMD_GET_CAPABILITIES */
- return 0;
-}
-
-static unsigned int efx_siena_recycle_ring_size(const struct efx_nic *efx)
-{
- /* Maximum link speed is 10G */
- return EFX_RECYCLE_RING_SIZE_10G;
-}
-
-/**************************************************************************
- *
- * Revision-dependent attributes used by efx.c and nic.c
- *
- **************************************************************************
- */
-
-const struct efx_nic_type siena_a0_nic_type = {
- .is_vf = false,
- .mem_bar = siena_mem_bar,
- .mem_map_size = siena_mem_map_size,
- .probe = siena_probe_nic,
- .remove = siena_remove_nic,
- .init = siena_init_nic,
- .dimension_resources = siena_dimension_resources,
- .fini = efx_port_dummy_op_void,
-#ifdef CONFIG_EEH
- .monitor = siena_monitor,
-#else
- .monitor = NULL,
-#endif
- .map_reset_reason = efx_mcdi_map_reset_reason,
- .map_reset_flags = siena_map_reset_flags,
- .reset = efx_mcdi_reset,
- .probe_port = efx_mcdi_port_probe,
- .remove_port = efx_mcdi_port_remove,
- .fini_dmaq = efx_farch_fini_dmaq,
- .prepare_flush = siena_prepare_flush,
- .finish_flush = siena_finish_flush,
- .prepare_flr = efx_port_dummy_op_void,
- .finish_flr = efx_farch_finish_flr,
- .describe_stats = siena_describe_nic_stats,
- .update_stats = siena_update_nic_stats,
- .start_stats = efx_mcdi_mac_start_stats,
- .pull_stats = efx_mcdi_mac_pull_stats,
- .stop_stats = efx_mcdi_mac_stop_stats,
- .push_irq_moderation = siena_push_irq_moderation,
- .reconfigure_mac = siena_mac_reconfigure,
- .check_mac_fault = efx_mcdi_mac_check_fault,
- .reconfigure_port = efx_mcdi_port_reconfigure,
- .get_wol = siena_get_wol,
- .set_wol = siena_set_wol,
- .resume_wol = siena_init_wol,
- .test_chip = siena_test_chip,
- .test_nvram = efx_mcdi_nvram_test_all,
- .mcdi_request = siena_mcdi_request,
- .mcdi_poll_response = siena_mcdi_poll_response,
- .mcdi_read_response = siena_mcdi_read_response,
- .mcdi_poll_reboot = siena_mcdi_poll_reboot,
- .irq_enable_master = efx_farch_irq_enable_master,
- .irq_test_generate = efx_farch_irq_test_generate,
- .irq_disable_non_ev = efx_farch_irq_disable_master,
- .irq_handle_msi = efx_farch_msi_interrupt,
- .irq_handle_legacy = efx_farch_legacy_interrupt,
- .tx_probe = efx_farch_tx_probe,
- .tx_init = efx_farch_tx_init,
- .tx_remove = efx_farch_tx_remove,
- .tx_write = efx_farch_tx_write,
- .tx_limit_len = efx_farch_tx_limit_len,
- .tx_enqueue = __efx_enqueue_skb,
- .rx_push_rss_config = siena_rx_push_rss_config,
- .rx_pull_rss_config = siena_rx_pull_rss_config,
- .rx_probe = efx_farch_rx_probe,
- .rx_init = efx_farch_rx_init,
- .rx_remove = efx_farch_rx_remove,
- .rx_write = efx_farch_rx_write,
- .rx_defer_refill = efx_farch_rx_defer_refill,
- .rx_packet = __efx_rx_packet,
- .ev_probe = efx_farch_ev_probe,
- .ev_init = efx_farch_ev_init,
- .ev_fini = efx_farch_ev_fini,
- .ev_remove = efx_farch_ev_remove,
- .ev_process = efx_farch_ev_process,
- .ev_read_ack = efx_farch_ev_read_ack,
- .ev_test_generate = efx_farch_ev_test_generate,
- .filter_table_probe = efx_farch_filter_table_probe,
- .filter_table_restore = efx_farch_filter_table_restore,
- .filter_table_remove = efx_farch_filter_table_remove,
- .filter_update_rx_scatter = efx_farch_filter_update_rx_scatter,
- .filter_insert = efx_farch_filter_insert,
- .filter_remove_safe = efx_farch_filter_remove_safe,
- .filter_get_safe = efx_farch_filter_get_safe,
- .filter_clear_rx = efx_farch_filter_clear_rx,
- .filter_count_rx_used = efx_farch_filter_count_rx_used,
- .filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
- .filter_get_rx_ids = efx_farch_filter_get_rx_ids,
-#ifdef CONFIG_RFS_ACCEL
- .filter_rfs_expire_one = efx_farch_filter_rfs_expire_one,
-#endif
-#ifdef CONFIG_SFC_MTD
- .mtd_probe = siena_mtd_probe,
- .mtd_rename = efx_mcdi_mtd_rename,
- .mtd_read = efx_mcdi_mtd_read,
- .mtd_erase = efx_mcdi_mtd_erase,
- .mtd_write = efx_mcdi_mtd_write,
- .mtd_sync = efx_mcdi_mtd_sync,
-#endif
- .ptp_write_host_time = siena_ptp_write_host_time,
- .ptp_set_ts_config = siena_ptp_set_ts_config,
-#ifdef CONFIG_SFC_SRIOV
- .sriov_configure = efx_siena_sriov_configure,
- .sriov_init = efx_siena_sriov_init,
- .sriov_fini = efx_siena_sriov_fini,
- .sriov_wanted = efx_siena_sriov_wanted,
- .sriov_reset = efx_siena_sriov_reset,
- .sriov_flr = efx_siena_sriov_flr,
- .sriov_set_vf_mac = efx_siena_sriov_set_vf_mac,
- .sriov_set_vf_vlan = efx_siena_sriov_set_vf_vlan,
- .sriov_set_vf_spoofchk = efx_siena_sriov_set_vf_spoofchk,
- .sriov_get_vf_config = efx_siena_sriov_get_vf_config,
- .vswitching_probe = efx_port_dummy_op_int,
- .vswitching_restore = efx_port_dummy_op_int,
- .vswitching_remove = efx_port_dummy_op_void,
- .set_mac_address = efx_siena_sriov_mac_address_changed,
-#endif
-
- .revision = EFX_REV_SIENA_A0,
- .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL,
- .rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL,
- .buf_tbl_base = FR_BZ_BUF_FULL_TBL,
- .evq_ptr_tbl_base = FR_BZ_EVQ_PTR_TBL,
- .evq_rptr_tbl_base = FR_BZ_EVQ_RPTR,
- .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
- .rx_prefix_size = FS_BZ_RX_PREFIX_SIZE,
- .rx_hash_offset = FS_BZ_RX_PREFIX_HASH_OFST,
- .rx_buffer_padding = 0,
- .can_rx_scatter = true,
- .option_descriptors = false,
- .min_interrupt_mode = EFX_INT_MODE_LEGACY,
- .timer_period_max = 1 << FRF_CZ_TC_TIMER_VAL_WIDTH,
- .offload_features = (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
- NETIF_F_RXHASH | NETIF_F_NTUPLE),
- .mcdi_max_ver = 1,
- .max_rx_ip_filters = FR_BZ_RX_FILTER_TBL0_ROWS,
- .hwtstamp_filters = (1 << HWTSTAMP_FILTER_NONE |
- 1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT |
- 1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT),
- .rx_hash_key_size = 16,
- .check_caps = siena_check_caps,
- .sensor_event = efx_mcdi_sensor_event,
- .rx_recycle_ring_size = efx_siena_recycle_ring_size,
-};
diff --git a/drivers/net/ethernet/sfc/siena/farch.c b/drivers/net/ethernet/sfc/siena/farch.c
new file mode 100644
index 000000000000..9599123bc28d
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/farch.c
@@ -0,0 +1,2988 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/crc32.h>
+#include "net_driver.h"
+#include "bitfield.h"
+#include "efx.h"
+#include "rx_common.h"
+#include "tx_common.h"
+#include "nic.h"
+#include "farch_regs.h"
+#include "sriov.h"
+#include "siena_sriov.h"
+#include "io.h"
+#include "workarounds.h"
+
+/* Falcon-architecture (SFC9000-family) support */
+
+/**************************************************************************
+ *
+ * Configurable values
+ *
+ **************************************************************************
+ */
+
+/* This is set to 16 for a good reason. In summary, if larger than
+ * 16, the descriptor cache holds more than a default socket
+ * buffer's worth of packets (for UDP we can only have at most one
+ * socket buffer's worth outstanding). This combined with the fact
+ * that we only get 1 TX event per descriptor cache means the NIC
+ * goes idle.
+ */
+#define TX_DC_ENTRIES 16
+#define TX_DC_ENTRIES_ORDER 1
+
+#define RX_DC_ENTRIES 64
+#define RX_DC_ENTRIES_ORDER 3
+
+/* If EFX_MAX_INT_ERRORS internal errors occur within
+ * EFX_INT_ERROR_EXPIRE seconds, we consider the NIC broken and
+ * disable it.
+ */
+#define EFX_INT_ERROR_EXPIRE 3600
+#define EFX_MAX_INT_ERRORS 5
+
+/* Depth of RX flush request fifo */
+#define EFX_RX_FLUSH_COUNT 4
+
+/* Driver generated events */
+#define _EFX_CHANNEL_MAGIC_TEST 0x000101
+#define _EFX_CHANNEL_MAGIC_FILL 0x000102
+#define _EFX_CHANNEL_MAGIC_RX_DRAIN 0x000103
+#define _EFX_CHANNEL_MAGIC_TX_DRAIN 0x000104
+
+#define _EFX_CHANNEL_MAGIC(_code, _data) ((_code) << 8 | (_data))
+#define _EFX_CHANNEL_MAGIC_CODE(_magic) ((_magic) >> 8)
+
+#define EFX_CHANNEL_MAGIC_TEST(_channel) \
+ _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_TEST, (_channel)->channel)
+#define EFX_CHANNEL_MAGIC_FILL(_rx_queue) \
+ _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_FILL, \
+ efx_rx_queue_index(_rx_queue))
+#define EFX_CHANNEL_MAGIC_RX_DRAIN(_rx_queue) \
+ _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_RX_DRAIN, \
+ efx_rx_queue_index(_rx_queue))
+#define EFX_CHANNEL_MAGIC_TX_DRAIN(_tx_queue) \
+ _EFX_CHANNEL_MAGIC(_EFX_CHANNEL_MAGIC_TX_DRAIN, \
+ (_tx_queue)->queue)
+
+static void efx_farch_magic_event(struct efx_channel *channel, u32 magic);
+
+/**************************************************************************
+ *
+ * Hardware access
+ *
+ **************************************************************************/
+
+static inline void efx_write_buf_tbl(struct efx_nic *efx, efx_qword_t *value,
+ unsigned int index)
+{
+ efx_sram_writeq(efx, efx->membase + efx->type->buf_tbl_base,
+ value, index);
+}
+
+static bool efx_masked_compare_oword(const efx_oword_t *a, const efx_oword_t *b,
+ const efx_oword_t *mask)
+{
+ return ((a->u64[0] ^ b->u64[0]) & mask->u64[0]) ||
+ ((a->u64[1] ^ b->u64[1]) & mask->u64[1]);
+}
+
+int efx_farch_test_registers(struct efx_nic *efx,
+ const struct efx_farch_register_test *regs,
+ size_t n_regs)
+{
+ unsigned address = 0;
+ int i, j;
+ efx_oword_t mask, imask, original, reg, buf;
+
+ for (i = 0; i < n_regs; ++i) {
+ address = regs[i].address;
+ mask = imask = regs[i].mask;
+ EFX_INVERT_OWORD(imask);
+
+ efx_reado(efx, &original, address);
+
+ /* bit sweep on and off */
+ for (j = 0; j < 128; j++) {
+ if (!EFX_EXTRACT_OWORD32(mask, j, j))
+ continue;
+
+ /* Test this testable bit can be set in isolation */
+ EFX_AND_OWORD(reg, original, mask);
+ EFX_SET_OWORD32(reg, j, j, 1);
+
+ efx_writeo(efx, &reg, address);
+ efx_reado(efx, &buf, address);
+
+ if (efx_masked_compare_oword(&reg, &buf, &mask))
+ goto fail;
+
+ /* Test this testable bit can be cleared in isolation */
+ EFX_OR_OWORD(reg, original, mask);
+ EFX_SET_OWORD32(reg, j, j, 0);
+
+ efx_writeo(efx, &reg, address);
+ efx_reado(efx, &buf, address);
+
+ if (efx_masked_compare_oword(&reg, &buf, &mask))
+ goto fail;
+ }
+
+ efx_writeo(efx, &original, address);
+ }
+
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev,
+ "wrote "EFX_OWORD_FMT" read "EFX_OWORD_FMT
+ " at address 0x%x mask "EFX_OWORD_FMT"\n", EFX_OWORD_VAL(reg),
+ EFX_OWORD_VAL(buf), address, EFX_OWORD_VAL(mask));
+ return -EIO;
+}
+
+/**************************************************************************
+ *
+ * Special buffer handling
+ * Special buffers are used for event queues and the TX and RX
+ * descriptor rings.
+ *
+ *************************************************************************/
+
+/*
+ * Initialise a special buffer
+ *
+ * This will define a buffer (previously allocated via
+ * efx_alloc_special_buffer()) in the buffer table, allowing
+ * it to be used for event queues, descriptor rings etc.
+ */
+static void
+efx_init_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
+{
+ efx_qword_t buf_desc;
+ unsigned int index;
+ dma_addr_t dma_addr;
+ int i;
+
+ EFX_WARN_ON_PARANOID(!buffer->buf.addr);
+
+ /* Write buffer descriptors to NIC */
+ for (i = 0; i < buffer->entries; i++) {
+ index = buffer->index + i;
+ dma_addr = buffer->buf.dma_addr + (i * EFX_BUF_SIZE);
+ netif_dbg(efx, probe, efx->net_dev,
+ "mapping special buffer %d at %llx\n",
+ index, (unsigned long long)dma_addr);
+ EFX_POPULATE_QWORD_3(buf_desc,
+ FRF_AZ_BUF_ADR_REGION, 0,
+ FRF_AZ_BUF_ADR_FBUF, dma_addr >> 12,
+ FRF_AZ_BUF_OWNER_ID_FBUF, 0);
+ efx_write_buf_tbl(efx, &buf_desc, index);
+ }
+}
+
+/* Unmaps a buffer and clears the buffer table entries */
+static void
+efx_fini_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
+{
+ efx_oword_t buf_tbl_upd;
+ unsigned int start = buffer->index;
+ unsigned int end = (buffer->index + buffer->entries - 1);
+
+ if (!buffer->entries)
+ return;
+
+ netif_dbg(efx, hw, efx->net_dev, "unmapping special buffers %d-%d\n",
+ buffer->index, buffer->index + buffer->entries - 1);
+
+ EFX_POPULATE_OWORD_4(buf_tbl_upd,
+ FRF_AZ_BUF_UPD_CMD, 0,
+ FRF_AZ_BUF_CLR_CMD, 1,
+ FRF_AZ_BUF_CLR_END_ID, end,
+ FRF_AZ_BUF_CLR_START_ID, start);
+ efx_writeo(efx, &buf_tbl_upd, FR_AZ_BUF_TBL_UPD);
+}
+
+/*
+ * Allocate a new special buffer
+ *
+ * This allocates memory for a new buffer, clears it and allocates a
+ * new buffer ID range. It does not write into the buffer table.
+ *
+ * This call will allocate 4KB buffers, since 8KB buffers can't be
+ * used for event queues and descriptor rings.
+ */
+static int efx_alloc_special_buffer(struct efx_nic *efx,
+ struct efx_special_buffer *buffer,
+ unsigned int len)
+{
+#ifdef CONFIG_SFC_SRIOV
+ struct siena_nic_data *nic_data = efx->nic_data;
+#endif
+ len = ALIGN(len, EFX_BUF_SIZE);
+
+ if (efx_nic_alloc_buffer(efx, &buffer->buf, len, GFP_KERNEL))
+ return -ENOMEM;
+ buffer->entries = len / EFX_BUF_SIZE;
+ BUG_ON(buffer->buf.dma_addr & (EFX_BUF_SIZE - 1));
+
+ /* Select new buffer ID */
+ buffer->index = efx->next_buffer_table;
+ efx->next_buffer_table += buffer->entries;
+#ifdef CONFIG_SFC_SRIOV
+ BUG_ON(efx_siena_sriov_enabled(efx) &&
+ nic_data->vf_buftbl_base < efx->next_buffer_table);
+#endif
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "allocating special buffers %d-%d at %llx+%x "
+ "(virt %p phys %llx)\n", buffer->index,
+ buffer->index + buffer->entries - 1,
+ (u64)buffer->buf.dma_addr, len,
+ buffer->buf.addr, (u64)virt_to_phys(buffer->buf.addr));
+
+ return 0;
+}
+
+static void
+efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
+{
+ if (!buffer->buf.addr)
+ return;
+
+ netif_dbg(efx, hw, efx->net_dev,
+ "deallocating special buffers %d-%d at %llx+%x "
+ "(virt %p phys %llx)\n", buffer->index,
+ buffer->index + buffer->entries - 1,
+ (u64)buffer->buf.dma_addr, buffer->buf.len,
+ buffer->buf.addr, (u64)virt_to_phys(buffer->buf.addr));
+
+ efx_nic_free_buffer(efx, &buffer->buf);
+ buffer->entries = 0;
+}
+
+/**************************************************************************
+ *
+ * TX path
+ *
+ **************************************************************************/
+
+/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */
+static inline void efx_farch_notify_tx_desc(struct efx_tx_queue *tx_queue)
+{
+ unsigned write_ptr;
+ efx_dword_t reg;
+
+ write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
+ EFX_POPULATE_DWORD_1(reg, FRF_AZ_TX_DESC_WPTR_DWORD, write_ptr);
+ efx_writed_page(tx_queue->efx, &reg,
+ FR_AZ_TX_DESC_UPD_DWORD_P0, tx_queue->queue);
+}
+
+/* Write pointer and first descriptor for TX descriptor ring */
+static inline void efx_farch_push_tx_desc(struct efx_tx_queue *tx_queue,
+ const efx_qword_t *txd)
+{
+ unsigned write_ptr;
+ efx_oword_t reg;
+
+ BUILD_BUG_ON(FRF_AZ_TX_DESC_LBN != 0);
+ BUILD_BUG_ON(FR_AA_TX_DESC_UPD_KER != FR_BZ_TX_DESC_UPD_P0);
+
+ write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
+ EFX_POPULATE_OWORD_2(reg, FRF_AZ_TX_DESC_PUSH_CMD, true,
+ FRF_AZ_TX_DESC_WPTR, write_ptr);
+ reg.qword[0] = *txd;
+ efx_writeo_page(tx_queue->efx, &reg,
+ FR_BZ_TX_DESC_UPD_P0, tx_queue->queue);
+}
+
+
+/* For each entry inserted into the software descriptor ring, create a
+ * descriptor in the hardware TX descriptor ring (in host memory), and
+ * write a doorbell.
+ */
+void efx_farch_tx_write(struct efx_tx_queue *tx_queue)
+{
+ struct efx_tx_buffer *buffer;
+ efx_qword_t *txd;
+ unsigned write_ptr;
+ unsigned old_write_count = tx_queue->write_count;
+
+ tx_queue->xmit_pending = false;
+ if (unlikely(tx_queue->write_count == tx_queue->insert_count))
+ return;
+
+ do {
+ write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
+ buffer = &tx_queue->buffer[write_ptr];
+ txd = efx_tx_desc(tx_queue, write_ptr);
+ ++tx_queue->write_count;
+
+ EFX_WARN_ON_ONCE_PARANOID(buffer->flags & EFX_TX_BUF_OPTION);
+
+ /* Create TX descriptor ring entry */
+ BUILD_BUG_ON(EFX_TX_BUF_CONT != 1);
+ EFX_POPULATE_QWORD_4(*txd,
+ FSF_AZ_TX_KER_CONT,
+ buffer->flags & EFX_TX_BUF_CONT,
+ FSF_AZ_TX_KER_BYTE_COUNT, buffer->len,
+ FSF_AZ_TX_KER_BUF_REGION, 0,
+ FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr);
+ } while (tx_queue->write_count != tx_queue->insert_count);
+
+ wmb(); /* Ensure descriptors are written before they are fetched */
+
+ if (efx_nic_may_push_tx_desc(tx_queue, old_write_count)) {
+ txd = efx_tx_desc(tx_queue,
+ old_write_count & tx_queue->ptr_mask);
+ efx_farch_push_tx_desc(tx_queue, txd);
+ ++tx_queue->pushes;
+ } else {
+ efx_farch_notify_tx_desc(tx_queue);
+ }
+}
+
+unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, unsigned int len)
+{
+ /* Don't cross 4K boundaries with descriptors. */
+ unsigned int limit = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1;
+
+ len = min(limit, len);
+
+ return len;
+}
+
+
+/* Allocate hardware resources for a TX queue */
+int efx_farch_tx_probe(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ unsigned entries;
+
+ tx_queue->type = ((tx_queue->label & 1) ? EFX_TXQ_TYPE_OUTER_CSUM : 0) |
+ ((tx_queue->label & 2) ? EFX_TXQ_TYPE_HIGHPRI : 0);
+ entries = tx_queue->ptr_mask + 1;
+ return efx_alloc_special_buffer(efx, &tx_queue->txd,
+ entries * sizeof(efx_qword_t));
+}
+
+void efx_farch_tx_init(struct efx_tx_queue *tx_queue)
+{
+ int csum = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM;
+ struct efx_nic *efx = tx_queue->efx;
+ efx_oword_t reg;
+
+ /* Pin TX descriptor ring */
+ efx_init_special_buffer(efx, &tx_queue->txd);
+
+ /* Push TX descriptor ring to card */
+ EFX_POPULATE_OWORD_10(reg,
+ FRF_AZ_TX_DESCQ_EN, 1,
+ FRF_AZ_TX_ISCSI_DDIG_EN, 0,
+ FRF_AZ_TX_ISCSI_HDIG_EN, 0,
+ FRF_AZ_TX_DESCQ_BUF_BASE_ID, tx_queue->txd.index,
+ FRF_AZ_TX_DESCQ_EVQ_ID,
+ tx_queue->channel->channel,
+ FRF_AZ_TX_DESCQ_OWNER_ID, 0,
+ FRF_AZ_TX_DESCQ_LABEL, tx_queue->label,
+ FRF_AZ_TX_DESCQ_SIZE,
+ __ffs(tx_queue->txd.entries),
+ FRF_AZ_TX_DESCQ_TYPE, 0,
+ FRF_BZ_TX_NON_IP_DROP_DIS, 1);
+
+ EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_IP_CHKSM_DIS, !csum);
+ EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_TCP_CHKSM_DIS, !csum);
+
+ efx_writeo_table(efx, &reg, efx->type->txd_ptr_tbl_base,
+ tx_queue->queue);
+
+ EFX_POPULATE_OWORD_1(reg,
+ FRF_BZ_TX_PACE,
+ (tx_queue->type & EFX_TXQ_TYPE_HIGHPRI) ?
+ FFE_BZ_TX_PACE_OFF :
+ FFE_BZ_TX_PACE_RESERVED);
+ efx_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL, tx_queue->queue);
+
+ tx_queue->tso_version = 1;
+}
+
+static void efx_farch_flush_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ efx_oword_t tx_flush_descq;
+
+ WARN_ON(atomic_read(&tx_queue->flush_outstanding));
+ atomic_set(&tx_queue->flush_outstanding, 1);
+
+ EFX_POPULATE_OWORD_2(tx_flush_descq,
+ FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
+ FRF_AZ_TX_FLUSH_DESCQ, tx_queue->queue);
+ efx_writeo(efx, &tx_flush_descq, FR_AZ_TX_FLUSH_DESCQ);
+}
+
+void efx_farch_tx_fini(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ efx_oword_t tx_desc_ptr;
+
+ /* Remove TX descriptor ring from card */
+ EFX_ZERO_OWORD(tx_desc_ptr);
+ efx_writeo_table(efx, &tx_desc_ptr, efx->type->txd_ptr_tbl_base,
+ tx_queue->queue);
+
+ /* Unpin TX descriptor ring */
+ efx_fini_special_buffer(efx, &tx_queue->txd);
+}
+
+/* Free buffers backing TX queue */
+void efx_farch_tx_remove(struct efx_tx_queue *tx_queue)
+{
+ efx_free_special_buffer(tx_queue->efx, &tx_queue->txd);
+}
+
+/**************************************************************************
+ *
+ * RX path
+ *
+ **************************************************************************/
+
+/* This creates an entry in the RX descriptor queue */
+static inline void
+efx_farch_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned index)
+{
+ struct efx_rx_buffer *rx_buf;
+ efx_qword_t *rxd;
+
+ rxd = efx_rx_desc(rx_queue, index);
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ EFX_POPULATE_QWORD_3(*rxd,
+ FSF_AZ_RX_KER_BUF_SIZE,
+ rx_buf->len -
+ rx_queue->efx->type->rx_buffer_padding,
+ FSF_AZ_RX_KER_BUF_REGION, 0,
+ FSF_AZ_RX_KER_BUF_ADDR, rx_buf->dma_addr);
+}
+
+/* This writes to the RX_DESC_WPTR register for the specified receive
+ * descriptor ring.
+ */
+void efx_farch_rx_write(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ efx_dword_t reg;
+ unsigned write_ptr;
+
+ while (rx_queue->notified_count != rx_queue->added_count) {
+ efx_farch_build_rx_desc(
+ rx_queue,
+ rx_queue->notified_count & rx_queue->ptr_mask);
+ ++rx_queue->notified_count;
+ }
+
+ wmb();
+ write_ptr = rx_queue->added_count & rx_queue->ptr_mask;
+ EFX_POPULATE_DWORD_1(reg, FRF_AZ_RX_DESC_WPTR_DWORD, write_ptr);
+ efx_writed_page(efx, &reg, FR_AZ_RX_DESC_UPD_DWORD_P0,
+ efx_rx_queue_index(rx_queue));
+}
+
+int efx_farch_rx_probe(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ unsigned entries;
+
+ entries = rx_queue->ptr_mask + 1;
+ return efx_alloc_special_buffer(efx, &rx_queue->rxd,
+ entries * sizeof(efx_qword_t));
+}
+
+void efx_farch_rx_init(struct efx_rx_queue *rx_queue)
+{
+ efx_oword_t rx_desc_ptr;
+ struct efx_nic *efx = rx_queue->efx;
+ bool jumbo_en;
+
+ /* For kernel-mode queues in Siena, the JUMBO flag enables scatter. */
+ jumbo_en = efx->rx_scatter;
+
+ netif_dbg(efx, hw, efx->net_dev,
+ "RX queue %d ring in special buffers %d-%d\n",
+ efx_rx_queue_index(rx_queue), rx_queue->rxd.index,
+ rx_queue->rxd.index + rx_queue->rxd.entries - 1);
+
+ rx_queue->scatter_n = 0;
+
+ /* Pin RX descriptor ring */
+ efx_init_special_buffer(efx, &rx_queue->rxd);
+
+ /* Push RX descriptor ring to card */
+ EFX_POPULATE_OWORD_10(rx_desc_ptr,
+ FRF_AZ_RX_ISCSI_DDIG_EN, true,
+ FRF_AZ_RX_ISCSI_HDIG_EN, true,
+ FRF_AZ_RX_DESCQ_BUF_BASE_ID, rx_queue->rxd.index,
+ FRF_AZ_RX_DESCQ_EVQ_ID,
+ efx_rx_queue_channel(rx_queue)->channel,
+ FRF_AZ_RX_DESCQ_OWNER_ID, 0,
+ FRF_AZ_RX_DESCQ_LABEL,
+ efx_rx_queue_index(rx_queue),
+ FRF_AZ_RX_DESCQ_SIZE,
+ __ffs(rx_queue->rxd.entries),
+ FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ ,
+ FRF_AZ_RX_DESCQ_JUMBO, jumbo_en,
+ FRF_AZ_RX_DESCQ_EN, 1);
+ efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base,
+ efx_rx_queue_index(rx_queue));
+}
+
+static void efx_farch_flush_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ efx_oword_t rx_flush_descq;
+
+ EFX_POPULATE_OWORD_2(rx_flush_descq,
+ FRF_AZ_RX_FLUSH_DESCQ_CMD, 1,
+ FRF_AZ_RX_FLUSH_DESCQ,
+ efx_rx_queue_index(rx_queue));
+ efx_writeo(efx, &rx_flush_descq, FR_AZ_RX_FLUSH_DESCQ);
+}
+
+void efx_farch_rx_fini(struct efx_rx_queue *rx_queue)
+{
+ efx_oword_t rx_desc_ptr;
+ struct efx_nic *efx = rx_queue->efx;
+
+ /* Remove RX descriptor ring from card */
+ EFX_ZERO_OWORD(rx_desc_ptr);
+ efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base,
+ efx_rx_queue_index(rx_queue));
+
+ /* Unpin RX descriptor ring */
+ efx_fini_special_buffer(efx, &rx_queue->rxd);
+}
+
+/* Free buffers backing RX queue */
+void efx_farch_rx_remove(struct efx_rx_queue *rx_queue)
+{
+ efx_free_special_buffer(rx_queue->efx, &rx_queue->rxd);
+}
+
+/**************************************************************************
+ *
+ * Flush handling
+ *
+ **************************************************************************/
+
+/* efx_farch_flush_queues() must be woken up when all flushes are completed,
+ * or more RX flushes can be kicked off.
+ */
+static bool efx_farch_flush_wake(struct efx_nic *efx)
+{
+ /* Ensure that all updates are visible to efx_farch_flush_queues() */
+ smp_mb();
+
+ return (atomic_read(&efx->active_queues) == 0 ||
+ (atomic_read(&efx->rxq_flush_outstanding) < EFX_RX_FLUSH_COUNT
+ && atomic_read(&efx->rxq_flush_pending) > 0));
+}
+
+static bool efx_check_tx_flush_complete(struct efx_nic *efx)
+{
+ bool i = true;
+ efx_oword_t txd_ptr_tbl;
+ struct efx_channel *channel;
+ struct efx_tx_queue *tx_queue;
+
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ efx_reado_table(efx, &txd_ptr_tbl,
+ FR_BZ_TX_DESC_PTR_TBL, tx_queue->queue);
+ if (EFX_OWORD_FIELD(txd_ptr_tbl,
+ FRF_AZ_TX_DESCQ_FLUSH) ||
+ EFX_OWORD_FIELD(txd_ptr_tbl,
+ FRF_AZ_TX_DESCQ_EN)) {
+ netif_dbg(efx, hw, efx->net_dev,
+ "flush did not complete on TXQ %d\n",
+ tx_queue->queue);
+ i = false;
+ } else if (atomic_cmpxchg(&tx_queue->flush_outstanding,
+ 1, 0)) {
+ /* The flush is complete, but we didn't
+ * receive a flush completion event
+ */
+ netif_dbg(efx, hw, efx->net_dev,
+ "flush complete on TXQ %d, so drain "
+ "the queue\n", tx_queue->queue);
+ /* Don't need to increment active_queues as it
+ * has already been incremented for the queues
+ * which did not drain
+ */
+ efx_farch_magic_event(channel,
+ EFX_CHANNEL_MAGIC_TX_DRAIN(
+ tx_queue));
+ }
+ }
+ }
+
+ return i;
+}
+
+/* Flush all the transmit queues, and continue flushing receive queues until
+ * they're all flushed. Wait for the DRAIN events to be received so that there
+ * are no more RX and TX events left on any channel. */
+static int efx_farch_do_flush(struct efx_nic *efx)
+{
+ unsigned timeout = msecs_to_jiffies(5000); /* 5s for all flushes and drains */
+ struct efx_channel *channel;
+ struct efx_rx_queue *rx_queue;
+ struct efx_tx_queue *tx_queue;
+ int rc = 0;
+
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ efx_farch_flush_tx_queue(tx_queue);
+ }
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ rx_queue->flush_pending = true;
+ atomic_inc(&efx->rxq_flush_pending);
+ }
+ }
+
+ while (timeout && atomic_read(&efx->active_queues) > 0) {
+ /* If SRIOV is enabled, then offload receive queue flushing to
+ * the firmware (though we will still have to poll for
+ * completion). If that fails, fall back to the old scheme.
+ */
+ if (efx_siena_sriov_enabled(efx)) {
+ rc = efx_mcdi_flush_rxqs(efx);
+ if (!rc)
+ goto wait;
+ }
+
+ /* The hardware supports four concurrent rx flushes, each of
+ * which may need to be retried if there is an outstanding
+ * descriptor fetch
+ */
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ if (atomic_read(&efx->rxq_flush_outstanding) >=
+ EFX_RX_FLUSH_COUNT)
+ break;
+
+ if (rx_queue->flush_pending) {
+ rx_queue->flush_pending = false;
+ atomic_dec(&efx->rxq_flush_pending);
+ atomic_inc(&efx->rxq_flush_outstanding);
+ efx_farch_flush_rx_queue(rx_queue);
+ }
+ }
+ }
+
+ wait:
+ timeout = wait_event_timeout(efx->flush_wq,
+ efx_farch_flush_wake(efx),
+ timeout);
+ }
+
+ if (atomic_read(&efx->active_queues) &&
+ !efx_check_tx_flush_complete(efx)) {
+ netif_err(efx, hw, efx->net_dev, "failed to flush %d queues "
+ "(rx %d+%d)\n", atomic_read(&efx->active_queues),
+ atomic_read(&efx->rxq_flush_outstanding),
+ atomic_read(&efx->rxq_flush_pending));
+ rc = -ETIMEDOUT;
+
+ atomic_set(&efx->active_queues, 0);
+ atomic_set(&efx->rxq_flush_pending, 0);
+ atomic_set(&efx->rxq_flush_outstanding, 0);
+ }
+
+ return rc;
+}
+
+ /* Flush (when possible) and then finalise every RX and TX DMA queue.
+  *
+  * Returns 0, or the result of efx_farch_do_flush() when a flush was
+  * attempted.
+  */
+ int efx_farch_fini_dmaq(struct efx_nic *efx)
+ {
+	struct efx_rx_queue *rxq;
+	struct efx_tx_queue *txq;
+	struct efx_channel *chan;
+	int rc = 0;
+
+	/* Do not attempt to write to the NIC during EEH recovery */
+	if (efx->state == STATE_RECOVERY)
+		return rc;
+
+	/* Only perform flush if DMA is enabled */
+	if (efx->pci_dev->is_busmaster) {
+		efx->type->prepare_flush(efx);
+		rc = efx_farch_do_flush(efx);
+		efx->type->finish_flush(efx);
+	}
+
+	/* Queues are finalised whether or not the flush ran or succeeded */
+	efx_for_each_channel(chan, efx) {
+		efx_for_each_channel_rx_queue(rxq, chan)
+			efx_farch_rx_fini(rxq);
+		efx_for_each_channel_tx_queue(txq, chan)
+			efx_farch_tx_fini(txq);
+	}
+
+	return rc;
+ }
+
+ /* Reset queue and flush accounting after FLR
+ *
+ * Zeroes efx->rxq_flush_pending, efx->rxq_flush_outstanding and
+ * efx->active_queues.
+ *
+ * One possible cause of FLR recovery is that DMA may be failing (e.g. if bus
+ * mastering was disabled), in which case we don't receive (RXQ) flush
+ * completion events. This means that efx->rxq_flush_outstanding remained at 4
+ * after the FLR; also, efx->active_queues was non-zero (as no flush completion
+ * events were received, and we didn't go through efx_check_tx_flush_complete()).
+ * If we don't fix this up, on the next call to efx_realloc_channels() we won't
+ * flush any RX queues because efx->rxq_flush_outstanding is at the limit of 4
+ * for batched flush requests; and efx->active_queues gets messed up because
+ * we keep incrementing it for the newly initialised queues, but it never went
+ * to zero previously. Then we get a timeout every time we try to restart the
+ * queues, as it doesn't go back to zero when we should be flushing the queues.
+ */
+ void efx_farch_finish_flr(struct efx_nic *efx)
+ {
+ atomic_set(&efx->rxq_flush_pending, 0);
+ atomic_set(&efx->rxq_flush_outstanding, 0);
+ atomic_set(&efx->active_queues, 0);
+ }
+
+
+/**************************************************************************
+ *
+ * Event queue processing
+ * Event queues are processed by per-channel tasklets.
+ *
+ **************************************************************************/
+
+ /* Update a channel's event queue's read pointer (RPTR) register
+  *
+  * Writes the channel's software read pointer, masked with eventq_mask,
+  * to this channel's entry in the EVQ_RPTR table.
+  */
+ void efx_farch_ev_read_ack(struct efx_channel *channel)
+ {
+	efx_dword_t reg;
+	struct efx_nic *efx = channel->efx;
+
+	EFX_POPULATE_DWORD_1(reg, FRF_AZ_EVQ_RPTR,
+			     channel->eventq_read_ptr & channel->eventq_mask);
+
+	/* For Falcon A1, EVQ_RPTR_KER is documented as having a step size
+	 * of 4 bytes, but it is really 16 bytes just like later revisions.
+	 */
+	efx_writed(efx, &reg,
+		   efx->type->evq_rptr_tbl_base +
+		   FR_BZ_EVQ_RPTR_STEP * channel->channel);
+ }
+
+ /* Ask the hardware to insert a software-defined event into queue @evq.
+  *
+  * The 64-bit @event payload occupies the low half of the DRV_EV register
+  * image; the upper half is zeroed before the target queue id is set.
+  */
+ void efx_farch_generate_event(struct efx_nic *efx, unsigned int evq,
+			       efx_qword_t *event)
+ {
+	efx_oword_t reg;
+
+	/* The copy below relies on the data field being the low 64 bits */
+	BUILD_BUG_ON(FRF_AZ_DRV_EV_DATA_LBN != 0 ||
+		     FRF_AZ_DRV_EV_DATA_WIDTH != 64);
+
+	reg.u32[3] = 0;
+	reg.u32[2] = 0;
+	reg.u32[1] = event->u32[1];
+	reg.u32[0] = event->u32[0];
+	EFX_SET_OWORD_FIELD(reg, FRF_AZ_DRV_EV_QID, evq);
+
+	efx_writeo(efx, &reg, FR_AZ_DRV_EV);
+ }
+
+ /* Post a driver-generated event carrying @magic to @channel's own
+  * event queue.
+  */
+ static void efx_farch_magic_event(struct efx_channel *channel, u32 magic)
+ {
+	efx_qword_t ev;
+
+	EFX_POPULATE_QWORD_2(ev,
+			     FSF_AZ_EV_CODE, FSE_AZ_EV_CODE_DRV_GEN_EV,
+			     FSF_AZ_DRV_GEN_EV_MAGIC, magic);
+	efx_farch_generate_event(channel->efx, channel->channel, &ev);
+ }
+
+ /* Handle a transmit completion event
+  *
+  * TX completions are batched by the NIC: a single event means
+  * "complete all TX events up to this index".
+  */
+ static void
+ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
+ {
+	struct efx_nic *efx = channel->efx;
+	struct efx_tx_queue *txq;
+	unsigned int label;
+	unsigned int done_ptr;
+
+	/* Events are dropped while a reset is pending */
+	if (unlikely(READ_ONCE(efx->reset_pending)))
+		return;
+
+	if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) {
+		/* Transmit completion */
+		done_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR);
+		label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
+		txq = channel->tx_queue + (label % EFX_MAX_TXQ_PER_CHANNEL);
+		efx_xmit_done(txq, done_ptr);
+		return;
+	}
+
+	if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
+		/* Rewrite the FIFO write pointer */
+		label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
+		txq = channel->tx_queue + (label % EFX_MAX_TXQ_PER_CHANNEL);
+
+		netif_tx_lock(efx->net_dev);
+		efx_farch_notify_tx_desc(txq);
+		netif_tx_unlock(efx->net_dev);
+		return;
+	}
+
+	if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_PKT_ERR)) {
+		efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+		return;
+	}
+
+	netif_err(efx, tx_err, efx->net_dev,
+		  "channel %d unexpected TX event "
+		  EFX_QWORD_FMT"\n", channel->channel,
+		  EFX_QWORD_VAL(*event));
+ }
+
+/* Detect errors included in the rx_evt_pkt_ok bit. */
+static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
+ const efx_qword_t *event)
+{
+ struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
+ struct efx_nic *efx = rx_queue->efx;
+ bool rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err;
+ bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err;
+ bool rx_ev_frm_trunc, rx_ev_tobe_disc;
+ bool rx_ev_other_err, rx_ev_pause_frm;
+
+ rx_ev_tobe_disc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_TOBE_DISC);
+ rx_ev_buf_owner_id_err = EFX_QWORD_FIELD(*event,
+ FSF_AZ_RX_EV_BUF_OWNER_ID_ERR);
+ rx_ev_ip_hdr_chksum_err = EFX_QWORD_FIELD(*event,
+ FSF_AZ_RX_EV_IP_HDR_CHKSUM_ERR);
+ rx_ev_tcp_udp_chksum_err = EFX_QWORD_FIELD(*event,
+ FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR);
+ rx_ev_eth_crc_err = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_ETH_CRC_ERR);
+ rx_ev_frm_trunc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_FRM_TRUNC);
+ rx_ev_pause_frm = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PAUSE_FRM_ERR);
+
+ /* Every error apart from tobe_disc and pause_frm */
+ rx_ev_other_err = (rx_ev_tcp_udp_chksum_err |
+ rx_ev_buf_owner_id_err | rx_ev_eth_crc_err |
+ rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err);
+
+ /* Count errors that are not in MAC stats. Ignore expected
+ * checksum errors during self-test. */
+ if (rx_ev_frm_trunc)
+ ++channel->n_rx_frm_trunc;
+ else if (rx_ev_tobe_disc)
+ ++channel->n_rx_tobe_disc;
+ else if (!efx->loopback_selftest) {
+ if (rx_ev_ip_hdr_chksum_err)
+ ++channel->n_rx_ip_hdr_chksum_err;
+ else if (rx_ev_tcp_udp_chksum_err)
+ ++channel->n_rx_tcp_udp_chksum_err;
+ }
+
+ /* TOBE_DISC is expected on unicast mismatches; don't print out an
+ * error message. FRM_TRUNC indicates RXDP dropped the packet due
+ * to a FIFO overflow.
+ */
+#ifdef DEBUG
+ if (rx_ev_other_err && net_ratelimit()) {
+ netif_dbg(efx, rx_err, efx->net_dev,
+ " RX queue %d unexpected RX event "
+ EFX_QWORD_FMT "%s%s%s%s%s%s%s\n",
+ efx_rx_queue_index(rx_queue), EFX_QWORD_VAL(*event),
+ rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "",
+ rx_ev_ip_hdr_chksum_err ?
+ " [IP_HDR_CHKSUM_ERR]" : "",
+ rx_ev_tcp_udp_chksum_err ?
+ " [TCP_UDP_CHKSUM_ERR]" : "",
+ rx_ev_eth_crc_err ? " [ETH_CRC_ERR]" : "",
+ rx_ev_frm_trunc ? " [FRM_TRUNC]" : "",
+ rx_ev_tobe_disc ? " [TOBE_DISC]" : "",
+ rx_ev_pause_frm ? " [PAUSE]" : "");
+ }
+#else
+ (void) rx_ev_other_err;
+#endif
+
+ if (efx->net_dev->features & NETIF_F_RXALL)
+ /* don't discard frame for CRC error */
+ rx_ev_eth_crc_err = false;
+
+ /* The frame must be discarded if any of these are true. */
+ return (rx_ev_eth_crc_err | rx_ev_frm_trunc |
+ rx_ev_tobe_disc | rx_ev_pause_frm) ?
+ EFX_RX_PKT_DISCARD : 0;
+}
+
+ /* Handle an RX event whose descriptor index is not the one expected.
+  *
+  * Returns true when the event can be treated as a partial packet
+  * discard; returns false after logging and scheduling a
+  * RESET_TYPE_DISABLE reset when it is more serious.
+  */
+ static bool
+ efx_farch_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
+ {
+	struct efx_nic *efx = rx_queue->efx;
+	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
+	unsigned want, lost;
+
+	/* The index repeats the last fragment of the scatter in progress */
+	if (rx_queue->scatter_n) {
+		unsigned last_frag = (rx_queue->removed_count +
+				      rx_queue->scatter_n - 1) &
+				     rx_queue->ptr_mask;
+
+		if (index == last_frag) {
+			++channel->n_rx_nodesc_trunc;
+			return true;
+		}
+	}
+
+	want = rx_queue->removed_count & rx_queue->ptr_mask;
+	lost = (index - want) & rx_queue->ptr_mask;
+	netif_info(efx, rx_err, efx->net_dev,
+		   "dropped %d events (index=%d expected=%d)\n",
+		   lost, index, want);
+
+	efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+	return false;
+ }
+
+ /* Handle a packet received event
+ *
+ * The NIC gives a "discard" flag if it's a unicast packet with the
+ * wrong destination address.
+ * Also "is multicast" and "matches multicast filter" flags can be used to
+ * discard non-matching multicast packets.
+ *
+ * Fragments of a scattered packet are counted in rx_queue->scatter_n;
+ * the packet is only handed to efx_rx_packet() once an event without
+ * the JUMBO_CONT bit arrives. The event is ignored entirely while
+ * efx->reset_pending is set.
+ */
+ static void
+ efx_farch_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
+ {
+ unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt;
+ unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt;
+ unsigned expected_ptr;
+ bool rx_ev_pkt_ok, rx_ev_sop, rx_ev_cont;
+ u16 flags;
+ struct efx_rx_queue *rx_queue;
+ struct efx_nic *efx = channel->efx;
+
+ if (unlikely(READ_ONCE(efx->reset_pending)))
+ return;
+
+ rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
+ rx_ev_sop = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP);
+ WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) !=
+ channel->channel);
+
+ rx_queue = efx_channel_get_rx_queue(channel);
+
+ rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR);
+ expected_ptr = ((rx_queue->removed_count + rx_queue->scatter_n) &
+ rx_queue->ptr_mask);
+
+ /* Check for partial drops and other errors: either the descriptor
+ * index is not the next one expected, or the start-of-packet flag
+ * disagrees with whether a scatter sequence is already in progress
+ */
+ if (unlikely(rx_ev_desc_ptr != expected_ptr) ||
+ unlikely(rx_ev_sop != (rx_queue->scatter_n == 0))) {
+ if (rx_ev_desc_ptr != expected_ptr &&
+ !efx_farch_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr))
+ return;
+
+ /* Discard all pending fragments: pass the scatter_n buffers
+ * accumulated so far to efx_rx_packet() flagged
+ * EFX_RX_PKT_DISCARD and advance removed_count past them
+ */
+ if (rx_queue->scatter_n) {
+ efx_rx_packet(
+ rx_queue,
+ rx_queue->removed_count & rx_queue->ptr_mask,
+ rx_queue->scatter_n, 0, EFX_RX_PKT_DISCARD);
+ rx_queue->removed_count += rx_queue->scatter_n;
+ rx_queue->scatter_n = 0;
+ }
+
+ /* Return if there is no new fragment */
+ if (rx_ev_desc_ptr != expected_ptr)
+ return;
+
+ /* Discard new fragment if not SOP */
+ if (!rx_ev_sop) {
+ efx_rx_packet(
+ rx_queue,
+ rx_queue->removed_count & rx_queue->ptr_mask,
+ 1, 0, EFX_RX_PKT_DISCARD);
+ ++rx_queue->removed_count;
+ return;
+ }
+ }
+
+ ++rx_queue->scatter_n;
+ if (rx_ev_cont)
+ return;
+
+ rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT);
+ rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
+ rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
+
+ if (likely(rx_ev_pkt_ok)) {
+ /* If packet is marked as OK then we can rely on the
+ * hardware checksum and classification. The cases fall
+ * through deliberately: TCP sets EFX_RX_PKT_TCP and
+ * EFX_RX_PKT_CSUMMED, UDP sets only EFX_RX_PKT_CSUMMED.
+ */
+ flags = 0;
+ switch (rx_ev_hdr_type) {
+ case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP:
+ flags |= EFX_RX_PKT_TCP;
+ fallthrough;
+ case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP:
+ flags |= EFX_RX_PKT_CSUMMED;
+ fallthrough;
+ case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_OTHER:
+ case FSE_AZ_RX_EV_HDR_TYPE_OTHER:
+ break;
+ }
+ } else {
+ flags = efx_farch_handle_rx_not_ok(rx_queue, event);
+ }
+
+ /* Detect multicast packets that didn't match the filter */
+ rx_ev_mcast_pkt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_PKT);
+ if (rx_ev_mcast_pkt) {
+ unsigned int rx_ev_mcast_hash_match =
+ EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_HASH_MATCH);
+
+ if (unlikely(!rx_ev_mcast_hash_match)) {
+ ++channel->n_rx_mcast_mismatch;
+ flags |= EFX_RX_PKT_DISCARD;
+ }
+ }
+
+ channel->irq_mod_score += 2;
+
+ /* Handle received packet: hand over all scatter_n fragments
+ * starting at removed_count, then advance past them
+ */
+ efx_rx_packet(rx_queue,
+ rx_queue->removed_count & rx_queue->ptr_mask,
+ rx_queue->scatter_n, rx_ev_byte_cnt, flags);
+ rx_queue->removed_count += rx_queue->scatter_n;
+ rx_queue->scatter_n = 0;
+ }
+
+ /* TX flush-done handling: when the queue id in the event belongs to one
+  * of this function's &struct efx_tx_queue and that queue still had a
+  * flush outstanding, send an %EFX_CHANNEL_MAGIC_TX_DRAIN event to drain
+  * the event queue of all transmit completions.
+  */
+ static void
+ efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
+ {
+	struct efx_channel *chan;
+	struct efx_tx_queue *txq;
+	int qid;
+
+	qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
+
+	/* Ignore queue ids outside the range of our TX queues */
+	if (qid >= EFX_MAX_TXQ_PER_CHANNEL * (efx->n_tx_channels + efx->n_extra_tx_channels))
+		return;
+
+	chan = efx_get_tx_channel(efx, qid / EFX_MAX_TXQ_PER_CHANNEL);
+	txq = chan->tx_queue + (qid % EFX_MAX_TXQ_PER_CHANNEL);
+
+	/* Clear flush_outstanding 1 -> 0; only emit the drain event if it
+	 * had been set
+	 */
+	if (atomic_cmpxchg(&txq->flush_outstanding, 1, 0))
+		efx_farch_magic_event(txq->channel,
+				      EFX_CHANNEL_MAGIC_TX_DRAIN(txq));
+ }
+
+ /* RX flush-done handling: when the event refers to one of our
+  * &struct efx_rx_queue, send an %EFX_CHANNEL_MAGIC_RX_DRAIN on success,
+  * or mark the queue as needing another flush on failure. Either way
+  * one outstanding flush slot is released.
+  */
+ static void
+ efx_farch_handle_rx_flush_done(struct efx_nic *efx, efx_qword_t *event)
+ {
+	struct efx_rx_queue *rxq;
+	struct efx_channel *chan;
+	bool failed;
+	int qid;
+
+	qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
+	if (qid >= efx->n_channels)
+		return;
+
+	chan = efx_get_channel(efx, qid);
+	if (!efx_channel_has_rx_queue(chan))
+		return;
+
+	rxq = efx_channel_get_rx_queue(chan);
+	failed = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
+
+	if (!failed) {
+		efx_farch_magic_event(efx_rx_queue_channel(rxq),
+				      EFX_CHANNEL_MAGIC_RX_DRAIN(rxq));
+	} else {
+		netif_info(efx, hw, efx->net_dev,
+			   "RXQ %d flush retry\n", qid);
+		rxq->flush_pending = true;
+		atomic_inc(&efx->rxq_flush_pending);
+	}
+
+	atomic_dec(&efx->rxq_flush_outstanding);
+	if (efx_farch_flush_wake(efx))
+		wake_up(&efx->flush_wq);
+ }
+
+ /* Account for one drained queue: decrement efx->active_queues and wake
+  * the flush waiter if efx_farch_flush_wake() says it should run.
+  */
+ static void
+ efx_farch_handle_drain_event(struct efx_channel *channel)
+ {
+	struct efx_nic *nic = channel->efx;
+
+	/* A drain event with no active queues is unexpected */
+	WARN_ON(atomic_read(&nic->active_queues) == 0);
+	atomic_dec(&nic->active_queues);
+
+	if (efx_farch_flush_wake(nic))
+		wake_up(&nic->flush_wq);
+ }
+
+ /* Dispatch a driver-generated ("magic") event according to the magic
+  * value it carries: self-test, RX refill, or RX/TX drain. Anything else
+  * is only logged.
+  */
+ static void efx_farch_handle_generated_event(struct efx_channel *channel,
+					      efx_qword_t *event)
+ {
+	struct efx_nic *efx = channel->efx;
+	struct efx_rx_queue *rxq = NULL;
+	unsigned magic, code;
+
+	if (efx_channel_has_rx_queue(channel))
+		rxq = efx_channel_get_rx_queue(channel);
+
+	magic = EFX_QWORD_FIELD(*event, FSF_AZ_DRV_GEN_EV_MAGIC);
+	code = _EFX_CHANNEL_MAGIC_CODE(magic);
+
+	if (magic == EFX_CHANNEL_MAGIC_TEST(channel)) {
+		channel->event_test_cpu = raw_smp_processor_id();
+	} else if (rxq && magic == EFX_CHANNEL_MAGIC_FILL(rxq)) {
+		/* The queue must be empty, so we won't receive any rx
+		 * events, so efx_process_channel() won't refill the
+		 * queue. Refill it here
+		 */
+		efx_fast_push_rx_descriptors(rxq, true);
+	} else if ((rxq && magic == EFX_CHANNEL_MAGIC_RX_DRAIN(rxq)) ||
+		   code == _EFX_CHANNEL_MAGIC_TX_DRAIN) {
+		/* RX and TX drain are accounted for identically */
+		efx_farch_handle_drain_event(channel);
+	} else {
+		netif_dbg(efx, hw, efx->net_dev, "channel %d received "
+			  "generated event "EFX_QWORD_FMT"\n",
+			  channel->channel, EFX_QWORD_VAL(*event));
+	}
+ }
+
+ /* Handle a DRIVER_EV event, dispatching on its sub-code. Flush-done
+  * events are forwarded to the flush handlers (and to the SR-IOV code
+  * when CONFIG_SFC_SRIOV is set); descriptor fetch errors and RX recover
+  * events schedule a reset; the rest are only logged.
+  */
+ static void
+ efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
+ {
+	struct efx_nic *efx = channel->efx;
+	unsigned int sub_data;
+	unsigned int sub_code;
+
+	sub_data = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
+	sub_code = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBCODE);
+
+	switch (sub_code) {
+	case FSE_AZ_TX_DESCQ_FLS_DONE_EV:
+		netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n",
+			   channel->channel, sub_data);
+		efx_farch_handle_tx_flush_done(efx, event);
+ #ifdef CONFIG_SFC_SRIOV
+		efx_siena_sriov_tx_flush_done(efx, event);
+ #endif
+		break;
+	case FSE_AZ_RX_DESCQ_FLS_DONE_EV:
+		netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n",
+			   channel->channel, sub_data);
+		efx_farch_handle_rx_flush_done(efx, event);
+ #ifdef CONFIG_SFC_SRIOV
+		efx_siena_sriov_rx_flush_done(efx, event);
+ #endif
+		break;
+	case FSE_AZ_EVQ_INIT_DONE_EV:
+		netif_dbg(efx, hw, efx->net_dev,
+			  "channel %d EVQ %d initialised\n",
+			  channel->channel, sub_data);
+		break;
+	case FSE_AZ_SRM_UPD_DONE_EV:
+		netif_vdbg(efx, hw, efx->net_dev,
+			   "channel %d SRAM update done\n", channel->channel);
+		break;
+	case FSE_AZ_WAKE_UP_EV:
+		netif_vdbg(efx, hw, efx->net_dev,
+			   "channel %d RXQ %d wakeup event\n",
+			   channel->channel, sub_data);
+		break;
+	case FSE_AZ_TIMER_EV:
+		netif_vdbg(efx, hw, efx->net_dev,
+			   "channel %d RX queue %d timer expired\n",
+			   channel->channel, sub_data);
+		break;
+	case FSE_AA_RX_RECOVER_EV:
+		netif_err(efx, rx_err, efx->net_dev,
+			  "channel %d seen DRIVER RX_RESET event. "
+			  "Resetting.\n", channel->channel);
+		atomic_inc(&efx->rx_reset);
+		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+		break;
+	case FSE_BZ_RX_DSC_ERROR_EV:
+		/* Queue ids below EFX_VI_BASE are handled here; others go
+		 * to the SR-IOV code when it is built in
+		 */
+		if (sub_data < EFX_VI_BASE) {
+			netif_err(efx, rx_err, efx->net_dev,
+				  "RX DMA Q %d reports descriptor fetch error."
+				  " RX Q %d is disabled.\n", sub_data,
+				  sub_data);
+			efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+		}
+ #ifdef CONFIG_SFC_SRIOV
+		else
+			efx_siena_sriov_desc_fetch_err(efx, sub_data);
+ #endif
+		break;
+	case FSE_BZ_TX_DSC_ERROR_EV:
+		if (sub_data < EFX_VI_BASE) {
+			netif_err(efx, tx_err, efx->net_dev,
+				  "TX DMA Q %d reports descriptor fetch error."
+				  " TX Q %d is disabled.\n", sub_data,
+				  sub_data);
+			efx_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+		}
+ #ifdef CONFIG_SFC_SRIOV
+		else
+			efx_siena_sriov_desc_fetch_err(efx, sub_data);
+ #endif
+		break;
+	default:
+		netif_vdbg(efx, hw, efx->net_dev,
+			   "channel %d unknown driver event code %d "
+			   "data %04x\n", channel->channel, sub_code,
+			   sub_data);
+		break;
+	}
+ }
+
+ /* Process events on @channel's event queue until it is empty or @budget
+  * RX events have been handled. Only RX events count against the budget.
+  *
+  * Returns the number of RX events handled.
+  */
+ int efx_farch_ev_process(struct efx_channel *channel, int budget)
+ {
+	struct efx_nic *efx = channel->efx;
+	efx_qword_t *slot, ev;
+	unsigned int rptr;
+	int rx_done = 0;
+	int code;
+
+	if (budget <= 0)
+		return rx_done;
+
+	rptr = channel->eventq_read_ptr;
+
+	while (1) {
+		slot = efx_event(channel, rptr);
+		ev = *slot;
+
+		/* End of events */
+		if (!efx_event_present(&ev))
+			break;
+
+		netif_vdbg(efx, intr, efx->net_dev,
+			   "channel %d event is "EFX_QWORD_FMT"\n",
+			   channel->channel, EFX_QWORD_VAL(ev));
+
+		/* Clear this event by marking it all ones; the local copy
+		 * taken above is what gets processed
+		 */
+		EFX_SET_QWORD(*slot);
+
+		++rptr;
+
+		code = EFX_QWORD_FIELD(ev, FSF_AZ_EV_CODE);
+
+		switch (code) {
+		case FSE_AZ_EV_CODE_RX_EV:
+			efx_farch_handle_rx_event(channel, &ev);
+			if (++rx_done == budget)
+				goto out;
+			break;
+		case FSE_AZ_EV_CODE_TX_EV:
+			efx_farch_handle_tx_event(channel, &ev);
+			break;
+		case FSE_AZ_EV_CODE_DRV_GEN_EV:
+			efx_farch_handle_generated_event(channel, &ev);
+			break;
+		case FSE_AZ_EV_CODE_DRIVER_EV:
+			efx_farch_handle_driver_event(channel, &ev);
+			break;
+ #ifdef CONFIG_SFC_SRIOV
+		case FSE_CZ_EV_CODE_USER_EV:
+			efx_siena_sriov_event(channel, &ev);
+			break;
+ #endif
+		case FSE_CZ_EV_CODE_MCDI_EV:
+			efx_mcdi_process_event(channel, &ev);
+			break;
+		case FSE_AZ_EV_CODE_GLOBAL_EV:
+			/* An unhandled global event is reported as unknown */
+			if (efx->type->handle_global_event &&
+			    efx->type->handle_global_event(channel, &ev))
+				break;
+			fallthrough;
+		default:
+			netif_err(efx, hw, efx->net_dev,
+				  "channel %d unknown event type %d (data "
+				  EFX_QWORD_FMT ")\n", channel->channel,
+				  code, EFX_QWORD_VAL(ev));
+		}
+	}
+
+ out:
+	channel->eventq_read_ptr = rptr;
+	return rx_done;
+ }
+
+ /* Allocate the special buffer backing @channel's event queue: one
+  * efx_qword_t per entry, eventq_mask + 1 entries in total.
+  */
+ int efx_farch_ev_probe(struct efx_channel *channel)
+ {
+	unsigned n_events = channel->eventq_mask + 1;
+
+	return efx_alloc_special_buffer(channel->efx, &channel->eventq,
+					n_events * sizeof(efx_qword_t));
+ }
+
+ /* Initialise @channel's event queue: program its timer table entry, pin
+  * and clear the backing buffer, then push the queue descriptor to the
+  * card. Always returns 0.
+  */
+ int efx_farch_ev_init(struct efx_channel *channel)
+ {
+	efx_oword_t reg;
+	struct efx_nic *efx = channel->efx;
+
+	netif_dbg(efx, hw, efx->net_dev,
+		  "channel %d event queue in special buffers %d-%d\n",
+		  channel->channel, channel->eventq.index,
+		  channel->eventq.index + channel->eventq.entries - 1);
+
+	EFX_POPULATE_OWORD_3(reg,
+			     FRF_CZ_TIMER_Q_EN, 1,
+			     FRF_CZ_HOST_NOTIFY_MODE, 0,
+			     FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
+	efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
+
+	/* Pin event queue buffer */
+	efx_init_special_buffer(efx, &channel->eventq);
+
+	/* Fill event queue with all ones (i.e. empty events) */
+	memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
+
+	/* Push event queue to card */
+	EFX_POPULATE_OWORD_3(reg,
+			     FRF_AZ_EVQ_EN, 1,
+			     FRF_AZ_EVQ_SIZE, __ffs(channel->eventq.entries),
+			     FRF_AZ_EVQ_BUF_BASE_ID, channel->eventq.index);
+	efx_writeo_table(efx, &reg, efx->type->evq_ptr_tbl_base,
+			 channel->channel);
+
+	return 0;
+ }
+
+ /* Tear down @channel's event queue: zero its EVQ pointer and timer
+  * table entries, then unpin the backing buffer.
+  */
+ void efx_farch_ev_fini(struct efx_channel *channel)
+ {
+	efx_oword_t reg;
+	struct efx_nic *efx = channel->efx;
+
+	/* Remove event queue from card */
+	EFX_ZERO_OWORD(reg);
+	efx_writeo_table(efx, &reg, efx->type->evq_ptr_tbl_base,
+			 channel->channel);
+	efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
+
+	/* Unpin event queue */
+	efx_fini_special_buffer(efx, &channel->eventq);
+ }
+
+ /* Release the special buffer that backs @channel's event queue */
+ void efx_farch_ev_remove(struct efx_channel *channel)
+ {
+	struct efx_nic *efx = channel->efx;
+
+	efx_free_special_buffer(efx, &channel->eventq);
+ }
+
+
+ /* Post the self-test magic event to @channel's event queue */
+ void efx_farch_ev_test_generate(struct efx_channel *channel)
+ {
+	u32 magic = EFX_CHANNEL_MAGIC_TEST(channel);
+
+	efx_farch_magic_event(channel, magic);
+ }
+
+ /* Post the FILL magic event for @rx_queue to its channel's event queue;
+  * the generated-event handler refills the queue when it sees it.
+  */
+ void efx_farch_rx_defer_refill(struct efx_rx_queue *rx_queue)
+ {
+	struct efx_channel *chan = efx_rx_queue_channel(rx_queue);
+
+	efx_farch_magic_event(chan, EFX_CHANNEL_MAGIC_FILL(rx_queue));
+ }
+
+/**************************************************************************
+ *
+ * Hardware interrupts
+ * The hardware interrupt handler does very little work; all the event
+ * queue processing is carried out by per-channel tasklets.
+ *
+ **************************************************************************/
+
+ /* Enable/disable/generate interrupts
+  *
+  * Writes INT_EN_KER with the current irq_level, the master enable bit
+  * from @enabled and the forced-interrupt bit from @force.
+  */
+ static inline void efx_farch_interrupts(struct efx_nic *efx,
+					 bool enabled, bool force)
+ {
+	efx_oword_t reg;
+
+	EFX_POPULATE_OWORD_3(reg,
+			     FRF_AZ_KER_INT_LEVE_SEL, efx->irq_level,
+			     FRF_AZ_KER_INT_KER, force,
+			     FRF_AZ_DRV_INT_EN_KER, enabled);
+	efx_writeo(efx, &reg, FR_AZ_INT_EN_KER);
+ }
+
+ /* Clear the in-memory interrupt status vector, then enable interrupts */
+ void efx_farch_irq_enable_master(struct efx_nic *efx)
+ {
+	efx_oword_t *int_ker = efx->irq_status.addr;
+
+	EFX_ZERO_OWORD(*int_ker);
+	wmb(); /* Ensure interrupt vector is clear before interrupts enabled */
+
+	efx_farch_interrupts(efx, true, false);
+ }
+
+ /* Disable interrupts: clear the master enable, force nothing */
+ void efx_farch_irq_disable_master(struct efx_nic *efx)
+ {
+	efx_farch_interrupts(efx, false, false);
+ }
+
+ /* Generate a test interrupt
+  *
+  * Interrupts must already have been enabled, otherwise nasty things
+  * may happen. Always returns 0.
+  */
+ int efx_farch_irq_test_generate(struct efx_nic *efx)
+ {
+	/* Keep interrupts enabled and set the force bit */
+	efx_farch_interrupts(efx, true, true);
+
+	return 0;
+ }
+
+ /* Process a fatal interrupt
+  *
+  * Logs the fatal interrupt status, disables bus mastering and
+  * interrupts, and schedules a reset. Once EFX_MAX_INT_ERRORS errors
+  * have been seen within the expiry window the NIC is disabled instead.
+  * Always returns IRQ_HANDLED.
+  */
+ irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx)
+ {
+	efx_oword_t *int_ker = efx->irq_status.addr;
+	efx_oword_t fatal_intr;
+	int error, mem_perr;
+
+	efx_reado(efx, &fatal_intr, FR_AZ_FATAL_INTR_KER);
+	error = EFX_OWORD_FIELD(fatal_intr, FRF_AZ_FATAL_INTR);
+
+	netif_err(efx, hw, efx->net_dev, "SYSTEM ERROR "EFX_OWORD_FMT" status "
+		  EFX_OWORD_FMT ": %s\n", EFX_OWORD_VAL(*int_ker),
+		  EFX_OWORD_VAL(fatal_intr),
+		  error ? "disabling bus mastering" : "no recognised error");
+
+	/* If this is a memory parity error dump which blocks are offending */
+	mem_perr = (EFX_OWORD_FIELD(fatal_intr, FRF_AZ_MEM_PERR_INT_KER) ||
+		    EFX_OWORD_FIELD(fatal_intr, FRF_AZ_SRM_PERR_INT_KER));
+	if (mem_perr) {
+		efx_oword_t reg;
+
+		efx_reado(efx, &reg, FR_AZ_MEM_STAT);
+		netif_err(efx, hw, efx->net_dev,
+			  "SYSTEM ERROR: memory parity error "EFX_OWORD_FMT"\n",
+			  EFX_OWORD_VAL(reg));
+	}
+
+	/* Disable both devices */
+	pci_clear_master(efx->pci_dev);
+	efx_farch_irq_disable_master(efx);
+
+	/* Count errors and reset or disable the NIC accordingly. The
+	 * counter restarts when it is zero or the expiry time has passed.
+	 */
+	if (efx->int_error_count == 0 ||
+	    time_after(jiffies, efx->int_error_expire)) {
+		efx->int_error_count = 0;
+		efx->int_error_expire =
+			jiffies + EFX_INT_ERROR_EXPIRE * HZ;
+	}
+	if (++efx->int_error_count < EFX_MAX_INT_ERRORS) {
+		netif_err(efx, hw, efx->net_dev,
+			  "SYSTEM ERROR - reset scheduled\n");
+		efx_schedule_reset(efx, RESET_TYPE_INT_ERROR);
+	} else {
+		/* Trailing space keeps the two literals from running
+		 * together in the log
+		 */
+		netif_err(efx, hw, efx->net_dev,
+			  "SYSTEM ERROR - max number of errors seen. "
+			  "NIC will be disabled\n");
+		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+	}
+
+	return IRQ_HANDLED;
+ }
+
+ /* Handle a legacy interrupt
+  *
+  * Reads (and thereby acknowledges) the ISR, then schedules event queue
+  * processing for every channel whose bit is set. Returns IRQ_HANDLED
+  * when the ISR was non-zero, or on the first zero read in a row;
+  * otherwise IRQ_NONE.
+  */
+ irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id)
+ {
+	struct efx_nic *efx = dev_id;
+	bool soft_enabled = READ_ONCE(efx->irq_soft_enabled);
+	efx_oword_t *int_ker = efx->irq_status.addr;
+	irqreturn_t result = IRQ_NONE;
+	struct efx_channel *channel;
+	efx_dword_t reg;
+	u32 queues;
+	int syserr;
+
+	/* Read the ISR which also ACKs the interrupts */
+	efx_readd(efx, &reg, FR_BZ_INT_ISR0);
+	queues = EFX_EXTRACT_DWORD(reg, 0, 31);
+
+	/* Legacy interrupts are disabled too late by the EEH kernel
+	 * code. Disable them earlier.
+	 * If an EEH error occurred, the read will have returned all ones.
+	 */
+	if (EFX_DWORD_IS_ALL_ONES(reg) && efx_try_recovery(efx) &&
+	    !efx->eeh_disabled_legacy_irq) {
+		disable_irq_nosync(efx->legacy_irq);
+		efx->eeh_disabled_legacy_irq = true;
+	}
+
+	/* Handle non-event-queue sources */
+	if (queues & (1U << efx->irq_level) && soft_enabled) {
+		syserr = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
+		if (unlikely(syserr))
+			return efx_farch_fatal_interrupt(efx);
+		efx->last_irq_cpu = raw_smp_processor_id();
+	}
+
+	if (queues != 0) {
+		efx->irq_zero_count = 0;
+
+		/* Schedule processing of any interrupting queues; bit N of
+		 * the ISR corresponds to the Nth channel visited
+		 */
+		if (likely(soft_enabled)) {
+			efx_for_each_channel(channel, efx) {
+				if (queues & 1)
+					efx_schedule_channel_irq(channel);
+				queues >>= 1;
+			}
+		}
+		result = IRQ_HANDLED;
+
+	} else {
+		efx_qword_t *event;
+
+		/* Legacy ISR read can return zero once (SF bug 15783) */
+
+		/* We can't return IRQ_HANDLED more than once on seeing ISR=0
+		 * because this might be a shared interrupt.
+		 */
+		if (efx->irq_zero_count++ == 0)
+			result = IRQ_HANDLED;
+
+		/* Ensure we schedule or rearm all event queues */
+		if (likely(soft_enabled)) {
+			efx_for_each_channel(channel, efx) {
+				event = efx_event(channel,
+						  channel->eventq_read_ptr);
+				if (efx_event_present(event))
+					efx_schedule_channel_irq(channel);
+				else
+					efx_farch_ev_read_ack(channel);
+			}
+		}
+	}
+
+	if (result == IRQ_HANDLED)
+		netif_vdbg(efx, intr, efx->net_dev,
+			   "IRQ %d on CPU %d status " EFX_DWORD_FMT "\n",
+			   irq, raw_smp_processor_id(), EFX_DWORD_VAL(reg));
+
+	return result;
+ }
+
+ /* Handle an MSI interrupt
+  *
+  * Schedules event queue processing for the channel this vector belongs
+  * to. No interrupt acknowledgement cycle is necessary. Also, we never
+  * need to check that the interrupt is for us, since MSI interrupts
+  * cannot be shared. Always returns IRQ_HANDLED unless a fatal interrupt
+  * is detected, in which case the fatal handler's result is returned.
+  */
+ irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id)
+ {
+	struct efx_msi_context *ctx = dev_id;
+	struct efx_nic *efx = ctx->efx;
+	efx_oword_t *int_ker = efx->irq_status.addr;
+	int syserr;
+
+	netif_vdbg(efx, intr, efx->net_dev,
+		   "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n",
+		   irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker));
+
+	/* Nothing to do while interrupts are soft-disabled */
+	if (unlikely(!READ_ONCE(efx->irq_soft_enabled)))
+		return IRQ_HANDLED;
+
+	/* Handle non-event-queue sources */
+	if (ctx->index == efx->irq_level) {
+		syserr = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
+		if (unlikely(syserr))
+			return efx_farch_fatal_interrupt(efx);
+		efx->last_irq_cpu = raw_smp_processor_id();
+	}
+
+	/* Schedule processing of the channel */
+	efx_schedule_channel_irq(efx->channel[ctx->index]);
+
+	return IRQ_HANDLED;
+ }
+
+ /* Setup RSS indirection table.
+  * This maps from the hash value of the packet to RXQ.
+  *
+  * Each entry of efx->rss_context.rx_indir_table is written to the
+  * matching row of the hardware table.
+  */
+ void efx_farch_rx_push_indir_table(struct efx_nic *efx)
+ {
+	efx_dword_t entry;
+	size_t row;
+
+	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
+		     FR_BZ_RX_INDIRECTION_TBL_ROWS);
+
+	for (row = 0; row < FR_BZ_RX_INDIRECTION_TBL_ROWS; row++) {
+		EFX_POPULATE_DWORD_1(entry, FRF_BZ_IT_QUEUE,
+				     efx->rss_context.rx_indir_table[row]);
+		efx_writed(efx, &entry,
+			   FR_BZ_RX_INDIRECTION_TBL +
+			   FR_BZ_RX_INDIRECTION_TBL_STEP * row);
+	}
+ }
+
+ /* Read the hardware RSS indirection table back into
+  * efx->rss_context.rx_indir_table, one row per entry.
+  */
+ void efx_farch_rx_pull_indir_table(struct efx_nic *efx)
+ {
+	efx_dword_t entry;
+	size_t row;
+
+	BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
+		     FR_BZ_RX_INDIRECTION_TBL_ROWS);
+
+	for (row = 0; row < FR_BZ_RX_INDIRECTION_TBL_ROWS; row++) {
+		efx_readd(efx, &entry,
+			  FR_BZ_RX_INDIRECTION_TBL +
+			  FR_BZ_RX_INDIRECTION_TBL_STEP * row);
+		efx->rss_context.rx_indir_table[row] = EFX_DWORD_FIELD(entry, FRF_BZ_IT_QUEUE);
+	}
+ }
+
+ /* Looks at available SRAM resources and works out how many queues we
+  * can support, and where things like descriptor caches should live.
+  *
+  * SRAM is split up as follows:
+  * 0                          buftbl entries for channels
+  * efx->vf_buftbl_base        buftbl entries for SR-IOV
+  * efx->rx_dc_base            RX descriptor caches
+  * efx->tx_dc_base            TX descriptor caches
+  *
+  * @sram_lim_qw is the SRAM limit the descriptor caches are placed
+  * below. With CONFIG_SFC_SRIOV, efx->vf_count may be reduced to fit.
+  */
+ void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
+ {
+	unsigned vi_count, total_tx_channels;
+ #ifdef CONFIG_SFC_SRIOV
+	struct siena_nic_data *nic_data;
+	unsigned buftbl_min;
+ #endif
+
+	total_tx_channels = efx->n_tx_channels + efx->n_extra_tx_channels;
+	vi_count = max(efx->n_channels, total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL);
+
+ #ifdef CONFIG_SFC_SRIOV
+	nic_data = efx->nic_data;
+	/* Account for the buffer table entries backing the datapath channels
+	 * and the descriptor caches for those channels.
+	 */
+	buftbl_min = ((efx->n_rx_channels * EFX_MAX_DMAQ_SIZE +
+		       total_tx_channels * EFX_MAX_TXQ_PER_CHANNEL * EFX_MAX_DMAQ_SIZE +
+		       efx->n_channels * EFX_MAX_EVQ_SIZE)
+		      * sizeof(efx_qword_t) / EFX_BUF_SIZE);
+	if (efx->type->sriov_wanted) {
+		if (efx->type->sriov_wanted(efx)) {
+			unsigned vi_dc_entries, buftbl_free;
+			unsigned entries_per_vf, vf_limit;
+
+			nic_data->vf_buftbl_base = buftbl_min;
+
+			vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES;
+			vi_count = max(vi_count, EFX_VI_BASE);
+			buftbl_free = (sram_lim_qw - buftbl_min -
+				       vi_count * vi_dc_entries);
+
+			entries_per_vf = ((vi_dc_entries +
+					   EFX_VF_BUFTBL_PER_VI) *
+					  efx_vf_size(efx));
+			/* VFs are limited both by free SRAM and by the
+			 * VI index space above EFX_VI_BASE
+			 */
+			vf_limit = min(buftbl_free / entries_per_vf,
+				       (1024U - EFX_VI_BASE) >> efx->vi_scale);
+
+			if (efx->vf_count > vf_limit) {
+				netif_err(efx, probe, efx->net_dev,
+					  "Reducing VF count from %d to %d\n",
+					  efx->vf_count, vf_limit);
+				efx->vf_count = vf_limit;
+			}
+			vi_count += efx->vf_count * efx_vf_size(efx);
+		}
+	}
+ #endif
+
+	/* Descriptor caches sit at the top of the usable SRAM: TX caches
+	 * directly below the limit, RX caches directly below those
+	 */
+	efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES;
+	efx->rx_dc_base = efx->tx_dc_base - vi_count * RX_DC_ENTRIES;
+ }
+
+ /* Read the ALTERA_BUILD register and return its version field */
+ u32 efx_farch_fpga_ver(struct efx_nic *efx)
+ {
+	efx_oword_t build_reg;
+	u32 ver;
+
+	efx_reado(efx, &build_reg, FR_AZ_ALTERA_BUILD);
+	ver = EFX_OWORD_FIELD(build_reg, FRF_AZ_ALTERA_BUILD_VER);
+
+	return ver;
+ }
+
+ void efx_farch_init_common(struct efx_nic *efx)
+ {
+ efx_oword_t temp;
+
+ /* Set positions of descriptor caches in SRAM, using the bases
+ * stored in efx->tx_dc_base and efx->rx_dc_base.
+ */
+ EFX_POPULATE_OWORD_1(temp, FRF_AZ_SRM_TX_DC_BASE_ADR, efx->tx_dc_base);
+ efx_writeo(efx, &temp, FR_AZ_SRM_TX_DC_CFG);
+ EFX_POPULATE_OWORD_1(temp, FRF_AZ_SRM_RX_DC_BASE_ADR, efx->rx_dc_base);
+ efx_writeo(efx, &temp, FR_AZ_SRM_RX_DC_CFG);
+
+ /* Set TX descriptor cache size. The register takes the order; the
+ * build-time check ties it to TX_DC_ENTRIES.
+ */
+ BUILD_BUG_ON(TX_DC_ENTRIES != (8 << TX_DC_ENTRIES_ORDER));
+ EFX_POPULATE_OWORD_1(temp, FRF_AZ_TX_DC_SIZE, TX_DC_ENTRIES_ORDER);
+ efx_writeo(efx, &temp, FR_AZ_TX_DC_CFG);
+
+ /* Set RX descriptor cache size. Set low watermark to size-8, as
+ * this allows most efficient prefetching.
+ */
+ BUILD_BUG_ON(RX_DC_ENTRIES != (8 << RX_DC_ENTRIES_ORDER));
+ EFX_POPULATE_OWORD_1(temp, FRF_AZ_RX_DC_SIZE, RX_DC_ENTRIES_ORDER);
+ efx_writeo(efx, &temp, FR_AZ_RX_DC_CFG);
+ EFX_POPULATE_OWORD_1(temp, FRF_AZ_RX_DC_PF_LWM, RX_DC_ENTRIES - 8);
+ efx_writeo(efx, &temp, FR_AZ_RX_DC_PF_WM);
+
+ /* Program INT_KER address (the DMA address of efx->irq_status) */
+ EFX_POPULATE_OWORD_2(temp,
+ FRF_AZ_NORM_INT_VEC_DIS_KER,
+ EFX_INT_MODE_USE_MSI(efx),
+ FRF_AZ_INT_ADR_KER, efx->irq_status.dma_addr);
+ efx_writeo(efx, &temp, FR_AZ_INT_ADR_KER);
+
+ if (EFX_WORKAROUND_17213(efx) && !EFX_INT_MODE_USE_MSI(efx))
+ /* Use an interrupt level unused by event queues */
+ efx->irq_level = 0x1f;
+ else
+ /* Use a valid MSI-X vector */
+ efx->irq_level = 0;
+
+ /* Enable all the genuinely fatal interrupts. (They are still
+ * masked by the overall interrupt mask, controlled by
+ * efx_farch_interrupts()).
+ *
+ * Note: All other fatal interrupts are enabled. The value built
+ * here is inverted with EFX_INVERT_OWORD() before it is written.
+ */
+ EFX_POPULATE_OWORD_3(temp,
+ FRF_AZ_ILL_ADR_INT_KER_EN, 1,
+ FRF_AZ_RBUF_OWN_INT_KER_EN, 1,
+ FRF_AZ_TBUF_OWN_INT_KER_EN, 1);
+ EFX_SET_OWORD_FIELD(temp, FRF_CZ_SRAM_PERR_INT_P_KER_EN, 1);
+ EFX_INVERT_OWORD(temp);
+ efx_writeo(efx, &temp, FR_AZ_FATAL_INTR_KER);
+
+ /* Disable the ugly timer-based TX DMA backoff and allow TX DMA to be
+ * controlled by the RX FIFO fill level. Set arbitration to one pkt/Q.
+ * This is a read-modify-write of TX_RESERVED.
+ */
+ efx_reado(efx, &temp, FR_AZ_TX_RESERVED);
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER, 0xfe);
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER_EN, 1);
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_ONE_PKT_PER_Q, 1);
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PUSH_EN, 1);
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_DIS_NON_IP_EV, 1);
+ /* Enable SW_EV to inherit in char driver - assume harmless here */
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_SOFT_EVT_EN, 1);
+ /* Prefetch threshold 2 => fetch when descriptor cache half empty */
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_THRESHOLD, 2);
+ /* Disable hardware watchdog which can misfire */
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_WD_TMR, 0x3fffff);
+ /* Squash TX of packets of 16 bytes or less */
+ EFX_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
+ efx_writeo(efx, &temp, FR_AZ_TX_RESERVED);
+
+ EFX_POPULATE_OWORD_4(temp,
+ /* Default values */
+ FRF_BZ_TX_PACE_SB_NOT_AF, 0x15,
+ FRF_BZ_TX_PACE_SB_AF, 0xb,
+ FRF_BZ_TX_PACE_FB_BASE, 0,
+ /* Allow large pace values in the fast bin. */
+ FRF_BZ_TX_PACE_BIN_TH,
+ FFE_BZ_TX_PACE_RESERVED);
+ efx_writeo(efx, &temp, FR_BZ_TX_PACE);
+ }
+
+/**************************************************************************
+ *
+ * Filter tables
+ *
+ **************************************************************************
+ */
+
+/* "Fudge factors" - difference between programmed value and actual depth.
+ * Due to pipelined implementation we need to program H/W with a value that
+ * is larger than the hop limit we want.
+ *
+ * These are added to the software search limits whenever the limits are
+ * pushed to hardware: _WILD for wildcard filter types, _FULL for full-match
+ * filter types.
+ */
+#define EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD 3
+#define EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL 1
+
+/* Hard maximum search limit. Hardware will time-out beyond 200-something.
+ * We also need to avoid infinite loops in efx_farch_filter_search() when the
+ * table is full.
+ *
+ * efx_farch_filter_insert() uses this as the maximum insertion depth for
+ * filters whose priority is above EFX_FILTER_PRI_HINT.
+ */
+#define EFX_FARCH_FILTER_CTL_SRCH_MAX 200
+
+/* Don't try very hard to find space for performance hints, as this is
+ * counter-productive.
+ *
+ * efx_farch_filter_insert() uses this as the maximum insertion depth for
+ * filters whose priority is EFX_FILTER_PRI_HINT or lower.
+ */
+#define EFX_FARCH_FILTER_CTL_SRCH_HINT_MAX 5
+
+/* Hardware filter types.
+ *
+ * The numeric values are load-bearing: efx_farch_filter_spec_table_id()
+ * derives the RX table from (type >> 2), so types 0-3 select RX_IP,
+ * 4-7 select RX_MAC and 8-11 select RX_DEF. The value is stored in the
+ * 4-bit 'type' field of struct efx_farch_filter_spec and is used to index
+ * per-type arrays of size EFX_FARCH_FILTER_TYPE_COUNT.
+ */
+enum efx_farch_filter_type {
+ EFX_FARCH_FILTER_TCP_FULL = 0,
+ EFX_FARCH_FILTER_TCP_WILD,
+ EFX_FARCH_FILTER_UDP_FULL,
+ EFX_FARCH_FILTER_UDP_WILD,
+ EFX_FARCH_FILTER_MAC_FULL = 4,
+ EFX_FARCH_FILTER_MAC_WILD,
+ EFX_FARCH_FILTER_UC_DEF = 8,
+ EFX_FARCH_FILTER_MC_DEF,
+ EFX_FARCH_FILTER_TYPE_COUNT, /* number of specific types */
+};
+
+/* Index into efx_farch_filter_state.table[].
+ *
+ * The ordering matters: efx_farch_filter_spec_table_id() computes the RX
+ * table as (type >> 2) and reaches TX_MAC by adding 2 to RX_MAC, which is
+ * enforced there with BUILD_BUG_ON(). EFX_FARCH_FILTER_TABLE_COUNT doubles
+ * as the "invalid table" value returned by efx_farch_filter_id_table_id().
+ */
+enum efx_farch_filter_table_id {
+ EFX_FARCH_FILTER_TABLE_RX_IP = 0,
+ EFX_FARCH_FILTER_TABLE_RX_MAC,
+ EFX_FARCH_FILTER_TABLE_RX_DEF,
+ EFX_FARCH_FILTER_TABLE_TX_MAC,
+ EFX_FARCH_FILTER_TABLE_COUNT,
+};
+
+/* Fixed slot numbers within the RX_DEF (default filter) table.
+ * EFX_FARCH_FILTER_SIZE_RX_DEF is the number of slots and is used as that
+ * table's size in efx_farch_filter_table_probe().
+ */
+enum efx_farch_filter_index {
+ EFX_FARCH_FILTER_INDEX_UC_DEF,
+ EFX_FARCH_FILTER_INDEX_MC_DEF,
+ EFX_FARCH_FILTER_SIZE_RX_DEF,
+};
+
+/* Compact, hardware-oriented filter specification stored per table entry.
+ * Built from a generic struct efx_filter_spec by
+ * efx_farch_filter_from_gen_spec() and expanded back by
+ * efx_farch_filter_to_gen_spec().
+ */
+struct efx_farch_filter_spec {
+ u8 type:4; /* enum efx_farch_filter_type */
+ u8 priority:4; /* copied from the generic spec's priority */
+ u8 flags; /* EFX_FILTER_FLAG_* bits */
+ u16 dmaq_id; /* queue the filter steers to */
+ u32 data[3]; /* match words; layout depends on 'type' */
+};
+
+/* Software shadow of one filter table.
+ *
+ * 'used_bitmap' and 'spec' are allocated in efx_farch_filter_table_probe()
+ * with 'size' entries each. The RX_DEF table is set up there without an
+ * offset or step; efx_farch_filter_table_restore() skips tables whose step
+ * is 0 and efx_farch_filter_table_clear_entry() refuses tables whose
+ * offset is 0.
+ */
+struct efx_farch_filter_table {
+ enum efx_farch_filter_table_id id;
+ u32 offset; /* address of table relative to BAR */
+ unsigned size; /* number of entries */
+ unsigned step; /* step between entries */
+ unsigned used; /* number currently used */
+ unsigned long *used_bitmap;
+ struct efx_farch_filter_spec *spec;
+ /* Deepest search depth needed so far, indexed by filter type */
+ unsigned search_limit[EFX_FARCH_FILTER_TYPE_COUNT];
+};
+
+/* All filter tables for one NIC, reached through efx->filter_state.
+ * Allocated by efx_farch_filter_table_probe() and freed by
+ * efx_farch_filter_table_remove().
+ */
+struct efx_farch_filter_state {
+ struct rw_semaphore lock; /* Protects table contents */
+ struct efx_farch_filter_table table[EFX_FARCH_FILTER_TABLE_COUNT];
+};
+
+/* Forward declaration: efx_farch_filter_insert() calls this before its
+ * definition appears further down.
+ */
+static void
+efx_farch_filter_table_clear_entry(struct efx_nic *efx,
+ struct efx_farch_filter_table *table,
+ unsigned int filter_idx);
+
+/* Hash a 32-bit n-tuple key for the filter tables.  The hash is the LFSR
+ * polynomial x^16 + x^3 + 1 applied to the key, with the LFSR starting
+ * from state 0xffff.
+ */
+static u16 efx_farch_filter_hash(u32 key)
+{
+	u16 lfsr;
+
+	/* First 16 rounds: mix in the upper half of the key */
+	lfsr = 0x1fff ^ (key >> 16);
+	lfsr ^= (lfsr >> 3) ^ (lfsr >> 6);
+	lfsr ^= lfsr >> 9;
+
+	/* Last 16 rounds: mix in the lower half of the key */
+	lfsr ^= (lfsr << 13) ^ key;
+	lfsr ^= (lfsr >> 3) ^ (lfsr >> 6);
+	lfsr ^= lfsr >> 9;
+
+	return lfsr;
+}
+
+/* Probe stride used after a hash collision: the search moves on from the
+ * first candidate entry in steps of this size.  The result is 2*key - 1
+ * truncated to 16 bits, and so is always odd.
+ */
+static u16 efx_farch_filter_increment(u32 key)
+{
+	u32 stride = (key << 1) - 1;
+
+	return stride;
+}
+
+/* Map a filter spec to the table that holds it.
+ *
+ * RX filters use table (type >> 2); filters flagged EFX_FILTER_FLAG_TX are
+ * offset by 2 from that.  The compile-time checks below pin the enum
+ * layouts this arithmetic relies on.
+ */
+static enum efx_farch_filter_table_id
+efx_farch_filter_spec_table_id(const struct efx_farch_filter_spec *spec)
+{
+	unsigned int table_id = spec->type >> 2;
+
+	/* Every IP type must land in the RX IP table */
+	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_IP !=
+		     (EFX_FARCH_FILTER_TCP_FULL >> 2));
+	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_IP !=
+		     (EFX_FARCH_FILTER_TCP_WILD >> 2));
+	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_IP !=
+		     (EFX_FARCH_FILTER_UDP_FULL >> 2));
+	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_IP !=
+		     (EFX_FARCH_FILTER_UDP_WILD >> 2));
+
+	/* Both MAC types must land in the RX MAC table */
+	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_MAC !=
+		     (EFX_FARCH_FILTER_MAC_FULL >> 2));
+	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_RX_MAC !=
+		     (EFX_FARCH_FILTER_MAC_WILD >> 2));
+
+	/* The TX MAC table must sit two places after the RX MAC table */
+	BUILD_BUG_ON(EFX_FARCH_FILTER_TABLE_TX_MAC !=
+		     EFX_FARCH_FILTER_TABLE_RX_MAC + 2);
+
+	if (spec->flags & EFX_FILTER_FLAG_TX)
+		table_id += 2;
+
+	return table_id;
+}
+
+/* Rebuild FR_BZ_RX_FILTER_CTL from the software filter state.
+ *
+ * This is a read-modify-write of the register: the per-type search limits
+ * (each with its fudge factor added), the default-filter queue and RSS
+ * settings, and the scatter enable bit for unmatched packets are updated;
+ * other fields keep the value that was read.
+ *
+ * Does not take state->lock itself.
+ */
+static void efx_farch_filter_push_rx_config(struct efx_nic *efx)
+{
+ struct efx_farch_filter_state *state = efx->filter_state;
+ struct efx_farch_filter_table *table;
+ efx_oword_t filter_ctl;
+
+ efx_reado(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
+
+ /* IP table limits are written unconditionally */
+ table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
+ EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_FULL_SRCH_LIMIT,
+ table->search_limit[EFX_FARCH_FILTER_TCP_FULL] +
+ EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+ EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_WILD_SRCH_LIMIT,
+ table->search_limit[EFX_FARCH_FILTER_TCP_WILD] +
+ EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+ EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_FULL_SRCH_LIMIT,
+ table->search_limit[EFX_FARCH_FILTER_UDP_FULL] +
+ EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+ EFX_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_WILD_SRCH_LIMIT,
+ table->search_limit[EFX_FARCH_FILTER_UDP_WILD] +
+ EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+
+ /* MAC table limits are only written when that table has entries */
+ table = &state->table[EFX_FARCH_FILTER_TABLE_RX_MAC];
+ if (table->size) {
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT,
+ table->search_limit[EFX_FARCH_FILTER_MAC_FULL] +
+ EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT,
+ table->search_limit[EFX_FARCH_FILTER_MAC_WILD] +
+ EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+ }
+
+ /* Default (no-match) filters live in this register, not in a table */
+ table = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
+ if (table->size) {
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_CZ_UNICAST_NOMATCH_Q_ID,
+ table->spec[EFX_FARCH_FILTER_INDEX_UC_DEF].dmaq_id);
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_CZ_UNICAST_NOMATCH_RSS_ENABLED,
+ !!(table->spec[EFX_FARCH_FILTER_INDEX_UC_DEF].flags &
+ EFX_FILTER_FLAG_RX_RSS));
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_CZ_MULTICAST_NOMATCH_Q_ID,
+ table->spec[EFX_FARCH_FILTER_INDEX_MC_DEF].dmaq_id);
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED,
+ !!(table->spec[EFX_FARCH_FILTER_INDEX_MC_DEF].flags &
+ EFX_FILTER_FLAG_RX_RSS));
+
+ /* There is a single bit to enable RX scatter for all
+ * unmatched packets. Only set it if scatter is
+ * enabled in both filter specs.
+ */
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+ !!(table->spec[EFX_FARCH_FILTER_INDEX_UC_DEF].flags &
+ table->spec[EFX_FARCH_FILTER_INDEX_MC_DEF].flags &
+ EFX_FILTER_FLAG_RX_SCATTER));
+ } else {
+ /* We don't expose 'default' filters because unmatched
+ * packets always go to the queue number found in the
+ * RSS table. But we still need to set the RX scatter
+ * bit here.
+ */
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+ efx->rx_scatter);
+ }
+
+ efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
+}
+
+/* Refresh the TX MAC filter search ranges held in FR_AZ_TX_CFG.
+ *
+ * The register is read, the full and wildcard search-range fields are
+ * updated from the TX MAC table's search limits (plus the matching fudge
+ * factor) if that table has a non-zero size, and the register is then
+ * written back.
+ */
+static void efx_farch_filter_push_tx_limits(struct efx_nic *efx)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	struct efx_farch_filter_table *tx_mac;
+	efx_oword_t reg;
+
+	efx_reado(efx, &reg, FR_AZ_TX_CFG);
+
+	tx_mac = &state->table[EFX_FARCH_FILTER_TABLE_TX_MAC];
+	if (tx_mac->size != 0) {
+		unsigned int full_range =
+			tx_mac->search_limit[EFX_FARCH_FILTER_MAC_FULL] +
+			EFX_FARCH_FILTER_CTL_SRCH_FUDGE_FULL;
+		unsigned int wild_range =
+			tx_mac->search_limit[EFX_FARCH_FILTER_MAC_WILD] +
+			EFX_FARCH_FILTER_CTL_SRCH_FUDGE_WILD;
+
+		EFX_SET_OWORD_FIELD(reg,
+				    FRF_CZ_TX_ETH_FILTER_FULL_SEARCH_RANGE,
+				    full_range);
+		EFX_SET_OWORD_FIELD(reg,
+				    FRF_CZ_TX_ETH_FILTER_WILD_SEARCH_RANGE,
+				    wild_range);
+	}
+
+	efx_writeo(efx, &reg, FR_AZ_TX_CFG);
+}
+
+/* Convert a generic filter spec into the compact hardware-oriented form.
+ *
+ * @spec: output; priority, flags, dmaq_id, type and data[] are filled in
+ * @gen_spec: generic spec to convert
+ *
+ * Only the match_flags combinations handled by the switch below are
+ * supported: IPv4 TCP/UDP local (wildcard) or local+remote (full) matches,
+ * local MAC with or without outer VLAN ID, and the unicast/multicast
+ * default match.
+ *
+ * Returns 0 on success, or
+ *   -EINVAL if RSS is requested together with a non-zero rss_context,
+ *   -EPROTONOSUPPORT for an unsupported match_flags combination, a
+ *	non-IPv4 ether_type or an IP protocol other than TCP/UDP,
+ *   -EADDRNOTAVAIL if a port that must be matched is zero.
+ * On failure some fields of @spec may already have been written.
+ */
+static int
+efx_farch_filter_from_gen_spec(struct efx_farch_filter_spec *spec,
+			       const struct efx_filter_spec *gen_spec)
+{
+	bool is_full = false;
+
+	if ((gen_spec->flags & EFX_FILTER_FLAG_RX_RSS) && gen_spec->rss_context)
+		return -EINVAL;
+
+	spec->priority = gen_spec->priority;
+	spec->flags = gen_spec->flags;
+	spec->dmaq_id = gen_spec->dmaq_id;
+
+	switch (gen_spec->match_flags) {
+	case (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
+	      EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
+	      EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT):
+		is_full = true;
+		fallthrough;
+	case (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
+	      EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT): {
+		__be32 rhost, host1, host2;
+		__be16 rport, port1, port2;
+
+		EFX_WARN_ON_PARANOID(!(gen_spec->flags & EFX_FILTER_FLAG_RX));
+
+		if (gen_spec->ether_type != htons(ETH_P_IP))
+			return -EPROTONOSUPPORT;
+		if (gen_spec->loc_port == 0 ||
+		    (is_full && gen_spec->rem_port == 0))
+			return -EADDRNOTAVAIL;
+		switch (gen_spec->ip_proto) {
+		case IPPROTO_TCP:
+			spec->type = (is_full ? EFX_FARCH_FILTER_TCP_FULL :
+				      EFX_FARCH_FILTER_TCP_WILD);
+			break;
+		case IPPROTO_UDP:
+			spec->type = (is_full ? EFX_FARCH_FILTER_UDP_FULL :
+				      EFX_FARCH_FILTER_UDP_WILD);
+			break;
+		default:
+			return -EPROTONOSUPPORT;
+		}
+
+		/* Filter is constructed in terms of source and destination,
+		 * with the odd wrinkle that the ports are swapped in a UDP
+		 * wildcard filter.  We need to convert from local and remote
+		 * (= zero for wildcard) addresses.
+		 */
+		rhost = is_full ? gen_spec->rem_host[0] : 0;
+		rport = is_full ? gen_spec->rem_port : 0;
+		host1 = rhost;
+		host2 = gen_spec->loc_host[0];
+		if (!is_full && gen_spec->ip_proto == IPPROTO_UDP) {
+			port1 = gen_spec->loc_port;
+			port2 = rport;
+		} else {
+			port1 = rport;
+			port2 = gen_spec->loc_port;
+		}
+		spec->data[0] = ntohl(host1) << 16 | ntohs(port1);
+		/* The 16-bit port is promoted to int; widen it to u32 first
+		 * so that a port >= 0x8000 is not shifted into the sign bit.
+		 */
+		spec->data[1] = (u32)ntohs(port2) << 16 | ntohl(host1) >> 16;
+		spec->data[2] = ntohl(host2);
+
+		break;
+	}
+
+	case EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_OUTER_VID:
+		is_full = true;
+		fallthrough;
+	case EFX_FILTER_MATCH_LOC_MAC:
+		spec->type = (is_full ? EFX_FARCH_FILTER_MAC_FULL :
+			      EFX_FARCH_FILTER_MAC_WILD);
+		spec->data[0] = is_full ? ntohs(gen_spec->outer_vid) : 0;
+		/* Widen before shifting by 24 so that an address byte
+		 * >= 0x80 is not shifted into the sign bit of an int.
+		 */
+		spec->data[1] = ((u32)gen_spec->loc_mac[2] << 24 |
+				 gen_spec->loc_mac[3] << 16 |
+				 gen_spec->loc_mac[4] << 8 |
+				 gen_spec->loc_mac[5]);
+		spec->data[2] = (gen_spec->loc_mac[0] << 8 |
+				 gen_spec->loc_mac[1]);
+		break;
+
+	case EFX_FILTER_MATCH_LOC_MAC_IG:
+		spec->type = (is_multicast_ether_addr(gen_spec->loc_mac) ?
+			      EFX_FARCH_FILTER_MC_DEF :
+			      EFX_FARCH_FILTER_UC_DEF);
+		memset(spec->data, 0, sizeof(spec->data)); /* ensure equality */
+		break;
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	return 0;
+}
+
+/* Expand a stored hardware-form spec back into a generic filter spec.
+ *
+ * @gen_spec: output; zeroed first, then filled in
+ * @spec: stored spec to expand
+ *
+ * This is the reverse of the packing done by
+ * efx_farch_filter_from_gen_spec(). An unrecognised spec->type triggers
+ * WARN_ON() and leaves only priority, flags and dmaq_id set.
+ */
+static void
+efx_farch_filter_to_gen_spec(struct efx_filter_spec *gen_spec,
+ const struct efx_farch_filter_spec *spec)
+{
+ bool is_full = false;
+
+ /* *gen_spec should be completely initialised, to be consistent
+ * with efx_filter_init_{rx,tx}() and in case we want to copy
+ * it back to userland.
+ */
+ memset(gen_spec, 0, sizeof(*gen_spec));
+
+ gen_spec->priority = spec->priority;
+ gen_spec->flags = spec->flags;
+ gen_spec->dmaq_id = spec->dmaq_id;
+
+ switch (spec->type) {
+ case EFX_FARCH_FILTER_TCP_FULL:
+ case EFX_FARCH_FILTER_UDP_FULL:
+ is_full = true;
+ fallthrough;
+ case EFX_FARCH_FILTER_TCP_WILD:
+ case EFX_FARCH_FILTER_UDP_WILD: {
+ __be32 host1, host2;
+ __be16 port1, port2;
+
+ gen_spec->match_flags =
+ EFX_FILTER_MATCH_ETHER_TYPE |
+ EFX_FILTER_MATCH_IP_PROTO |
+ EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT;
+ if (is_full)
+ gen_spec->match_flags |= (EFX_FILTER_MATCH_REM_HOST |
+ EFX_FILTER_MATCH_REM_PORT);
+ gen_spec->ether_type = htons(ETH_P_IP);
+ gen_spec->ip_proto =
+ (spec->type == EFX_FARCH_FILTER_TCP_FULL ||
+ spec->type == EFX_FARCH_FILTER_TCP_WILD) ?
+ IPPROTO_TCP : IPPROTO_UDP;
+
+ /* Unpack the three data words: host1 straddles data[0] and
+ * data[1], port1 is the low half of data[0], port2 the high
+ * half of data[1], and host2 is data[2].
+ */
+ host1 = htonl(spec->data[0] >> 16 | spec->data[1] << 16);
+ port1 = htons(spec->data[0]);
+ host2 = htonl(spec->data[2]);
+ port2 = htons(spec->data[1] >> 16);
+ /* For RX filters host2 is the local host; TX is the reverse */
+ if (spec->flags & EFX_FILTER_FLAG_TX) {
+ gen_spec->loc_host[0] = host1;
+ gen_spec->rem_host[0] = host2;
+ } else {
+ gen_spec->loc_host[0] = host2;
+ gen_spec->rem_host[0] = host1;
+ }
+ /* gen_spec->flags was copied from spec->flags above. The ports
+ * are swapped once for TX and once more for UDP wildcard
+ * filters, hence the exclusive-or of the two conditions.
+ */
+ if (!!(gen_spec->flags & EFX_FILTER_FLAG_TX) ^
+ (!is_full && gen_spec->ip_proto == IPPROTO_UDP)) {
+ gen_spec->loc_port = port1;
+ gen_spec->rem_port = port2;
+ } else {
+ gen_spec->loc_port = port2;
+ gen_spec->rem_port = port1;
+ }
+
+ break;
+ }
+
+ case EFX_FARCH_FILTER_MAC_FULL:
+ is_full = true;
+ fallthrough;
+ case EFX_FARCH_FILTER_MAC_WILD:
+ gen_spec->match_flags = EFX_FILTER_MATCH_LOC_MAC;
+ if (is_full)
+ gen_spec->match_flags |= EFX_FILTER_MATCH_OUTER_VID;
+ /* data[2] holds MAC bytes 0-1, data[1] holds MAC bytes 2-5 */
+ gen_spec->loc_mac[0] = spec->data[2] >> 8;
+ gen_spec->loc_mac[1] = spec->data[2];
+ gen_spec->loc_mac[2] = spec->data[1] >> 24;
+ gen_spec->loc_mac[3] = spec->data[1] >> 16;
+ gen_spec->loc_mac[4] = spec->data[1] >> 8;
+ gen_spec->loc_mac[5] = spec->data[1];
+ gen_spec->outer_vid = htons(spec->data[0]);
+ break;
+
+ case EFX_FARCH_FILTER_UC_DEF:
+ case EFX_FARCH_FILTER_MC_DEF:
+ gen_spec->match_flags = EFX_FILTER_MATCH_LOC_MAC_IG;
+ /* Only the first address byte is set: 1 for MC_DEF, 0 for UC_DEF */
+ gen_spec->loc_mac[0] = spec->type == EFX_FARCH_FILTER_MC_DEF;
+ break;
+
+ default:
+ WARN_ON(1);
+ break;
+ }
+}
+
+/* Reset a filter spec to the automatic RX default: priority AUTO, queue 0,
+ * and flags derived from the NIC's current RSS and scatter settings.
+ */
+static void
+efx_farch_filter_init_rx_auto(struct efx_nic *efx,
+			      struct efx_farch_filter_spec *spec)
+{
+	u8 flags = EFX_FILTER_FLAG_RX;
+
+	/* If there's only one channel then disable RSS for non VF
+	 * traffic, thereby allowing VFs to use RSS when the PF can't.
+	 */
+	if (efx_rss_enabled(efx))
+		flags |= EFX_FILTER_FLAG_RX_RSS;
+	if (efx->rx_scatter)
+		flags |= EFX_FILTER_FLAG_RX_SCATTER;
+
+	spec->priority = EFX_FILTER_PRI_AUTO;
+	spec->flags = flags;
+	spec->dmaq_id = 0;
+}
+
+/* Build a filter entry and return its n-tuple key.
+ *
+ * @filter: output; populated with the table-entry encoding of @spec
+ * @spec: filter to encode; its table is found with
+ * efx_farch_filter_spec_table_id()
+ *
+ * Handles the RX_IP, RX_MAC and TX_MAC tables; any other table (which
+ * includes RX_DEF) hits BUG().
+ *
+ * The returned key is the exclusive-or of the three data words and a
+ * fourth table-specific word (data3).
+ */
+static u32 efx_farch_filter_build(efx_oword_t *filter,
+ struct efx_farch_filter_spec *spec)
+{
+ u32 data3;
+
+ switch (efx_farch_filter_spec_table_id(spec)) {
+ case EFX_FARCH_FILTER_TABLE_RX_IP: {
+ bool is_udp = (spec->type == EFX_FARCH_FILTER_UDP_FULL ||
+ spec->type == EFX_FARCH_FILTER_UDP_WILD);
+ EFX_POPULATE_OWORD_7(
+ *filter,
+ FRF_BZ_RSS_EN,
+ !!(spec->flags & EFX_FILTER_FLAG_RX_RSS),
+ FRF_BZ_SCATTER_EN,
+ !!(spec->flags & EFX_FILTER_FLAG_RX_SCATTER),
+ FRF_BZ_TCP_UDP, is_udp,
+ FRF_BZ_RXQ_ID, spec->dmaq_id,
+ EFX_DWORD_2, spec->data[2],
+ EFX_DWORD_1, spec->data[1],
+ EFX_DWORD_0, spec->data[0]);
+ /* Key word 3 is the TCP/UDP selector */
+ data3 = is_udp;
+ break;
+ }
+
+ case EFX_FARCH_FILTER_TABLE_RX_MAC: {
+ bool is_wild = spec->type == EFX_FARCH_FILTER_MAC_WILD;
+ EFX_POPULATE_OWORD_7(
+ *filter,
+ FRF_CZ_RMFT_RSS_EN,
+ !!(spec->flags & EFX_FILTER_FLAG_RX_RSS),
+ FRF_CZ_RMFT_SCATTER_EN,
+ !!(spec->flags & EFX_FILTER_FLAG_RX_SCATTER),
+ FRF_CZ_RMFT_RXQ_ID, spec->dmaq_id,
+ FRF_CZ_RMFT_WILDCARD_MATCH, is_wild,
+ FRF_CZ_RMFT_DEST_MAC_HI, spec->data[2],
+ FRF_CZ_RMFT_DEST_MAC_LO, spec->data[1],
+ FRF_CZ_RMFT_VLAN_ID, spec->data[0]);
+ /* Key word 3 is the wildcard flag */
+ data3 = is_wild;
+ break;
+ }
+
+ case EFX_FARCH_FILTER_TABLE_TX_MAC: {
+ bool is_wild = spec->type == EFX_FARCH_FILTER_MAC_WILD;
+ EFX_POPULATE_OWORD_5(*filter,
+ FRF_CZ_TMFT_TXQ_ID, spec->dmaq_id,
+ FRF_CZ_TMFT_WILDCARD_MATCH, is_wild,
+ FRF_CZ_TMFT_SRC_MAC_HI, spec->data[2],
+ FRF_CZ_TMFT_SRC_MAC_LO, spec->data[1],
+ FRF_CZ_TMFT_VLAN_ID, spec->data[0]);
+ /* For TX the queue ID is part of the key as well */
+ data3 = is_wild | spec->dmaq_id << 1;
+ break;
+ }
+
+ default:
+ BUG();
+ }
+
+ return spec->data[0] ^ spec->data[1] ^ spec->data[2] ^ data3;
+}
+
+/* Decide whether two specs describe the same match.
+ *
+ * The type and all data words must agree.  If @left is a TX filter, the
+ * queue IDs must agree too.  Priority is not compared.
+ */
+static bool efx_farch_filter_equal(const struct efx_farch_filter_spec *left,
+				   const struct efx_farch_filter_spec *right)
+{
+	if (left->type != right->type)
+		return false;
+
+	if (memcmp(left->data, right->data, sizeof(left->data)) != 0)
+		return false;
+
+	if ((left->flags & EFX_FILTER_FLAG_TX) &&
+	    left->dmaq_id != right->dmaq_id)
+		return false;
+
+	return true;
+}
+
+/*
+ * Construct/deconstruct external filter IDs. At least the RX filter
+ * IDs must be ordered by matching priority, for RX NFC semantics.
+ *
+ * Deconstruction needs to be robust against invalid IDs so that
+ * efx_filter_remove_id_safe() and efx_filter_get_filter_safe() can
+ * accept user-provided IDs.
+ */
+
+/* Number of distinct match priorities in the table below (values 0-4).
+ * efx_farch_filter_make_id() adds this to the range of non-RX filters.
+ */
+#define EFX_FARCH_FILTER_MATCH_PRI_COUNT 5
+
+/* Match priority of each filter type; this becomes the "range" part of
+ * an external filter ID in efx_farch_filter_make_id().
+ */
+static const u8 efx_farch_filter_type_match_pri[EFX_FARCH_FILTER_TYPE_COUNT] = {
+ [EFX_FARCH_FILTER_TCP_FULL] = 0,
+ [EFX_FARCH_FILTER_UDP_FULL] = 0,
+ [EFX_FARCH_FILTER_TCP_WILD] = 1,
+ [EFX_FARCH_FILTER_UDP_WILD] = 1,
+ [EFX_FARCH_FILTER_MAC_FULL] = 2,
+ [EFX_FARCH_FILTER_MAC_WILD] = 3,
+ [EFX_FARCH_FILTER_UC_DEF] = 4,
+ [EFX_FARCH_FILTER_MC_DEF] = 4,
+};
+
+/* Maps the "range" part of an external filter ID back to its table.
+ * The first EFX_FARCH_FILTER_MATCH_PRI_COUNT entries are the RX ranges;
+ * the entries after them are the TX ranges. Used by
+ * efx_farch_filter_id_table_id(), which treats any range beyond the end of
+ * this array as invalid.
+ */
+static const enum efx_farch_filter_table_id efx_farch_filter_range_table[] = {
+ EFX_FARCH_FILTER_TABLE_RX_IP, /* RX match pri 0 */
+ EFX_FARCH_FILTER_TABLE_RX_IP,
+ EFX_FARCH_FILTER_TABLE_RX_MAC,
+ EFX_FARCH_FILTER_TABLE_RX_MAC,
+ EFX_FARCH_FILTER_TABLE_RX_DEF, /* RX match pri 4 */
+ EFX_FARCH_FILTER_TABLE_TX_MAC, /* TX match pri 0 */
+ EFX_FARCH_FILTER_TABLE_TX_MAC, /* TX match pri 1 */
+};
+
+/* An external filter ID is (range << INDEX_WIDTH) | table index: the low
+ * INDEX_WIDTH bits hold the index and the bits above them hold the range.
+ */
+#define EFX_FARCH_FILTER_INDEX_WIDTH 13
+#define EFX_FARCH_FILTER_INDEX_MASK ((1 << EFX_FARCH_FILTER_INDEX_WIDTH) - 1)
+
+/* Compose the external ID for the filter stored at @index: the filter's
+ * match-priority range goes in the upper bits and the table index in the
+ * lower EFX_FARCH_FILTER_INDEX_WIDTH bits.  Filters without the RX flag
+ * use the ranges that follow the RX ones.
+ */
+static inline u32
+efx_farch_filter_make_id(const struct efx_farch_filter_spec *spec,
+			 unsigned int index)
+{
+	unsigned int range = efx_farch_filter_type_match_pri[spec->type];
+
+	if ((spec->flags & EFX_FILTER_FLAG_RX) == 0)
+		range += EFX_FARCH_FILTER_MATCH_PRI_COUNT;
+
+	return (range << EFX_FARCH_FILTER_INDEX_WIDTH) | index;
+}
+
+/* Extract the table from an external filter ID.  An ID whose range is not
+ * covered by efx_farch_filter_range_table yields
+ * EFX_FARCH_FILTER_TABLE_COUNT, which callers treat as invalid.
+ */
+static inline enum efx_farch_filter_table_id
+efx_farch_filter_id_table_id(u32 id)
+{
+	unsigned int range = id >> EFX_FARCH_FILTER_INDEX_WIDTH;
+
+	if (range >= ARRAY_SIZE(efx_farch_filter_range_table))
+		return EFX_FARCH_FILTER_TABLE_COUNT; /* invalid */
+
+	return efx_farch_filter_range_table[range];
+}
+
+/* Extract the table index (low bits) from an external filter ID. */
+static inline unsigned int efx_farch_filter_id_index(u32 id)
+{
+	unsigned int index = id & EFX_FARCH_FILTER_INDEX_MASK;
+
+	return index;
+}
+
+/* Return an exclusive upper bound on RX filter IDs.
+ *
+ * The RX ranges are walked from the highest match priority downwards; the
+ * first one whose table has a non-zero size gives the limit, composed as
+ * that range combined with the table size.  Returns 0 if every RX table
+ * is empty-sized.
+ */
+u32 efx_farch_filter_get_rx_id_limit(struct efx_nic *efx)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	unsigned int range;
+
+	for (range = EFX_FARCH_FILTER_MATCH_PRI_COUNT; range-- > 0; ) {
+		enum efx_farch_filter_table_id table_id =
+			efx_farch_filter_range_table[range];
+		unsigned int n_entries = state->table[table_id].size;
+
+		if (n_entries != 0)
+			return (range << EFX_FARCH_FILTER_INDEX_WIDTH) |
+				n_entries;
+	}
+
+	return 0;
+}
+
+/* Insert a filter, possibly replacing an existing one with the same match.
+ *
+ * @efx: NIC to operate on
+ * @gen_spec: generic filter specification
+ * @replace_equal: whether an existing filter of equal priority may be
+ * replaced
+ *
+ * Takes state->lock for writing for the duration of the table update.
+ *
+ * Returns the external filter ID (see efx_farch_filter_make_id()) on
+ * success, or a negative error code:
+ * any error from efx_farch_filter_from_gen_spec();
+ * -EINVAL if the selected table has size 0;
+ * -EBUSY if no free slot was found within the search depth;
+ * -EEXIST if an equal-priority match exists and !@replace_equal;
+ * -EPERM if a matching filter of higher priority exists.
+ */
+s32 efx_farch_filter_insert(struct efx_nic *efx,
+ struct efx_filter_spec *gen_spec,
+ bool replace_equal)
+{
+ struct efx_farch_filter_state *state = efx->filter_state;
+ struct efx_farch_filter_table *table;
+ struct efx_farch_filter_spec spec;
+ efx_oword_t filter;
+ int rep_index, ins_index;
+ unsigned int depth = 0;
+ int rc;
+
+ rc = efx_farch_filter_from_gen_spec(&spec, gen_spec);
+ if (rc)
+ return rc;
+
+ down_write(&state->lock);
+
+ table = &state->table[efx_farch_filter_spec_table_id(&spec)];
+ if (table->size == 0) {
+ rc = -EINVAL;
+ goto out_unlock;
+ }
+
+ netif_vdbg(efx, hw, efx->net_dev,
+ "%s: type %d search_limit=%d", __func__, spec.type,
+ table->search_limit[spec.type]);
+
+ if (table->id == EFX_FARCH_FILTER_TABLE_RX_DEF) {
+ /* One filter spec per type */
+ BUILD_BUG_ON(EFX_FARCH_FILTER_INDEX_UC_DEF != 0);
+ BUILD_BUG_ON(EFX_FARCH_FILTER_INDEX_MC_DEF !=
+ EFX_FARCH_FILTER_MC_DEF - EFX_FARCH_FILTER_UC_DEF);
+ rep_index = spec.type - EFX_FARCH_FILTER_UC_DEF;
+ ins_index = rep_index;
+ } else {
+ /* Search concurrently for
+ * (1) a filter to be replaced (rep_index): any filter
+ * with the same match values, up to the current
+ * search depth for this type, and
+ * (2) the insertion point (ins_index): (1) or any
+ * free slot before it or up to the maximum search
+ * depth for this priority
+ * We fail if we cannot find (2).
+ *
+ * We can stop once either
+ * (a) we find (1), in which case we have definitely
+ * found (2) as well; or
+ * (b) we have searched exhaustively for (1), and have
+ * either found (2) or searched exhaustively for it
+ */
+ u32 key = efx_farch_filter_build(&filter, &spec);
+ unsigned int hash = efx_farch_filter_hash(key);
+ unsigned int incr = efx_farch_filter_increment(key);
+ unsigned int max_rep_depth = table->search_limit[spec.type];
+ unsigned int max_ins_depth =
+ spec.priority <= EFX_FILTER_PRI_HINT ?
+ EFX_FARCH_FILTER_CTL_SRCH_HINT_MAX :
+ EFX_FARCH_FILTER_CTL_SRCH_MAX;
+ /* Masking with (size - 1) assumes size is a power of two -
+ * TODO confirm against the table row counts.
+ */
+ unsigned int i = hash & (table->size - 1);
+
+ ins_index = -1;
+ depth = 1;
+
+ for (;;) {
+ if (!test_bit(i, table->used_bitmap)) {
+ if (ins_index < 0)
+ ins_index = i;
+ } else if (efx_farch_filter_equal(&spec,
+ &table->spec[i])) {
+ /* Case (a) */
+ if (ins_index < 0)
+ ins_index = i;
+ rep_index = i;
+ break;
+ }
+
+ if (depth >= max_rep_depth &&
+ (ins_index >= 0 || depth >= max_ins_depth)) {
+ /* Case (b) */
+ if (ins_index < 0) {
+ rc = -EBUSY;
+ goto out_unlock;
+ }
+ rep_index = -1;
+ break;
+ }
+
+ i = (i + incr) & (table->size - 1);
+ ++depth;
+ }
+ }
+
+ /* If we found a filter to be replaced, check whether we
+ * should do so
+ */
+ if (rep_index >= 0) {
+ struct efx_farch_filter_spec *saved_spec =
+ &table->spec[rep_index];
+
+ if (spec.priority == saved_spec->priority && !replace_equal) {
+ rc = -EEXIST;
+ goto out_unlock;
+ }
+ if (spec.priority < saved_spec->priority) {
+ rc = -EPERM;
+ goto out_unlock;
+ }
+ /* Remember that an automatic filter sits underneath, so that
+ * efx_farch_filter_remove() restores it instead of clearing.
+ */
+ if (saved_spec->priority == EFX_FILTER_PRI_AUTO ||
+ saved_spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO)
+ spec.flags |= EFX_FILTER_FLAG_RX_OVER_AUTO;
+ }
+
+ /* Insert the filter */
+ /* A new slot is only accounted for when not overwriting in place */
+ if (ins_index != rep_index) {
+ __set_bit(ins_index, table->used_bitmap);
+ ++table->used;
+ }
+ table->spec[ins_index] = spec;
+
+ if (table->id == EFX_FARCH_FILTER_TABLE_RX_DEF) {
+ efx_farch_filter_push_rx_config(efx);
+ } else {
+ /* Raise the search limit before writing the entry itself */
+ if (table->search_limit[spec.type] < depth) {
+ table->search_limit[spec.type] = depth;
+ if (spec.flags & EFX_FILTER_FLAG_TX)
+ efx_farch_filter_push_tx_limits(efx);
+ else
+ efx_farch_filter_push_rx_config(efx);
+ }
+
+ efx_writeo(efx, &filter,
+ table->offset + table->step * ins_index);
+
+ /* If we were able to replace a filter by inserting
+ * at a lower depth, clear the replaced filter
+ */
+ if (ins_index != rep_index && rep_index >= 0)
+ efx_farch_filter_table_clear_entry(efx, table,
+ rep_index);
+ }
+
+ netif_vdbg(efx, hw, efx->net_dev,
+ "%s: filter type %d index %d rxq %u set",
+ __func__, spec.type, ins_index, spec.dmaq_id);
+ rc = efx_farch_filter_make_id(&spec, ins_index);
+
+out_unlock:
+ up_write(&state->lock);
+ return rc;
+}
+
+/* Remove one entry from a filter table, in both the software shadow and
+ * the hardware table.  Must not be used on a table with offset 0 (the MAC
+ * default filters).  When the table becomes empty, all of its search
+ * limits are reset to 0 and pushed to hardware.
+ */
+static void
+efx_farch_filter_table_clear_entry(struct efx_nic *efx,
+				   struct efx_farch_filter_table *table,
+				   unsigned int filter_idx)
+{
+	/* Static storage, never written here: an all-zero table entry */
+	static efx_oword_t blank_entry;
+
+	EFX_WARN_ON_PARANOID(!test_bit(filter_idx, table->used_bitmap));
+	BUG_ON(table->offset == 0); /* can't clear MAC default filters */
+
+	/* Drop the entry from the software shadow ... */
+	__clear_bit(filter_idx, table->used_bitmap);
+	--table->used;
+	memset(&table->spec[filter_idx], 0, sizeof(table->spec[0]));
+
+	/* ... and then from the hardware table */
+	efx_writeo(efx, &blank_entry,
+		   table->offset + table->step * filter_idx);
+
+	/* If this filter required a greater search depth than
+	 * any other, the search limit for its type can now be
+	 * decreased.  However, it is hard to determine that
+	 * unless the table has become completely empty - in
+	 * which case, all its search limits can be set to 0.
+	 */
+	if (likely(table->used != 0))
+		return;
+
+	memset(table->search_limit, 0, sizeof(table->search_limit));
+	if (table->id == EFX_FARCH_FILTER_TABLE_TX_MAC)
+		efx_farch_filter_push_tx_limits(efx);
+	else
+		efx_farch_filter_push_rx_config(efx);
+}
+
+/* Remove the filter at @filter_idx if it is in use and has exactly the
+ * given priority; otherwise return -ENOENT.
+ *
+ * A filter flagged EFX_FILTER_FLAG_RX_OVER_AUTO is not cleared but reset
+ * to the automatic default, and the RX configuration is pushed again.
+ * Returns 0 on success.
+ */
+static int efx_farch_filter_remove(struct efx_nic *efx,
+				   struct efx_farch_filter_table *table,
+				   unsigned int filter_idx,
+				   enum efx_filter_priority priority)
+{
+	struct efx_farch_filter_spec *entry = &table->spec[filter_idx];
+
+	if (!test_bit(filter_idx, table->used_bitmap))
+		return -ENOENT;
+	if (entry->priority != priority)
+		return -ENOENT;
+
+	if (!(entry->flags & EFX_FILTER_FLAG_RX_OVER_AUTO)) {
+		efx_farch_filter_table_clear_entry(efx, table, filter_idx);
+		return 0;
+	}
+
+	efx_farch_filter_init_rx_auto(efx, entry);
+	efx_farch_filter_push_rx_config(efx);
+	return 0;
+}
+
+/* Remove a filter identified by an external ID that may be invalid.
+ *
+ * The table and index encoded in @filter_id are validated before the
+ * write lock is taken.  Returns 0 on success or -ENOENT if the ID is out
+ * of range, the slot is unused, or its priority differs from @priority.
+ */
+int efx_farch_filter_remove_safe(struct efx_nic *efx,
+				 enum efx_filter_priority priority,
+				 u32 filter_id)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	enum efx_farch_filter_table_id tid;
+	struct efx_farch_filter_table *tbl;
+	unsigned int idx;
+	int rc;
+
+	tid = efx_farch_filter_id_table_id(filter_id);
+	if ((unsigned int)tid >= EFX_FARCH_FILTER_TABLE_COUNT)
+		return -ENOENT;
+
+	tbl = &state->table[tid];
+	idx = efx_farch_filter_id_index(filter_id);
+	if (idx >= tbl->size)
+		return -ENOENT;
+
+	down_write(&state->lock);
+	rc = efx_farch_filter_remove(efx, tbl, idx, priority);
+	up_write(&state->lock);
+
+	return rc;
+}
+
+/* Look up a filter by an external ID that may be invalid and copy it out
+ * in generic form.
+ *
+ * Holds the read lock throughout.  Returns 0 and fills @spec_buf when the
+ * ID names an in-use entry of the given priority; otherwise returns
+ * -ENOENT and leaves @spec_buf untouched.
+ */
+int efx_farch_filter_get_safe(struct efx_nic *efx,
+			      enum efx_filter_priority priority,
+			      u32 filter_id, struct efx_filter_spec *spec_buf)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	enum efx_farch_filter_table_id tid;
+	int rc = -ENOENT;
+
+	down_read(&state->lock);
+
+	tid = efx_farch_filter_id_table_id(filter_id);
+	if ((unsigned int)tid < EFX_FARCH_FILTER_TABLE_COUNT) {
+		struct efx_farch_filter_table *tbl = &state->table[tid];
+		unsigned int idx = efx_farch_filter_id_index(filter_id);
+
+		if (idx < tbl->size &&
+		    test_bit(idx, tbl->used_bitmap) &&
+		    tbl->spec[idx].priority == priority) {
+			efx_farch_filter_to_gen_spec(spec_buf,
+						     &tbl->spec[idx]);
+			rc = 0;
+		}
+	}
+
+	up_read(&state->lock);
+	return rc;
+}
+
+/* Remove every filter of the given priority from one table.
+ *
+ * Entries whose stored priority is EFX_FILTER_PRI_AUTO are skipped
+ * outright; for the rest, efx_farch_filter_remove() decides whether the
+ * entry is in use and matches @priority.  Runs under the write lock.
+ */
+static void
+efx_farch_filter_table_clear(struct efx_nic *efx,
+			     enum efx_farch_filter_table_id table_id,
+			     enum efx_filter_priority priority)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	struct efx_farch_filter_table *tbl = &state->table[table_id];
+	unsigned int idx;
+
+	down_write(&state->lock);
+
+	for (idx = 0; idx < tbl->size; idx++) {
+		if (tbl->spec[idx].priority == EFX_FILTER_PRI_AUTO)
+			continue;
+		efx_farch_filter_remove(efx, tbl, idx, priority);
+	}
+
+	up_write(&state->lock);
+}
+
+/* Remove all RX filters of the given priority: the RX IP, RX MAC and RX
+ * default tables are cleared in that order.  Always returns 0.
+ */
+int efx_farch_filter_clear_rx(struct efx_nic *efx,
+			      enum efx_filter_priority priority)
+{
+	enum efx_farch_filter_table_id tid;
+
+	for (tid = EFX_FARCH_FILTER_TABLE_RX_IP;
+	     tid <= EFX_FARCH_FILTER_TABLE_RX_DEF;
+	     tid++)
+		efx_farch_filter_table_clear(efx, tid, priority);
+
+	return 0;
+}
+
+/* Count the in-use RX filters (IP, MAC and default tables) that have the
+ * given priority.  Runs under the read lock.
+ */
+u32 efx_farch_filter_count_rx_used(struct efx_nic *efx,
+				   enum efx_filter_priority priority)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	enum efx_farch_filter_table_id tid;
+	u32 n_used = 0;
+
+	down_read(&state->lock);
+
+	for (tid = EFX_FARCH_FILTER_TABLE_RX_IP;
+	     tid <= EFX_FARCH_FILTER_TABLE_RX_DEF;
+	     tid++) {
+		struct efx_farch_filter_table *tbl = &state->table[tid];
+		unsigned int idx;
+
+		for (idx = 0; idx < tbl->size; idx++) {
+			if (!test_bit(idx, tbl->used_bitmap))
+				continue;
+			if (tbl->spec[idx].priority != priority)
+				continue;
+			n_used++;
+		}
+	}
+
+	up_read(&state->lock);
+
+	return n_used;
+}
+
+/* Collect the external IDs of all in-use RX filters of the given priority.
+ *
+ * @buf: output array with room for @size IDs
+ *
+ * Returns the number of IDs stored, or -EMSGSIZE if a matching filter is
+ * found once @buf already holds @size IDs.  Runs under the read lock.
+ */
+s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx,
+				enum efx_filter_priority priority,
+				u32 *buf, u32 size)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	enum efx_farch_filter_table_id tid;
+	s32 n_ids = 0;
+
+	down_read(&state->lock);
+
+	for (tid = EFX_FARCH_FILTER_TABLE_RX_IP;
+	     tid <= EFX_FARCH_FILTER_TABLE_RX_DEF;
+	     tid++) {
+		struct efx_farch_filter_table *tbl = &state->table[tid];
+		unsigned int idx;
+
+		for (idx = 0; idx < tbl->size; idx++) {
+			if (!test_bit(idx, tbl->used_bitmap))
+				continue;
+			if (tbl->spec[idx].priority != priority)
+				continue;
+
+			if (n_ids == size) {
+				n_ids = -EMSGSIZE;
+				goto unlock;
+			}
+			buf[n_ids++] =
+				efx_farch_filter_make_id(&tbl->spec[idx], idx);
+		}
+	}
+unlock:
+	up_read(&state->lock);
+
+	return n_ids;
+}
+
+/* Restore filter state after reset: rewrite every in-use entry of every
+ * register-backed table from the software shadow, then push the RX
+ * configuration and TX search limits.  Runs under the write lock.
+ */
+void efx_farch_filter_table_restore(struct efx_nic *efx)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	enum efx_farch_filter_table_id tid;
+
+	down_write(&state->lock);
+
+	for (tid = 0; tid < EFX_FARCH_FILTER_TABLE_COUNT; tid++) {
+		struct efx_farch_filter_table *tbl = &state->table[tid];
+		unsigned int idx;
+
+		/* Only regular register tables have a non-zero step */
+		if (tbl->step == 0)
+			continue;
+
+		for (idx = 0; idx < tbl->size; idx++) {
+			efx_oword_t entry;
+
+			if (!test_bit(idx, tbl->used_bitmap))
+				continue;
+
+			efx_farch_filter_build(&entry, &tbl->spec[idx]);
+			efx_writeo(efx, &entry,
+				   tbl->offset + tbl->step * idx);
+		}
+	}
+
+	efx_farch_filter_push_rx_config(efx);
+	efx_farch_filter_push_tx_limits(efx);
+
+	up_write(&state->lock);
+}
+
+/* Free the filter state: each table's bitmap and spec array, then the
+ * state structure itself.  efx->filter_state is not modified here.
+ */
+void efx_farch_filter_table_remove(struct efx_nic *efx)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	enum efx_farch_filter_table_id tid;
+
+	for (tid = 0; tid < EFX_FARCH_FILTER_TABLE_COUNT; tid++) {
+		struct efx_farch_filter_table *tbl = &state->table[tid];
+
+		kfree(tbl->used_bitmap);
+		vfree(tbl->spec);
+	}
+
+	kfree(state);
+}
+
+/* Allocate and initialise the software filter state.
+ *
+ * Describes the four tables, allocates a usage bitmap and spec array for
+ * each table with a non-zero size, installs the two automatic RX default
+ * filters and pushes the RX configuration to hardware.
+ *
+ * Returns 0 on success or -ENOMEM if any allocation fails; in the latter
+ * case everything allocated so far is released through
+ * efx_farch_filter_table_remove().
+ */
+int efx_farch_filter_table_probe(struct efx_nic *efx)
+{
+	struct efx_farch_filter_state *state;
+	struct efx_farch_filter_table *tbl;
+	unsigned int tid;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state == NULL)
+		return -ENOMEM;
+	efx->filter_state = state;
+	init_rwsem(&state->lock);
+
+	/* Describe each table; anything not set here stays zero */
+	tbl = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
+	tbl->id = EFX_FARCH_FILTER_TABLE_RX_IP;
+	tbl->offset = FR_BZ_RX_FILTER_TBL0;
+	tbl->size = FR_BZ_RX_FILTER_TBL0_ROWS;
+	tbl->step = FR_BZ_RX_FILTER_TBL0_STEP;
+
+	tbl = &state->table[EFX_FARCH_FILTER_TABLE_RX_MAC];
+	tbl->id = EFX_FARCH_FILTER_TABLE_RX_MAC;
+	tbl->offset = FR_CZ_RX_MAC_FILTER_TBL0;
+	tbl->size = FR_CZ_RX_MAC_FILTER_TBL0_ROWS;
+	tbl->step = FR_CZ_RX_MAC_FILTER_TBL0_STEP;
+
+	tbl = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
+	tbl->id = EFX_FARCH_FILTER_TABLE_RX_DEF;
+	tbl->size = EFX_FARCH_FILTER_SIZE_RX_DEF;
+
+	tbl = &state->table[EFX_FARCH_FILTER_TABLE_TX_MAC];
+	tbl->id = EFX_FARCH_FILTER_TABLE_TX_MAC;
+	tbl->offset = FR_CZ_TX_MAC_FILTER_TBL0;
+	tbl->size = FR_CZ_TX_MAC_FILTER_TBL0_ROWS;
+	tbl->step = FR_CZ_TX_MAC_FILTER_TBL0_STEP;
+
+	/* Allocate the shadow storage of every non-empty table */
+	for (tid = 0; tid < EFX_FARCH_FILTER_TABLE_COUNT; tid++) {
+		tbl = &state->table[tid];
+		if (!tbl->size)
+			continue;
+
+		tbl->used_bitmap = kcalloc(BITS_TO_LONGS(tbl->size),
+					   sizeof(unsigned long),
+					   GFP_KERNEL);
+		if (tbl->used_bitmap == NULL)
+			goto fail;
+
+		tbl->spec = vzalloc(array_size(sizeof(*tbl->spec),
+					       tbl->size));
+		if (tbl->spec == NULL)
+			goto fail;
+	}
+
+	tbl = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
+	if (tbl->size) {
+		/* RX default filters must always exist */
+		unsigned int slot;
+
+		for (slot = 0; slot < EFX_FARCH_FILTER_SIZE_RX_DEF; slot++) {
+			struct efx_farch_filter_spec *def = &tbl->spec[slot];
+
+			def->type = EFX_FARCH_FILTER_UC_DEF + slot;
+			efx_farch_filter_init_rx_auto(efx, def);
+			__set_bit(slot, tbl->used_bitmap);
+		}
+	}
+
+	efx_farch_filter_push_rx_config(efx);
+
+	return 0;
+
+fail:
+	efx_farch_filter_table_remove(efx);
+	return -ENOMEM;
+}
+
+/* Bring the scatter flag of every RX filter that targets one of our own
+ * RX queues (dmaq_id below efx->n_rx_channels) into line with
+ * efx->rx_scatter, rewriting the hardware entry where there is one.
+ * Default filters have no table entry; they are covered by the final
+ * efx_farch_filter_push_rx_config().  Runs under the write lock.
+ */
+void efx_farch_filter_update_rx_scatter(struct efx_nic *efx)
+{
+	struct efx_farch_filter_state *state = efx->filter_state;
+	enum efx_farch_filter_table_id tid;
+
+	down_write(&state->lock);
+
+	for (tid = EFX_FARCH_FILTER_TABLE_RX_IP;
+	     tid <= EFX_FARCH_FILTER_TABLE_RX_DEF;
+	     tid++) {
+		struct efx_farch_filter_table *tbl = &state->table[tid];
+		unsigned int idx;
+
+		for (idx = 0; idx < tbl->size; idx++) {
+			struct efx_farch_filter_spec *entry = &tbl->spec[idx];
+			efx_oword_t hw_entry;
+
+			if (!test_bit(idx, tbl->used_bitmap))
+				continue;
+			if (entry->dmaq_id >= efx->n_rx_channels)
+				continue;
+
+			if (efx->rx_scatter)
+				entry->flags |= EFX_FILTER_FLAG_RX_SCATTER;
+			else
+				entry->flags &= ~EFX_FILTER_FLAG_RX_SCATTER;
+
+			/* Default filters are pushed by
+			 * efx_farch_filter_push_rx_config() instead
+			 */
+			if (tid != EFX_FARCH_FILTER_TABLE_RX_DEF) {
+				efx_farch_filter_build(&hw_entry, entry);
+				efx_writeo(efx, &hw_entry,
+					   tbl->offset + tbl->step * idx);
+			}
+		}
+	}
+
+	efx_farch_filter_push_rx_config(efx);
+
+	up_write(&state->lock);
+}
+
+#ifdef CONFIG_RFS_ACCEL
+
+/* Try to expire the accelerated-RFS filter at @index in the RX IP table.
+ *
+ * Only in-use entries with priority EFX_FILTER_PRI_HINT are considered.
+ * The entry is removed when removal is forced (no matching rule in the
+ * ARFS hash table, or efx_rps_check_rule() sets the force flag) or when
+ * rps_may_expire_flow() allows it.
+ *
+ * Takes state->lock for writing and then efx->rps_hash_lock (BH-safe).
+ *
+ * Returns true if the filter was removed, false otherwise.
+ */
+bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
+ unsigned int index)
+{
+ struct efx_farch_filter_state *state = efx->filter_state;
+ struct efx_farch_filter_table *table;
+ bool ret = false, force = false;
+ /* Left unset when force becomes true without a rule; that is safe
+ * because 'force ||' below short-circuits before arfs_id is read.
+ */
+ u16 arfs_id;
+
+ down_write(&state->lock);
+ spin_lock_bh(&efx->rps_hash_lock);
+ table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
+ if (test_bit(index, table->used_bitmap) &&
+ table->spec[index].priority == EFX_FILTER_PRI_HINT) {
+ struct efx_arfs_rule *rule = NULL;
+ struct efx_filter_spec spec;
+
+ efx_farch_filter_to_gen_spec(&spec, &table->spec[index]);
+ if (!efx->rps_hash_table) {
+ /* In the absence of the table, we always returned 0 to
+ * ARFS, so use the same to query it.
+ */
+ arfs_id = 0;
+ } else {
+ rule = efx_rps_hash_find(efx, &spec);
+ if (!rule) {
+ /* ARFS table doesn't know of this filter, remove it */
+ force = true;
+ } else {
+ arfs_id = rule->arfs_id;
+ /* A false result means: leave this filter alone */
+ if (!efx_rps_check_rule(rule, index, &force))
+ goto out_unlock;
+ }
+ }
+ if (force || rps_may_expire_flow(efx->net_dev, spec.dmaq_id,
+ flow_id, arfs_id)) {
+ if (rule)
+ rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
+ efx_rps_hash_del(efx, &spec);
+ efx_farch_filter_table_clear_entry(efx, table, index);
+ ret = true;
+ }
+ }
+out_unlock:
+ /* Release in the reverse order of acquisition */
+ spin_unlock_bh(&efx->rps_hash_lock);
+ up_write(&state->lock);
+ return ret;
+}
+
+#endif /* CONFIG_RFS_ACCEL */
+
+/* Recompute efx->unicast_filter and efx->multicast_hash from the net
+ * device's flags and multicast address list.  Does nothing if the device
+ * is not registered.  The address list lock is held while this runs.
+ */
+void efx_farch_filter_sync_rx_mode(struct efx_nic *efx)
+{
+	union efx_multicast_hash *hash = &efx->multicast_hash;
+	struct net_device *dev = efx->net_dev;
+	struct netdev_hw_addr *mc_addr;
+
+	if (!efx_dev_registered(efx))
+		return;
+
+	netif_addr_lock_bh(dev);
+
+	efx->unicast_filter = !(dev->flags & IFF_PROMISC);
+
+	/* Build multicast hash table */
+	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+		/* Accept everything: set every bit of the hash */
+		memset(hash, 0xff, sizeof(*hash));
+		goto unlock;
+	}
+
+	memset(hash, 0x00, sizeof(*hash));
+	netdev_for_each_mc_addr(mc_addr, dev) {
+		u32 crc = ether_crc_le(ETH_ALEN, mc_addr->addr);
+		int bit = crc & (EFX_MCAST_HASH_ENTRIES - 1);
+
+		__set_bit_le(bit, hash);
+	}
+
+	/* Broadcast packets go through the multicast hash filter.
+	 * ether_crc_le() of the broadcast address is 0xbe2612ff
+	 * so we always add bit 0xff to the mask.
+	 */
+	__set_bit_le(0xff, hash);
+
+unlock:
+	netif_addr_unlock_bh(dev);
+}
diff --git a/drivers/net/ethernet/sfc/siena/siena.c b/drivers/net/ethernet/sfc/siena/siena.c
new file mode 100644
index 000000000000..ce3060e15b54
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/siena.c
@@ -0,0 +1,1109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include "net_driver.h"
+#include "bitfield.h"
+#include "efx.h"
+#include "efx_common.h"
+#include "nic.h"
+#include "farch_regs.h"
+#include "io.h"
+#include "workarounds.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+#include "mcdi_port.h"
+#include "mcdi_port_common.h"
+#include "selftest.h"
+#include "siena_sriov.h"
+#include "rx_common.h"
+
+/* Hardware control for SFC9000 family including SFL9021 (aka Siena). */
+
+static void siena_init_wol(struct efx_nic *efx);
+
+
+static void siena_push_irq_moderation(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+ efx_dword_t timer_cmd;
+
+ if (channel->irq_moderation_us) {
+ unsigned int ticks;
+
+ ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
+ EFX_POPULATE_DWORD_2(timer_cmd,
+ FRF_CZ_TC_TIMER_MODE,
+ FFE_CZ_TIMER_MODE_INT_HLDOFF,
+ FRF_CZ_TC_TIMER_VAL,
+ ticks - 1);
+ } else {
+ EFX_POPULATE_DWORD_2(timer_cmd,
+ FRF_CZ_TC_TIMER_MODE,
+ FFE_CZ_TIMER_MODE_DIS,
+ FRF_CZ_TC_TIMER_VAL, 0);
+ }
+ efx_writed_page_locked(channel->efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0,
+ channel->channel);
+}
+
+void siena_prepare_flush(struct efx_nic *efx)
+{
+ if (efx->fc_disable++ == 0)
+ efx_mcdi_set_mac(efx);
+}
+
+void siena_finish_flush(struct efx_nic *efx)
+{
+ if (--efx->fc_disable == 0)
+ efx_mcdi_set_mac(efx);
+}
+
+static const struct efx_farch_register_test siena_register_tests[] = {
+ { FR_AZ_ADR_REGION,
+ EFX_OWORD32(0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF) },
+ { FR_CZ_USR_EV_CFG,
+ EFX_OWORD32(0x000103FF, 0x00000000, 0x00000000, 0x00000000) },
+ { FR_AZ_RX_CFG,
+ EFX_OWORD32(0xFFFFFFFE, 0xFFFFFFFF, 0x0003FFFF, 0x00000000) },
+ { FR_AZ_TX_CFG,
+ EFX_OWORD32(0x7FFF0037, 0xFFFF8000, 0xFFFFFFFF, 0x03FFFFFF) },
+ { FR_AZ_TX_RESERVED,
+ EFX_OWORD32(0xFFFEFE80, 0x1FFFFFFF, 0x020000FE, 0x007FFFFF) },
+ { FR_AZ_SRM_TX_DC_CFG,
+ EFX_OWORD32(0x001FFFFF, 0x00000000, 0x00000000, 0x00000000) },
+ { FR_AZ_RX_DC_CFG,
+ EFX_OWORD32(0x00000003, 0x00000000, 0x00000000, 0x00000000) },
+ { FR_AZ_RX_DC_PF_WM,
+ EFX_OWORD32(0x000003FF, 0x00000000, 0x00000000, 0x00000000) },
+ { FR_BZ_DP_CTRL,
+ EFX_OWORD32(0x00000FFF, 0x00000000, 0x00000000, 0x00000000) },
+ { FR_BZ_RX_RSS_TKEY,
+ EFX_OWORD32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF) },
+ { FR_CZ_RX_RSS_IPV6_REG1,
+ EFX_OWORD32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF) },
+ { FR_CZ_RX_RSS_IPV6_REG2,
+ EFX_OWORD32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF) },
+ { FR_CZ_RX_RSS_IPV6_REG3,
+ EFX_OWORD32(0xFFFFFFFF, 0xFFFFFFFF, 0x00000007, 0x00000000) },
+};
+
+static int siena_test_chip(struct efx_nic *efx, struct efx_self_tests *tests)
+{
+ enum reset_type reset_method = RESET_TYPE_ALL;
+ int rc, rc2;
+
+ efx_reset_down(efx, reset_method);
+
+ /* Reset the chip immediately so that it is completely
+ * quiescent regardless of what any VF driver does.
+ */
+ rc = efx_mcdi_reset(efx, reset_method);
+ if (rc)
+ goto out;
+
+ tests->registers =
+ efx_farch_test_registers(efx, siena_register_tests,
+ ARRAY_SIZE(siena_register_tests))
+ ? -1 : 1;
+
+ rc = efx_mcdi_reset(efx, reset_method);
+out:
+ rc2 = efx_reset_up(efx, reset_method, rc == 0);
+ return rc ? rc : rc2;
+}
+
+/**************************************************************************
+ *
+ * PTP
+ *
+ **************************************************************************
+ */
+
+static void siena_ptp_write_host_time(struct efx_nic *efx, u32 host_time)
+{
+ _efx_writed(efx, cpu_to_le32(host_time),
+ FR_CZ_MC_TREG_SMEM + MC_SMEM_P0_PTP_TIME_OFST);
+}
+
+static int siena_ptp_set_ts_config(struct efx_nic *efx,
+ struct hwtstamp_config *init)
+{
+ int rc;
+
+ switch (init->rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ /* if TX timestamping is still requested then leave PTP on */
+ return efx_ptp_change_mode(efx,
+ init->tx_type != HWTSTAMP_TX_OFF,
+ efx_ptp_get_mode(efx));
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ init->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
+ return efx_ptp_change_mode(efx, true, MC_CMD_PTP_MODE_V1);
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ init->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
+ rc = efx_ptp_change_mode(efx, true,
+ MC_CMD_PTP_MODE_V2_ENHANCED);
+ /* bug 33070 - old versions of the firmware do not support the
+ * improved UUID filtering option. Similarly old versions of the
+ * application do not expect it to be enabled. If the firmware
+ * does not accept the enhanced mode, fall back to the standard
+ * PTP v2 UUID filtering. */
+ if (rc != 0)
+ rc = efx_ptp_change_mode(efx, true, MC_CMD_PTP_MODE_V2);
+ return rc;
+ default:
+ return -ERANGE;
+ }
+}
+
+/**************************************************************************
+ *
+ * Device reset
+ *
+ **************************************************************************
+ */
+
+static int siena_map_reset_flags(u32 *flags)
+{
+ enum {
+ SIENA_RESET_PORT = (ETH_RESET_DMA | ETH_RESET_FILTER |
+ ETH_RESET_OFFLOAD | ETH_RESET_MAC |
+ ETH_RESET_PHY),
+ SIENA_RESET_MC = (SIENA_RESET_PORT |
+ ETH_RESET_MGMT << ETH_RESET_SHARED_SHIFT),
+ };
+
+ if ((*flags & SIENA_RESET_MC) == SIENA_RESET_MC) {
+ *flags &= ~SIENA_RESET_MC;
+ return RESET_TYPE_WORLD;
+ }
+
+ if ((*flags & SIENA_RESET_PORT) == SIENA_RESET_PORT) {
+ *flags &= ~SIENA_RESET_PORT;
+ return RESET_TYPE_ALL;
+ }
+
+ /* no invisible reset implemented */
+
+ return -EINVAL;
+}
+
+#ifdef CONFIG_EEH
+/* When a PCI device is isolated from the bus, a subsequent MMIO read is
+ * required for the kernel EEH mechanisms to notice. As the Solarflare driver
+ * was written to minimise MMIO reads (for latency), a periodic call to check
+ * the EEH status of the device is required so that device recovery can happen
+ * in a timely fashion.
+ */
+static void siena_monitor(struct efx_nic *efx)
+{
+ struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
+
+ eeh_dev_check_failure(eehdev);
+}
+#endif
+
+static int siena_probe_nvconfig(struct efx_nic *efx)
+{
+ u32 caps = 0;
+ int rc;
+
+ rc = efx_mcdi_get_board_cfg(efx, efx->net_dev->perm_addr, NULL, &caps);
+
+ efx->timer_quantum_ns =
+ (caps & (1 << MC_CMD_CAPABILITIES_TURBO_ACTIVE_LBN)) ?
+ 3072 : 6144; /* 768 cycles */
+ efx->timer_max_ns = efx->type->timer_period_max *
+ efx->timer_quantum_ns;
+
+ return rc;
+}
+
+static int siena_dimension_resources(struct efx_nic *efx)
+{
+ /* Each port has a small block of internal SRAM dedicated to
+ * the buffer table and descriptor caches. In theory we can
+ * map both blocks to one port, but we don't.
+ */
+ efx_farch_dimension_resources(efx, FR_CZ_BUF_FULL_TBL_ROWS / 2);
+ return 0;
+}
+
+/* On all Falcon-architecture NICs, PFs use BAR 0 for I/O space and BAR 2(&3)
+ * for memory.
+ */
+static unsigned int siena_mem_bar(struct efx_nic *efx)
+{
+ return 2;
+}
+
+static unsigned int siena_mem_map_size(struct efx_nic *efx)
+{
+ return FR_CZ_MC_TREG_SMEM +
+ FR_CZ_MC_TREG_SMEM_STEP * FR_CZ_MC_TREG_SMEM_ROWS;
+}
+
+static int siena_probe_nic(struct efx_nic *efx)
+{
+ struct siena_nic_data *nic_data;
+ efx_oword_t reg;
+ int rc;
+
+ /* Allocate storage for hardware specific data */
+ nic_data = kzalloc(sizeof(struct siena_nic_data), GFP_KERNEL);
+ if (!nic_data)
+ return -ENOMEM;
+ nic_data->efx = efx;
+ efx->nic_data = nic_data;
+
+ if (efx_farch_fpga_ver(efx) != 0) {
+ netif_err(efx, probe, efx->net_dev,
+ "Siena FPGA not supported\n");
+ rc = -ENODEV;
+ goto fail1;
+ }
+
+ efx->max_channels = EFX_MAX_CHANNELS;
+ efx->max_vis = EFX_MAX_CHANNELS;
+ efx->max_tx_channels = EFX_MAX_CHANNELS;
+ efx->tx_queues_per_channel = 4;
+
+ efx_reado(efx, &reg, FR_AZ_CS_DEBUG);
+ efx->port_num = EFX_OWORD_FIELD(reg, FRF_CZ_CS_PORT_NUM) - 1;
+
+ rc = efx_mcdi_init(efx);
+ if (rc)
+ goto fail1;
+
+ /* Now we can reset the NIC */
+ rc = efx_mcdi_reset(efx, RESET_TYPE_ALL);
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev, "failed to reset NIC\n");
+ goto fail3;
+ }
+
+ siena_init_wol(efx);
+
+ /* Allocate memory for INT_KER */
+ rc = efx_nic_alloc_buffer(efx, &efx->irq_status, sizeof(efx_oword_t),
+ GFP_KERNEL);
+ if (rc)
+ goto fail4;
+ BUG_ON(efx->irq_status.dma_addr & 0x0f);
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "INT_KER at %llx (virt %p phys %llx)\n",
+ (unsigned long long)efx->irq_status.dma_addr,
+ efx->irq_status.addr,
+ (unsigned long long)virt_to_phys(efx->irq_status.addr));
+
+ /* Read in the non-volatile configuration */
+ rc = siena_probe_nvconfig(efx);
+ if (rc == -EINVAL) {
+ netif_err(efx, probe, efx->net_dev,
+ "NVRAM is invalid therefore using defaults\n");
+ efx->phy_type = PHY_TYPE_NONE;
+ efx->mdio.prtad = MDIO_PRTAD_NONE;
+ } else if (rc) {
+ goto fail5;
+ }
+
+ rc = efx_mcdi_mon_probe(efx);
+ if (rc)
+ goto fail5;
+
+#ifdef CONFIG_SFC_SRIOV
+ efx_siena_sriov_probe(efx);
+#endif
+ efx_ptp_defer_probe_with_channel(efx);
+
+ return 0;
+
+fail5:
+ efx_nic_free_buffer(efx, &efx->irq_status);
+fail4:
+fail3:
+ efx_mcdi_detach(efx);
+ efx_mcdi_fini(efx);
+fail1:
+ kfree(efx->nic_data);
+ return rc;
+}
+
+static int siena_rx_pull_rss_config(struct efx_nic *efx)
+{
+ efx_oword_t temp;
+
+ /* Read from IPv6 RSS key as that's longer (the IPv4 key is just the
+ * first 128 bits of the same key, assuming it's been set by
+ * siena_rx_push_rss_config, below)
+ */
+ efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
+ memcpy(efx->rss_context.rx_hash_key, &temp, sizeof(temp));
+ efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
+ memcpy(efx->rss_context.rx_hash_key + sizeof(temp), &temp, sizeof(temp));
+ efx_reado(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
+ memcpy(efx->rss_context.rx_hash_key + 2 * sizeof(temp), &temp,
+ FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
+ efx_farch_rx_pull_indir_table(efx);
+ return 0;
+}
+
+static int siena_rx_push_rss_config(struct efx_nic *efx, bool user,
+ const u32 *rx_indir_table, const u8 *key)
+{
+ efx_oword_t temp;
+
+ /* Set hash key for IPv4 */
+ if (key)
+ memcpy(efx->rss_context.rx_hash_key, key, sizeof(temp));
+ memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
+ efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY);
+
+ /* Enable IPv6 RSS */
+ BUILD_BUG_ON(sizeof(efx->rss_context.rx_hash_key) <
+ 2 * sizeof(temp) + FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8 ||
+ FRF_CZ_RX_RSS_IPV6_TKEY_HI_LBN != 0);
+ memcpy(&temp, efx->rss_context.rx_hash_key, sizeof(temp));
+ efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG1);
+ memcpy(&temp, efx->rss_context.rx_hash_key + sizeof(temp), sizeof(temp));
+ efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG2);
+ EFX_POPULATE_OWORD_2(temp, FRF_CZ_RX_RSS_IPV6_THASH_ENABLE, 1,
+ FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE, 1);
+ memcpy(&temp, efx->rss_context.rx_hash_key + 2 * sizeof(temp),
+ FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH / 8);
+ efx_writeo(efx, &temp, FR_CZ_RX_RSS_IPV6_REG3);
+
+ memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
+ efx_farch_rx_push_indir_table(efx);
+
+ return 0;
+}
+
+/* This call performs hardware-specific global initialisation, such as
+ * defining the descriptor cache sizes and number of RSS channels.
+ * It does not set up any buffers, descriptor rings or event queues.
+ */
+static int siena_init_nic(struct efx_nic *efx)
+{
+ efx_oword_t temp;
+ int rc;
+
+ /* Recover from a failed assertion post-reset */
+ rc = efx_mcdi_handle_assertion(efx);
+ if (rc)
+ return rc;
+
+ /* Squash TX of packets of 16 bytes or less */
+ efx_reado(efx, &temp, FR_AZ_TX_RESERVED);
+ EFX_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
+ efx_writeo(efx, &temp, FR_AZ_TX_RESERVED);
+
+ /* Do not enable TX_NO_EOP_DISC_EN, since it limits packets to 16
+ * descriptors (which is bad).
+ */
+ efx_reado(efx, &temp, FR_AZ_TX_CFG);
+ EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_NO_EOP_DISC_EN, 0);
+ EFX_SET_OWORD_FIELD(temp, FRF_CZ_TX_FILTER_EN_BIT, 1);
+ efx_writeo(efx, &temp, FR_AZ_TX_CFG);
+
+ efx_reado(efx, &temp, FR_AZ_RX_CFG);
+ EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_DESC_PUSH_EN, 0);
+ EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_INGR_EN, 1);
+ /* Enable hash insertion. This is broken for the 'Falcon' hash
+ * if IPv6 hashing is also enabled, so also select Toeplitz
+ * TCP/IPv4 and IPv4 hashes. */
+ EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_INSRT_HDR, 1);
+ EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_ALG, 1);
+ EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_IP_HASH, 1);
+ EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_USR_BUF_SIZE,
+ EFX_RX_USR_BUF_SIZE >> 5);
+ efx_writeo(efx, &temp, FR_AZ_RX_CFG);
+
+ siena_rx_push_rss_config(efx, false, efx->rss_context.rx_indir_table, NULL);
+ efx->rss_context.context_id = 0; /* indicates RSS is active */
+
+ /* Enable event logging */
+ rc = efx_mcdi_log_ctrl(efx, true, false, 0);
+ if (rc)
+ return rc;
+
+ /* Set destination of both TX and RX Flush events */
+ EFX_POPULATE_OWORD_1(temp, FRF_BZ_FLS_EVQ_ID, 0);
+ efx_writeo(efx, &temp, FR_BZ_DP_CTRL);
+
+ EFX_POPULATE_OWORD_1(temp, FRF_CZ_USREV_DIS, 1);
+ efx_writeo(efx, &temp, FR_CZ_USR_EV_CFG);
+
+ efx_farch_init_common(efx);
+ return 0;
+}
+
+static void siena_remove_nic(struct efx_nic *efx)
+{
+ efx_mcdi_mon_remove(efx);
+
+ efx_nic_free_buffer(efx, &efx->irq_status);
+
+ efx_mcdi_reset(efx, RESET_TYPE_ALL);
+
+ efx_mcdi_detach(efx);
+ efx_mcdi_fini(efx);
+
+ /* Tear down the private nic state */
+ kfree(efx->nic_data);
+ efx->nic_data = NULL;
+}
+
+#define SIENA_DMA_STAT(ext_name, mcdi_name) \
+ [SIENA_STAT_ ## ext_name] = \
+ { #ext_name, 64, 8 * MC_CMD_MAC_ ## mcdi_name }
+#define SIENA_OTHER_STAT(ext_name) \
+ [SIENA_STAT_ ## ext_name] = { #ext_name, 0, 0 }
+#define GENERIC_SW_STAT(ext_name) \
+ [GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
+
+static const struct efx_hw_stat_desc siena_stat_desc[SIENA_STAT_COUNT] = {
+ SIENA_DMA_STAT(tx_bytes, TX_BYTES),
+ SIENA_OTHER_STAT(tx_good_bytes),
+ SIENA_DMA_STAT(tx_bad_bytes, TX_BAD_BYTES),
+ SIENA_DMA_STAT(tx_packets, TX_PKTS),
+ SIENA_DMA_STAT(tx_bad, TX_BAD_FCS_PKTS),
+ SIENA_DMA_STAT(tx_pause, TX_PAUSE_PKTS),
+ SIENA_DMA_STAT(tx_control, TX_CONTROL_PKTS),
+ SIENA_DMA_STAT(tx_unicast, TX_UNICAST_PKTS),
+ SIENA_DMA_STAT(tx_multicast, TX_MULTICAST_PKTS),
+ SIENA_DMA_STAT(tx_broadcast, TX_BROADCAST_PKTS),
+ SIENA_DMA_STAT(tx_lt64, TX_LT64_PKTS),
+ SIENA_DMA_STAT(tx_64, TX_64_PKTS),
+ SIENA_DMA_STAT(tx_65_to_127, TX_65_TO_127_PKTS),
+ SIENA_DMA_STAT(tx_128_to_255, TX_128_TO_255_PKTS),
+ SIENA_DMA_STAT(tx_256_to_511, TX_256_TO_511_PKTS),
+ SIENA_DMA_STAT(tx_512_to_1023, TX_512_TO_1023_PKTS),
+ SIENA_DMA_STAT(tx_1024_to_15xx, TX_1024_TO_15XX_PKTS),
+ SIENA_DMA_STAT(tx_15xx_to_jumbo, TX_15XX_TO_JUMBO_PKTS),
+ SIENA_DMA_STAT(tx_gtjumbo, TX_GTJUMBO_PKTS),
+ SIENA_OTHER_STAT(tx_collision),
+ SIENA_DMA_STAT(tx_single_collision, TX_SINGLE_COLLISION_PKTS),
+ SIENA_DMA_STAT(tx_multiple_collision, TX_MULTIPLE_COLLISION_PKTS),
+ SIENA_DMA_STAT(tx_excessive_collision, TX_EXCESSIVE_COLLISION_PKTS),
+ SIENA_DMA_STAT(tx_deferred, TX_DEFERRED_PKTS),
+ SIENA_DMA_STAT(tx_late_collision, TX_LATE_COLLISION_PKTS),
+ SIENA_DMA_STAT(tx_excessive_deferred, TX_EXCESSIVE_DEFERRED_PKTS),
+ SIENA_DMA_STAT(tx_non_tcpudp, TX_NON_TCPUDP_PKTS),
+ SIENA_DMA_STAT(tx_mac_src_error, TX_MAC_SRC_ERR_PKTS),
+ SIENA_DMA_STAT(tx_ip_src_error, TX_IP_SRC_ERR_PKTS),
+ SIENA_DMA_STAT(rx_bytes, RX_BYTES),
+ SIENA_OTHER_STAT(rx_good_bytes),
+ SIENA_DMA_STAT(rx_bad_bytes, RX_BAD_BYTES),
+ SIENA_DMA_STAT(rx_packets, RX_PKTS),
+ SIENA_DMA_STAT(rx_good, RX_GOOD_PKTS),
+ SIENA_DMA_STAT(rx_bad, RX_BAD_FCS_PKTS),
+ SIENA_DMA_STAT(rx_pause, RX_PAUSE_PKTS),
+ SIENA_DMA_STAT(rx_control, RX_CONTROL_PKTS),
+ SIENA_DMA_STAT(rx_unicast, RX_UNICAST_PKTS),
+ SIENA_DMA_STAT(rx_multicast, RX_MULTICAST_PKTS),
+ SIENA_DMA_STAT(rx_broadcast, RX_BROADCAST_PKTS),
+ SIENA_DMA_STAT(rx_lt64, RX_UNDERSIZE_PKTS),
+ SIENA_DMA_STAT(rx_64, RX_64_PKTS),
+ SIENA_DMA_STAT(rx_65_to_127, RX_65_TO_127_PKTS),
+ SIENA_DMA_STAT(rx_128_to_255, RX_128_TO_255_PKTS),
+ SIENA_DMA_STAT(rx_256_to_511, RX_256_TO_511_PKTS),
+ SIENA_DMA_STAT(rx_512_to_1023, RX_512_TO_1023_PKTS),
+ SIENA_DMA_STAT(rx_1024_to_15xx, RX_1024_TO_15XX_PKTS),
+ SIENA_DMA_STAT(rx_15xx_to_jumbo, RX_15XX_TO_JUMBO_PKTS),
+ SIENA_DMA_STAT(rx_gtjumbo, RX_GTJUMBO_PKTS),
+ SIENA_DMA_STAT(rx_bad_gtjumbo, RX_JABBER_PKTS),
+ SIENA_DMA_STAT(rx_overflow, RX_OVERFLOW_PKTS),
+ SIENA_DMA_STAT(rx_false_carrier, RX_FALSE_CARRIER_PKTS),
+ SIENA_DMA_STAT(rx_symbol_error, RX_SYMBOL_ERROR_PKTS),
+ SIENA_DMA_STAT(rx_align_error, RX_ALIGN_ERROR_PKTS),
+ SIENA_DMA_STAT(rx_length_error, RX_LENGTH_ERROR_PKTS),
+ SIENA_DMA_STAT(rx_internal_error, RX_INTERNAL_ERROR_PKTS),
+ SIENA_DMA_STAT(rx_nodesc_drop_cnt, RX_NODESC_DROPS),
+ GENERIC_SW_STAT(rx_nodesc_trunc),
+ GENERIC_SW_STAT(rx_noskb_drops),
+};
+static const unsigned long siena_stat_mask[] = {
+ [0 ... BITS_TO_LONGS(SIENA_STAT_COUNT) - 1] = ~0UL,
+};
+
+static size_t siena_describe_nic_stats(struct efx_nic *efx, u8 *names)
+{
+ return efx_nic_describe_stats(siena_stat_desc, SIENA_STAT_COUNT,
+ siena_stat_mask, names);
+}
+
+static int siena_try_update_nic_stats(struct efx_nic *efx)
+{
+ struct siena_nic_data *nic_data = efx->nic_data;
+ u64 *stats = nic_data->stats;
+ __le64 *dma_stats;
+ __le64 generation_start, generation_end;
+
+ dma_stats = efx->stats_buffer.addr;
+
+ generation_end = dma_stats[efx->num_mac_stats - 1];
+ if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
+ return 0;
+ rmb();
+ efx_nic_update_stats(siena_stat_desc, SIENA_STAT_COUNT, siena_stat_mask,
+ stats, efx->stats_buffer.addr, false);
+ rmb();
+ generation_start = dma_stats[MC_CMD_MAC_GENERATION_START];
+ if (generation_end != generation_start)
+ return -EAGAIN;
+
+ /* Update derived statistics */
+ efx_nic_fix_nodesc_drop_stat(efx,
+ &stats[SIENA_STAT_rx_nodesc_drop_cnt]);
+ efx_update_diff_stat(&stats[SIENA_STAT_tx_good_bytes],
+ stats[SIENA_STAT_tx_bytes] -
+ stats[SIENA_STAT_tx_bad_bytes]);
+ stats[SIENA_STAT_tx_collision] =
+ stats[SIENA_STAT_tx_single_collision] +
+ stats[SIENA_STAT_tx_multiple_collision] +
+ stats[SIENA_STAT_tx_excessive_collision] +
+ stats[SIENA_STAT_tx_late_collision];
+ efx_update_diff_stat(&stats[SIENA_STAT_rx_good_bytes],
+ stats[SIENA_STAT_rx_bytes] -
+ stats[SIENA_STAT_rx_bad_bytes]);
+ efx_update_sw_stats(efx, stats);
+ return 0;
+}
+
+static size_t siena_update_nic_stats(struct efx_nic *efx, u64 *full_stats,
+ struct rtnl_link_stats64 *core_stats)
+{
+ struct siena_nic_data *nic_data = efx->nic_data;
+ u64 *stats = nic_data->stats;
+ int retry;
+
+ /* If we're unlucky enough to read statistics during the DMA, wait
+ * up to 10ms for it to finish (typically takes <500us) */
+ for (retry = 0; retry < 100; ++retry) {
+ if (siena_try_update_nic_stats(efx) == 0)
+ break;
+ udelay(100);
+ }
+
+ if (full_stats)
+ memcpy(full_stats, stats, sizeof(u64) * SIENA_STAT_COUNT);
+
+ if (core_stats) {
+ core_stats->rx_packets = stats[SIENA_STAT_rx_packets];
+ core_stats->tx_packets = stats[SIENA_STAT_tx_packets];
+ core_stats->rx_bytes = stats[SIENA_STAT_rx_bytes];
+ core_stats->tx_bytes = stats[SIENA_STAT_tx_bytes];
+ core_stats->rx_dropped = stats[SIENA_STAT_rx_nodesc_drop_cnt] +
+ stats[GENERIC_STAT_rx_nodesc_trunc] +
+ stats[GENERIC_STAT_rx_noskb_drops];
+ core_stats->multicast = stats[SIENA_STAT_rx_multicast];
+ core_stats->collisions = stats[SIENA_STAT_tx_collision];
+ core_stats->rx_length_errors =
+ stats[SIENA_STAT_rx_gtjumbo] +
+ stats[SIENA_STAT_rx_length_error];
+ core_stats->rx_crc_errors = stats[SIENA_STAT_rx_bad];
+ core_stats->rx_frame_errors = stats[SIENA_STAT_rx_align_error];
+ core_stats->rx_fifo_errors = stats[SIENA_STAT_rx_overflow];
+ core_stats->tx_window_errors =
+ stats[SIENA_STAT_tx_late_collision];
+
+ core_stats->rx_errors = (core_stats->rx_length_errors +
+ core_stats->rx_crc_errors +
+ core_stats->rx_frame_errors +
+ stats[SIENA_STAT_rx_symbol_error]);
+ core_stats->tx_errors = (core_stats->tx_window_errors +
+ stats[SIENA_STAT_tx_bad]);
+ }
+
+ return SIENA_STAT_COUNT;
+}
+
+static int siena_mac_reconfigure(struct efx_nic *efx, bool mtu_only __always_unused)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_MCAST_HASH_IN_LEN);
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_SET_MCAST_HASH_IN_LEN !=
+ MC_CMD_SET_MCAST_HASH_IN_HASH0_OFST +
+ sizeof(efx->multicast_hash));
+
+ efx_farch_filter_sync_rx_mode(efx);
+
+ WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+ rc = efx_mcdi_set_mac(efx);
+ if (rc != 0)
+ return rc;
+
+ memcpy(MCDI_PTR(inbuf, SET_MCAST_HASH_IN_HASH0),
+ efx->multicast_hash.byte, sizeof(efx->multicast_hash));
+ return efx_mcdi_rpc(efx, MC_CMD_SET_MCAST_HASH,
+ inbuf, sizeof(inbuf), NULL, 0, NULL);
+}
+
+/**************************************************************************
+ *
+ * Wake on LAN
+ *
+ **************************************************************************
+ */
+
+static void siena_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol)
+{
+ struct siena_nic_data *nic_data = efx->nic_data;
+
+ wol->supported = WAKE_MAGIC;
+ if (nic_data->wol_filter_id != -1)
+ wol->wolopts = WAKE_MAGIC;
+ else
+ wol->wolopts = 0;
+ memset(&wol->sopass, 0, sizeof(wol->sopass));
+}
+
+
+static int siena_set_wol(struct efx_nic *efx, u32 type)
+{
+ struct siena_nic_data *nic_data = efx->nic_data;
+ int rc;
+
+ if (type & ~WAKE_MAGIC)
+ return -EINVAL;
+
+ if (type & WAKE_MAGIC) {
+ if (nic_data->wol_filter_id != -1)
+ efx_mcdi_wol_filter_remove(efx,
+ nic_data->wol_filter_id);
+ rc = efx_mcdi_wol_filter_set_magic(efx, efx->net_dev->dev_addr,
+ &nic_data->wol_filter_id);
+ if (rc)
+ goto fail;
+
+ pci_wake_from_d3(efx->pci_dev, true);
+ } else {
+ rc = efx_mcdi_wol_filter_reset(efx);
+ nic_data->wol_filter_id = -1;
+ pci_wake_from_d3(efx->pci_dev, false);
+ if (rc)
+ goto fail;
+ }
+
+ return 0;
+ fail:
+ netif_err(efx, hw, efx->net_dev, "%s failed: type=%d rc=%d\n",
+ __func__, type, rc);
+ return rc;
+}
+
+
+static void siena_init_wol(struct efx_nic *efx)
+{
+ struct siena_nic_data *nic_data = efx->nic_data;
+ int rc;
+
+ rc = efx_mcdi_wol_filter_get_magic(efx, &nic_data->wol_filter_id);
+
+ if (rc != 0) {
+ /* If it failed, attempt to get into a synchronised
+ * state with MC by resetting any set WoL filters */
+ efx_mcdi_wol_filter_reset(efx);
+ nic_data->wol_filter_id = -1;
+ } else if (nic_data->wol_filter_id != -1) {
+ pci_wake_from_d3(efx->pci_dev, true);
+ }
+}
+
+/**************************************************************************
+ *
+ * MCDI
+ *
+ **************************************************************************
+ */
+
+#define MCDI_PDU(efx) \
+ (efx_port_num(efx) ? MC_SMEM_P1_PDU_OFST : MC_SMEM_P0_PDU_OFST)
+#define MCDI_DOORBELL(efx) \
+ (efx_port_num(efx) ? MC_SMEM_P1_DOORBELL_OFST : MC_SMEM_P0_DOORBELL_OFST)
+#define MCDI_STATUS(efx) \
+ (efx_port_num(efx) ? MC_SMEM_P1_STATUS_OFST : MC_SMEM_P0_STATUS_OFST)
+
+static void siena_mcdi_request(struct efx_nic *efx,
+ const efx_dword_t *hdr, size_t hdr_len,
+ const efx_dword_t *sdu, size_t sdu_len)
+{
+ unsigned pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
+ unsigned doorbell = FR_CZ_MC_TREG_SMEM + MCDI_DOORBELL(efx);
+ unsigned int i;
+ unsigned int inlen_dw = DIV_ROUND_UP(sdu_len, 4);
+
+ EFX_WARN_ON_PARANOID(hdr_len != 4);
+
+ efx_writed(efx, hdr, pdu);
+
+ for (i = 0; i < inlen_dw; i++)
+ efx_writed(efx, &sdu[i], pdu + hdr_len + 4 * i);
+
+ /* Ensure the request is written out before the doorbell */
+ wmb();
+
+ /* ring the doorbell with a distinctive value */
+ _efx_writed(efx, (__force __le32) 0x45789abc, doorbell);
+}
+
+static bool siena_mcdi_poll_response(struct efx_nic *efx)
+{
+ unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
+ efx_dword_t hdr;
+
+ efx_readd(efx, &hdr, pdu);
+
+ /* All 1's indicates that shared memory is in reset (and is
+ * not a valid hdr). Wait for it to come out of reset before
+ * completing the command
+ */
+ return EFX_DWORD_FIELD(hdr, EFX_DWORD_0) != 0xffffffff &&
+ EFX_DWORD_FIELD(hdr, MCDI_HEADER_RESPONSE);
+}
+
+static void siena_mcdi_read_response(struct efx_nic *efx, efx_dword_t *outbuf,
+ size_t offset, size_t outlen)
+{
+ unsigned int pdu = FR_CZ_MC_TREG_SMEM + MCDI_PDU(efx);
+ unsigned int outlen_dw = DIV_ROUND_UP(outlen, 4);
+ int i;
+
+ for (i = 0; i < outlen_dw; i++)
+ efx_readd(efx, &outbuf[i], pdu + offset + 4 * i);
+}
+
+static int siena_mcdi_poll_reboot(struct efx_nic *efx)
+{
+ struct siena_nic_data *nic_data = efx->nic_data;
+ unsigned int addr = FR_CZ_MC_TREG_SMEM + MCDI_STATUS(efx);
+ efx_dword_t reg;
+ u32 value;
+
+ efx_readd(efx, &reg, addr);
+ value = EFX_DWORD_FIELD(reg, EFX_DWORD_0);
+
+ if (value == 0)
+ return 0;
+
+ EFX_ZERO_DWORD(reg);
+ efx_writed(efx, &reg, addr);
+
+ /* MAC statistics have been cleared on the NIC; clear the local
+ * copies that we update with efx_update_diff_stat().
+ */
+ nic_data->stats[SIENA_STAT_tx_good_bytes] = 0;
+ nic_data->stats[SIENA_STAT_rx_good_bytes] = 0;
+
+ if (value == MC_STATUS_DWORD_ASSERT)
+ return -EINTR;
+ else
+ return -EIO;
+}
+
+/**************************************************************************
+ *
+ * MTD
+ *
+ **************************************************************************
+ */
+
+#ifdef CONFIG_SFC_MTD
+
+struct siena_nvram_type_info {
+ int port;
+ const char *name;
+};
+
+static const struct siena_nvram_type_info siena_nvram_types[] = {
+ [MC_CMD_NVRAM_TYPE_DISABLED_CALLISTO] = { 0, "sfc_dummy_phy" },
+ [MC_CMD_NVRAM_TYPE_MC_FW] = { 0, "sfc_mcfw" },
+ [MC_CMD_NVRAM_TYPE_MC_FW_BACKUP] = { 0, "sfc_mcfw_backup" },
+ [MC_CMD_NVRAM_TYPE_STATIC_CFG_PORT0] = { 0, "sfc_static_cfg" },
+ [MC_CMD_NVRAM_TYPE_STATIC_CFG_PORT1] = { 1, "sfc_static_cfg" },
+ [MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT0] = { 0, "sfc_dynamic_cfg" },
+ [MC_CMD_NVRAM_TYPE_DYNAMIC_CFG_PORT1] = { 1, "sfc_dynamic_cfg" },
+ [MC_CMD_NVRAM_TYPE_EXP_ROM] = { 0, "sfc_exp_rom" },
+ [MC_CMD_NVRAM_TYPE_EXP_ROM_CFG_PORT0] = { 0, "sfc_exp_rom_cfg" },
+ [MC_CMD_NVRAM_TYPE_EXP_ROM_CFG_PORT1] = { 1, "sfc_exp_rom_cfg" },
+ [MC_CMD_NVRAM_TYPE_PHY_PORT0] = { 0, "sfc_phy_fw" },
+ [MC_CMD_NVRAM_TYPE_PHY_PORT1] = { 1, "sfc_phy_fw" },
+ [MC_CMD_NVRAM_TYPE_FPGA] = { 0, "sfc_fpga" },
+};
+
+static int siena_mtd_probe_partition(struct efx_nic *efx,
+ struct efx_mcdi_mtd_partition *part,
+ unsigned int type)
+{
+ const struct siena_nvram_type_info *info;
+ size_t size, erase_size;
+ bool protected;
+ int rc;
+
+ if (type >= ARRAY_SIZE(siena_nvram_types) ||
+ siena_nvram_types[type].name == NULL)
+ return -ENODEV;
+
+ info = &siena_nvram_types[type];
+
+ if (info->port != efx_port_num(efx))
+ return -ENODEV;
+
+ rc = efx_mcdi_nvram_info(efx, type, &size, &erase_size, &protected);
+ if (rc)
+ return rc;
+ if (protected)
+ return -ENODEV; /* hide it */
+
+ part->nvram_type = type;
+ part->common.dev_type_name = "Siena NVRAM manager";
+ part->common.type_name = info->name;
+
+ part->common.mtd.type = MTD_NORFLASH;
+ part->common.mtd.flags = MTD_CAP_NORFLASH;
+ part->common.mtd.size = size;
+ part->common.mtd.erasesize = erase_size;
+
+ return 0;
+}
+
+static int siena_mtd_get_fw_subtypes(struct efx_nic *efx,
+ struct efx_mcdi_mtd_partition *parts,
+ size_t n_parts)
+{
+ uint16_t fw_subtype_list[
+ MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM];
+ size_t i;
+ int rc;
+
+ rc = efx_mcdi_get_board_cfg(efx, NULL, fw_subtype_list, NULL);
+ if (rc)
+ return rc;
+
+ for (i = 0; i < n_parts; i++)
+ parts[i].fw_subtype = fw_subtype_list[parts[i].nvram_type];
+
+ return 0;
+}
+
+static int siena_mtd_probe(struct efx_nic *efx)
+{
+ struct efx_mcdi_mtd_partition *parts;
+ u32 nvram_types;
+ unsigned int type;
+ size_t n_parts;
+ int rc;
+
+ ASSERT_RTNL();
+
+ rc = efx_mcdi_nvram_types(efx, &nvram_types);
+ if (rc)
+ return rc;
+
+ parts = kcalloc(hweight32(nvram_types), sizeof(*parts), GFP_KERNEL);
+ if (!parts)
+ return -ENOMEM;
+
+ type = 0;
+ n_parts = 0;
+
+ while (nvram_types != 0) {
+ if (nvram_types & 1) {
+ rc = siena_mtd_probe_partition(efx, &parts[n_parts],
+ type);
+ if (rc == 0)
+ n_parts++;
+ else if (rc != -ENODEV)
+ goto fail;
+ }
+ type++;
+ nvram_types >>= 1;
+ }
+
+ rc = siena_mtd_get_fw_subtypes(efx, parts, n_parts);
+ if (rc)
+ goto fail;
+
+ rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
+fail:
+ if (rc)
+ kfree(parts);
+ return rc;
+}
+
+#endif /* CONFIG_SFC_MTD */
+
+static unsigned int siena_check_caps(const struct efx_nic *efx,
+ u8 flag, u32 offset)
+{
+ /* Siena did not support MC_CMD_GET_CAPABILITIES */
+ return 0;
+}
+
+static unsigned int efx_siena_recycle_ring_size(const struct efx_nic *efx)
+{
+ /* Maximum link speed is 10G */
+ return EFX_RECYCLE_RING_SIZE_10G;
+}
+
+/**************************************************************************
+ *
+ * Revision-dependent attributes used by efx.c and nic.c
+ *
+ **************************************************************************
+ */
+
+const struct efx_nic_type siena_a0_nic_type = {
+ .is_vf = false,
+ .mem_bar = siena_mem_bar,
+ .mem_map_size = siena_mem_map_size,
+ .probe = siena_probe_nic,
+ .remove = siena_remove_nic,
+ .init = siena_init_nic,
+ .dimension_resources = siena_dimension_resources,
+ .fini = efx_port_dummy_op_void,
+#ifdef CONFIG_EEH
+ .monitor = siena_monitor,
+#else
+ .monitor = NULL,
+#endif
+ .map_reset_reason = efx_mcdi_map_reset_reason,
+ .map_reset_flags = siena_map_reset_flags,
+ .reset = efx_mcdi_reset,
+ .probe_port = efx_mcdi_port_probe,
+ .remove_port = efx_mcdi_port_remove,
+ .fini_dmaq = efx_farch_fini_dmaq,
+ .prepare_flush = siena_prepare_flush,
+ .finish_flush = siena_finish_flush,
+ .prepare_flr = efx_port_dummy_op_void,
+ .finish_flr = efx_farch_finish_flr,
+ .describe_stats = siena_describe_nic_stats,
+ .update_stats = siena_update_nic_stats,
+ .start_stats = efx_mcdi_mac_start_stats,
+ .pull_stats = efx_mcdi_mac_pull_stats,
+ .stop_stats = efx_mcdi_mac_stop_stats,
+ .push_irq_moderation = siena_push_irq_moderation,
+ .reconfigure_mac = siena_mac_reconfigure,
+ .check_mac_fault = efx_mcdi_mac_check_fault,
+ .reconfigure_port = efx_mcdi_port_reconfigure,
+ .get_wol = siena_get_wol,
+ .set_wol = siena_set_wol,
+ .resume_wol = siena_init_wol,
+ .test_chip = siena_test_chip,
+ .test_nvram = efx_mcdi_nvram_test_all,
+ .mcdi_request = siena_mcdi_request,
+ .mcdi_poll_response = siena_mcdi_poll_response,
+ .mcdi_read_response = siena_mcdi_read_response,
+ .mcdi_poll_reboot = siena_mcdi_poll_reboot,
+ .irq_enable_master = efx_farch_irq_enable_master,
+ .irq_test_generate = efx_farch_irq_test_generate,
+ .irq_disable_non_ev = efx_farch_irq_disable_master,
+ .irq_handle_msi = efx_farch_msi_interrupt,
+ .irq_handle_legacy = efx_farch_legacy_interrupt,
+ .tx_probe = efx_farch_tx_probe,
+ .tx_init = efx_farch_tx_init,
+ .tx_remove = efx_farch_tx_remove,
+ .tx_write = efx_farch_tx_write,
+ .tx_limit_len = efx_farch_tx_limit_len,
+ .tx_enqueue = __efx_enqueue_skb,
+ .rx_push_rss_config = siena_rx_push_rss_config,
+ .rx_pull_rss_config = siena_rx_pull_rss_config,
+ .rx_probe = efx_farch_rx_probe,
+ .rx_init = efx_farch_rx_init,
+ .rx_remove = efx_farch_rx_remove,
+ .rx_write = efx_farch_rx_write,
+ .rx_defer_refill = efx_farch_rx_defer_refill,
+ .rx_packet = __efx_rx_packet,
+ .ev_probe = efx_farch_ev_probe,
+ .ev_init = efx_farch_ev_init,
+ .ev_fini = efx_farch_ev_fini,
+ .ev_remove = efx_farch_ev_remove,
+ .ev_process = efx_farch_ev_process,
+ .ev_read_ack = efx_farch_ev_read_ack,
+ .ev_test_generate = efx_farch_ev_test_generate,
+ .filter_table_probe = efx_farch_filter_table_probe,
+ .filter_table_restore = efx_farch_filter_table_restore,
+ .filter_table_remove = efx_farch_filter_table_remove,
+ .filter_update_rx_scatter = efx_farch_filter_update_rx_scatter,
+ .filter_insert = efx_farch_filter_insert,
+ .filter_remove_safe = efx_farch_filter_remove_safe,
+ .filter_get_safe = efx_farch_filter_get_safe,
+ .filter_clear_rx = efx_farch_filter_clear_rx,
+ .filter_count_rx_used = efx_farch_filter_count_rx_used,
+ .filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
+ .filter_get_rx_ids = efx_farch_filter_get_rx_ids,
+#ifdef CONFIG_RFS_ACCEL
+ .filter_rfs_expire_one = efx_farch_filter_rfs_expire_one,
+#endif
+#ifdef CONFIG_SFC_MTD
+ .mtd_probe = siena_mtd_probe,
+ .mtd_rename = efx_mcdi_mtd_rename,
+ .mtd_read = efx_mcdi_mtd_read,
+ .mtd_erase = efx_mcdi_mtd_erase,
+ .mtd_write = efx_mcdi_mtd_write,
+ .mtd_sync = efx_mcdi_mtd_sync,
+#endif
+ .ptp_write_host_time = siena_ptp_write_host_time,
+ .ptp_set_ts_config = siena_ptp_set_ts_config,
+#ifdef CONFIG_SFC_SRIOV
+ .sriov_configure = efx_siena_sriov_configure,
+ .sriov_init = efx_siena_sriov_init,
+ .sriov_fini = efx_siena_sriov_fini,
+ .sriov_wanted = efx_siena_sriov_wanted,
+ .sriov_reset = efx_siena_sriov_reset,
+ .sriov_flr = efx_siena_sriov_flr,
+ .sriov_set_vf_mac = efx_siena_sriov_set_vf_mac,
+ .sriov_set_vf_vlan = efx_siena_sriov_set_vf_vlan,
+ .sriov_set_vf_spoofchk = efx_siena_sriov_set_vf_spoofchk,
+ .sriov_get_vf_config = efx_siena_sriov_get_vf_config,
+ .vswitching_probe = efx_port_dummy_op_int,
+ .vswitching_restore = efx_port_dummy_op_int,
+ .vswitching_remove = efx_port_dummy_op_void,
+ .set_mac_address = efx_siena_sriov_mac_address_changed,
+#endif
+
+ .revision = EFX_REV_SIENA_A0,
+ .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL,
+ .rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL,
+ .buf_tbl_base = FR_BZ_BUF_FULL_TBL,
+ .evq_ptr_tbl_base = FR_BZ_EVQ_PTR_TBL,
+ .evq_rptr_tbl_base = FR_BZ_EVQ_RPTR,
+ .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
+ .rx_prefix_size = FS_BZ_RX_PREFIX_SIZE,
+ .rx_hash_offset = FS_BZ_RX_PREFIX_HASH_OFST,
+ .rx_buffer_padding = 0,
+ .can_rx_scatter = true,
+ .option_descriptors = false,
+ .min_interrupt_mode = EFX_INT_MODE_LEGACY,
+ .timer_period_max = 1 << FRF_CZ_TC_TIMER_VAL_WIDTH,
+ .offload_features = (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+ NETIF_F_RXHASH | NETIF_F_NTUPLE),
+ .mcdi_max_ver = 1,
+ .max_rx_ip_filters = FR_BZ_RX_FILTER_TBL0_ROWS,
+ .hwtstamp_filters = (1 << HWTSTAMP_FILTER_NONE |
+ 1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT |
+ 1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT),
+ .rx_hash_key_size = 16,
+ .check_caps = siena_check_caps,
+ .sensor_event = efx_mcdi_sensor_event,
+ .rx_recycle_ring_size = efx_siena_recycle_ring_size,
+};
diff --git a/drivers/net/ethernet/sfc/siena/siena_sriov.c b/drivers/net/ethernet/sfc/siena/siena_sriov.c
new file mode 100644
index 000000000000..f12851a527d9
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/siena_sriov.c
@@ -0,0 +1,1686 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2010-2012 Solarflare Communications Inc.
+ */
+#include <linux/pci.h>
+#include <linux/module.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "efx_channels.h"
+#include "nic.h"
+#include "io.h"
+#include "mcdi.h"
+#include "filter.h"
+#include "mcdi_pcol.h"
+#include "farch_regs.h"
+#include "siena_sriov.h"
+#include "vfdi.h"
+
+/* Number of longs required to track all the VIs in a VF: sizes a bitmap
+ * with one bit for each of the (1 << EFX_VI_SCALE_MAX) possible VIs.
+ */
+#define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX)
+
+/* Maximum number of RX queues supported. efx_vfdi_init_rxq() rejects queue
+ * indices at or above this value.
+ */
+#define VF_MAX_RX_QUEUES 63
+
+/**
+ * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour
+ * @VF_TX_FILTER_OFF: Disabled
+ * @VF_TX_FILTER_AUTO: Enabled if MAC address assigned to VF and only
+ * 2 TX queues allowed per VF. efx_siena_sriov_reset_tx_filter()
+ * switches the mode to @VF_TX_FILTER_ON once both conditions hold.
+ * @VF_TX_FILTER_ON: Enabled
+ */
+enum efx_vf_tx_filter_mode {
+ VF_TX_FILTER_OFF,
+ VF_TX_FILTER_AUTO,
+ VF_TX_FILTER_ON,
+};
+
+/**
+ * struct siena_vf - Back-end resource and protocol state for a PCI VF
+ * @efx: The Efx NIC owning this VF
+ * @pci_rid: The PCI requester ID for this VF
+ * @pci_name: The PCI name (formatted address) of this VF
+ * @index: Index of VF within its port and PF.
+ * @req: VFDI incoming request work item. Incoming USR_EV events are received
+ * by the NAPI handler, but must be handled by executing MCDI requests
+ * inside a work item.
+ * @req_addr: VFDI incoming request DMA address (in VF's PCI address space).
+ * @req_type: Expected next incoming (from VF) %VFDI_EV_TYPE member.
+ * @req_seqno: Expected next incoming (from VF) %VFDI_EV_SEQ member.
+ * @msg_seqno: Next %VFDI_EV_SEQ member to reply to VF. Protected by
+ * @status_lock
+ * @busy: VFDI request queued to be processed or being processed. Receiving
+ * a VFDI request when @busy is set is an error condition.
+ * @buf: Incoming VFDI requests are DMAed from the VF into this buffer.
+ * @buftbl_base: Buffer table entries for this VF start at this index.
+ * @rx_filtering: Receive filtering has been requested by the VF driver.
+ * @rx_filter_flags: The flags sent in the %VFDI_OP_INSERT_FILTER request.
+ * @rx_filter_qid: VF relative qid for RX filter requested by VF.
+ * @rx_filter_id: Receive MAC filter ID. Only one filter per VF is supported.
+ * @tx_filter_mode: Transmit MAC filtering mode.
+ * @tx_filter_id: Transmit MAC filter ID.
+ * @addr: The MAC address and outer vlan tag of the VF.
+ * @status_addr: VF DMA address of page for &struct vfdi_status updates.
+ * @status_lock: Mutex protecting @msg_seqno, @status_addr, @addr,
+ * @peer_page_addrs and @peer_page_count from simultaneous
+ * updates by the VM and consumption by
+ * efx_siena_sriov_update_vf_addr()
+ * @peer_page_addrs: Pointer to an array of guest pages for local addresses.
+ * @peer_page_count: Number of entries in @peer_page_addrs.
+ * @evq0_addrs: Array of guest pages backing evq0.
+ * @evq0_count: Number of entries in @evq0_addrs.
+ * @flush_waitq: wait queue used by %VFDI_OP_FINI_ALL_QUEUES handler
+ * to wait for flush completions.
+ * @txq_lock: Mutex for TX queue allocation.
+ * @txq_mask: Mask of initialized transmit queues.
+ * @txq_count: Number of initialized transmit queues.
+ * @rxq_mask: Mask of initialized receive queues.
+ * @rxq_count: Number of initialized receive queues.
+ * @rxq_retry_mask: Mask of receive queues that need to be flushed again
+ * due to flush failure.
+ * @rxq_retry_count: Number of receive queues in @rxq_retry_mask.
+ * @reset_work: Work item to schedule a VF reset.
+ */
+struct siena_vf {
+ struct efx_nic *efx;
+ unsigned int pci_rid;
+ char pci_name[13]; /* dddd:bb:dd.f */
+ unsigned int index;
+ struct work_struct req;
+ u64 req_addr;
+ int req_type;
+ unsigned req_seqno;
+ unsigned msg_seqno;
+ bool busy;
+ struct efx_buffer buf;
+ unsigned buftbl_base;
+ bool rx_filtering;
+ enum efx_filter_flags rx_filter_flags;
+ unsigned rx_filter_qid;
+ int rx_filter_id;
+ enum efx_vf_tx_filter_mode tx_filter_mode;
+ int tx_filter_id;
+ struct vfdi_endpoint addr;
+ u64 status_addr;
+ struct mutex status_lock;
+ u64 *peer_page_addrs;
+ unsigned peer_page_count;
+ u64 evq0_addrs[EFX_MAX_VF_EVQ_SIZE * sizeof(efx_qword_t) /
+ EFX_BUF_SIZE];
+ unsigned evq0_count;
+ wait_queue_head_t flush_waitq;
+ struct mutex txq_lock;
+ unsigned long txq_mask[VI_MASK_LENGTH];
+ unsigned txq_count;
+ unsigned long rxq_mask[VI_MASK_LENGTH];
+ unsigned rxq_count;
+ unsigned long rxq_retry_mask[VI_MASK_LENGTH];
+ atomic_t rxq_retry_count;
+ struct work_struct reset_work;
+};
+
+/**
+ * struct efx_memcpy_req - One copy record passed to efx_siena_sriov_memcpy()
+ * @from_rid: PCI requester ID of the source. Only used when @from_buf is NULL.
+ * @from_buf: If non-NULL, host buffer whose contents are copied inline into
+ * the MCDI request; @from_rid and @from_addr are then ignored.
+ * @from_addr: Source DMA address. Only used when @from_buf is NULL.
+ * @to_rid: PCI requester ID of the destination.
+ * @to_addr: Destination DMA address.
+ * @length: Number of bytes to copy.
+ */
+struct efx_memcpy_req {
+ unsigned int from_rid;
+ void *from_buf;
+ u64 from_addr;
+ unsigned int to_rid;
+ u64 to_addr;
+ unsigned length;
+};
+
+/**
+ * struct efx_local_addr - A MAC address on the vswitch without a VF.
+ *
+ * Siena does not have a switch, so VFs can't transmit data to each
+ * other. Instead the VFs must be made aware of the local addresses
+ * on the vswitch, so that they can arrange for an alternative
+ * software datapath to be used.
+ *
+ * efx_siena_sriov_peer_work() copies each entry into the peer list with a
+ * TCI of zero.
+ *
+ * @link: List head for insertion into efx->local_addr_list.
+ * @addr: Ethernet address
+ */
+struct efx_local_addr {
+ struct list_head link;
+ u8 addr[ETH_ALEN];
+};
+
+/**
+ * struct efx_endpoint_page - Page of vfdi_endpoint structures
+ *
+ * @link: List head for insertion into efx->local_page_list.
+ * @ptr: Pointer to page.
+ * @addr: DMA address of page. Used as the copy source when
+ * __efx_siena_sriov_push_vf_status() pushes peer pages to a VF.
+ */
+struct efx_endpoint_page {
+ struct list_head link;
+ void *ptr;
+ dma_addr_t addr;
+};
+
+/* Buffer table entries are reserved txq0,rxq0,evq0,txq1,rxq1,evq1.
+ *
+ * Each VI owns EFX_VF_BUFTBL_PER_VI consecutive entries starting at
+ * EFX_BUFTBL_TXQ_BASE(). The RXQ entries start one maximum-size DMA queue
+ * (EFX_MAX_DMAQ_SIZE descriptors) after that, and the EVQ entries two
+ * maximum-size DMA queues after it.
+ */
+#define EFX_BUFTBL_TXQ_BASE(_vf, _qid) \
+ ((_vf)->buftbl_base + EFX_VF_BUFTBL_PER_VI * (_qid))
+#define EFX_BUFTBL_RXQ_BASE(_vf, _qid) \
+ (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \
+ (EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
+#define EFX_BUFTBL_EVQ_BASE(_vf, _qid) \
+ (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \
+ (2 * EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
+
+/* Mask with the low <_field>_WIDTH bits set, for truncating a value to the
+ * width of a register field.
+ */
+#define EFX_FIELD_MASK(_field) \
+ ((1 << _field ## _WIDTH) - 1)
+
+/* VFs can only use this many transmit channels */
+static unsigned int vf_max_tx_channels = 2;
+module_param(vf_max_tx_channels, uint, 0444);
+MODULE_PARM_DESC(vf_max_tx_channels,
+ "Limit the number of TX channels VFs can use");
+
+/* Upper bound on the number of VFs. Zero makes efx_siena_sriov_probe()
+ * return without probing for SR-IOV at all.
+ */
+static int max_vfs = -1;
+module_param(max_vfs, int, 0444);
+MODULE_PARM_DESC(max_vfs,
+ "Reduce the number of VFs initialized by the driver");
+
+/* Workqueue used by VFDI communication. We can't use the global
+ * workqueue because it may be running the VF driver's probe()
+ * routine, which will be blocked there waiting for a VFDI response.
+ */
+static struct workqueue_struct *vfdi_workqueue;
+
+/* Convert a VI index relative to @vf into an absolute VI index. */
+static unsigned abs_index(struct siena_vf *vf, unsigned index)
+{
+	unsigned vf_base = EFX_VI_BASE + vf->index * efx_vf_size(vf->efx);
+
+	return vf_base + index;
+}
+
+/* Issue MC_CMD_SRIOV to enable or disable SR-IOV for efx->vf_count VFs.
+ *
+ * On success the VI scale and total VF count reported in the response are
+ * stored through @vi_scale_out and @vf_total_out; either pointer may be NULL.
+ *
+ * Returns 0 on success, the MCDI error code if the request fails, -EIO if
+ * the response is too short, or -EOPNOTSUPP if the reported VI scale is
+ * larger than EFX_VI_SCALE_MAX.
+ */
+static int efx_siena_sriov_cmd(struct efx_nic *efx, bool enable,
+			       unsigned *vi_scale_out, unsigned *vf_total_out)
+{
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_SRIOV_IN_LEN);
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_SRIOV_OUT_LEN);
+	unsigned scale, total;
+	size_t resp_len;
+	int rc;
+
+	MCDI_SET_DWORD(inbuf, SRIOV_IN_ENABLE, enable ? 1 : 0);
+	MCDI_SET_DWORD(inbuf, SRIOV_IN_VI_BASE, EFX_VI_BASE);
+	MCDI_SET_DWORD(inbuf, SRIOV_IN_VF_COUNT, efx->vf_count);
+
+	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN,
+				outbuf, MC_CMD_SRIOV_OUT_LEN, &resp_len);
+	if (rc)
+		return rc;
+	if (resp_len < MC_CMD_SRIOV_OUT_LEN)
+		return -EIO;
+
+	total = MCDI_DWORD(outbuf, SRIOV_OUT_VF_TOTAL);
+	scale = MCDI_DWORD(outbuf, SRIOV_OUT_VI_SCALE);
+	if (scale > EFX_VI_SCALE_MAX)
+		return -EOPNOTSUPP;
+
+	/* Only report results once the whole response has been validated */
+	if (vi_scale_out)
+		*vi_scale_out = scale;
+	if (vf_total_out)
+		*vf_total_out = total;
+
+	return 0;
+}
+
+/* Enable or disable user events (USR_EV) by writing FR_CZ_USR_EV_CFG.
+ *
+ * The default event queue field is always programmed with the channel
+ * number of the VFDI channel; @enabled only controls the disable bit.
+ */
+static void efx_siena_sriov_usrev(struct efx_nic *efx, bool enabled)
+{
+	struct siena_nic_data *nic_data = efx->nic_data;
+	efx_oword_t reg;
+
+	EFX_POPULATE_OWORD_2(reg,
+			     FRF_CZ_USREV_DIS, enabled ? 0 : 1,
+			     FRF_CZ_DFLT_EVQ, nic_data->vfdi_channel->channel);
+	efx_writeo(efx, &reg, FR_CZ_USR_EV_CFG);
+}
+
+/* Ask the MC to perform @count copies described by the array @req, using a
+ * single MC_CMD_MEMCPY request.
+ *
+ * A record whose from_buf is NULL copies from (from_rid, from_addr). A record
+ * with a non-NULL from_buf has its data appended to the MCDI request itself
+ * and is marked as an inline source.
+ *
+ * Returns the result of the MCDI request, or -ENOBUFS if @count exceeds
+ * MC_CMD_MEMCPY_IN_RECORD_MAXNUM or the inline data does not fit in the
+ * request buffer.
+ */
+static int efx_siena_sriov_memcpy(struct efx_nic *efx,
+ struct efx_memcpy_req *req,
+ unsigned int count)
+{
+ MCDI_DECLARE_BUF(inbuf, MCDI_CTL_SDU_LEN_MAX_V1);
+ MCDI_DECLARE_STRUCT_PTR(record);
+ unsigned int index, used;
+ u64 from_addr;
+ u32 from_rid;
+ int rc;
+
+ mb(); /* Finish writing source/reading dest before DMA starts */
+
+ if (WARN_ON(count > MC_CMD_MEMCPY_IN_RECORD_MAXNUM))
+ return -ENOBUFS;
+ /* Inline data is placed after the @count fixed-size records */
+ used = MC_CMD_MEMCPY_IN_LEN(count);
+
+ for (index = 0; index < count; index++) {
+ record = MCDI_ARRAY_STRUCT_PTR(inbuf, MEMCPY_IN_RECORD, index);
+ MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_NUM_RECORDS,
+ count);
+ MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_RID,
+ req->to_rid);
+ MCDI_SET_QWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR,
+ req->to_addr);
+ if (req->from_buf == NULL) {
+ from_rid = req->from_rid;
+ from_addr = req->from_addr;
+ } else {
+ if (WARN_ON(used + req->length >
+ MCDI_CTL_SDU_LEN_MAX_V1)) {
+ rc = -ENOBUFS;
+ goto out;
+ }
+
+ /* Inline source: the "address" is the offset of the
+ * data within the request buffer.
+ */
+ from_rid = MC_CMD_MEMCPY_RECORD_TYPEDEF_RID_INLINE;
+ from_addr = used;
+ memcpy(_MCDI_PTR(inbuf, used), req->from_buf,
+ req->length);
+ used += req->length;
+ }
+
+ MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_RID, from_rid);
+ MCDI_SET_QWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR,
+ from_addr);
+ MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_LENGTH,
+ req->length);
+
+ ++req;
+ }
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_MEMCPY, inbuf, used, NULL, 0, NULL);
+out:
+ mb(); /* Don't write source/read dest before DMA is complete */
+
+ return rc;
+}
+
+/* Re-create the TX MAC filter of @vf from its current address.
+ *
+ * The TX filter is owned by this driver and is changed without involving
+ * the VF. Any existing filter is removed first; a new one is only inserted
+ * when the VF has a non-zero MAC address. A failed insertion is logged and
+ * leaves the VF without a TX filter.
+ */
+static void efx_siena_sriov_reset_tx_filter(struct siena_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	struct efx_filter_spec spec;
+	u16 vid;
+	int rc;
+
+	if (vf->tx_filter_id != -1) {
+		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+					  vf->tx_filter_id);
+		netif_dbg(efx, hw, efx->net_dev, "Removed vf %s tx filter %d\n",
+			  vf->pci_name, vf->tx_filter_id);
+		vf->tx_filter_id = -1;
+	}
+
+	if (is_zero_ether_addr(vf->addr.mac_addr))
+		return;
+
+	/* AUTO mode becomes ON when VFs are limited to at most 2 TX
+	 * channels.
+	 */
+	if (vf->tx_filter_mode == VF_TX_FILTER_AUTO && vf_max_tx_channels <= 2)
+		vf->tx_filter_mode = VF_TX_FILTER_ON;
+
+	vid = ntohs(vf->addr.tci) & VLAN_VID_MASK;
+	efx_filter_init_tx(&spec, abs_index(vf, 0));
+	rc = efx_filter_set_eth_local(&spec,
+				      vid ? vid : EFX_FILTER_VID_UNSPEC,
+				      vf->addr.mac_addr);
+	BUG_ON(rc);
+
+	rc = efx_filter_insert_filter(efx, &spec, true);
+	if (rc < 0) {
+		netif_warn(efx, hw, efx->net_dev,
+			   "Unable to migrate tx filter for vf %s\n",
+			   vf->pci_name);
+		return;
+	}
+
+	netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s tx filter %d\n",
+		  vf->pci_name, rc);
+	vf->tx_filter_id = rc;
+}
+
+/* Re-create the RX MAC filter that is managed here on behalf of the VF
+ * driver.
+ *
+ * Any existing filter is removed first. A new one is inserted only when the
+ * VF has requested RX filtering and has a non-zero MAC address. A failed
+ * insertion is logged and leaves the VF without an RX filter.
+ */
+static void efx_siena_sriov_reset_rx_filter(struct siena_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	struct efx_filter_spec spec;
+	u16 vid;
+	int rc;
+
+	if (vf->rx_filter_id != -1) {
+		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
+					  vf->rx_filter_id);
+		netif_dbg(efx, hw, efx->net_dev, "Removed vf %s rx filter %d\n",
+			  vf->pci_name, vf->rx_filter_id);
+		vf->rx_filter_id = -1;
+	}
+
+	if (!vf->rx_filtering)
+		return;
+	if (is_zero_ether_addr(vf->addr.mac_addr))
+		return;
+
+	vid = ntohs(vf->addr.tci) & VLAN_VID_MASK;
+	efx_filter_init_rx(&spec, EFX_FILTER_PRI_REQUIRED,
+			   vf->rx_filter_flags,
+			   abs_index(vf, vf->rx_filter_qid));
+	rc = efx_filter_set_eth_local(&spec,
+				      vid ? vid : EFX_FILTER_VID_UNSPEC,
+				      vf->addr.mac_addr);
+	BUG_ON(rc);
+
+	rc = efx_filter_insert_filter(efx, &spec, true);
+	if (rc < 0) {
+		netif_warn(efx, hw, efx->net_dev,
+			   "Unable to insert rx filter for vf %s\n",
+			   vf->pci_name);
+		return;
+	}
+
+	netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s rx filter %d\n",
+		  vf->pci_name, rc);
+	vf->rx_filter_id = rc;
+}
+
+/* Rebuild both MAC filters of @vf from its current address, then schedule
+ * the peer work item on the VFDI workqueue.
+ */
+static void __efx_siena_sriov_update_vf_addr(struct siena_vf *vf)
+{
+	struct siena_nic_data *nic_data = vf->efx->nic_data;
+
+	efx_siena_sriov_reset_tx_filter(vf);
+	efx_siena_sriov_reset_rx_filter(vf);
+
+	queue_work(vfdi_workqueue, &nic_data->peer_work);
+}
+
+/* Push the peer list to this VF. The caller must hold status_lock to interlock
+ * with VFDI requests, and they must be serialised against manipulation of
+ * local_page_list, either by acquiring local_lock or by running from
+ * efx_siena_sriov_peer_work()
+ *
+ * The update is written as: generation_start, the body of vfdi_status, the
+ * peer pages, and finally generation_end. Copies are batched up to
+ * ARRAY_SIZE(copy) records per efx_siena_sriov_memcpy() call; the result of
+ * each call is ignored. A user event of type VFDI_EV_TYPE_STATUS is then
+ * generated on the VF's first VI.
+ */
+static void __efx_siena_sriov_push_vf_status(struct siena_vf *vf)
+{
+ struct efx_nic *efx = vf->efx;
+ struct siena_nic_data *nic_data = efx->nic_data;
+ struct vfdi_status *status = nic_data->vfdi_status.addr;
+ struct efx_memcpy_req copy[4];
+ struct efx_endpoint_page *epp;
+ unsigned int pos, count;
+ unsigned data_offset;
+ efx_qword_t event;
+
+ WARN_ON(!mutex_is_locked(&vf->status_lock));
+ WARN_ON(!vf->status_addr);
+
+ status->local = vf->addr;
+ status->generation_end = ++status->generation_start;
+
+ memset(copy, '\0', sizeof(copy));
+ /* Write generation_start */
+ copy[0].from_buf = &status->generation_start;
+ copy[0].to_rid = vf->pci_rid;
+ copy[0].to_addr = vf->status_addr + offsetof(struct vfdi_status,
+ generation_start);
+ copy[0].length = sizeof(status->generation_start);
+ /* DMA the rest of the structure (excluding the generations). This
+ * assumes that the non-generation portion of vfdi_status is in
+ * one chunk starting at the version member.
+ */
+ data_offset = offsetof(struct vfdi_status, version);
+ copy[1].from_rid = efx->pci_dev->devfn;
+ copy[1].from_addr = nic_data->vfdi_status.dma_addr + data_offset;
+ copy[1].to_rid = vf->pci_rid;
+ copy[1].to_addr = vf->status_addr + data_offset;
+ copy[1].length = status->length - data_offset;
+
+ /* Copy the peer pages */
+ pos = 2;
+ count = 0;
+ list_for_each_entry(epp, &nic_data->local_page_list, link) {
+ if (count == vf->peer_page_count) {
+ /* The VF driver will know they need to provide more
+ * pages because peer_addr_count is too large.
+ */
+ break;
+ }
+ /* Slots are reused between batches, so from_buf must be
+ * cleared to select a DMA source rather than inline data.
+ */
+ copy[pos].from_buf = NULL;
+ copy[pos].from_rid = efx->pci_dev->devfn;
+ copy[pos].from_addr = epp->addr;
+ copy[pos].to_rid = vf->pci_rid;
+ copy[pos].to_addr = vf->peer_page_addrs[count];
+ copy[pos].length = EFX_PAGE_SIZE;
+
+ /* Batch is full: submit it and refill from slot 0 */
+ if (++pos == ARRAY_SIZE(copy)) {
+ efx_siena_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
+ pos = 0;
+ }
+ ++count;
+ }
+
+ /* Write generation_end */
+ copy[pos].from_buf = &status->generation_end;
+ copy[pos].to_rid = vf->pci_rid;
+ copy[pos].to_addr = vf->status_addr + offsetof(struct vfdi_status,
+ generation_end);
+ copy[pos].length = sizeof(status->generation_end);
+ efx_siena_sriov_memcpy(efx, copy, pos + 1);
+
+ /* Notify the guest */
+ EFX_POPULATE_QWORD_3(event,
+ FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
+ VFDI_EV_SEQ, (vf->msg_seqno & 0xff),
+ VFDI_EV_TYPE, VFDI_EV_TYPE_STATUS);
+ ++vf->msg_seqno;
+ efx_farch_generate_event(efx,
+ EFX_VI_BASE + vf->index * efx_vf_size(efx),
+ &event);
+}
+
+/* Write @count consecutive buffer table entries starting at index @offset.
+ *
+ * Entry i is programmed with addr[i] >> 12, or with a zero address when
+ * @addr is NULL (used to clear entries).
+ */
+static void efx_siena_sriov_bufs(struct efx_nic *efx, unsigned offset,
+				 u64 *addr, unsigned count)
+{
+	unsigned i;
+
+	for (i = 0; i < count; ++i) {
+		efx_qword_t entry;
+
+		EFX_POPULATE_QWORD_3(entry,
+				     FRF_AZ_BUF_ADR_REGION, 0,
+				     FRF_AZ_BUF_ADR_FBUF,
+				     addr ? addr[i] >> 12 : 0,
+				     FRF_AZ_BUF_OWNER_ID_FBUF, 0);
+		efx_sram_writeq(efx, efx->membase + FR_BZ_BUF_FULL_TBL,
+				&entry, offset + i);
+	}
+}
+
+/* Returns true if the VF-relative queue @index is outside the range of VIs
+ * available to one VF.
+ */
+static bool bad_vf_index(struct efx_nic *efx, unsigned index)
+{
+	unsigned vf_size = efx_vf_size(efx);
+
+	return !(index < vf_size);
+}
+
+/* Validate a buffer count supplied by a VF.
+ *
+ * Returns true (bad) unless @buf_count is a non-zero power of two that does
+ * not exceed the number of EFX_BUF_SIZE buffers needed to hold
+ * @max_entry_count queue entries.
+ *
+ * Zero has to be rejected explicitly: it passes the power-of-two test, and
+ * the callers go on to pass the count to __ffs(), whose result is undefined
+ * for an argument of 0.
+ */
+static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count)
+{
+	unsigned max_buf_count = max_entry_count *
+		sizeof(efx_qword_t) / EFX_BUF_SIZE;
+
+	return !buf_count || (buf_count & (buf_count - 1)) ||
+	       buf_count > max_buf_count;
+}
+
+/* Map an absolute (per-port) VI index to the VF that owns it.
+ *
+ * Returns true if the VI does NOT belong to an initialised VF, i.e. it is
+ * below EFX_VI_BASE or maps to a VF index >= efx->vf_init_count. Otherwise
+ * returns false and, where the pointers are non-NULL, stores the owning VF
+ * in @vf_out and the VI index within that VF in @rel_index_out.
+ */
+static bool map_vi_index(struct efx_nic *efx, unsigned abs_index,
+			 struct siena_vf **vf_out, unsigned *rel_index_out)
+{
+	struct siena_nic_data *nic_data = efx->nic_data;
+	unsigned vf_size;
+	unsigned vf_i;
+
+	if (abs_index < EFX_VI_BASE)
+		return true;
+
+	vf_size = efx_vf_size(efx);
+	vf_i = (abs_index - EFX_VI_BASE) / vf_size;
+	if (vf_i >= efx->vf_init_count)
+		return true;
+
+	if (vf_out)
+		*vf_out = &nic_data->vf[vf_i];
+	if (rel_index_out)
+		*rel_index_out = abs_index % vf_size;
+
+	return false;
+}
+
+/* Handle VFDI_OP_INIT_EVQ: set up an event queue for the VF.
+ *
+ * The request in vf->buf is validated, the guest pages backing the queue
+ * are written to the buffer table, and the timer and event queue pointer
+ * table entries for the VI are programmed. For evq 0 the page addresses
+ * are also remembered in vf->evq0_addrs so that efx_siena_sriov_reset_vf()
+ * can rebuild the queue later.
+ *
+ * Returns VFDI_RC_SUCCESS, or VFDI_RC_EINVAL for a bad queue index or
+ * buffer count.
+ */
+static int efx_vfdi_init_evq(struct siena_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	struct vfdi_req *req = vf->buf.addr;
+	unsigned vf_evq = req->u.init_evq.index;
+	unsigned buf_count = req->u.init_evq.buf_count;
+	unsigned abs_evq = abs_index(vf, vf_evq);
+	unsigned buftbl = EFX_BUFTBL_EVQ_BASE(vf, vf_evq);
+	efx_oword_t reg;
+
+	if (bad_vf_index(efx, vf_evq) ||
+	    bad_buf_count(buf_count, EFX_MAX_VF_EVQ_SIZE)) {
+		if (net_ratelimit())
+			netif_err(efx, hw, efx->net_dev,
+				  "ERROR: Invalid INIT_EVQ from %s: evq %d bufs %d\n",
+				  vf->pci_name, vf_evq, buf_count);
+		return VFDI_RC_EINVAL;
+	}
+
+	efx_siena_sriov_bufs(efx, buftbl, req->u.init_evq.addr, buf_count);
+
+	EFX_POPULATE_OWORD_3(reg,
+			     FRF_CZ_TIMER_Q_EN, 1,
+			     FRF_CZ_HOST_NOTIFY_MODE, 0,
+			     FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
+	efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
+	EFX_POPULATE_OWORD_3(reg,
+			     FRF_AZ_EVQ_EN, 1,
+			     FRF_AZ_EVQ_SIZE, __ffs(buf_count),
+			     FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
+	efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
+
+	if (vf_evq == 0) {
+		/* bad_buf_count() has bounded buf_count by the number of
+		 * buffers in a maximum-size VF event queue, which is the
+		 * size of evq0_addrs.
+		 */
+		memcpy(vf->evq0_addrs, req->u.init_evq.addr,
+		       buf_count * sizeof(u64));
+		vf->evq0_count = buf_count;
+	}
+
+	return VFDI_RC_SUCCESS;
+}
+
+/* Handle VFDI_OP_INIT_RXQ: set up a receive queue for the VF.
+ *
+ * The request in vf->buf is validated, the queue is recorded in
+ * vf->rxq_mask / vf->rxq_count, the guest pages backing the descriptor ring
+ * are written to the buffer table and the RX descriptor pointer table entry
+ * is programmed.
+ *
+ * Returns VFDI_RC_SUCCESS, or VFDI_RC_EINVAL for a bad queue index, event
+ * queue index or buffer count.
+ */
+static int efx_vfdi_init_rxq(struct siena_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	struct vfdi_req *req = vf->buf.addr;
+	unsigned vf_rxq = req->u.init_rxq.index;
+	unsigned vf_evq = req->u.init_rxq.evq;
+	unsigned buf_count = req->u.init_rxq.buf_count;
+	unsigned buftbl = EFX_BUFTBL_RXQ_BASE(vf, vf_rxq);
+	unsigned label;
+	efx_oword_t reg;
+
+	if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) ||
+	    vf_rxq >= VF_MAX_RX_QUEUES ||
+	    bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
+		if (net_ratelimit())
+			netif_err(efx, hw, efx->net_dev,
+				  "ERROR: Invalid INIT_RXQ from %s: rxq %d evq %d "
+				  "buf_count %d\n", vf->pci_name, vf_rxq,
+				  vf_evq, buf_count);
+		return VFDI_RC_EINVAL;
+	}
+
+	/* __test_and_set_bit() returns the previous value of the bit, so the
+	 * queue is only counted when it was not already initialised. This
+	 * keeps rxq_count equal to the number of bits set in rxq_mask.
+	 */
+	if (!__test_and_set_bit(req->u.init_rxq.index, vf->rxq_mask))
+		++vf->rxq_count;
+	efx_siena_sriov_bufs(efx, buftbl, req->u.init_rxq.addr, buf_count);
+
+	label = req->u.init_rxq.label & EFX_FIELD_MASK(FRF_AZ_RX_DESCQ_LABEL);
+	EFX_POPULATE_OWORD_6(reg,
+			     FRF_AZ_RX_DESCQ_BUF_BASE_ID, buftbl,
+			     FRF_AZ_RX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
+			     FRF_AZ_RX_DESCQ_LABEL, label,
+			     FRF_AZ_RX_DESCQ_SIZE, __ffs(buf_count),
+			     FRF_AZ_RX_DESCQ_JUMBO,
+			     !!(req->u.init_rxq.flags &
+				VFDI_RXQ_FLAG_SCATTER_EN),
+			     FRF_AZ_RX_DESCQ_EN, 1);
+	efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
+			 abs_index(vf, vf_rxq));
+
+	return VFDI_RC_SUCCESS;
+}
+
+/* Handle VFDI_OP_INIT_TXQ: set up a transmit queue for the VF.
+ *
+ * The request in vf->buf is validated (the queue index must also be below
+ * the vf_max_tx_channels module parameter), the queue is recorded in
+ * vf->txq_mask / vf->txq_count under txq_lock, the guest pages backing the
+ * descriptor ring are written to the buffer table and the TX descriptor
+ * pointer table entry is programmed. Ethernet TX filtering is enabled for
+ * the queue when the VF's tx_filter_mode is VF_TX_FILTER_ON.
+ *
+ * Returns VFDI_RC_SUCCESS, or VFDI_RC_EINVAL for a bad queue index, event
+ * queue index or buffer count.
+ */
+static int efx_vfdi_init_txq(struct siena_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	struct vfdi_req *req = vf->buf.addr;
+	unsigned vf_txq = req->u.init_txq.index;
+	unsigned vf_evq = req->u.init_txq.evq;
+	unsigned buf_count = req->u.init_txq.buf_count;
+	unsigned buftbl = EFX_BUFTBL_TXQ_BASE(vf, vf_txq);
+	unsigned label, eth_filt_en;
+	efx_oword_t reg;
+
+	if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_txq) ||
+	    vf_txq >= vf_max_tx_channels ||
+	    bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
+		if (net_ratelimit())
+			netif_err(efx, hw, efx->net_dev,
+				  "ERROR: Invalid INIT_TXQ from %s: txq %d evq %d "
+				  "buf_count %d\n", vf->pci_name, vf_txq,
+				  vf_evq, buf_count);
+		return VFDI_RC_EINVAL;
+	}
+
+	mutex_lock(&vf->txq_lock);
+	/* __test_and_set_bit() returns the previous value of the bit, so the
+	 * queue is only counted when it was not already initialised. This
+	 * keeps txq_count equal to the number of bits set in txq_mask.
+	 */
+	if (!__test_and_set_bit(req->u.init_txq.index, vf->txq_mask))
+		++vf->txq_count;
+	mutex_unlock(&vf->txq_lock);
+	efx_siena_sriov_bufs(efx, buftbl, req->u.init_txq.addr, buf_count);
+
+	eth_filt_en = vf->tx_filter_mode == VF_TX_FILTER_ON;
+
+	label = req->u.init_txq.label & EFX_FIELD_MASK(FRF_AZ_TX_DESCQ_LABEL);
+	EFX_POPULATE_OWORD_8(reg,
+			     FRF_CZ_TX_DPT_Q_MASK_WIDTH, min(efx->vi_scale, 1U),
+			     FRF_CZ_TX_DPT_ETH_FILT_EN, eth_filt_en,
+			     FRF_AZ_TX_DESCQ_EN, 1,
+			     FRF_AZ_TX_DESCQ_BUF_BASE_ID, buftbl,
+			     FRF_AZ_TX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
+			     FRF_AZ_TX_DESCQ_LABEL, label,
+			     FRF_AZ_TX_DESCQ_SIZE, __ffs(buf_count),
+			     FRF_BZ_TX_NON_IP_DROP_DIS, 1);
+	efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
+			 abs_index(vf, vf_txq));
+
+	return VFDI_RC_SUCCESS;
+}
+
+/* Wake condition for efx_vfdi_fini_all_queues(): true once no TX or RX
+ * queues remain to be flushed, or when at least one RX queue flush needs
+ * to be retried.
+ */
+static bool efx_vfdi_flush_wake(struct siena_vf *vf)
+{
+	/* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */
+	smp_mb();
+
+	if (!vf->txq_count && !vf->rxq_count)
+		return true;
+
+	return atomic_read(&vf->rxq_retry_count) != 0;
+}
+
+/* Forget all queue tracking state of @vf: the TX, RX and RX-retry masks
+ * are cleared and their counters are reset to zero.
+ */
+static void efx_vfdi_flush_clear(struct siena_vf *vf)
+{
+	/* Transmit queues */
+	memset(vf->txq_mask, 0, sizeof(vf->txq_mask));
+	vf->txq_count = 0;
+
+	/* Receive queues */
+	memset(vf->rxq_mask, 0, sizeof(vf->rxq_mask));
+	vf->rxq_count = 0;
+
+	/* Receive queues awaiting a repeated flush */
+	memset(vf->rxq_retry_mask, 0, sizeof(vf->rxq_retry_mask));
+	atomic_set(&vf->rxq_retry_count, 0);
+}
+
+/* Handle VFDI_OP_FINI_ALL_QUEUES: flush and tear down every queue of the VF.
+ *
+ * TX queues are flushed by writing FR_AZ_TX_FLUSH_DESCQ and RX queues by
+ * MC_CMD_FLUSH_RX_QUEUES. The function then waits on flush_waitq, re-issuing
+ * the RX flush for any queues marked in rxq_retry_mask, until all queues are
+ * done or roughly one second (HZ jiffies) has elapsed in total. Regardless
+ * of the outcome, the descriptor, event queue and timer table entries and
+ * the buffer table entries of the VF are zeroed and the tracking state is
+ * cleared.
+ *
+ * Returns 0 on success or VFDI_RC_ETIMEDOUT if the flushes did not complete
+ * in time.
+ */
+static int efx_vfdi_fini_all_queues(struct siena_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	efx_oword_t reg;
+	unsigned count = efx_vf_size(efx);
+	unsigned vf_offset = EFX_VI_BASE + vf->index * efx_vf_size(efx);
+	unsigned timeout = HZ;
+	unsigned index, rxqs_count;
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_FLUSH_RX_QUEUES_IN_LENMAX);
+	int rc;
+
+	/* All RX queues of a VF must fit in a single flush request */
+	BUILD_BUG_ON(VF_MAX_RX_QUEUES >
+		     MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM);
+
+	rtnl_lock();
+	siena_prepare_flush(efx);
+	rtnl_unlock();
+
+	/* Flush all the initialized queues */
+	rxqs_count = 0;
+	for (index = 0; index < count; ++index) {
+		if (test_bit(index, vf->txq_mask)) {
+			EFX_POPULATE_OWORD_2(reg,
+					     FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
+					     FRF_AZ_TX_FLUSH_DESCQ,
+					     vf_offset + index);
+			efx_writeo(efx, &reg, FR_AZ_TX_FLUSH_DESCQ);
+		}
+		if (test_bit(index, vf->rxq_mask)) {
+			MCDI_SET_ARRAY_DWORD(
+				inbuf, FLUSH_RX_QUEUES_IN_QID_OFST,
+				rxqs_count, vf_offset + index);
+			rxqs_count++;
+		}
+	}
+
+	atomic_set(&vf->rxq_retry_count, 0);
+	while (timeout && (vf->rxq_count || vf->txq_count)) {
+		rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, inbuf,
+				  MC_CMD_FLUSH_RX_QUEUES_IN_LEN(rxqs_count),
+				  NULL, 0, NULL);
+		WARN_ON(rc < 0);
+
+		/* wait_event_timeout() returns the remaining jiffies, so the
+		 * timeout budget is shared across all retries.
+		 */
+		timeout = wait_event_timeout(vf->flush_waitq,
+					     efx_vfdi_flush_wake(vf),
+					     timeout);
+
+		/* Build the next request from the queues needing a retry */
+		rxqs_count = 0;
+		for (index = 0; index < count; ++index) {
+			if (test_and_clear_bit(index, vf->rxq_retry_mask)) {
+				atomic_dec(&vf->rxq_retry_count);
+				MCDI_SET_ARRAY_DWORD(
+					inbuf, FLUSH_RX_QUEUES_IN_QID_OFST,
+					rxqs_count, vf_offset + index);
+				rxqs_count++;
+			}
+		}
+	}
+
+	rtnl_lock();
+	siena_finish_flush(efx);
+	rtnl_unlock();
+
+	/* Irrespective of success/failure, fini the queues */
+	EFX_ZERO_OWORD(reg);
+	for (index = 0; index < count; ++index) {
+		efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
+				 vf_offset + index);
+		efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
+				 vf_offset + index);
+		efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL,
+				 vf_offset + index);
+		efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL,
+				 vf_offset + index);
+	}
+	efx_siena_sriov_bufs(efx, vf->buftbl_base, NULL,
+			     EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx));
+	efx_vfdi_flush_clear(vf);
+
+	vf->evq0_count = 0;
+
+	return timeout ? 0 : VFDI_RC_ETIMEDOUT;
+}
+
+/* Handle VFDI_OP_INSERT_FILTER: record the RX filter requested by the VF
+ * and install it.
+ *
+ * Only one filter per VF is supported, so the request is rejected with
+ * VFDI_RC_EINVAL if filtering is already active or the RX queue index is
+ * out of range. On success the peer work item is scheduled.
+ */
+static int efx_vfdi_insert_filter(struct siena_vf *vf)
+{
+	struct efx_nic *efx = vf->efx;
+	struct siena_nic_data *nic_data = efx->nic_data;
+	struct vfdi_req *req = vf->buf.addr;
+	unsigned vf_rxq = req->u.mac_filter.rxq;
+	unsigned filter_flags = 0;
+
+	if (bad_vf_index(efx, vf_rxq) || vf->rx_filtering) {
+		if (net_ratelimit())
+			netif_err(efx, hw, efx->net_dev,
+				  "ERROR: Invalid INSERT_FILTER from %s: rxq %d "
+				  "flags 0x%x\n", vf->pci_name, vf_rxq,
+				  req->u.mac_filter.flags);
+		return VFDI_RC_EINVAL;
+	}
+
+	/* Translate the VFDI flags into driver filter flags */
+	if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_RSS)
+		filter_flags |= EFX_FILTER_FLAG_RX_RSS;
+	if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_SCATTER)
+		filter_flags |= EFX_FILTER_FLAG_RX_SCATTER;
+
+	vf->rx_filter_flags = filter_flags;
+	vf->rx_filter_qid = vf_rxq;
+	vf->rx_filtering = true;
+
+	efx_siena_sriov_reset_rx_filter(vf);
+	queue_work(vfdi_workqueue, &nic_data->peer_work);
+
+	return VFDI_RC_SUCCESS;
+}
+
+/* Handle VFDI_OP_REMOVE_ALL_FILTERS: turn RX filtering off for the VF,
+ * remove its RX filter and schedule the peer work item. Always succeeds.
+ */
+static int efx_vfdi_remove_all_filters(struct siena_vf *vf)
+{
+	struct siena_nic_data *nic_data = vf->efx->nic_data;
+
+	vf->rx_filtering = false;
+	efx_siena_sriov_reset_rx_filter(vf);
+
+	queue_work(vfdi_workqueue, &nic_data->peer_work);
+
+	return VFDI_RC_SUCCESS;
+}
+
+/* Handle VFDI_OP_SET_STATUS_PAGE: record where the VF wants its status page
+ * and peer pages written, then push the current status to it.
+ *
+ * Returns VFDI_RC_EINVAL if the status DMA address is zero or more peer
+ * pages are listed than fit in a one-page request, else VFDI_RC_SUCCESS.
+ */
+static int efx_vfdi_set_status_page(struct siena_vf *vf)
+{
+ struct efx_nic *efx = vf->efx;
+ struct siena_nic_data *nic_data = efx->nic_data;
+ struct vfdi_req *req = vf->buf.addr;
+ u64 page_count = req->u.set_status_page.peer_page_count;
+ /* Number of peer_page_addr[] entries that fit in the request page */
+ u64 max_page_count =
+ (EFX_PAGE_SIZE -
+ offsetof(struct vfdi_req, u.set_status_page.peer_page_addr[0]))
+ / sizeof(req->u.set_status_page.peer_page_addr[0]);
+
+ if (!req->u.set_status_page.dma_addr || page_count > max_page_count) {
+ if (net_ratelimit())
+ netif_err(efx, hw, efx->net_dev,
+ "ERROR: Invalid SET_STATUS_PAGE from %s\n",
+ vf->pci_name);
+ return VFDI_RC_EINVAL;
+ }
+
+ /* local_lock is taken before status_lock; both are required by
+ * __efx_siena_sriov_push_vf_status().
+ */
+ mutex_lock(&nic_data->local_lock);
+ mutex_lock(&vf->status_lock);
+ vf->status_addr = req->u.set_status_page.dma_addr;
+
+ kfree(vf->peer_page_addrs);
+ vf->peer_page_addrs = NULL;
+ vf->peer_page_count = 0;
+
+ if (page_count) {
+ vf->peer_page_addrs = kcalloc(page_count, sizeof(u64),
+ GFP_KERNEL);
+ /* NOTE(review): if the allocation fails the VF is left with
+ * no peer pages and the request still reports success.
+ */
+ if (vf->peer_page_addrs) {
+ memcpy(vf->peer_page_addrs,
+ req->u.set_status_page.peer_page_addr,
+ page_count * sizeof(u64));
+ vf->peer_page_count = page_count;
+ }
+ }
+
+ __efx_siena_sriov_push_vf_status(vf);
+ mutex_unlock(&vf->status_lock);
+ mutex_unlock(&nic_data->local_lock);
+
+ return VFDI_RC_SUCCESS;
+}
+
+/* Handle VFDI_OP_CLEAR_STATUS_PAGE: forget the VF's status page address.
+ * The update is made under status_lock. Always succeeds.
+ */
+static int efx_vfdi_clear_status_page(struct siena_vf *vf)
+{
+	struct mutex *lock = &vf->status_lock;
+
+	mutex_lock(lock);
+	vf->status_addr = 0;
+	mutex_unlock(lock);
+
+	return VFDI_RC_SUCCESS;
+}
+
+/* Handler for one VFDI operation. The request is read from vf->buf and the
+ * return value is sent back to the VF as the VFDI return code.
+ */
+typedef int (*efx_vfdi_op_t)(struct siena_vf *vf);
+
+/* Dispatch table indexed by VFDI opcode. Opcodes without an entry are NULL
+ * and are answered with VFDI_RC_EOPNOTSUPP by efx_siena_sriov_vfdi().
+ */
+static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = {
+ [VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq,
+ [VFDI_OP_INIT_TXQ] = efx_vfdi_init_txq,
+ [VFDI_OP_INIT_RXQ] = efx_vfdi_init_rxq,
+ [VFDI_OP_FINI_ALL_QUEUES] = efx_vfdi_fini_all_queues,
+ [VFDI_OP_INSERT_FILTER] = efx_vfdi_insert_filter,
+ [VFDI_OP_REMOVE_ALL_FILTERS] = efx_vfdi_remove_all_filters,
+ [VFDI_OP_SET_STATUS_PAGE] = efx_vfdi_set_status_page,
+ [VFDI_OP_CLEAR_STATUS_PAGE] = efx_vfdi_clear_status_page,
+};
+
+/* Work item handler for an incoming VFDI request (vf->req).
+ *
+ * Copies the request page from the VF into vf->buf, dispatches it through
+ * vfdi_ops[], and writes the return code and a VFDI_OP_RESPONSE opcode back
+ * into the VF's request page. If the request cannot be fetched, no reply is
+ * sent. In every case vf->busy is cleared so further requests are accepted.
+ */
+static void efx_siena_sriov_vfdi(struct work_struct *work)
+{
+ struct siena_vf *vf = container_of(work, struct siena_vf, req);
+ struct efx_nic *efx = vf->efx;
+ struct vfdi_req *req = vf->buf.addr;
+ struct efx_memcpy_req copy[2];
+ int rc;
+
+ /* Copy this page into the local address space */
+ memset(copy, '\0', sizeof(copy));
+ copy[0].from_rid = vf->pci_rid;
+ copy[0].from_addr = vf->req_addr;
+ copy[0].to_rid = efx->pci_dev->devfn;
+ copy[0].to_addr = vf->buf.dma_addr;
+ copy[0].length = EFX_PAGE_SIZE;
+ rc = efx_siena_sriov_memcpy(efx, copy, 1);
+ if (rc) {
+ /* If we can't get the request, we can't reply to the caller */
+ if (net_ratelimit())
+ netif_err(efx, hw, efx->net_dev,
+ "ERROR: Unable to fetch VFDI request from %s rc %d\n",
+ vf->pci_name, -rc);
+ vf->busy = false;
+ return;
+ }
+
+ /* Dispatch only opcodes that are in range and have a handler */
+ if (req->op < VFDI_OP_LIMIT && vfdi_ops[req->op] != NULL) {
+ rc = vfdi_ops[req->op](vf);
+ if (rc == 0) {
+ netif_dbg(efx, hw, efx->net_dev,
+ "vfdi request %d from %s ok\n",
+ req->op, vf->pci_name);
+ }
+ } else {
+ netif_dbg(efx, hw, efx->net_dev,
+ "ERROR: Unrecognised request %d from VF %s addr "
+ "%llx\n", req->op, vf->pci_name,
+ (unsigned long long)vf->req_addr);
+ rc = VFDI_RC_EOPNOTSUPP;
+ }
+
+ /* Allow subsequent VF requests */
+ vf->busy = false;
+ smp_wmb();
+
+ /* Respond to the request */
+ req->rc = rc;
+ req->op = VFDI_OP_RESPONSE;
+
+ /* Both fields are sent as inline data (from_buf set), rc first */
+ memset(copy, '\0', sizeof(copy));
+ copy[0].from_buf = &req->rc;
+ copy[0].to_rid = vf->pci_rid;
+ copy[0].to_addr = vf->req_addr + offsetof(struct vfdi_req, rc);
+ copy[0].length = sizeof(req->rc);
+ copy[1].from_buf = &req->op;
+ copy[1].to_rid = vf->pci_rid;
+ copy[1].to_addr = vf->req_addr + offsetof(struct vfdi_req, op);
+ copy[1].length = sizeof(req->op);
+
+ (void)efx_siena_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
+}
+
+
+
+/* After a reset the event queues inside the guests no longer exist. Fill the
+ * event ring in guest memory with VFDI reset events, then (re-initialise) the
+ * event queue to raise an interrupt. The guest driver will then recover.
+ *
+ * @buffer must be a DMA buffer of exactly EFX_PAGE_SIZE bytes; it is used as
+ * scratch space for a page full of reset events. Nothing is done if the VF
+ * has not initialised evq0. If copying the events to the guest fails, an
+ * error is logged and the event queue is still re-initialised.
+ */
+static void efx_siena_sriov_reset_vf(struct siena_vf *vf,
+				     struct efx_buffer *buffer)
+{
+	struct efx_nic *efx = vf->efx;
+	struct efx_memcpy_req copy_req[4];
+	efx_qword_t event;
+	unsigned int pos, count, k, buftbl, abs_evq;
+	efx_oword_t reg;
+	efx_dword_t ptr;
+	int rc;
+
+	BUG_ON(buffer->len != EFX_PAGE_SIZE);
+
+	if (!vf->evq0_count)
+		return;
+	BUG_ON(vf->evq0_count & (vf->evq0_count - 1));
+
+	mutex_lock(&vf->status_lock);
+	/* Truncate the sequence number to 8 bits, as is done when building
+	 * the status event in __efx_siena_sriov_push_vf_status().
+	 */
+	EFX_POPULATE_QWORD_3(event,
+			     FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
+			     VFDI_EV_SEQ, (vf->msg_seqno & 0xff),
+			     VFDI_EV_TYPE, VFDI_EV_TYPE_RESET);
+	vf->msg_seqno++;
+	for (pos = 0; pos < EFX_PAGE_SIZE; pos += sizeof(event))
+		memcpy(buffer->addr + pos, &event, sizeof(event));
+
+	/* Copy the page of reset events over every page of evq0, a batch of
+	 * up to ARRAY_SIZE(copy_req) pages at a time.
+	 */
+	for (pos = 0; pos < vf->evq0_count; pos += count) {
+		count = min_t(unsigned, vf->evq0_count - pos,
+			      ARRAY_SIZE(copy_req));
+		for (k = 0; k < count; k++) {
+			copy_req[k].from_buf = NULL;
+			copy_req[k].from_rid = efx->pci_dev->devfn;
+			copy_req[k].from_addr = buffer->dma_addr;
+			copy_req[k].to_rid = vf->pci_rid;
+			copy_req[k].to_addr = vf->evq0_addrs[pos + k];
+			copy_req[k].length = EFX_PAGE_SIZE;
+		}
+		rc = efx_siena_sriov_memcpy(efx, copy_req, count);
+		if (rc) {
+			if (net_ratelimit())
+				netif_err(efx, hw, efx->net_dev,
+					  "ERROR: Unable to notify %s of reset"
+					  ": %d\n", vf->pci_name, -rc);
+			break;
+		}
+	}
+
+	/* Reinitialise, arm and trigger evq0 */
+	abs_evq = abs_index(vf, 0);
+	buftbl = EFX_BUFTBL_EVQ_BASE(vf, 0);
+	efx_siena_sriov_bufs(efx, buftbl, vf->evq0_addrs, vf->evq0_count);
+
+	EFX_POPULATE_OWORD_3(reg,
+			     FRF_CZ_TIMER_Q_EN, 1,
+			     FRF_CZ_HOST_NOTIFY_MODE, 0,
+			     FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
+	efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
+	EFX_POPULATE_OWORD_3(reg,
+			     FRF_AZ_EVQ_EN, 1,
+			     FRF_AZ_EVQ_SIZE, __ffs(vf->evq0_count),
+			     FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
+	efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
+	EFX_POPULATE_DWORD_1(ptr, FRF_AZ_EVQ_RPTR, 0);
+	efx_writed(efx, &ptr, FR_BZ_EVQ_RPTR + FR_BZ_EVQ_RPTR_STEP * abs_evq);
+
+	mutex_unlock(&vf->status_lock);
+}
+
+/* Work item handler that resets a single VF.
+ *
+ * This handler belongs to the reset_work member of struct siena_vf (the req
+ * member is the VFDI request work item handled by efx_siena_sriov_vfdi()),
+ * so the owning VF must be recovered from reset_work; using any other member
+ * would yield a pointer that is offset from the real structure.
+ *
+ * A temporary one-page DMA buffer is allocated for the reset events. If the
+ * allocation fails the reset is silently skipped.
+ */
+static void efx_siena_sriov_reset_vf_work(struct work_struct *work)
+{
+	struct siena_vf *vf = container_of(work, struct siena_vf, reset_work);
+	struct efx_nic *efx = vf->efx;
+	struct efx_buffer buf;
+
+	if (!efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE, GFP_NOIO)) {
+		efx_siena_sriov_reset_vf(vf, &buf);
+		efx_nic_free_buffer(efx, &buf);
+	}
+}
+
+/* Channel type callback used when no channel could be provided for IOV:
+ * log the reason and disable SR-IOV by setting the VF count to zero.
+ */
+static void efx_siena_sriov_handle_no_channel(struct efx_nic *efx)
+{
+	/* The message is split across two literals; the first must end with
+	 * a space so the words do not run together in the log.
+	 */
+	netif_err(efx, drv, efx->net_dev,
+		  "ERROR: IOV requires MSI-X and 1 additional interrupt "
+		  "vector. IOV disabled\n");
+	efx->vf_count = 0;
+}
+
+/* Channel type pre_probe callback: remember @channel as the VFDI channel
+ * of its NIC. Always returns 0.
+ */
+static int efx_siena_sriov_probe_channel(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+	struct siena_nic_data *nic_data = efx->nic_data;
+
+	nic_data->vfdi_channel = channel;
+	return 0;
+}
+
+/* Channel type get_name callback: format the channel name into @buf (at
+ * most @len bytes) as the NIC name followed by "-iov".
+ */
+static void
+efx_siena_sriov_get_channel_name(struct efx_channel *channel,
+				 char *buf, size_t len)
+{
+	struct efx_nic *efx = channel->efx;
+
+	snprintf(buf, len, "%s-iov", efx->name);
+}
+
+/* Channel type for the extra channel used for VFDI. It is installed as
+ * efx->extra_channel_type[EFX_EXTRA_CHANNEL_IOV] by efx_siena_sriov_probe().
+ */
+static const struct efx_channel_type efx_siena_sriov_channel_type = {
+ .handle_no_channel = efx_siena_sriov_handle_no_channel,
+ .pre_probe = efx_siena_sriov_probe_channel,
+ .post_remove = efx_channel_dummy_op_void,
+ .get_name = efx_siena_sriov_get_channel_name,
+ /* no copy operation; channel must not be reallocated */
+ .keep_eventq = true,
+};
+
+/* Probe SR-IOV support and decide how many VFs to request.
+ *
+ * Does nothing if the max_vfs module parameter is 0. Otherwise the MC is
+ * queried (with SR-IOV disabled) for the VI scale and total VF count, the
+ * count is capped at max_vfs, and the IOV extra channel type is registered.
+ * Because the VF count is unsigned, a negative max_vfs is converted to a
+ * large unsigned value in the comparison and therefore never caps it.
+ */
+void efx_siena_sriov_probe(struct efx_nic *efx)
+{
+	unsigned vf_total;
+	int rc;
+
+	if (!max_vfs)
+		return;
+
+	rc = efx_siena_sriov_cmd(efx, false, &efx->vi_scale, &vf_total);
+	if (rc) {
+		pci_info(efx->pci_dev, "no SR-IOV VFs probed\n");
+		return;
+	}
+
+	if (vf_total > 0 && vf_total > max_vfs)
+		vf_total = max_vfs;
+
+	/* efx_nic_dimension_resources() will reduce vf_count as appropriate */
+	efx->vf_count = vf_total;
+
+	efx->extra_channel_type[EFX_EXTRA_CHANNEL_IOV] = &efx_siena_sriov_channel_type;
+}
+
+/* Copy the list of individual addresses into the vfdi_status.peers
+ * array and auxiliary pages, protected by %local_lock. Drop that lock
+ * and then broadcast the address list to every VF.
+ *
+ * Work item handler for nic_data->peer_work. Entry 0 of peers[] is left
+ * alone here; VF addresses follow from entry 1, then the addresses on
+ * local_addr_list, spilling into DMA-coherent pages kept on
+ * local_page_list once the inline array is full.
+ */
+static void efx_siena_sriov_peer_work(struct work_struct *data)
+{
+ struct siena_nic_data *nic_data = container_of(data,
+ struct siena_nic_data,
+ peer_work);
+ struct efx_nic *efx = nic_data->efx;
+ struct vfdi_status *vfdi_status = nic_data->vfdi_status.addr;
+ struct siena_vf *vf;
+ struct efx_local_addr *local_addr;
+ struct vfdi_endpoint *peer;
+ struct efx_endpoint_page *epp;
+ struct list_head pages;
+ unsigned int peer_space;
+ unsigned int peer_count;
+ unsigned int pos;
+
+ mutex_lock(&nic_data->local_lock);
+
+ /* Move the existing peer pages off %local_page_list */
+ INIT_LIST_HEAD(&pages);
+ list_splice_tail_init(&nic_data->local_page_list, &pages);
+
+ /* Populate the VF addresses starting from entry 1 (entry 0 is
+ * the PF address)
+ */
+ peer = vfdi_status->peers + 1;
+ peer_space = ARRAY_SIZE(vfdi_status->peers) - 1;
+ peer_count = 1;
+ for (pos = 0; pos < efx->vf_count; ++pos) {
+ vf = nic_data->vf + pos;
+
+ mutex_lock(&vf->status_lock);
+ if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) {
+ *peer++ = vf->addr;
+ ++peer_count;
+ --peer_space;
+ /* The inline array must retain at least one free slot
+ * after all VF addresses; exhausting it is fatal.
+ */
+ BUG_ON(peer_space == 0);
+ }
+ mutex_unlock(&vf->status_lock);
+ }
+
+ /* Fill the remaining addresses */
+ list_for_each_entry(local_addr, &nic_data->local_addr_list, link) {
+ ether_addr_copy(peer->mac_addr, local_addr->addr);
+ peer->tci = 0;
+ ++peer;
+ ++peer_count;
+ if (--peer_space == 0) {
+ /* Current destination is full: take a spare page from
+ * the old list if there is one, else allocate a new
+ * page. On allocation failure stop adding addresses;
+ * peer_count covers the entries written so far.
+ */
+ if (list_empty(&pages)) {
+ epp = kmalloc(sizeof(*epp), GFP_KERNEL);
+ if (!epp)
+ break;
+ epp->ptr = dma_alloc_coherent(
+ &efx->pci_dev->dev, EFX_PAGE_SIZE,
+ &epp->addr, GFP_KERNEL);
+ if (!epp->ptr) {
+ kfree(epp);
+ break;
+ }
+ } else {
+ epp = list_first_entry(
+ &pages, struct efx_endpoint_page, link);
+ list_del(&epp->link);
+ }
+
+ list_add_tail(&epp->link, &nic_data->local_page_list);
+ peer = (struct vfdi_endpoint *)epp->ptr;
+ peer_space = EFX_PAGE_SIZE / sizeof(struct vfdi_endpoint);
+ }
+ }
+ vfdi_status->peer_count = peer_count;
+ mutex_unlock(&nic_data->local_lock);
+
+ /* Free any now unused endpoint pages */
+ while (!list_empty(&pages)) {
+ epp = list_first_entry(
+ &pages, struct efx_endpoint_page, link);
+ list_del(&epp->link);
+ dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
+ epp->ptr, epp->addr);
+ kfree(epp);
+ }
+
+ /* Finally, push the pages */
+ for (pos = 0; pos < efx->vf_count; ++pos) {
+ vf = nic_data->vf + pos;
+
+ mutex_lock(&vf->status_lock);
+ /* Only VFs that have supplied a status page address get a push */
+ if (vf->status_addr)
+ __efx_siena_sriov_push_vf_status(vf);
+ mutex_unlock(&vf->status_lock);
+ }
+}
+
+/* Release every entry on local_addr_list and every endpoint page on
+ * local_page_list (including its DMA-coherent memory), leaving both
+ * lists empty.
+ */
+static void efx_siena_sriov_free_local(struct efx_nic *efx)
+{
+ struct siena_nic_data *siena = efx->nic_data;
+ struct efx_local_addr *addr, *next_addr;
+ struct efx_endpoint_page *page, *next_page;
+
+ list_for_each_entry_safe(addr, next_addr, &siena->local_addr_list, link) {
+ list_del(&addr->link);
+ kfree(addr);
+ }
+
+ list_for_each_entry_safe(page, next_page, &siena->local_page_list, link) {
+ list_del(&page->link);
+ dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
+ page->ptr, page->addr);
+ kfree(page);
+ }
+}
+
+/* Allocate the zeroed array of efx->vf_count VF state structures and set
+ * each one's defaults, work items, wait queue and mutexes.
+ * Returns 0 on success or -ENOMEM if the array cannot be allocated.
+ */
+static int efx_siena_sriov_vf_alloc(struct efx_nic *efx)
+{
+ struct siena_nic_data *siena = efx->nic_data;
+ struct siena_vf *cur;
+ unsigned i;
+
+ siena->vf = kcalloc(efx->vf_count, sizeof(*siena->vf), GFP_KERNEL);
+ if (!siena->vf)
+ return -ENOMEM;
+
+ for (i = 0; i < efx->vf_count; ++i) {
+ cur = &siena->vf[i];
+
+ /* Identity */
+ cur->efx = efx;
+ cur->index = i;
+
+ /* No filters installed yet; TX filtering mode is automatic */
+ cur->rx_filter_id = -1;
+ cur->tx_filter_id = -1;
+ cur->tx_filter_mode = VF_TX_FILTER_AUTO;
+
+ /* Deferred work and synchronisation objects */
+ INIT_WORK(&cur->req, efx_siena_sriov_vfdi);
+ INIT_WORK(&cur->reset_work, efx_siena_sriov_reset_vf_work);
+ init_waitqueue_head(&cur->flush_waitq);
+ mutex_init(&cur->status_lock);
+ mutex_init(&cur->txq_lock);
+ }
+
+ return 0;
+}
+
+/* For each of the efx->vf_count VFs: free its buffer and its
+ * peer_page_addrs array, then clear the peer page and evq0 counts.
+ */
+static void efx_siena_sriov_vfs_fini(struct efx_nic *efx)
+{
+ struct siena_nic_data *siena = efx->nic_data;
+ unsigned int i;
+
+ for (i = 0; i < efx->vf_count; ++i) {
+ struct siena_vf *cur = &siena->vf[i];
+
+ efx_nic_free_buffer(efx, &cur->buf);
+
+ kfree(cur->peer_page_addrs);
+ cur->peer_page_addrs = NULL;
+ cur->peer_page_count = 0;
+
+ cur->evq0_count = 0;
+ }
+}
+
+/* Fill in the per-VF state that depends on the PCI SR-IOV capability.
+ *
+ * For each VF this assigns a buffer table base, the PCI requester ID
+ * (derived from the capability's VF offset and stride) and a formatted
+ * PCI name, and allocates the EFX_PAGE_SIZE buffer vf->buf.
+ *
+ * Returns 0 on success, -ENOENT if the device has no SR-IOV extended
+ * capability, or the error from efx_nic_alloc_buffer() after calling
+ * efx_siena_sriov_vfs_fini() to release what was set up.
+ */
+static int efx_siena_sriov_vfs_init(struct efx_nic *efx)
+{
+ struct pci_dev *pci_dev = efx->pci_dev;
+ struct siena_nic_data *nic_data = efx->nic_data;
+ unsigned index, devfn, sriov, buftbl_base;
+ u16 offset, stride;
+ struct siena_vf *vf;
+ int rc;
+
+ sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV);
+ if (!sriov)
+ return -ENOENT;
+
+ pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_OFFSET, &offset);
+ pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_STRIDE, &stride);
+
+ buftbl_base = nic_data->vf_buftbl_base;
+ /* First VF sits at the PF's devfn plus the VF offset; each further
+ * VF is one stride later.
+ */
+ devfn = pci_dev->devfn + offset;
+ for (index = 0; index < efx->vf_count; ++index) {
+ vf = nic_data->vf + index;
+
+ /* Reserve buffer entries */
+ vf->buftbl_base = buftbl_base;
+ buftbl_base += EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx);
+
+ vf->pci_rid = devfn;
+ snprintf(vf->pci_name, sizeof(vf->pci_name),
+ "%04x:%02x:%02x.%d",
+ pci_domain_nr(pci_dev->bus), pci_dev->bus->number,
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ rc = efx_nic_alloc_buffer(efx, &vf->buf, EFX_PAGE_SIZE,
+ GFP_KERNEL);
+ if (rc)
+ goto fail;
+
+ devfn += stride;
+ }
+
+ return 0;
+
+fail:
+ efx_siena_sriov_vfs_fini(efx);
+ return rc;
+}
+
+/* Bring up SR-IOV for this NIC.
+ *
+ * Returns 0 immediately when efx->vf_count is 0. Otherwise issues the
+ * SRIOV command with enable=true, allocates and fills the shared
+ * vfdi_status buffer, allocates and initialises the per-VF state, sets
+ * vf_init_count, enables USR_EV delivery and finally enables the PCI VFs.
+ *
+ * On failure the steps already taken are undone in reverse order through
+ * the fail_* labels and the negative error code is returned.
+ */
+int efx_siena_sriov_init(struct efx_nic *efx)
+{
+ struct net_device *net_dev = efx->net_dev;
+ struct siena_nic_data *nic_data = efx->nic_data;
+ struct vfdi_status *vfdi_status;
+ int rc;
+
+ /* Ensure there's room for vf_channel */
+ BUILD_BUG_ON(EFX_MAX_CHANNELS + 1 >= EFX_VI_BASE);
+ /* Ensure that VI_BASE is aligned on VI_SCALE */
+ BUILD_BUG_ON(EFX_VI_BASE & ((1 << EFX_VI_SCALE_MAX) - 1));
+
+ if (efx->vf_count == 0)
+ return 0;
+
+ rc = efx_siena_sriov_cmd(efx, true, NULL, NULL);
+ if (rc)
+ goto fail_cmd;
+
+ rc = efx_nic_alloc_buffer(efx, &nic_data->vfdi_status,
+ sizeof(*vfdi_status), GFP_KERNEL);
+ if (rc)
+ goto fail_status;
+ vfdi_status = nic_data->vfdi_status.addr;
+ memset(vfdi_status, 0, sizeof(*vfdi_status));
+ vfdi_status->version = 1;
+ vfdi_status->length = sizeof(*vfdi_status);
+ vfdi_status->max_tx_channels = vf_max_tx_channels;
+ vfdi_status->vi_scale = efx->vi_scale;
+ vfdi_status->rss_rxq_count = efx->rss_spread;
+ vfdi_status->peer_count = 1 + efx->vf_count;
+ vfdi_status->timer_quantum_ns = efx->timer_quantum_ns;
+
+ rc = efx_siena_sriov_vf_alloc(efx);
+ if (rc)
+ goto fail_alloc;
+
+ mutex_init(&nic_data->local_lock);
+ INIT_WORK(&nic_data->peer_work, efx_siena_sriov_peer_work);
+ INIT_LIST_HEAD(&nic_data->local_addr_list);
+ INIT_LIST_HEAD(&nic_data->local_page_list);
+
+ rc = efx_siena_sriov_vfs_init(efx);
+ if (rc)
+ goto fail_vfs;
+
+ /* Publish the PF address as peer 0 and mark the VFs initialised */
+ rtnl_lock();
+ ether_addr_copy(vfdi_status->peers[0].mac_addr, net_dev->dev_addr);
+ efx->vf_init_count = efx->vf_count;
+ rtnl_unlock();
+
+ efx_siena_sriov_usrev(efx, true);
+
+ /* At this point we must be ready to accept VFDI requests */
+
+ rc = pci_enable_sriov(efx->pci_dev, efx->vf_count);
+ if (rc)
+ goto fail_pci;
+
+ netif_info(efx, probe, net_dev,
+ "enabled SR-IOV for %d VFs, %d VI per VF\n",
+ efx->vf_count, efx_vf_size(efx));
+ return 0;
+
+ /* Unwind in reverse order of setup */
+fail_pci:
+ efx_siena_sriov_usrev(efx, false);
+ rtnl_lock();
+ efx->vf_init_count = 0;
+ rtnl_unlock();
+ efx_siena_sriov_vfs_fini(efx);
+fail_vfs:
+ cancel_work_sync(&nic_data->peer_work);
+ efx_siena_sriov_free_local(efx);
+ kfree(nic_data->vf);
+fail_alloc:
+ efx_nic_free_buffer(efx, &nic_data->vfdi_status);
+fail_status:
+ efx_siena_sriov_cmd(efx, false, NULL, NULL);
+fail_cmd:
+ return rc;
+}
+
+/* Tear down SR-IOV; the counterpart of efx_siena_sriov_init().
+ *
+ * Does nothing when vf_init_count is 0. Otherwise disables USR_EV
+ * delivery, clears vf_init_count, cancels all per-VF and peer work,
+ * disables the PCI VFs, frees the per-VF and shared state and issues the
+ * SRIOV command with enable=false. The VFDI channel must already be
+ * disabled when this is called (enforced with BUG_ON).
+ */
+void efx_siena_sriov_fini(struct efx_nic *efx)
+{
+ struct siena_vf *vf;
+ unsigned int pos;
+ struct siena_nic_data *nic_data = efx->nic_data;
+
+ if (efx->vf_init_count == 0)
+ return;
+
+ /* Disable all interfaces to reconfiguration */
+ BUG_ON(nic_data->vfdi_channel->enabled);
+ efx_siena_sriov_usrev(efx, false);
+ rtnl_lock();
+ efx->vf_init_count = 0;
+ rtnl_unlock();
+
+ /* Flush all reconfiguration work */
+ for (pos = 0; pos < efx->vf_count; ++pos) {
+ vf = nic_data->vf + pos;
+ cancel_work_sync(&vf->req);
+ cancel_work_sync(&vf->reset_work);
+ }
+ cancel_work_sync(&nic_data->peer_work);
+
+ pci_disable_sriov(efx->pci_dev);
+
+ /* Tear down back-end state */
+ efx_siena_sriov_vfs_fini(efx);
+ efx_siena_sriov_free_local(efx);
+ kfree(nic_data->vf);
+ efx_nic_free_buffer(efx, &nic_data->vfdi_status);
+ efx_siena_sriov_cmd(efx, false, NULL, NULL);
+}
+
+/* Handle one USR_EV event sent by a VF as part of a VFDI request.
+ *
+ * A request address is assembled in vf->req_addr from a sequence of
+ * REQ_WORD0..REQ_WORD3 events: WORD0..WORD2 data is shifted left by
+ * (type << 4) bits, WORD3 data by 48 bits. When WORD3 arrives the VF is
+ * marked busy and vf->req is queued on vfdi_workqueue.
+ *
+ * An event for a busy VF, or one whose sequence number or type is not
+ * the expected one (other than a WORD0, which resynchronises), is
+ * reported with a rate-limited error and resets the expected type and
+ * sequence number. Events whose queue ID does not map to a VF are
+ * dropped silently.
+ */
+void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event)
+{
+ struct efx_nic *efx = channel->efx;
+ struct siena_vf *vf;
+ unsigned qid, seq, type, data;
+
+ qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID);
+
+ /* USR_EV_REG_VALUE is dword0, so access the VFDI_EV fields directly */
+ BUILD_BUG_ON(FSF_CZ_USER_EV_REG_VALUE_LBN != 0);
+ seq = EFX_QWORD_FIELD(*event, VFDI_EV_SEQ);
+ type = EFX_QWORD_FIELD(*event, VFDI_EV_TYPE);
+ data = EFX_QWORD_FIELD(*event, VFDI_EV_DATA);
+
+ netif_vdbg(efx, hw, efx->net_dev,
+ "USR_EV event from qid %d seq 0x%x type %d data 0x%x\n",
+ qid, seq, type, data);
+
+ if (map_vi_index(efx, qid, &vf, NULL))
+ return;
+ if (vf->busy)
+ goto error;
+
+ if (type == VFDI_EV_TYPE_REQ_WORD0) {
+ /* Resynchronise */
+ vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
+ vf->req_seqno = seq + 1;
+ vf->req_addr = 0;
+ } else if (seq != (vf->req_seqno++ & 0xff) || type != vf->req_type)
+ goto error;
+
+ switch (vf->req_type) {
+ case VFDI_EV_TYPE_REQ_WORD0:
+ case VFDI_EV_TYPE_REQ_WORD1:
+ case VFDI_EV_TYPE_REQ_WORD2:
+ /* Accumulate this word and expect the next type */
+ vf->req_addr |= (u64)data << (vf->req_type << 4);
+ ++vf->req_type;
+ return;
+
+ case VFDI_EV_TYPE_REQ_WORD3:
+ /* Address complete: hand the request to the work item */
+ vf->req_addr |= (u64)data << 48;
+ vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
+ vf->busy = true;
+ queue_work(vfdi_workqueue, &vf->req);
+ return;
+ }
+
+error:
+ if (net_ratelimit())
+ netif_err(efx, hw, efx->net_dev,
+ "ERROR: Screaming VFDI request from %s\n",
+ vf->pci_name);
+ /* Reset the request and sequence number */
+ vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
+ vf->req_seqno = seq + 1;
+}
+
+/* Handle a function-level reset (FLR) of VF @vf_i.
+ *
+ * Logs the event, clears the VF's status page address, calls
+ * efx_vfdi_remove_all_filters() and efx_vfdi_flush_clear() for it and
+ * zeroes its evq0 count. Out-of-range indices are ignored.
+ */
+void efx_siena_sriov_flr(struct efx_nic *efx, unsigned vf_i)
+{
+ struct siena_nic_data *nic_data = efx->nic_data;
+ struct siena_vf *vf;
+
+ /* Valid indices are 0 .. vf_init_count - 1, as in the set_vf_* and
+ * get_vf_config handlers; vf_init_count itself is one past the end
+ * of the nic_data->vf array.
+ */
+ if (vf_i >= efx->vf_init_count)
+ return;
+ vf = nic_data->vf + vf_i;
+ netif_info(efx, hw, efx->net_dev,
+ "FLR on VF %s\n", vf->pci_name);
+
+ vf->status_addr = 0;
+ efx_vfdi_remove_all_filters(vf);
+ efx_vfdi_flush_clear(vf);
+
+ vf->evq0_count = 0;
+}
+
+/* Refresh peer entry 0 with the net device's current MAC address and
+ * schedule peer_work, provided VFs have been initialised.
+ * Always returns 0.
+ */
+int efx_siena_sriov_mac_address_changed(struct efx_nic *efx)
+{
+ struct siena_nic_data *siena = efx->nic_data;
+ struct vfdi_status *status = siena->vfdi_status.addr;
+
+ if (efx->vf_init_count) {
+ ether_addr_copy(status->peers[0].mac_addr,
+ efx->net_dev->dev_addr);
+ queue_work(vfdi_workqueue, &siena->peer_work);
+ }
+
+ return 0;
+}
+
+/* Account for a TX flush completion event: if the queue belongs to a VF
+ * and is marked in its txq_mask, clear the bit, drop txq_count and wake
+ * any flush waiter when efx_vfdi_flush_wake() says so.
+ */
+void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
+{
+ unsigned abs_qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
+ struct siena_vf *vf;
+ unsigned rel_qid;
+
+ if (map_vi_index(efx, abs_qid, &vf, &rel_qid))
+ return;
+
+ /* Ignore flush completions triggered by an FLR */
+ if (test_bit(rel_qid, vf->txq_mask)) {
+ __clear_bit(rel_qid, vf->txq_mask);
+ --vf->txq_count;
+
+ if (efx_vfdi_flush_wake(vf))
+ wake_up(&vf->flush_waitq);
+ }
+}
+
+/* Account for an RX flush completion event on a VF queue marked in
+ * rxq_mask. A failed flush is recorded in rxq_retry_mask/rxq_retry_count;
+ * a successful one clears the rxq_mask bit and drops rxq_count. Either
+ * way the flush waiter is woken when efx_vfdi_flush_wake() says so.
+ */
+void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event)
+{
+ unsigned abs_qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
+ unsigned failed = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
+ struct siena_vf *vf;
+ unsigned rel_qid;
+
+ if (map_vi_index(efx, abs_qid, &vf, &rel_qid) ||
+ !test_bit(rel_qid, vf->rxq_mask))
+ return;
+
+ if (!failed) {
+ __clear_bit(rel_qid, vf->rxq_mask);
+ --vf->rxq_count;
+ } else {
+ set_bit(rel_qid, vf->rxq_retry_mask);
+ atomic_inc(&vf->rxq_retry_count);
+ }
+
+ if (efx_vfdi_flush_wake(vf))
+ wake_up(&vf->flush_waitq);
+}
+
+/* Called from napi when DMA queue @dmaq reports a descriptor fetch
+ * error. If the queue maps to a VF, log it (rate-limited) and queue that
+ * VF's reset work item.
+ */
+void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq)
+{
+ struct siena_vf *vf;
+ unsigned int rel_q;
+
+ if (map_vi_index(efx, dmaq, &vf, &rel_q) != 0)
+ return;
+
+ if (net_ratelimit()) {
+ netif_err(efx, hw, efx->net_dev,
+ "VF %d DMA Q %d reports descriptor fetch error.\n",
+ vf->index, rel_q);
+ }
+
+ queue_work(vfdi_workqueue, &vf->reset_work);
+}
+
+/* Reset all VFs
+ *
+ * Must be called with the RTNL lock held. Does nothing when no VFs are
+ * initialised. Re-enables USR_EV delivery and re-issues the SRIOV enable
+ * command (its result is deliberately ignored), then resets each
+ * initialised VF using one shared EFX_PAGE_SIZE buffer. If that buffer
+ * cannot be allocated the per-VF resets are skipped.
+ */
+void efx_siena_sriov_reset(struct efx_nic *efx)
+{
+ struct siena_nic_data *nic_data = efx->nic_data;
+ unsigned int vf_i;
+ struct efx_buffer buf;
+ struct siena_vf *vf;
+
+ ASSERT_RTNL();
+
+ if (efx->vf_init_count == 0)
+ return;
+
+ efx_siena_sriov_usrev(efx, true);
+ (void)efx_siena_sriov_cmd(efx, true, NULL, NULL);
+
+ if (efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE, GFP_NOIO))
+ return;
+
+ for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) {
+ vf = nic_data->vf + vf_i;
+ efx_siena_sriov_reset_vf(vf, &buf);
+ }
+
+ efx_nic_free_buffer(efx, &buf);
+}
+
+/* Create the workqueue used for VFDI work items.
+ * Returns 0 on success or -ENOMEM if the workqueue cannot be created.
+ */
+int efx_init_sriov(void)
+{
+ /* One thread is enough: efx_siena_sriov_vfdi() and
+ * efx_siena_sriov_peer_work() spend almost all their time sleeping
+ * for MCDI to complete anyway
+ */
+ vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi");
+
+ return vfdi_workqueue ? 0 : -ENOMEM;
+}
+
+/* Destroy the VFDI workqueue created by efx_init_sriov(). */
+void efx_fini_sriov(void)
+{
+ destroy_workqueue(vfdi_workqueue);
+}
+
+/* Set the MAC address of VF @vf_i and propagate it via
+ * __efx_siena_sriov_update_vf_addr(), all under the VF's status_lock.
+ * Returns 0, or -EINVAL if @vf_i is not an initialised VF.
+ */
+int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, const u8 *mac)
+{
+ struct siena_nic_data *siena = efx->nic_data;
+ struct siena_vf *target;
+
+ if (vf_i >= efx->vf_init_count)
+ return -EINVAL;
+
+ target = &siena->vf[vf_i];
+
+ mutex_lock(&target->status_lock);
+ ether_addr_copy(target->addr.mac_addr, mac);
+ __efx_siena_sriov_update_vf_addr(target);
+ mutex_unlock(&target->status_lock);
+
+ return 0;
+}
+
+/* Set the VLAN tag control information of VF @vf_i from @vlan (masked
+ * with VLAN_VID_MASK) and @qos (low three bits, placed at
+ * VLAN_PRIO_SHIFT), then propagate it via
+ * __efx_siena_sriov_update_vf_addr() under the VF's status_lock.
+ * Returns 0, or -EINVAL if @vf_i is not an initialised VF.
+ */
+int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i,
+ u16 vlan, u8 qos)
+{
+ struct siena_nic_data *siena = efx->nic_data;
+ struct siena_vf *target;
+ u16 vid, prio;
+
+ if (vf_i >= efx->vf_init_count)
+ return -EINVAL;
+
+ target = &siena->vf[vf_i];
+ vid = vlan & VLAN_VID_MASK;
+ prio = (qos & 0x7) << VLAN_PRIO_SHIFT;
+
+ mutex_lock(&target->status_lock);
+ target->addr.tci = htons(vid | prio);
+ __efx_siena_sriov_update_vf_addr(target);
+ mutex_unlock(&target->status_lock);
+
+ return 0;
+}
+
+/* Switch TX filtering for VF @vf_i on or off according to @spoofchk.
+ * Returns 0 on success, -EINVAL if @vf_i is not an initialised VF, or
+ * -EBUSY if the VF currently has TX queues (txq_count != 0).
+ */
+int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf_i,
+ bool spoofchk)
+{
+ struct siena_nic_data *siena = efx->nic_data;
+ struct siena_vf *target;
+ int rc = -EBUSY;
+
+ if (vf_i >= efx->vf_init_count)
+ return -EINVAL;
+
+ target = &siena->vf[vf_i];
+
+ mutex_lock(&target->txq_lock);
+ /* This cannot be changed while TX queues are running */
+ if (target->txq_count == 0) {
+ if (spoofchk)
+ target->tx_filter_mode = VF_TX_FILTER_ON;
+ else
+ target->tx_filter_mode = VF_TX_FILTER_OFF;
+ rc = 0;
+ }
+ mutex_unlock(&target->txq_lock);
+
+ return rc;
+}
+
+/* Fill @ivi with the configuration of VF @vf_i: index, MAC address, VLAN
+ * ID and priority decoded from the stored TCI, spoof-check state, and
+ * zero TX rate limits.
+ * Returns 0, or -EINVAL if @vf_i is not an initialised VF.
+ */
+int efx_siena_sriov_get_vf_config(struct efx_nic *efx, int vf_i,
+ struct ifla_vf_info *ivi)
+{
+ struct siena_nic_data *siena = efx->nic_data;
+ struct siena_vf *src;
+ u16 tci;
+
+ if (vf_i >= efx->vf_init_count)
+ return -EINVAL;
+
+ src = &siena->vf[vf_i];
+ tci = ntohs(src->addr.tci);
+
+ ivi->vf = vf_i;
+ ether_addr_copy(ivi->mac, src->addr.mac_addr);
+ ivi->vlan = tci & VLAN_VID_MASK;
+ ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7;
+ ivi->spoofchk = src->tx_filter_mode == VF_TX_FILTER_ON;
+ ivi->max_tx_rate = 0;
+ ivi->min_tx_rate = 0;
+
+ return 0;
+}
+
+/* Report whether any VFs are to be set up, i.e. efx->vf_count is non-zero. */
+bool efx_siena_sriov_wanted(struct efx_nic *efx)
+{
+ if (efx->vf_count)
+ return true;
+
+ return false;
+}
+
+/* Stub: ignores @num_vfs, performs no configuration and returns 0. */
+int efx_siena_sriov_configure(struct efx_nic *efx, int num_vfs)
+{
+ return 0;
+}
diff --git a/drivers/net/ethernet/sfc/siena/siena_sriov.h b/drivers/net/ethernet/sfc/siena/siena_sriov.h
new file mode 100644
index 000000000000..e548c4daf189
--- /dev/null
+++ b/drivers/net/ethernet/sfc/siena/siena_sriov.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2015 Solarflare Communications Inc.
+ */
+
+#ifndef SIENA_SRIOV_H
+#define SIENA_SRIOV_H
+
+#include "net_driver.h"
+
+/* On the SFC9000 family each port is associated with 1 PCI physical
+ * function (PF) handled by sfc and a configurable number of virtual
+ * functions (VFs) that may be handled by some other driver, often in
+ * a VM guest. The queue pointer registers are mapped in both PF and
+ * VF BARs such that an 8K region provides access to a single RX, TX
+ * and event queue (collectively a Virtual Interface, VI or VNIC).
+ *
+ * The PF has access to all 1024 VIs while VFs are mapped to VIs
+ * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered
+ * in range [VI_BASE + (i << VI_SCALE), VI_BASE + ((i + 1) << VI_SCALE)).
+ * The number of VIs and the VI_SCALE value are configurable but must
+ * be established at boot time by firmware.
+ */
+
+/* Maximum VI_SCALE parameter supported by Siena */
+#define EFX_VI_SCALE_MAX 6
+/* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX),
+ * so this is the smallest allowed value.
+ */
+#define EFX_VI_BASE 128U
+/* Maximum number of VFs allowed */
+#define EFX_VF_COUNT_MAX 127
+/* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */
+#define EFX_MAX_VF_EVQ_SIZE 8192UL
+/* The number of buffer table entries reserved for each VI on a VF */
+#define EFX_VF_BUFTBL_PER_VI \
+ ((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) * \
+ sizeof(efx_qword_t) / EFX_BUF_SIZE)
+
+int efx_siena_sriov_configure(struct efx_nic *efx, int num_vfs);
+int efx_siena_sriov_init(struct efx_nic *efx);
+void efx_siena_sriov_fini(struct efx_nic *efx);
+int efx_siena_sriov_mac_address_changed(struct efx_nic *efx);
+bool efx_siena_sriov_wanted(struct efx_nic *efx);
+void efx_siena_sriov_reset(struct efx_nic *efx);
+void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr);
+
+int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf, const u8 *mac);
+int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf,
+ u16 vlan, u8 qos);
+int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf,
+ bool spoofchk);
+int efx_siena_sriov_get_vf_config(struct efx_nic *efx, int vf,
+ struct ifla_vf_info *ivf);
+
+#ifdef CONFIG_SFC_SRIOV
+/* True while VFs are initialised (vf_init_count != 0); always false without SR-IOV support. */
+static inline bool efx_siena_sriov_enabled(struct efx_nic *efx)
+{
+ return efx->vf_init_count != 0;
+}
+#else /* !CONFIG_SFC_SRIOV */
+static inline bool efx_siena_sriov_enabled(struct efx_nic *efx)
+{
+ return false;
+}
+#endif /* CONFIG_SFC_SRIOV */
+
+void efx_siena_sriov_probe(struct efx_nic *efx);
+void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event);
+void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event);
+void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event);
+void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq);
+
+#endif /* SIENA_SRIOV_H */
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
deleted file mode 100644
index f12851a527d9..000000000000
--- a/drivers/net/ethernet/sfc/siena_sriov.c
+++ /dev/null
@@ -1,1686 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2010-2012 Solarflare Communications Inc.
- */
-#include <linux/pci.h>
-#include <linux/module.h>
-#include "net_driver.h"
-#include "efx.h"
-#include "efx_channels.h"
-#include "nic.h"
-#include "io.h"
-#include "mcdi.h"
-#include "filter.h"
-#include "mcdi_pcol.h"
-#include "farch_regs.h"
-#include "siena_sriov.h"
-#include "vfdi.h"
-
-/* Number of longs required to track all the VIs in a VF */
-#define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX)
-
-/* Maximum number of RX queues supported */
-#define VF_MAX_RX_QUEUES 63
-
-/**
- * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour
- * @VF_TX_FILTER_OFF: Disabled
- * @VF_TX_FILTER_AUTO: Enabled if MAC address assigned to VF and only
- * 2 TX queues allowed per VF.
- * @VF_TX_FILTER_ON: Enabled
- */
-enum efx_vf_tx_filter_mode {
- VF_TX_FILTER_OFF,
- VF_TX_FILTER_AUTO,
- VF_TX_FILTER_ON,
-};
-
-/**
- * struct siena_vf - Back-end resource and protocol state for a PCI VF
- * @efx: The Efx NIC owning this VF
- * @pci_rid: The PCI requester ID for this VF
- * @pci_name: The PCI name (formatted address) of this VF
- * @index: Index of VF within its port and PF.
- * @req: VFDI incoming request work item. Incoming USR_EV events are received
- * by the NAPI handler, but must be handled by executing MCDI requests
- * inside a work item.
- * @req_addr: VFDI incoming request DMA address (in VF's PCI address space).
- * @req_type: Expected next incoming (from VF) %VFDI_EV_TYPE member.
- * @req_seqno: Expected next incoming (from VF) %VFDI_EV_SEQ member.
- * @msg_seqno: Next %VFDI_EV_SEQ member to reply to VF. Protected by
- * @status_lock
- * @busy: VFDI request queued to be processed or being processed. Receiving
- * a VFDI request when @busy is set is an error condition.
- * @buf: Incoming VFDI requests are DMA from the VF into this buffer.
- * @buftbl_base: Buffer table entries for this VF start at this index.
- * @rx_filtering: Receive filtering has been requested by the VF driver.
- * @rx_filter_flags: The flags sent in the %VFDI_OP_INSERT_FILTER request.
- * @rx_filter_qid: VF relative qid for RX filter requested by VF.
- * @rx_filter_id: Receive MAC filter ID. Only one filter per VF is supported.
- * @tx_filter_mode: Transmit MAC filtering mode.
- * @tx_filter_id: Transmit MAC filter ID.
- * @addr: The MAC address and outer vlan tag of the VF.
- * @status_addr: VF DMA address of page for &struct vfdi_status updates.
- * @status_lock: Mutex protecting @msg_seqno, @status_addr, @addr,
- * @peer_page_addrs and @peer_page_count from simultaneous
- * updates by the VM and consumption by
- * efx_siena_sriov_update_vf_addr()
- * @peer_page_addrs: Pointer to an array of guest pages for local addresses.
- * @peer_page_count: Number of entries in @peer_page_count.
- * @evq0_addrs: Array of guest pages backing evq0.
- * @evq0_count: Number of entries in @evq0_addrs.
- * @flush_waitq: wait queue used by %VFDI_OP_FINI_ALL_QUEUES handler
- * to wait for flush completions.
- * @txq_lock: Mutex for TX queue allocation.
- * @txq_mask: Mask of initialized transmit queues.
- * @txq_count: Number of initialized transmit queues.
- * @rxq_mask: Mask of initialized receive queues.
- * @rxq_count: Number of initialized receive queues.
- * @rxq_retry_mask: Mask or receive queues that need to be flushed again
- * due to flush failure.
- * @rxq_retry_count: Number of receive queues in @rxq_retry_mask.
- * @reset_work: Work item to schedule a VF reset.
- */
-struct siena_vf {
- struct efx_nic *efx;
- unsigned int pci_rid;
- char pci_name[13]; /* dddd:bb:dd.f */
- unsigned int index;
- struct work_struct req;
- u64 req_addr;
- int req_type;
- unsigned req_seqno;
- unsigned msg_seqno;
- bool busy;
- struct efx_buffer buf;
- unsigned buftbl_base;
- bool rx_filtering;
- enum efx_filter_flags rx_filter_flags;
- unsigned rx_filter_qid;
- int rx_filter_id;
- enum efx_vf_tx_filter_mode tx_filter_mode;
- int tx_filter_id;
- struct vfdi_endpoint addr;
- u64 status_addr;
- struct mutex status_lock;
- u64 *peer_page_addrs;
- unsigned peer_page_count;
- u64 evq0_addrs[EFX_MAX_VF_EVQ_SIZE * sizeof(efx_qword_t) /
- EFX_BUF_SIZE];
- unsigned evq0_count;
- wait_queue_head_t flush_waitq;
- struct mutex txq_lock;
- unsigned long txq_mask[VI_MASK_LENGTH];
- unsigned txq_count;
- unsigned long rxq_mask[VI_MASK_LENGTH];
- unsigned rxq_count;
- unsigned long rxq_retry_mask[VI_MASK_LENGTH];
- atomic_t rxq_retry_count;
- struct work_struct reset_work;
-};
-
-struct efx_memcpy_req {
- unsigned int from_rid;
- void *from_buf;
- u64 from_addr;
- unsigned int to_rid;
- u64 to_addr;
- unsigned length;
-};
-
-/**
- * struct efx_local_addr - A MAC address on the vswitch without a VF.
- *
- * Siena does not have a switch, so VFs can't transmit data to each
- * other. Instead the VFs must be made aware of the local addresses
- * on the vswitch, so that they can arrange for an alternative
- * software datapath to be used.
- *
- * @link: List head for insertion into efx->local_addr_list.
- * @addr: Ethernet address
- */
-struct efx_local_addr {
- struct list_head link;
- u8 addr[ETH_ALEN];
-};
-
-/**
- * struct efx_endpoint_page - Page of vfdi_endpoint structures
- *
- * @link: List head for insertion into efx->local_page_list.
- * @ptr: Pointer to page.
- * @addr: DMA address of page.
- */
-struct efx_endpoint_page {
- struct list_head link;
- void *ptr;
- dma_addr_t addr;
-};
-
-/* Buffer table entries are reserved txq0,rxq0,evq0,txq1,rxq1,evq1 */
-#define EFX_BUFTBL_TXQ_BASE(_vf, _qid) \
- ((_vf)->buftbl_base + EFX_VF_BUFTBL_PER_VI * (_qid))
-#define EFX_BUFTBL_RXQ_BASE(_vf, _qid) \
- (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \
- (EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
-#define EFX_BUFTBL_EVQ_BASE(_vf, _qid) \
- (EFX_BUFTBL_TXQ_BASE(_vf, _qid) + \
- (2 * EFX_MAX_DMAQ_SIZE * sizeof(efx_qword_t) / EFX_BUF_SIZE))
-
-#define EFX_FIELD_MASK(_field) \
- ((1 << _field ## _WIDTH) - 1)
-
-/* VFs can only use this many transmit channels */
-static unsigned int vf_max_tx_channels = 2;
-module_param(vf_max_tx_channels, uint, 0444);
-MODULE_PARM_DESC(vf_max_tx_channels,
- "Limit the number of TX channels VFs can use");
-
-static int max_vfs = -1;
-module_param(max_vfs, int, 0444);
-MODULE_PARM_DESC(max_vfs,
- "Reduce the number of VFs initialized by the driver");
-
-/* Workqueue used by VFDI communication. We can't use the global
- * workqueue because it may be running the VF driver's probe()
- * routine, which will be blocked there waiting for a VFDI response.
- */
-static struct workqueue_struct *vfdi_workqueue;
-
-static unsigned abs_index(struct siena_vf *vf, unsigned index)
-{
- return EFX_VI_BASE + vf->index * efx_vf_size(vf->efx) + index;
-}
-
-static int efx_siena_sriov_cmd(struct efx_nic *efx, bool enable,
- unsigned *vi_scale_out, unsigned *vf_total_out)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_SRIOV_IN_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_SRIOV_OUT_LEN);
- unsigned vi_scale, vf_total;
- size_t outlen;
- int rc;
-
- MCDI_SET_DWORD(inbuf, SRIOV_IN_ENABLE, enable ? 1 : 0);
- MCDI_SET_DWORD(inbuf, SRIOV_IN_VI_BASE, EFX_VI_BASE);
- MCDI_SET_DWORD(inbuf, SRIOV_IN_VF_COUNT, efx->vf_count);
-
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_SRIOV, inbuf, MC_CMD_SRIOV_IN_LEN,
- outbuf, MC_CMD_SRIOV_OUT_LEN, &outlen);
- if (rc)
- return rc;
- if (outlen < MC_CMD_SRIOV_OUT_LEN)
- return -EIO;
-
- vf_total = MCDI_DWORD(outbuf, SRIOV_OUT_VF_TOTAL);
- vi_scale = MCDI_DWORD(outbuf, SRIOV_OUT_VI_SCALE);
- if (vi_scale > EFX_VI_SCALE_MAX)
- return -EOPNOTSUPP;
-
- if (vi_scale_out)
- *vi_scale_out = vi_scale;
- if (vf_total_out)
- *vf_total_out = vf_total;
-
- return 0;
-}
-
-static void efx_siena_sriov_usrev(struct efx_nic *efx, bool enabled)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- efx_oword_t reg;
-
- EFX_POPULATE_OWORD_2(reg,
- FRF_CZ_USREV_DIS, enabled ? 0 : 1,
- FRF_CZ_DFLT_EVQ, nic_data->vfdi_channel->channel);
- efx_writeo(efx, &reg, FR_CZ_USR_EV_CFG);
-}
-
-static int efx_siena_sriov_memcpy(struct efx_nic *efx,
- struct efx_memcpy_req *req,
- unsigned int count)
-{
- MCDI_DECLARE_BUF(inbuf, MCDI_CTL_SDU_LEN_MAX_V1);
- MCDI_DECLARE_STRUCT_PTR(record);
- unsigned int index, used;
- u64 from_addr;
- u32 from_rid;
- int rc;
-
- mb(); /* Finish writing source/reading dest before DMA starts */
-
- if (WARN_ON(count > MC_CMD_MEMCPY_IN_RECORD_MAXNUM))
- return -ENOBUFS;
- used = MC_CMD_MEMCPY_IN_LEN(count);
-
- for (index = 0; index < count; index++) {
- record = MCDI_ARRAY_STRUCT_PTR(inbuf, MEMCPY_IN_RECORD, index);
- MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_NUM_RECORDS,
- count);
- MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_TO_RID,
- req->to_rid);
- MCDI_SET_QWORD(record, MEMCPY_RECORD_TYPEDEF_TO_ADDR,
- req->to_addr);
- if (req->from_buf == NULL) {
- from_rid = req->from_rid;
- from_addr = req->from_addr;
- } else {
- if (WARN_ON(used + req->length >
- MCDI_CTL_SDU_LEN_MAX_V1)) {
- rc = -ENOBUFS;
- goto out;
- }
-
- from_rid = MC_CMD_MEMCPY_RECORD_TYPEDEF_RID_INLINE;
- from_addr = used;
- memcpy(_MCDI_PTR(inbuf, used), req->from_buf,
- req->length);
- used += req->length;
- }
-
- MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_RID, from_rid);
- MCDI_SET_QWORD(record, MEMCPY_RECORD_TYPEDEF_FROM_ADDR,
- from_addr);
- MCDI_SET_DWORD(record, MEMCPY_RECORD_TYPEDEF_LENGTH,
- req->length);
-
- ++req;
- }
-
- rc = efx_mcdi_rpc(efx, MC_CMD_MEMCPY, inbuf, used, NULL, 0, NULL);
-out:
- mb(); /* Don't write source/read dest before DMA is complete */
-
- return rc;
-}
-
-/* The TX filter is entirely controlled by this driver, and is modified
- * underneath the feet of the VF
- */
-static void efx_siena_sriov_reset_tx_filter(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct efx_filter_spec filter;
- u16 vlan;
- int rc;
-
- if (vf->tx_filter_id != -1) {
- efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
- vf->tx_filter_id);
- netif_dbg(efx, hw, efx->net_dev, "Removed vf %s tx filter %d\n",
- vf->pci_name, vf->tx_filter_id);
- vf->tx_filter_id = -1;
- }
-
- if (is_zero_ether_addr(vf->addr.mac_addr))
- return;
-
- /* Turn on TX filtering automatically if not explicitly
- * enabled or disabled.
- */
- if (vf->tx_filter_mode == VF_TX_FILTER_AUTO && vf_max_tx_channels <= 2)
- vf->tx_filter_mode = VF_TX_FILTER_ON;
-
- vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
- efx_filter_init_tx(&filter, abs_index(vf, 0));
- rc = efx_filter_set_eth_local(&filter,
- vlan ? vlan : EFX_FILTER_VID_UNSPEC,
- vf->addr.mac_addr);
- BUG_ON(rc);
-
- rc = efx_filter_insert_filter(efx, &filter, true);
- if (rc < 0) {
- netif_warn(efx, hw, efx->net_dev,
- "Unable to migrate tx filter for vf %s\n",
- vf->pci_name);
- } else {
- netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s tx filter %d\n",
- vf->pci_name, rc);
- vf->tx_filter_id = rc;
- }
-}
-
-/* The RX filter is managed here on behalf of the VF driver */
-static void efx_siena_sriov_reset_rx_filter(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct efx_filter_spec filter;
- u16 vlan;
- int rc;
-
- if (vf->rx_filter_id != -1) {
- efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED,
- vf->rx_filter_id);
- netif_dbg(efx, hw, efx->net_dev, "Removed vf %s rx filter %d\n",
- vf->pci_name, vf->rx_filter_id);
- vf->rx_filter_id = -1;
- }
-
- if (!vf->rx_filtering || is_zero_ether_addr(vf->addr.mac_addr))
- return;
-
- vlan = ntohs(vf->addr.tci) & VLAN_VID_MASK;
- efx_filter_init_rx(&filter, EFX_FILTER_PRI_REQUIRED,
- vf->rx_filter_flags,
- abs_index(vf, vf->rx_filter_qid));
- rc = efx_filter_set_eth_local(&filter,
- vlan ? vlan : EFX_FILTER_VID_UNSPEC,
- vf->addr.mac_addr);
- BUG_ON(rc);
-
- rc = efx_filter_insert_filter(efx, &filter, true);
- if (rc < 0) {
- netif_warn(efx, hw, efx->net_dev,
- "Unable to insert rx filter for vf %s\n",
- vf->pci_name);
- } else {
- netif_dbg(efx, hw, efx->net_dev, "Inserted vf %s rx filter %d\n",
- vf->pci_name, rc);
- vf->rx_filter_id = rc;
- }
-}
-
-static void __efx_siena_sriov_update_vf_addr(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct siena_nic_data *nic_data = efx->nic_data;
-
- efx_siena_sriov_reset_tx_filter(vf);
- efx_siena_sriov_reset_rx_filter(vf);
- queue_work(vfdi_workqueue, &nic_data->peer_work);
-}
-
-/* Push the peer list to this VF. The caller must hold status_lock to interlock
- * with VFDI requests, and they must be serialised against manipulation of
- * local_page_list, either by acquiring local_lock or by running from
- * efx_siena_sriov_peer_work()
- */
-static void __efx_siena_sriov_push_vf_status(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct siena_nic_data *nic_data = efx->nic_data;
- struct vfdi_status *status = nic_data->vfdi_status.addr;
- struct efx_memcpy_req copy[4];
- struct efx_endpoint_page *epp;
- unsigned int pos, count;
- unsigned data_offset;
- efx_qword_t event;
-
- WARN_ON(!mutex_is_locked(&vf->status_lock));
- WARN_ON(!vf->status_addr);
-
- status->local = vf->addr;
- status->generation_end = ++status->generation_start;
-
- memset(copy, '\0', sizeof(copy));
- /* Write generation_start */
- copy[0].from_buf = &status->generation_start;
- copy[0].to_rid = vf->pci_rid;
- copy[0].to_addr = vf->status_addr + offsetof(struct vfdi_status,
- generation_start);
- copy[0].length = sizeof(status->generation_start);
- /* DMA the rest of the structure (excluding the generations). This
- * assumes that the non-generation portion of vfdi_status is in
- * one chunk starting at the version member.
- */
- data_offset = offsetof(struct vfdi_status, version);
- copy[1].from_rid = efx->pci_dev->devfn;
- copy[1].from_addr = nic_data->vfdi_status.dma_addr + data_offset;
- copy[1].to_rid = vf->pci_rid;
- copy[1].to_addr = vf->status_addr + data_offset;
- copy[1].length = status->length - data_offset;
-
- /* Copy the peer pages */
- pos = 2;
- count = 0;
- list_for_each_entry(epp, &nic_data->local_page_list, link) {
- if (count == vf->peer_page_count) {
- /* The VF driver will know they need to provide more
- * pages because peer_addr_count is too large.
- */
- break;
- }
- copy[pos].from_buf = NULL;
- copy[pos].from_rid = efx->pci_dev->devfn;
- copy[pos].from_addr = epp->addr;
- copy[pos].to_rid = vf->pci_rid;
- copy[pos].to_addr = vf->peer_page_addrs[count];
- copy[pos].length = EFX_PAGE_SIZE;
-
- if (++pos == ARRAY_SIZE(copy)) {
- efx_siena_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
- pos = 0;
- }
- ++count;
- }
-
- /* Write generation_end */
- copy[pos].from_buf = &status->generation_end;
- copy[pos].to_rid = vf->pci_rid;
- copy[pos].to_addr = vf->status_addr + offsetof(struct vfdi_status,
- generation_end);
- copy[pos].length = sizeof(status->generation_end);
- efx_siena_sriov_memcpy(efx, copy, pos + 1);
-
- /* Notify the guest */
- EFX_POPULATE_QWORD_3(event,
- FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
- VFDI_EV_SEQ, (vf->msg_seqno & 0xff),
- VFDI_EV_TYPE, VFDI_EV_TYPE_STATUS);
- ++vf->msg_seqno;
- efx_farch_generate_event(efx,
- EFX_VI_BASE + vf->index * efx_vf_size(efx),
- &event);
-}
-
-static void efx_siena_sriov_bufs(struct efx_nic *efx, unsigned offset,
- u64 *addr, unsigned count)
-{
- efx_qword_t buf;
- unsigned pos;
-
- for (pos = 0; pos < count; ++pos) {
- EFX_POPULATE_QWORD_3(buf,
- FRF_AZ_BUF_ADR_REGION, 0,
- FRF_AZ_BUF_ADR_FBUF,
- addr ? addr[pos] >> 12 : 0,
- FRF_AZ_BUF_OWNER_ID_FBUF, 0);
- efx_sram_writeq(efx, efx->membase + FR_BZ_BUF_FULL_TBL,
- &buf, offset + pos);
- }
-}
-
-static bool bad_vf_index(struct efx_nic *efx, unsigned index)
-{
- return index >= efx_vf_size(efx);
-}
-
-static bool bad_buf_count(unsigned buf_count, unsigned max_entry_count)
-{
- unsigned max_buf_count = max_entry_count *
- sizeof(efx_qword_t) / EFX_BUF_SIZE;
-
- return ((buf_count & (buf_count - 1)) || buf_count > max_buf_count);
-}
-
-/* Check that VI specified by per-port index belongs to a VF.
- * Optionally set VF index and VI index within the VF.
- */
-static bool map_vi_index(struct efx_nic *efx, unsigned abs_index,
- struct siena_vf **vf_out, unsigned *rel_index_out)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- unsigned vf_i;
-
- if (abs_index < EFX_VI_BASE)
- return true;
- vf_i = (abs_index - EFX_VI_BASE) / efx_vf_size(efx);
- if (vf_i >= efx->vf_init_count)
- return true;
-
- if (vf_out)
- *vf_out = nic_data->vf + vf_i;
- if (rel_index_out)
- *rel_index_out = abs_index % efx_vf_size(efx);
- return false;
-}
-
-static int efx_vfdi_init_evq(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct vfdi_req *req = vf->buf.addr;
- unsigned vf_evq = req->u.init_evq.index;
- unsigned buf_count = req->u.init_evq.buf_count;
- unsigned abs_evq = abs_index(vf, vf_evq);
- unsigned buftbl = EFX_BUFTBL_EVQ_BASE(vf, vf_evq);
- efx_oword_t reg;
-
- if (bad_vf_index(efx, vf_evq) ||
- bad_buf_count(buf_count, EFX_MAX_VF_EVQ_SIZE)) {
- if (net_ratelimit())
- netif_err(efx, hw, efx->net_dev,
- "ERROR: Invalid INIT_EVQ from %s: evq %d bufs %d\n",
- vf->pci_name, vf_evq, buf_count);
- return VFDI_RC_EINVAL;
- }
-
- efx_siena_sriov_bufs(efx, buftbl, req->u.init_evq.addr, buf_count);
-
- EFX_POPULATE_OWORD_3(reg,
- FRF_CZ_TIMER_Q_EN, 1,
- FRF_CZ_HOST_NOTIFY_MODE, 0,
- FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
- efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
- EFX_POPULATE_OWORD_3(reg,
- FRF_AZ_EVQ_EN, 1,
- FRF_AZ_EVQ_SIZE, __ffs(buf_count),
- FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
- efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
-
- if (vf_evq == 0) {
- memcpy(vf->evq0_addrs, req->u.init_evq.addr,
- buf_count * sizeof(u64));
- vf->evq0_count = buf_count;
- }
-
- return VFDI_RC_SUCCESS;
-}
-
-static int efx_vfdi_init_rxq(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct vfdi_req *req = vf->buf.addr;
- unsigned vf_rxq = req->u.init_rxq.index;
- unsigned vf_evq = req->u.init_rxq.evq;
- unsigned buf_count = req->u.init_rxq.buf_count;
- unsigned buftbl = EFX_BUFTBL_RXQ_BASE(vf, vf_rxq);
- unsigned label;
- efx_oword_t reg;
-
- if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) ||
- vf_rxq >= VF_MAX_RX_QUEUES ||
- bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
- if (net_ratelimit())
- netif_err(efx, hw, efx->net_dev,
- "ERROR: Invalid INIT_RXQ from %s: rxq %d evq %d "
- "buf_count %d\n", vf->pci_name, vf_rxq,
- vf_evq, buf_count);
- return VFDI_RC_EINVAL;
- }
- if (__test_and_set_bit(req->u.init_rxq.index, vf->rxq_mask))
- ++vf->rxq_count;
- efx_siena_sriov_bufs(efx, buftbl, req->u.init_rxq.addr, buf_count);
-
- label = req->u.init_rxq.label & EFX_FIELD_MASK(FRF_AZ_RX_DESCQ_LABEL);
- EFX_POPULATE_OWORD_6(reg,
- FRF_AZ_RX_DESCQ_BUF_BASE_ID, buftbl,
- FRF_AZ_RX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
- FRF_AZ_RX_DESCQ_LABEL, label,
- FRF_AZ_RX_DESCQ_SIZE, __ffs(buf_count),
- FRF_AZ_RX_DESCQ_JUMBO,
- !!(req->u.init_rxq.flags &
- VFDI_RXQ_FLAG_SCATTER_EN),
- FRF_AZ_RX_DESCQ_EN, 1);
- efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
- abs_index(vf, vf_rxq));
-
- return VFDI_RC_SUCCESS;
-}
-
-static int efx_vfdi_init_txq(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct vfdi_req *req = vf->buf.addr;
- unsigned vf_txq = req->u.init_txq.index;
- unsigned vf_evq = req->u.init_txq.evq;
- unsigned buf_count = req->u.init_txq.buf_count;
- unsigned buftbl = EFX_BUFTBL_TXQ_BASE(vf, vf_txq);
- unsigned label, eth_filt_en;
- efx_oword_t reg;
-
- if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_txq) ||
- vf_txq >= vf_max_tx_channels ||
- bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) {
- if (net_ratelimit())
- netif_err(efx, hw, efx->net_dev,
- "ERROR: Invalid INIT_TXQ from %s: txq %d evq %d "
- "buf_count %d\n", vf->pci_name, vf_txq,
- vf_evq, buf_count);
- return VFDI_RC_EINVAL;
- }
-
- mutex_lock(&vf->txq_lock);
- if (__test_and_set_bit(req->u.init_txq.index, vf->txq_mask))
- ++vf->txq_count;
- mutex_unlock(&vf->txq_lock);
- efx_siena_sriov_bufs(efx, buftbl, req->u.init_txq.addr, buf_count);
-
- eth_filt_en = vf->tx_filter_mode == VF_TX_FILTER_ON;
-
- label = req->u.init_txq.label & EFX_FIELD_MASK(FRF_AZ_TX_DESCQ_LABEL);
- EFX_POPULATE_OWORD_8(reg,
- FRF_CZ_TX_DPT_Q_MASK_WIDTH, min(efx->vi_scale, 1U),
- FRF_CZ_TX_DPT_ETH_FILT_EN, eth_filt_en,
- FRF_AZ_TX_DESCQ_EN, 1,
- FRF_AZ_TX_DESCQ_BUF_BASE_ID, buftbl,
- FRF_AZ_TX_DESCQ_EVQ_ID, abs_index(vf, vf_evq),
- FRF_AZ_TX_DESCQ_LABEL, label,
- FRF_AZ_TX_DESCQ_SIZE, __ffs(buf_count),
- FRF_BZ_TX_NON_IP_DROP_DIS, 1);
- efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
- abs_index(vf, vf_txq));
-
- return VFDI_RC_SUCCESS;
-}
-
-/* Returns true when efx_vfdi_fini_all_queues should wake */
-static bool efx_vfdi_flush_wake(struct siena_vf *vf)
-{
- /* Ensure that all updates are visible to efx_vfdi_fini_all_queues() */
- smp_mb();
-
- return (!vf->txq_count && !vf->rxq_count) ||
- atomic_read(&vf->rxq_retry_count);
-}
-
-static void efx_vfdi_flush_clear(struct siena_vf *vf)
-{
- memset(vf->txq_mask, 0, sizeof(vf->txq_mask));
- vf->txq_count = 0;
- memset(vf->rxq_mask, 0, sizeof(vf->rxq_mask));
- vf->rxq_count = 0;
- memset(vf->rxq_retry_mask, 0, sizeof(vf->rxq_retry_mask));
- atomic_set(&vf->rxq_retry_count, 0);
-}
-
-static int efx_vfdi_fini_all_queues(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- efx_oword_t reg;
- unsigned count = efx_vf_size(efx);
- unsigned vf_offset = EFX_VI_BASE + vf->index * efx_vf_size(efx);
- unsigned timeout = HZ;
- unsigned index, rxqs_count;
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FLUSH_RX_QUEUES_IN_LENMAX);
- int rc;
-
- BUILD_BUG_ON(VF_MAX_RX_QUEUES >
- MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM);
-
- rtnl_lock();
- siena_prepare_flush(efx);
- rtnl_unlock();
-
- /* Flush all the initialized queues */
- rxqs_count = 0;
- for (index = 0; index < count; ++index) {
- if (test_bit(index, vf->txq_mask)) {
- EFX_POPULATE_OWORD_2(reg,
- FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
- FRF_AZ_TX_FLUSH_DESCQ,
- vf_offset + index);
- efx_writeo(efx, &reg, FR_AZ_TX_FLUSH_DESCQ);
- }
- if (test_bit(index, vf->rxq_mask)) {
- MCDI_SET_ARRAY_DWORD(
- inbuf, FLUSH_RX_QUEUES_IN_QID_OFST,
- rxqs_count, vf_offset + index);
- rxqs_count++;
- }
- }
-
- atomic_set(&vf->rxq_retry_count, 0);
- while (timeout && (vf->rxq_count || vf->txq_count)) {
- rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, inbuf,
- MC_CMD_FLUSH_RX_QUEUES_IN_LEN(rxqs_count),
- NULL, 0, NULL);
- WARN_ON(rc < 0);
-
- timeout = wait_event_timeout(vf->flush_waitq,
- efx_vfdi_flush_wake(vf),
- timeout);
- rxqs_count = 0;
- for (index = 0; index < count; ++index) {
- if (test_and_clear_bit(index, vf->rxq_retry_mask)) {
- atomic_dec(&vf->rxq_retry_count);
- MCDI_SET_ARRAY_DWORD(
- inbuf, FLUSH_RX_QUEUES_IN_QID_OFST,
- rxqs_count, vf_offset + index);
- rxqs_count++;
- }
- }
- }
-
- rtnl_lock();
- siena_finish_flush(efx);
- rtnl_unlock();
-
- /* Irrespective of success/failure, fini the queues */
- EFX_ZERO_OWORD(reg);
- for (index = 0; index < count; ++index) {
- efx_writeo_table(efx, &reg, FR_BZ_RX_DESC_PTR_TBL,
- vf_offset + index);
- efx_writeo_table(efx, &reg, FR_BZ_TX_DESC_PTR_TBL,
- vf_offset + index);
- efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL,
- vf_offset + index);
- efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL,
- vf_offset + index);
- }
- efx_siena_sriov_bufs(efx, vf->buftbl_base, NULL,
- EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx));
- efx_vfdi_flush_clear(vf);
-
- vf->evq0_count = 0;
-
- return timeout ? 0 : VFDI_RC_ETIMEDOUT;
-}
-
-static int efx_vfdi_insert_filter(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct siena_nic_data *nic_data = efx->nic_data;
- struct vfdi_req *req = vf->buf.addr;
- unsigned vf_rxq = req->u.mac_filter.rxq;
- unsigned flags;
-
- if (bad_vf_index(efx, vf_rxq) || vf->rx_filtering) {
- if (net_ratelimit())
- netif_err(efx, hw, efx->net_dev,
- "ERROR: Invalid INSERT_FILTER from %s: rxq %d "
- "flags 0x%x\n", vf->pci_name, vf_rxq,
- req->u.mac_filter.flags);
- return VFDI_RC_EINVAL;
- }
-
- flags = 0;
- if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_RSS)
- flags |= EFX_FILTER_FLAG_RX_RSS;
- if (req->u.mac_filter.flags & VFDI_MAC_FILTER_FLAG_SCATTER)
- flags |= EFX_FILTER_FLAG_RX_SCATTER;
- vf->rx_filter_flags = flags;
- vf->rx_filter_qid = vf_rxq;
- vf->rx_filtering = true;
-
- efx_siena_sriov_reset_rx_filter(vf);
- queue_work(vfdi_workqueue, &nic_data->peer_work);
-
- return VFDI_RC_SUCCESS;
-}
-
-static int efx_vfdi_remove_all_filters(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct siena_nic_data *nic_data = efx->nic_data;
-
- vf->rx_filtering = false;
- efx_siena_sriov_reset_rx_filter(vf);
- queue_work(vfdi_workqueue, &nic_data->peer_work);
-
- return VFDI_RC_SUCCESS;
-}
-
-static int efx_vfdi_set_status_page(struct siena_vf *vf)
-{
- struct efx_nic *efx = vf->efx;
- struct siena_nic_data *nic_data = efx->nic_data;
- struct vfdi_req *req = vf->buf.addr;
- u64 page_count = req->u.set_status_page.peer_page_count;
- u64 max_page_count =
- (EFX_PAGE_SIZE -
- offsetof(struct vfdi_req, u.set_status_page.peer_page_addr[0]))
- / sizeof(req->u.set_status_page.peer_page_addr[0]);
-
- if (!req->u.set_status_page.dma_addr || page_count > max_page_count) {
- if (net_ratelimit())
- netif_err(efx, hw, efx->net_dev,
- "ERROR: Invalid SET_STATUS_PAGE from %s\n",
- vf->pci_name);
- return VFDI_RC_EINVAL;
- }
-
- mutex_lock(&nic_data->local_lock);
- mutex_lock(&vf->status_lock);
- vf->status_addr = req->u.set_status_page.dma_addr;
-
- kfree(vf->peer_page_addrs);
- vf->peer_page_addrs = NULL;
- vf->peer_page_count = 0;
-
- if (page_count) {
- vf->peer_page_addrs = kcalloc(page_count, sizeof(u64),
- GFP_KERNEL);
- if (vf->peer_page_addrs) {
- memcpy(vf->peer_page_addrs,
- req->u.set_status_page.peer_page_addr,
- page_count * sizeof(u64));
- vf->peer_page_count = page_count;
- }
- }
-
- __efx_siena_sriov_push_vf_status(vf);
- mutex_unlock(&vf->status_lock);
- mutex_unlock(&nic_data->local_lock);
-
- return VFDI_RC_SUCCESS;
-}
-
-static int efx_vfdi_clear_status_page(struct siena_vf *vf)
-{
- mutex_lock(&vf->status_lock);
- vf->status_addr = 0;
- mutex_unlock(&vf->status_lock);
-
- return VFDI_RC_SUCCESS;
-}
-
-typedef int (*efx_vfdi_op_t)(struct siena_vf *vf);
-
-static const efx_vfdi_op_t vfdi_ops[VFDI_OP_LIMIT] = {
- [VFDI_OP_INIT_EVQ] = efx_vfdi_init_evq,
- [VFDI_OP_INIT_TXQ] = efx_vfdi_init_txq,
- [VFDI_OP_INIT_RXQ] = efx_vfdi_init_rxq,
- [VFDI_OP_FINI_ALL_QUEUES] = efx_vfdi_fini_all_queues,
- [VFDI_OP_INSERT_FILTER] = efx_vfdi_insert_filter,
- [VFDI_OP_REMOVE_ALL_FILTERS] = efx_vfdi_remove_all_filters,
- [VFDI_OP_SET_STATUS_PAGE] = efx_vfdi_set_status_page,
- [VFDI_OP_CLEAR_STATUS_PAGE] = efx_vfdi_clear_status_page,
-};
-
-static void efx_siena_sriov_vfdi(struct work_struct *work)
-{
- struct siena_vf *vf = container_of(work, struct siena_vf, req);
- struct efx_nic *efx = vf->efx;
- struct vfdi_req *req = vf->buf.addr;
- struct efx_memcpy_req copy[2];
- int rc;
-
- /* Copy this page into the local address space */
- memset(copy, '\0', sizeof(copy));
- copy[0].from_rid = vf->pci_rid;
- copy[0].from_addr = vf->req_addr;
- copy[0].to_rid = efx->pci_dev->devfn;
- copy[0].to_addr = vf->buf.dma_addr;
- copy[0].length = EFX_PAGE_SIZE;
- rc = efx_siena_sriov_memcpy(efx, copy, 1);
- if (rc) {
- /* If we can't get the request, we can't reply to the caller */
- if (net_ratelimit())
- netif_err(efx, hw, efx->net_dev,
- "ERROR: Unable to fetch VFDI request from %s rc %d\n",
- vf->pci_name, -rc);
- vf->busy = false;
- return;
- }
-
- if (req->op < VFDI_OP_LIMIT && vfdi_ops[req->op] != NULL) {
- rc = vfdi_ops[req->op](vf);
- if (rc == 0) {
- netif_dbg(efx, hw, efx->net_dev,
- "vfdi request %d from %s ok\n",
- req->op, vf->pci_name);
- }
- } else {
- netif_dbg(efx, hw, efx->net_dev,
- "ERROR: Unrecognised request %d from VF %s addr "
- "%llx\n", req->op, vf->pci_name,
- (unsigned long long)vf->req_addr);
- rc = VFDI_RC_EOPNOTSUPP;
- }
-
- /* Allow subsequent VF requests */
- vf->busy = false;
- smp_wmb();
-
- /* Respond to the request */
- req->rc = rc;
- req->op = VFDI_OP_RESPONSE;
-
- memset(copy, '\0', sizeof(copy));
- copy[0].from_buf = &req->rc;
- copy[0].to_rid = vf->pci_rid;
- copy[0].to_addr = vf->req_addr + offsetof(struct vfdi_req, rc);
- copy[0].length = sizeof(req->rc);
- copy[1].from_buf = &req->op;
- copy[1].to_rid = vf->pci_rid;
- copy[1].to_addr = vf->req_addr + offsetof(struct vfdi_req, op);
- copy[1].length = sizeof(req->op);
-
- (void)efx_siena_sriov_memcpy(efx, copy, ARRAY_SIZE(copy));
-}
-
-
-
-/* After a reset the event queues inside the guests no longer exist. Fill the
- * event ring in guest memory with VFDI reset events, then (re-initialise) the
- * event queue to raise an interrupt. The guest driver will then recover.
- */
-
-static void efx_siena_sriov_reset_vf(struct siena_vf *vf,
- struct efx_buffer *buffer)
-{
- struct efx_nic *efx = vf->efx;
- struct efx_memcpy_req copy_req[4];
- efx_qword_t event;
- unsigned int pos, count, k, buftbl, abs_evq;
- efx_oword_t reg;
- efx_dword_t ptr;
- int rc;
-
- BUG_ON(buffer->len != EFX_PAGE_SIZE);
-
- if (!vf->evq0_count)
- return;
- BUG_ON(vf->evq0_count & (vf->evq0_count - 1));
-
- mutex_lock(&vf->status_lock);
- EFX_POPULATE_QWORD_3(event,
- FSF_AZ_EV_CODE, FSE_CZ_EV_CODE_USER_EV,
- VFDI_EV_SEQ, vf->msg_seqno,
- VFDI_EV_TYPE, VFDI_EV_TYPE_RESET);
- vf->msg_seqno++;
- for (pos = 0; pos < EFX_PAGE_SIZE; pos += sizeof(event))
- memcpy(buffer->addr + pos, &event, sizeof(event));
-
- for (pos = 0; pos < vf->evq0_count; pos += count) {
- count = min_t(unsigned, vf->evq0_count - pos,
- ARRAY_SIZE(copy_req));
- for (k = 0; k < count; k++) {
- copy_req[k].from_buf = NULL;
- copy_req[k].from_rid = efx->pci_dev->devfn;
- copy_req[k].from_addr = buffer->dma_addr;
- copy_req[k].to_rid = vf->pci_rid;
- copy_req[k].to_addr = vf->evq0_addrs[pos + k];
- copy_req[k].length = EFX_PAGE_SIZE;
- }
- rc = efx_siena_sriov_memcpy(efx, copy_req, count);
- if (rc) {
- if (net_ratelimit())
- netif_err(efx, hw, efx->net_dev,
- "ERROR: Unable to notify %s of reset"
- ": %d\n", vf->pci_name, -rc);
- break;
- }
- }
-
- /* Reinitialise, arm and trigger evq0 */
- abs_evq = abs_index(vf, 0);
- buftbl = EFX_BUFTBL_EVQ_BASE(vf, 0);
- efx_siena_sriov_bufs(efx, buftbl, vf->evq0_addrs, vf->evq0_count);
-
- EFX_POPULATE_OWORD_3(reg,
- FRF_CZ_TIMER_Q_EN, 1,
- FRF_CZ_HOST_NOTIFY_MODE, 0,
- FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
- efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, abs_evq);
- EFX_POPULATE_OWORD_3(reg,
- FRF_AZ_EVQ_EN, 1,
- FRF_AZ_EVQ_SIZE, __ffs(vf->evq0_count),
- FRF_AZ_EVQ_BUF_BASE_ID, buftbl);
- efx_writeo_table(efx, &reg, FR_BZ_EVQ_PTR_TBL, abs_evq);
- EFX_POPULATE_DWORD_1(ptr, FRF_AZ_EVQ_RPTR, 0);
- efx_writed(efx, &ptr, FR_BZ_EVQ_RPTR + FR_BZ_EVQ_RPTR_STEP * abs_evq);
-
- mutex_unlock(&vf->status_lock);
-}
-
-static void efx_siena_sriov_reset_vf_work(struct work_struct *work)
-{
- struct siena_vf *vf = container_of(work, struct siena_vf, req);
- struct efx_nic *efx = vf->efx;
- struct efx_buffer buf;
-
- if (!efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE, GFP_NOIO)) {
- efx_siena_sriov_reset_vf(vf, &buf);
- efx_nic_free_buffer(efx, &buf);
- }
-}
-
-static void efx_siena_sriov_handle_no_channel(struct efx_nic *efx)
-{
- netif_err(efx, drv, efx->net_dev,
- "ERROR: IOV requires MSI-X and 1 additional interrupt"
- "vector. IOV disabled\n");
- efx->vf_count = 0;
-}
-
-static int efx_siena_sriov_probe_channel(struct efx_channel *channel)
-{
- struct siena_nic_data *nic_data = channel->efx->nic_data;
- nic_data->vfdi_channel = channel;
-
- return 0;
-}
-
-static void
-efx_siena_sriov_get_channel_name(struct efx_channel *channel,
- char *buf, size_t len)
-{
- snprintf(buf, len, "%s-iov", channel->efx->name);
-}
-
-static const struct efx_channel_type efx_siena_sriov_channel_type = {
- .handle_no_channel = efx_siena_sriov_handle_no_channel,
- .pre_probe = efx_siena_sriov_probe_channel,
- .post_remove = efx_channel_dummy_op_void,
- .get_name = efx_siena_sriov_get_channel_name,
- /* no copy operation; channel must not be reallocated */
- .keep_eventq = true,
-};
-
-void efx_siena_sriov_probe(struct efx_nic *efx)
-{
- unsigned count;
-
- if (!max_vfs)
- return;
-
- if (efx_siena_sriov_cmd(efx, false, &efx->vi_scale, &count)) {
- pci_info(efx->pci_dev, "no SR-IOV VFs probed\n");
- return;
- }
- if (count > 0 && count > max_vfs)
- count = max_vfs;
-
- /* efx_nic_dimension_resources() will reduce vf_count as appopriate */
- efx->vf_count = count;
-
- efx->extra_channel_type[EFX_EXTRA_CHANNEL_IOV] = &efx_siena_sriov_channel_type;
-}
-
-/* Copy the list of individual addresses into the vfdi_status.peers
- * array and auxiliary pages, protected by %local_lock. Drop that lock
- * and then broadcast the address list to every VF.
- */
-static void efx_siena_sriov_peer_work(struct work_struct *data)
-{
- struct siena_nic_data *nic_data = container_of(data,
- struct siena_nic_data,
- peer_work);
- struct efx_nic *efx = nic_data->efx;
- struct vfdi_status *vfdi_status = nic_data->vfdi_status.addr;
- struct siena_vf *vf;
- struct efx_local_addr *local_addr;
- struct vfdi_endpoint *peer;
- struct efx_endpoint_page *epp;
- struct list_head pages;
- unsigned int peer_space;
- unsigned int peer_count;
- unsigned int pos;
-
- mutex_lock(&nic_data->local_lock);
-
- /* Move the existing peer pages off %local_page_list */
- INIT_LIST_HEAD(&pages);
- list_splice_tail_init(&nic_data->local_page_list, &pages);
-
- /* Populate the VF addresses starting from entry 1 (entry 0 is
- * the PF address)
- */
- peer = vfdi_status->peers + 1;
- peer_space = ARRAY_SIZE(vfdi_status->peers) - 1;
- peer_count = 1;
- for (pos = 0; pos < efx->vf_count; ++pos) {
- vf = nic_data->vf + pos;
-
- mutex_lock(&vf->status_lock);
- if (vf->rx_filtering && !is_zero_ether_addr(vf->addr.mac_addr)) {
- *peer++ = vf->addr;
- ++peer_count;
- --peer_space;
- BUG_ON(peer_space == 0);
- }
- mutex_unlock(&vf->status_lock);
- }
-
- /* Fill the remaining addresses */
- list_for_each_entry(local_addr, &nic_data->local_addr_list, link) {
- ether_addr_copy(peer->mac_addr, local_addr->addr);
- peer->tci = 0;
- ++peer;
- ++peer_count;
- if (--peer_space == 0) {
- if (list_empty(&pages)) {
- epp = kmalloc(sizeof(*epp), GFP_KERNEL);
- if (!epp)
- break;
- epp->ptr = dma_alloc_coherent(
- &efx->pci_dev->dev, EFX_PAGE_SIZE,
- &epp->addr, GFP_KERNEL);
- if (!epp->ptr) {
- kfree(epp);
- break;
- }
- } else {
- epp = list_first_entry(
- &pages, struct efx_endpoint_page, link);
- list_del(&epp->link);
- }
-
- list_add_tail(&epp->link, &nic_data->local_page_list);
- peer = (struct vfdi_endpoint *)epp->ptr;
- peer_space = EFX_PAGE_SIZE / sizeof(struct vfdi_endpoint);
- }
- }
- vfdi_status->peer_count = peer_count;
- mutex_unlock(&nic_data->local_lock);
-
- /* Free any now unused endpoint pages */
- while (!list_empty(&pages)) {
- epp = list_first_entry(
- &pages, struct efx_endpoint_page, link);
- list_del(&epp->link);
- dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
- epp->ptr, epp->addr);
- kfree(epp);
- }
-
- /* Finally, push the pages */
- for (pos = 0; pos < efx->vf_count; ++pos) {
- vf = nic_data->vf + pos;
-
- mutex_lock(&vf->status_lock);
- if (vf->status_addr)
- __efx_siena_sriov_push_vf_status(vf);
- mutex_unlock(&vf->status_lock);
- }
-}
-
-static void efx_siena_sriov_free_local(struct efx_nic *efx)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- struct efx_local_addr *local_addr;
- struct efx_endpoint_page *epp;
-
- while (!list_empty(&nic_data->local_addr_list)) {
- local_addr = list_first_entry(&nic_data->local_addr_list,
- struct efx_local_addr, link);
- list_del(&local_addr->link);
- kfree(local_addr);
- }
-
- while (!list_empty(&nic_data->local_page_list)) {
- epp = list_first_entry(&nic_data->local_page_list,
- struct efx_endpoint_page, link);
- list_del(&epp->link);
- dma_free_coherent(&efx->pci_dev->dev, EFX_PAGE_SIZE,
- epp->ptr, epp->addr);
- kfree(epp);
- }
-}
-
-static int efx_siena_sriov_vf_alloc(struct efx_nic *efx)
-{
- unsigned index;
- struct siena_vf *vf;
- struct siena_nic_data *nic_data = efx->nic_data;
-
- nic_data->vf = kcalloc(efx->vf_count, sizeof(*nic_data->vf),
- GFP_KERNEL);
- if (!nic_data->vf)
- return -ENOMEM;
-
- for (index = 0; index < efx->vf_count; ++index) {
- vf = nic_data->vf + index;
-
- vf->efx = efx;
- vf->index = index;
- vf->rx_filter_id = -1;
- vf->tx_filter_mode = VF_TX_FILTER_AUTO;
- vf->tx_filter_id = -1;
- INIT_WORK(&vf->req, efx_siena_sriov_vfdi);
- INIT_WORK(&vf->reset_work, efx_siena_sriov_reset_vf_work);
- init_waitqueue_head(&vf->flush_waitq);
- mutex_init(&vf->status_lock);
- mutex_init(&vf->txq_lock);
- }
-
- return 0;
-}
-
-static void efx_siena_sriov_vfs_fini(struct efx_nic *efx)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- struct siena_vf *vf;
- unsigned int pos;
-
- for (pos = 0; pos < efx->vf_count; ++pos) {
- vf = nic_data->vf + pos;
-
- efx_nic_free_buffer(efx, &vf->buf);
- kfree(vf->peer_page_addrs);
- vf->peer_page_addrs = NULL;
- vf->peer_page_count = 0;
-
- vf->evq0_count = 0;
- }
-}
-
-static int efx_siena_sriov_vfs_init(struct efx_nic *efx)
-{
- struct pci_dev *pci_dev = efx->pci_dev;
- struct siena_nic_data *nic_data = efx->nic_data;
- unsigned index, devfn, sriov, buftbl_base;
- u16 offset, stride;
- struct siena_vf *vf;
- int rc;
-
- sriov = pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV);
- if (!sriov)
- return -ENOENT;
-
- pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_OFFSET, &offset);
- pci_read_config_word(pci_dev, sriov + PCI_SRIOV_VF_STRIDE, &stride);
-
- buftbl_base = nic_data->vf_buftbl_base;
- devfn = pci_dev->devfn + offset;
- for (index = 0; index < efx->vf_count; ++index) {
- vf = nic_data->vf + index;
-
- /* Reserve buffer entries */
- vf->buftbl_base = buftbl_base;
- buftbl_base += EFX_VF_BUFTBL_PER_VI * efx_vf_size(efx);
-
- vf->pci_rid = devfn;
- snprintf(vf->pci_name, sizeof(vf->pci_name),
- "%04x:%02x:%02x.%d",
- pci_domain_nr(pci_dev->bus), pci_dev->bus->number,
- PCI_SLOT(devfn), PCI_FUNC(devfn));
-
- rc = efx_nic_alloc_buffer(efx, &vf->buf, EFX_PAGE_SIZE,
- GFP_KERNEL);
- if (rc)
- goto fail;
-
- devfn += stride;
- }
-
- return 0;
-
-fail:
- efx_siena_sriov_vfs_fini(efx);
- return rc;
-}
-
-int efx_siena_sriov_init(struct efx_nic *efx)
-{
- struct net_device *net_dev = efx->net_dev;
- struct siena_nic_data *nic_data = efx->nic_data;
- struct vfdi_status *vfdi_status;
- int rc;
-
- /* Ensure there's room for vf_channel */
- BUILD_BUG_ON(EFX_MAX_CHANNELS + 1 >= EFX_VI_BASE);
- /* Ensure that VI_BASE is aligned on VI_SCALE */
- BUILD_BUG_ON(EFX_VI_BASE & ((1 << EFX_VI_SCALE_MAX) - 1));
-
- if (efx->vf_count == 0)
- return 0;
-
- rc = efx_siena_sriov_cmd(efx, true, NULL, NULL);
- if (rc)
- goto fail_cmd;
-
- rc = efx_nic_alloc_buffer(efx, &nic_data->vfdi_status,
- sizeof(*vfdi_status), GFP_KERNEL);
- if (rc)
- goto fail_status;
- vfdi_status = nic_data->vfdi_status.addr;
- memset(vfdi_status, 0, sizeof(*vfdi_status));
- vfdi_status->version = 1;
- vfdi_status->length = sizeof(*vfdi_status);
- vfdi_status->max_tx_channels = vf_max_tx_channels;
- vfdi_status->vi_scale = efx->vi_scale;
- vfdi_status->rss_rxq_count = efx->rss_spread;
- vfdi_status->peer_count = 1 + efx->vf_count;
- vfdi_status->timer_quantum_ns = efx->timer_quantum_ns;
-
- rc = efx_siena_sriov_vf_alloc(efx);
- if (rc)
- goto fail_alloc;
-
- mutex_init(&nic_data->local_lock);
- INIT_WORK(&nic_data->peer_work, efx_siena_sriov_peer_work);
- INIT_LIST_HEAD(&nic_data->local_addr_list);
- INIT_LIST_HEAD(&nic_data->local_page_list);
-
- rc = efx_siena_sriov_vfs_init(efx);
- if (rc)
- goto fail_vfs;
-
- rtnl_lock();
- ether_addr_copy(vfdi_status->peers[0].mac_addr, net_dev->dev_addr);
- efx->vf_init_count = efx->vf_count;
- rtnl_unlock();
-
- efx_siena_sriov_usrev(efx, true);
-
- /* At this point we must be ready to accept VFDI requests */
-
- rc = pci_enable_sriov(efx->pci_dev, efx->vf_count);
- if (rc)
- goto fail_pci;
-
- netif_info(efx, probe, net_dev,
- "enabled SR-IOV for %d VFs, %d VI per VF\n",
- efx->vf_count, efx_vf_size(efx));
- return 0;
-
-fail_pci:
- efx_siena_sriov_usrev(efx, false);
- rtnl_lock();
- efx->vf_init_count = 0;
- rtnl_unlock();
- efx_siena_sriov_vfs_fini(efx);
-fail_vfs:
- cancel_work_sync(&nic_data->peer_work);
- efx_siena_sriov_free_local(efx);
- kfree(nic_data->vf);
-fail_alloc:
- efx_nic_free_buffer(efx, &nic_data->vfdi_status);
-fail_status:
- efx_siena_sriov_cmd(efx, false, NULL, NULL);
-fail_cmd:
- return rc;
-}
-
-void efx_siena_sriov_fini(struct efx_nic *efx)
-{
- struct siena_vf *vf;
- unsigned int pos;
- struct siena_nic_data *nic_data = efx->nic_data;
-
- if (efx->vf_init_count == 0)
- return;
-
- /* Disable all interfaces to reconfiguration */
- BUG_ON(nic_data->vfdi_channel->enabled);
- efx_siena_sriov_usrev(efx, false);
- rtnl_lock();
- efx->vf_init_count = 0;
- rtnl_unlock();
-
- /* Flush all reconfiguration work */
- for (pos = 0; pos < efx->vf_count; ++pos) {
- vf = nic_data->vf + pos;
- cancel_work_sync(&vf->req);
- cancel_work_sync(&vf->reset_work);
- }
- cancel_work_sync(&nic_data->peer_work);
-
- pci_disable_sriov(efx->pci_dev);
-
- /* Tear down back-end state */
- efx_siena_sriov_vfs_fini(efx);
- efx_siena_sriov_free_local(efx);
- kfree(nic_data->vf);
- efx_nic_free_buffer(efx, &nic_data->vfdi_status);
- efx_siena_sriov_cmd(efx, false, NULL, NULL);
-}
-
-void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event)
-{
- struct efx_nic *efx = channel->efx;
- struct siena_vf *vf;
- unsigned qid, seq, type, data;
-
- qid = EFX_QWORD_FIELD(*event, FSF_CZ_USER_QID);
-
- /* USR_EV_REG_VALUE is dword0, so access the VFDI_EV fields directly */
- BUILD_BUG_ON(FSF_CZ_USER_EV_REG_VALUE_LBN != 0);
- seq = EFX_QWORD_FIELD(*event, VFDI_EV_SEQ);
- type = EFX_QWORD_FIELD(*event, VFDI_EV_TYPE);
- data = EFX_QWORD_FIELD(*event, VFDI_EV_DATA);
-
- netif_vdbg(efx, hw, efx->net_dev,
- "USR_EV event from qid %d seq 0x%x type %d data 0x%x\n",
- qid, seq, type, data);
-
- if (map_vi_index(efx, qid, &vf, NULL))
- return;
- if (vf->busy)
- goto error;
-
- if (type == VFDI_EV_TYPE_REQ_WORD0) {
- /* Resynchronise */
- vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
- vf->req_seqno = seq + 1;
- vf->req_addr = 0;
- } else if (seq != (vf->req_seqno++ & 0xff) || type != vf->req_type)
- goto error;
-
- switch (vf->req_type) {
- case VFDI_EV_TYPE_REQ_WORD0:
- case VFDI_EV_TYPE_REQ_WORD1:
- case VFDI_EV_TYPE_REQ_WORD2:
- vf->req_addr |= (u64)data << (vf->req_type << 4);
- ++vf->req_type;
- return;
-
- case VFDI_EV_TYPE_REQ_WORD3:
- vf->req_addr |= (u64)data << 48;
- vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
- vf->busy = true;
- queue_work(vfdi_workqueue, &vf->req);
- return;
- }
-
-error:
- if (net_ratelimit())
- netif_err(efx, hw, efx->net_dev,
- "ERROR: Screaming VFDI request from %s\n",
- vf->pci_name);
- /* Reset the request and sequence number */
- vf->req_type = VFDI_EV_TYPE_REQ_WORD0;
- vf->req_seqno = seq + 1;
-}
-
-void efx_siena_sriov_flr(struct efx_nic *efx, unsigned vf_i)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- struct siena_vf *vf;
-
- if (vf_i > efx->vf_init_count)
- return;
- vf = nic_data->vf + vf_i;
- netif_info(efx, hw, efx->net_dev,
- "FLR on VF %s\n", vf->pci_name);
-
- vf->status_addr = 0;
- efx_vfdi_remove_all_filters(vf);
- efx_vfdi_flush_clear(vf);
-
- vf->evq0_count = 0;
-}
-
-int efx_siena_sriov_mac_address_changed(struct efx_nic *efx)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- struct vfdi_status *vfdi_status = nic_data->vfdi_status.addr;
-
- if (!efx->vf_init_count)
- return 0;
- ether_addr_copy(vfdi_status->peers[0].mac_addr,
- efx->net_dev->dev_addr);
- queue_work(vfdi_workqueue, &nic_data->peer_work);
-
- return 0;
-}
-
-void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
-{
- struct siena_vf *vf;
- unsigned queue, qid;
-
- queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
- if (map_vi_index(efx, queue, &vf, &qid))
- return;
- /* Ignore flush completions triggered by an FLR */
- if (!test_bit(qid, vf->txq_mask))
- return;
-
- __clear_bit(qid, vf->txq_mask);
- --vf->txq_count;
-
- if (efx_vfdi_flush_wake(vf))
- wake_up(&vf->flush_waitq);
-}
-
-void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event)
-{
- struct siena_vf *vf;
- unsigned ev_failed, queue, qid;
-
- queue = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
- ev_failed = EFX_QWORD_FIELD(*event,
- FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
- if (map_vi_index(efx, queue, &vf, &qid))
- return;
- if (!test_bit(qid, vf->rxq_mask))
- return;
-
- if (ev_failed) {
- set_bit(qid, vf->rxq_retry_mask);
- atomic_inc(&vf->rxq_retry_count);
- } else {
- __clear_bit(qid, vf->rxq_mask);
- --vf->rxq_count;
- }
- if (efx_vfdi_flush_wake(vf))
- wake_up(&vf->flush_waitq);
-}
-
-/* Called from napi. Schedule the reset work item */
-void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq)
-{
- struct siena_vf *vf;
- unsigned int rel;
-
- if (map_vi_index(efx, dmaq, &vf, &rel))
- return;
-
- if (net_ratelimit())
- netif_err(efx, hw, efx->net_dev,
- "VF %d DMA Q %d reports descriptor fetch error.\n",
- vf->index, rel);
- queue_work(vfdi_workqueue, &vf->reset_work);
-}
-
-/* Reset all VFs */
-void efx_siena_sriov_reset(struct efx_nic *efx)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- unsigned int vf_i;
- struct efx_buffer buf;
- struct siena_vf *vf;
-
- ASSERT_RTNL();
-
- if (efx->vf_init_count == 0)
- return;
-
- efx_siena_sriov_usrev(efx, true);
- (void)efx_siena_sriov_cmd(efx, true, NULL, NULL);
-
- if (efx_nic_alloc_buffer(efx, &buf, EFX_PAGE_SIZE, GFP_NOIO))
- return;
-
- for (vf_i = 0; vf_i < efx->vf_init_count; ++vf_i) {
- vf = nic_data->vf + vf_i;
- efx_siena_sriov_reset_vf(vf, &buf);
- }
-
- efx_nic_free_buffer(efx, &buf);
-}
-
-int efx_init_sriov(void)
-{
- /* A single threaded workqueue is sufficient. efx_siena_sriov_vfdi() and
- * efx_siena_sriov_peer_work() spend almost all their time sleeping for
- * MCDI to complete anyway
- */
- vfdi_workqueue = create_singlethread_workqueue("sfc_vfdi");
- if (!vfdi_workqueue)
- return -ENOMEM;
- return 0;
-}
-
-void efx_fini_sriov(void)
-{
- destroy_workqueue(vfdi_workqueue);
-}
-
-int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, const u8 *mac)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- struct siena_vf *vf;
-
- if (vf_i >= efx->vf_init_count)
- return -EINVAL;
- vf = nic_data->vf + vf_i;
-
- mutex_lock(&vf->status_lock);
- ether_addr_copy(vf->addr.mac_addr, mac);
- __efx_siena_sriov_update_vf_addr(vf);
- mutex_unlock(&vf->status_lock);
-
- return 0;
-}
-
-int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i,
- u16 vlan, u8 qos)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- struct siena_vf *vf;
- u16 tci;
-
- if (vf_i >= efx->vf_init_count)
- return -EINVAL;
- vf = nic_data->vf + vf_i;
-
- mutex_lock(&vf->status_lock);
- tci = (vlan & VLAN_VID_MASK) | ((qos & 0x7) << VLAN_PRIO_SHIFT);
- vf->addr.tci = htons(tci);
- __efx_siena_sriov_update_vf_addr(vf);
- mutex_unlock(&vf->status_lock);
-
- return 0;
-}
-
-int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf_i,
- bool spoofchk)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- struct siena_vf *vf;
- int rc;
-
- if (vf_i >= efx->vf_init_count)
- return -EINVAL;
- vf = nic_data->vf + vf_i;
-
- mutex_lock(&vf->txq_lock);
- if (vf->txq_count == 0) {
- vf->tx_filter_mode =
- spoofchk ? VF_TX_FILTER_ON : VF_TX_FILTER_OFF;
- rc = 0;
- } else {
- /* This cannot be changed while TX queues are running */
- rc = -EBUSY;
- }
- mutex_unlock(&vf->txq_lock);
- return rc;
-}
-
-int efx_siena_sriov_get_vf_config(struct efx_nic *efx, int vf_i,
- struct ifla_vf_info *ivi)
-{
- struct siena_nic_data *nic_data = efx->nic_data;
- struct siena_vf *vf;
- u16 tci;
-
- if (vf_i >= efx->vf_init_count)
- return -EINVAL;
- vf = nic_data->vf + vf_i;
-
- ivi->vf = vf_i;
- ether_addr_copy(ivi->mac, vf->addr.mac_addr);
- ivi->max_tx_rate = 0;
- ivi->min_tx_rate = 0;
- tci = ntohs(vf->addr.tci);
- ivi->vlan = tci & VLAN_VID_MASK;
- ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7;
- ivi->spoofchk = vf->tx_filter_mode == VF_TX_FILTER_ON;
-
- return 0;
-}
-
-bool efx_siena_sriov_wanted(struct efx_nic *efx)
-{
- return efx->vf_count != 0;
-}
-
-int efx_siena_sriov_configure(struct efx_nic *efx, int num_vfs)
-{
- return 0;
-}
diff --git a/drivers/net/ethernet/sfc/siena_sriov.h b/drivers/net/ethernet/sfc/siena_sriov.h
deleted file mode 100644
index e548c4daf189..000000000000
--- a/drivers/net/ethernet/sfc/siena_sriov.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2015 Solarflare Communications Inc.
- */
-
-#ifndef SIENA_SRIOV_H
-#define SIENA_SRIOV_H
-
-#include "net_driver.h"
-
-/* On the SFC9000 family each port is associated with 1 PCI physical
- * function (PF) handled by sfc and a configurable number of virtual
- * functions (VFs) that may be handled by some other driver, often in
- * a VM guest. The queue pointer registers are mapped in both PF and
- * VF BARs such that an 8K region provides access to a single RX, TX
- * and event queue (collectively a Virtual Interface, VI or VNIC).
- *
- * The PF has access to all 1024 VIs while VFs are mapped to VIs
- * according to VI_BASE and VI_SCALE: VF i has access to VIs numbered
- * in range [VI_BASE + i << VI_SCALE, VI_BASE + i + 1 << VI_SCALE).
- * The number of VIs and the VI_SCALE value are configurable but must
- * be established at boot time by firmware.
- */
-
-/* Maximum VI_SCALE parameter supported by Siena */
-#define EFX_VI_SCALE_MAX 6
-/* Base VI to use for SR-IOV. Must be aligned to (1 << EFX_VI_SCALE_MAX),
- * so this is the smallest allowed value.
- */
-#define EFX_VI_BASE 128U
-/* Maximum number of VFs allowed */
-#define EFX_VF_COUNT_MAX 127
-/* Limit EVQs on VFs to be only 8k to reduce buffer table reservation */
-#define EFX_MAX_VF_EVQ_SIZE 8192UL
-/* The number of buffer table entries reserved for each VI on a VF */
-#define EFX_VF_BUFTBL_PER_VI \
- ((EFX_MAX_VF_EVQ_SIZE + 2 * EFX_MAX_DMAQ_SIZE) * \
- sizeof(efx_qword_t) / EFX_BUF_SIZE)
-
-int efx_siena_sriov_configure(struct efx_nic *efx, int num_vfs);
-int efx_siena_sriov_init(struct efx_nic *efx);
-void efx_siena_sriov_fini(struct efx_nic *efx);
-int efx_siena_sriov_mac_address_changed(struct efx_nic *efx);
-bool efx_siena_sriov_wanted(struct efx_nic *efx);
-void efx_siena_sriov_reset(struct efx_nic *efx);
-void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr);
-
-int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf, const u8 *mac);
-int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf,
- u16 vlan, u8 qos);
-int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf,
- bool spoofchk);
-int efx_siena_sriov_get_vf_config(struct efx_nic *efx, int vf,
- struct ifla_vf_info *ivf);
-
-#ifdef CONFIG_SFC_SRIOV
-
-static inline bool efx_siena_sriov_enabled(struct efx_nic *efx)
-{
- return efx->vf_init_count != 0;
-}
-#else /* !CONFIG_SFC_SRIOV */
-static inline bool efx_siena_sriov_enabled(struct efx_nic *efx)
-{
- return false;
-}
-#endif /* CONFIG_SFC_SRIOV */
-
-void efx_siena_sriov_probe(struct efx_nic *efx);
-void efx_siena_sriov_tx_flush_done(struct efx_nic *efx, efx_qword_t *event);
-void efx_siena_sriov_rx_flush_done(struct efx_nic *efx, efx_qword_t *event);
-void efx_siena_sriov_event(struct efx_channel *channel, efx_qword_t *event);
-void efx_siena_sriov_desc_fetch_err(struct efx_nic *efx, unsigned dmaq);
-
-#endif /* SIENA_SRIOV_H */
^ permalink raw reply related
* [PATCH net-next 04/28] sfc: Remove build references to missing functionality
From: Martin Habets @ 2022-04-22 14:57 UTC (permalink / raw)
To: kuba, pabeni, davem; +Cc: netdev, ecree.xilinx
In-Reply-To: <165063937837.27138.6911229584057659609.stgit@palantir17.mph.net>
From: Martin Habets <martinh@xilinx.com>
Functionality not supported or needed on Siena includes:
- Anything for EF100
- EF10 specifics such as register access, PIO and TSO offload.
Also only bind to Siena NICs.
Signed-off-by: Martin Habets <habetsm.xilinx@gmail.com>
---
drivers/net/ethernet/sfc/siena/efx.c | 28 +---
drivers/net/ethernet/sfc/siena/efx.h | 15 --
drivers/net/ethernet/sfc/siena/nic.c | 7 -
drivers/net/ethernet/sfc/siena/nic.h | 3
drivers/net/ethernet/sfc/siena/nic_common.h | 3
drivers/net/ethernet/sfc/siena/tx.c | 209 +--------------------------
6 files changed, 13 insertions(+), 252 deletions(-)
diff --git a/drivers/net/ethernet/sfc/siena/efx.c b/drivers/net/ethernet/sfc/siena/efx.c
index 5e7fe75cb1d4..f11e870b2eef 100644
--- a/drivers/net/ethernet/sfc/siena/efx.c
+++ b/drivers/net/ethernet/sfc/siena/efx.c
@@ -26,7 +26,6 @@
#include "efx.h"
#include "efx_common.h"
#include "efx_channels.h"
-#include "ef100.h"
#include "rx_common.h"
#include "tx_common.h"
#include "nic.h"
@@ -795,22 +794,10 @@ static void efx_unregister_netdev(struct efx_nic *efx)
/* PCI device ID table */
static const struct pci_device_id efx_pci_table[] = {
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903), /* SFC9120 PF */
- .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903), /* SFC9120 VF */
- .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923), /* SFC9140 PF */
- .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923), /* SFC9140 VF */
- .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03), /* SFC9220 PF */
- .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03), /* SFC9220 VF */
- .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03), /* SFC9250 PF */
- .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
- {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03), /* SFC9250 VF */
- .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
+ {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803), /* SFC9020 */
+ .driver_data = (unsigned long)&siena_a0_nic_type},
+ {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813), /* SFL9021 */
+ .driver_data = (unsigned long)&siena_a0_nic_type},
{0} /* end of list */
};
@@ -1298,14 +1285,8 @@ static int __init efx_init_module(void)
if (rc < 0)
goto err_pci;
- rc = pci_register_driver(&ef100_pci_driver);
- if (rc < 0)
- goto err_pci_ef100;
-
return 0;
- err_pci_ef100:
- pci_unregister_driver(&efx_pci_driver);
err_pci:
efx_destroy_reset_workqueue();
err_reset:
@@ -1318,7 +1299,6 @@ static void __exit efx_exit_module(void)
{
printk(KERN_INFO "Solarflare NET driver unloading\n");
- pci_unregister_driver(&ef100_pci_driver);
pci_unregister_driver(&efx_pci_driver);
efx_destroy_reset_workqueue();
unregister_netdevice_notifier(&efx_netdev_notifier);
diff --git a/drivers/net/ethernet/sfc/siena/efx.h b/drivers/net/ethernet/sfc/siena/efx.h
index c05a83da9e44..962c6b66eea7 100644
--- a/drivers/net/ethernet/sfc/siena/efx.h
+++ b/drivers/net/ethernet/sfc/siena/efx.h
@@ -10,8 +10,6 @@
#include <linux/indirect_call_wrapper.h>
#include "net_driver.h"
-#include "ef100_rx.h"
-#include "ef100_tx.h"
#include "filter.h"
int efx_net_open(struct net_device *net_dev);
@@ -24,9 +22,8 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
static inline netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
- return INDIRECT_CALL_2(tx_queue->efx->type->tx_enqueue,
- ef100_enqueue_skb, __efx_enqueue_skb,
- tx_queue, skb);
+ return INDIRECT_CALL_1(tx_queue->efx->type->tx_enqueue,
+ __efx_enqueue_skb, tx_queue, skb);
}
void efx_xmit_done_single(struct efx_tx_queue *tx_queue);
int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
@@ -40,16 +37,10 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
static inline void efx_rx_flush_packet(struct efx_channel *channel)
{
if (channel->rx_pkt_n_frags)
- INDIRECT_CALL_2(channel->efx->type->rx_packet,
- __ef100_rx_packet, __efx_rx_packet,
- channel);
+ __efx_rx_packet(channel);
}
static inline bool efx_rx_buf_hash_valid(struct efx_nic *efx, const u8 *prefix)
{
- if (efx->type->rx_buf_hash_valid)
- return INDIRECT_CALL_1(efx->type->rx_buf_hash_valid,
- ef100_rx_buf_hash_valid,
- prefix);
return true;
}
diff --git a/drivers/net/ethernet/sfc/siena/nic.c b/drivers/net/ethernet/sfc/siena/nic.c
index 22fbb0ae77fb..c59357178657 100644
--- a/drivers/net/ethernet/sfc/siena/nic.c
+++ b/drivers/net/ethernet/sfc/siena/nic.c
@@ -16,7 +16,6 @@
#include "bitfield.h"
#include "efx.h"
#include "nic.h"
-#include "ef10_regs.h"
#include "farch_regs.h"
#include "io.h"
#include "workarounds.h"
@@ -195,7 +194,6 @@ struct efx_nic_reg {
#define REGISTER_BB(name) REGISTER(name, F, B, B)
#define REGISTER_BZ(name) REGISTER(name, F, B, Z)
#define REGISTER_CZ(name) REGISTER(name, F, C, Z)
-#define REGISTER_DZ(name) REGISTER(name, E, D, Z)
static const struct efx_nic_reg efx_nic_regs[] = {
REGISTER_AZ(ADR_REGION),
@@ -302,9 +300,6 @@ static const struct efx_nic_reg efx_nic_regs[] = {
REGISTER_AB(XX_TXDRV_CTL),
/* XX_PRBS_CTL, XX_PRBS_CHK and XX_PRBS_ERR are not used */
/* XX_CORE_STAT is partly RC */
- REGISTER_DZ(BIU_HW_REV_ID),
- REGISTER_DZ(MC_DB_LWRD),
- REGISTER_DZ(MC_DB_HWRD),
};
struct efx_nic_reg_table {
@@ -337,7 +332,6 @@ struct efx_nic_reg_table {
FR_BZ_ ## name ## _STEP, \
FR_CZ_ ## name ## _ROWS)
#define REGISTER_TABLE_CZ(name) REGISTER_TABLE(name, F, C, Z)
-#define REGISTER_TABLE_DZ(name) REGISTER_TABLE(name, E, D, Z)
static const struct efx_nic_reg_table efx_nic_reg_tables[] = {
/* DRIVER is not used */
@@ -368,7 +362,6 @@ static const struct efx_nic_reg_table efx_nic_reg_tables[] = {
/* MSIX_PBA_TABLE is not mapped */
/* SRM_DBG is not mapped (and is redundant with BUF_FLL_TBL) */
REGISTER_TABLE_BZ(RX_FILTER_TBL0),
- REGISTER_TABLE_DZ(BIU_MC_SFT_STATUS),
};
size_t efx_nic_get_regs_len(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/siena/nic.h b/drivers/net/ethernet/sfc/siena/nic.h
index 251868235ae4..e87e4319748e 100644
--- a/drivers/net/ethernet/sfc/siena/nic.h
+++ b/drivers/net/ethernet/sfc/siena/nic.h
@@ -301,8 +301,7 @@ struct efx_ef10_nic_data {
int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
bool *data_mapped);
-extern const struct efx_nic_type efx_hunt_a0_nic_type;
-extern const struct efx_nic_type efx_hunt_a0_vf_nic_type;
+extern const struct efx_nic_type siena_a0_nic_type;
int falcon_probe_board(struct efx_nic *efx, u16 revision_info);
diff --git a/drivers/net/ethernet/sfc/siena/nic_common.h b/drivers/net/ethernet/sfc/siena/nic_common.h
index 0cef35c0c559..47deeae0a034 100644
--- a/drivers/net/ethernet/sfc/siena/nic_common.h
+++ b/drivers/net/ethernet/sfc/siena/nic_common.h
@@ -75,9 +75,6 @@ static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue, unsigned i
return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0;
}
-int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
- bool *data_mapped);
-
/* Decide whether to push a TX descriptor to the NIC vs merely writing
* the doorbell. This can reduce latency when we are adding a single
* descriptor to an empty queue, but is otherwise pointless. Further,
diff --git a/drivers/net/ethernet/sfc/siena/tx.c b/drivers/net/ethernet/sfc/siena/tx.c
index d16e031e95f4..81ef6dc353f7 100644
--- a/drivers/net/ethernet/sfc/siena/tx.c
+++ b/drivers/net/ethernet/sfc/siena/tx.c
@@ -22,14 +22,6 @@
#include "tx.h"
#include "tx_common.h"
#include "workarounds.h"
-#include "ef10_regs.h"
-
-#ifdef EFX_USE_PIO
-
-#define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES)
-unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF;
-
-#endif /* EFX_USE_PIO */
static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue,
struct efx_tx_buffer *buffer)
@@ -123,173 +115,6 @@ static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue,
return rc;
}
-#ifdef EFX_USE_PIO
-
-struct efx_short_copy_buffer {
- int used;
- u8 buf[L1_CACHE_BYTES];
-};
-
-/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned.
- * Advances piobuf pointer. Leaves additional data in the copy buffer.
- */
-static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf,
- u8 *data, int len,
- struct efx_short_copy_buffer *copy_buf)
-{
- int block_len = len & ~(sizeof(copy_buf->buf) - 1);
-
- __iowrite64_copy(*piobuf, data, block_len >> 3);
- *piobuf += block_len;
- len -= block_len;
-
- if (len) {
- data += block_len;
- BUG_ON(copy_buf->used);
- BUG_ON(len > sizeof(copy_buf->buf));
- memcpy(copy_buf->buf, data, len);
- copy_buf->used = len;
- }
-}
-
-/* Copy to PIO, respecting dword alignment, popping data from copy buffer first.
- * Advances piobuf pointer. Leaves additional data in the copy buffer.
- */
-static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf,
- u8 *data, int len,
- struct efx_short_copy_buffer *copy_buf)
-{
- if (copy_buf->used) {
- /* if the copy buffer is partially full, fill it up and write */
- int copy_to_buf =
- min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len);
-
- memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf);
- copy_buf->used += copy_to_buf;
-
- /* if we didn't fill it up then we're done for now */
- if (copy_buf->used < sizeof(copy_buf->buf))
- return;
-
- __iowrite64_copy(*piobuf, copy_buf->buf,
- sizeof(copy_buf->buf) >> 3);
- *piobuf += sizeof(copy_buf->buf);
- data += copy_to_buf;
- len -= copy_to_buf;
- copy_buf->used = 0;
- }
-
- efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf);
-}
-
-static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf,
- struct efx_short_copy_buffer *copy_buf)
-{
- /* if there's anything in it, write the whole buffer, including junk */
- if (copy_buf->used)
- __iowrite64_copy(piobuf, copy_buf->buf,
- sizeof(copy_buf->buf) >> 3);
-}
-
-/* Traverse skb structure and copy fragments in to PIO buffer.
- * Advances piobuf pointer.
- */
-static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
- u8 __iomem **piobuf,
- struct efx_short_copy_buffer *copy_buf)
-{
- int i;
-
- efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb),
- copy_buf);
-
- for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
- skb_frag_t *f = &skb_shinfo(skb)->frags[i];
- u8 *vaddr;
-
- vaddr = kmap_atomic(skb_frag_page(f));
-
- efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + skb_frag_off(f),
- skb_frag_size(f), copy_buf);
- kunmap_atomic(vaddr);
- }
-
- EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->frag_list);
-}
-
-static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue,
- struct sk_buff *skb)
-{
- struct efx_tx_buffer *buffer =
- efx_tx_queue_get_insert_buffer(tx_queue);
- u8 __iomem *piobuf = tx_queue->piobuf;
-
- /* Copy to PIO buffer. Ensure the writes are padded to the end
- * of a cache line, as this is required for write-combining to be
- * effective on at least x86.
- */
-
- if (skb_shinfo(skb)->nr_frags) {
- /* The size of the copy buffer will ensure all writes
- * are the size of a cache line.
- */
- struct efx_short_copy_buffer copy_buf;
-
- copy_buf.used = 0;
-
- efx_skb_copy_bits_to_pio(tx_queue->efx, skb,
- &piobuf, &copy_buf);
- efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
- } else {
- /* Pad the write to the size of a cache line.
- * We can do this because we know the skb_shared_info struct is
- * after the source, and the destination buffer is big enough.
- */
- BUILD_BUG_ON(L1_CACHE_BYTES >
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
- __iowrite64_copy(tx_queue->piobuf, skb->data,
- ALIGN(skb->len, L1_CACHE_BYTES) >> 3);
- }
-
- buffer->skb = skb;
- buffer->flags = EFX_TX_BUF_SKB | EFX_TX_BUF_OPTION;
-
- EFX_POPULATE_QWORD_5(buffer->option,
- ESF_DZ_TX_DESC_IS_OPT, 1,
- ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
- ESF_DZ_TX_PIO_CONT, 0,
- ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
- ESF_DZ_TX_PIO_BUF_ADDR,
- tx_queue->piobuf_offset);
- ++tx_queue->insert_count;
- return 0;
-}
-
-/* Decide whether we can use TX PIO, ie. write packet data directly into
- * a buffer on the device. This can reduce latency at the expense of
- * throughput, so we only do this if both hardware and software TX rings
- * are empty, including all queues for the channel. This also ensures that
- * only one packet at a time can be using the PIO buffer. If the xmit_more
- * flag is set then we don't use this - there'll be another packet along
- * shortly and we want to hold off the doorbell.
- */
-static bool efx_tx_may_pio(struct efx_tx_queue *tx_queue)
-{
- struct efx_channel *channel = tx_queue->channel;
-
- if (!tx_queue->piobuf)
- return false;
-
- EFX_WARN_ON_ONCE_PARANOID(!channel->efx->type->option_descriptors);
-
- efx_for_each_channel_tx_queue(tx_queue, channel)
- if (!efx_nic_tx_is_empty(tx_queue, tx_queue->packet_write_count))
- return false;
-
- return true;
-}
-#endif /* EFX_USE_PIO */
-
/* Send any pending traffic for a channel. xmit_more is shared across all
* queues for a channel, so we must check all of them.
*/
@@ -338,35 +163,11 @@ netdev_tx_t __efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb
* size limit.
*/
if (segments) {
- switch (tx_queue->tso_version) {
- case 1:
- rc = efx_enqueue_skb_tso(tx_queue, skb, &data_mapped);
- break;
- case 2:
- rc = efx_ef10_tx_tso_desc(tx_queue, skb, &data_mapped);
- break;
- case 0: /* No TSO on this queue, SW fallback needed */
- default:
- rc = -EINVAL;
- break;
- }
- if (rc == -EINVAL) {
- rc = efx_tx_tso_fallback(tx_queue, skb);
- tx_queue->tso_fallbacks++;
- if (rc == 0)
- return 0;
- }
- if (rc)
- goto err;
-#ifdef EFX_USE_PIO
- } else if (skb_len <= efx_piobuf_size && !xmit_more &&
- efx_tx_may_pio(tx_queue)) {
- /* Use PIO for short packets with an empty queue. */
- if (efx_enqueue_skb_pio(tx_queue, skb))
- goto err;
- tx_queue->pio_packets++;
- data_mapped = true;
-#endif
+ rc = efx_tx_tso_fallback(tx_queue, skb);
+ tx_queue->tso_fallbacks++;
+ if (rc == 0)
+ return 0;
+ goto err;
} else if (skb->data_len && skb_len <= EFX_TX_CB_SIZE) {
/* Pad short packets or coalesce short fragmented packets. */
if (efx_enqueue_skb_copy(tx_queue, skb))
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox