From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: stable@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
patches@lists.linux.dev, Kuniyuki Iwashima <kuniyu@amazon.co.jp>,
Daniel Borkmann <daniel@iogearbox.net>,
Eric Dumazet <edumazet@google.com>,
Martin KaFai Lau <kafai@fb.com>, Sasha Levin <sashal@kernel.org>
Subject: [PATCH 5.10 35/79] tcp: Add num_closed_socks to struct sock_reuseport.
Date: Thu, 27 Oct 2022 18:55:45 +0200 [thread overview]
Message-ID: <20221027165055.564998662@linuxfoundation.org> (raw)
In-Reply-To: <20221027165054.270676357@linuxfoundation.org>
From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
[ Upstream commit 5c040eaf5d1753aafe12989ca712175df0b9c436 ]
As noted in the following commit, a closed listener has to hold the
reference to the reuseport group for socket migration. This patch adds a
field (num_closed_socks) to struct sock_reuseport to manage closed sockets
within the same reuseport group. Moreover, this and the following commits
introduce some helper functions to split socks[] into two sections and keep
TCP_LISTEN and TCP_CLOSE sockets in each section. Like a double-ended
queue, we will place TCP_LISTEN sockets from the front and TCP_CLOSE
sockets from the end.
  TCP_LISTEN---------->       <-------TCP_CLOSE
  +---+---+  ---  +---+  ---  +---+  ---  +---+
  | 0 | 1 |  ...  | i |  ...  | j |  ...  | k |
  +---+---+  ---  +---+  ---  +---+  ---  +---+

  i = num_socks - 1
  j = max_socks - num_closed_socks
  k = max_socks - 1
This patch also extends reuseport_add_sock() and reuseport_grow() to
support num_closed_socks.
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210612123224.12525-3-kuniyu@amazon.co.jp
Stable-dep-of: 69421bf98482 ("udp: Update reuse->has_conns under reuseport_lock.")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
include/net/sock_reuseport.h | 5 ++-
net/core/sock_reuseport.c | 75 +++++++++++++++++++++++++++---------
2 files changed, 60 insertions(+), 20 deletions(-)
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 505f1e18e9bf..0e558ca7afbf 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -13,8 +13,9 @@ extern spinlock_t reuseport_lock;
struct sock_reuseport {
struct rcu_head rcu;
- u16 max_socks; /* length of socks */
- u16 num_socks; /* elements in socks */
+ u16 max_socks; /* length of socks */
+ u16 num_socks; /* elements in socks */
+ u16 num_closed_socks; /* closed elements in socks */
/* The last synq overflow event timestamp of this
* reuse->socks[] group.
*/
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index b065f0a103ed..f478c65a281b 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -18,6 +18,49 @@ DEFINE_SPINLOCK(reuseport_lock);
static DEFINE_IDA(reuseport_ida);
+static int reuseport_sock_index(struct sock *sk,
+ const struct sock_reuseport *reuse,
+ bool closed)
+{
+ int left, right;
+
+ if (!closed) {
+ left = 0;
+ right = reuse->num_socks;
+ } else {
+ left = reuse->max_socks - reuse->num_closed_socks;
+ right = reuse->max_socks;
+ }
+
+ for (; left < right; left++)
+ if (reuse->socks[left] == sk)
+ return left;
+ return -1;
+}
+
+static void __reuseport_add_sock(struct sock *sk,
+ struct sock_reuseport *reuse)
+{
+ reuse->socks[reuse->num_socks] = sk;
+ /* paired with smp_rmb() in reuseport_select_sock() */
+ smp_wmb();
+ reuse->num_socks++;
+}
+
+static bool __reuseport_detach_sock(struct sock *sk,
+ struct sock_reuseport *reuse)
+{
+ int i = reuseport_sock_index(sk, reuse, false);
+
+ if (i == -1)
+ return false;
+
+ reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
+ reuse->num_socks--;
+
+ return true;
+}
+
static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
unsigned int size = sizeof(struct sock_reuseport) +
@@ -72,9 +115,9 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
}
reuse->reuseport_id = id;
+ reuse->bind_inany = bind_inany;
reuse->socks[0] = sk;
reuse->num_socks = 1;
- reuse->bind_inany = bind_inany;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
out:
@@ -98,6 +141,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
return NULL;
more_reuse->num_socks = reuse->num_socks;
+ more_reuse->num_closed_socks = reuse->num_closed_socks;
more_reuse->prog = reuse->prog;
more_reuse->reuseport_id = reuse->reuseport_id;
more_reuse->bind_inany = reuse->bind_inany;
@@ -105,9 +149,13 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
memcpy(more_reuse->socks, reuse->socks,
reuse->num_socks * sizeof(struct sock *));
+ memcpy(more_reuse->socks +
+ (more_reuse->max_socks - more_reuse->num_closed_socks),
+ reuse->socks + (reuse->max_socks - reuse->num_closed_socks),
+ reuse->num_closed_socks * sizeof(struct sock *));
more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);
- for (i = 0; i < reuse->num_socks; ++i)
+ for (i = 0; i < reuse->max_socks; ++i)
rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
more_reuse);
@@ -158,7 +206,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
return -EBUSY;
}
- if (reuse->num_socks == reuse->max_socks) {
+ if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
reuse = reuseport_grow(reuse);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
@@ -166,10 +214,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
}
}
- reuse->socks[reuse->num_socks] = sk;
- /* paired with smp_rmb() in reuseport_select_sock() */
- smp_wmb();
- reuse->num_socks++;
+ __reuseport_add_sock(sk, reuse);
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
spin_unlock_bh(&reuseport_lock);
@@ -183,7 +228,6 @@ EXPORT_SYMBOL(reuseport_add_sock);
void reuseport_detach_sock(struct sock *sk)
{
struct sock_reuseport *reuse;
- int i;
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
@@ -200,16 +244,11 @@ void reuseport_detach_sock(struct sock *sk)
bpf_sk_reuseport_detach(sk);
rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
+ __reuseport_detach_sock(sk, reuse);
+
+ if (reuse->num_socks + reuse->num_closed_socks == 0)
+ call_rcu(&reuse->rcu, reuseport_free_rcu);
- for (i = 0; i < reuse->num_socks; i++) {
- if (reuse->socks[i] == sk) {
- reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
- reuse->num_socks--;
- if (reuse->num_socks == 0)
- call_rcu(&reuse->rcu, reuseport_free_rcu);
- break;
- }
- }
spin_unlock_bh(&reuseport_lock);
}
EXPORT_SYMBOL(reuseport_detach_sock);
@@ -274,7 +313,7 @@ struct sock *reuseport_select_sock(struct sock *sk,
prog = rcu_dereference(reuse->prog);
socks = READ_ONCE(reuse->num_socks);
if (likely(socks)) {
- /* paired with smp_wmb() in reuseport_add_sock() */
+ /* paired with smp_wmb() in __reuseport_add_sock() */
smp_rmb();
if (!prog || !skb)
--
2.35.1
next prev parent reply other threads:[~2022-10-27 17:05 UTC|newest]
Thread overview: 97+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-27 16:55 [PATCH 5.10 00/79] 5.10.151-rc1 review Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 01/79] ocfs2: clear dinode links count in case of error Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 02/79] ocfs2: fix BUG when iput after ocfs2_mknod fails Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 03/79] selinux: enable use of both GFP_KERNEL and GFP_ATOMIC in convert_context() Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 04/79] cpufreq: qcom: fix writes in read-only memory region Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 05/79] i2c: qcom-cci: Fix ordering of pm_runtime_xx and i2c_add_adapter Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 06/79] cpufreq: tegra194: Fix module loading Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 07/79] x86/microcode/AMD: Apply the patch early on every logical thread Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 08/79] hwmon/coretemp: Handle large core ID value Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 09/79] ata: ahci-imx: Fix MODULE_ALIAS Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 10/79] ata: ahci: Match EM_MAX_SLOTS with SATA_PMP_MAX_PORTS Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 11/79] cpufreq: qcom: fix memory leak in error path Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 12/79] kvm: Add support for arch compat vm ioctls Greg Kroah-Hartman
2022-10-30 9:54 ` Pavel Machek
2022-10-27 16:55 ` [PATCH 5.10 13/79] KVM: arm64: vgic: Fix exit condition in scan_its_table() Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 14/79] media: mceusb: set timeout to at least timeout provided Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 15/79] media: venus: dec: Handle the case where find_format fails Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 16/79] bpf: Generate BTF_KIND_FLOAT when linking vmlinux Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 17/79] kbuild: Quote OBJCOPY var to avoid a pahole call break the build Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 18/79] kbuild: skip per-CPU BTF generation for pahole v1.18-v1.21 Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 19/79] kbuild: Unify options for BTF generation for vmlinux and modules Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 20/79] kbuild: Add skip_encoding_btf_enum64 option to pahole Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 21/79] block: wbt: Remove unnecessary invoking of wbt_update_limits in wbt_init Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 22/79] blk-wbt: call rq_qos_add() after wb_normal is initialized Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 23/79] arm64: errata: Remove AES hwcap for COMPAT tasks Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 24/79] r8152: add PID for the Lenovo OneLink+ Dock Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 25/79] btrfs: fix processing of delayed data refs during backref walking Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 26/79] btrfs: fix processing of delayed tree block " Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 27/79] ACPI: extlog: Handle multiple records Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 28/79] tipc: Fix recognition of trial period Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 29/79] tipc: fix an information leak in tipc_topsrv_kern_subscr Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 30/79] i40e: Fix DMA mappings leak Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 31/79] HID: magicmouse: Do not set BTN_MOUSE on double report Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 32/79] sfc: Change VF mac via PF as first preference if available Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 33/79] net/atm: fix proc_mpc_write incorrect return value Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 34/79] net: phy: dp83867: Extend RX strap quirk for SGMII mode Greg Kroah-Hartman
2022-10-27 16:55 ` Greg Kroah-Hartman [this message]
2022-10-27 19:53 ` [PATCH 5.10 35/79] tcp: Add num_closed_socks to struct sock_reuseport Kuniyuki Iwashima
2022-10-28 6:17 ` Greg KH
2022-10-28 17:05 ` Kuniyuki Iwashima
2022-10-29 6:27 ` Greg KH
2022-10-27 16:55 ` [PATCH 5.10 36/79] udp: Update reuse->has_conns under reuseport_lock Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 37/79] cifs: Fix xid leak in cifs_copy_file_range() Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 38/79] cifs: Fix xid leak in cifs_flock() Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 39/79] cifs: Fix xid leak in cifs_ses_add_channel() Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 40/79] net: hsr: avoid possible NULL deref in skb_clone() Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 41/79] ionic: catch NULL pointer issue on reconfig Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 42/79] nvme-hwmon: rework to avoid devm allocation Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 43/79] nvme-hwmon: Return error code when registration fails Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 44/79] nvme-hwmon: consistently ignore errors from nvme_hwmon_init Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 45/79] nvme-hwmon: kmalloc the NVME SMART log buffer Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 46/79] net: sched: cake: fix null pointer access issue when cake_init() fails Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 47/79] net: sched: delete duplicate cleanup of backlog and qlen Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 48/79] net: sched: sfb: fix null pointer access issue when sfb_init() fails Greg Kroah-Hartman
2022-10-27 16:55 ` [PATCH 5.10 49/79] sfc: include vport_id in filter spec hash and equal() Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 50/79] net: hns: fix possible memory leak in hnae_ae_register() Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 51/79] net: sched: fix race condition in qdisc_graft() Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 52/79] net: phy: dp83822: disable MDI crossover status change interrupt Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 53/79] iommu/vt-d: Allow NVS regions in arch_rmrr_sanity_check() Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 54/79] iommu/vt-d: Clean up si_domain in the init_dmars() error path Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 55/79] drm/virtio: Use appropriate atomic state in virtio_gpu_plane_cleanup_fb() Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 56/79] dmaengine: mxs-dma: Remove the unused .id_table Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 57/79] dmaengine: mxs: use platform_driver_register Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 58/79] tracing: Simplify conditional compilation code in tracing_set_tracer() Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 59/79] tracing: Do not free snapshot if tracer is on cmdline Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 60/79] xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 61/79] xen/gntdev: Accommodate VMA splitting Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 62/79] mmc: sdhci-tegra: Use actual clock rate for SW tuning correction Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 63/79] riscv: Add machine name to kernel boot log and stack dump output Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 64/79] riscv: always honor the CONFIG_CMDLINE_FORCE when parsing dtb Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 65/79] perf pmu: Validate raw event with sysfs exported format bits Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 66/79] perf: Skip and warn on unknown format configN attrs Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 67/79] fcntl: make F_GETOWN(EX) return 0 on dead owner task Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 68/79] fcntl: fix potential deadlocks for &fown_struct.lock Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 69/79] arm64: dts: qcom: sc7180-trogdor: Fixup modem memory region Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 70/79] arm64: topology: move store_cpu_topology() to shared code Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 71/79] riscv: topology: fix default topology reporting Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 72/79] perf/x86/intel/pt: Relax address filter validation Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 73/79] hv_netvsc: Fix race between VF offering and VF association message from host Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 74/79] [PATCH v3] ACPI: video: Force backlight native for more TongFang devices Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 75/79] x86/Kconfig: Drop check for -mabi=ms for CONFIG_EFI_STUB Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 76/79] Makefile.debug: re-enable debug info for .S files Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 77/79] mmc: core: Add SD card quirk for broken discard Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 78/79] blk-wbt: fix that rwb->wc is always set to 1 in wbt_init() Greg Kroah-Hartman
2022-10-27 16:56 ` [PATCH 5.10 79/79] mm: /proc/pid/smaps_rollup: fix no vmas null-deref Greg Kroah-Hartman
2022-10-27 18:10 ` [PATCH 5.10 00/79] 5.10.151-rc1 review Guenter Roeck
2022-10-27 19:25 ` Greg Kroah-Hartman
2022-10-27 19:27 ` Pavel Machek
2022-10-27 19:39 ` Guenter Roeck
2022-10-27 19:54 ` Florian Fainelli
2022-10-27 19:49 ` Linus Torvalds
2022-10-28 11:01 ` Greg Kroah-Hartman
2022-10-28 10:47 ` Sudip Mukherjee (Codethink)
2022-10-28 10:58 ` Greg Kroah-Hartman
2022-10-28 11:58 ` Jon Hunter
2022-10-28 12:21 ` Pavel Machek
2022-10-28 13:59 ` Naresh Kamboju
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221027165055.564998662@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=daniel@iogearbox.net \
--cc=edumazet@google.com \
--cc=kafai@fb.com \
--cc=kuniyu@amazon.co.jp \
--cc=patches@lists.linux.dev \
--cc=sashal@kernel.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox