From: Simon Horman <horms@verge.net.au>
To: Patrick McHardy <kaber@trash.net>
Cc: lvs-devel@vger.kernel.org, netdev@vger.kernel.org,
netfilter-devel@vger.kernel.org,
"홍신 shin hong" <hongshin@gmail.com>,
"David Miller" <davem@davemloft.net>
Subject: Re: [patch] ipvs: Use atomic operations atomicly
Date: Tue, 1 Sep 2009 11:59:12 +1000 [thread overview]
Message-ID: <20090901015909.GA12252@verge.net.au> (raw)
In-Reply-To: <4A9BC082.3090804@trash.net>
On Mon, Aug 31, 2009 at 02:22:26PM +0200, Patrick McHardy wrote:
> Simon Horman wrote:
> > A pointed out by Shin Hong, IPVS doesn't always use atomic operations
> > in an atomic manner. While this seems unlikely to be manifest in
> > strange behaviour, it seems appropriate to clean this up.
> >
> > Cc: 홍신 shin hong <hongshin@gmail.com>
> > Signed-off-by: Simon Horman <horms@verge.net.au>
>
> Applied, thanks.
>
> > if (af == AF_INET &&
> > (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
> > (((cp->protocol != IPPROTO_TCP ||
> > cp->state == IP_VS_TCP_S_ESTABLISHED) &&
> > - (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
> > + (pkts % sysctl_ip_vs_sync_threshold[1]
>
> It seems that proc_do_sync_threshold() should check whether this value
> is zero. The current checks also look racy since incorrect values are
> first updated, then overwritten again.
Hi,
I'm wondering if an approach along the lines of the following is valid.
The idea is that the value in the ctl_table is essentially a scratch
value that is used by the parser and then copied into ip_vs_sync_threshold
if it is valid. I'm concerned that there are atomicity issues
surrounding writing ip_vs_sync_threshold while there might be readers.
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 98978e7..28d0c4f 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -774,8 +774,8 @@ extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
extern int sysctl_ip_vs_cache_bypass;
extern int sysctl_ip_vs_expire_nodest_conn;
extern int sysctl_ip_vs_expire_quiescent_template;
-extern int sysctl_ip_vs_sync_threshold[2];
extern int sysctl_ip_vs_nat_icmp_send;
+extern int ip_vs_sync_threshold[2];
extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b95699f..f3572b6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1362,8 +1362,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
(ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (pkts % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0])) ||
+ (pkts % ip_vs_sync_threshold[1] == ip_vs_sync_threshold[0])) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fba2892..8a9ff21 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -76,6 +76,11 @@ static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
/* number of virtual services */
static int ip_vs_num_services = 0;
+/* threshold handling */
+static int ip_vs_sync_threshold_min = 0;
+static int ip_vs_sync_threshold_max = INT_MAX;
+int ip_vs_sync_threshold[2] = { 3, 50 };
+
/* sysctl variables */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
@@ -85,7 +90,7 @@ static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
+static int sysctl_ip_vs_sync_threshold[2];
int sysctl_ip_vs_nat_icmp_send = 0;
@@ -1521,17 +1526,12 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = table->data;
- int val[2];
int rc;
- /* backup the value first */
- memcpy(val, valp, sizeof(val));
-
- rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
- if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
- /* Restore the correct value */
- memcpy(valp, val, sizeof(val));
- }
+ rc = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+ if (write && (valp[0] < valp[1]))
+ memcpy(ip_vs_sync_threshold, valp,
+ sizeof(ip_vs_sync_threshold));
return rc;
}
@@ -1698,6 +1698,8 @@ static struct ctl_table vs_vars[] = {
.maxlen = sizeof(sysctl_ip_vs_sync_threshold),
.mode = 0644,
.proc_handler = proc_do_sync_threshold,
+ .extra1 = &ip_vs_sync_threshold_min,
+ .extra2 = &ip_vs_sync_threshold_max,
},
{
.procname = "nat_icmp_send",
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index e177f0d..d3322fb 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -438,7 +438,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
- atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+ atomic_set(&cp->in_pkts, ip_vs_sync_threshold[0]);
cp->state = state;
cp->old_state = cp->state;
/*
next prev parent reply other threads:[~2009-09-01 1:59 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-08-28 2:37 [patch] ipvs: Use atomic operations atomicly Simon Horman
2009-08-31 12:22 ` Patrick McHardy
2009-08-31 14:49 ` Simon Horman
2009-09-01 1:59 ` Simon Horman [this message]
2009-09-02 12:56 ` Patrick McHardy
2009-09-19 9:51 ` Simon Horman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090901015909.GA12252@verge.net.au \
--to=horms@verge.net.au \
--cc=davem@davemloft.net \
--cc=hongshin@gmail.com \
--cc=kaber@trash.net \
--cc=lvs-devel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.