From: Hideo AOKI <haoki@redhat.com>
To: David Miller <davem@davemloft.net>,
Herbert Xu <herbert@gondor.apana.org.au>,
netdev <netdev@vger.kernel.org>
Cc: Takahiro Yasui <tyasui@redhat.com>,
Masami Hiramatsu <mhiramat@redhat.com>,
Satoshi Oshima <satoshi.oshima.fk@hitachi.com>,
billfink@mindspring.com, Andi Kleen <andi@firstfloor.org>,
Evgeniy Polyakov <johnpol@2ka.mipt.ru>,
Stephen Hemminger <shemminger@linux-foundation.org>,
yoshfuji@linux-ipv6.org,
Yumiko Sugita <yumiko.sugita.yf@hitachi.com>,
haoki@redhat.com
Subject: [PATCH 2/4] [CORE]: datagram: mem_scheudle functions
Date: Sat, 15 Dec 2007 00:15:04 -0500 [thread overview]
Message-ID: <476362D8.4070807@redhat.com> (raw)
In-Reply-To: <47636120.4050701@redhat.com>
This patch changes the network core subsystem to support memory
accounting.
Memory scheduling, charging, uncharging and reclaiming functions are
added. These functions use sk_forward_alloc to store socket-local
accounting. They also need to take a lock to keep sk_forward_alloc and
memory_allocated consistent. They currently support only
datagram protocols.
sk_datagram_rfree() is a receive buffer destructor for datagram
protocols which are capable of protocol-specific memory accounting.
To enable memory accounting when a receive buffer is released,
sock_queue_rcv_skb() is modified, although its interface isn't changed.
The body of the function is now implemented in
sock_queue_rcv_skb_with_owner(). Additionally, skb_set_owner_r() is
moved from sock.h to core/datagram.c because we want to use it as a
callback function.
Cc: Satoshi Oshima <satoshi.oshima.fk@hitachi.com>
Signed-off-by: Takahiro Yasui <tyasui@redhat.com>
Signed-off-by: Masami Hiramatsu <mhiramat@redhat.com>
Signed-off-by: Hideo Aoki <haoki@redhat.com>
---
include/net/sock.h | 117 +++++++++++++++++++++++++++++++++++++++++++++++++---
net/core/datagram.c | 72 ++++++++++++++++++++++++++++++++
net/core/sock.c | 13 ++++-
3 files changed, 193 insertions(+), 9 deletions(-)
diff -pruN net-2.6-udp-take10a4-p1/include/net/sock.h net-2.6-udp-take10a4-p2/include/net/sock.h
--- net-2.6-udp-take10a4-p1/include/net/sock.h 2007-12-11 10:54:53.000000000 -0500
+++ net-2.6-udp-take10a4-p2/include/net/sock.h 2007-12-14 20:27:40.000000000 -0500
@@ -750,6 +750,9 @@ static inline struct inode *SOCK_INODE(s
return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
}
+/*
+ * Functions for memory accounting
+ */
extern void __sk_stream_mem_reclaim(struct sock *sk);
extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind);
@@ -778,6 +781,107 @@ static inline int sk_stream_wmem_schedul
sk_stream_mem_schedule(sk, size, 0);
}
+/*
+ * Datagram counterparts of the stream memory accounting helpers above.
+ */
+
+#define SK_DATAGRAM_MEM_QUANTUM ((unsigned int)PAGE_SIZE)
+
+/*
+ * Convert a byte count into the number of SK_DATAGRAM_MEM_QUANTUM-sized
+ * units needed to hold it, rounding up.
+ */
+static inline int sk_datagram_pages(int amt)
+{
+	/* Callers pass a positive amt, so unsigned arithmetic is an optimization. */
+	unsigned int bytes = (unsigned int)amt;
+
+	return DIV_ROUND_UP(bytes, SK_DATAGRAM_MEM_QUANTUM);
+}
+
+extern void __sk_datagram_mem_reclaim(struct sock *sk);
+extern int sk_datagram_mem_schedule(struct sock *sk, int size, int kind);
+
+/*
+ * Locked wrapper around __sk_datagram_mem_reclaim().  Does nothing when
+ * the protocol has no memory_allocated counter.
+ */
+static inline void sk_datagram_mem_reclaim(struct sock *sk)
+{
+	unsigned long irqflags;
+
+	if (sk->sk_prot->memory_allocated) {
+		spin_lock_irqsave(&sk->sk_lock.slock, irqflags);
+		__sk_datagram_mem_reclaim(sk);
+		spin_unlock_irqrestore(&sk->sk_lock.slock, irqflags);
+	}
+}
+
+/*
+ * Returns 1 when @size bytes of receive memory are covered, either by the
+ * current sk_forward_alloc or by a successful sk_datagram_mem_schedule()
+ * call; returns 0 otherwise.
+ */
+static inline int sk_datagram_rmem_schedule(struct sock *sk, int size)
+{
+	if (sk->sk_forward_alloc >= size)
+		return 1;
+
+	return sk_datagram_mem_schedule(sk, size, 1) != 0;
+}
+
+/*
+ * Returns 1 when @size bytes of send memory are covered, either by the
+ * current sk_forward_alloc or by a successful sk_datagram_mem_schedule()
+ * call; returns 0 otherwise.
+ */
+static inline int sk_datagram_wmem_schedule(struct sock *sk, int size)
+{
+	if (sk->sk_forward_alloc >= size)
+		return 1;
+
+	return sk_datagram_mem_schedule(sk, size, 0) != 0;
+}
+
+/* Reclaim forward-allocated memory; only SOCK_DGRAM sockets are handled. */
+static inline void sk_mem_reclaim(struct sock *sk)
+{
+	if (sk->sk_type != SOCK_DGRAM)
+		return;
+
+	sk_datagram_mem_reclaim(sk);
+}
+
+/*
+ * Schedule @size bytes of send memory.  Sockets that are not SOCK_DGRAM
+ * are not accounted here and always succeed.
+ */
+static inline int sk_wmem_schedule(struct sock *sk, int size)
+{
+	return sk->sk_type == SOCK_DGRAM ?
+		sk_datagram_wmem_schedule(sk, size) : 1;
+}
+
+/*
+ * Charge @size bytes of send memory against the socket's sk_forward_alloc.
+ * Returns 1 on success, or when the socket is not accounted at all, and
+ * 0 when the memory could not be scheduled.
+ */
+static inline int sk_account_wmem_charge(struct sock *sk, int size)
+{
+	unsigned long irqflags;
+	int charged = 0;
+
+	/* Only datagram sockets whose protocol tracks memory are accounted. */
+	if (!sk->sk_prot->memory_allocated || sk->sk_type != SOCK_DGRAM)
+		return 1;
+
+	spin_lock_irqsave(&sk->sk_lock.slock, irqflags);
+	if (sk_datagram_wmem_schedule(sk, size)) {
+		sk->sk_forward_alloc -= size;
+		charged = 1;
+	}
+	spin_unlock_irqrestore(&sk->sk_lock.slock, irqflags);
+
+	return charged;
+}
+
+/*
+ * Charge @size bytes of receive memory against the socket's
+ * sk_forward_alloc.  Returns 1 on success, or when the socket is not
+ * accounted at all, and 0 when the memory could not be scheduled.
+ */
+static inline int sk_account_rmem_charge(struct sock *sk, int size)
+{
+	unsigned long irqflags;
+	int charged = 0;
+
+	/* Only datagram sockets whose protocol tracks memory are accounted. */
+	if (!sk->sk_prot->memory_allocated || sk->sk_type != SOCK_DGRAM)
+		return 1;
+
+	spin_lock_irqsave(&sk->sk_lock.slock, irqflags);
+	if (sk_datagram_rmem_schedule(sk, size)) {
+		sk->sk_forward_alloc -= size;
+		charged = 1;
+	}
+	spin_unlock_irqrestore(&sk->sk_lock.slock, irqflags);
+
+	return charged;
+}
+
+/*
+ * Give @size bytes back to the socket's sk_forward_alloc.  Does nothing
+ * unless this is a datagram socket whose protocol tracks memory.
+ */
+static inline void sk_account_uncharge(struct sock *sk, int size)
+{
+	unsigned long irqflags;
+
+	if (sk->sk_prot->memory_allocated && sk->sk_type == SOCK_DGRAM) {
+		spin_lock_irqsave(&sk->sk_lock.slock, irqflags);
+		sk->sk_forward_alloc += size;
+		spin_unlock_irqrestore(&sk->sk_lock.slock, irqflags);
+	}
+}
+
/* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming
@@ -1159,18 +1263,19 @@ static inline void skb_set_owner_w(struc
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}
-static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
-{
- skb->sk = sk;
- skb->destructor = sock_rfree;
- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
-}
+extern void skb_set_owner_r(struct sk_buff *skb, struct sock *sk);
+
+void sk_datagram_rfree(struct sk_buff *skb);
extern void sk_reset_timer(struct sock *sk, struct timer_list* timer,
unsigned long expires);
extern void sk_stop_timer(struct sock *sk, struct timer_list* timer);
+extern int sock_queue_rcv_skb_with_owner(struct sock *sk, struct sk_buff *skb,
+ void set_owner_r(struct sk_buff *nskb,
+ struct sock* nsk));
+
extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
diff -pruN net-2.6-udp-take10a4-p1/net/core/datagram.c net-2.6-udp-take10a4-p2/net/core/datagram.c
--- net-2.6-udp-take10a4-p1/net/core/datagram.c 2007-12-11 10:54:55.000000000 -0500
+++ net-2.6-udp-take10a4-p2/net/core/datagram.c 2007-12-14 20:26:18.000000000 -0500
@@ -200,6 +200,14 @@ void skb_free_datagram(struct sock *sk,
kfree_skb(skb);
}
+/**
+ * skb_set_owner_r - make a socket the receive-side owner of an skb
+ * @skb: buffer to take ownership of
+ * @sk: socket the buffer is charged to
+ *
+ * Installs sock_rfree() as the destructor and adds the buffer's truesize
+ * to the socket's sk_rmem_alloc.
+ */
+void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+	atomic_t *rmem = &sk->sk_rmem_alloc;
+
+	skb->sk = sk;
+	skb->destructor = sock_rfree;
+	atomic_add(skb->truesize, rmem);
+}
+EXPORT_SYMBOL(skb_set_owner_r);
+
/**
* skb_kill_datagram - Free a datagram skbuff forcibly
* @sk: socket
@@ -484,6 +492,70 @@ fault:
}
/**
+ * sk_datagram_rfree - receive buffer destructor for datagram protocols
+ * @skb: skbuff being released
+ *
+ * Subtracts the buffer's truesize from the owning socket's sk_rmem_alloc,
+ * uncharges the same amount from the socket's accounting and then runs
+ * a reclaim on the socket.
+ */
+void sk_datagram_rfree(struct sk_buff *skb)
+{
+	struct sock *owner = skb->sk;
+
+	skb_truesize_check(skb);
+
+	atomic_sub(skb->truesize, &owner->sk_rmem_alloc);
+	sk_account_uncharge(owner, skb->truesize);
+	sk_datagram_mem_reclaim(owner);
+}
+EXPORT_SYMBOL(sk_datagram_rfree);
+
+/**
+ * __sk_datagram_mem_reclaim - return unused forward-allocated memory
+ * @sk: socket
+ *
+ * Gives every whole SK_DATAGRAM_MEM_QUANTUM held in sk_forward_alloc back
+ * to the protocol's memory_allocated counter and keeps only the remainder.
+ * sk_datagram_mem_reclaim() calls this with sk->sk_lock.slock held.
+ */
+void __sk_datagram_mem_reclaim(struct sock *sk)
+{
+	/*
+	 * Compare and divide as signed ints: SK_DATAGRAM_MEM_QUANTUM is
+	 * unsigned, so a negative sk_forward_alloc would otherwise be
+	 * converted to a huge value, pass the check below and subtract a
+	 * bogus count from memory_allocated.
+	 */
+	const int quantum = (int)SK_DATAGRAM_MEM_QUANTUM;
+
+	if (sk->sk_forward_alloc < quantum)
+		return;
+
+	atomic_sub(sk->sk_forward_alloc / quantum,
+		   sk->sk_prot->memory_allocated);
+	sk->sk_forward_alloc &= quantum - 1;
+}
+EXPORT_SYMBOL(__sk_datagram_mem_reclaim);
+
+/**
+ * sk_datagram_mem_schedule - memory accounting for datagram protocols
+ * @sk: socket
+ * @size: memory size to allocate
+ * @kind: allocation type; 0 means wmem allocation, anything else means
+ *        rmem allocation
+ *
+ * Returns 1 if the allocation is accepted (or the protocol does no
+ * accounting) and 0 if it is refused.
+ */
+int sk_datagram_mem_schedule(struct sock *sk, int size, int kind)
+{
+	struct proto *prot = sk->sk_prot;
+	int pages;
+
+	/* Don't account and limit memory if protocol doesn't support. */
+	if (!prot->memory_allocated)
+		return 1;
+
+	pages = sk_datagram_pages(size);
+	if (atomic_add_return(pages, prot->memory_allocated) >
+	    prot->sysctl_mem[0]) {
+		int over_min;
+
+		/* Over the first sysctl_mem threshold: check the per-socket one. */
+		if (kind)
+			over_min = atomic_read(&sk->sk_rmem_alloc) + size >=
+				   prot->sysctl_rmem[0];
+		else
+			over_min = atomic_read(&sk->sk_wmem_alloc) + size >=
+				   prot->sysctl_wmem[0];
+
+		if (over_min) {
+			/* Undo changes. */
+			atomic_sub(pages, prot->memory_allocated);
+			return 0;
+		}
+	}
+
+	sk->sk_forward_alloc += pages * SK_DATAGRAM_MEM_QUANTUM;
+	return 1;
+}
+EXPORT_SYMBOL(sk_datagram_mem_schedule);
+
+/**
* datagram_poll - generic datagram poll
* @file: file struct
* @sock: socket
diff -pruN net-2.6-udp-take10a4-p1/net/core/sock.c net-2.6-udp-take10a4-p2/net/core/sock.c
--- net-2.6-udp-take10a4-p1/net/core/sock.c 2007-12-11 10:54:55.000000000 -0500
+++ net-2.6-udp-take10a4-p2/net/core/sock.c 2007-12-14 16:42:06.000000000 -0500
@@ -263,8 +263,9 @@ static void sock_disable_timestamp(struc
}
}
-
-int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int sock_queue_rcv_skb_with_owner(struct sock *sk, struct sk_buff *skb,
+ void set_owner_r(struct sk_buff *nskb,
+ struct sock* nsk))
{
int err = 0;
int skb_len;
@@ -283,7 +284,7 @@ int sock_queue_rcv_skb(struct sock *sk,
goto out;
skb->dev = NULL;
- skb_set_owner_r(skb, sk);
+ set_owner_r(skb, sk);
/* Cache the SKB length before we tack it onto the receive
* queue. Once it is added it no longer belongs to us and
@@ -299,6 +300,12 @@ int sock_queue_rcv_skb(struct sock *sk,
out:
return err;
}
+EXPORT_SYMBOL(sock_queue_rcv_skb_with_owner);
+
+/*
+ * Same as sock_queue_rcv_skb_with_owner(), using skb_set_owner_r() as the
+ * ownership callback.
+ */
+int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int err = sock_queue_rcv_skb_with_owner(sk, skb, skb_set_owner_r);
+
+	return err;
+}
EXPORT_SYMBOL(sock_queue_rcv_skb);
int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
--
Hitachi Computer Products (America) Inc.
next prev parent reply other threads:[~2007-12-15 5:20 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-15 5:07 [PATCH 0/4] [UDP]: memory accounting and limitation (take 10) Hideo AOKI
2007-12-15 5:14 ` [PATCH 1/4] [UDP]: fix send buffer check Hideo AOKI
2007-12-15 5:15 ` Hideo AOKI [this message]
2007-12-15 15:32 ` [PATCH 2/4] [CORE]: datagram: mem_scheudle functions Herbert Xu
2007-12-16 21:20 ` Hideo AOKI
2007-12-15 5:15 ` [PATCH 3/4] [UDP]: add udp_mem, udp_rmem_min and udp_wmem_min Hideo AOKI
2007-12-15 5:15 ` [PATCH 4/4] [UDP]: memory accounting in IPv4 Hideo AOKI
2007-12-16 5:34 ` [PATCH 0/4] [UDP]: memory accounting and limitation (take 10) David Miller
2007-12-16 21:21 ` Hideo AOKI
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=476362D8.4070807@redhat.com \
--to=haoki@redhat.com \
--cc=andi@firstfloor.org \
--cc=billfink@mindspring.com \
--cc=davem@davemloft.net \
--cc=herbert@gondor.apana.org.au \
--cc=johnpol@2ka.mipt.ru \
--cc=mhiramat@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=satoshi.oshima.fk@hitachi.com \
--cc=shemminger@linux-foundation.org \
--cc=tyasui@redhat.com \
--cc=yoshfuji@linux-ipv6.org \
--cc=yumiko.sugita.yf@hitachi.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.