From: KaiGai Kohei <kaigai@ak.jp.nec.com>
To: Andrew Morton <akpm@osdl.org>
Cc: KaiGai Kohei <kaigai@ak.jp.nec.com>, linux-kernel@vger.kernel.org
Subject: Re: Add pacct_struct to fix some pacct bugs.
Date: Thu, 22 Jun 2006 12:08:16 +0900 [thread overview]
Message-ID: <449A09A0.70009@ak.jp.nec.com> (raw)
In-Reply-To: <449A0967.2020701@ak.jp.nec.com>
[PACCT] two phase process accounting
The pacct facility need an i/o operation when an accounting record
is generated. There is a possibility to wake OOM killer up.
If OOM killer is activated, it kills some processes to make them
release process memory regions.
But acct_process() is called in the killed processes context
before calling exit_mm(), so those processes cannot release
own memory. In the results, any processes stop in this point and
it finally cause a system stall.
---- in kernel/exit.c : do_exit() ------------
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
hrtimer_cancel(&tsk->signal->real_timer);
exit_itimers(tsk->signal);
acct_process(code);
}
:
- snip -
:
exit_mm(tsk);
----------------------------------------------
This patch separates generating an accounting record facility
into two-phase.
In the first one, acct_collect() calculate vitual memory size
of the process and stores it into pacct_struct before exit_mm().
Then, acct_process() generates an accounting record and write
it into medium.
Signed-off-by: KaiGai Kohei <kaigai@ak.jp.nec.com>
diff --git a/include/linux/acct.h b/include/linux/acct.h
index 9a66401..6753687 100644
--- a/include/linux/acct.h
+++ b/include/linux/acct.h
@@ -121,16 +121,20 @@ struct acct_v3
#ifdef CONFIG_BSD_PROCESS_ACCT
struct vfsmount;
struct super_block;
extern void acct_auto_close_mnt(struct vfsmount *m);
extern void acct_auto_close(struct super_block *sb);
+extern void acct_init_pacct(struct pacct_struct *pacct);
+extern void acct_collect();
extern void acct_process(long exitcode);
extern void acct_update_integrals(struct task_struct *tsk);
extern void acct_clear_integrals(struct task_struct *tsk);
#else
#define acct_auto_close_mnt(x) do { } while (0)
#define acct_auto_close(x) do { } while (0)
+#define acct_init_pacct(x) do { } while (0)
+#define acct_collect() do { } while (0)
#define acct_process(x) do { } while (0)
#define acct_update_integrals(x) do { } while (0)
#define acct_clear_integrals(task) do { } while (0)
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 29b7d4f..918fdda 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -356,10 +356,14 @@ struct sighand_struct {
atomic_t count;
struct k_sigaction action[_NSIG];
spinlock_t siglock;
};
+struct pacct_struct {
+ unsigned long ac_mem;
+};
+
/*
* NOTE! "signal_struct" does not have it's own
* locking, because a shared signal_struct always
* implies a shared sighand_struct, so locking
* sighand_struct is always a proper superset of
@@ -447,10 +451,13 @@ struct signal_struct {
* thing in threads created with CLONE_THREAD */
#ifdef CONFIG_KEYS
struct key *session_keyring; /* keyring inherited over fork */
struct key *process_keyring; /* keyring private to this process */
#endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+ struct pacct_struct pacct; /* per-process accounting information */
+#endif
};
/* Context switch must be unlocked if interrupts are to be enabled */
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
# define __ARCH_WANT_UNLOCKED_CTXSW
diff --git a/kernel/acct.c b/kernel/acct.c
index b327f4d..f1a4e12 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -419,13 +419,13 @@ static u32 encode_float(u64 value)
/*
* do_acct_process does all actual work. Caller holds the reference to file.
*/
static void do_acct_process(long exitcode, struct file *file)
{
+ struct pacct_struct *pacct = ¤t->signal->pacct;
acct_t ac;
mm_segment_t fs;
- unsigned long vsize;
unsigned long flim;
u64 elapsed;
u64 run_time;
struct timespec uptime;
unsigned long jiffies;
@@ -503,24 +503,13 @@ static void do_acct_process(long exitcod
ac.ac_flag |= ASU;
if (current->flags & PF_DUMPCORE)
ac.ac_flag |= ACORE;
if (current->flags & PF_SIGNALED)
ac.ac_flag |= AXSIG;
-
- vsize = 0;
- if (current->mm) {
- struct vm_area_struct *vma;
- down_read(¤t->mm->mmap_sem);
- vma = current->mm->mmap;
- while (vma) {
- vsize += vma->vm_end - vma->vm_start;
- vma = vma->vm_next;
- }
- up_read(¤t->mm->mmap_sem);
- }
- vsize = vsize / 1024;
- ac.ac_mem = encode_comp_t(vsize);
+ spin_lock(¤t->sighand->siglock);
+ ac.ac_mem = encode_comp_t(pacct->ac_mem);
+ spin_unlock(¤t->sighand->siglock);
ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
ac.ac_minflt = encode_comp_t(current->signal->min_flt +
current->min_flt);
ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
@@ -544,10 +533,42 @@ static void do_acct_process(long exitcod
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
set_fs(fs);
}
/**
+ * acct_init_pacct - initialize a new pacct_struct
+ */
+void acct_init_pacct(struct pacct_struct *pacct)
+{
+ memset(pacct, 0, sizeof(struct pacct_struct));
+}
+
+/**
+ * acct_collect - collect accounting information into pacct_struct
+ */
+void acct_collect(void)
+{
+ struct pacct_struct *pacct = ¤t->signal->pacct;
+ unsigned long vsize = 0;
+
+ if (current->mm) {
+ struct vm_area_struct *vma;
+ down_read(¤t->mm->mmap_sem);
+ vma = current->mm->mmap;
+ while (vma) {
+ vsize += vma->vm_end - vma->vm_start;
+ vma = vma->vm_next;
+ }
+ up_read(¤t->mm->mmap_sem);
+ }
+
+ spin_lock(¤t->sighand->siglock);
+ pacct->ac_mem = vsize / 1024;
+ spin_unlock(¤t->sighand->siglock);
+}
+
+/**
* acct_process - now just a wrapper around do_acct_process
* @exitcode: task exit code
*
* handles process accounting for an exiting task
*/
diff --git a/kernel/exit.c b/kernel/exit.c
index e06d0c1..54bdbd9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -893,11 +893,11 @@ fastcall NORET_TYPE void do_exit(long co
}
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
hrtimer_cancel(&tsk->signal->real_timer);
exit_itimers(tsk->signal);
- acct_process(code);
+ acct_collect();
}
if (unlikely(tsk->robust_list))
exit_robust_list(tsk);
#ifdef CONFIG_COMPAT
if (unlikely(tsk->compat_robust_list))
@@ -905,10 +905,12 @@ fastcall NORET_TYPE void do_exit(long co
#endif
if (unlikely(tsk->audit_context))
audit_free(tsk);
exit_mm(tsk);
+ if (group_dead)
+ acct_process(code);
exit_sem(tsk);
__exit_files(tsk);
__exit_fs(tsk);
exit_namespace(tsk);
exit_thread();
diff --git a/kernel/fork.c b/kernel/fork.c
index ac8100e..d6c812c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -869,10 +869,11 @@ static inline int copy_signal(unsigned l
* of the whole CPU time limit.
*/
tsk->it_prof_expires =
secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
}
+ acct_init_pacct(&sig->pacct);
return 0;
}
void __cleanup_signal(struct signal_struct *sig)
--
Open Source Software Promotion Center, NEC
KaiGai Kohei <kaigai@ak.jp.nec.com>
next prev parent reply other threads:[~2006-06-22 3:08 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-06-20 6:24 Add pacct_struct to fix some pacct bugs KaiGai Kohei
2006-06-20 6:42 ` Andrew Morton
2006-06-20 7:27 ` KaiGai Kohei
2006-06-22 3:07 ` KaiGai Kohei
2006-06-22 3:08 ` KaiGai Kohei [this message]
2006-06-22 3:09 ` KaiGai Kohei
2006-06-22 3:09 ` KaiGai Kohei
2006-06-22 3:35 ` Andrew Morton
2006-06-22 4:05 ` KaiGai Kohei
2006-06-22 4:14 ` Randy.Dunlap
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=449A09A0.70009@ak.jp.nec.com \
--to=kaigai@ak.jp.nec.com \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.