From: Dhaval Giani <dhaval@linux.vnet.ibm.com>
To: Kay Sievers <kay.sievers@vrfy.org>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
Greg Kroah-Hartman <gregkh@suse.de>,
Andrew Morton <akpm@linux-foundation.org>
Subject: Re: sched: delayed cleanup of user_struct
Date: Tue, 10 Mar 2009 00:19:40 +0530 [thread overview]
Message-ID: <20090309184940.GA9507@linux.vnet.ibm.com> (raw)
In-Reply-To: <1236623837.2791.1.camel@nga>
On Mon, Mar 09, 2009 at 07:37:17PM +0100, Kay Sievers wrote:
> From: Kay Sievers <kay.sievers@vrfy.org>
> Subject: sched: delayed cleanup of user_struct
>
> During bootup performance tracing we see repeated occurrences of
> /sys/kernel/uid/* events for the same uid, which leads to
> userspace processing the same uid over and over; in this case,
> that processing is rather pointless.
>
> This is usually caused by tools which change their uid to "nobody",
> to run without privileges to read data supplied by untrusted users.
>
> This change delays the execution of the (already existing) scheduled
> work, to cleanup the uid after 0.5 seconds, so the allocated and announced
> uid can possibly be re-used by another process.
>
> This is the current behavior, where almost every invocation of a
> binary, which changes the uid, creates two events:
> $ read START < /sys/kernel/uevent_seqnum; \
> for i in `seq 100`; do su --shell=/bin/true bin; done; \
> read END < /sys/kernel/uevent_seqnum; \
> echo $(($END - $START))
> 178
>
> With the delayed cleanup, we get only two events, and userspace finishes
> a bit faster too:
> $ read START < /sys/kernel/uevent_seqnum; \
> for i in `seq 100`; do su --shell=/bin/true bin; done; \
> read END < /sys/kernel/uevent_seqnum; \
> echo $(($END - $START))
> 1
>
Makes sense. I do have a patch in -mm, though, which changes some of the
cleanup code (fixing a memory leak). These two patches will conflict.
> Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
> Cc: Greg Kroah-Hartman <gregkh@suse.de>
> Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
> ---
> include/linux/sched.h | 2 +-
> kernel/user.c | 28 +++++++++++++---------------
> 2 files changed, 14 insertions(+), 16 deletions(-)
>
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -670,7 +670,7 @@ struct user_struct {
> struct task_group *tg;
> #ifdef CONFIG_SYSFS
> struct kobject kobj;
> - struct work_struct work;
> + struct delayed_work work;
> #endif
> #endif
> };
> --- a/kernel/user.c
> +++ b/kernel/user.c
> @@ -75,6 +75,7 @@ static void uid_hash_remove(struct user_
> put_user_ns(up->user_ns);
> }
>
> +/* uidhash_lock must be held */
> static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
> {
> struct user_struct *user;
> @@ -82,7 +83,9 @@ static struct user_struct *uid_hash_find
>
> hlist_for_each_entry(user, h, hashent, uidhash_node) {
> if (user->uid == uid) {
> - atomic_inc(&user->__count);
> + /* possibly resurrect an "almost deleted" object */
> + if (atomic_inc_return(&user->__count) == 1)
> + cancel_delayed_work(&user->work);
> return user;
> }
> }
> @@ -283,12 +286,12 @@ int __init uids_sysfs_init(void)
> return uids_user_create(&root_user);
> }
>
> -/* work function to remove sysfs directory for a user and free up
> +/* delayed work function to remove the user and free up
> * corresponding structures.
> */
> -static void remove_user_sysfs_dir(struct work_struct *w)
> +static void remove_user_delayed(struct work_struct *w)
> {
> - struct user_struct *up = container_of(w, struct user_struct, work);
> + struct user_struct *up = container_of(w, struct user_struct, work.work);
> unsigned long flags;
> int remove_user = 0;
>
> @@ -299,15 +302,12 @@ static void remove_user_sysfs_dir(struct
> */
> uids_mutex_lock();
>
> - local_irq_save(flags);
> -
> - if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
> + spin_lock_irqsave(&uidhash_lock, flags);
> + if (atomic_read(&up->__count) == 0) {
> uid_hash_remove(up);
> remove_user = 1;
> - spin_unlock_irqrestore(&uidhash_lock, flags);
> - } else {
> - local_irq_restore(flags);
> }
> + spin_unlock_irqrestore(&uidhash_lock, flags);
>
> if (!remove_user)
> goto done;
> @@ -331,12 +331,8 @@ done:
> */
> static void free_user(struct user_struct *up, unsigned long flags)
> {
> - /* restore back the count */
> - atomic_inc(&up->__count);
> spin_unlock_irqrestore(&uidhash_lock, flags);
> -
> - INIT_WORK(&up->work, remove_user_sysfs_dir);
> - schedule_work(&up->work);
> + schedule_delayed_work(&up->work, msecs_to_jiffies(500));
> }
>
> #else /* CONFIG_USER_SCHED && CONFIG_SYSFS */
> @@ -442,6 +438,8 @@ struct user_struct *alloc_uid(struct use
> if (uids_user_create(new))
> goto out_destoy_sched;
>
> + INIT_DELAYED_WORK(&new->work, remove_user_delayed);
> +
> /*
> * Before adding this, check whether we raced
> * on adding the same user already..
>
--
regards,
Dhaval
next prev parent reply other threads:[~2009-03-09 18:50 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-03-09 18:37 sched: delayed cleanup of user_struct Kay Sievers
2009-03-09 18:49 ` Dhaval Giani [this message]
2009-03-10 14:47 ` Kay Sievers
2009-03-19 9:34 ` Dhaval Giani
2009-03-19 17:28 ` Kay Sievers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090309184940.GA9507@linux.vnet.ibm.com \
--to=dhaval@linux.vnet.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=gregkh@suse.de \
--cc=kay.sievers@vrfy.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.