linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: Elizabeth Figura <zfigura@codeweavers.com>
Cc: "Arnd Bergmann" <arnd@arndb.de>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Shuah Khan" <shuah@kernel.org>,
	linux-kernel@vger.kernel.org, linux-api@vger.kernel.org,
	wine-devel@winehq.org, "André Almeida" <andrealmeid@igalia.com>,
	"Wolfram Sang" <wsa@kernel.org>,
	"Arkadiusz Hiler" <ahiler@codeweavers.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Andy Lutomirski" <luto@kernel.org>,
	linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org,
	"Randy Dunlap" <rdunlap@infradead.org>
Subject: Re: [PATCH v3 04/30] ntsync: Introduce NTSYNC_IOC_WAIT_ANY.
Date: Thu, 11 Apr 2024 15:34:23 +0200	[thread overview]
Message-ID: <2024041111-handsaw-scruffy-27f3@gregkh> (raw)
In-Reply-To: <20240329000621.148791-5-zfigura@codeweavers.com>

On Thu, Mar 28, 2024 at 07:05:55PM -0500, Elizabeth Figura wrote:
> This corresponds to part of the functionality of the NT syscall
> NtWaitForMultipleObjects(). Specifically, it implements the behaviour where
> the third argument (wait_any) is TRUE, and it does not handle alertable waits.
> Those features have been split out into separate patches to ease review.
> 
> NTSYNC_IOC_WAIT_ANY is a vectored wait function similar to poll(). Unlike
> poll(), it "consumes" objects when they are signaled. For semaphores, this means
> decrementing the internal counter by one. At most one object can be consumed by
> this function.
> 
> Up to 64 objects can be waited on at once. As soon as one is signaled, the
> object with the lowest index is consumed, and that index is returned via the
> "index" field.

So it's kind of like our internal locks already?  Or futex?

> 
> A timeout is supported. The timeout is passed as a u64 nanosecond value, which
> represents absolute time measured against either the MONOTONIC or REALTIME clock
> (controlled by the flags argument). If U64_MAX is passed, the ioctl waits
> indefinitely.
> 
> This ioctl validates that all objects belong to the relevant device. This is not
> necessary for any technical reason related to NTSYNC_IOC_WAIT_ANY, but will be
> necessary for NTSYNC_IOC_WAIT_ALL introduced in the following patch.
> 
> Two u32s of padding are left in the ntsync_wait_args structure; one will be used
> by a patch later in the series (which is split out to ease review).
> 
> Signed-off-by: Elizabeth Figura <zfigura@codeweavers.com>
> ---
>  drivers/misc/ntsync.c       | 250 ++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/ntsync.h |  16 +++
>  2 files changed, 266 insertions(+)
> 
> diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c
> index 3c2f743c58b0..c6f84a5fc8c0 100644
> --- a/drivers/misc/ntsync.c
> +++ b/drivers/misc/ntsync.c
> @@ -6,11 +6,16 @@
>   */
>  
>  #include <linux/anon_inodes.h>
> +#include <linux/atomic.h>
>  #include <linux/file.h>
>  #include <linux/fs.h>
> +#include <linux/hrtimer.h>
> +#include <linux/ktime.h>
>  #include <linux/miscdevice.h>
>  #include <linux/module.h>
>  #include <linux/overflow.h>
> +#include <linux/sched.h>
> +#include <linux/sched/signal.h>
>  #include <linux/slab.h>
>  #include <linux/spinlock.h>
>  #include <uapi/linux/ntsync.h>
> @@ -30,6 +35,8 @@ enum ntsync_type {
>   *
>   * Both rely on struct file for reference counting. Individual
>   * ntsync_obj objects take a reference to the device when created.
> + * Wait operations take a reference to each object being waited on for
> + * the duration of the wait.
>   */
>  
>  struct ntsync_obj {
> @@ -47,12 +54,56 @@ struct ntsync_obj {
>  			__u32 max;
>  		} sem;
>  	} u;
> +
> +	struct list_head any_waiters;
> +};
> +
> +struct ntsync_q_entry {
> +	struct list_head node;
> +	struct ntsync_q *q;
> +	struct ntsync_obj *obj;
> +	__u32 index;
> +};
> +
> +struct ntsync_q {
> +	struct task_struct *task;
> +	__u32 owner;
> +
> +	/*
> +	 * Protected via atomic_try_cmpxchg(). Only the thread that wins the
> +	 * compare-and-swap may actually change object states and wake this
> +	 * task.
> +	 */
> +	atomic_t signaled;

This feels odd; why are you duplicating normal lock functionality
here?

> +
> +	__u32 count;
> +	struct ntsync_q_entry entries[];
>  };
>  
>  struct ntsync_device {
>  	struct file *file;
>  };
>  
> +static void try_wake_any_sem(struct ntsync_obj *sem)
> +{
> +	struct ntsync_q_entry *entry;
> +
> +	lockdep_assert_held(&sem->lock);
> +
> +	list_for_each_entry(entry, &sem->any_waiters, node) {
> +		struct ntsync_q *q = entry->q;
> +		int signaled = -1;
> +
> +		if (!sem->u.sem.count)
> +			break;
> +
> +		if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) {
> +			sem->u.sem.count--;
> +			wake_up_process(q->task);
> +		}

You are waking up _all_ "locks" with the atomic_try_cmpxchg() call,
right?  Not just the "first".

Or am I confused?

> +	}
> +}
> +
>  /*
>   * Actually change the semaphore state, returning -EOVERFLOW if it is made
>   * invalid.
> @@ -88,6 +139,8 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp)
>  
>  	prev_count = sem->u.sem.count;
>  	ret = post_sem_state(sem, args);
> +	if (!ret)
> +		try_wake_any_sem(sem);
>  
>  	spin_unlock(&sem->lock);
>  
> @@ -141,6 +194,7 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev,
>  	obj->dev = dev;
>  	get_file(dev->file);
>  	spin_lock_init(&obj->lock);
> +	INIT_LIST_HEAD(&obj->any_waiters);
>  
>  	return obj;
>  }
> @@ -191,6 +245,200 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp)
>  	return put_user(fd, &user_args->sem);
>  }
>  
> +static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd)
> +{
> +	struct file *file = fget(fd);
> +	struct ntsync_obj *obj;
> +
> +	if (!file)
> +		return NULL;
> +
> +	if (file->f_op != &ntsync_obj_fops) {
> +		fput(file);
> +		return NULL;
> +	}
> +
> +	obj = file->private_data;
> +	if (obj->dev != dev) {
> +		fput(file);
> +		return NULL;
> +	}
> +
> +	return obj;
> +}
> +
> +static void put_obj(struct ntsync_obj *obj)
> +{
> +	fput(obj->file);
> +}
> +
> +static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_args *args)
> +{
> +	ktime_t timeout = ns_to_ktime(args->timeout);
> +	clockid_t clock = CLOCK_MONOTONIC;
> +	ktime_t *timeout_ptr;
> +	int ret = 0;
> +
> +	timeout_ptr = (args->timeout == U64_MAX ? NULL : &timeout);
> +
> +	if (args->flags & NTSYNC_WAIT_REALTIME)
> +		clock = CLOCK_REALTIME;
> +
> +	do {
> +		if (signal_pending(current)) {
> +			ret = -ERESTARTSYS;
> +			break;
> +		}
> +
> +		set_current_state(TASK_INTERRUPTIBLE);
> +		if (atomic_read(&q->signaled) != -1) {
> +			ret = 0;
> +			break;

What happens if the value changes right after you read it?

Rolling your own lock is tricky, and needs review from the locking
maintainers.  And probably some more documentation as to what is
happening and why our normal types of locks can't be used here?

thanks,

greg k-h

  reply	other threads:[~2024-04-11 13:34 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-29  0:05 [PATCH v3 00/30] NT synchronization primitive driver Elizabeth Figura
2024-03-29  0:05 ` [PATCH v3 01/30] ntsync: Introduce the ntsync driver and character device Elizabeth Figura
2024-03-29  0:05 ` [PATCH v3 02/30] ntsync: Introduce NTSYNC_IOC_CREATE_SEM Elizabeth Figura
2024-03-29  0:05 ` [PATCH v3 03/30] ntsync: Introduce NTSYNC_IOC_SEM_POST Elizabeth Figura
2024-04-11 13:35   ` Greg Kroah-Hartman
2024-03-29  0:05 ` [PATCH v3 04/30] ntsync: Introduce NTSYNC_IOC_WAIT_ANY Elizabeth Figura
2024-04-11 13:34   ` Greg Kroah-Hartman [this message]
2024-04-12  0:33     ` Elizabeth Figura
2024-04-12  6:16       ` Greg Kroah-Hartman
2024-03-29  0:05 ` [PATCH v3 05/30] ntsync: Introduce NTSYNC_IOC_WAIT_ALL Elizabeth Figura
2024-03-29  0:05 ` [PATCH v3 06/30] ntsync: Introduce NTSYNC_IOC_CREATE_MUTEX Elizabeth Figura
2024-03-29  0:05 ` [PATCH v3 07/30] ntsync: Introduce NTSYNC_IOC_MUTEX_UNLOCK Elizabeth Figura
2024-03-29  0:05 ` [PATCH v3 08/30] ntsync: Introduce NTSYNC_IOC_MUTEX_KILL Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 09/30] ntsync: Introduce NTSYNC_IOC_CREATE_EVENT Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 10/30] ntsync: Introduce NTSYNC_IOC_EVENT_SET Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 11/30] ntsync: Introduce NTSYNC_IOC_EVENT_RESET Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 12/30] ntsync: Introduce NTSYNC_IOC_EVENT_PULSE Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 13/30] ntsync: Introduce NTSYNC_IOC_SEM_READ Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 14/30] ntsync: Introduce NTSYNC_IOC_MUTEX_READ Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 15/30] ntsync: Introduce NTSYNC_IOC_EVENT_READ Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 16/30] ntsync: Introduce alertable waits Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 17/30] selftests: ntsync: Add some tests for semaphore state Elizabeth Figura
2024-03-29 20:07   ` Muhammad Usama Anjum
2024-03-29  0:06 ` [PATCH v3 18/30] selftests: ntsync: Add some tests for mutex state Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 19/30] selftests: ntsync: Add some tests for NTSYNC_IOC_WAIT_ANY Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 20/30] selftests: ntsync: Add some tests for NTSYNC_IOC_WAIT_ALL Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 21/30] selftests: ntsync: Add some tests for wakeup signaling with WINESYNC_IOC_WAIT_ANY Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 22/30] selftests: ntsync: Add some tests for wakeup signaling with WINESYNC_IOC_WAIT_ALL Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 23/30] selftests: ntsync: Add some tests for manual-reset event state Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 24/30] selftests: ntsync: Add some tests for auto-reset " Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 25/30] selftests: ntsync: Add some tests for wakeup signaling with events Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 26/30] selftests: ntsync: Add tests for alertable waits Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 27/30] selftests: ntsync: Add some tests for wakeup signaling via alerts Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 28/30] selftests: ntsync: Add a stress test for contended waits Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 29/30] maintainers: Add an entry for ntsync Elizabeth Figura
2024-03-29  0:06 ` [PATCH v3 30/30] docs: ntsync: Add documentation for the ntsync uAPI Elizabeth Figura
2024-04-12  5:52   ` Bagas Sanjaya

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2024041111-handsaw-scruffy-27f3@gregkh \
    --to=gregkh@linuxfoundation.org \
    --cc=ahiler@codeweavers.com \
    --cc=andrealmeid@igalia.com \
    --cc=arnd@arndb.de \
    --cc=corbet@lwn.net \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rdunlap@infradead.org \
    --cc=shuah@kernel.org \
    --cc=wine-devel@winehq.org \
    --cc=wsa@kernel.org \
    --cc=zfigura@codeweavers.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).