From: "Chalios, Babis" <bchalios@amazon.es>
To: "robh@kernel.org" <robh@kernel.org>,
"krzk+dt@kernel.org" <krzk+dt@kernel.org>,
"conor+dt@kernel.org" <conor+dt@kernel.org>,
"richardcochran@gmail.com" <richardcochran@gmail.com>,
"dwmw2@infradead.org" <dwmw2@infradead.org>,
"andrew+netdev@lunn.ch" <andrew+netdev@lunn.ch>,
"davem@davemloft.net" <davem@davemloft.net>,
"edumazet@google.com" <edumazet@google.com>,
"kuba@kernel.org" <kuba@kernel.org>,
"pabeni@redhat.com" <pabeni@redhat.com>
Cc: "devicetree@vger.kernel.org" <devicetree@vger.kernel.org>,
"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
"Chalios, Babis" <bchalios@amazon.es>, "Graf (AWS),
Alexander" <graf@amazon.de>,
"mzxreary@0pointer.de" <mzxreary@0pointer.de>,
"Cali, Marco" <xmarcalx@amazon.co.uk>,
"Woodhouse, David" <dwmw@amazon.co.uk>
Subject: [PATCH v5 2/7] ptp: vmclock: support device notifications
Date: Wed, 7 Jan 2026 13:25:38 +0000 [thread overview]
Message-ID: <20260107132514.437-3-bchalios@amazon.es> (raw)
In-Reply-To: <20260107132514.437-1-bchalios@amazon.es>
Add optional support for device notifications in VMClock. When
supported, the hypervisor will send a device notification every time it
updates the seq_count to a new even value.
Moreover, add support for poll() in VMClock as a means to propagate this
notification to user space. poll() will return a POLLIN event to
listeners every time seq_count changes to a value different than the one
last seen (since open() or last read()/pread()). This means that when
poll() returns a POLLIN event, listeners need to use read() to observe
what has changed and update the reader's view of seq_count. In other
words, after a poll() returned, all subsequent calls to poll() will
immediately return with a POLLIN event until the listener calls read().
The device advertises support for the notification mechanism by setting
flag VMCLOCK_FLAG_NOTIFICATION_PRESENT in vmclock_abi flags field. If
the flag is not present the driver won't setup the ACPI notification
handler and poll() will always immediately return POLLHUP.
Signed-off-by: Babis Chalios <bchalios@amazon.es>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
---
drivers/ptp/ptp_vmclock.c | 125 +++++++++++++++++++++++++++++--
include/uapi/linux/vmclock-abi.h | 5 ++
2 files changed, 123 insertions(+), 7 deletions(-)
diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c
index b3a83b03d9c1..38b2bacb755e 100644
--- a/drivers/ptp/ptp_vmclock.c
+++ b/drivers/ptp/ptp_vmclock.c
@@ -5,6 +5,9 @@
* Copyright © 2024 Amazon.com, Inc. or its affiliates.
*/
+#include "linux/poll.h"
+#include "linux/types.h"
+#include "linux/wait.h"
#include <linux/acpi.h>
#include <linux/device.h>
#include <linux/err.h>
@@ -39,6 +42,7 @@ struct vmclock_state {
struct resource res;
struct vmclock_abi *clk;
struct miscdevice miscdev;
+ wait_queue_head_t disrupt_wait;
struct ptp_clock_info ptp_clock_info;
struct ptp_clock *ptp_clock;
enum clocksource_ids cs_id, sys_cs_id;
@@ -357,10 +361,15 @@ static struct ptp_clock *vmclock_ptp_register(struct device *dev,
return ptp_clock_register(&st->ptp_clock_info, dev);
}
+struct vmclock_file_state {
+ struct vmclock_state *st;
+ atomic_t seq;
+};
+
static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
{
- struct vmclock_state *st = container_of(fp->private_data,
- struct vmclock_state, miscdev);
+ struct vmclock_file_state *fst = fp->private_data;
+ struct vmclock_state *st = fst->st;
if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ)
return -EROFS;
@@ -379,11 +388,11 @@ static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
size_t count, loff_t *ppos)
{
- struct vmclock_state *st = container_of(fp->private_data,
- struct vmclock_state, miscdev);
ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
+ struct vmclock_file_state *fst = fp->private_data;
+ struct vmclock_state *st = fst->st;
+ uint32_t seq, old_seq;
size_t max_count;
- uint32_t seq;
if (*ppos >= PAGE_SIZE)
return 0;
@@ -392,6 +401,7 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
if (count > max_count)
count = max_count;
+ old_seq = atomic_read(&fst->seq);
while (1) {
seq = le32_to_cpu(st->clk->seq_count) & ~1U;
/* Pairs with hypervisor wmb */
@@ -402,8 +412,16 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
/* Pairs with hypervisor wmb */
virt_rmb();
- if (seq == le32_to_cpu(st->clk->seq_count))
- break;
+ if (seq == le32_to_cpu(st->clk->seq_count)) {
+ /*
+ * Either we updated fst->seq to seq (the latest version we observed)
+ * or someone else did (old_seq == seq), so we can break.
+ */
+ if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) ||
+ old_seq == seq) {
+ break;
+ }
+ }
if (ktime_after(ktime_get(), deadline))
return -ETIMEDOUT;
@@ -413,10 +431,58 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
return count;
}
+static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait)
+{
+ struct vmclock_file_state *fst = fp->private_data;
+ struct vmclock_state *st = fst->st;
+ uint32_t seq;
+
+ /*
+ * Hypervisor will not send us any notifications, so fail immediately
+ * to avoid having caller sleeping for ever.
+ */
+ if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
+ return POLLHUP;
+
+ poll_wait(fp, &st->disrupt_wait, wait);
+
+ seq = le32_to_cpu(st->clk->seq_count);
+ if (atomic_read(&fst->seq) != seq)
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+static int vmclock_miscdev_open(struct inode *inode, struct file *fp)
+{
+ struct vmclock_state *st = container_of(fp->private_data,
+ struct vmclock_state, miscdev);
+ struct vmclock_file_state *fst = kzalloc(sizeof(*fst), GFP_KERNEL);
+
+ if (!fst)
+ return -ENOMEM;
+
+ fst->st = st;
+ atomic_set(&fst->seq, 0);
+
+ fp->private_data = fst;
+
+ return 0;
+}
+
+static int vmclock_miscdev_release(struct inode *inode, struct file *fp)
+{
+ kfree(fp->private_data);
+ return 0;
+}
+
static const struct file_operations vmclock_miscdev_fops = {
.owner = THIS_MODULE,
+ .open = vmclock_miscdev_open,
+ .release = vmclock_miscdev_release,
.mmap = vmclock_miscdev_mmap,
.read = vmclock_miscdev_read,
+ .poll = vmclock_miscdev_poll,
};
/* module operations */
@@ -459,6 +525,44 @@ static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data
return AE_ERROR;
}
+static void
+vmclock_acpi_notification_handler(acpi_handle __always_unused handle,
+ u32 __always_unused event, void *dev)
+{
+ struct device *device = dev;
+ struct vmclock_state *st = device->driver_data;
+
+ wake_up_interruptible(&st->disrupt_wait);
+}
+
+static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st)
+{
+ struct acpi_device *adev = ACPI_COMPANION(dev);
+ acpi_status status;
+
+ /*
+ * This should never happen as this function is only called when
+ * has_acpi_companion(dev) is true, but the logic is sufficiently
+ * complex that Coverity can't see the tautology.
+ */
+ if (!adev)
+ return -ENODEV;
+
+ /* The device does not support notifications. Nothing else to do */
+ if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
+ return 0;
+
+ status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
+ vmclock_acpi_notification_handler,
+ dev);
+ if (ACPI_FAILURE(status)) {
+ dev_err(dev, "failed to install notification handler");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st)
{
struct acpi_device *adev = ACPI_COMPANION(dev);
@@ -549,6 +653,11 @@ static int vmclock_probe(struct platform_device *pdev)
if (ret)
return ret;
+ init_waitqueue_head(&st->disrupt_wait);
+ ret = vmclock_setup_notification(dev, st);
+ if (ret)
+ return ret;
+
/*
* If the structure is big enough, it can be mapped to userspace.
* Theoretically a guest OS even using larger pages could still
@@ -581,6 +690,8 @@ static int vmclock_probe(struct platform_device *pdev)
return -ENODEV;
}
+ dev->driver_data = st;
+
dev_info(dev, "%s: registered %s%s%s\n", st->name,
st->miscdev.minor ? "miscdev" : "",
(st->miscdev.minor && st->ptp_clock) ? ", " : "",
diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h
index 937fe00e4f33..d320623b0118 100644
--- a/include/uapi/linux/vmclock-abi.h
+++ b/include/uapi/linux/vmclock-abi.h
@@ -121,6 +121,11 @@ struct vmclock_abi {
* loaded from some save state (restored from a snapshot).
*/
#define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8)
+ /*
+ * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send
+ * a notification every time it updates seq_count to a new even number.
+ */
+#define VMCLOCK_FLAG_NOTIFICATION_PRESENT (1 << 9)
__u8 pad[2];
__u8 clock_status;
--
2.34.1
next prev parent reply other threads:[~2026-01-07 13:27 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-07 13:25 [PATCH v5 0/7] ptp: vmclock: Add VM generation counter and ACPI notification Chalios, Babis
2026-01-07 13:25 ` [PATCH v5 1/7] ptp: vmclock: add vm generation counter Chalios, Babis
2026-01-07 13:25 ` Chalios, Babis [this message]
2026-01-07 13:25 ` [PATCH v5 3/7] dt-bindings: ptp: Add amazon,vmclock Chalios, Babis
2026-01-07 13:26 ` [PATCH v5 4/7] ptp: ptp_vmclock: Add device tree support Chalios, Babis
2026-01-13 3:46 ` Jakub Kicinski
2026-01-07 13:26 ` [PATCH v5 5/7] ptp: ptp_vmclock: add 'VMCLOCK' to ACPI device match Chalios, Babis
2026-01-07 13:26 ` [PATCH v5 6/7] ptp: ptp_vmclock: remove dependency on CONFIG_ACPI Chalios, Babis
2026-01-07 13:26 ` [PATCH v5 7/7] ptp: ptp_vmclock: return TAI not UTC Chalios, Babis
2026-01-07 13:28 ` [PATCH v5 0/7] ptp: vmclock: Add VM generation counter and ACPI notification David Woodhouse
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260107132514.437-3-bchalios@amazon.es \
--to=bchalios@amazon.es \
--cc=andrew+netdev@lunn.ch \
--cc=conor+dt@kernel.org \
--cc=davem@davemloft.net \
--cc=devicetree@vger.kernel.org \
--cc=dwmw2@infradead.org \
--cc=dwmw@amazon.co.uk \
--cc=edumazet@google.com \
--cc=graf@amazon.de \
--cc=krzk+dt@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mzxreary@0pointer.de \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=richardcochran@gmail.com \
--cc=robh@kernel.org \
--cc=xmarcalx@amazon.co.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.