* [PATCH 07/55] drivers: hv: dxgkrnl: Creation of dxgcontext objects
From: Eric Curtin @ 2026-03-19 20:24 UTC (permalink / raw)
To: linux-hyperv; +Cc: linux-kernel, iourit, wei.liu, decui, haiyangz
In-Reply-To: <20260319202509.63802-1-eric.curtin@docker.com>
From: Iouri Tarassov <iourit@linux.microsoft.com>
Implement ioctls for creation/destruction of dxgcontext
objects:
- the LX_DXCREATECONTEXTVIRTUAL ioctl
- the LX_DXDESTROYCONTEXT ioctl.
A dxgcontext object represents a compute device execution thread.
Compute device DMA buffers and synchronization operations are
submitted for execution to a dxgcontext. dxgcontext objects
belong to a dxgdevice object.
Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com>
[kms: forward port to 6.6 from 6.1. No code changes made.]
Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
---
drivers/hv/dxgkrnl/dxgadapter.c | 103 ++++++++++++++++++++
drivers/hv/dxgkrnl/dxgkrnl.h | 38 ++++++++
drivers/hv/dxgkrnl/dxgprocess.c | 4 +
drivers/hv/dxgkrnl/dxgvmbus.c | 101 ++++++++++++++++++-
drivers/hv/dxgkrnl/dxgvmbus.h | 18 ++++
drivers/hv/dxgkrnl/ioctl.c | 168 +++++++++++++++++++++++++++++++-
drivers/hv/dxgkrnl/misc.h | 1 +
include/uapi/misc/d3dkmthk.h | 47 +++++++++
8 files changed, 477 insertions(+), 3 deletions(-)
diff --git a/drivers/hv/dxgkrnl/dxgadapter.c b/drivers/hv/dxgkrnl/dxgadapter.c
index a9a341716eba..cd103e092ac2 100644
--- a/drivers/hv/dxgkrnl/dxgadapter.c
+++ b/drivers/hv/dxgkrnl/dxgadapter.c
@@ -206,7 +206,9 @@ struct dxgdevice *dxgdevice_create(struct dxgadapter *adapter,
device->adapter = adapter;
device->process = process;
kref_get(&adapter->adapter_kref);
+ INIT_LIST_HEAD(&device->context_list_head);
init_rwsem(&device->device_lock);
+ init_rwsem(&device->context_list_lock);
INIT_LIST_HEAD(&device->pqueue_list_head);
device->object_state = DXGOBJECTSTATE_CREATED;
device->execution_state = _D3DKMT_DEVICEEXECUTION_ACTIVE;
@@ -248,6 +250,20 @@ void dxgdevice_destroy(struct dxgdevice *device)
dxgdevice_stop(device);
+ {
+ struct dxgcontext *context;
+ struct dxgcontext *tmp;
+
+ DXG_TRACE("destroying contexts");
+ dxgdevice_acquire_context_list_lock(device);
+ list_for_each_entry_safe(context, tmp,
+ &device->context_list_head,
+ context_list_entry) {
+ dxgcontext_destroy(process, context);
+ }
+ dxgdevice_release_context_list_lock(device);
+ }
+
/* Guest handles need to be released before the host handles */
hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
if (device->handle_valid) {
@@ -302,6 +318,32 @@ bool dxgdevice_is_active(struct dxgdevice *device)
return device->object_state == DXGOBJECTSTATE_ACTIVE;
}
+void dxgdevice_acquire_context_list_lock(struct dxgdevice *device)
+{
+ down_write(&device->context_list_lock);
+}
+
+void dxgdevice_release_context_list_lock(struct dxgdevice *device)
+{
+ up_write(&device->context_list_lock);
+}
+
+void dxgdevice_add_context(struct dxgdevice *device, struct dxgcontext *context)
+{
+ down_write(&device->context_list_lock);
+ list_add_tail(&context->context_list_entry, &device->context_list_head);
+ up_write(&device->context_list_lock);
+}
+
+void dxgdevice_remove_context(struct dxgdevice *device,
+ struct dxgcontext *context)
+{
+ if (context->context_list_entry.next) {
+ list_del(&context->context_list_entry);
+ context->context_list_entry.next = NULL;
+ }
+}
+
void dxgdevice_release(struct kref *refcount)
{
struct dxgdevice *device;
@@ -310,6 +352,67 @@ void dxgdevice_release(struct kref *refcount)
kfree(device);
}
+struct dxgcontext *dxgcontext_create(struct dxgdevice *device)
+{
+ struct dxgcontext *context;
+
+ context = kzalloc(sizeof(struct dxgcontext), GFP_KERNEL);
+ if (context) {
+ kref_init(&context->context_kref);
+ context->device = device;
+ context->process = device->process;
+ context->device_handle = device->handle;
+ kref_get(&device->device_kref);
+ INIT_LIST_HEAD(&context->hwqueue_list_head);
+ init_rwsem(&context->hwqueue_list_lock);
+ dxgdevice_add_context(device, context);
+ context->object_state = DXGOBJECTSTATE_ACTIVE;
+ }
+ return context;
+}
+
+/*
+ * Must be called with the device context list lock held
+ */
+void dxgcontext_destroy(struct dxgprocess *process, struct dxgcontext *context)
+{
+ DXG_TRACE("Destroying context %p", context);
+ context->object_state = DXGOBJECTSTATE_DESTROYED;
+ if (context->device) {
+ if (context->handle.v) {
+ hmgrtable_free_handle_safe(&process->handle_table,
+ HMGRENTRY_TYPE_DXGCONTEXT,
+ context->handle);
+ }
+ dxgdevice_remove_context(context->device, context);
+ kref_put(&context->device->device_kref, dxgdevice_release);
+ }
+ kref_put(&context->context_kref, dxgcontext_release);
+}
+
+void dxgcontext_destroy_safe(struct dxgprocess *process,
+ struct dxgcontext *context)
+{
+ struct dxgdevice *device = context->device;
+
+ dxgdevice_acquire_context_list_lock(device);
+ dxgcontext_destroy(process, context);
+ dxgdevice_release_context_list_lock(device);
+}
+
+bool dxgcontext_is_active(struct dxgcontext *context)
+{
+ return context->object_state == DXGOBJECTSTATE_ACTIVE;
+}
+
+void dxgcontext_release(struct kref *refcount)
+{
+ struct dxgcontext *context;
+
+ context = container_of(refcount, struct dxgcontext, context_kref);
+ kfree(context);
+}
+
struct dxgprocess_adapter *dxgprocess_adapter_create(struct dxgprocess *process,
struct dxgadapter *adapter)
{
diff --git a/drivers/hv/dxgkrnl/dxgkrnl.h b/drivers/hv/dxgkrnl/dxgkrnl.h
index 45ac1f25cc5e..a3d8d3c9f37d 100644
--- a/drivers/hv/dxgkrnl/dxgkrnl.h
+++ b/drivers/hv/dxgkrnl/dxgkrnl.h
@@ -35,6 +35,7 @@
struct dxgprocess;
struct dxgadapter;
struct dxgdevice;
+struct dxgcontext;
/*
* Driver private data.
@@ -298,6 +299,7 @@ void dxgadapter_remove_process(struct dxgprocess_adapter *process_info);
/*
* The object represent the device object.
* The following objects take reference on the device
+ * - dxgcontext
* - device handle (struct d3dkmthandle)
*/
struct dxgdevice {
@@ -311,6 +313,8 @@ struct dxgdevice {
struct kref device_kref;
/* Protects destcruction of the device object */
struct rw_semaphore device_lock;
+ struct rw_semaphore context_list_lock;
+ struct list_head context_list_head;
/* List of paging queues. Protected by process handle table lock. */
struct list_head pqueue_list_head;
struct d3dkmthandle handle;
@@ -325,7 +329,33 @@ void dxgdevice_mark_destroyed(struct dxgdevice *device);
int dxgdevice_acquire_lock_shared(struct dxgdevice *dev);
void dxgdevice_release_lock_shared(struct dxgdevice *dev);
void dxgdevice_release(struct kref *refcount);
+void dxgdevice_add_context(struct dxgdevice *dev, struct dxgcontext *ctx);
+void dxgdevice_remove_context(struct dxgdevice *dev, struct dxgcontext *ctx);
bool dxgdevice_is_active(struct dxgdevice *dev);
+void dxgdevice_acquire_context_list_lock(struct dxgdevice *dev);
+void dxgdevice_release_context_list_lock(struct dxgdevice *dev);
+
+/*
+ * The object represents the execution context of a device.
+ */
+struct dxgcontext {
+ enum dxgobjectstate object_state;
+ struct dxgdevice *device;
+ struct dxgprocess *process;
+ /* entry in the device context list */
+ struct list_head context_list_entry;
+ struct list_head hwqueue_list_head;
+ struct rw_semaphore hwqueue_list_lock;
+ struct kref context_kref;
+ struct d3dkmthandle handle;
+ struct d3dkmthandle device_handle;
+};
+
+struct dxgcontext *dxgcontext_create(struct dxgdevice *dev);
+void dxgcontext_destroy(struct dxgprocess *pr, struct dxgcontext *ctx);
+void dxgcontext_destroy_safe(struct dxgprocess *pr, struct dxgcontext *ctx);
+void dxgcontext_release(struct kref *refcount);
+bool dxgcontext_is_active(struct dxgcontext *ctx);
long dxgk_compat_ioctl(struct file *f, unsigned int p1, unsigned long p2);
long dxgk_unlocked_ioctl(struct file *f, unsigned int p1, unsigned long p2);
@@ -371,6 +401,14 @@ int dxgvmb_send_destroy_device(struct dxgadapter *adapter,
struct d3dkmthandle h);
int dxgvmb_send_flush_device(struct dxgdevice *device,
enum dxgdevice_flushschedulerreason reason);
+struct d3dkmthandle
+dxgvmb_send_create_context(struct dxgadapter *adapter,
+ struct dxgprocess *process,
+ struct d3dkmt_createcontextvirtual
+ *args);
+int dxgvmb_send_destroy_context(struct dxgadapter *adapter,
+ struct dxgprocess *process,
+ struct d3dkmthandle h);
int dxgvmb_send_query_adapter_info(struct dxgprocess *process,
struct dxgadapter *adapter,
struct d3dkmt_queryadapterinfo *args);
diff --git a/drivers/hv/dxgkrnl/dxgprocess.c b/drivers/hv/dxgkrnl/dxgprocess.c
index 8373f681e822..ca307beb9a9a 100644
--- a/drivers/hv/dxgkrnl/dxgprocess.c
+++ b/drivers/hv/dxgkrnl/dxgprocess.c
@@ -257,6 +257,10 @@ struct dxgdevice *dxgprocess_device_by_object_handle(struct dxgprocess *process,
case HMGRENTRY_TYPE_DXGDEVICE:
device = obj;
break;
+ case HMGRENTRY_TYPE_DXGCONTEXT:
+ device_handle =
+ ((struct dxgcontext *)obj)->device_handle;
+ break;
default:
DXG_ERR("invalid handle type: %d", t);
break;
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c
index 73804d11ec49..e66aac7c13cb 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.c
+++ b/drivers/hv/dxgkrnl/dxgvmbus.c
@@ -731,7 +731,7 @@ int dxgvmb_send_flush_device(struct dxgdevice *device,
enum dxgdevice_flushschedulerreason reason)
{
int ret;
- struct dxgkvmb_command_flushdevice *command;
+ struct dxgkvmb_command_flushdevice *command = NULL;
struct dxgvmbusmsg msg = {.hdr = NULL};
struct dxgprocess *process = device->process;
@@ -745,6 +745,105 @@ int dxgvmb_send_flush_device(struct dxgdevice *device,
command->device = device->handle;
command->reason = reason;
+ ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size);
+
+cleanup:
+ free_message(&msg, process);
+ if (ret)
+ DXG_TRACE("err: %d", ret);
+ return ret;
+}
+
+struct d3dkmthandle
+dxgvmb_send_create_context(struct dxgadapter *adapter,
+ struct dxgprocess *process,
+ struct d3dkmt_createcontextvirtual *args)
+{
+ struct dxgkvmb_command_createcontextvirtual *command = NULL;
+ u32 cmd_size;
+ int ret;
+ struct d3dkmthandle context = {};
+ struct dxgvmbusmsg msg = {.hdr = NULL};
+
+ if (args->priv_drv_data_size > DXG_MAX_VM_BUS_PACKET_SIZE) {
+ DXG_ERR("PrivateDriverDataSize is invalid");
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ cmd_size = sizeof(struct dxgkvmb_command_createcontextvirtual) +
+ args->priv_drv_data_size - 1;
+
+ ret = init_message(&msg, adapter, process, cmd_size);
+ if (ret)
+ goto cleanup;
+ command = (void *)msg.msg;
+
+ command_vgpu_to_host_init2(&command->hdr,
+ DXGK_VMBCOMMAND_CREATECONTEXTVIRTUAL,
+ process->host_handle);
+ command->device = args->device;
+ command->node_ordinal = args->node_ordinal;
+ command->engine_affinity = args->engine_affinity;
+ command->flags = args->flags;
+ command->client_hint = args->client_hint;
+ command->priv_drv_data_size = args->priv_drv_data_size;
+ if (args->priv_drv_data_size) {
+ ret = copy_from_user(command->priv_drv_data,
+ args->priv_drv_data,
+ args->priv_drv_data_size);
+ if (ret) {
+ DXG_ERR("Faled to copy private data");
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ }
+ /* Input command is returned back as output */
+ ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size,
+ command, cmd_size);
+ if (ret < 0) {
+ goto cleanup;
+ } else {
+ context = command->context;
+ if (args->priv_drv_data_size) {
+ ret = copy_to_user(args->priv_drv_data,
+ command->priv_drv_data,
+ args->priv_drv_data_size);
+ if (ret) {
+ dev_err(DXGDEV,
+ "Faled to copy private data to user");
+ ret = -EINVAL;
+ dxgvmb_send_destroy_context(adapter, process,
+ context);
+ context.v = 0;
+ }
+ }
+ }
+
+cleanup:
+ free_message(&msg, process);
+ if (ret)
+ DXG_TRACE("err: %d", ret);
+ return context;
+}
+
+int dxgvmb_send_destroy_context(struct dxgadapter *adapter,
+ struct dxgprocess *process,
+ struct d3dkmthandle h)
+{
+ int ret;
+ struct dxgkvmb_command_destroycontext *command;
+ struct dxgvmbusmsg msg = {.hdr = NULL};
+
+ ret = init_message(&msg, adapter, process, sizeof(*command));
+ if (ret)
+ goto cleanup;
+ command = (void *)msg.msg;
+
+ command_vgpu_to_host_init2(&command->hdr,
+ DXGK_VMBCOMMAND_DESTROYCONTEXT,
+ process->host_handle);
+ command->context = h;
+
ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size);
cleanup:
free_message(&msg, process);
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h
index 4ccf45765954..ebcb7b0f62c1 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.h
+++ b/drivers/hv/dxgkrnl/dxgvmbus.h
@@ -269,4 +269,22 @@ struct dxgkvmb_command_flushdevice {
enum dxgdevice_flushschedulerreason reason;
};
+struct dxgkvmb_command_createcontextvirtual {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+ struct d3dkmthandle context;
+ struct d3dkmthandle device;
+ u32 node_ordinal;
+ u32 engine_affinity;
+ struct d3dddi_createcontextflags flags;
+ enum d3dkmt_clienthint client_hint;
+ u32 priv_drv_data_size;
+ u8 priv_drv_data[1];
+};
+
+/* The command returns ntstatus */
+struct dxgkvmb_command_destroycontext {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+ struct d3dkmthandle context;
+};
+
#endif /* _DXGVMBUS_H */
diff --git a/drivers/hv/dxgkrnl/ioctl.c b/drivers/hv/dxgkrnl/ioctl.c
index 405e8b92913e..5d10ebd2ce6a 100644
--- a/drivers/hv/dxgkrnl/ioctl.c
+++ b/drivers/hv/dxgkrnl/ioctl.c
@@ -550,13 +550,177 @@ dxgkio_destroy_device(struct dxgprocess *process, void *__user inargs)
return ret;
}
+static int
+dxgkio_create_context_virtual(struct dxgprocess *process, void *__user inargs)
+{
+ struct d3dkmt_createcontextvirtual args;
+ int ret;
+ struct dxgadapter *adapter = NULL;
+ struct dxgdevice *device = NULL;
+ struct dxgcontext *context = NULL;
+ struct d3dkmthandle host_context_handle = {};
+ bool device_lock_acquired = false;
+
+ ret = copy_from_user(&args, inargs, sizeof(args));
+ if (ret) {
+ DXG_ERR("failed to copy input args");
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ /*
+ * The call acquires reference on the device. It is safe to access the
+ * adapter, because the device holds reference on it.
+ */
+ device = dxgprocess_device_by_handle(process, args.device);
+ if (device == NULL) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ ret = dxgdevice_acquire_lock_shared(device);
+ if (ret < 0)
+ goto cleanup;
+
+ device_lock_acquired = true;
+
+ adapter = device->adapter;
+ ret = dxgadapter_acquire_lock_shared(adapter);
+ if (ret < 0) {
+ adapter = NULL;
+ goto cleanup;
+ }
+
+ context = dxgcontext_create(device);
+ if (context == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ host_context_handle = dxgvmb_send_create_context(adapter,
+ process, &args);
+ if (host_context_handle.v) {
+ hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
+ ret = hmgrtable_assign_handle(&process->handle_table, context,
+ HMGRENTRY_TYPE_DXGCONTEXT,
+ host_context_handle);
+ if (ret >= 0)
+ context->handle = host_context_handle;
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
+ if (ret < 0)
+ goto cleanup;
+ ret = copy_to_user(&((struct d3dkmt_createcontextvirtual *)
+ inargs)->context, &host_context_handle,
+ sizeof(struct d3dkmthandle));
+ if (ret) {
+ DXG_ERR("failed to copy context handle");
+ ret = -EINVAL;
+ }
+ } else {
+ DXG_ERR("invalid host handle");
+ ret = -EINVAL;
+ }
+
+cleanup:
+
+ if (ret < 0) {
+ if (host_context_handle.v) {
+ dxgvmb_send_destroy_context(adapter, process,
+ host_context_handle);
+ }
+ if (context)
+ dxgcontext_destroy_safe(process, context);
+ }
+
+ if (adapter)
+ dxgadapter_release_lock_shared(adapter);
+
+ if (device) {
+ if (device_lock_acquired)
+ dxgdevice_release_lock_shared(device);
+ kref_put(&device->device_kref, dxgdevice_release);
+ }
+
+ DXG_TRACE("ioctl:%s %d", errorstr(ret), ret);
+ return ret;
+}
+
+static int
+dxgkio_destroy_context(struct dxgprocess *process, void *__user inargs)
+{
+ struct d3dkmt_destroycontext args;
+ int ret;
+ struct dxgadapter *adapter = NULL;
+ struct dxgcontext *context = NULL;
+ struct dxgdevice *device = NULL;
+ struct d3dkmthandle device_handle = {};
+
+ ret = copy_from_user(&args, inargs, sizeof(args));
+ if (ret) {
+ DXG_ERR("failed to copy input args");
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
+ context = hmgrtable_get_object_by_type(&process->handle_table,
+ HMGRENTRY_TYPE_DXGCONTEXT,
+ args.context);
+ if (context) {
+ hmgrtable_free_handle(&process->handle_table,
+ HMGRENTRY_TYPE_DXGCONTEXT, args.context);
+ context->handle.v = 0;
+ device_handle = context->device_handle;
+ context->object_state = DXGOBJECTSTATE_DESTROYED;
+ }
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
+
+ if (context == NULL) {
+ DXG_ERR("invalid context handle: %x", args.context.v);
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ /*
+ * The call acquires reference on the device. It is safe to access the
+ * adapter, because the device holds reference on it.
+ */
+ device = dxgprocess_device_by_handle(process, device_handle);
+ if (device == NULL) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ adapter = device->adapter;
+ ret = dxgadapter_acquire_lock_shared(adapter);
+ if (ret < 0) {
+ adapter = NULL;
+ goto cleanup;
+ }
+
+ ret = dxgvmb_send_destroy_context(adapter, process, args.context);
+
+ dxgcontext_destroy_safe(process, context);
+
+cleanup:
+
+ if (adapter)
+ dxgadapter_release_lock_shared(adapter);
+
+ if (device)
+ kref_put(&device->device_kref, dxgdevice_release);
+
+ DXG_TRACE("ioctl:%s %s %d", errorstr(ret), __func__, ret);
+ return ret;
+}
+
static struct ioctl_desc ioctls[] = {
/* 0x00 */ {},
/* 0x01 */ {dxgkio_open_adapter_from_luid, LX_DXOPENADAPTERFROMLUID},
/* 0x02 */ {dxgkio_create_device, LX_DXCREATEDEVICE},
/* 0x03 */ {},
-/* 0x04 */ {},
-/* 0x05 */ {},
+/* 0x04 */ {dxgkio_create_context_virtual, LX_DXCREATECONTEXTVIRTUAL},
+/* 0x05 */ {dxgkio_destroy_context, LX_DXDESTROYCONTEXT},
/* 0x06 */ {},
/* 0x07 */ {},
/* 0x08 */ {},
diff --git a/drivers/hv/dxgkrnl/misc.h b/drivers/hv/dxgkrnl/misc.h
index e0bd33b365b0..3a9637f0b5e2 100644
--- a/drivers/hv/dxgkrnl/misc.h
+++ b/drivers/hv/dxgkrnl/misc.h
@@ -29,6 +29,7 @@ extern const struct d3dkmthandle zerohandle;
* fd_mutex
* plistmutex (process list mutex)
* table_lock (handle table lock)
+ * context_list_lock
* core_lock (dxgadapter lock)
* device_lock (dxgdevice lock)
* process_adapter_mutex
diff --git a/include/uapi/misc/d3dkmthk.h b/include/uapi/misc/d3dkmthk.h
index 7414f0f5ce8e..4ba0070b061f 100644
--- a/include/uapi/misc/d3dkmthk.h
+++ b/include/uapi/misc/d3dkmthk.h
@@ -154,6 +154,49 @@ struct d3dkmt_destroydevice {
struct d3dkmthandle device;
};
+enum d3dkmt_clienthint {
+ _D3DKMT_CLIENTHNT_UNKNOWN = 0,
+ _D3DKMT_CLIENTHINT_OPENGL = 1,
+ _D3DKMT_CLIENTHINT_CDD = 2,
+ _D3DKMT_CLIENTHINT_DX7 = 7,
+ _D3DKMT_CLIENTHINT_DX8 = 8,
+ _D3DKMT_CLIENTHINT_DX9 = 9,
+ _D3DKMT_CLIENTHINT_DX10 = 10,
+};
+
+struct d3dddi_createcontextflags {
+ union {
+ struct {
+ __u32 null_rendering:1;
+ __u32 initial_data:1;
+ __u32 disable_gpu_timeout:1;
+ __u32 synchronization_only:1;
+ __u32 hw_queue_supported:1;
+ __u32 reserved:27;
+ };
+ __u32 value;
+ };
+};
+
+struct d3dkmt_destroycontext {
+ struct d3dkmthandle context;
+};
+
+struct d3dkmt_createcontextvirtual {
+ struct d3dkmthandle device;
+ __u32 node_ordinal;
+ __u32 engine_affinity;
+ struct d3dddi_createcontextflags flags;
+#ifdef __KERNEL__
+ void *priv_drv_data;
+#else
+ __u64 priv_drv_data;
+#endif
+ __u32 priv_drv_data_size;
+ enum d3dkmt_clienthint client_hint;
+ struct d3dkmthandle context;
+};
+
struct d3dkmt_adaptertype {
union {
struct {
@@ -232,6 +275,10 @@ struct d3dkmt_enumadapters3 {
_IOWR(0x47, 0x01, struct d3dkmt_openadapterfromluid)
#define LX_DXCREATEDEVICE \
_IOWR(0x47, 0x02, struct d3dkmt_createdevice)
+#define LX_DXCREATECONTEXTVIRTUAL \
+ _IOWR(0x47, 0x04, struct d3dkmt_createcontextvirtual)
+#define LX_DXDESTROYCONTEXT \
+ _IOWR(0x47, 0x05, struct d3dkmt_destroycontext)
#define LX_DXQUERYADAPTERINFO \
_IOWR(0x47, 0x09, struct d3dkmt_queryadapterinfo)
#define LX_DXENUMADAPTERS2 \
^ permalink raw reply related
* [PATCH 06/55] drivers: hv: dxgkrnl: Creation of dxgdevice objects
From: Eric Curtin @ 2026-03-19 20:24 UTC (permalink / raw)
To: linux-hyperv; +Cc: linux-kernel, iourit, wei.liu, decui, haiyangz
In-Reply-To: <20260319202509.63802-1-eric.curtin@docker.com>
From: Iouri Tarassov <iourit@linux.microsoft.com>
Implement ioctls for creation and destruction of dxgdevice
objects:
- the LX_DXCREATEDEVICE ioctl
- the LX_DXDESTROYDEVICE ioctl
A dxgdevice object represents a container of other virtual
compute device objects (allocations, sync objects, contexts,
etc.). It belongs to a dxgadapter object.
Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com>
[kms: forward port to 6.6 from 6.1. No code changes made.]
Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
---
drivers/hv/dxgkrnl/dxgadapter.c | 187 ++++++++++++++++++++++++++++++++
drivers/hv/dxgkrnl/dxgkrnl.h | 58 ++++++++++
drivers/hv/dxgkrnl/dxgprocess.c | 43 ++++++++
drivers/hv/dxgkrnl/dxgvmbus.c | 80 ++++++++++++++
drivers/hv/dxgkrnl/dxgvmbus.h | 22 ++++
drivers/hv/dxgkrnl/ioctl.c | 130 +++++++++++++++++++++-
drivers/hv/dxgkrnl/misc.h | 8 +-
include/uapi/misc/d3dkmthk.h | 82 ++++++++++++++
8 files changed, 604 insertions(+), 6 deletions(-)
diff --git a/drivers/hv/dxgkrnl/dxgadapter.c b/drivers/hv/dxgkrnl/dxgadapter.c
index fa0d6beca157..a9a341716eba 100644
--- a/drivers/hv/dxgkrnl/dxgadapter.c
+++ b/drivers/hv/dxgkrnl/dxgadapter.c
@@ -194,6 +194,122 @@ void dxgadapter_release_lock_shared(struct dxgadapter *adapter)
up_read(&adapter->core_lock);
}
+struct dxgdevice *dxgdevice_create(struct dxgadapter *adapter,
+ struct dxgprocess *process)
+{
+ struct dxgdevice *device;
+ int ret;
+
+ device = kzalloc(sizeof(struct dxgdevice), GFP_KERNEL);
+ if (device) {
+ kref_init(&device->device_kref);
+ device->adapter = adapter;
+ device->process = process;
+ kref_get(&adapter->adapter_kref);
+ init_rwsem(&device->device_lock);
+ INIT_LIST_HEAD(&device->pqueue_list_head);
+ device->object_state = DXGOBJECTSTATE_CREATED;
+ device->execution_state = _D3DKMT_DEVICEEXECUTION_ACTIVE;
+
+ ret = dxgprocess_adapter_add_device(process, adapter, device);
+ if (ret < 0) {
+ kref_put(&device->device_kref, dxgdevice_release);
+ device = NULL;
+ }
+ }
+ return device;
+}
+
+void dxgdevice_stop(struct dxgdevice *device)
+{
+}
+
+void dxgdevice_mark_destroyed(struct dxgdevice *device)
+{
+ down_write(&device->device_lock);
+ device->object_state = DXGOBJECTSTATE_DESTROYED;
+ up_write(&device->device_lock);
+}
+
+void dxgdevice_destroy(struct dxgdevice *device)
+{
+ struct dxgprocess *process = device->process;
+ struct dxgadapter *adapter = device->adapter;
+ struct d3dkmthandle device_handle = {};
+
+ DXG_TRACE("Destroying device: %p", device);
+
+ down_write(&device->device_lock);
+
+ if (device->object_state != DXGOBJECTSTATE_ACTIVE)
+ goto cleanup;
+
+ device->object_state = DXGOBJECTSTATE_DESTROYED;
+
+ dxgdevice_stop(device);
+
+ /* Guest handles need to be released before the host handles */
+ hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
+ if (device->handle_valid) {
+ hmgrtable_free_handle(&process->handle_table,
+ HMGRENTRY_TYPE_DXGDEVICE, device->handle);
+ device_handle = device->handle;
+ device->handle_valid = 0;
+ }
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
+
+ if (device_handle.v) {
+ up_write(&device->device_lock);
+ if (dxgadapter_acquire_lock_shared(adapter) == 0) {
+ dxgvmb_send_destroy_device(adapter, process,
+ device_handle);
+ dxgadapter_release_lock_shared(adapter);
+ }
+ down_write(&device->device_lock);
+ }
+
+cleanup:
+
+ if (device->adapter) {
+ dxgprocess_adapter_remove_device(device);
+ kref_put(&device->adapter->adapter_kref, dxgadapter_release);
+ device->adapter = NULL;
+ }
+
+ up_write(&device->device_lock);
+
+ kref_put(&device->device_kref, dxgdevice_release);
+ DXG_TRACE("Device destroyed");
+}
+
+int dxgdevice_acquire_lock_shared(struct dxgdevice *device)
+{
+ down_read(&device->device_lock);
+ if (!dxgdevice_is_active(device)) {
+ up_read(&device->device_lock);
+ return -ENODEV;
+ }
+ return 0;
+}
+
+void dxgdevice_release_lock_shared(struct dxgdevice *device)
+{
+ up_read(&device->device_lock);
+}
+
+bool dxgdevice_is_active(struct dxgdevice *device)
+{
+ return device->object_state == DXGOBJECTSTATE_ACTIVE;
+}
+
+void dxgdevice_release(struct kref *refcount)
+{
+ struct dxgdevice *device;
+
+ device = container_of(refcount, struct dxgdevice, device_kref);
+ kfree(device);
+}
+
struct dxgprocess_adapter *dxgprocess_adapter_create(struct dxgprocess *process,
struct dxgadapter *adapter)
{
@@ -208,6 +324,8 @@ struct dxgprocess_adapter *dxgprocess_adapter_create(struct dxgprocess *process,
adapter_info->adapter = adapter;
adapter_info->process = process;
adapter_info->refcount = 1;
+ mutex_init(&adapter_info->device_list_mutex);
+ INIT_LIST_HEAD(&adapter_info->device_list_head);
list_add_tail(&adapter_info->process_adapter_list_entry,
&process->process_adapter_list_head);
dxgadapter_add_process(adapter, adapter_info);
@@ -221,10 +339,34 @@ struct dxgprocess_adapter *dxgprocess_adapter_create(struct dxgprocess *process,
void dxgprocess_adapter_stop(struct dxgprocess_adapter *adapter_info)
{
+ struct dxgdevice *device;
+
+ mutex_lock(&adapter_info->device_list_mutex);
+ list_for_each_entry(device, &adapter_info->device_list_head,
+ device_list_entry) {
+ dxgdevice_stop(device);
+ }
+ mutex_unlock(&adapter_info->device_list_mutex);
}
void dxgprocess_adapter_destroy(struct dxgprocess_adapter *adapter_info)
{
+ struct dxgdevice *device;
+
+ mutex_lock(&adapter_info->device_list_mutex);
+ while (!list_empty(&adapter_info->device_list_head)) {
+ device = list_first_entry(&adapter_info->device_list_head,
+ struct dxgdevice, device_list_entry);
+ list_del(&device->device_list_entry);
+ device->device_list_entry.next = NULL;
+ mutex_unlock(&adapter_info->device_list_mutex);
+ dxgvmb_send_flush_device(device,
+ DXGDEVICE_FLUSHSCHEDULER_DEVICE_TERMINATE);
+ dxgdevice_destroy(device);
+ mutex_lock(&adapter_info->device_list_mutex);
+ }
+ mutex_unlock(&adapter_info->device_list_mutex);
+
dxgadapter_remove_process(adapter_info);
kref_put(&adapter_info->adapter->adapter_kref, dxgadapter_release);
list_del(&adapter_info->process_adapter_list_entry);
@@ -240,3 +382,48 @@ void dxgprocess_adapter_release(struct dxgprocess_adapter *adapter_info)
if (adapter_info->refcount == 0)
dxgprocess_adapter_destroy(adapter_info);
}
+
+int dxgprocess_adapter_add_device(struct dxgprocess *process,
+ struct dxgadapter *adapter,
+ struct dxgdevice *device)
+{
+ struct dxgprocess_adapter *entry;
+ struct dxgprocess_adapter *adapter_info = NULL;
+ int ret = 0;
+
+ dxgglobal_acquire_process_adapter_lock();
+
+ list_for_each_entry(entry, &process->process_adapter_list_head,
+ process_adapter_list_entry) {
+ if (entry->adapter == adapter) {
+ adapter_info = entry;
+ break;
+ }
+ }
+ if (adapter_info == NULL) {
+ DXG_ERR("failed to find process adapter info");
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ mutex_lock(&adapter_info->device_list_mutex);
+ list_add_tail(&device->device_list_entry,
+ &adapter_info->device_list_head);
+ device->adapter_info = adapter_info;
+ mutex_unlock(&adapter_info->device_list_mutex);
+
+cleanup:
+
+ dxgglobal_release_process_adapter_lock();
+ return ret;
+}
+
+void dxgprocess_adapter_remove_device(struct dxgdevice *device)
+{
+ DXG_TRACE("Removing device: %p", device);
+ mutex_lock(&device->adapter_info->device_list_mutex);
+ if (device->device_list_entry.next) {
+ list_del(&device->device_list_entry);
+ device->device_list_entry.next = NULL;
+ }
+ mutex_unlock(&device->adapter_info->device_list_mutex);
+}
diff --git a/drivers/hv/dxgkrnl/dxgkrnl.h b/drivers/hv/dxgkrnl/dxgkrnl.h
index b089d126f801..45ac1f25cc5e 100644
--- a/drivers/hv/dxgkrnl/dxgkrnl.h
+++ b/drivers/hv/dxgkrnl/dxgkrnl.h
@@ -34,6 +34,7 @@
struct dxgprocess;
struct dxgadapter;
+struct dxgdevice;
/*
* Driver private data.
@@ -71,6 +72,10 @@ struct dxgk_device_types {
u32 virtual_monitor_device:1;
};
+enum dxgdevice_flushschedulerreason {
+ DXGDEVICE_FLUSHSCHEDULER_DEVICE_TERMINATE = 4,
+};
+
enum dxgobjectstate {
DXGOBJECTSTATE_CREATED,
DXGOBJECTSTATE_ACTIVE,
@@ -166,6 +171,9 @@ struct dxgprocess_adapter {
struct list_head adapter_process_list_entry;
/* Entry in dxgprocess::process_adapter_list_head */
struct list_head process_adapter_list_entry;
+ /* List of all dxgdevice objects created for the process on adapter */
+ struct list_head device_list_head;
+ struct mutex device_list_mutex;
struct dxgadapter *adapter;
struct dxgprocess *process;
int refcount;
@@ -175,6 +183,10 @@ struct dxgprocess_adapter *dxgprocess_adapter_create(struct dxgprocess *process,
struct dxgadapter
*adapter);
void dxgprocess_adapter_release(struct dxgprocess_adapter *adapter);
+int dxgprocess_adapter_add_device(struct dxgprocess *process,
+ struct dxgadapter *adapter,
+ struct dxgdevice *device);
+void dxgprocess_adapter_remove_device(struct dxgdevice *device);
void dxgprocess_adapter_stop(struct dxgprocess_adapter *adapter_info);
void dxgprocess_adapter_destroy(struct dxgprocess_adapter *adapter_info);
@@ -222,6 +234,11 @@ struct dxgadapter *dxgprocess_get_adapter(struct dxgprocess *process,
struct d3dkmthandle handle);
struct dxgadapter *dxgprocess_adapter_by_handle(struct dxgprocess *process,
struct d3dkmthandle handle);
+struct dxgdevice *dxgprocess_device_by_handle(struct dxgprocess *process,
+ struct d3dkmthandle handle);
+struct dxgdevice *dxgprocess_device_by_object_handle(struct dxgprocess *process,
+ enum hmgrentry_type t,
+ struct d3dkmthandle h);
void dxgprocess_ht_lock_shared_down(struct dxgprocess *process);
void dxgprocess_ht_lock_shared_up(struct dxgprocess *process);
void dxgprocess_ht_lock_exclusive_down(struct dxgprocess *process);
@@ -241,6 +258,7 @@ enum dxgadapter_state {
* This object represents the grapchis adapter.
* Objects, which take reference on the adapter:
* - dxgglobal
+ * - dxgdevice
* - adapter handle (struct d3dkmthandle)
*/
struct dxgadapter {
@@ -277,6 +295,38 @@ void dxgadapter_add_process(struct dxgadapter *adapter,
struct dxgprocess_adapter *process_info);
void dxgadapter_remove_process(struct dxgprocess_adapter *process_info);
+/*
+ * The object represent the device object.
+ * The following objects take reference on the device
+ * - device handle (struct d3dkmthandle)
+ */
+struct dxgdevice {
+ enum dxgobjectstate object_state;
+ /* Device takes reference on the adapter */
+ struct dxgadapter *adapter;
+ struct dxgprocess_adapter *adapter_info;
+ struct dxgprocess *process;
+ /* Entry in the DGXPROCESS_ADAPTER device list */
+ struct list_head device_list_entry;
+ struct kref device_kref;
+ /* Protects destcruction of the device object */
+ struct rw_semaphore device_lock;
+ /* List of paging queues. Protected by process handle table lock. */
+ struct list_head pqueue_list_head;
+ struct d3dkmthandle handle;
+ enum d3dkmt_deviceexecution_state execution_state;
+ u32 handle_valid;
+};
+
+struct dxgdevice *dxgdevice_create(struct dxgadapter *a, struct dxgprocess *p);
+void dxgdevice_destroy(struct dxgdevice *device);
+void dxgdevice_stop(struct dxgdevice *device);
+void dxgdevice_mark_destroyed(struct dxgdevice *device);
+int dxgdevice_acquire_lock_shared(struct dxgdevice *dev);
+void dxgdevice_release_lock_shared(struct dxgdevice *dev);
+void dxgdevice_release(struct kref *refcount);
+bool dxgdevice_is_active(struct dxgdevice *dev);
+
long dxgk_compat_ioctl(struct file *f, unsigned int p1, unsigned long p2);
long dxgk_unlocked_ioctl(struct file *f, unsigned int p1, unsigned long p2);
@@ -313,6 +363,14 @@ int dxgvmb_send_destroy_process(struct d3dkmthandle process);
int dxgvmb_send_open_adapter(struct dxgadapter *adapter);
int dxgvmb_send_close_adapter(struct dxgadapter *adapter);
int dxgvmb_send_get_internal_adapter_info(struct dxgadapter *adapter);
+struct d3dkmthandle dxgvmb_send_create_device(struct dxgadapter *adapter,
+ struct dxgprocess *process,
+ struct d3dkmt_createdevice *args);
+int dxgvmb_send_destroy_device(struct dxgadapter *adapter,
+ struct dxgprocess *process,
+ struct d3dkmthandle h);
+int dxgvmb_send_flush_device(struct dxgdevice *device,
+ enum dxgdevice_flushschedulerreason reason);
int dxgvmb_send_query_adapter_info(struct dxgprocess *process,
struct dxgadapter *adapter,
struct d3dkmt_queryadapterinfo *args);
diff --git a/drivers/hv/dxgkrnl/dxgprocess.c b/drivers/hv/dxgkrnl/dxgprocess.c
index ab9a01e3c8c8..8373f681e822 100644
--- a/drivers/hv/dxgkrnl/dxgprocess.c
+++ b/drivers/hv/dxgkrnl/dxgprocess.c
@@ -241,6 +241,49 @@ struct dxgadapter *dxgprocess_adapter_by_handle(struct dxgprocess *process,
return adapter;
}
+/*
+ * Find the device associated with the object identified by (t, handle) in
+ * the process handle table and take a reference on it.
+ *
+ * Only HMGRENTRY_TYPE_DXGDEVICE is handled here; any other type is logged
+ * as invalid.
+ *
+ * Returns the device with device_kref incremented, or NULL when the handle
+ * is not found or the device reference count has already dropped to zero.
+ */
+struct dxgdevice *dxgprocess_device_by_object_handle(struct dxgprocess *process,
+ enum hmgrentry_type t,
+ struct d3dkmthandle handle)
+{
+ struct dxgdevice *device = NULL;
+ void *obj;
+
+ /* The table lock is held across the lookup and the reference grab */
+ hmgrtable_lock(&process->handle_table, DXGLOCK_SHARED);
+ obj = hmgrtable_get_object_by_type(&process->handle_table, t, handle);
+ if (obj) {
+ /* Handle of the owning device; stays zero for the types handled here */
+ struct d3dkmthandle device_handle = {};
+
+ switch (t) {
+ case HMGRENTRY_TYPE_DXGDEVICE:
+ device = obj;
+ break;
+ default:
+ DXG_ERR("invalid handle type: %d", t);
+ break;
+ }
+ /* Object is not a device itself: look the device up by its handle */
+ if (device == NULL)
+ device = hmgrtable_get_object_by_type(
+ &process->handle_table,
+ HMGRENTRY_TYPE_DXGDEVICE,
+ device_handle);
+ /* Do not hand out a device whose last reference is already gone */
+ if (device)
+ if (kref_get_unless_zero(&device->device_kref) == 0)
+ device = NULL;
+ }
+ if (device == NULL)
+ DXG_ERR("device_by_handle failed: %d %x", t, handle.v);
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_SHARED);
+ return device;
+}
+
+/*
+ * Look up a device by its own handle.
+ *
+ * Thin wrapper around dxgprocess_device_by_object_handle() with the object
+ * type fixed to HMGRENTRY_TYPE_DXGDEVICE; returns whatever that call returns.
+ */
+struct dxgdevice *dxgprocess_device_by_handle(struct dxgprocess *process,
+					      struct d3dkmthandle handle)
+{
+	struct dxgdevice *found;
+
+	found = dxgprocess_device_by_object_handle(process,
+						   HMGRENTRY_TYPE_DXGDEVICE,
+						   handle);
+	return found;
+}
+
void dxgprocess_ht_lock_shared_down(struct dxgprocess *process)
{
hmgrtable_lock(&process->handle_table, DXGLOCK_SHARED);
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c
index 0abf45d0d3f7..73804d11ec49 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.c
+++ b/drivers/hv/dxgkrnl/dxgvmbus.c
@@ -673,6 +673,86 @@ int dxgvmb_send_get_internal_adapter_info(struct dxgadapter *adapter)
return ret;
}
+/*
+ * Send the DXGK_VMBCOMMAND_CREATEDEVICE message to the host.
+ *
+ * Only args->flags is forwarded to the host. The error code is not returned
+ * to the caller: failure is reported as a handle whose value is zero.
+ */
+struct d3dkmthandle dxgvmb_send_create_device(struct dxgadapter *adapter,
+ struct dxgprocess *process,
+ struct d3dkmt_createdevice *args)
+{
+ int ret;
+ struct dxgkvmb_command_createdevice *command;
+ /* Zero-initialized so that early failures return a zero handle */
+ struct dxgkvmb_command_createdevice_return result = { };
+ struct dxgvmbusmsg msg;
+
+ ret = init_message(&msg, adapter, process, sizeof(*command));
+ if (ret)
+ goto cleanup;
+ command = (void *)msg.msg;
+
+ command_vgpu_to_host_init2(&command->hdr, DXGK_VMBCOMMAND_CREATEDEVICE,
+ process->host_handle);
+ command->flags = args->flags;
+
+ ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size,
+ &result, sizeof(result));
+ /* Discard whatever was written to the result if the send failed */
+ if (ret < 0)
+ result.device.v = 0;
+ free_message(&msg, process);
+cleanup:
+ if (ret)
+ DXG_TRACE("err: %d", ret);
+ return result.device;
+}
+
+/*
+ * Send the DXGK_VMBCOMMAND_DESTROYDEVICE message for device handle 'h' to
+ * the host.
+ *
+ * Returns the result of init_message() if the message cannot be set up,
+ * otherwise the result of dxgvmb_send_sync_msg_ntstatus().
+ */
+int dxgvmb_send_destroy_device(struct dxgadapter *adapter,
+ struct dxgprocess *process,
+ struct d3dkmthandle h)
+{
+ int ret;
+ struct dxgkvmb_command_destroydevice *command;
+ /* hdr starts as NULL because free_message() also runs when init fails */
+ struct dxgvmbusmsg msg = {.hdr = NULL};
+
+ ret = init_message(&msg, adapter, process, sizeof(*command));
+ if (ret)
+ goto cleanup;
+ command = (void *)msg.msg;
+
+ command_vgpu_to_host_init2(&command->hdr, DXGK_VMBCOMMAND_DESTROYDEVICE,
+ process->host_handle);
+ command->device = h;
+
+ ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size);
+cleanup:
+ free_message(&msg, process);
+ if (ret)
+ DXG_TRACE("err: %d", ret);
+ return ret;
+}
+
+/*
+ * Send the DXGK_VMBCOMMAND_FLUSHDEVICE message for 'device' to the host,
+ * passing 'reason' through unchanged. The adapter and process are taken
+ * from the device object.
+ *
+ * Returns the result of init_message() if the message cannot be set up,
+ * otherwise the result of dxgvmb_send_sync_msg_ntstatus().
+ */
+int dxgvmb_send_flush_device(struct dxgdevice *device,
+ enum dxgdevice_flushschedulerreason reason)
+{
+ int ret;
+ struct dxgkvmb_command_flushdevice *command;
+ /* hdr starts as NULL because free_message() also runs when init fails */
+ struct dxgvmbusmsg msg = {.hdr = NULL};
+ struct dxgprocess *process = device->process;
+
+ ret = init_message(&msg, device->adapter, process, sizeof(*command));
+ if (ret)
+ goto cleanup;
+ command = (void *)msg.msg;
+
+ command_vgpu_to_host_init2(&command->hdr, DXGK_VMBCOMMAND_FLUSHDEVICE,
+ process->host_handle);
+ command->device = device->handle;
+ command->reason = reason;
+
+ ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size);
+cleanup:
+ free_message(&msg, process);
+ if (ret)
+ DXG_TRACE("err: %d", ret);
+ return ret;
+}
+
int dxgvmb_send_query_adapter_info(struct dxgprocess *process,
struct dxgadapter *adapter,
struct d3dkmt_queryadapterinfo *args)
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h
index a805a396e083..4ccf45765954 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.h
+++ b/drivers/hv/dxgkrnl/dxgvmbus.h
@@ -247,4 +247,26 @@ struct dxgkvmb_command_queryadapterinfo_return {
u8 private_data[1];
};
+/*
+ * Message body of DXGK_VMBCOMMAND_CREATEDEVICE.
+ * dxgvmb_send_create_device() fills in only hdr and flags.
+ */
+struct dxgkvmb_command_createdevice {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+ struct d3dkmt_createdeviceflags flags;
+ bool cdd_device;
+ void *error_code;
+};
+
+/* Reply to DXGK_VMBCOMMAND_CREATEDEVICE: the handle of the created device */
+struct dxgkvmb_command_createdevice_return {
+ struct d3dkmthandle device;
+};
+
+/* Message body of DXGK_VMBCOMMAND_DESTROYDEVICE */
+struct dxgkvmb_command_destroydevice {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+ /* Handle of the device to destroy */
+ struct d3dkmthandle device;
+};
+
+/* Message body of DXGK_VMBCOMMAND_FLUSHDEVICE */
+struct dxgkvmb_command_flushdevice {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+ /* Handle of the device to flush */
+ struct d3dkmthandle device;
+ /* Reason supplied by the caller of dxgvmb_send_flush_device() */
+ enum dxgdevice_flushschedulerreason reason;
+};
+
#endif /* _DXGVMBUS_H */
diff --git a/drivers/hv/dxgkrnl/ioctl.c b/drivers/hv/dxgkrnl/ioctl.c
index b08ea9430093..405e8b92913e 100644
--- a/drivers/hv/dxgkrnl/ioctl.c
+++ b/drivers/hv/dxgkrnl/ioctl.c
@@ -424,10 +424,136 @@ dxgkio_query_adapter_info(struct dxgprocess *process, void *__user inargs)
return ret;
}
+/*
+ * LX_DXCREATEDEVICE handler: create a device on the adapter identified by
+ * args.adapter.
+ *
+ * The guest dxgdevice object is created first, then the host is asked to
+ * create its device. The handle returned by the host is copied to
+ * args.device in user space and registered in the process handle table for
+ * the guest object.
+ *
+ * Returns 0 on success or a negative error code. On any failure the host
+ * device (if one was created) and the guest device object are destroyed.
+ */
+static int
+dxgkio_create_device(struct dxgprocess *process, void *__user inargs)
+{
+	struct d3dkmt_createdevice args;
+	int ret;
+	struct dxgadapter *adapter = NULL;
+	struct dxgdevice *device = NULL;
+	struct d3dkmthandle host_device_handle = {};
+	bool adapter_locked = false;
+
+	ret = copy_from_user(&args, inargs, sizeof(args));
+	if (ret) {
+		DXG_ERR("failed to copy input args");
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	/* The call acquires reference on the adapter */
+	adapter = dxgprocess_adapter_by_handle(process, args.adapter);
+	if (adapter == NULL) {
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	device = dxgdevice_create(adapter, process);
+	if (device == NULL) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	ret = dxgadapter_acquire_lock_shared(adapter);
+	if (ret < 0)
+		goto cleanup;
+
+	adapter_locked = true;
+
+	host_device_handle = dxgvmb_send_create_device(adapter, process, &args);
+	if (host_device_handle.v == 0) {
+		/*
+		 * The host reports failure only through a zero handle. Without
+		 * an explicit error here 'ret' would still be 0: the ioctl
+		 * would report success and the guest device would be leaked.
+		 */
+		DXG_ERR("failed to create host device");
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	ret = copy_to_user(&((struct d3dkmt_createdevice *)inargs)->device,
+			   &host_device_handle,
+			   sizeof(struct d3dkmthandle));
+	if (ret) {
+		DXG_ERR("failed to copy device handle");
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	/* Publish the device under the same handle value as on the host */
+	hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
+	ret = hmgrtable_assign_handle(&process->handle_table, device,
+				      HMGRENTRY_TYPE_DXGDEVICE,
+				      host_device_handle);
+	if (ret >= 0) {
+		device->handle = host_device_handle;
+		device->handle_valid = 1;
+		device->object_state = DXGOBJECTSTATE_ACTIVE;
+	}
+	hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
+
+cleanup:
+
+	if (ret < 0) {
+		/* A non-zero host handle implies the adapter lock is held */
+		if (host_device_handle.v)
+			dxgvmb_send_destroy_device(adapter, process,
+						   host_device_handle);
+		if (device)
+			dxgdevice_destroy(device);
+	}
+
+	if (adapter_locked)
+		dxgadapter_release_lock_shared(adapter);
+
+	/* Drop the reference taken by dxgprocess_adapter_by_handle() */
+	if (adapter)
+		kref_put(&adapter->adapter_kref, dxgadapter_release);
+
+	DXG_TRACE("ioctl:%s %d", errorstr(ret), ret);
+	return ret;
+}
+
+/*
+ * LX_DXDESTROYDEVICE handler: destroy the device identified by args.device.
+ *
+ * The handle is removed from the process handle table first, then the guest
+ * device object is destroyed and finally the host is told to destroy its
+ * device.
+ *
+ * Returns 0 on success, -EINVAL when the arguments cannot be copied or the
+ * handle does not refer to a device. The result of the host call is ignored.
+ */
+static int
+dxgkio_destroy_device(struct dxgprocess *process, void *__user inargs)
+{
+ struct d3dkmt_destroydevice args;
+ int ret;
+ struct dxgadapter *adapter = NULL;
+ struct dxgdevice *device = NULL;
+
+ ret = copy_from_user(&args, inargs, sizeof(args));
+ if (ret) {
+ DXG_ERR("failed to copy input args");
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ /* Look up and free the handle under one exclusive table lock hold */
+ hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
+ device = hmgrtable_get_object_by_type(&process->handle_table,
+ HMGRENTRY_TYPE_DXGDEVICE,
+ args.device);
+ if (device) {
+ hmgrtable_free_handle(&process->handle_table,
+ HMGRENTRY_TYPE_DXGDEVICE, args.device);
+ device->handle_valid = 0;
+ }
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
+
+ if (device == NULL) {
+ DXG_ERR("invalid device handle: %x", args.device.v);
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ /* Save the adapter pointer: 'device' must not be used after destroy */
+ adapter = device->adapter;
+
+ dxgdevice_destroy(device);
+
+ /*
+ * NOTE(review): 'adapter' is used after the device was destroyed and no
+ * extra adapter reference is taken here; confirm that something else
+ * keeps the adapter alive at this point.
+ */
+ if (dxgadapter_acquire_lock_shared(adapter) == 0) {
+ dxgvmb_send_destroy_device(adapter, process, args.device);
+ dxgadapter_release_lock_shared(adapter);
+ }
+
+cleanup:
+
+ DXG_TRACE("ioctl:%s %d", errorstr(ret), ret);
+ return ret;
+}
+
static struct ioctl_desc ioctls[] = {
/* 0x00 */ {},
/* 0x01 */ {dxgkio_open_adapter_from_luid, LX_DXOPENADAPTERFROMLUID},
-/* 0x02 */ {},
+/* 0x02 */ {dxgkio_create_device, LX_DXCREATEDEVICE},
/* 0x03 */ {},
/* 0x04 */ {},
/* 0x05 */ {},
@@ -450,7 +576,7 @@ static struct ioctl_desc ioctls[] = {
/* 0x16 */ {},
/* 0x17 */ {},
/* 0x18 */ {},
-/* 0x19 */ {},
+/* 0x19 */ {dxgkio_destroy_device, LX_DXDESTROYDEVICE},
/* 0x1a */ {},
/* 0x1b */ {},
/* 0x1c */ {},
diff --git a/drivers/hv/dxgkrnl/misc.h b/drivers/hv/dxgkrnl/misc.h
index dc849a8ed3f2..e0bd33b365b0 100644
--- a/drivers/hv/dxgkrnl/misc.h
+++ b/drivers/hv/dxgkrnl/misc.h
@@ -27,10 +27,10 @@ extern const struct d3dkmthandle zerohandle;
*
* channel_lock (VMBus channel lock)
* fd_mutex
- * plistmutex
- * table_lock
- * core_lock
- * device_lock
+ * plistmutex (process list mutex)
+ * table_lock (handle table lock)
+ * core_lock (dxgadapter lock)
+ * device_lock (dxgdevice lock)
* process_adapter_mutex
* adapter_list_lock
* device_mutex (dxgglobal mutex)
diff --git a/include/uapi/misc/d3dkmthk.h b/include/uapi/misc/d3dkmthk.h
index c675d5827ed5..7414f0f5ce8e 100644
--- a/include/uapi/misc/d3dkmthk.h
+++ b/include/uapi/misc/d3dkmthk.h
@@ -86,6 +86,74 @@ struct d3dkmt_openadapterfromluid {
struct d3dkmthandle adapter_handle;
};
+/*
+ * Element type of d3dkmt_createdevice::allocation_list: an allocation
+ * handle plus per-allocation flags. The flags can be accessed as individual
+ * bit fields or all at once through 'value'.
+ */
+struct d3dddi_allocationlist {
+ struct d3dkmthandle allocation;
+ union {
+ struct {
+ __u32 write_operation :1;
+ __u32 do_not_retire_instance :1;
+ __u32 offer_priority :3;
+ __u32 reserved :27;
+ };
+ __u32 value;
+ };
+};
+
+/*
+ * Element type of d3dkmt_createdevice::patch_location_list.
+ * slot_id and reserved can also be accessed together through 'value'.
+ */
+struct d3dddi_patchlocationlist {
+ __u32 allocation_index;
+ union {
+ struct {
+ __u32 slot_id:24;
+ __u32 reserved:8;
+ };
+ __u32 value;
+ };
+ __u32 driver_id;
+ __u32 allocation_offset;
+ __u32 patch_offset;
+ __u32 split_offset;
+};
+
+/*
+ * Device creation flags (32 bits in total). They are passed in
+ * d3dkmt_createdevice::flags and forwarded unchanged to the host in the
+ * create device message.
+ */
+struct d3dkmt_createdeviceflags {
+ __u32 legacy_mode:1;
+ __u32 request_vSync:1;
+ __u32 disable_gpu_timeout:1;
+ __u32 gdi_device:1;
+ __u32 reserved:28;
+};
+
+/*
+ * Argument of the LX_DXCREATEDEVICE ioctl.
+ *
+ * Pointer members are declared as pointers for the kernel and as __u64 for
+ * user space.
+ */
+struct d3dkmt_createdevice {
+ /* in: handle of the adapter to create the device on */
+ struct d3dkmthandle adapter;
+ __u32 reserved3;
+ /* in: forwarded to the host */
+ struct d3dkmt_createdeviceflags flags;
+ /* out: handle of the created device */
+ struct d3dkmthandle device;
+#ifdef __KERNEL__
+ void *command_buffer;
+#else
+ __u64 command_buffer;
+#endif
+ __u32 command_buffer_size;
+ __u32 reserved;
+#ifdef __KERNEL__
+ struct d3dddi_allocationlist *allocation_list;
+#else
+ __u64 allocation_list;
+#endif
+ __u32 allocation_list_size;
+ __u32 reserved1;
+#ifdef __KERNEL__
+ struct d3dddi_patchlocationlist *patch_location_list;
+#else
+ __u64 patch_location_list;
+#endif
+ __u32 patch_location_list_size;
+ __u32 reserved2;
+};
+
+/* Argument of the LX_DXDESTROYDEVICE ioctl */
+struct d3dkmt_destroydevice {
+ /* in: handle of the device to destroy */
+ struct d3dkmthandle device;
+};
+
struct d3dkmt_adaptertype {
union {
struct {
@@ -125,6 +193,16 @@ struct d3dkmt_queryadapterinfo {
__u32 private_data_size;
};
+/*
+ * Execution state of a device (dxgdevice::execution_state).
+ * A newly created dxgdevice starts in _D3DKMT_DEVICEEXECUTION_ACTIVE.
+ */
+enum d3dkmt_deviceexecution_state {
+ _D3DKMT_DEVICEEXECUTION_ACTIVE = 1,
+ _D3DKMT_DEVICEEXECUTION_RESET = 2,
+ _D3DKMT_DEVICEEXECUTION_HUNG = 3,
+ _D3DKMT_DEVICEEXECUTION_STOPPED = 4,
+ _D3DKMT_DEVICEEXECUTION_ERROR_OUTOFMEMORY = 5,
+ _D3DKMT_DEVICEEXECUTION_ERROR_DMAFAULT = 6,
+ _D3DKMT_DEVICEEXECUTION_ERROR_DMAPAGEFAULT = 7,
+};
+
union d3dkmt_enumadapters_filter {
struct {
__u64 include_compute_only:1;
@@ -152,12 +230,16 @@ struct d3dkmt_enumadapters3 {
#define LX_DXOPENADAPTERFROMLUID \
_IOWR(0x47, 0x01, struct d3dkmt_openadapterfromluid)
+#define LX_DXCREATEDEVICE \
+ _IOWR(0x47, 0x02, struct d3dkmt_createdevice)
#define LX_DXQUERYADAPTERINFO \
_IOWR(0x47, 0x09, struct d3dkmt_queryadapterinfo)
#define LX_DXENUMADAPTERS2 \
_IOWR(0x47, 0x14, struct d3dkmt_enumadapters2)
#define LX_DXCLOSEADAPTER \
_IOWR(0x47, 0x15, struct d3dkmt_closeadapter)
+#define LX_DXDESTROYDEVICE \
+ _IOWR(0x47, 0x19, struct d3dkmt_destroydevice)
#define LX_DXENUMADAPTERS3 \
_IOWR(0x47, 0x3e, struct d3dkmt_enumadapters3)
^ permalink raw reply related
* [PATCH 05/55] drivers: hv: dxgkrnl: Enumerate and open dxgadapter objects
From: Eric Curtin @ 2026-03-19 20:24 UTC (permalink / raw)
To: linux-hyperv; +Cc: linux-kernel, iourit, wei.liu, decui, haiyangz
In-Reply-To: <20260319202509.63802-1-eric.curtin@docker.com>
From: Iouri Tarassov <iourit@linux.microsoft.com>
Implement ioctls to enumerate dxgadapter objects:
- The LX_DXENUMADAPTERS2 ioctl
- The LX_DXENUMADAPTERS3 ioctl.
Implement ioctls to open adapter by LUID and to close adapter
handle:
- The LX_DXOPENADAPTERFROMLUID ioctl
- the LX_DXCLOSEADAPTER ioctl
Implement the ioctl to query dxgadapter information:
- The LX_DXQUERYADAPTERINFO ioctl
When a dxgadapter is enumerated, it is implicitly opened and
a handle (d3dkmthandle) is created in the current process handle
table. The handle is returned to the caller and can be used
by user mode to reference the VGPU adapter in other ioctls.
The caller is responsible for closing the adapter when it is no
longer used by sending the LX_DXCLOSEADAPTER ioctl.
A dxgprocess has a list of opened dxgadapter objects
(dxgprocess_adapter is used to represent the entry in the list).
A dxgadapter also has a list of dxgprocess_adapter objects.
This is needed for cleanup because either a process or an adapter
could be destroyed first.
Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com>
[kms: forward port to 6.6 from 6.1. No code changes made.]
Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
---
drivers/hv/dxgkrnl/dxgmodule.c | 3 +
drivers/hv/dxgkrnl/ioctl.c | 482 ++++++++++++++++++++++++++++++++-
2 files changed, 484 insertions(+), 1 deletion(-)
diff --git a/drivers/hv/dxgkrnl/dxgmodule.c b/drivers/hv/dxgkrnl/dxgmodule.c
index 17c22001ca6c..fbe1c58ecb46 100644
--- a/drivers/hv/dxgkrnl/dxgmodule.c
+++ b/drivers/hv/dxgkrnl/dxgmodule.c
@@ -721,6 +721,9 @@ static struct dxgglobal *dxgglobal_create(void)
init_rwsem(&dxgglobal->channel_lock);
+#ifdef DEBUG
+ dxgk_validate_ioctls();
+#endif
return dxgglobal;
}
diff --git a/drivers/hv/dxgkrnl/ioctl.c b/drivers/hv/dxgkrnl/ioctl.c
index 60e38d104517..b08ea9430093 100644
--- a/drivers/hv/dxgkrnl/ioctl.c
+++ b/drivers/hv/dxgkrnl/ioctl.c
@@ -29,8 +29,472 @@ struct ioctl_desc {
u32 arg_size;
};
-static struct ioctl_desc ioctls[] = {
+#ifdef DEBUG
+/* Tag used in ioctl trace messages: "err" for negative results, else "" */
+static char *errorstr(int ret)
+{
+	if (ret < 0)
+		return "err";
+	return "";
+}
+#endif
+
+/*
+ * LX_DXOPENADAPTERFROMLUID handler: open the first active adapter whose
+ * LUID equals args.adapter_luid and return its handle to user space in
+ * args.adapter_handle.
+ *
+ * Returns 0 on success or a negative error code when the arguments cannot
+ * be copied, no active adapter matches, the adapter cannot be opened, or
+ * the handle cannot be copied back. A handle opened by a failed call is
+ * closed again.
+ */
+static int dxgkio_open_adapter_from_luid(struct dxgprocess *process,
+					 void *__user inargs)
+{
+	struct d3dkmt_openadapterfromluid args;
+	int ret;
+	struct dxgadapter *entry;
+	struct dxgadapter *adapter = NULL;
+	struct d3dkmt_openadapterfromluid *__user result = inargs;
+	struct dxgglobal *dxgglobal = dxggbl();
+	/*
+	 * Handle opened by this call. It is kept separate from 'args' so
+	 * that the adapter_handle value supplied by the caller can neither
+	 * be mistaken for a successfully opened handle nor be closed on the
+	 * error path.
+	 */
+	struct d3dkmthandle handle = {};
+
+	ret = copy_from_user(&args, inargs, sizeof(args));
+	if (ret) {
+		DXG_ERR("Faled to copy input args");
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	dxgglobal_acquire_adapter_list_lock(DXGLOCK_SHARED);
+	dxgglobal_acquire_process_adapter_lock();
+
+	list_for_each_entry(entry, &dxgglobal->adapter_list_head,
+			    adapter_list_entry) {
+		/* Adapters whose lock cannot be taken are skipped */
+		if (dxgadapter_acquire_lock_shared(entry) == 0) {
+			if (*(u64 *) &entry->luid ==
+			    *(u64 *) &args.adapter_luid) {
+				ret = dxgprocess_open_adapter(process, entry,
+							      &handle);
+
+				if (ret >= 0) {
+					ret = copy_to_user(
+						&result->adapter_handle,
+						&handle,
+						sizeof(struct d3dkmthandle));
+					if (ret)
+						ret = -EINVAL;
+				}
+				adapter = entry;
+			}
+			dxgadapter_release_lock_shared(entry);
+			/* Stop at the first match, whether or not it opened */
+			if (adapter)
+				break;
+		}
+	}
+
+	dxgglobal_release_process_adapter_lock();
+	dxgglobal_release_adapter_list_lock(DXGLOCK_SHARED);
+
+	/* Nothing was opened; keep a more specific error if there is one */
+	if (ret >= 0 && handle.v == 0)
+		ret = -EINVAL;
+
+cleanup:
+
+	/* Undo the open only if this call actually produced a handle */
+	if (ret < 0 && handle.v)
+		dxgprocess_close_adapter(process, handle);
+
+	DXG_TRACE("ioctl:%s %d", errorstr(ret), ret);
+	return ret;
+}
+
+/*
+ * Enumerate and open all active adapters for 'process'.
+ *
+ * When info_out is NULL or adapter_count_max is 0, only the global number
+ * of adapters is written to adapter_count_out. Otherwise every active
+ * adapter is opened, its handle and LUID are written to info_out and the
+ * number of entries is written to adapter_count_out.
+ *
+ * If there are more active adapters than adapter_count_max, no handles are
+ * returned: the adapters opened so far are closed, the global number of
+ * adapters is written to adapter_count_out and STATUS_BUFFER_TOO_SMALL is
+ * returned.
+ *
+ * 'filter' is currently not used.
+ *
+ * Returns 0 on success or an error code. Handles opened by a failed call
+ * are closed again.
+ */
+static int
+dxgkp_enum_adapters(struct dxgprocess *process,
+		    union d3dkmt_enumadapters_filter filter,
+		    u32 adapter_count_max,
+		    struct d3dkmt_adapterinfo *__user info_out,
+		    u32 * __user adapter_count_out)
+{
+	int ret = 0;
+	struct dxgadapter *entry;
+	struct d3dkmt_adapterinfo *info = NULL;
+	int adapter_count = 0;
+	bool buffer_too_small = false;
+	int i;
+	struct dxgglobal *dxgglobal = dxggbl();
+
+	if (info_out == NULL || adapter_count_max == 0) {
+		ret = copy_to_user(adapter_count_out,
+				   &dxgglobal->num_adapters, sizeof(u32));
+		if (ret) {
+			DXG_ERR("copy_to_user faled");
+			ret = -EINVAL;
+		}
+		goto cleanup;
+	}
+
+	if (adapter_count_max > 0xFFFF) {
+		DXG_ERR("too many adapters");
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	info = vzalloc(sizeof(struct d3dkmt_adapterinfo) * adapter_count_max);
+	if (info == NULL) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	dxgglobal_acquire_adapter_list_lock(DXGLOCK_SHARED);
+	dxgglobal_acquire_process_adapter_lock();
+	list_for_each_entry(entry, &dxgglobal->adapter_list_head,
+			    adapter_list_entry) {
+		if (dxgadapter_acquire_lock_shared(entry) == 0) {
+			struct d3dkmt_adapterinfo *inf;
+
+			/*
+			 * 'info' has room for adapter_count_max entries only;
+			 * stop before writing past its end.
+			 */
+			if ((u32)adapter_count >= adapter_count_max) {
+				buffer_too_small = true;
+				dxgadapter_release_lock_shared(entry);
+				break;
+			}
+
+			inf = &info[adapter_count];
+			ret = dxgprocess_open_adapter(process, entry,
+						      &inf->adapter_handle);
+			if (ret >= 0) {
+				inf->adapter_luid = entry->luid;
+				DXG_TRACE("adapter: %x %x:%x",
+					  inf->adapter_handle.v,
+					  inf->adapter_luid.b,
+					  inf->adapter_luid.a);
+				adapter_count++;
+			}
+			dxgadapter_release_lock_shared(entry);
+		}
+		if (ret < 0)
+			break;
+	}
+
+	dxgglobal_release_process_adapter_lock();
+	dxgglobal_release_adapter_list_lock(DXGLOCK_SHARED);
+
+	/* Do not let the copy_to_user() calls below hide an open failure */
+	if (ret < 0)
+		goto cleanup;
+
+	if (buffer_too_small) {
+		DXG_TRACE("Too many adapters");
+		/*
+		 * No handle is reported to the caller on this path, so close
+		 * the ones opened so far. The count is reset so that the
+		 * common cleanup code does not close them a second time.
+		 */
+		for (i = 0; i < adapter_count; i++)
+			dxgprocess_close_adapter(process,
+						 info[i].adapter_handle);
+		adapter_count = 0;
+
+		ret = copy_to_user(adapter_count_out,
+				   &dxgglobal->num_adapters, sizeof(u32));
+		if (ret) {
+			DXG_ERR("copy_to_user failed");
+			ret = -EINVAL;
+		} else {
+			ret = STATUS_BUFFER_TOO_SMALL;
+		}
+		goto cleanup;
+	}
+
+	ret = copy_to_user(adapter_count_out, &adapter_count,
+			   sizeof(adapter_count));
+	if (ret) {
+		DXG_ERR("failed to copy adapter_count");
+		ret = -EINVAL;
+		goto cleanup;
+	}
+	ret = copy_to_user(info_out, info, sizeof(info[0]) * adapter_count);
+	if (ret) {
+		DXG_ERR("failed to copy adapter info");
+		ret = -EINVAL;
+	}
+
+cleanup:
+
+	if (ret >= 0) {
+		DXG_TRACE("found %d adapters", adapter_count);
+	} else if (info) {
+		/* The caller never learns about these handles; close them */
+		for (i = 0; i < adapter_count; i++)
+			dxgprocess_close_adapter(process,
+						 info[i].adapter_handle);
+	}
+
+	/* vfree(NULL) is a no-op */
+	vfree(info);
+
+	DXG_TRACE("ioctl:%s %d", errorstr(ret), ret);
+	return ret;
+}
+
+/*
+ * LX_DXENUMADAPTERS2 handler: enumerate and open all active adapters.
+ *
+ * When args.adapters is NULL, only the global number of adapters is
+ * returned in args.num_adapters. Otherwise every active adapter is opened,
+ * its handle and LUID are written to the args.adapters array and
+ * args.num_adapters is updated to the number of entries written.
+ *
+ * Returns 0 on success, -EOVERFLOW when the caller's array is too small,
+ * or another negative error code. Handles opened by a failed call are
+ * closed again.
+ */
+static int
+dxgkio_enum_adapters(struct dxgprocess *process, void *__user inargs)
+{
+	struct d3dkmt_enumadapters2 args;
+	int ret;
+	struct dxgadapter *entry;
+	struct d3dkmt_adapterinfo *info = NULL;
+	int adapter_count = 0;
+	int i;
+	struct dxgglobal *dxgglobal = dxggbl();
+
+	ret = copy_from_user(&args, inargs, sizeof(args));
+	if (ret) {
+		DXG_ERR("failed to copy input args");
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	if (args.adapters == NULL) {
+		DXG_TRACE("buffer is NULL");
+		args.num_adapters = dxgglobal->num_adapters;
+		ret = copy_to_user(inargs, &args, sizeof(args));
+		if (ret) {
+			DXG_ERR("failed to copy args to user");
+			ret = -EINVAL;
+		}
+		goto cleanup;
+	}
+	if (args.num_adapters < dxgglobal->num_adapters) {
+		/*
+		 * NOTE(review): the updated count is not copied back to user
+		 * space on this path; confirm whether callers expect it.
+		 */
+		args.num_adapters = dxgglobal->num_adapters;
+		DXG_TRACE("buffer is too small");
+		ret = -EOVERFLOW;
+		goto cleanup;
+	}
+
+	if (args.num_adapters > D3DKMT_ADAPTERS_MAX) {
+		DXG_TRACE("too many adapters");
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	info = vzalloc(sizeof(struct d3dkmt_adapterinfo) * args.num_adapters);
+	if (info == NULL) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	dxgglobal_acquire_adapter_list_lock(DXGLOCK_SHARED);
+	dxgglobal_acquire_process_adapter_lock();
+
+	list_for_each_entry(entry, &dxgglobal->adapter_list_head,
+			    adapter_list_entry) {
+		if (dxgadapter_acquire_lock_shared(entry) == 0) {
+			if (adapter_count >= args.num_adapters) {
+				/*
+				 * The size check above ran before the list
+				 * lock was taken, so re-check here to never
+				 * write past the end of 'info'.
+				 */
+				ret = -EOVERFLOW;
+			} else {
+				struct d3dkmt_adapterinfo *inf;
+
+				inf = &info[adapter_count];
+				ret = dxgprocess_open_adapter(process, entry,
+							&inf->adapter_handle);
+				if (ret >= 0) {
+					inf->adapter_luid = entry->luid;
+					DXG_TRACE("adapter: %x %llx",
+						  inf->adapter_handle.v,
+						  *(u64 *) &inf->adapter_luid);
+					adapter_count++;
+				}
+			}
+			dxgadapter_release_lock_shared(entry);
+		}
+		if (ret < 0)
+			break;
+	}
+
+	dxgglobal_release_process_adapter_lock();
+	dxgglobal_release_adapter_list_lock(DXGLOCK_SHARED);
+
+	/* Do not let the copy_to_user() calls below hide a loop failure */
+	if (ret < 0)
+		goto cleanup;
+
+	args.num_adapters = adapter_count;
+
+	ret = copy_to_user(inargs, &args, sizeof(args));
+	if (ret) {
+		DXG_ERR("failed to copy args to user");
+		ret = -EINVAL;
+		goto cleanup;
+	}
+	ret = copy_to_user(args.adapters, info,
+			   sizeof(info[0]) * args.num_adapters);
+	if (ret) {
+		DXG_ERR("failed to copy adapter info to user");
+		ret = -EINVAL;
+	}
+
+cleanup:
+
+	if (ret < 0) {
+		/*
+		 * Close exactly the handles opened by this call.
+		 * adapter_count is non-zero only after 'info' was allocated.
+		 */
+		for (i = 0; i < adapter_count; i++)
+			dxgprocess_close_adapter(process,
+						 info[i].adapter_handle);
+	} else {
+		DXG_TRACE("found %d adapters", args.num_adapters);
+	}
+
+	/* vfree(NULL) is a no-op */
+	vfree(info);
+
+	DXG_TRACE("ioctl:%s %d", errorstr(ret), ret);
+	return ret;
+}
+
+/*
+ * LX_DXENUMADAPTERS3 handler: copy the arguments from user space and let
+ * dxgkp_enum_adapters() do the enumeration. The adapter count is written
+ * directly to the adapter_count member of the user space structure.
+ *
+ * Returns -EINVAL when the arguments cannot be copied, otherwise the result
+ * of dxgkp_enum_adapters().
+ */
+static int
+dxgkio_enum_adapters3(struct dxgprocess *process, void *__user inargs)
+{
+	struct d3dkmt_enumadapters3 *user_args = inargs;
+	struct d3dkmt_enumadapters3 args;
+	int ret;
+
+	if (copy_from_user(&args, inargs, sizeof(args))) {
+		DXG_ERR("failed to copy input args");
+		ret = -EINVAL;
+	} else {
+		ret = dxgkp_enum_adapters(process, args.filter,
+					  args.adapter_count,
+					  args.adapters,
+					  &user_args->adapter_count);
+	}
+
+	DXG_TRACE("ioctl: %s %d", errorstr(ret), ret);
+	return ret;
+}
+
+/*
+ * LX_DXCLOSEADAPTER handler: the argument is the adapter handle itself.
+ *
+ * Returns -EINVAL when the handle cannot be copied from user space,
+ * otherwise the result of dxgprocess_close_adapter().
+ */
+static int
+dxgkio_close_adapter(struct dxgprocess *process, void *__user inargs)
+{
+	struct d3dkmthandle args;
+	int ret;
+
+	if (copy_from_user(&args, inargs, sizeof(args)) != 0) {
+		DXG_ERR("failed to copy input args");
+		ret = -EINVAL;
+	} else {
+		ret = dxgprocess_close_adapter(process, args);
+		if (ret < 0)
+			DXG_ERR("failed to close adapter: %d", ret);
+	}
+
+	DXG_TRACE("ioctl: %s %d", errorstr(ret), ret);
+	return ret;
+}
+
+/*
+ * LX_DXQUERYADAPTERINFO handler: forward an adapter information query to
+ * the host.
+ *
+ * Returns -EINVAL when the arguments cannot be copied, the private data
+ * size is zero or larger than DXG_MAX_VM_BUS_PACKET_SIZE, or the adapter
+ * handle is invalid. Otherwise returns the result of taking the adapter
+ * lock or of dxgvmb_send_query_adapter_info().
+ */
+static int
+dxgkio_query_adapter_info(struct dxgprocess *process, void *__user inargs)
+{
+ struct d3dkmt_queryadapterinfo args;
+ int ret;
+ struct dxgadapter *adapter = NULL;
+
+ ret = copy_from_user(&args, inargs, sizeof(args));
+ if (ret) {
+ DXG_ERR("failed to copy input args");
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ /* Reject sizes that are empty or exceed the VM bus packet limit */
+ if (args.private_data_size > DXG_MAX_VM_BUS_PACKET_SIZE ||
+ args.private_data_size == 0) {
+ DXG_ERR("invalid private data size");
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ DXG_TRACE("Type: %d Size: %x", args.type, args.private_data_size);
+
+ /* The reference obtained here is dropped at the cleanup label */
+ adapter = dxgprocess_adapter_by_handle(process, args.adapter);
+ if (adapter == NULL) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ ret = dxgadapter_acquire_lock_shared(adapter);
+ if (ret < 0)
+ goto cleanup;
+
+ ret = dxgvmb_send_query_adapter_info(process, adapter, &args);
+
+ dxgadapter_release_lock_shared(adapter);
+
+cleanup:
+
+ if (adapter)
+ kref_put(&adapter->adapter_kref, dxgadapter_release);
+
+ DXG_TRACE("ioctl:%s %d", errorstr(ret), ret);
+ return ret;
+}
+
+static struct ioctl_desc ioctls[] = {
+/* 0x00 */ {},
+/* 0x01 */ {dxgkio_open_adapter_from_luid, LX_DXOPENADAPTERFROMLUID},
+/* 0x02 */ {},
+/* 0x03 */ {},
+/* 0x04 */ {},
+/* 0x05 */ {},
+/* 0x06 */ {},
+/* 0x07 */ {},
+/* 0x08 */ {},
+/* 0x09 */ {dxgkio_query_adapter_info, LX_DXQUERYADAPTERINFO},
+/* 0x0a */ {},
+/* 0x0b */ {},
+/* 0x0c */ {},
+/* 0x0d */ {},
+/* 0x0e */ {},
+/* 0x0f */ {},
+/* 0x10 */ {},
+/* 0x11 */ {},
+/* 0x12 */ {},
+/* 0x13 */ {},
+/* 0x14 */ {dxgkio_enum_adapters, LX_DXENUMADAPTERS2},
+/* 0x15 */ {dxgkio_close_adapter, LX_DXCLOSEADAPTER},
+/* 0x16 */ {},
+/* 0x17 */ {},
+/* 0x18 */ {},
+/* 0x19 */ {},
+/* 0x1a */ {},
+/* 0x1b */ {},
+/* 0x1c */ {},
+/* 0x1d */ {},
+/* 0x1e */ {},
+/* 0x1f */ {},
+/* 0x20 */ {},
+/* 0x21 */ {},
+/* 0x22 */ {},
+/* 0x23 */ {},
+/* 0x24 */ {},
+/* 0x25 */ {},
+/* 0x26 */ {},
+/* 0x27 */ {},
+/* 0x28 */ {},
+/* 0x29 */ {},
+/* 0x2a */ {},
+/* 0x2b */ {},
+/* 0x2c */ {},
+/* 0x2d */ {},
+/* 0x2e */ {},
+/* 0x2f */ {},
+/* 0x30 */ {},
+/* 0x31 */ {},
+/* 0x32 */ {},
+/* 0x33 */ {},
+/* 0x34 */ {},
+/* 0x35 */ {},
+/* 0x36 */ {},
+/* 0x37 */ {},
+/* 0x38 */ {},
+/* 0x39 */ {},
+/* 0x3a */ {},
+/* 0x3b */ {},
+/* 0x3c */ {},
+/* 0x3d */ {},
+/* 0x3e */ {dxgkio_enum_adapters3, LX_DXENUMADAPTERS3},
+/* 0x3f */ {},
+/* 0x40 */ {},
+/* 0x41 */ {},
+/* 0x42 */ {},
+/* 0x43 */ {},
+/* 0x44 */ {},
+/* 0x45 */ {},
};
/*
@@ -82,3 +546,19 @@ long dxgk_unlocked_ioctl(struct file *f, unsigned int p1, unsigned long p2)
DXG_TRACE("unlocked ioctl %x Code:%d", p1, _IOC_NR(p1));
return dxgk_ioctl(f, p1, p2);
}
+
+#ifdef DEBUG
+/*
+ * Debug check of the ioctl table: every populated slot must sit at the
+ * index equal to the number encoded in its ioctl code. Each mismatch is
+ * logged and asserted on.
+ */
+void dxgk_validate_ioctls(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ioctls); i++) {
+		/* Unused slots carry no ioctl code */
+		if (!ioctls[i].ioctl)
+			continue;
+		if (_IOC_NR(ioctls[i].ioctl) == i)
+			continue;
+		DXG_ERR("Invalid ioctl");
+		DXGKRNL_ASSERT(0);
+	}
+}
+#endif
^ permalink raw reply related
* [PATCH 04/55] drivers: hv: dxgkrnl: Opening of /dev/dxg device and dxgprocess creation
From: Eric Curtin @ 2026-03-19 20:24 UTC (permalink / raw)
To: linux-hyperv; +Cc: linux-kernel, iourit, wei.liu, decui, haiyangz
In-Reply-To: <20260319202509.63802-1-eric.curtin@docker.com>
From: Iouri Tarassov <iourit@linux.microsoft.com>
- Implement opening of the device (/dev/dxg) file object and creation of
dxgprocess objects.
- Add VM bus messages to create and destroy the host side of a dxgprocess
object.
- Implement the handle manager, which manages d3dkmthandle handles
for the internal process objects. The handles are used by a user mode
client to reference dxgkrnl objects.
A dxgprocess is created for each process that opens /dev/dxg.
dxgprocess is ref counted, so the existing dxgprocess object is reused
for a process that opens the device object multiple times.
dxgprocess is destroyed when the file object is released.
A corresponding dxgprocess object is created on the host for every
dxgprocess object in the guest.
When a dxgkrnl object is created, in most cases the corresponding
object is created in the host. The VM references the host objects by
handles (d3dkmthandle). d3dkmthandle values for a host object and
the corresponding VM object are the same. A host handle is allocated
first and its value is assigned to the guest object.
Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com>
[kms: forward port to 6.6 from 6.1. No code changes made.]
Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
---
drivers/hv/dxgkrnl/Makefile | 2 +-
drivers/hv/dxgkrnl/dxgadapter.c | 72 ++++
drivers/hv/dxgkrnl/dxgkrnl.h | 95 +++++-
drivers/hv/dxgkrnl/dxgmodule.c | 97 ++++++
drivers/hv/dxgkrnl/dxgprocess.c | 262 +++++++++++++++
drivers/hv/dxgkrnl/dxgvmbus.c | 164 ++++++++++
drivers/hv/dxgkrnl/dxgvmbus.h | 36 ++
drivers/hv/dxgkrnl/hmgr.c | 563 ++++++++++++++++++++++++++++++++
drivers/hv/dxgkrnl/hmgr.h | 112 +++++++
drivers/hv/dxgkrnl/ioctl.c | 60 ++++
drivers/hv/dxgkrnl/misc.h | 9 +-
include/uapi/misc/d3dkmthk.h | 103 ++++++
12 files changed, 1569 insertions(+), 6 deletions(-)
create mode 100644 drivers/hv/dxgkrnl/dxgprocess.c
create mode 100644 drivers/hv/dxgkrnl/hmgr.c
create mode 100644 drivers/hv/dxgkrnl/hmgr.h
diff --git a/drivers/hv/dxgkrnl/Makefile b/drivers/hv/dxgkrnl/Makefile
index 2ed07d877c91..9d821e83448a 100644
--- a/drivers/hv/dxgkrnl/Makefile
+++ b/drivers/hv/dxgkrnl/Makefile
@@ -2,4 +2,4 @@
# Makefile for the hyper-v compute device driver (dxgkrnl).
obj-$(CONFIG_DXGKRNL) += dxgkrnl.o
-dxgkrnl-y := dxgmodule.o misc.o dxgadapter.o ioctl.o dxgvmbus.o
+dxgkrnl-y := dxgmodule.o hmgr.o misc.o dxgadapter.o ioctl.o dxgvmbus.o dxgprocess.o
diff --git a/drivers/hv/dxgkrnl/dxgadapter.c b/drivers/hv/dxgkrnl/dxgadapter.c
index 07d47699d255..fa0d6beca157 100644
--- a/drivers/hv/dxgkrnl/dxgadapter.c
+++ b/drivers/hv/dxgkrnl/dxgadapter.c
@@ -100,6 +100,7 @@ void dxgadapter_start(struct dxgadapter *adapter)
void dxgadapter_stop(struct dxgadapter *adapter)
{
+ struct dxgprocess_adapter *entry;
bool adapter_stopped = false;
down_write(&adapter->core_lock);
@@ -112,6 +113,15 @@ void dxgadapter_stop(struct dxgadapter *adapter)
if (adapter_stopped)
return;
+ dxgglobal_acquire_process_adapter_lock();
+
+ list_for_each_entry(entry, &adapter->adapter_process_list_head,
+ adapter_process_list_entry) {
+ dxgprocess_adapter_stop(entry);
+ }
+
+ dxgglobal_release_process_adapter_lock();
+
if (dxgadapter_acquire_lock_exclusive(adapter) == 0) {
dxgvmb_send_close_adapter(adapter);
dxgadapter_release_lock_exclusive(adapter);
@@ -135,6 +145,21 @@ bool dxgadapter_is_active(struct dxgadapter *adapter)
return adapter->adapter_state == DXGADAPTER_STATE_ACTIVE;
}
+/*
+ * Append 'process_info' to the adapter's list of per-process entries.
+ * Protected by dxgglobal_acquire_process_adapter_lock.
+ */
+void dxgadapter_add_process(struct dxgadapter *adapter,
+			    struct dxgprocess_adapter *process_info)
+{
+	struct list_head *node = &process_info->adapter_process_list_entry;
+	struct list_head *head = &adapter->adapter_process_list_head;
+
+	DXG_TRACE("%p %p", adapter, process_info);
+	list_add_tail(node, head);
+}
+
+/* Unlink 'process_info' from its adapter's list of per-process entries */
+void dxgadapter_remove_process(struct dxgprocess_adapter *process_info)
+{
+	struct list_head *node = &process_info->adapter_process_list_entry;
+
+	DXG_TRACE("%p %p", process_info->adapter, process_info);
+	list_del(node);
+}
+
int dxgadapter_acquire_lock_exclusive(struct dxgadapter *adapter)
{
down_write(&adapter->core_lock);
@@ -168,3 +193,50 @@ void dxgadapter_release_lock_shared(struct dxgadapter *adapter)
{
up_read(&adapter->core_lock);
}
+
+/*
+ * Allocate the object that links 'process' and 'adapter'.
+ *
+ * The new object takes a reference on the adapter, starts with a refcount
+ * of 1 and is added to both the process list and the adapter list.
+ *
+ * Returns NULL when the allocation fails or the adapter reference cannot
+ * be acquired.
+ */
+struct dxgprocess_adapter *dxgprocess_adapter_create(struct dxgprocess *process,
+						     struct dxgadapter *adapter)
+{
+	struct dxgprocess_adapter *info;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (info == NULL)
+		return NULL;
+
+	if (kref_get_unless_zero(&adapter->adapter_kref) == 0) {
+		DXG_ERR("failed to acquire adapter reference");
+		kfree(info);
+		return NULL;
+	}
+
+	info->adapter = adapter;
+	info->process = process;
+	info->refcount = 1;
+	list_add_tail(&info->process_adapter_list_entry,
+		      &process->process_adapter_list_head);
+	dxgadapter_add_process(adapter, info);
+
+	return info;
+}
+
+/*
+ * Called by dxgadapter_stop() for every dxgprocess_adapter of the adapter.
+ * Currently an empty stub.
+ */
+void dxgprocess_adapter_stop(struct dxgprocess_adapter *adapter_info)
+{
+}
+
+/*
+ * Unlink 'adapter_info' from both the adapter and the process lists, drop
+ * the adapter reference taken in dxgprocess_adapter_create() and free the
+ * object.
+ */
+void dxgprocess_adapter_destroy(struct dxgprocess_adapter *adapter_info)
+{
+ dxgadapter_remove_process(adapter_info);
+ /* The adapter must not be touched after this point */
+ kref_put(&adapter_info->adapter->adapter_kref, dxgadapter_release);
+ list_del(&adapter_info->process_adapter_list_entry);
+ kfree(adapter_info);
+}
+
+/*
+ * Drop one count from 'adapter_info' and destroy it when the count
+ * reaches zero.
+ *
+ * Must be called when dxgglobal::process_adapter_mutex is held
+ */
+void dxgprocess_adapter_release(struct dxgprocess_adapter *adapter_info)
+{
+	if (--adapter_info->refcount != 0)
+		return;
+
+	dxgprocess_adapter_destroy(adapter_info);
+}
diff --git a/drivers/hv/dxgkrnl/dxgkrnl.h b/drivers/hv/dxgkrnl/dxgkrnl.h
index ba2a7c6001aa..b089d126f801 100644
--- a/drivers/hv/dxgkrnl/dxgkrnl.h
+++ b/drivers/hv/dxgkrnl/dxgkrnl.h
@@ -29,8 +29,10 @@
#include <uapi/misc/d3dkmthk.h>
#include <linux/version.h>
#include "misc.h"
+#include "hmgr.h"
#include <uapi/misc/d3dkmthk.h>
+struct dxgprocess;
struct dxgadapter;
/*
@@ -111,6 +113,10 @@ struct dxgglobal {
struct miscdevice dxgdevice;
struct mutex device_mutex;
+ /* list of created processes */
+ struct list_head plisthead;
+ struct mutex plistmutex;
+
/* list of created adapters */
struct list_head adapter_list_head;
struct rw_semaphore adapter_list_lock;
@@ -124,6 +130,9 @@ struct dxgglobal {
/* protects acces to the global VM bus channel */
struct rw_semaphore channel_lock;
+ /* protects the dxgprocess_adapter lists */
+ struct mutex process_adapter_mutex;
+
bool global_channel_initialized;
bool async_msg_enabled;
bool misc_registered;
@@ -144,13 +153,84 @@ int dxgglobal_init_global_channel(void);
void dxgglobal_destroy_global_channel(void);
struct vmbus_channel *dxgglobal_get_vmbus(void);
struct dxgvmbuschannel *dxgglobal_get_dxgvmbuschannel(void);
+void dxgglobal_acquire_process_adapter_lock(void);
+void dxgglobal_release_process_adapter_lock(void);
int dxgglobal_acquire_channel_lock(void);
void dxgglobal_release_channel_lock(void);
+/*
+ * Describes adapter information for each process
+ */
+struct dxgprocess_adapter {
+ /* Entry in dxgadapter::adapter_process_list_head */
+ struct list_head adapter_process_list_entry;
+ /* Entry in dxgprocess::process_adapter_list_head */
+ struct list_head process_adapter_list_entry;
+ /* Adapter described by this object; a reference is dropped on destroy */
+ struct dxgadapter *adapter;
+ /* Process the object belongs to */
+ struct dxgprocess *process;
+ /*
+ * Plain counter, not a kref: incremented by dxgprocess_open_adapter()
+ * when the object already exists and decremented by
+ * dxgprocess_adapter_release(), which destroys the object at zero.
+ */
+ int refcount;
+};
+
+struct dxgprocess_adapter *dxgprocess_adapter_create(struct dxgprocess *process,
+ struct dxgadapter
+ *adapter);
+void dxgprocess_adapter_release(struct dxgprocess_adapter *adapter);
+void dxgprocess_adapter_stop(struct dxgprocess_adapter *adapter_info);
+void dxgprocess_adapter_destroy(struct dxgprocess_adapter *adapter_info);
+
+/*
+ * The structure represents a process, which opened the /dev/dxg device.
+ * A corresponding object is created on the host.
+ */
struct dxgprocess {
- /* Placeholder */
+ /*
+ * Process list entry in dxgglobal.
+ * Protected by the dxgglobal->plistmutex.
+ */
+ struct list_head plistentry;
+ /* current->pid and current->tgid of the task that created the object */
+ pid_t pid;
+ pid_t tgid;
+ /* How many times the process was opened */
+ struct kref process_kref;
+ /*
+ * This handle table is used for all objects except dxgadapter
+ * The handle table lock order is higher than the local_handle_table
+ * lock
+ */
+ struct hmgrtable handle_table;
+ /*
+ * This handle table is used for dxgadapter objects.
+ * The handle table lock order is lowest.
+ */
+ struct hmgrtable local_handle_table;
+ /* Handle of the corresponding object on the host */
+ struct d3dkmthandle host_handle;
+
+ /* List of opened adapters (dxgprocess_adapter) */
+ struct list_head process_adapter_list_head;
};
+struct dxgprocess *dxgprocess_create(void);
+void dxgprocess_destroy(struct dxgprocess *process);
+void dxgprocess_release(struct kref *refcount);
+int dxgprocess_open_adapter(struct dxgprocess *process,
+ struct dxgadapter *adapter,
+ struct d3dkmthandle *handle);
+int dxgprocess_close_adapter(struct dxgprocess *process,
+ struct d3dkmthandle handle);
+struct dxgadapter *dxgprocess_get_adapter(struct dxgprocess *process,
+ struct d3dkmthandle handle);
+struct dxgadapter *dxgprocess_adapter_by_handle(struct dxgprocess *process,
+ struct d3dkmthandle handle);
+void dxgprocess_ht_lock_shared_down(struct dxgprocess *process);
+void dxgprocess_ht_lock_shared_up(struct dxgprocess *process);
+void dxgprocess_ht_lock_exclusive_down(struct dxgprocess *process);
+void dxgprocess_ht_lock_exclusive_up(struct dxgprocess *process);
+struct dxgprocess_adapter *dxgprocess_get_adapter_info(struct dxgprocess
+ *process,
+ struct dxgadapter
+ *adapter);
+
enum dxgadapter_state {
DXGADAPTER_STATE_ACTIVE = 0,
DXGADAPTER_STATE_STOPPED = 1,
@@ -168,6 +248,8 @@ struct dxgadapter {
struct kref adapter_kref;
/* Entry in the list of adapters in dxgglobal */
struct list_head adapter_list_entry;
+ /* The list of dxgprocess_adapter entries */
+ struct list_head adapter_process_list_head;
struct pci_dev *pci_dev;
struct hv_device *hv_dev;
struct dxgvmbuschannel channel;
@@ -191,6 +273,12 @@ void dxgadapter_release_lock_shared(struct dxgadapter *adapter);
int dxgadapter_acquire_lock_exclusive(struct dxgadapter *adapter);
void dxgadapter_acquire_lock_forced(struct dxgadapter *adapter);
void dxgadapter_release_lock_exclusive(struct dxgadapter *adapter);
+void dxgadapter_add_process(struct dxgadapter *adapter,
+ struct dxgprocess_adapter *process_info);
+void dxgadapter_remove_process(struct dxgprocess_adapter *process_info);
+
+long dxgk_compat_ioctl(struct file *f, unsigned int p1, unsigned long p2);
+long dxgk_unlocked_ioctl(struct file *f, unsigned int p1, unsigned long p2);
/*
* The convention is that VNBus instance id is a GUID, but the host sets
@@ -220,9 +308,14 @@ static inline void guid_to_luid(guid_t *guid, struct winluid *luid)
void dxgvmb_initialize(void);
int dxgvmb_send_set_iospace_region(u64 start, u64 len);
+int dxgvmb_send_create_process(struct dxgprocess *process);
+int dxgvmb_send_destroy_process(struct d3dkmthandle process);
int dxgvmb_send_open_adapter(struct dxgadapter *adapter);
int dxgvmb_send_close_adapter(struct dxgadapter *adapter);
int dxgvmb_send_get_internal_adapter_info(struct dxgadapter *adapter);
+int dxgvmb_send_query_adapter_info(struct dxgprocess *process,
+ struct dxgadapter *adapter,
+ struct d3dkmt_queryadapterinfo *args);
int dxgvmb_send_async_msg(struct dxgvmbuschannel *channel,
void *command,
u32 cmd_size);
diff --git a/drivers/hv/dxgkrnl/dxgmodule.c b/drivers/hv/dxgkrnl/dxgmodule.c
index ef80b920f010..17c22001ca6c 100644
--- a/drivers/hv/dxgkrnl/dxgmodule.c
+++ b/drivers/hv/dxgkrnl/dxgmodule.c
@@ -123,6 +123,20 @@ static struct dxgadapter *find_adapter(struct winluid *luid)
return adapter;
}
+/* Takes the global mutex that protects the dxgprocess_adapter lists. */
+void dxgglobal_acquire_process_adapter_lock(void)
+{
+	mutex_lock(&dxggbl()->process_adapter_mutex);
+}
+
+/* Releases the global mutex that protects the dxgprocess_adapter lists. */
+void dxgglobal_release_process_adapter_lock(void)
+{
+	mutex_unlock(&dxggbl()->process_adapter_mutex);
+}
+
/*
* Creates a new dxgadapter object, which represents a virtual GPU, projected
* by the host.
@@ -147,6 +161,7 @@ int dxgglobal_create_adapter(struct pci_dev *dev, guid_t *guid,
kref_init(&adapter->adapter_kref);
init_rwsem(&adapter->core_lock);
+ INIT_LIST_HEAD(&adapter->adapter_process_list_head);
adapter->pci_dev = dev;
guid_to_luid(guid, &adapter->luid);
@@ -205,8 +220,87 @@ static void dxgglobal_stop_adapters(void)
dxgglobal_release_adapter_list_lock(DXGLOCK_EXCL);
}
+/*
+ * Returns a referenced dxgprocess for the current thread group.
+ *
+ * The global process list is searched by tgid under plistmutex. When a
+ * live entry is found, a reference is taken on it. When there is no entry,
+ * or the matching entry is already being destroyed, a new dxgprocess is
+ * created after the mutex has been dropped.
+ *
+ * Returns NULL when a new object cannot be created.
+ */
+static struct dxgprocess *dxgglobal_get_current_process(void)
+{
+	struct dxgglobal *dxgglobal = dxggbl();
+	struct dxgprocess *found = NULL;
+	struct dxgprocess *cur;
+
+	mutex_lock(&dxgglobal->plistmutex);
+	list_for_each_entry(cur, &dxgglobal->plisthead, plistentry) {
+		/* All threads of a process have the same thread group ID */
+		if (cur->tgid != current->tgid)
+			continue;
+
+		if (kref_get_unless_zero(&cur->process_kref)) {
+			found = cur;
+			DXG_TRACE("found dxgprocess");
+		} else {
+			DXG_TRACE("process is destroyed");
+		}
+		break;
+	}
+	mutex_unlock(&dxgglobal->plistmutex);
+
+	return found ? found : dxgprocess_create();
+}
+
+/*
+ * File operations for the /dev/dxg device
+ */
+
+/*
+ * open() handler: binds the file to the dxgprocess of the calling thread
+ * group, creating the object when needed. Returns 0 on success or -EBADF
+ * when no dxgprocess could be obtained.
+ */
+static int dxgk_open(struct inode *n, struct file *f)
+{
+	struct dxgprocess *process;
+
+	DXG_TRACE("%p %d %d", f, current->pid, current->tgid);
+
+	/* Find/create a dxgprocess structure for this process */
+	process = dxgglobal_get_current_process();
+	if (process == NULL) {
+		DXG_TRACE("cannot create dxgprocess");
+		return -EBADF;
+	}
+
+	f->private_data = process;
+	return 0;
+}
+
+/*
+ * release() handler: drops the reference on the dxgprocess that
+ * dxgk_open() stored in the file. Returns -EINVAL when the file has no
+ * dxgprocess attached.
+ */
+static int dxgk_release(struct inode *n, struct file *f)
+{
+	struct dxgprocess *process = f->private_data;
+
+	DXG_TRACE("%p, %p", f, process);
+
+	if (!process)
+		return -EINVAL;
+
+	kref_put(&process->process_kref, dxgprocess_release);
+	f->private_data = NULL;
+
+	return 0;
+}
+
const struct file_operations dxgk_fops = {
.owner = THIS_MODULE,
+ .open = dxgk_open,
+ .release = dxgk_release,
+ .compat_ioctl = dxgk_compat_ioctl,
+ .unlocked_ioctl = dxgk_unlocked_ioctl,
};
/*
@@ -616,7 +710,10 @@ static struct dxgglobal *dxgglobal_create(void)
if (!dxgglobal)
return NULL;
+ INIT_LIST_HEAD(&dxgglobal->plisthead);
+ mutex_init(&dxgglobal->plistmutex);
mutex_init(&dxgglobal->device_mutex);
+ mutex_init(&dxgglobal->process_adapter_mutex);
INIT_LIST_HEAD(&dxgglobal->vgpu_ch_list_head);
INIT_LIST_HEAD(&dxgglobal->adapter_list_head);
diff --git a/drivers/hv/dxgkrnl/dxgprocess.c b/drivers/hv/dxgkrnl/dxgprocess.c
new file mode 100644
index 000000000000..ab9a01e3c8c8
--- /dev/null
+++ b/drivers/hv/dxgkrnl/dxgprocess.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * DXGPROCESS implementation
+ *
+ */
+
+#include "dxgkrnl.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "dxgk: " fmt
+
+/*
+ * Creates a new dxgprocess object for the current task and registers it
+ * in the global process list.
+ *
+ * Must NOT be called with dxgglobal->plistmutex held: the function takes
+ * the mutex itself to insert the object into the list.
+ *
+ * Returns the new object holding one reference, or NULL when the
+ * allocation or the host side creation fails.
+ */
+struct dxgprocess *dxgprocess_create(void)
+{
+	struct dxgprocess *process;
+	int ret;
+	struct dxgglobal *dxgglobal = dxggbl();
+
+	process = kzalloc(sizeof(*process), GFP_KERNEL);
+	if (process == NULL)
+		return NULL;
+
+	DXG_TRACE("new dxgprocess created");
+	process->pid = current->pid;
+	process->tgid = current->tgid;
+
+	ret = dxgvmb_send_create_process(process);
+	if (ret < 0) {
+		DXG_TRACE("send_create_process failed");
+		kfree(process);
+		return NULL;
+	}
+
+	/*
+	 * Finish the initialization before the object becomes visible in
+	 * the global list: another thread of the same thread group can
+	 * look it up and start using the handle tables and the adapter
+	 * list as soon as plistmutex is dropped.
+	 */
+	INIT_LIST_HEAD(&process->plistentry);
+	INIT_LIST_HEAD(&process->process_adapter_list_head);
+	kref_init(&process->process_kref);
+	hmgrtable_init(&process->handle_table, process);
+	hmgrtable_init(&process->local_handle_table, process);
+
+	mutex_lock(&dxgglobal->plistmutex);
+	list_add_tail(&process->plistentry, &dxgglobal->plisthead);
+	mutex_unlock(&dxgglobal->plistmutex);
+
+	return process;
+}
+
+/*
+ * Releases everything owned by the process object: all dxgprocess_adapter
+ * entries, every adapter handle left in the local handle table, and both
+ * handle tables. The dxgprocess structure itself is not freed here.
+ */
+void dxgprocess_destroy(struct dxgprocess *process)
+{
+	struct dxgprocess_adapter *info;
+	struct dxgprocess_adapter *next;
+	enum hmgrentry_type type;
+	struct d3dkmthandle handle;
+	void *object;
+	int index = 0;
+
+	/* Destroy all adapter state */
+	dxgglobal_acquire_process_adapter_lock();
+	list_for_each_entry_safe(info, next,
+				 &process->process_adapter_list_head,
+				 process_adapter_list_entry)
+		dxgprocess_adapter_destroy(info);
+	dxgglobal_release_process_adapter_lock();
+
+	/* Only adapter handles are expected in the local handle table */
+	while (hmgrtable_next_entry(&process->local_handle_table,
+				    &index, &type, &handle, &object)) {
+		if (type == HMGRENTRY_TYPE_DXGADAPTER)
+			dxgprocess_close_adapter(process, handle);
+		else
+			DXG_ERR("invalid entry in handle table %d", type);
+	}
+
+	hmgrtable_destroy(&process->handle_table);
+	hmgrtable_destroy(&process->local_handle_table);
+}
+
+/*
+ * kref release callback for dxgprocess::process_kref.
+ * Removes the process from the global list, destroys its state, destroys
+ * the host object when a host handle exists and frees the structure.
+ */
+void dxgprocess_release(struct kref *refcount)
+{
+	struct dxgglobal *dxgglobal = dxggbl();
+	struct dxgprocess *process =
+		container_of(refcount, struct dxgprocess, process_kref);
+
+	mutex_lock(&dxgglobal->plistmutex);
+	list_del(&process->plistentry);
+	mutex_unlock(&dxgglobal->plistmutex);
+
+	dxgprocess_destroy(process);
+
+	if (process->host_handle.v != 0)
+		dxgvmb_send_destroy_process(process->host_handle);
+
+	kfree(process);
+}
+
+/*
+ * Looks up the dxgprocess_adapter object that links the process to the
+ * given adapter. Returns NULL when the process has no entry for the
+ * adapter. No reference is taken and no lock is acquired here.
+ */
+struct dxgprocess_adapter *dxgprocess_get_adapter_info(struct dxgprocess
+						       *process,
+						       struct dxgadapter
+						       *adapter)
+{
+	struct dxgprocess_adapter *info;
+
+	list_for_each_entry(info, &process->process_adapter_list_head,
+			    process_adapter_list_entry) {
+		if (info->adapter != adapter)
+			continue;
+
+		DXG_TRACE("Found process info %p", info);
+		return info;
+	}
+
+	return NULL;
+}
+
+/*
+ * Opens the adapter for the process and returns a new adapter handle in *h.
+ *
+ * Dxgprocess takes references on dxgadapter and dxgprocess_adapter.
+ * An existing dxgprocess_adapter gets its refcount incremented, otherwise
+ * a new one is created. *h is zero on failure.
+ *
+ * The process_adapter lock is held.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int dxgprocess_open_adapter(struct dxgprocess *process,
+			    struct dxgadapter *adapter,
+			    struct d3dkmthandle *h)
+{
+	struct dxgprocess_adapter *adapter_info;
+	struct d3dkmthandle handle;
+
+	h->v = 0;
+
+	adapter_info = dxgprocess_get_adapter_info(process, adapter);
+	if (adapter_info) {
+		adapter_info->refcount++;
+	} else {
+		DXG_TRACE("creating new process adapter info");
+		adapter_info = dxgprocess_adapter_create(process, adapter);
+		if (!adapter_info)
+			return -ENOMEM;
+	}
+
+	handle = hmgrtable_alloc_handle_safe(&process->local_handle_table,
+					     adapter, HMGRENTRY_TYPE_DXGADAPTER,
+					     true);
+	if (!handle.v) {
+		DXG_ERR("failed to create adapter handle");
+		/* Undo the reference taken (or the object created) above */
+		dxgprocess_adapter_release(adapter_info);
+		return -ENOMEM;
+	}
+
+	*h = handle;
+	return 0;
+}
+
+/*
+ * Closes an adapter handle of the process.
+ *
+ * The handle is removed from the local handle table and one reference on
+ * the matching dxgprocess_adapter object is dropped. A zero handle is
+ * accepted and ignored.
+ *
+ * The function acquires the process_adapter lock itself, so the caller
+ * must not hold it.
+ *
+ * Returns 0 on success, or -EINVAL when the handle does not refer to an
+ * adapter or the process has no dxgprocess_adapter for it.
+ */
+int dxgprocess_close_adapter(struct dxgprocess *process,
+			     struct d3dkmthandle handle)
+{
+	struct dxgadapter *adapter;
+	struct dxgprocess_adapter *adapter_info;
+	int ret = 0;
+
+	if (handle.v == 0)
+		return 0;
+
+	hmgrtable_lock(&process->local_handle_table, DXGLOCK_EXCL);
+	adapter = dxgprocess_get_adapter(process, handle);
+	if (adapter)
+		hmgrtable_free_handle(&process->local_handle_table,
+				      HMGRENTRY_TYPE_DXGADAPTER, handle);
+	hmgrtable_unlock(&process->local_handle_table, DXGLOCK_EXCL);
+
+	if (adapter == NULL) {
+		DXG_ERR("Adapter not found %x", handle.v);
+		return -EINVAL;
+	}
+
+	/*
+	 * The lookup must happen under the process_adapter lock as well:
+	 * the list is modified and its entries are freed under that lock,
+	 * so a pointer obtained outside of it may already be stale when it
+	 * is released.
+	 */
+	dxgglobal_acquire_process_adapter_lock();
+	adapter_info = dxgprocess_get_adapter_info(process, adapter);
+	if (adapter_info)
+		dxgprocess_adapter_release(adapter_info);
+	else
+		ret = -EINVAL;
+	dxgglobal_release_process_adapter_lock();
+
+	return ret;
+}
+
+/*
+ * Translates an adapter handle to the adapter object using the local
+ * handle table of the process. Logs an error and returns NULL when the
+ * handle is not an adapter handle. The function takes neither the handle
+ * table lock nor a reference on the adapter.
+ */
+struct dxgadapter *dxgprocess_get_adapter(struct dxgprocess *process,
+					  struct d3dkmthandle handle)
+{
+	struct dxgadapter *adapter =
+		hmgrtable_get_object_by_type(&process->local_handle_table,
+					     HMGRENTRY_TYPE_DXGADAPTER,
+					     handle);
+
+	if (!adapter)
+		DXG_ERR("Adapter not found %x", handle.v);
+
+	return adapter;
+}
+
+/*
+ * Gets the adapter object from the process handle table.
+ * The adapter object is referenced; NULL is returned when the handle is
+ * not an adapter handle or the adapter reference count is already zero.
+ * The function acquires the handle table lock shared for the lookup.
+ */
+struct dxgadapter *dxgprocess_adapter_by_handle(struct dxgprocess *process,
+						struct d3dkmthandle handle)
+{
+	struct hmgrtable *table = &process->local_handle_table;
+	struct dxgadapter *adapter;
+
+	hmgrtable_lock(table, DXGLOCK_SHARED);
+	adapter = hmgrtable_get_object_by_type(table,
+					       HMGRENTRY_TYPE_DXGADAPTER,
+					       handle);
+	if (!adapter) {
+		DXG_ERR("adapter_by_handle failed %x", handle.v);
+	} else if (!kref_get_unless_zero(&adapter->adapter_kref)) {
+		DXG_ERR("failed to acquire adapter reference");
+		adapter = NULL;
+	}
+	hmgrtable_unlock(table, DXGLOCK_SHARED);
+
+	return adapter;
+}
+
+/* Acquires the lock of process->handle_table in shared mode. */
+void dxgprocess_ht_lock_shared_down(struct dxgprocess *process)
+{
+ hmgrtable_lock(&process->handle_table, DXGLOCK_SHARED);
+}
+
+/* Releases the shared lock of process->handle_table. */
+void dxgprocess_ht_lock_shared_up(struct dxgprocess *process)
+{
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_SHARED);
+}
+
+/* Acquires the lock of process->handle_table in exclusive mode. */
+void dxgprocess_ht_lock_exclusive_down(struct dxgprocess *process)
+{
+ hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
+}
+
+/* Releases the exclusive lock of process->handle_table. */
+void dxgprocess_ht_lock_exclusive_up(struct dxgprocess *process)
+{
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
+}
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c
index 6d4b8d9d8d07..0abf45d0d3f7 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.c
+++ b/drivers/hv/dxgkrnl/dxgvmbus.c
@@ -497,6 +497,87 @@ int dxgvmb_send_set_iospace_region(u64 start, u64 len)
return ret;
}
+/*
+ * Sends the CREATEPROCESS command over the global channel and stores the
+ * returned host handle in process->host_handle.
+ *
+ * Returns 0 on success, a negative error from message setup, channel
+ * locking or the send, or -ENOTRECOVERABLE when the host returns a zero
+ * handle.
+ */
+int dxgvmb_send_create_process(struct dxgprocess *process)
+{
+ int ret;
+ struct dxgkvmb_command_createprocess *command;
+ struct dxgkvmb_command_createprocess_return result = { 0 };
+ struct dxgvmbusmsg msg;
+ char s[WIN_MAX_PATH];
+ int i;
+ struct dxgglobal *dxgglobal = dxggbl();
+
+ ret = init_message(&msg, NULL, process, sizeof(*command));
+ if (ret)
+ return ret;
+ command = (void *)msg.msg;
+
+ ret = dxgglobal_acquire_channel_lock();
+ if (ret < 0)
+ goto cleanup;
+
+ command_vm_to_host_init1(&command->hdr, DXGK_VMBCOMMAND_CREATEPROCESS);
+ command->process = process;
+ command->process_id = process->pid;
+ command->linux_process = 1;
+ s[0] = 0;
+ __get_task_comm(s, WIN_MAX_PATH, current);
+ /* Widen the task name to 16-bit units, including the terminator */
+ for (i = 0; i < WIN_MAX_PATH; i++) {
+ command->process_name[i] = s[i];
+ if (s[i] == 0)
+ break;
+ }
+
+ ret = dxgvmb_send_sync_msg(&dxgglobal->channel, msg.hdr, msg.size,
+ &result, sizeof(result));
+ if (ret < 0) {
+ DXG_ERR("create_process failed %d", ret);
+ } else if (result.hprocess.v == 0) {
+ DXG_ERR("create_process returned 0 handle");
+ ret = -ENOTRECOVERABLE;
+ } else {
+ process->host_handle = result.hprocess;
+ DXG_TRACE("create_process returned %x",
+ process->host_handle.v);
+ }
+
+ dxgglobal_release_channel_lock();
+
+cleanup:
+ free_message(&msg, process);
+ if (ret)
+ DXG_TRACE("err: %d", ret);
+ return ret;
+}
+
+/*
+ * Sends the DESTROYPROCESS command for the given host process handle over
+ * the global channel. Returns 0 on success or the error produced by
+ * message setup, channel locking or the send.
+ */
+int dxgvmb_send_destroy_process(struct d3dkmthandle process)
+{
+ int ret;
+ struct dxgkvmb_command_destroyprocess *command;
+ struct dxgvmbusmsg msg;
+ struct dxgglobal *dxgglobal = dxggbl();
+
+ ret = init_message(&msg, NULL, NULL, sizeof(*command));
+ if (ret)
+ return ret;
+ command = (void *)msg.msg;
+
+ ret = dxgglobal_acquire_channel_lock();
+ if (ret < 0)
+ goto cleanup;
+ command_vm_to_host_init2(&command->hdr, DXGK_VMBCOMMAND_DESTROYPROCESS,
+ process);
+ ret = dxgvmb_send_sync_msg_ntstatus(&dxgglobal->channel,
+ msg.hdr, msg.size);
+ dxgglobal_release_channel_lock();
+
+cleanup:
+ free_message(&msg, NULL);
+ if (ret)
+ DXG_TRACE("err: %d", ret);
+ return ret;
+}
+
/*
* Virtual GPU messages to the host
*/
@@ -591,3 +672,86 @@ int dxgvmb_send_get_internal_adapter_info(struct dxgadapter *adapter)
DXG_ERR("Failed to get adapter info: %d", ret);
return ret;
}
+
+/*
+ * Forwards a query-adapter-info request to the host and copies the reply
+ * back to the user buffer described by args.
+ *
+ * The user private data is sent as the tail of the command. The reply is
+ * received into the same message buffer; with the newer VM bus interface
+ * it is prefixed by an ntstatus value. For the adapter type queries the
+ * reply is patched to describe a paravirtualized, render-only adapter.
+ *
+ * Returns 0 on success, -EINVAL for an oversized private data buffer,
+ * -EFAULT when user memory cannot be accessed, or the error produced by
+ * the message setup, the send or the host status.
+ */
+int dxgvmb_send_query_adapter_info(struct dxgprocess *process,
+				   struct dxgadapter *adapter,
+				   struct d3dkmt_queryadapterinfo *args)
+{
+	struct dxgkvmb_command_queryadapterinfo *command;
+	u32 cmd_size;
+	int ret;
+	u32 private_data_size;
+	void *private_data;
+	struct dxgvmbusmsg msg = {.hdr = NULL};
+	struct dxgglobal *dxgglobal = dxggbl();
+
+	/*
+	 * The size comes from user space. Bound it before it is used in
+	 * 32-bit arithmetic, so the command size cannot wrap around and
+	 * become smaller than the amount of data copied below.
+	 */
+	if (args->private_data_size > DXG_MAX_VM_BUS_PACKET_SIZE) {
+		DXG_ERR("invalid private data size");
+		return -EINVAL;
+	}
+	/* "- 1": private_data[1] is already part of sizeof(*command) */
+	cmd_size = sizeof(*command) + args->private_data_size - 1;
+
+	ret = init_message(&msg, adapter, process, cmd_size);
+	if (ret)
+		goto cleanup;
+	command = (void *)msg.msg;
+
+	if (copy_from_user(command->private_data,
+			   args->private_data, args->private_data_size)) {
+		DXG_ERR("Faled to copy private data");
+		ret = -EFAULT;
+		goto cleanup;
+	}
+
+	command_vgpu_to_host_init2(&command->hdr,
+				   DXGK_VMBCOMMAND_QUERYADAPTERINFO,
+				   process->host_handle);
+	command->private_data_size = args->private_data_size;
+	command->query_type = args->type;
+
+	if (dxgglobal->vmbus_ver >= DXGK_VMBUS_INTERFACE_VERSION) {
+		/* The reply is an ntstatus followed by the private data */
+		private_data = msg.msg;
+		private_data_size = command->private_data_size +
+				    sizeof(struct ntstatus);
+	} else {
+		private_data = command->private_data;
+		private_data_size = command->private_data_size;
+	}
+
+	ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size,
+				   private_data, private_data_size);
+	if (ret < 0)
+		goto cleanup;
+
+	if (dxgglobal->vmbus_ver >= DXGK_VMBUS_INTERFACE_VERSION) {
+		ret = ntstatus2int(*(struct ntstatus *)private_data);
+		if (ret < 0)
+			goto cleanup;
+		private_data = (char *)private_data + sizeof(struct ntstatus);
+	}
+
+	switch (args->type) {
+	case _KMTQAITYPE_ADAPTERTYPE:
+	case _KMTQAITYPE_ADAPTERTYPE_RENDER:
+	{
+		struct d3dkmt_adaptertype *adapter_type =
+			(void *)private_data;
+
+		/* Patch the reply only when it can hold the structure */
+		if (args->private_data_size < sizeof(*adapter_type))
+			break;
+		adapter_type->paravirtualized = 1;
+		adapter_type->display_supported = 0;
+		adapter_type->post_device = 0;
+		adapter_type->indirect_display_device = 0;
+		adapter_type->acg_supported = 0;
+		adapter_type->support_set_timings_from_vidpn = 0;
+		break;
+	}
+	default:
+		break;
+	}
+
+	if (copy_to_user(args->private_data, private_data,
+			 args->private_data_size)) {
+		DXG_ERR("Faled to copy private data to user");
+		ret = -EFAULT;
+	}
+
+cleanup:
+	free_message(&msg, process);
+	if (ret)
+		DXG_TRACE("err: %d", ret);
+	return ret;
+}
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h
index 584cdd3db6c0..a805a396e083 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.h
+++ b/drivers/hv/dxgkrnl/dxgvmbus.h
@@ -14,7 +14,11 @@
#ifndef _DXGVMBUS_H
#define _DXGVMBUS_H
+struct dxgprocess;
+struct dxgadapter;
+
#define DXG_MAX_VM_BUS_PACKET_SIZE (1024 * 128)
+#define DXG_VM_PROCESS_NAME_LENGTH 260
enum dxgkvmb_commandchanneltype {
DXGKVMB_VGPU_TO_HOST,
@@ -169,6 +173,26 @@ struct dxgkvmb_command_setiospaceregion {
u32 shared_page_gpadl;
};
+/* Message body of the DXGK_VMBCOMMAND_CREATEPROCESS command */
+struct dxgkvmb_command_createprocess {
+ struct dxgkvmb_command_vm_to_host hdr;
+ /* Set to the guest dxgprocess pointer by the sender */
+ void *process;
+ /* Set to dxgprocess::pid by the sender */
+ u64 process_id;
+ /* Task name, one 16-bit unit per character, zero terminated */
+ u16 process_name[DXG_VM_PROCESS_NAME_LENGTH + 1];
+ u8 csrss_process:1;
+ u8 dwm_process:1;
+ u8 wow64_process:1;
+ /* Always set to 1 by the sender in this driver */
+ u8 linux_process:1;
+};
+
+/* Reply to CREATEPROCESS; a zero handle is treated as a failure */
+struct dxgkvmb_command_createprocess_return {
+ struct d3dkmthandle hprocess;
+};
+
+// Message body of the DXGK_VMBCOMMAND_DESTROYPROCESS command.
+// The command returns ntstatus
+struct dxgkvmb_command_destroyprocess {
+ struct dxgkvmb_command_vm_to_host hdr;
+};
+
struct dxgkvmb_command_openadapter {
struct dxgkvmb_command_vgpu_to_host hdr;
u32 vmbus_interface_version;
@@ -211,4 +235,16 @@ struct dxgkvmb_command_getinternaladapterinfo_return {
struct winluid host_vgpu_luid;
};
+/* Message body of the DXGK_VMBCOMMAND_QUERYADAPTERINFO command */
+struct dxgkvmb_command_queryadapterinfo {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+ enum kmtqueryadapterinfotype query_type;
+ /* Number of bytes of private data that follow */
+ u32 private_data_size;
+ /*
+ * First byte of the variable size private data. The sender allocates
+ * sizeof(struct) + private_data_size - 1 bytes for the command.
+ */
+ u8 private_data[1];
+};
+
+/* Reply layout: an ntstatus value followed by the private data bytes */
+struct dxgkvmb_command_queryadapterinfo_return {
+ struct ntstatus status;
+ u8 private_data[1];
+};
+
#endif /* _DXGVMBUS_H */
diff --git a/drivers/hv/dxgkrnl/hmgr.c b/drivers/hv/dxgkrnl/hmgr.c
new file mode 100644
index 000000000000..526b50f46d96
--- /dev/null
+++ b/drivers/hv/dxgkrnl/hmgr.c
@@ -0,0 +1,563 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * Handle manager implementation
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+
+#include "misc.h"
+#include "dxgkrnl.h"
+#include "hmgr.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "dxgk: " fmt
+
+const struct d3dkmthandle zerohandle;
+
+/*
+ * Handle parameters
+ *
+ * A handle is a 32-bit value with the following layout:
+ * bits 0-5 instance
+ * bits 6-29 index of the entry in the handle table
+ * bits 30-31 unique value, compared with the entry on every lookup
+ */
+#define HMGRHANDLE_INSTANCE_BITS 6
+#define HMGRHANDLE_INDEX_BITS 24
+#define HMGRHANDLE_UNIQUE_BITS 2
+
+#define HMGRHANDLE_INSTANCE_SHIFT 0
+#define HMGRHANDLE_INDEX_SHIFT \
+ (HMGRHANDLE_INSTANCE_BITS + HMGRHANDLE_INSTANCE_SHIFT)
+#define HMGRHANDLE_UNIQUE_SHIFT \
+ (HMGRHANDLE_INDEX_BITS + HMGRHANDLE_INDEX_SHIFT)
+
+#define HMGRHANDLE_INSTANCE_MASK \
+ (((1 << HMGRHANDLE_INSTANCE_BITS) - 1) << HMGRHANDLE_INSTANCE_SHIFT)
+#define HMGRHANDLE_INDEX_MASK \
+ (((1 << HMGRHANDLE_INDEX_BITS) - 1) << HMGRHANDLE_INDEX_SHIFT)
+#define HMGRHANDLE_UNIQUE_MASK \
+ (((1 << HMGRHANDLE_UNIQUE_BITS) - 1) << HMGRHANDLE_UNIQUE_SHIFT)
+
+/* Largest value that fits into each handle field */
+#define HMGRHANDLE_INSTANCE_MAX ((1 << HMGRHANDLE_INSTANCE_BITS) - 1)
+#define HMGRHANDLE_INDEX_MAX ((1 << HMGRHANDLE_INDEX_BITS) - 1)
+#define HMGRHANDLE_UNIQUE_MAX ((1 << HMGRHANDLE_UNIQUE_BITS) - 1)
+
+/*
+ * Handle entry
+ *
+ * An entry either holds an object pointer (allocated entry) or is linked
+ * into the free list through the prev/next indices; both share storage.
+ */
+struct hmgrentry {
+ union {
+ void *object;
+ struct {
+ u32 prev_free_index;
+ u32 next_free_index;
+ };
+ };
+ /* HMGRENTRY_TYPE_FREE for entries on the free list */
+ u32 type:HMGRENTRY_TYPE_BITS + 1;
+ /* Must match the unique field of a handle that refers to the entry */
+ u32 unique:HMGRHANDLE_UNIQUE_BITS;
+ u32 instance:HMGRHANDLE_INSTANCE_BITS;
+ /* Set for entries that lookups must reject unless told to ignore it */
+ u32 destroyed:1;
+};
+
+/* Initial value of table_size_increment, the step used to grow a table */
+#define HMGRTABLE_SIZE_INCREMENT 1024
+/* hmgrtable_alloc_handle() grows the table at or below this free count */
+#define HMGRTABLE_MIN_FREE_ENTRIES 128
+/* Marks the end of the free list; does not fit into the index field */
+#define HMGRTABLE_INVALID_INDEX (~((1 << HMGRHANDLE_INDEX_BITS) - 1))
+#define HMGRTABLE_SIZE_MAX 0xFFFFFFF
+
+static u32 table_size_increment = HMGRTABLE_SIZE_INCREMENT;
+
+/* Extracts the unique field (the top HMGRHANDLE_UNIQUE_BITS) of a handle. */
+static u32 get_unique(struct d3dkmthandle h)
+{
+	u32 shifted = h.v >> HMGRHANDLE_UNIQUE_SHIFT;
+
+	return shifted & HMGRHANDLE_UNIQUE_MAX;
+}
+
+/* Extracts the handle table index field of a handle. */
+static u32 get_index(struct d3dkmthandle h)
+{
+	u32 shifted = h.v >> HMGRHANDLE_INDEX_SHIFT;
+
+	return shifted & HMGRHANDLE_INDEX_MAX;
+}
+
+/*
+ * Checks that handle h refers to an allocated entry of the table.
+ *
+ * The index must be inside the table, the unique value must match the
+ * entry, the entry must not be free and, unless ignore_destroyed is set,
+ * must not be marked destroyed. When t is not HMGRENTRY_TYPE_FREE, the
+ * entry type must also be equal to t. Every failure is logged.
+ */
+static bool is_handle_valid(struct hmgrtable *table, struct d3dkmthandle h,
+			    bool ignore_destroyed, enum hmgrentry_type t)
+{
+	const u32 idx = get_index(h);
+	const u32 uniq = get_unique(h);
+	struct hmgrentry *e;
+
+	if (idx >= table->table_size) {
+		DXG_ERR("Invalid index %x %d", h.v, idx);
+		return false;
+	}
+
+	e = &table->entry_table[idx];
+
+	if (e->unique != uniq) {
+		DXG_ERR("Invalid unique %x %d %d %d %p",
+			h.v, uniq, e->unique, idx, e->object);
+		return false;
+	}
+
+	if (!ignore_destroyed && e->destroyed) {
+		DXG_ERR("Invalid destroyed value");
+		return false;
+	}
+
+	if (e->type == HMGRENTRY_TYPE_FREE) {
+		DXG_ERR("Entry is freed %x %d", h.v, idx);
+		return false;
+	}
+
+	if (t != HMGRENTRY_TYPE_FREE && e->type != t) {
+		DXG_ERR("type mismatch %x %d %d", h.v, t, e->type);
+		return false;
+	}
+
+	return true;
+}
+
+/* Packs the index, unique and instance fields into a handle value. */
+static struct d3dkmthandle build_handle(u32 index, u32 unique, u32 instance)
+{
+	struct d3dkmthandle handle;
+	u32 index_part = (index << HMGRHANDLE_INDEX_SHIFT) &
+			 HMGRHANDLE_INDEX_MASK;
+	u32 unique_part = (unique << HMGRHANDLE_UNIQUE_SHIFT) &
+			  HMGRHANDLE_UNIQUE_MASK;
+	u32 instance_part = (instance << HMGRHANDLE_INSTANCE_SHIFT) &
+			    HMGRHANDLE_INSTANCE_MASK;
+
+	handle.v = index_part | unique_part | instance_part;
+	return handle;
+}
+
+/* Returns the number of table entries that are not on the free list. */
+inline u32 hmgrtable_get_used_entry_count(struct hmgrtable *table)
+{
+ DXGKRNL_ASSERT(table->table_size >= table->free_count);
+ return (table->table_size - table->free_count);
+}
+
+/*
+ * Sets the destroyed flag of the entry referred to by h.
+ * Returns false when h is not a valid handle of an entry that is not
+ * already marked destroyed; the entry type is not checked.
+ */
+bool hmgrtable_mark_destroyed(struct hmgrtable *table, struct d3dkmthandle h)
+{
+	bool valid = is_handle_valid(table, h, false, HMGRENTRY_TYPE_FREE);
+
+	if (valid)
+		table->entry_table[get_index(h)].destroyed = true;
+
+	return valid;
+}
+
+/*
+ * Clears the destroyed flag of the entry referred to by h.
+ *
+ * NOTE(review): the function returns true even when h is not a valid
+ * handle, unlike hmgrtable_mark_destroyed() which returns false in that
+ * case. Confirm with the callers whether this is intended.
+ */
+bool hmgrtable_unmark_destroyed(struct hmgrtable *table, struct d3dkmthandle h)
+{
+	if (is_handle_valid(table, h, true, HMGRENTRY_TYPE_FREE)) {
+		DXGKRNL_ASSERT(table->entry_table[get_index(h)].destroyed);
+		table->entry_table[get_index(h)].destroyed = 0;
+	}
+
+	return true;
+}
+
+/*
+ * Grows the handle table and appends the new entries to the free list.
+ *
+ * The table grows by table_size_increment entries, or to NumEntries
+ * entries when that is larger. Existing entries are copied to the new
+ * allocation and the old one is freed.
+ *
+ * Returns false when the free list tail is inconsistent, the new size
+ * exceeds HMGRHANDLE_INDEX_MAX or the allocation fails; the table is left
+ * unchanged in these cases.
+ */
+static bool expand_table(struct hmgrtable *table, u32 NumEntries)
+{
+ u32 new_table_size;
+ struct hmgrentry *new_entry;
+ u32 table_index;
+ u32 new_free_count;
+ u32 prev_free_index;
+ u32 tail_index = table->free_handle_list_tail;
+
+ /* The tail should point to the last free element in the list */
+ if (table->free_count != 0) {
+ if (tail_index >= table->table_size ||
+ table->entry_table[tail_index].next_free_index !=
+ HMGRTABLE_INVALID_INDEX) {
+ DXG_ERR("corruption");
+ DXG_ERR("tail_index: %x", tail_index);
+ DXG_ERR("table size: %x", table->table_size);
+ DXG_ERR("free_count: %d", table->free_count);
+ DXG_ERR("NumEntries: %x", NumEntries);
+ return false;
+ }
+ }
+
+ new_free_count = table_size_increment + table->free_count;
+ new_table_size = table->table_size + table_size_increment;
+ /* Grow further when the caller needs more than one increment */
+ if (new_table_size < NumEntries) {
+ new_free_count += NumEntries - new_table_size;
+ new_table_size = NumEntries;
+ }
+
+ if (new_table_size > HMGRHANDLE_INDEX_MAX) {
+ DXG_ERR("Invalid new table size");
+ return false;
+ }
+
+ new_entry = (struct hmgrentry *)
+ vzalloc(new_table_size * sizeof(struct hmgrentry));
+ if (new_entry == NULL) {
+ DXG_ERR("allocation failed");
+ return false;
+ }
+
+ if (table->entry_table) {
+ memcpy(new_entry, table->entry_table,
+ table->table_size * sizeof(struct hmgrentry));
+ vfree(table->entry_table);
+ } else {
+ /* First allocation: the free list starts at entry 0 */
+ table->free_handle_list_head = 0;
+ }
+
+ table->entry_table = new_entry;
+
+ /* Initialize new table entries and add to the free list */
+ table_index = table->table_size;
+
+ prev_free_index = table->free_handle_list_tail;
+
+ while (table_index < new_table_size) {
+ struct hmgrentry *entry = &table->entry_table[table_index];
+
+ entry->prev_free_index = prev_free_index;
+ entry->next_free_index = table_index + 1;
+ entry->type = HMGRENTRY_TYPE_FREE;
+ entry->unique = 1;
+ entry->instance = 0;
+ prev_free_index = table_index;
+
+ table_index++;
+ }
+
+ /* Terminate the free list at the last new entry */
+ table->entry_table[table_index - 1].next_free_index =
+ (u32) HMGRTABLE_INVALID_INDEX;
+
+ if (table->free_count != 0) {
+ /* Link the current free list with the new entries */
+ struct hmgrentry *entry;
+
+ entry = &table->entry_table[table->free_handle_list_tail];
+ entry->next_free_index = table->table_size;
+ }
+ table->free_handle_list_tail = new_table_size - 1;
+ /* An empty free list now starts at the first new entry */
+ if (table->free_handle_list_head == HMGRTABLE_INVALID_INDEX)
+ table->free_handle_list_head = table->table_size;
+
+ table->table_size = new_table_size;
+ table->free_count = new_free_count;
+
+ return true;
+}
+
+/*
+ * Initializes an empty handle table owned by the given process. No memory
+ * is allocated here: the entry array pointer stays NULL and the free list
+ * is empty.
+ */
+void hmgrtable_init(struct hmgrtable *table, struct dxgprocess *process)
+{
+	init_rwsem(&table->table_lock);
+
+	table->process = process;
+
+	table->entry_table = NULL;
+	table->table_size = 0;
+
+	table->free_count = 0;
+	table->free_handle_list_head = HMGRTABLE_INVALID_INDEX;
+	table->free_handle_list_tail = HMGRTABLE_INVALID_INDEX;
+}
+
+/*
+ * Frees the entry array of the table. Objects referenced by the entries
+ * are not touched.
+ */
+void hmgrtable_destroy(struct hmgrtable *table)
+{
+	/* vfree() ignores a NULL pointer */
+	vfree(table->entry_table);
+	table->entry_table = NULL;
+}
+
+/*
+ * Acquires the table lock: for writing when state is DXGLOCK_EXCL, for
+ * reading otherwise.
+ */
+void hmgrtable_lock(struct hmgrtable *table, enum dxglockstate state)
+{
+	if (state != DXGLOCK_EXCL) {
+		down_read(&table->table_lock);
+		return;
+	}
+
+	down_write(&table->table_lock);
+}
+
+/*
+ * Releases the table lock; state must match the value that was passed to
+ * hmgrtable_lock().
+ */
+void hmgrtable_unlock(struct hmgrtable *table, enum dxglockstate state)
+{
+	if (state != DXGLOCK_EXCL) {
+		up_read(&table->table_lock);
+		return;
+	}
+
+	up_write(&table->table_lock);
+}
+
+/*
+ * Allocates the entry at the head of the free list for the object and
+ * returns a handle to it.
+ *
+ * The table is expanded first when the number of free entries is at or
+ * below HMGRTABLE_MIN_FREE_ENTRIES. When make_valid is false the entry is
+ * created with the destroyed flag set.
+ *
+ * The function does not take the table lock itself.
+ *
+ * Returns zerohandle when the table cannot be expanded or the free list is
+ * inconsistent.
+ */
+struct d3dkmthandle hmgrtable_alloc_handle(struct hmgrtable *table,
+ void *object,
+ enum hmgrentry_type type,
+ bool make_valid)
+{
+ u32 index;
+ struct hmgrentry *entry;
+ u32 unique;
+
+ DXGKRNL_ASSERT(type <= HMGRENTRY_TYPE_LIMIT);
+ DXGKRNL_ASSERT(type > HMGRENTRY_TYPE_FREE);
+
+ if (table->free_count <= HMGRTABLE_MIN_FREE_ENTRIES) {
+ if (!expand_table(table, 0)) {
+ DXG_ERR("hmgrtable expand_table failed");
+ return zerohandle;
+ }
+ }
+
+ if (table->free_handle_list_head >= table->table_size) {
+ DXG_ERR("hmgrtable corrupted handle table head");
+ return zerohandle;
+ }
+
+ index = table->free_handle_list_head;
+ entry = &table->entry_table[index];
+
+ if (entry->type != HMGRENTRY_TYPE_FREE) {
+ DXG_ERR("hmgrtable expected free handle");
+ return zerohandle;
+ }
+
+ /* Unlink the entry from the head of the free list */
+ table->free_handle_list_head = entry->next_free_index;
+
+ if (entry->next_free_index != table->free_handle_list_tail) {
+ if (entry->next_free_index >= table->table_size) {
+ DXG_ERR("hmgrtable invalid next free index");
+ return zerohandle;
+ }
+ table->entry_table[entry->next_free_index].prev_free_index =
+ HMGRTABLE_INVALID_INDEX;
+ }
+
+ /* The handle uses the unique value currently stored in the entry */
+ unique = table->entry_table[index].unique;
+
+ /* Storing the object overwrites the free list links of the entry */
+ table->entry_table[index].object = object;
+ table->entry_table[index].type = type;
+ table->entry_table[index].instance = 0;
+ table->entry_table[index].destroyed = !make_valid;
+ table->free_count--;
+ DXGKRNL_ASSERT(table->free_count <= table->table_size);
+
+ return build_handle(index, unique, table->entry_table[index].instance);
+}
+
+/*
+ * Same as hmgrtable_assign_handle(), but takes the table lock in exclusive
+ * mode around the call.
+ */
+int hmgrtable_assign_handle_safe(struct hmgrtable *table,
+				 void *object,
+				 enum hmgrentry_type type,
+				 struct d3dkmthandle h)
+{
+	int status;
+
+	hmgrtable_lock(table, DXGLOCK_EXCL);
+	status = hmgrtable_assign_handle(table, object, type, h);
+	hmgrtable_unlock(table, DXGLOCK_EXCL);
+
+	return status;
+}
+
+/*
+ * Binds the object to the entry selected by the given handle value.
+ *
+ * The index and the unique value are taken from h. The table is expanded
+ * when the index is beyond its current size. The entry must be free; it is
+ * removed from the free list and initialized as a valid entry.
+ *
+ * The function does not take the table lock itself.
+ *
+ * Returns 0 on success, -EINVAL when the index is too big, the entry is in
+ * use or the free list is inconsistent, -ENOMEM when the table cannot be
+ * expanded.
+ */
+int hmgrtable_assign_handle(struct hmgrtable *table, void *object,
+ enum hmgrentry_type type, struct d3dkmthandle h)
+{
+ u32 index = get_index(h);
+ u32 unique = get_unique(h);
+ struct hmgrentry *entry = NULL;
+
+ DXG_TRACE("%x, %d %p, %p", h.v, index, object, table);
+
+ if (index >= HMGRHANDLE_INDEX_MAX) {
+ DXG_ERR("handle index is too big: %x %d", h.v, index);
+ return -EINVAL;
+ }
+
+ if (index >= table->table_size) {
+ u32 new_size = index + table_size_increment;
+
+ if (new_size > HMGRHANDLE_INDEX_MAX)
+ new_size = HMGRHANDLE_INDEX_MAX;
+ if (!expand_table(table, new_size)) {
+ DXG_ERR("failed to expand handle table %d",
+ new_size);
+ return -ENOMEM;
+ }
+ }
+
+ entry = &table->entry_table[index];
+
+ if (entry->type != HMGRENTRY_TYPE_FREE) {
+ DXG_ERR("the entry is not free: %d %x", entry->type,
+ hmgrtable_build_entry_handle(table, index).v);
+ return -EINVAL;
+ }
+
+ /* Detach the entry from its successor, or move the tail back */
+ if (index != table->free_handle_list_tail) {
+ if (entry->next_free_index >= table->table_size) {
+ DXG_ERR("hmgr: invalid next free index %d",
+ entry->next_free_index);
+ return -EINVAL;
+ }
+ table->entry_table[entry->next_free_index].prev_free_index =
+ entry->prev_free_index;
+ } else {
+ table->free_handle_list_tail = entry->prev_free_index;
+ }
+
+ /* Detach the entry from its predecessor, or move the head forward */
+ if (index != table->free_handle_list_head) {
+ if (entry->prev_free_index >= table->table_size) {
+ DXG_ERR("hmgr: invalid next prev index %d",
+ entry->prev_free_index);
+ return -EINVAL;
+ }
+ table->entry_table[entry->prev_free_index].next_free_index =
+ entry->next_free_index;
+ } else {
+ table->free_handle_list_head = entry->next_free_index;
+ }
+
+ entry->prev_free_index = HMGRTABLE_INVALID_INDEX;
+ entry->next_free_index = HMGRTABLE_INVALID_INDEX;
+ /* The object pointer shares storage with the two indices above */
+ entry->object = object;
+ entry->type = type;
+ entry->instance = 0;
+ entry->unique = unique;
+ entry->destroyed = false;
+
+ table->free_count--;
+ DXGKRNL_ASSERT(table->free_count <= table->table_size);
+ return 0;
+}
+
+/*
+ * Locked variant of hmgrtable_alloc_handle(): allocates the handle while
+ * holding the table lock exclusively and returns the allocated handle.
+ */
+struct d3dkmthandle hmgrtable_alloc_handle_safe(struct hmgrtable *table,
+						void *obj,
+						enum hmgrentry_type type,
+						bool make_valid)
+{
+	struct d3dkmthandle handle;
+
+	hmgrtable_lock(table, DXGLOCK_EXCL);
+	handle = hmgrtable_alloc_handle(table, obj, type, make_valid);
+	hmgrtable_unlock(table, DXGLOCK_EXCL);
+
+	return handle;
+}
+
+/*
+ * Releases a handle and puts its table entry back to the free list.
+ *
+ * The handle is validated against the expected type t, ignoring the
+ * "destroyed" flag. An invalid handle is only reported via DXG_ERR.
+ * The function does not take the table lock itself;
+ * hmgrtable_free_handle_safe() is the wrapper, which does.
+ */
+void hmgrtable_free_handle(struct hmgrtable *table, enum hmgrentry_type t,
+			   struct d3dkmthandle h)
+{
+	struct hmgrentry *entry;
+	u32 i = get_index(h);
+	u32 tail;
+
+	DXG_TRACE("%p %x", table, h.v);
+
+	/* Ignore the destroyed flag when checking the handle */
+	if (!is_handle_valid(table, h, true, t)) {
+		DXG_ERR("Invalid handle to free: %d %x", i, h.v);
+		return;
+	}
+
+	DXGKRNL_ASSERT(table->free_count < table->table_size);
+	entry = &table->entry_table[i];
+	entry->type = HMGRENTRY_TYPE_FREE;
+	entry->destroyed = 0;
+
+	/*
+	 * Advance the unique value, which is a part of the handle value, so
+	 * the next handle built for this entry differs from the freed one.
+	 * The value must not be reset before the increment, otherwise every
+	 * freed entry ends up with the same unique value.
+	 */
+	if (entry->unique != HMGRHANDLE_UNIQUE_MAX)
+		entry->unique += 1;
+	else
+		entry->unique = 1;
+
+	table->free_count++;
+	DXGKRNL_ASSERT(table->free_count <= table->table_size);
+
+	/*
+	 * Insert the index to the free list at the tail.
+	 */
+	tail = table->free_handle_list_tail;
+	entry->next_free_index = HMGRTABLE_INVALID_INDEX;
+	entry->prev_free_index = tail;
+	if (tail < table->table_size) {
+		table->entry_table[tail].next_free_index = i;
+	} else {
+		/*
+		 * The free list is empty (the tail is not a valid index):
+		 * do not index the table with it, the entry becomes the
+		 * head of the list as well.
+		 */
+		table->free_handle_list_head = i;
+	}
+	table->free_handle_list_tail = i;
+}
+
+/*
+ * Locked variant of hmgrtable_free_handle(): the handle is freed while the
+ * table lock is held exclusively.
+ */
+void hmgrtable_free_handle_safe(struct hmgrtable *table,
+				enum hmgrentry_type t,
+				struct d3dkmthandle h)
+{
+	hmgrtable_lock(table, DXGLOCK_EXCL);
+
+	hmgrtable_free_handle(table, t, h);
+
+	hmgrtable_unlock(table, DXGLOCK_EXCL);
+}
+
+/*
+ * Builds the handle value for the table entry at the given index from the
+ * index and the entry's current unique and instance values.
+ * The index must be inside the table.
+ */
+struct d3dkmthandle hmgrtable_build_entry_handle(struct hmgrtable *table,
+						 u32 index)
+{
+	struct hmgrentry *entry;
+
+	DXGKRNL_ASSERT(index < table->table_size);
+
+	entry = &table->entry_table[index];
+	return build_handle(index, entry->unique, entry->instance);
+}
+
+/*
+ * Returns the object stored for the handle, or NULL when the handle does not
+ * pass is_handle_valid() (called with HMGRENTRY_TYPE_FREE as the type and
+ * without ignoring the "destroyed" flag).
+ */
+void *hmgrtable_get_object(struct hmgrtable *table, struct d3dkmthandle h)
+{
+	if (is_handle_valid(table, h, false, HMGRENTRY_TYPE_FREE))
+		return table->entry_table[get_index(h)].object;
+
+	return NULL;
+}
+
+/*
+ * Returns the object stored for the handle when the handle is valid for the
+ * given entry type. An invalid handle is reported via DXG_ERR and NULL is
+ * returned.
+ */
+void *hmgrtable_get_object_by_type(struct hmgrtable *table,
+				   enum hmgrentry_type type,
+				   struct d3dkmthandle h)
+{
+	if (is_handle_valid(table, h, false, type))
+		return table->entry_table[get_index(h)].object;
+
+	DXG_ERR("Invalid handle %x", h.v);
+	return NULL;
+}
+
+/*
+ * Returns the object stored in the table entry at the given index.
+ * The index must be inside the table and the entry must not be free.
+ */
+void *hmgrtable_get_entry_object(struct hmgrtable *table, u32 index)
+{
+	void *object;
+
+	DXGKRNL_ASSERT(index < table->table_size);
+	DXGKRNL_ASSERT(table->entry_table[index].type != HMGRENTRY_TYPE_FREE);
+
+	object = table->entry_table[index].object;
+	return object;
+}
+
+/*
+ * Returns the type recorded in the table entry at the given index.
+ * The index must be inside the table.
+ */
+static enum hmgrentry_type hmgrtable_get_entry_type(struct hmgrtable *table,
+						    u32 index)
+{
+	enum hmgrentry_type entry_type;
+
+	DXGKRNL_ASSERT(index < table->table_size);
+
+	entry_type = (enum hmgrentry_type)table->entry_table[index].type;
+	return entry_type;
+}
+
+/*
+ * Returns the type of the entry the handle refers to, or
+ * HMGRENTRY_TYPE_FREE when the handle does not pass is_handle_valid().
+ */
+enum hmgrentry_type hmgrtable_get_object_type(struct hmgrtable *table,
+					      struct d3dkmthandle h)
+{
+	if (is_handle_valid(table, h, false, HMGRENTRY_TYPE_FREE))
+		return hmgrtable_get_entry_type(table, get_index(h));
+
+	return HMGRENTRY_TYPE_FREE;
+}
+
+/*
+ * Same lookup as hmgrtable_get_object_by_type(), but the handle is validated
+ * with the "destroyed" flag ignored and a failed lookup is not logged.
+ * Returns the stored object or NULL.
+ */
+void *hmgrtable_get_object_ignore_destroyed(struct hmgrtable *table,
+					    struct d3dkmthandle h,
+					    enum hmgrentry_type type)
+{
+	if (is_handle_valid(table, h, true, type))
+		return table->entry_table[get_index(h)].object;
+
+	return NULL;
+}
+
+/*
+ * Table enumeration helper.
+ *
+ * Scans the table starting at *index for the first entry, which is not free.
+ * When one is found, its object, handle and type are returned through the
+ * output parameters, *index is set to the position after the entry (so the
+ * next call continues from there) and true is returned.
+ * Returns false, leaving the output parameters untouched, when there are no
+ * more used entries.
+ */
+bool hmgrtable_next_entry(struct hmgrtable *tbl,
+			  u32 *index,
+			  enum hmgrentry_type *type,
+			  struct d3dkmthandle *handle,
+			  void **object)
+{
+	u32 pos;
+
+	for (pos = *index; pos < tbl->table_size; pos++) {
+		struct hmgrentry *cur = &tbl->entry_table[pos];
+
+		if (cur->type == HMGRENTRY_TYPE_FREE)
+			continue;
+
+		*index = pos + 1;
+		*object = cur->object;
+		*handle = build_handle(pos, cur->unique, cur->instance);
+		*type = cur->type;
+		return true;
+	}
+
+	return false;
+}
diff --git a/drivers/hv/dxgkrnl/hmgr.h b/drivers/hv/dxgkrnl/hmgr.h
new file mode 100644
index 000000000000..23eec301137f
--- /dev/null
+++ b/drivers/hv/dxgkrnl/hmgr.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * Handle manager definitions
+ *
+ */
+
+#ifndef _HMGR_H_
+#define _HMGR_H_
+
+#include "misc.h"
+
+struct hmgrentry;
+
+/*
+ * Handle manager table.
+ *
+ * Implementation notes:
+ * A list of free handles is built on top of the array of table entries.
+ * free_handle_list_head is the index of the first entry in the list.
+ * free_handle_list_tail is the index of an entry in the list, which is
+ * HMGRTABLE_MIN_FREE_ENTRIES from the head. It means that when a handle is
+ * freed, the next time the handle can be re-used is after allocating
+ * HMGRTABLE_MIN_FREE_ENTRIES number of handles.
+ * Handles are allocated from the start of the list and free handles are
+ * inserted after the tail of the list.
+ *
+ */
+struct hmgrtable {
+ /* presumably the process, which owns the table (see hmgrtable_init()) */
+ struct dxgprocess *process;
+ /* Array of table_size entries, indexed by the handle index */
+ struct hmgrentry *entry_table;
+ /* Index of the first entry in the free list */
+ u32 free_handle_list_head;
+ /* Index of the last entry in the free list; freed entries are added here */
+ u32 free_handle_list_tail;
+ /* Number of entries in entry_table */
+ u32 table_size;
+ /* Number of free entries; never greater than table_size */
+ u32 free_count;
+ /* presumably the lock taken by hmgrtable_lock()/hmgrtable_unlock() */
+ struct rw_semaphore table_lock;
+};
+
+/*
+ * Handle entry data types.
+ *
+ * HMGRENTRY_TYPE_FREE marks a table entry, which is not in use.
+ */
+#define HMGRENTRY_TYPE_BITS 5
+
+enum hmgrentry_type {
+ HMGRENTRY_TYPE_FREE = 0,
+ HMGRENTRY_TYPE_DXGADAPTER = 1,
+ HMGRENTRY_TYPE_DXGSHAREDRESOURCE = 2,
+ HMGRENTRY_TYPE_DXGDEVICE = 3,
+ HMGRENTRY_TYPE_DXGRESOURCE = 4,
+ HMGRENTRY_TYPE_DXGALLOCATION = 5,
+ HMGRENTRY_TYPE_DXGOVERLAY = 6,
+ HMGRENTRY_TYPE_DXGCONTEXT = 7,
+ HMGRENTRY_TYPE_DXGSYNCOBJECT = 8,
+ HMGRENTRY_TYPE_DXGKEYEDMUTEX = 9,
+ HMGRENTRY_TYPE_DXGPAGINGQUEUE = 10,
+ HMGRENTRY_TYPE_DXGDEVICESYNCOBJECT = 11,
+ HMGRENTRY_TYPE_DXGPROCESS = 12,
+ HMGRENTRY_TYPE_DXGSHAREDVMOBJECT = 13,
+ HMGRENTRY_TYPE_DXGPROTECTEDSESSION = 14,
+ HMGRENTRY_TYPE_DXGHWQUEUE = 15,
+ HMGRENTRY_TYPE_DXGREMOTEBUNDLEOBJECT = 16,
+ HMGRENTRY_TYPE_DXGCOMPOSITIONSURFACEOBJECT = 17,
+ HMGRENTRY_TYPE_DXGCOMPOSITIONSURFACEPROXY = 18,
+ HMGRENTRY_TYPE_DXGTRACKEDWORKLOAD = 19,
+ /* The largest value, which fits in HMGRENTRY_TYPE_BITS bits (31) */
+ HMGRENTRY_TYPE_LIMIT = ((1 << HMGRENTRY_TYPE_BITS) - 1),
+ /* This value (32) does not fit in HMGRENTRY_TYPE_BITS bits */
+ HMGRENTRY_TYPE_MONITOREDFENCE = HMGRENTRY_TYPE_LIMIT + 1,
+};
+
+/*
+ * Handle manager API.
+ *
+ * The "_safe" variants take the table lock exclusively around the operation.
+ * The other functions do not lock the table themselves.
+ */
+void hmgrtable_init(struct hmgrtable *tbl, struct dxgprocess *process);
+void hmgrtable_destroy(struct hmgrtable *tbl);
+void hmgrtable_lock(struct hmgrtable *tbl, enum dxglockstate state);
+void hmgrtable_unlock(struct hmgrtable *tbl, enum dxglockstate state);
+struct d3dkmthandle hmgrtable_alloc_handle(struct hmgrtable *tbl, void *object,
+					   enum hmgrentry_type t,
+					   bool make_valid);
+struct d3dkmthandle hmgrtable_alloc_handle_safe(struct hmgrtable *tbl,
+						void *obj,
+						enum hmgrentry_type t,
+						bool make_valid);
+int hmgrtable_assign_handle(struct hmgrtable *tbl, void *obj,
+			    enum hmgrentry_type t, struct d3dkmthandle h);
+int hmgrtable_assign_handle_safe(struct hmgrtable *tbl, void *obj,
+				 enum hmgrentry_type t, struct d3dkmthandle h);
+void hmgrtable_free_handle(struct hmgrtable *tbl, enum hmgrentry_type t,
+			   struct d3dkmthandle h);
+void hmgrtable_free_handle_safe(struct hmgrtable *tbl, enum hmgrentry_type t,
+				struct d3dkmthandle h);
+struct d3dkmthandle hmgrtable_build_entry_handle(struct hmgrtable *tbl,
+						 u32 index);
+enum hmgrentry_type hmgrtable_get_object_type(struct hmgrtable *tbl,
+					      struct d3dkmthandle h);
+void *hmgrtable_get_object(struct hmgrtable *tbl, struct d3dkmthandle h);
+void *hmgrtable_get_object_by_type(struct hmgrtable *tbl, enum hmgrentry_type t,
+				   struct d3dkmthandle h);
+void *hmgrtable_get_object_ignore_destroyed(struct hmgrtable *tbl,
+					    struct d3dkmthandle h,
+					    enum hmgrentry_type t);
+bool hmgrtable_mark_destroyed(struct hmgrtable *tbl, struct d3dkmthandle h);
+bool hmgrtable_unmark_destroyed(struct hmgrtable *tbl, struct d3dkmthandle h);
+void *hmgrtable_get_entry_object(struct hmgrtable *tbl, u32 index);
+/* Enumerates used entries; *start_index is advanced on each successful call */
+bool hmgrtable_next_entry(struct hmgrtable *tbl,
+			  u32 *start_index,
+			  enum hmgrentry_type *type,
+			  struct d3dkmthandle *handle,
+			  void **object);
+
+#endif
diff --git a/drivers/hv/dxgkrnl/ioctl.c b/drivers/hv/dxgkrnl/ioctl.c
index 23ecd15b0cd7..60e38d104517 100644
--- a/drivers/hv/dxgkrnl/ioctl.c
+++ b/drivers/hv/dxgkrnl/ioctl.c
@@ -22,3 +22,63 @@
#undef pr_fmt
#define pr_fmt(fmt) "dxgk: " fmt
+
+/*
+ * Descriptor of one ioctl; dxgk_ioctl() indexes the ioctls[] table of these
+ * descriptors by the ioctl number (_IOC_NR).
+ */
+struct ioctl_desc {
+ /* Handler; called with the calling dxgprocess and the user mode argument */
+ int (*ioctl_callback)(struct dxgprocess *p, void __user *arg);
+ /* Full ioctl command value; must match the command passed by the caller */
+ u32 ioctl;
+ /* presumably the size of the ioctl argument; not used by dxgk_ioctl() */
+ u32 arg_size;
+};
+
+/*
+ * Table of the supported ioctls, indexed by the ioctl number.
+ * It has no entries here, so dxgk_ioctl() rejects every ioctl code.
+ */
+static struct ioctl_desc ioctls[] = {
+
+};
+
+/*
+ * IOCTL processing
+ * The driver IOCTLs return
+ * - 0 in case of success
+ * - positive values, which are Windows NTSTATUS (for example, STATUS_PENDING).
+ *   Positive values are success codes.
+ * - Linux negative error codes
+ *
+ * The ioctl number (_IOC_NR of p1) selects the descriptor in the ioctls[]
+ * table. -ENOTTY is returned when the number is out of range, the descriptor
+ * has no callback, the full command value does not match the descriptor, or
+ * the caller's tgid differs from the tgid of the process stored in the file.
+ * Otherwise the result of the ioctl callback is returned.
+ */
+static int dxgk_ioctl(struct file *f, unsigned int p1, unsigned long p2)
+{
+	int code = _IOC_NR(p1);
+	struct dxgprocess *process;
+
+	if (code < 1 || code >= ARRAY_SIZE(ioctls)) {
+		DXG_ERR("bad ioctl %x %x %x %x",
+			code, _IOC_TYPE(p1), _IOC_SIZE(p1), _IOC_DIR(p1));
+		return -ENOTTY;
+	}
+	if (ioctls[code].ioctl_callback == NULL) {
+		DXG_ERR("ioctl callback is NULL %x", code);
+		return -ENOTTY;
+	}
+	if (ioctls[code].ioctl != p1) {
+		DXG_ERR("ioctl mismatch. Code: %x User: %x Kernel: %x",
+			code, p1, ioctls[code].ioctl);
+		return -ENOTTY;
+	}
+	process = (struct dxgprocess *)f->private_data;
+	if (process->tgid != current->tgid) {
+		DXG_ERR("Call from a wrong process: %d %d",
+			process->tgid, current->tgid);
+		return -ENOTTY;
+	}
+
+	/*
+	 * __user annotates the pointed-to data, so it is placed before the
+	 * '*', matching the "void __user *arg" callback parameter.
+	 */
+	return ioctls[code].ioctl_callback(process, (void __user *)p2);
+}
+
+/*
+ * Compat ioctl entry point: traces the command and forwards the call,
+ * with the arguments unchanged, to dxgk_ioctl().
+ */
+long dxgk_compat_ioctl(struct file *f, unsigned int p1, unsigned long p2)
+{
+	long ret;
+
+	DXG_TRACE("compat ioctl %x", p1);
+	ret = dxgk_ioctl(f, p1, p2);
+
+	return ret;
+}
+
+/*
+ * Unlocked ioctl entry point: traces the command and its ioctl number and
+ * forwards the call to dxgk_ioctl().
+ */
+long dxgk_unlocked_ioctl(struct file *f, unsigned int p1, unsigned long p2)
+{
+	long ret;
+
+	DXG_TRACE("unlocked ioctl %x Code:%d", p1, _IOC_NR(p1));
+	ret = dxgk_ioctl(f, p1, p2);
+
+	return ret;
+}
diff --git a/drivers/hv/dxgkrnl/misc.h b/drivers/hv/dxgkrnl/misc.h
index d292e9a9bb7f..dc849a8ed3f2 100644
--- a/drivers/hv/dxgkrnl/misc.h
+++ b/drivers/hv/dxgkrnl/misc.h
@@ -27,10 +27,11 @@ extern const struct d3dkmthandle zerohandle;
*
* channel_lock (VMBus channel lock)
* fd_mutex
- * plistmutex (process list mutex)
- * table_lock (handle table lock)
- * core_lock (dxgadapter lock)
- * device_lock (dxgdevice lock)
+ * plistmutex
+ * table_lock
+ * core_lock
+ * device_lock
+ * process_adapter_mutex
* adapter_list_lock
* device_mutex (dxgglobal mutex)
*/
diff --git a/include/uapi/misc/d3dkmthk.h b/include/uapi/misc/d3dkmthk.h
index 2ea04cc02a1f..c675d5827ed5 100644
--- a/include/uapi/misc/d3dkmthk.h
+++ b/include/uapi/misc/d3dkmthk.h
@@ -58,4 +58,107 @@ struct winluid {
__u32 b;
};
+/* presumably the maximum number of adapters, which can be enumerated */
+#define D3DKMT_ADAPTERS_MAX 64
+
+/*
+ * Information about one adapter. struct d3dkmt_enumadapters2 and
+ * struct d3dkmt_enumadapters3 refer to an array of these structures.
+ */
+struct d3dkmt_adapterinfo {
+ struct d3dkmthandle adapter_handle;
+ struct winluid adapter_luid;
+ __u32 num_sources;
+ __u32 present_move_regions_preferred;
+};
+
+/*
+ * Argument of the LX_DXENUMADAPTERS2 ioctl.
+ *
+ * User mode passes the address of the struct d3dkmt_adapterinfo array as a
+ * 64-bit integer, the same way as struct d3dkmt_enumadapters3 does, so the
+ * structure layout does not depend on the user mode pointer size.
+ */
+struct d3dkmt_enumadapters2 {
+	__u32				num_adapters;
+	__u32				reserved;
+#ifdef __KERNEL__
+	struct d3dkmt_adapterinfo	*adapters;
+#else
+	__u64				adapters;
+#endif
+};
+
+/* Argument of the LX_DXCLOSEADAPTER ioctl */
+struct d3dkmt_closeadapter {
+ struct d3dkmthandle adapter_handle;
+};
+
+/* Argument of the LX_DXOPENADAPTERFROMLUID ioctl */
+struct d3dkmt_openadapterfromluid {
+ struct winluid adapter_luid;
+ struct d3dkmthandle adapter_handle;
+};
+
+/*
+ * Adapter type flags: 13 one-bit flags and 19 reserved bits, which can be
+ * also accessed as the single 32-bit "value".
+ */
+struct d3dkmt_adaptertype {
+ union {
+ struct {
+ __u32 render_supported:1;
+ __u32 display_supported:1;
+ __u32 software_device:1;
+ __u32 post_device:1;
+ __u32 hybrid_discrete:1;
+ __u32 hybrid_integrated:1;
+ __u32 indirect_display_device:1;
+ __u32 paravirtualized:1;
+ __u32 acg_supported:1;
+ __u32 support_set_timings_from_vidpn:1;
+ __u32 detachable:1;
+ __u32 compute_only:1;
+ __u32 prototype:1;
+ __u32 reserved:19;
+ };
+ __u32 value;
+ };
+};
+
+/* Values of the "type" member of struct d3dkmt_queryadapterinfo */
+enum kmtqueryadapterinfotype {
+ _KMTQAITYPE_UMDRIVERPRIVATE = 0,
+ _KMTQAITYPE_ADAPTERTYPE = 15,
+ _KMTQAITYPE_ADAPTERTYPE_RENDER = 57
+};
+
+/*
+ * Argument of the LX_DXQUERYADAPTERINFO ioctl.
+ * User mode passes the address of the private data as a 64-bit integer.
+ * private_data_size is presumably the size of that buffer in bytes.
+ */
+struct d3dkmt_queryadapterinfo {
+ struct d3dkmthandle adapter;
+ enum kmtqueryadapterinfotype type;
+#ifdef __KERNEL__
+ void *private_data;
+#else
+ __u64 private_data;
+#endif
+ __u32 private_data_size;
+};
+
+/*
+ * Filter flags of struct d3dkmt_enumadapters3: two one-bit flags and 62
+ * reserved bits, which can be also accessed as the single 64-bit "value".
+ */
+union d3dkmt_enumadapters_filter {
+ struct {
+ __u64 include_compute_only:1;
+ __u64 include_display_only:1;
+ __u64 reserved:62;
+ };
+ __u64 value;
+};
+
+/*
+ * Argument of the LX_DXENUMADAPTERS3 ioctl.
+ * User mode passes the address of the struct d3dkmt_adapterinfo array as a
+ * 64-bit integer.
+ */
+struct d3dkmt_enumadapters3 {
+ union d3dkmt_enumadapters_filter filter;
+ __u32 adapter_count;
+ __u32 reserved;
+#ifdef __KERNEL__
+ struct d3dkmt_adapterinfo *adapters;
+#else
+ __u64 adapters;
+#endif
+};
+
+/*
+ * Dxgkrnl Graphics Port Driver ioctl definitions
+ *
+ * All ioctls are read/write (_IOWR) and use 0x47 as the ioctl type.
+ * The second _IOWR argument is the ioctl number; the driver uses it as the
+ * index to its table of ioctl descriptors.
+ *
+ */
+
+#define LX_DXOPENADAPTERFROMLUID \
+ _IOWR(0x47, 0x01, struct d3dkmt_openadapterfromluid)
+#define LX_DXQUERYADAPTERINFO \
+ _IOWR(0x47, 0x09, struct d3dkmt_queryadapterinfo)
+#define LX_DXENUMADAPTERS2 \
+ _IOWR(0x47, 0x14, struct d3dkmt_enumadapters2)
+#define LX_DXCLOSEADAPTER \
+ _IOWR(0x47, 0x15, struct d3dkmt_closeadapter)
+#define LX_DXENUMADAPTERS3 \
+ _IOWR(0x47, 0x3e, struct d3dkmt_enumadapters3)
+
#endif /* _D3DKMTHK_H */
^ permalink raw reply related
* [PATCH 03/55] drivers: hv: dxgkrnl: Creation of dxgadapter object
From: Eric Curtin @ 2026-03-19 20:24 UTC (permalink / raw)
To: linux-hyperv; +Cc: linux-kernel, iourit, wei.liu, decui, haiyangz
In-Reply-To: <20260319202509.63802-1-eric.curtin@docker.com>
From: Iouri Tarassov <iourit@linux.microsoft.com>
Handle creation and destruction of dxgadapter object, which
represents a virtual compute device, projected to the VM by
the host. The dxgadapter object is created when the
corresponding VMBus channel is offered by Hyper-V.
There could be multiple virtual compute device objects, projected
by the host to VM. They are enumerated by issuing IOCTLs to
the /dev/dxg device.
The adapter object can start functioning only when the global VMBus
channel and the corresponding per device VMBus channel are
initialized. Notifications about arrival of a virtual compute PCI
device and VMBus channels can happen in any order. Therefore,
the initial dxgadapter object state is DXGADAPTER_STATE_WAITING_VMBUS.
A list of VMBus channels and a list of waiting dxgadapter objects
are maintained. When dxgkrnl is notified about a VMBus channel
arrival, it tries to start all adapters, which are not started yet.
Properties of the adapter object are determined by sending VMBus
messages to the host to the corresponding VMBus channel.
When the per virtual compute device VMBus channel or the global
channel are destroyed, the adapter object is destroyed.
Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com>
[kms: forward port to 6.6 from 6.1. No code changes made.]
Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
---
drivers/hv/dxgkrnl/Makefile | 2 +-
drivers/hv/dxgkrnl/dxgadapter.c | 170 +++++++++++++++++++++++++
drivers/hv/dxgkrnl/dxgkrnl.h | 85 +++++++++++++
drivers/hv/dxgkrnl/dxgmodule.c | 204 +++++++++++++++++++++++++++++-
drivers/hv/dxgkrnl/dxgvmbus.c | 217 +++++++++++++++++++++++++++++---
drivers/hv/dxgkrnl/dxgvmbus.h | 128 +++++++++++++++++++
drivers/hv/dxgkrnl/misc.c | 37 ++++++
drivers/hv/dxgkrnl/misc.h | 24 +++-
8 files changed, 844 insertions(+), 23 deletions(-)
create mode 100644 drivers/hv/dxgkrnl/dxgadapter.c
create mode 100644 drivers/hv/dxgkrnl/misc.c
diff --git a/drivers/hv/dxgkrnl/Makefile b/drivers/hv/dxgkrnl/Makefile
index 76349064b60a..2ed07d877c91 100644
--- a/drivers/hv/dxgkrnl/Makefile
+++ b/drivers/hv/dxgkrnl/Makefile
@@ -2,4 +2,4 @@
# Makefile for the hyper-v compute device driver (dxgkrnl).
obj-$(CONFIG_DXGKRNL) += dxgkrnl.o
-dxgkrnl-y := dxgmodule.o dxgvmbus.o
+dxgkrnl-y := dxgmodule.o misc.o dxgadapter.o ioctl.o dxgvmbus.o
diff --git a/drivers/hv/dxgkrnl/dxgadapter.c b/drivers/hv/dxgkrnl/dxgadapter.c
new file mode 100644
index 000000000000..07d47699d255
--- /dev/null
+++ b/drivers/hv/dxgkrnl/dxgadapter.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * Implementation of dxgadapter and its objects
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hyperv.h>
+#include <linux/pagemap.h>
+#include <linux/eventfd.h>
+
+#include "dxgkrnl.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "dxgk: " fmt
+
+/*
+ * Binds the adapter to its VM bus device.
+ *
+ * The adapter LUID is derived from the channel instance GUID, the adapter
+ * VM bus channel is initialized, and then the "open adapter" and "get
+ * internal adapter info" messages are sent to the host.
+ * Returns zero on success or the error code of the step, which failed.
+ */
+int dxgadapter_set_vmbus(struct dxgadapter *adapter, struct hv_device *hdev)
+{
+	guid_t *instance = &hdev->channel->offermsg.offer.if_instance;
+	int ret;
+
+	guid_to_luid(instance, &adapter->luid);
+	DXG_TRACE("%x:%x %p %pUb",
+		  adapter->luid.b, adapter->luid.a, hdev->channel,
+		  instance);
+
+	ret = dxgvmbuschannel_init(&adapter->channel, hdev);
+	if (ret == 0) {
+		adapter->channel.adapter = adapter;
+		adapter->hv_dev = hdev;
+
+		ret = dxgvmb_send_open_adapter(adapter);
+		if (ret < 0)
+			DXG_ERR("dxgvmb_send_open_adapter failed: %d", ret);
+		else
+			ret = dxgvmb_send_get_internal_adapter_info(adapter);
+	}
+
+	if (ret != 0)
+		DXG_ERR("Failed to set vmbus: %d", ret);
+	return ret;
+}
+
+/*
+ * Attempts to start the adapter.
+ *
+ * Nothing is done until the vGPU VM bus channel with the adapter LUID is in
+ * the global channel list. The global channel is initialized on the first
+ * start. The adapter state becomes ACTIVE when the adapter VM bus channel is
+ * set up, or STOPPED when that fails.
+ */
+void dxgadapter_start(struct dxgadapter *adapter)
+{
+	struct dxgglobal *dxgglobal = dxggbl();
+	struct dxgvgpuchannel *vgpu_ch = NULL;
+	struct dxgvgpuchannel *cur;
+	int ret;
+
+	DXG_TRACE("%x-%x", adapter->luid.a, adapter->luid.b);
+
+	/* Look up the vGPU vm bus channel, which has the adapter LUID */
+	list_for_each_entry(cur, &dxgglobal->vgpu_ch_list_head,
+			    vgpu_ch_list_entry) {
+		if (memcmp(&adapter->luid, &cur->adapter_luid,
+			   sizeof(struct winluid)) != 0)
+			continue;
+		vgpu_ch = cur;
+		break;
+	}
+	if (!vgpu_ch) {
+		DXG_TRACE("vGPU chanel is not ready");
+		return;
+	}
+
+	/* The first adapter to start brings up the global channel */
+	if (!dxgglobal->global_channel_initialized) {
+		ret = dxgglobal_init_global_channel();
+		if (ret) {
+			dxgglobal_destroy_global_channel();
+			return;
+		}
+		dxgglobal->global_channel_initialized = true;
+	}
+
+	/* Set up the adapter's own vm bus channel */
+	ret = dxgadapter_set_vmbus(adapter, vgpu_ch->hdev);
+	if (ret == 0) {
+		adapter->adapter_state = DXGADAPTER_STATE_ACTIVE;
+		DXG_TRACE("Adapter started %p", adapter);
+		return;
+	}
+
+	DXG_ERR("Failed to start adapter %p", adapter);
+	adapter->adapter_state = DXGADAPTER_STATE_STOPPED;
+}
+
+/*
+ * Stops the adapter.
+ *
+ * Only the first caller proceeds; later callers see stopping_adapter set and
+ * return. The "close adapter" message is sent to the host only when the
+ * adapter is active. The adapter VM bus channel is destroyed and the adapter
+ * state becomes STOPPED.
+ */
+void dxgadapter_stop(struct dxgadapter *adapter)
+{
+	bool already_stopping;
+
+	/* Test and set the stopping flag under the adapter lock */
+	down_write(&adapter->core_lock);
+	already_stopping = adapter->stopping_adapter;
+	adapter->stopping_adapter = true;
+	up_write(&adapter->core_lock);
+
+	if (already_stopping)
+		return;
+
+	/* The lock is acquired only when the adapter is in the active state */
+	if (dxgadapter_acquire_lock_exclusive(adapter) == 0) {
+		dxgvmb_send_close_adapter(adapter);
+		dxgadapter_release_lock_exclusive(adapter);
+	}
+	dxgvmbuschannel_destroy(&adapter->channel);
+
+	adapter->adapter_state = DXGADAPTER_STATE_STOPPED;
+}
+
+/*
+ * kref release callback for adapter_kref: frees the dxgadapter object,
+ * which contains the reference counter.
+ */
+void dxgadapter_release(struct kref *refcount)
+{
+	struct dxgadapter *adapter = container_of(refcount, struct dxgadapter,
+						  adapter_kref);
+
+	DXG_TRACE("%p", adapter);
+	kfree(adapter);
+}
+
+/* Returns true when the adapter is in the DXGADAPTER_STATE_ACTIVE state */
+bool dxgadapter_is_active(struct dxgadapter *adapter)
+{
+	bool active = (adapter->adapter_state == DXGADAPTER_STATE_ACTIVE);
+
+	return active;
+}
+
+/*
+ * Acquires the adapter core lock for writing.
+ * Returns 0 with the lock held when the adapter is active. Otherwise the
+ * lock is released and -ENODEV is returned.
+ */
+int dxgadapter_acquire_lock_exclusive(struct dxgadapter *adapter)
+{
+	down_write(&adapter->core_lock);
+	if (adapter->adapter_state == DXGADAPTER_STATE_ACTIVE)
+		return 0;
+
+	dxgadapter_release_lock_exclusive(adapter);
+	return -ENODEV;
+}
+
+/*
+ * Acquires the adapter core lock for writing without checking the adapter
+ * state.
+ */
+void dxgadapter_acquire_lock_forced(struct dxgadapter *adapter)
+{
+	struct rw_semaphore *lock = &adapter->core_lock;
+
+	down_write(lock);
+}
+
+/* Releases the adapter core lock, which was acquired for writing */
+void dxgadapter_release_lock_exclusive(struct dxgadapter *adapter)
+{
+	struct rw_semaphore *lock = &adapter->core_lock;
+
+	up_write(lock);
+}
+
+/*
+ * Acquires the adapter core lock for reading.
+ * Returns 0 with the lock held when the adapter is active. Otherwise the
+ * lock is released and -ENODEV is returned.
+ */
+int dxgadapter_acquire_lock_shared(struct dxgadapter *adapter)
+{
+	down_read(&adapter->core_lock);
+	if (adapter->adapter_state != DXGADAPTER_STATE_ACTIVE) {
+		dxgadapter_release_lock_shared(adapter);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/* Releases the adapter core lock, which was acquired for reading */
+void dxgadapter_release_lock_shared(struct dxgadapter *adapter)
+{
+	struct rw_semaphore *lock = &adapter->core_lock;
+
+	up_read(lock);
+}
diff --git a/drivers/hv/dxgkrnl/dxgkrnl.h b/drivers/hv/dxgkrnl/dxgkrnl.h
index 52b9e82c51e6..ba2a7c6001aa 100644
--- a/drivers/hv/dxgkrnl/dxgkrnl.h
+++ b/drivers/hv/dxgkrnl/dxgkrnl.h
@@ -47,9 +47,39 @@ extern struct dxgdriver dxgdrv;
#define DXGDEV dxgdrv.dxgdev
+/* Device type flags: 19 one-bit flags packed into u32 bit fields */
+struct dxgk_device_types {
+ u32 post_device:1;
+ u32 post_device_certain:1;
+ u32 software_device:1;
+ u32 soft_gpu_device:1;
+ u32 warp_device:1;
+ u32 bdd_device:1;
+ u32 support_miracast:1;
+ u32 mismatched_lda:1;
+ u32 indirect_display_device:1;
+ u32 xbox_one_device:1;
+ u32 child_id_support_dwm_clone:1;
+ u32 child_id_support_dwm_clone2:1;
+ u32 has_internal_panel:1;
+ u32 rfx_vgpu_device:1;
+ u32 virtual_render_device:1;
+ u32 support_preserve_boot_display:1;
+ u32 is_uefi_frame_buffer:1;
+ u32 removable_device:1;
+ u32 virtual_monitor_device:1;
+};
+
+/*
+ * Life cycle state of a driver object.
+ * NOTE(review): no visible code in this patch uses the enum; the values
+ * presumably follow the order of the object life cycle.
+ */
+enum dxgobjectstate {
+ DXGOBJECTSTATE_CREATED,
+ DXGOBJECTSTATE_ACTIVE,
+ DXGOBJECTSTATE_STOPPED,
+ DXGOBJECTSTATE_DESTROYED,
+};
+
struct dxgvmbuschannel {
struct vmbus_channel *channel;
struct hv_device *hdev;
+ struct dxgadapter *adapter;
spinlock_t packet_list_mutex;
struct list_head packet_list_head;
struct kmem_cache *packet_cache;
@@ -81,6 +111,10 @@ struct dxgglobal {
struct miscdevice dxgdevice;
struct mutex device_mutex;
+ /* list of created adapters */
+ struct list_head adapter_list_head;
+ struct rw_semaphore adapter_list_lock;
+
/*
* List of the vGPU VM bus channels (dxgvgpuchannel)
* Protected by device_mutex
@@ -102,6 +136,10 @@ static inline struct dxgglobal *dxggbl(void)
return dxgdrv.dxgglobal;
}
+int dxgglobal_create_adapter(struct pci_dev *dev, guid_t *guid,
+ struct winluid host_vgpu_luid);
+void dxgglobal_acquire_adapter_list_lock(enum dxglockstate state);
+void dxgglobal_release_adapter_list_lock(enum dxglockstate state);
int dxgglobal_init_global_channel(void);
void dxgglobal_destroy_global_channel(void);
struct vmbus_channel *dxgglobal_get_vmbus(void);
@@ -113,6 +151,47 @@ struct dxgprocess {
/* Placeholder */
};
+/*
+ * State of a dxgadapter object.
+ * A new adapter is created in the WAITING_VMBUS state. dxgadapter_start()
+ * moves it to ACTIVE, or to STOPPED when the VM bus channel setup fails.
+ * dxgadapter_stop() moves the adapter to STOPPED.
+ */
+enum dxgadapter_state {
+ DXGADAPTER_STATE_ACTIVE = 0,
+ DXGADAPTER_STATE_STOPPED = 1,
+ DXGADAPTER_STATE_WAITING_VMBUS = 2,
+};
+
+/*
+ * This object represents the graphics adapter.
+ * Objects, which take reference on the adapter:
+ * - dxgglobal
+ * - adapter handle (struct d3dkmthandle)
+ */
+struct dxgadapter {
+ /* Taken by the dxgadapter_acquire_lock_*() functions */
+ struct rw_semaphore core_lock;
+ /* Reference counter; dxgadapter_release() frees the object */
+ struct kref adapter_kref;
+ /* Entry in the list of adapters in dxgglobal */
+ struct list_head adapter_list_entry;
+ /* The PCI device, the adapter was created for */
+ struct pci_dev *pci_dev;
+ /* The VM bus device of the adapter channel; set by dxgadapter_set_vmbus() */
+ struct hv_device *hv_dev;
+ /* The per adapter VM bus channel */
+ struct dxgvmbuschannel channel;
+ struct d3dkmthandle host_handle;
+ enum dxgadapter_state adapter_state;
+ struct winluid host_adapter_luid;
+ struct winluid host_vgpu_luid;
+ struct winluid luid; /* VM bus channel luid */
+ u16 device_description[80];
+ u16 device_instance_id[WIN_MAX_PATH];
+ /* Set by the first call to dxgadapter_stop() */
+ bool stopping_adapter;
+};
+
+/*
+ * dxgadapter API.
+ * dxgadapter_acquire_lock_shared() and dxgadapter_acquire_lock_exclusive()
+ * return -ENODEV, without holding the lock, when the adapter is not active.
+ * dxgadapter_acquire_lock_forced() takes the lock for writing regardless of
+ * the adapter state.
+ */
+int dxgadapter_set_vmbus(struct dxgadapter *adapter, struct hv_device *hdev);
+bool dxgadapter_is_active(struct dxgadapter *adapter);
+void dxgadapter_start(struct dxgadapter *adapter);
+void dxgadapter_stop(struct dxgadapter *adapter);
+void dxgadapter_release(struct kref *refcount);
+int dxgadapter_acquire_lock_shared(struct dxgadapter *adapter);
+void dxgadapter_release_lock_shared(struct dxgadapter *adapter);
+int dxgadapter_acquire_lock_exclusive(struct dxgadapter *adapter);
+void dxgadapter_acquire_lock_forced(struct dxgadapter *adapter);
+void dxgadapter_release_lock_exclusive(struct dxgadapter *adapter);
+
/*
* The convention is that VNBus instance id is a GUID, but the host sets
* the lower part of the value to the host adapter LUID. The function
@@ -141,6 +220,12 @@ static inline void guid_to_luid(guid_t *guid, struct winluid *luid)
void dxgvmb_initialize(void);
int dxgvmb_send_set_iospace_region(u64 start, u64 len);
+/* VM bus messages for dxgadapter objects; implemented in dxgvmbus.c */
+int dxgvmb_send_open_adapter(struct dxgadapter *adapter);
+int dxgvmb_send_close_adapter(struct dxgadapter *adapter);
+int dxgvmb_send_get_internal_adapter_info(struct dxgadapter *adapter);
+int dxgvmb_send_async_msg(struct dxgvmbuschannel *channel,
+ void *command,
+ u32 cmd_size);
int ntstatus2int(struct ntstatus status);
diff --git a/drivers/hv/dxgkrnl/dxgmodule.c b/drivers/hv/dxgkrnl/dxgmodule.c
index e55639dc0adc..ef80b920f010 100644
--- a/drivers/hv/dxgkrnl/dxgmodule.c
+++ b/drivers/hv/dxgkrnl/dxgmodule.c
@@ -55,6 +55,156 @@ void dxgglobal_release_channel_lock(void)
up_read(&dxggbl()->channel_lock);
}
+/*
+ * Acquires the global adapter list lock: for writing when the state is
+ * DXGLOCK_EXCL, for reading otherwise.
+ */
+void dxgglobal_acquire_adapter_list_lock(enum dxglockstate state)
+{
+	struct rw_semaphore *lock = &dxggbl()->adapter_list_lock;
+
+	if (state != DXGLOCK_EXCL) {
+		down_read(lock);
+		return;
+	}
+	down_write(lock);
+}
+
+/*
+ * Releases the global adapter list lock. The state must be the same as the
+ * one, which was passed to dxgglobal_acquire_adapter_list_lock().
+ */
+void dxgglobal_release_adapter_list_lock(enum dxglockstate state)
+{
+	struct rw_semaphore *lock = &dxggbl()->adapter_list_lock;
+
+	if (state != DXGLOCK_EXCL) {
+		up_read(lock);
+		return;
+	}
+	up_write(lock);
+}
+
+/*
+ * Searches the global adapter list for the dxgadapter object, which was
+ * created for the given PCI device.
+ * Returns the adapter or NULL when there is no such adapter. A reference is
+ * not taken on the returned adapter.
+ */
+static struct dxgadapter *find_pci_adapter(struct pci_dev *dev)
+{
+	struct dxgglobal *dxgglobal = dxggbl();
+	struct dxgadapter *found = NULL;
+	struct dxgadapter *cur;
+
+	dxgglobal_acquire_adapter_list_lock(DXGLOCK_EXCL);
+
+	list_for_each_entry(cur, &dxgglobal->adapter_list_head,
+			    adapter_list_entry) {
+		if (cur->pci_dev != dev)
+			continue;
+		found = cur;
+		break;
+	}
+
+	dxgglobal_release_adapter_list_lock(DXGLOCK_EXCL);
+	return found;
+}
+
+/*
+ * Searches the global adapter list for the dxgadapter object, which has the
+ * given VM bus channel LUID.
+ * Returns the adapter or NULL when there is no such adapter. A reference is
+ * not taken on the returned adapter.
+ */
+static struct dxgadapter *find_adapter(struct winluid *luid)
+{
+	struct dxgglobal *dxgglobal = dxggbl();
+	struct dxgadapter *found = NULL;
+	struct dxgadapter *cur;
+
+	dxgglobal_acquire_adapter_list_lock(DXGLOCK_EXCL);
+
+	list_for_each_entry(cur, &dxgglobal->adapter_list_head,
+			    adapter_list_entry) {
+		if (memcmp(luid, &cur->luid, sizeof(struct winluid)) != 0)
+			continue;
+		found = cur;
+		break;
+	}
+
+	dxgglobal_release_adapter_list_lock(DXGLOCK_EXCL);
+	return found;
+}
+
+/*
+ * Allocates a dxgadapter object for a virtual GPU, projected by the host,
+ * and adds it to the global list of adapters.
+ * The new adapter waits for VM bus: it becomes active when the global VM bus
+ * channel and the adapter VM bus channel are created.
+ * Returns 0 on success or -ENOMEM when the object cannot be allocated.
+ */
+int dxgglobal_create_adapter(struct pci_dev *dev, guid_t *guid,
+			     struct winluid host_vgpu_luid)
+{
+	struct dxgglobal *dxgglobal = dxggbl();
+	struct dxgadapter *adapter;
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	if (!adapter)
+		return -ENOMEM;
+
+	adapter->adapter_state = DXGADAPTER_STATE_WAITING_VMBUS;
+	adapter->host_vgpu_luid = host_vgpu_luid;
+	kref_init(&adapter->adapter_kref);
+	init_rwsem(&adapter->core_lock);
+
+	adapter->pci_dev = dev;
+	guid_to_luid(guid, &adapter->luid);
+
+	/* Publish the adapter in the global list */
+	dxgglobal_acquire_adapter_list_lock(DXGLOCK_EXCL);
+	list_add_tail(&adapter->adapter_list_entry,
+		      &dxgglobal->adapter_list_head);
+	dxgglobal->num_adapters++;
+	dxgglobal_release_adapter_list_lock(DXGLOCK_EXCL);
+
+	DXG_TRACE("new adapter added %p %x-%x", adapter,
+		  adapter->luid.a, adapter->luid.b);
+	return 0;
+}
+
+/*
+ * Tries to start every dxgadapter object, which still waits for VM bus.
+ * Nothing is done until the global channel device is known.
+ */
+static void dxgglobal_start_adapters(void)
+{
+	struct dxgglobal *dxgglobal = dxggbl();
+	struct dxgadapter *cur;
+
+	if (dxgglobal->hdev == NULL) {
+		DXG_TRACE("Global channel is not ready");
+		return;
+	}
+
+	dxgglobal_acquire_adapter_list_lock(DXGLOCK_EXCL);
+	list_for_each_entry(cur, &dxgglobal->adapter_list_head,
+			    adapter_list_entry) {
+		if (cur->adapter_state != DXGADAPTER_STATE_WAITING_VMBUS)
+			continue;
+		dxgadapter_start(cur);
+	}
+	dxgglobal_release_adapter_list_lock(DXGLOCK_EXCL);
+}
+
+/*
+ * Stops every dxgadapter object, which is in the active state.
+ * Nothing is done when the global channel device is not known.
+ */
+static void dxgglobal_stop_adapters(void)
+{
+	struct dxgglobal *dxgglobal = dxggbl();
+	struct dxgadapter *cur;
+
+	if (dxgglobal->hdev == NULL) {
+		DXG_TRACE("Global channel is not ready");
+		return;
+	}
+
+	dxgglobal_acquire_adapter_list_lock(DXGLOCK_EXCL);
+	list_for_each_entry(cur, &dxgglobal->adapter_list_head,
+			    adapter_list_entry) {
+		if (cur->adapter_state != DXGADAPTER_STATE_ACTIVE)
+			continue;
+		dxgadapter_stop(cur);
+	}
+	dxgglobal_release_adapter_list_lock(DXGLOCK_EXCL);
+}
+
const struct file_operations dxgk_fops = {
.owner = THIS_MODULE,
};
@@ -182,6 +332,15 @@ static int dxg_pci_probe_device(struct pci_dev *dev,
DXG_TRACE("Vmbus interface version: %d", dxgglobal->vmbus_ver);
DXG_TRACE("Host luid: %x-%x", vgpu_luid.b, vgpu_luid.a);
+ /* Create new virtual GPU adapter */
+ ret = dxgglobal_create_adapter(dev, &guid, vgpu_luid);
+ if (ret)
+ goto cleanup;
+
+ /* Attempt to start the adapter in case VM bus channels are created */
+
+ dxgglobal_start_adapters();
+
cleanup:
mutex_unlock(&dxgglobal->device_mutex);
@@ -193,7 +352,25 @@ static int dxg_pci_probe_device(struct pci_dev *dev,
static void dxg_pci_remove_device(struct pci_dev *dev)
{
- /* Placeholder */
+ struct dxgadapter *adapter;
+ struct dxgglobal *dxgglobal = dxggbl();
+
+ mutex_lock(&dxgglobal->device_mutex);
+
+ adapter = find_pci_adapter(dev);
+ if (adapter) {
+ dxgglobal_acquire_adapter_list_lock(DXGLOCK_EXCL);
+ list_del(&adapter->adapter_list_entry);
+ dxgglobal->num_adapters--;
+ dxgglobal_release_adapter_list_lock(DXGLOCK_EXCL);
+
+ dxgadapter_stop(adapter);
+ kref_put(&adapter->adapter_kref, dxgadapter_release);
+ } else {
+ DXG_ERR("Failed to find dxgadapter for pcidev");
+ }
+
+ mutex_unlock(&dxgglobal->device_mutex);
}
static struct pci_device_id dxg_pci_id_table[] = {
@@ -297,6 +474,25 @@ void dxgglobal_destroy_global_channel(void)
up_write(&dxgglobal->channel_lock);
}
+/*
+ * Handles removal of a per adapter VM bus channel.
+ * The adapter is looked up by the LUID, derived from the channel instance
+ * GUID. When the adapter is active, its VM bus channel is destroyed and the
+ * adapter state is set to STOPPED under the adapter core lock.
+ */
+static void dxgglobal_stop_adapter_vmbus(struct hv_device *hdev)
+{
+	struct dxgadapter *adapter;
+	struct winluid luid;
+
+	guid_to_luid(&hdev->channel->offermsg.offer.if_instance, &luid);
+
+	DXG_TRACE("Stopping adapter %x:%x", luid.b, luid.a);
+
+	adapter = find_adapter(&luid);
+	if (adapter == NULL ||
+	    adapter->adapter_state != DXGADAPTER_STATE_ACTIVE)
+		return;
+
+	down_write(&adapter->core_lock);
+	dxgvmbuschannel_destroy(&adapter->channel);
+	adapter->adapter_state = DXGADAPTER_STATE_STOPPED;
+	up_write(&adapter->core_lock);
+}
+
static const struct hv_vmbus_device_id dxg_vmbus_id_table[] = {
/* Per GPU Device GUID */
{ HV_GPUP_DXGK_VGPU_GUID },
@@ -329,6 +525,7 @@ static int dxg_probe_vmbus(struct hv_device *hdev,
vgpuch->hdev = hdev;
list_add_tail(&vgpuch->vgpu_ch_list_entry,
&dxgglobal->vgpu_ch_list_head);
+ dxgglobal_start_adapters();
} else if (uuid_le_cmp(hdev->dev_type,
dxg_vmbus_id_table[1].guid) == 0) {
/* This is the global Dxgkgnl channel */
@@ -341,6 +538,7 @@ static int dxg_probe_vmbus(struct hv_device *hdev,
goto error;
}
dxgglobal->hdev = hdev;
+ dxgglobal_start_adapters();
} else {
/* Unknown device type */
DXG_ERR("Unknown VM bus device type");
@@ -364,6 +562,7 @@ static int dxg_remove_vmbus(struct hv_device *hdev)
if (uuid_le_cmp(hdev->dev_type, dxg_vmbus_id_table[0].guid) == 0) {
DXG_TRACE("Remove virtual GPU channel");
+ dxgglobal_stop_adapter_vmbus(hdev);
list_for_each_entry(vgpu_channel,
&dxgglobal->vgpu_ch_list_head,
vgpu_ch_list_entry) {
@@ -420,6 +619,8 @@ static struct dxgglobal *dxgglobal_create(void)
mutex_init(&dxgglobal->device_mutex);
INIT_LIST_HEAD(&dxgglobal->vgpu_ch_list_head);
+ INIT_LIST_HEAD(&dxgglobal->adapter_list_head);
+ init_rwsem(&dxgglobal->adapter_list_lock);
init_rwsem(&dxgglobal->channel_lock);
@@ -430,6 +631,7 @@ static void dxgglobal_destroy(struct dxgglobal *dxgglobal)
{
if (dxgglobal) {
mutex_lock(&dxgglobal->device_mutex);
+ dxgglobal_stop_adapters();
dxgglobal_destroy_global_channel();
mutex_unlock(&dxgglobal->device_mutex);
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c
index a4365739826a..6d4b8d9d8d07 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.c
+++ b/drivers/hv/dxgkrnl/dxgvmbus.c
@@ -77,7 +77,7 @@ struct dxgvmbusmsgres {
void *res;
};
-static int init_message(struct dxgvmbusmsg *msg,
+static int init_message(struct dxgvmbusmsg *msg, struct dxgadapter *adapter,
struct dxgprocess *process, u32 size)
{
struct dxgglobal *dxgglobal = dxggbl();
@@ -99,10 +99,15 @@ static int init_message(struct dxgvmbusmsg *msg,
if (use_ext_header) {
msg->msg = (char *)&msg->hdr[1];
msg->hdr->command_offset = sizeof(msg->hdr[0]);
+ if (adapter)
+ msg->hdr->vgpu_luid = adapter->host_vgpu_luid;
} else {
msg->msg = (char *)msg->hdr;
}
- msg->channel = &dxgglobal->channel;
+ if (adapter && !dxgglobal->async_msg_enabled)
+ msg->channel = &adapter->channel;
+ else
+ msg->channel = &dxgglobal->channel;
return 0;
}
@@ -116,6 +121,37 @@ static void free_message(struct dxgvmbusmsg *msg, struct dxgprocess *process)
* Helper functions
*/
+static void command_vm_to_host_init2(struct dxgkvmb_command_vm_to_host *command,
+ enum dxgkvmb_commandtype_global t,
+ struct d3dkmthandle process)
+{
+ command->command_type = t;
+ command->process = process;
+ command->command_id = 0;
+ command->channel_type = DXGKVMB_VM_TO_HOST;
+}
+
+static void command_vgpu_to_host_init1(struct dxgkvmb_command_vgpu_to_host
+ *command,
+ enum dxgkvmb_commandtype type)
+{
+ command->command_type = type;
+ command->process.v = 0;
+ command->command_id = 0;
+ command->channel_type = DXGKVMB_VGPU_TO_HOST;
+}
+
+static void command_vgpu_to_host_init2(struct dxgkvmb_command_vgpu_to_host
+ *command,
+ enum dxgkvmb_commandtype type,
+ struct d3dkmthandle process)
+{
+ command->command_type = type;
+ command->process = process;
+ command->command_id = 0;
+ command->channel_type = DXGKVMB_VGPU_TO_HOST;
+}
+
int ntstatus2int(struct ntstatus status)
{
if (NT_SUCCESS(status))
@@ -216,22 +252,26 @@ static void process_inband_packet(struct dxgvmbuschannel *channel,
u32 packet_length = hv_pkt_datalen(desc);
struct dxgkvmb_command_host_to_vm *packet;
- if (packet_length < sizeof(struct dxgkvmb_command_host_to_vm)) {
- DXG_ERR("Invalid global packet");
- } else {
- packet = hv_pkt_data(desc);
- DXG_TRACE("global packet %d",
- packet->command_type);
- switch (packet->command_type) {
- case DXGK_VMBCOMMAND_SIGNALGUESTEVENT:
- case DXGK_VMBCOMMAND_SIGNALGUESTEVENTPASSIVE:
- break;
- case DXGK_VMBCOMMAND_SENDWNFNOTIFICATION:
- break;
- default:
- DXG_ERR("unexpected host message %d",
+ if (channel->adapter == NULL) {
+ if (packet_length < sizeof(struct dxgkvmb_command_host_to_vm)) {
+ DXG_ERR("Invalid global packet");
+ } else {
+ packet = hv_pkt_data(desc);
+ DXG_TRACE("global packet %d",
packet->command_type);
+ switch (packet->command_type) {
+ case DXGK_VMBCOMMAND_SIGNALGUESTEVENT:
+ case DXGK_VMBCOMMAND_SIGNALGUESTEVENTPASSIVE:
+ break;
+ case DXGK_VMBCOMMAND_SENDWNFNOTIFICATION:
+ break;
+ default:
+ DXG_ERR("unexpected host message %d",
+ packet->command_type);
+ }
}
+ } else {
+ DXG_ERR("Unexpected packet for adapter channel");
}
}
@@ -279,6 +319,7 @@ void dxgvmbuschannel_receive(void *ctx)
struct vmpacket_descriptor *desc;
u32 packet_length = 0;
+ DXG_TRACE("New adapter message: %p", channel->adapter);
foreach_vmbus_pkt(desc, channel->channel) {
packet_length = hv_pkt_datalen(desc);
DXG_TRACE("next packet (id, size, type): %llu %d %d",
@@ -302,6 +343,8 @@ int dxgvmb_send_sync_msg(struct dxgvmbuschannel *channel,
{
int ret;
struct dxgvmbuspacket *packet = NULL;
+ struct dxgkvmb_command_vm_to_host *cmd1;
+ struct dxgkvmb_command_vgpu_to_host *cmd2;
if (cmd_size > DXG_MAX_VM_BUS_PACKET_SIZE ||
result_size > DXG_MAX_VM_BUS_PACKET_SIZE) {
@@ -315,6 +358,16 @@ int dxgvmb_send_sync_msg(struct dxgvmbuschannel *channel,
return -ENOMEM;
}
+ if (channel->adapter == NULL) {
+ cmd1 = command;
+ DXG_TRACE("send_sync_msg global: %d %p %d %d",
+ cmd1->command_type, command, cmd_size, result_size);
+ } else {
+ cmd2 = command;
+ DXG_TRACE("send_sync_msg adapter: %d %p %d %d",
+ cmd2->command_type, command, cmd_size, result_size);
+ }
+
packet->request_id = atomic64_inc_return(&channel->packet_request_id);
init_completion(&packet->wait);
packet->buffer = result;
@@ -358,6 +411,41 @@ int dxgvmb_send_sync_msg(struct dxgvmbuschannel *channel,
return ret;
}
+int dxgvmb_send_async_msg(struct dxgvmbuschannel *channel,
+ void *command,
+ u32 cmd_size)
+{
+ int ret;
+ int try_count = 0;
+
+ if (cmd_size > DXG_MAX_VM_BUS_PACKET_SIZE) {
+ DXG_ERR("%s invalid data size", __func__);
+ return -EINVAL;
+ }
+
+ if (channel->adapter) {
+ DXG_ERR("Async message sent to the adapter channel");
+ return -EINVAL;
+ }
+
+ do {
+ ret = vmbus_sendpacket(channel->channel, command, cmd_size,
+ 0, VM_PKT_DATA_INBAND, 0);
+ /*
+ * -EAGAIN is returned when the VM bus ring buffer is full.
+ * Sleep for 1-2 ms to allow the host to process messages and
+ * try again; give up after 5000 attempts and return -EAGAIN.
+ */
+ if (ret == -EAGAIN) {
+ usleep_range(1000, 2000);
+ try_count++;
+ }
+ } while (ret == -EAGAIN && try_count < 5000);
+ if (ret < 0)
+ DXG_ERR("vmbus_sendpacket failed: %x", ret);
+
+ return ret;
+}
+
static int
dxgvmb_send_sync_msg_ntstatus(struct dxgvmbuschannel *channel,
void *command, u32 cmd_size)
@@ -383,7 +471,7 @@ int dxgvmb_send_set_iospace_region(u64 start, u64 len)
struct dxgvmbusmsg msg;
struct dxgglobal *dxgglobal = dxggbl();
- ret = init_message(&msg, NULL, sizeof(*command));
+ ret = init_message(&msg, NULL, NULL, sizeof(*command));
if (ret)
return ret;
command = (void *)msg.msg;
@@ -408,3 +496,98 @@ int dxgvmb_send_set_iospace_region(u64 start, u64 len)
DXG_TRACE("Error: %d", ret);
return ret;
}
+
+/*
+ * Virtual GPU messages to the host
+ */
+
+int dxgvmb_send_open_adapter(struct dxgadapter *adapter)
+{
+ int ret;
+ struct dxgkvmb_command_openadapter *command;
+ struct dxgkvmb_command_openadapter_return result = { };
+ struct dxgvmbusmsg msg;
+ struct dxgglobal *dxgglobal = dxggbl();
+
+ ret = init_message(&msg, adapter, NULL, sizeof(*command));
+ if (ret)
+ return ret;
+ command = (void *)msg.msg;
+
+ command_vgpu_to_host_init1(&command->hdr, DXGK_VMBCOMMAND_OPENADAPTER);
+ command->vmbus_interface_version = dxgglobal->vmbus_ver;
+ command->vmbus_last_compatible_interface_version =
+ DXGK_VMBUS_LAST_COMPATIBLE_INTERFACE_VERSION;
+
+ ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size,
+ &result, sizeof(result));
+ if (ret < 0)
+ goto cleanup;
+
+ ret = ntstatus2int(result.status);
+ adapter->host_handle = result.host_adapter_handle;
+
+cleanup:
+ free_message(&msg, NULL);
+ if (ret)
+ DXG_ERR("Failed to open adapter: %d", ret);
+ return ret;
+}
+
+int dxgvmb_send_close_adapter(struct dxgadapter *adapter)
+{
+ int ret;
+ struct dxgkvmb_command_closeadapter *command;
+ struct dxgvmbusmsg msg;
+
+ ret = init_message(&msg, adapter, NULL, sizeof(*command));
+ if (ret)
+ return ret;
+ command = (void *)msg.msg;
+
+ command_vgpu_to_host_init1(&command->hdr, DXGK_VMBCOMMAND_CLOSEADAPTER);
+ command->host_handle = adapter->host_handle;
+
+ ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size,
+ NULL, 0);
+ free_message(&msg, NULL);
+ if (ret)
+ DXG_ERR("Failed to close adapter: %d", ret);
+ return ret;
+}
+
+int dxgvmb_send_get_internal_adapter_info(struct dxgadapter *adapter)
+{
+ int ret;
+ struct dxgkvmb_command_getinternaladapterinfo *command;
+ struct dxgkvmb_command_getinternaladapterinfo_return result = { };
+ struct dxgvmbusmsg msg;
+ u32 result_size = sizeof(result);
+ struct dxgglobal *dxgglobal = dxggbl();
+
+ ret = init_message(&msg, adapter, NULL, sizeof(*command));
+ if (ret)
+ return ret;
+ command = (void *)msg.msg;
+
+ command_vgpu_to_host_init1(&command->hdr,
+ DXGK_VMBCOMMAND_GETINTERNALADAPTERINFO);
+ if (dxgglobal->vmbus_ver < DXGK_VMBUS_INTERFACE_VERSION)
+ result_size -= sizeof(struct winluid);
+
+ ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size,
+ &result, result_size);
+ if (ret >= 0) {
+ adapter->host_adapter_luid = result.host_adapter_luid;
+ adapter->host_vgpu_luid = result.host_vgpu_luid;
+ wcsncpy(adapter->device_description, result.device_description,
+ sizeof(adapter->device_description) / sizeof(u16));
+ wcsncpy(adapter->device_instance_id, result.device_instance_id,
+ sizeof(adapter->device_instance_id) / sizeof(u16));
+ dxgglobal->async_msg_enabled = result.async_msg_enabled != 0;
+ }
+ free_message(&msg, NULL);
+ if (ret)
+ DXG_ERR("Failed to get adapter info: %d", ret);
+ return ret;
+}
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h
index b1bdd6039b73..584cdd3db6c0 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.h
+++ b/drivers/hv/dxgkrnl/dxgvmbus.h
@@ -47,6 +47,83 @@ enum dxgkvmb_commandtype_global {
DXGK_VMBCOMMAND_INVALID_VM_TO_HOST
};
+/*
+ *
+ * Commands, sent to the host via the per adapter VM bus channel
+ * DXG_GUEST_VGPU_VMBUS
+ *
+ */
+
+enum dxgkvmb_commandtype {
+ DXGK_VMBCOMMAND_CREATEDEVICE = 0,
+ DXGK_VMBCOMMAND_DESTROYDEVICE = 1,
+ DXGK_VMBCOMMAND_QUERYADAPTERINFO = 2,
+ DXGK_VMBCOMMAND_DDIQUERYADAPTERINFO = 3,
+ DXGK_VMBCOMMAND_CREATEALLOCATION = 4,
+ DXGK_VMBCOMMAND_DESTROYALLOCATION = 5,
+ DXGK_VMBCOMMAND_CREATECONTEXTVIRTUAL = 6,
+ DXGK_VMBCOMMAND_DESTROYCONTEXT = 7,
+ DXGK_VMBCOMMAND_CREATESYNCOBJECT = 8,
+ DXGK_VMBCOMMAND_CREATEPAGINGQUEUE = 9,
+ DXGK_VMBCOMMAND_DESTROYPAGINGQUEUE = 10,
+ DXGK_VMBCOMMAND_MAKERESIDENT = 11,
+ DXGK_VMBCOMMAND_EVICT = 12,
+ DXGK_VMBCOMMAND_ESCAPE = 13,
+ DXGK_VMBCOMMAND_OPENADAPTER = 14,
+ DXGK_VMBCOMMAND_CLOSEADAPTER = 15,
+ DXGK_VMBCOMMAND_FREEGPUVIRTUALADDRESS = 16,
+ DXGK_VMBCOMMAND_MAPGPUVIRTUALADDRESS = 17,
+ DXGK_VMBCOMMAND_RESERVEGPUVIRTUALADDRESS = 18,
+ DXGK_VMBCOMMAND_UPDATEGPUVIRTUALADDRESS = 19,
+ DXGK_VMBCOMMAND_SUBMITCOMMAND = 20,
+ dxgk_vmbcommand_queryvideomemoryinfo = 21,
+ DXGK_VMBCOMMAND_WAITFORSYNCOBJECTFROMCPU = 22,
+ DXGK_VMBCOMMAND_LOCK2 = 23,
+ DXGK_VMBCOMMAND_UNLOCK2 = 24,
+ DXGK_VMBCOMMAND_WAITFORSYNCOBJECTFROMGPU = 25,
+ DXGK_VMBCOMMAND_SIGNALSYNCOBJECT = 26,
+ DXGK_VMBCOMMAND_SIGNALFENCENTSHAREDBYREF = 27,
+ DXGK_VMBCOMMAND_GETDEVICESTATE = 28,
+ DXGK_VMBCOMMAND_MARKDEVICEASERROR = 29,
+ DXGK_VMBCOMMAND_ADAPTERSTOP = 30,
+ DXGK_VMBCOMMAND_SETQUEUEDLIMIT = 31,
+ DXGK_VMBCOMMAND_OPENRESOURCE = 32,
+ DXGK_VMBCOMMAND_SETCONTEXTSCHEDULINGPRIORITY = 33,
+ DXGK_VMBCOMMAND_PRESENTHISTORYTOKEN = 34,
+ DXGK_VMBCOMMAND_SETREDIRECTEDFLIPFENCEVALUE = 35,
+ DXGK_VMBCOMMAND_GETINTERNALADAPTERINFO = 36,
+ DXGK_VMBCOMMAND_FLUSHHEAPTRANSITIONS = 37,
+ DXGK_VMBCOMMAND_BLT = 38,
+ DXGK_VMBCOMMAND_DDIGETSTANDARDALLOCATIONDRIVERDATA = 39,
+ DXGK_VMBCOMMAND_CDDGDICOMMAND = 40,
+ DXGK_VMBCOMMAND_QUERYALLOCATIONRESIDENCY = 41,
+ DXGK_VMBCOMMAND_FLUSHDEVICE = 42,
+ DXGK_VMBCOMMAND_FLUSHADAPTER = 43,
+ DXGK_VMBCOMMAND_DDIGETNODEMETADATA = 44,
+ DXGK_VMBCOMMAND_SETEXISTINGSYSMEMSTORE = 45,
+ DXGK_VMBCOMMAND_ISSYNCOBJECTSIGNALED = 46,
+ DXGK_VMBCOMMAND_CDDSYNCGPUACCESS = 47,
+ DXGK_VMBCOMMAND_QUERYSTATISTICS = 48,
+ DXGK_VMBCOMMAND_CHANGEVIDEOMEMORYRESERVATION = 49,
+ DXGK_VMBCOMMAND_CREATEHWQUEUE = 50,
+ DXGK_VMBCOMMAND_DESTROYHWQUEUE = 51,
+ DXGK_VMBCOMMAND_SUBMITCOMMANDTOHWQUEUE = 52,
+ DXGK_VMBCOMMAND_GETDRIVERSTOREFILE = 53,
+ DXGK_VMBCOMMAND_READDRIVERSTOREFILE = 54,
+ DXGK_VMBCOMMAND_GETNEXTHARDLINK = 55,
+ DXGK_VMBCOMMAND_UPDATEALLOCATIONPROPERTY = 56,
+ DXGK_VMBCOMMAND_OFFERALLOCATIONS = 57,
+ DXGK_VMBCOMMAND_RECLAIMALLOCATIONS = 58,
+ DXGK_VMBCOMMAND_SETALLOCATIONPRIORITY = 59,
+ DXGK_VMBCOMMAND_GETALLOCATIONPRIORITY = 60,
+ DXGK_VMBCOMMAND_GETCONTEXTSCHEDULINGPRIORITY = 61,
+ DXGK_VMBCOMMAND_QUERYCLOCKCALIBRATION = 62,
+ DXGK_VMBCOMMAND_QUERYRESOURCEINFO = 64,
+ DXGK_VMBCOMMAND_LOGEVENT = 65,
+ DXGK_VMBCOMMAND_SETEXISTINGSYSMEMPAGES = 66,
+ DXGK_VMBCOMMAND_INVALID
+};
+
/*
* Commands, sent by the host to the VM
*/
@@ -66,6 +143,15 @@ struct dxgkvmb_command_vm_to_host {
enum dxgkvmb_commandtype_global command_type;
};
+struct dxgkvmb_command_vgpu_to_host {
+ u64 command_id;
+ struct d3dkmthandle process;
+ u32 channel_type : 8;
+ u32 async_msg : 1;
+ u32 reserved : 23;
+ enum dxgkvmb_commandtype command_type;
+};
+
struct dxgkvmb_command_host_to_vm {
u64 command_id;
struct d3dkmthandle process;
@@ -83,4 +169,46 @@ struct dxgkvmb_command_setiospaceregion {
u32 shared_page_gpadl;
};
+struct dxgkvmb_command_openadapter {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+ u32 vmbus_interface_version;
+ u32 vmbus_last_compatible_interface_version;
+ struct winluid guest_adapter_luid;
+};
+
+struct dxgkvmb_command_openadapter_return {
+ struct d3dkmthandle host_adapter_handle;
+ struct ntstatus status;
+ u32 vmbus_interface_version;
+ u32 vmbus_last_compatible_interface_version;
+};
+
+struct dxgkvmb_command_closeadapter {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+ struct d3dkmthandle host_handle;
+};
+
+struct dxgkvmb_command_getinternaladapterinfo {
+ struct dxgkvmb_command_vgpu_to_host hdr;
+};
+
+struct dxgkvmb_command_getinternaladapterinfo_return {
+ struct dxgk_device_types device_types;
+ u32 driver_store_copy_mode;
+ u32 driver_ddi_version;
+ u32 secure_virtual_machine : 1;
+ u32 virtual_machine_reset : 1;
+ u32 is_vail_supported : 1;
+ u32 hw_sch_enabled : 1;
+ u32 hw_sch_capable : 1;
+ u32 va_backed_vm : 1;
+ u32 async_msg_enabled : 1;
+ u32 hw_support_state : 2;
+ u32 reserved : 23;
+ struct winluid host_adapter_luid;
+ u16 device_description[80];
+ u16 device_instance_id[WIN_MAX_PATH];
+ struct winluid host_vgpu_luid;
+};
+
#endif /* _DXGVMBUS_H */
diff --git a/drivers/hv/dxgkrnl/misc.c b/drivers/hv/dxgkrnl/misc.c
new file mode 100644
index 000000000000..cb1e0635bebc
--- /dev/null
+++ b/drivers/hv/dxgkrnl/misc.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2019, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * Helper functions
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
+
+#include "dxgkrnl.h"
+#include "misc.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "dxgk: " fmt
+
+/*
+ * Copy a zero-terminated string of 16-bit characters into a buffer that
+ * holds @n 16-bit units.
+ *
+ * At most @n - 1 units are copied and @dest is always zero-terminated when
+ * @n > 0, truncating @src if necessary. Nothing is written when @n == 0.
+ *
+ * Return: @dest.
+ */
+u16 *wcsncpy(u16 *dest, const u16 *src, size_t n)
+{
+	size_t i;
+
+	/* Stop one unit short of @n so the terminator always fits. */
+	for (i = 0; i + 1 < n; i++) {
+		dest[i] = src[i];
+		if (src[i] == 0)
+			return dest;
+	}
+	/* With n == 0 there is no room at all; writing dest[-1] is a bug. */
+	if (n > 0)
+		dest[i] = 0;
+	return dest;
+}
diff --git a/drivers/hv/dxgkrnl/misc.h b/drivers/hv/dxgkrnl/misc.h
index 4c6047c32a20..d292e9a9bb7f 100644
--- a/drivers/hv/dxgkrnl/misc.h
+++ b/drivers/hv/dxgkrnl/misc.h
@@ -14,18 +14,34 @@
#ifndef _MISC_H_
#define _MISC_H_
+/* Max characters in Windows path */
+#define WIN_MAX_PATH 260
+
extern const struct d3dkmthandle zerohandle;
/*
* Synchronization lock hierarchy.
*
- * The higher enum value, the higher is the lock order.
- * When a lower lock ois held, the higher lock should not be acquired.
+ * The locks here are in the order from lowest to highest.
+ * When a lower lock is held, the higher lock should not be acquired.
*
- * channel_lock
- * device_mutex
+ * channel_lock (VMBus channel lock)
+ * fd_mutex
+ * plistmutex (process list mutex)
+ * table_lock (handle table lock)
+ * core_lock (dxgadapter lock)
+ * device_lock (dxgdevice lock)
+ * adapter_list_lock
+ * device_mutex (dxgglobal mutex)
*/
+u16 *wcsncpy(u16 *dest, const u16 *src, size_t n);
+
+enum dxglockstate {
+ DXGLOCK_SHARED,
+ DXGLOCK_EXCL
+};
+
/*
* Some of the Windows return codes, which needs to be translated to Linux
* IOCTL return codes. Positive values are success codes and need to be
^ permalink raw reply related
* [PATCH 02/55] drivers: hv: dxgkrnl: Add VMBus message support, initialize VMBus channels.
From: Eric Curtin @ 2026-03-19 20:24 UTC (permalink / raw)
To: linux-hyperv; +Cc: linux-kernel, iourit, wei.liu, decui, haiyangz
In-Reply-To: <20260319202509.63802-1-eric.curtin@docker.com>
From: Iouri Tarassov <iourit@linux.microsoft.com>
Implement support for sending/receiving VMBus messages between
the host and the guest.
Initialize the VMBus channels and notify the host about IO space
settings of the VMBus global channel.
Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com>
[kms: forward port to 6.6 from 6.1. No code changes made.]
Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
---
drivers/hv/dxgkrnl/dxgkrnl.h | 14 ++
drivers/hv/dxgkrnl/dxgmodule.c | 9 +-
drivers/hv/dxgkrnl/dxgvmbus.c | 318 +++++++++++++++++++++++++++++++++
drivers/hv/dxgkrnl/dxgvmbus.h | 67 +++++++
drivers/hv/dxgkrnl/ioctl.c | 24 +++
drivers/hv/dxgkrnl/misc.h | 72 ++++++++
include/uapi/misc/d3dkmthk.h | 34 ++++
7 files changed, 536 insertions(+), 2 deletions(-)
create mode 100644 drivers/hv/dxgkrnl/ioctl.c
create mode 100644 drivers/hv/dxgkrnl/misc.h
diff --git a/drivers/hv/dxgkrnl/dxgkrnl.h b/drivers/hv/dxgkrnl/dxgkrnl.h
index f7900840d1ed..52b9e82c51e6 100644
--- a/drivers/hv/dxgkrnl/dxgkrnl.h
+++ b/drivers/hv/dxgkrnl/dxgkrnl.h
@@ -28,6 +28,8 @@
#include <linux/hyperv.h>
#include <uapi/misc/d3dkmthk.h>
#include <linux/version.h>
+#include "misc.h"
+#include <uapi/misc/d3dkmthk.h>
struct dxgadapter;
@@ -100,6 +102,13 @@ static inline struct dxgglobal *dxggbl(void)
return dxgdrv.dxgglobal;
}
+int dxgglobal_init_global_channel(void);
+void dxgglobal_destroy_global_channel(void);
+struct vmbus_channel *dxgglobal_get_vmbus(void);
+struct dxgvmbuschannel *dxgglobal_get_dxgvmbuschannel(void);
+int dxgglobal_acquire_channel_lock(void);
+void dxgglobal_release_channel_lock(void);
+
struct dxgprocess {
/* Placeholder */
};
@@ -130,6 +139,11 @@ static inline void guid_to_luid(guid_t *guid, struct winluid *luid)
#define DXGK_VMBUS_INTERFACE_VERSION 40
#define DXGK_VMBUS_LAST_COMPATIBLE_INTERFACE_VERSION 16
+void dxgvmb_initialize(void);
+int dxgvmb_send_set_iospace_region(u64 start, u64 len);
+
+int ntstatus2int(struct ntstatus status);
+
#ifdef DEBUG
void dxgk_validate_ioctls(void);
diff --git a/drivers/hv/dxgkrnl/dxgmodule.c b/drivers/hv/dxgkrnl/dxgmodule.c
index de02edc4d023..e55639dc0adc 100644
--- a/drivers/hv/dxgkrnl/dxgmodule.c
+++ b/drivers/hv/dxgkrnl/dxgmodule.c
@@ -260,6 +260,13 @@ int dxgglobal_init_global_channel(void)
goto error;
}
+ ret = dxgvmb_send_set_iospace_region(dxgglobal->mmiospace_base,
+ dxgglobal->mmiospace_size);
+ if (ret < 0) {
+ DXG_ERR("send_set_iospace_region failed");
+ goto error;
+ }
+
hv_set_drvdata(dxgglobal->hdev, dxgglobal);
error:
@@ -429,8 +436,6 @@ static void dxgglobal_destroy(struct dxgglobal *dxgglobal)
if (dxgglobal->vmbus_registered)
vmbus_driver_unregister(&dxgdrv.vmbus_drv);
- dxgglobal_destroy_global_channel();
-
if (dxgglobal->pci_registered)
pci_unregister_driver(&dxgdrv.pci_drv);
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c
index deb880e34377..a4365739826a 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.c
+++ b/drivers/hv/dxgkrnl/dxgvmbus.c
@@ -40,6 +40,121 @@ struct dxgvmbuspacket {
bool completed;
};
+struct dxgvmb_ext_header {
+ /* Offset from the start of the message to DXGKVMB_COMMAND_BASE */
+ u32 command_offset;
+ u32 reserved;
+ struct winluid vgpu_luid;
+};
+
+#define VMBUSMESSAGEONSTACK 64
+
+struct dxgvmbusmsg {
+/* Points to the allocated buffer */
+ struct dxgvmb_ext_header *hdr;
+/* Points to dxgkvmb_command_vm_to_host or dxgkvmb_command_vgpu_to_host */
+ void *msg;
+/* The vm bus channel, used to pass the message to the host */
+ struct dxgvmbuschannel *channel;
+/* Message size in bytes including the header and the payload */
+ u32 size;
+/* Buffer used for small messages */
+ char msg_on_stack[VMBUSMESSAGEONSTACK];
+};
+
+struct dxgvmbusmsgres {
+/* Points to the allocated buffer */
+ struct dxgvmb_ext_header *hdr;
+/* Points to dxgkvmb_command_vm_to_host or dxgkvmb_command_vgpu_to_host */
+ void *msg;
+/* The vm bus channel, used to pass the message to the host */
+ struct dxgvmbuschannel *channel;
+/* Message size in bytes including the header, the payload and the result */
+ u32 size;
+/* Result buffer size in bytes */
+ u32 res_size;
+/* Points to the result within the allocated buffer */
+ void *res;
+};
+
+static int init_message(struct dxgvmbusmsg *msg,
+ struct dxgprocess *process, u32 size)
+{
+ struct dxgglobal *dxgglobal = dxggbl();
+
+ bool use_ext_header = dxgglobal->vmbus_ver >=
+ DXGK_VMBUS_INTERFACE_VERSION;
+
+ if (use_ext_header)
+ size += sizeof(struct dxgvmb_ext_header);
+ msg->size = size;
+ if (size <= VMBUSMESSAGEONSTACK) {
+ msg->hdr = (void *)msg->msg_on_stack;
+ memset(msg->hdr, 0, size);
+ } else {
+ msg->hdr = vzalloc(size);
+ if (msg->hdr == NULL)
+ return -ENOMEM;
+ }
+ if (use_ext_header) {
+ msg->msg = (char *)&msg->hdr[1];
+ msg->hdr->command_offset = sizeof(msg->hdr[0]);
+ } else {
+ msg->msg = (char *)msg->hdr;
+ }
+ msg->channel = &dxgglobal->channel;
+ return 0;
+}
+
+static void free_message(struct dxgvmbusmsg *msg, struct dxgprocess *process)
+{
+ if (msg->hdr && (char *)msg->hdr != msg->msg_on_stack)
+ vfree(msg->hdr);
+}
+
+/*
+ * Helper functions
+ */
+
+/*
+ * Convert an NTSTATUS returned by the host into a driver return code.
+ *
+ * Non-negative statuses (for which NT_SUCCESS() is true) are returned
+ * unchanged. Negative statuses are mapped to a negative Linux errno;
+ * codes without an explicit mapping become -EINVAL.
+ */
+int ntstatus2int(struct ntstatus status)
+{
+	if (NT_SUCCESS(status))
+		return (int)status.v;
+	/*
+	 * Only negative (failure) codes reach this switch. STATUS_TIMEOUT and
+	 * STATUS_PENDING are non-negative and were already returned above, so
+	 * they need no case here.
+	 */
+	switch (status.v) {
+	case STATUS_OBJECT_NAME_COLLISION:
+		return -EEXIST;
+	case STATUS_NO_MEMORY:
+		return -ENOMEM;
+	case STATUS_INVALID_PARAMETER:
+		return -EINVAL;
+	case STATUS_OBJECT_NAME_INVALID:
+	case STATUS_OBJECT_NAME_NOT_FOUND:
+		return -ENOENT;
+	case STATUS_BUFFER_TOO_SMALL:
+		return -EOVERFLOW;
+	case STATUS_DEVICE_REMOVED:
+		return -ENODEV;
+	case STATUS_ACCESS_DENIED:
+		return -EACCES;
+	case STATUS_NOT_SUPPORTED:
+		return -EPERM;
+	case STATUS_ILLEGAL_INSTRUCTION:
+		return -EOPNOTSUPP;
+	case STATUS_INVALID_HANDLE:
+		return -EBADF;
+	case STATUS_GRAPHICS_ALLOCATION_BUSY:
+		return -EINPROGRESS;
+	case STATUS_OBJECT_TYPE_MISMATCH:
+		return -EPROTOTYPE;
+	case STATUS_NOT_IMPLEMENTED:
+		return -EPERM;
+	default:
+		return -EINVAL;
+	}
+}
+
int dxgvmbuschannel_init(struct dxgvmbuschannel *ch, struct hv_device *hdev)
{
int ret;
@@ -86,7 +201,210 @@ void dxgvmbuschannel_destroy(struct dxgvmbuschannel *ch)
}
}
+static void command_vm_to_host_init1(struct dxgkvmb_command_vm_to_host *command,
+ enum dxgkvmb_commandtype_global type)
+{
+ command->command_type = type;
+ command->process.v = 0;
+ command->command_id = 0;
+ command->channel_type = DXGKVMB_VM_TO_HOST;
+}
+
+static void process_inband_packet(struct dxgvmbuschannel *channel,
+ struct vmpacket_descriptor *desc)
+{
+ u32 packet_length = hv_pkt_datalen(desc);
+ struct dxgkvmb_command_host_to_vm *packet;
+
+ if (packet_length < sizeof(struct dxgkvmb_command_host_to_vm)) {
+ DXG_ERR("Invalid global packet");
+ } else {
+ packet = hv_pkt_data(desc);
+ DXG_TRACE("global packet %d",
+ packet->command_type);
+ switch (packet->command_type) {
+ case DXGK_VMBCOMMAND_SIGNALGUESTEVENT:
+ case DXGK_VMBCOMMAND_SIGNALGUESTEVENTPASSIVE:
+ break;
+ case DXGK_VMBCOMMAND_SENDWNFNOTIFICATION:
+ break;
+ default:
+ DXG_ERR("unexpected host message %d",
+ packet->command_type);
+ }
+ }
+}
+
+static void process_completion_packet(struct dxgvmbuschannel *channel,
+ struct vmpacket_descriptor *desc)
+{
+ struct dxgvmbuspacket *packet = NULL;
+ struct dxgvmbuspacket *entry;
+ u32 packet_length = hv_pkt_datalen(desc);
+ unsigned long flags;
+
+ spin_lock_irqsave(&channel->packet_list_mutex, flags);
+ list_for_each_entry(entry, &channel->packet_list_head,
+ packet_list_entry) {
+ if (desc->trans_id == entry->request_id) {
+ packet = entry;
+ list_del(&packet->packet_list_entry);
+ packet->completed = true;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&channel->packet_list_mutex, flags);
+ if (packet) {
+ if (packet->buffer_length) {
+ if (packet_length < packet->buffer_length) {
+ DXG_TRACE("invalid size %d Expected:%d",
+ packet_length,
+ packet->buffer_length);
+ packet->status = -EOVERFLOW;
+ } else {
+ memcpy(packet->buffer, hv_pkt_data(desc),
+ packet->buffer_length);
+ }
+ }
+ complete(&packet->wait);
+ } else {
+ DXG_ERR("did not find packet to complete");
+ }
+}
+
/* Receive callback for messages from the host */
void dxgvmbuschannel_receive(void *ctx)
{
+ struct dxgvmbuschannel *channel = ctx;
+ struct vmpacket_descriptor *desc;
+ u32 packet_length = 0;
+
+ foreach_vmbus_pkt(desc, channel->channel) {
+ packet_length = hv_pkt_datalen(desc);
+ DXG_TRACE("next packet (id, size, type): %llu %d %d",
+ desc->trans_id, packet_length, desc->type);
+ if (desc->type == VM_PKT_COMP) {
+ process_completion_packet(channel, desc);
+ } else {
+ if (desc->type != VM_PKT_DATA_INBAND)
+ DXG_ERR("unexpected packet type");
+ else
+ process_inband_packet(channel, desc);
+ }
+ }
+}
+
+/*
+ * Send a command to the host over @channel and wait for its completion.
+ *
+ * @channel:     VM bus channel used to send the packet
+ * @command:     message buffer handed to vmbus_sendpacket()
+ * @cmd_size:    size of @command in bytes
+ * @result:      buffer that receives the completion payload
+ * @result_size: number of payload bytes expected in @result (0 for none)
+ *
+ * The request is queued on the channel packet list so that the completion
+ * handler can find it by request id and fill in @result. The wait is
+ * killable; if it is interrupted the request is unlinked again.
+ *
+ * Return: the packet status set by the completion handler (0 on success),
+ * or the error from argument validation, allocation, send or wait.
+ */
+int dxgvmb_send_sync_msg(struct dxgvmbuschannel *channel,
+			 void *command,
+			 u32 cmd_size,
+			 void *result,
+			 u32 result_size)
+{
+	int ret;
+	struct dxgvmbuspacket *packet = NULL;
+
+	if (cmd_size > DXG_MAX_VM_BUS_PACKET_SIZE ||
+	    result_size > DXG_MAX_VM_BUS_PACKET_SIZE) {
+		DXG_ERR("%s invalid data size", __func__);
+		return -EINVAL;
+	}
+
+	/*
+	 * This function sleeps in wait_for_completion_killable() below, so a
+	 * blocking GFP_KERNEL allocation is appropriate here. An empty gfp
+	 * mask would forbid reclaim and fail needlessly under memory
+	 * pressure.
+	 */
+	packet = kmem_cache_alloc(channel->packet_cache, GFP_KERNEL);
+	if (packet == NULL) {
+		DXG_ERR("kmem_cache_alloc failed");
+		return -ENOMEM;
+	}
+
+	packet->request_id = atomic64_inc_return(&channel->packet_request_id);
+	init_completion(&packet->wait);
+	packet->buffer = result;
+	packet->buffer_length = result_size;
+	packet->status = 0;
+	packet->completed = false;
+	spin_lock_irq(&channel->packet_list_mutex);
+	list_add_tail(&packet->packet_list_entry, &channel->packet_list_head);
+	spin_unlock_irq(&channel->packet_list_mutex);
+
+	ret = vmbus_sendpacket(channel->channel, command, cmd_size,
+			       packet->request_id, VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret) {
+		DXG_ERR("vmbus_sendpacket failed: %x", ret);
+		/* Nothing was sent, so no completion will unlink the packet. */
+		spin_lock_irq(&channel->packet_list_mutex);
+		list_del(&packet->packet_list_entry);
+		spin_unlock_irq(&channel->packet_list_mutex);
+		goto cleanup;
+	}
+
+	DXG_TRACE("waiting completion: %llu", packet->request_id);
+	ret = wait_for_completion_killable(&packet->wait);
+	if (ret) {
+		DXG_ERR("wait_for_completion failed: %x", ret);
+		/* The completion handler unlinks the packet when it sets completed. */
+		spin_lock_irq(&channel->packet_list_mutex);
+		if (!packet->completed)
+			list_del(&packet->packet_list_entry);
+		spin_unlock_irq(&channel->packet_list_mutex);
+		goto cleanup;
+	}
+	DXG_TRACE("completion done: %llu %x",
+		  packet->request_id, packet->status);
+	ret = packet->status;
+
+cleanup:
+
+	kmem_cache_free(channel->packet_cache, packet);
+	if (ret < 0)
+		DXG_TRACE("Error: %x", ret);
+	return ret;
+}
+
+static int
+dxgvmb_send_sync_msg_ntstatus(struct dxgvmbuschannel *channel,
+ void *command, u32 cmd_size)
+{
+ struct ntstatus status;
+ int ret;
+
+ ret = dxgvmb_send_sync_msg(channel, command, cmd_size,
+ &status, sizeof(status));
+ if (ret >= 0)
+ ret = ntstatus2int(status);
+ return ret;
+}
+
+/*
+ * Global messages to the host
+ */
+
+int dxgvmb_send_set_iospace_region(u64 start, u64 len)
+{
+ int ret;
+ struct dxgkvmb_command_setiospaceregion *command;
+ struct dxgvmbusmsg msg;
+ struct dxgglobal *dxgglobal = dxggbl();
+
+ ret = init_message(&msg, NULL, sizeof(*command));
+ if (ret)
+ return ret;
+ command = (void *)msg.msg;
+
+ ret = dxgglobal_acquire_channel_lock();
+ if (ret < 0)
+ goto cleanup;
+
+ command_vm_to_host_init1(&command->hdr,
+ DXGK_VMBCOMMAND_SETIOSPACEREGION);
+ command->start = start;
+ command->length = len;
+ ret = dxgvmb_send_sync_msg_ntstatus(&dxgglobal->channel, msg.hdr,
+ msg.size);
+ if (ret < 0)
+ DXG_ERR("send_set_iospace_region failed %x", ret);
+
+ dxgglobal_release_channel_lock();
+cleanup:
+ free_message(&msg, NULL);
+ if (ret)
+ DXG_TRACE("Error: %d", ret);
+ return ret;
}
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h
index 6cdca5e03d1f..b1bdd6039b73 100644
--- a/drivers/hv/dxgkrnl/dxgvmbus.h
+++ b/drivers/hv/dxgkrnl/dxgvmbus.h
@@ -16,4 +16,71 @@
#define DXG_MAX_VM_BUS_PACKET_SIZE (1024 * 128)
+enum dxgkvmb_commandchanneltype {
+ DXGKVMB_VGPU_TO_HOST,
+ DXGKVMB_VM_TO_HOST,
+ DXGKVMB_HOST_TO_VM
+};
+
+/*
+ *
+ * Commands, sent to the host via the guest global VM bus channel
+ * DXG_GUEST_GLOBAL_VMBUS
+ *
+ */
+
+enum dxgkvmb_commandtype_global {
+ DXGK_VMBCOMMAND_VM_TO_HOST_FIRST = 1000,
+ DXGK_VMBCOMMAND_CREATEPROCESS = DXGK_VMBCOMMAND_VM_TO_HOST_FIRST,
+ DXGK_VMBCOMMAND_DESTROYPROCESS = 1001,
+ DXGK_VMBCOMMAND_OPENSYNCOBJECT = 1002,
+ DXGK_VMBCOMMAND_DESTROYSYNCOBJECT = 1003,
+ DXGK_VMBCOMMAND_CREATENTSHAREDOBJECT = 1004,
+ DXGK_VMBCOMMAND_DESTROYNTSHAREDOBJECT = 1005,
+ DXGK_VMBCOMMAND_SIGNALFENCE = 1006,
+ DXGK_VMBCOMMAND_NOTIFYPROCESSFREEZE = 1007,
+ DXGK_VMBCOMMAND_NOTIFYPROCESSTHAW = 1008,
+ DXGK_VMBCOMMAND_QUERYETWSESSION = 1009,
+ DXGK_VMBCOMMAND_SETIOSPACEREGION = 1010,
+ DXGK_VMBCOMMAND_COMPLETETRANSACTION = 1011,
+ DXGK_VMBCOMMAND_SHAREOBJECTWITHHOST = 1021,
+ DXGK_VMBCOMMAND_INVALID_VM_TO_HOST
+};
+
+/*
+ * Commands, sent by the host to the VM
+ */
+enum dxgkvmb_commandtype_host_to_vm {
+ DXGK_VMBCOMMAND_SIGNALGUESTEVENT,
+ DXGK_VMBCOMMAND_PROPAGATEPRESENTHISTORYTOKEN,
+ DXGK_VMBCOMMAND_SETGUESTDATA,
+ DXGK_VMBCOMMAND_SIGNALGUESTEVENTPASSIVE,
+ DXGK_VMBCOMMAND_SENDWNFNOTIFICATION,
+ DXGK_VMBCOMMAND_INVALID_HOST_TO_VM
+};
+
+struct dxgkvmb_command_vm_to_host {
+ u64 command_id;
+ struct d3dkmthandle process;
+ enum dxgkvmb_commandchanneltype channel_type;
+ enum dxgkvmb_commandtype_global command_type;
+};
+
+struct dxgkvmb_command_host_to_vm {
+ u64 command_id;
+ struct d3dkmthandle process;
+ u32 channel_type : 8;
+ u32 async_msg : 1;
+ u32 reserved : 23;
+ enum dxgkvmb_commandtype_host_to_vm command_type;
+};
+
+/* Returns ntstatus */
+struct dxgkvmb_command_setiospaceregion {
+ struct dxgkvmb_command_vm_to_host hdr;
+ u64 start;
+ u64 length;
+ u32 shared_page_gpadl;
+};
+
#endif /* _DXGVMBUS_H */
diff --git a/drivers/hv/dxgkrnl/ioctl.c b/drivers/hv/dxgkrnl/ioctl.c
new file mode 100644
index 000000000000..23ecd15b0cd7
--- /dev/null
+++ b/drivers/hv/dxgkrnl/ioctl.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * Ioctl implementation
+ *
+ */
+
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <linux/mman.h>
+
+#include "dxgkrnl.h"
+#include "dxgvmbus.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "dxgk: " fmt
diff --git a/drivers/hv/dxgkrnl/misc.h b/drivers/hv/dxgkrnl/misc.h
new file mode 100644
index 000000000000..4c6047c32a20
--- /dev/null
+++ b/drivers/hv/dxgkrnl/misc.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * Misc definitions
+ *
+ */
+
+#ifndef _MISC_H_
+#define _MISC_H_
+
+extern const struct d3dkmthandle zerohandle;
+
+/*
+ * Synchronization lock hierarchy.
+ *
+ * The higher enum value, the higher is the lock order.
+ * When a lower lock ois held, the higher lock should not be acquired.
+ *
+ * channel_lock
+ * device_mutex
+ */
+
+/*
+ * Some of the Windows return codes, which needs to be translated to Linux
+ * IOCTL return codes. Positive values are success codes and need to be
+ * returned from the driver IOCTLs. libdxcore.so depends on returning
+ * specific return codes.
+ */
+#define STATUS_SUCCESS ((int)(0))
+#define STATUS_OBJECT_NAME_INVALID ((int)(0xC0000033L))
+#define STATUS_DEVICE_REMOVED ((int)(0xC00002B6L))
+#define STATUS_INVALID_HANDLE ((int)(0xC0000008L))
+#define STATUS_ILLEGAL_INSTRUCTION ((int)(0xC000001DL))
+#define STATUS_NOT_IMPLEMENTED ((int)(0xC0000002L))
+#define STATUS_PENDING ((int)(0x00000103L))
+#define STATUS_ACCESS_DENIED ((int)(0xC0000022L))
+#define STATUS_BUFFER_TOO_SMALL ((int)(0xC0000023L))
+#define STATUS_OBJECT_TYPE_MISMATCH ((int)(0xC0000024L))
+#define STATUS_GRAPHICS_ALLOCATION_BUSY ((int)(0xC01E0102L))
+#define STATUS_NOT_SUPPORTED ((int)(0xC00000BBL))
+#define STATUS_TIMEOUT ((int)(0x00000102L))
+#define STATUS_INVALID_PARAMETER ((int)(0xC000000DL))
+#define STATUS_NO_MEMORY ((int)(0xC0000017L))
+#define STATUS_OBJECT_NAME_COLLISION ((int)(0xC0000035L))
+#define STATUS_OBJECT_NAME_NOT_FOUND ((int)(0xC0000034L))
+
+
+/* True when the NTSTATUS value is non-negative, i.e. not a failure code. */
+#define NT_SUCCESS(status) ((status).v >= 0)
+
+#ifndef DEBUG
+
+#define DXGKRNL_ASSERT(exp)
+
+#else
+
+#define DXGKRNL_ASSERT(exp) \
+do { \
+ if (!(exp)) { \
+ dump_stack(); \
+ BUG_ON(true); \
+ } \
+} while (0)
+
+#endif /* DEBUG */
+
+#endif /* _MISC_H_ */
diff --git a/include/uapi/misc/d3dkmthk.h b/include/uapi/misc/d3dkmthk.h
index 5d973604400c..2ea04cc02a1f 100644
--- a/include/uapi/misc/d3dkmthk.h
+++ b/include/uapi/misc/d3dkmthk.h
@@ -14,6 +14,40 @@
#ifndef _D3DKMTHK_H
#define _D3DKMTHK_H
+/*
+ * This structure matches the definition of D3DKMTHANDLE in Windows.
+ * The handle is opaque in user mode. It is used by user mode applications to
+ * represent kernel mode objects, created by dxgkrnl.
+ */
+struct d3dkmthandle {
+ union {
+ struct {
+ __u32 instance : 6;
+ __u32 index : 24;
+ __u32 unique : 2;
+ };
+ __u32 v;
+ };
+};
+
+/*
+ * VM bus messages return Windows' NTSTATUS, which is integer and only negative
+ * value indicates a failure. A positive number is a success and needs to be
+ * returned to user mode as the IOCTL return code. Negative status codes are
+ * converted to Linux error codes.
+ */
+struct ntstatus {
+ union {
+ struct {
+ int code : 16;
+ int facility : 13;
+ int customer : 1;
+ int severity : 2;
+ };
+ int v;
+ };
+};
+
/*
* Matches the Windows LUID definition.
* LUID is a locally unique identifier (similar to GUID, but not global),
^ permalink raw reply related
* [PATCH 01/55] drivers: hv: dxgkrnl: Driver initialization and loading
From: Eric Curtin @ 2026-03-19 20:24 UTC (permalink / raw)
To: linux-hyperv; +Cc: linux-kernel, iourit, wei.liu, decui, haiyangz
In-Reply-To: <20260319202509.63802-1-eric.curtin@docker.com>
From: Iouri Tarassov <iourit@linux.microsoft.com>
- Create skeleton and add basic functionality for the Hyper-V
compute device driver (dxgkrnl).
- Register for PCI and VMBus driver notifications and handle
initialization of VMBus channels.
- Connect the dxgkrnl module to the drivers/hv/ Makefile and Kconfig
- Create a MAINTAINERS entry
A VMBus channel is a communication interface between the Hyper-V guest
and the host. There are two types of VMBus channels, used in the driver:
- the global channel
- per virtual compute device channel
A PCI device is created for each virtual compute device, projected
by the host. The device vendor is PCI_VENDOR_ID_MICROSOFT and device
id is PCI_DEVICE_ID_VIRTUAL_RENDER. dxg_pci_probe_device handles
arrival of such devices. The PCI config space of the virtual compute
device has luid of the corresponding virtual compute device VM
bus channel. This is how the compute device adapter objects are
linked to VMBus channels.
VMBus interface version is exchanged by reading/writing the PCI config
space of the virtual compute device.
The IO space is used to handle CPU accessible compute device
allocations. Hyper-V allocates IO space for the global VMBus channel.
Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com>
[kms: forward port to 6.6 from 6.1. No code changes made.]
Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
---
MAINTAINERS | 7 +
drivers/hv/Kconfig | 2 +
drivers/hv/Makefile | 1 +
drivers/hv/dxgkrnl/Kconfig | 26 ++
drivers/hv/dxgkrnl/Makefile | 5 +
drivers/hv/dxgkrnl/dxgkrnl.h | 155 ++++++++++
drivers/hv/dxgkrnl/dxgmodule.c | 506 +++++++++++++++++++++++++++++++++
drivers/hv/dxgkrnl/dxgvmbus.c | 92 ++++++
drivers/hv/dxgkrnl/dxgvmbus.h | 19 ++
include/uapi/misc/d3dkmthk.h | 27 ++
10 files changed, 840 insertions(+)
create mode 100644 drivers/hv/dxgkrnl/Kconfig
create mode 100644 drivers/hv/dxgkrnl/Makefile
create mode 100644 drivers/hv/dxgkrnl/dxgkrnl.h
create mode 100644 drivers/hv/dxgkrnl/dxgmodule.c
create mode 100644 drivers/hv/dxgkrnl/dxgvmbus.c
create mode 100644 drivers/hv/dxgkrnl/dxgvmbus.h
create mode 100644 include/uapi/misc/d3dkmthk.h
diff --git a/MAINTAINERS b/MAINTAINERS
index ae4c0cec5073..4fe0b3501931 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9771,6 +9771,13 @@ F: Documentation/devicetree/bindings/mtd/ti,am654-hbmc.yaml
F: drivers/mtd/hyperbus/
F: include/linux/mtd/hyperbus.h
+Hyper-V vGPU DRIVER
+M: Iouri Tarassov <iourit@microsoft.com>
+L: linux-hyperv@vger.kernel.org
+S: Supported
+F: drivers/hv/dxgkrnl/
+F: include/uapi/misc/d3dkmthk.h
+
HYPERVISOR VIRTUAL CONSOLE DRIVER
L: linuxppc-dev@lists.ozlabs.org
S: Odd Fixes
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 862c47b191af..b16c7701da19 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -55,4 +55,6 @@ config HYPERV_BALLOON
help
Select this option to enable Hyper-V Balloon driver.
+source "drivers/hv/dxgkrnl/Kconfig"
+
endmenu
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index d76df5c8c2a9..aa1cbdb5d0d2 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -2,6 +2,7 @@
obj-$(CONFIG_HYPERV) += hv_vmbus.o
obj-$(CONFIG_HYPERV_UTILS) += hv_utils.o
obj-$(CONFIG_HYPERV_BALLOON) += hv_balloon.o
+obj-$(CONFIG_DXGKRNL) += dxgkrnl/
CFLAGS_hv_trace.o = -I$(src)
CFLAGS_hv_balloon.o = -I$(src)
diff --git a/drivers/hv/dxgkrnl/Kconfig b/drivers/hv/dxgkrnl/Kconfig
new file mode 100644
index 000000000000..bcd92bbff939
--- /dev/null
+++ b/drivers/hv/dxgkrnl/Kconfig
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+# Configuration for the hyper-v virtual compute driver (dxgkrnl)
+#
+
+config DXGKRNL
+ tristate "Microsoft Paravirtualized GPU support"
+ depends on HYPERV
+ depends on 64BIT || COMPILE_TEST
+ help
+ This driver supports paravirtualized virtual compute devices, exposed
+ by Microsoft Hyper-V when Linux is running inside of a virtual machine
+ hosted by Windows. The virtual machine needs to be configured to use
+ host compute adapters. The driver name is dxgkrnl.
+
+ An example of such virtual machine is a Windows Subsystem for
+ Linux container. When such container is instantiated, the Windows host
+ assigns compatible host GPU adapters to the container. The corresponding
+ virtual GPU devices appear on the PCI bus in the container. These
+ devices are enumerated and accessed by this driver.
+
+ Communications with the driver are done by using the Microsoft libdxcore
+ library, which translates the D3DKMT interface
+ <https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/d3dkmthk/>
+ to the driver IOCTLs. The virtual GPU devices are paravirtualized,
+ which means that access to the hardware is done in the host. The driver
+ communicates with the host using Hyper-V VM bus communication channels.
diff --git a/drivers/hv/dxgkrnl/Makefile b/drivers/hv/dxgkrnl/Makefile
new file mode 100644
index 000000000000..76349064b60a
--- /dev/null
+++ b/drivers/hv/dxgkrnl/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for the hyper-v compute device driver (dxgkrnl).
+
+obj-$(CONFIG_DXGKRNL) += dxgkrnl.o
+dxgkrnl-y := dxgmodule.o dxgvmbus.o
diff --git a/drivers/hv/dxgkrnl/dxgkrnl.h b/drivers/hv/dxgkrnl/dxgkrnl.h
new file mode 100644
index 000000000000..f7900840d1ed
--- /dev/null
+++ b/drivers/hv/dxgkrnl/dxgkrnl.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * Headers for internal objects
+ *
+ */
+
+#ifndef _DXGKRNL_H
+#define _DXGKRNL_H
+
+#include <linux/uuid.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/mutex.h>
+#include <linux/semaphore.h>
+#include <linux/refcount.h>
+#include <linux/rwsem.h>
+#include <linux/atomic.h>
+#include <linux/spinlock.h>
+#include <linux/gfp.h>
+#include <linux/miscdevice.h>
+#include <linux/pci.h>
+#include <linux/hyperv.h>
+#include <uapi/misc/d3dkmthk.h>
+#include <linux/version.h>
+
+struct dxgadapter;
+
+/*
+ * Driver private data.
+ * A single /dev/dxg device is created per virtual machine.
+ */
+struct dxgdriver{
+ struct dxgglobal *dxgglobal;
+ struct device *dxgdev;
+ struct pci_driver pci_drv;
+ struct hv_driver vmbus_drv;
+};
+extern struct dxgdriver dxgdrv;
+
+#define DXGDEV dxgdrv.dxgdev
+
+struct dxgvmbuschannel {
+ struct vmbus_channel *channel;
+ struct hv_device *hdev;
+ spinlock_t packet_list_mutex;
+ struct list_head packet_list_head;
+ struct kmem_cache *packet_cache;
+ atomic64_t packet_request_id;
+};
+
+int dxgvmbuschannel_init(struct dxgvmbuschannel *ch, struct hv_device *hdev);
+void dxgvmbuschannel_destroy(struct dxgvmbuschannel *ch);
+void dxgvmbuschannel_receive(void *ctx);
+
+/*
+ * The structure defines an offered vGPU vm bus channel.
+ */
+struct dxgvgpuchannel {
+ struct list_head vgpu_ch_list_entry;
+ struct winluid adapter_luid;
+ struct hv_device *hdev;
+};
+
+/*
+ * Driver-wide state. One instance is allocated by dxgglobal_create() when the
+ * module is loaded and is reachable through dxggbl().
+ */
+struct dxgglobal {
+ struct dxgdriver *drvdata;
+ /* The global VM bus channel and the VM bus device it was offered on */
+ struct dxgvmbuschannel channel;
+ struct hv_device *hdev;
+ u32 num_adapters;
+ u32 vmbus_ver; /* Interface version */
+ /* MMIO region allocated for the global channel by dxgglobal_getiospace() */
+ struct resource *mem;
+ u64 mmiospace_base;
+ u64 mmiospace_size;
+ /* The "dxg" misc device registered in dxg_drv_init() */
+ struct miscdevice dxgdevice;
+ struct mutex device_mutex;
+
+ /*
+ * List of the vGPU VM bus channels (dxgvgpuchannel)
+ * Protected by device_mutex
+ */
+ struct list_head vgpu_ch_list_head;
+
+ /* protects access to the global VM bus channel */
+ struct rw_semaphore channel_lock;
+
+ bool global_channel_initialized;
+ bool async_msg_enabled;
+ /*
+ * Set after the corresponding registration succeeded in dxg_drv_init();
+ * dxgglobal_destroy() uses them to undo only what was registered.
+ */
+ bool misc_registered;
+ bool pci_registered;
+ bool vmbus_registered;
+};
+
+static inline struct dxgglobal *dxggbl(void)
+{
+ return dxgdrv.dxgglobal;
+}
+
+struct dxgprocess {
+ /* Placeholder */
+};
+
+/*
+ * The convention is that the VMBus instance id is a GUID, but the host sets
+ * the lower part of the value to the host adapter LUID. The function
+ * provides the necessary conversion.
+ *
+ * The leading sizeof(struct winluid) bytes of the GUID are copied instead of
+ * being read through a cast pointer: the GUID is accessed as a byte array
+ * (guid->b), which gives no alignment guarantee for the 32-bit members of
+ * struct winluid.
+ */
+static inline void guid_to_luid(guid_t *guid, struct winluid *luid)
+{
+	memcpy(luid, &guid->b[0], sizeof(*luid));
+}
+
+/*
+ * VM bus interface
+ *
+ */
+
+/*
+ * The interface version is used to ensure that the host and the guest use the
+ * same VM bus protocol. It needs to be incremented every time the VM bus
+ * interface changes. DXGK_VMBUS_LAST_COMPATIBLE_INTERFACE_VERSION is
+ * incremented each time the earlier versions of the interface are no longer
+ * compatible with the current version.
+ */
+#define DXGK_VMBUS_INTERFACE_VERSION_OLD 27
+#define DXGK_VMBUS_INTERFACE_VERSION 40
+#define DXGK_VMBUS_LAST_COMPATIBLE_INTERFACE_VERSION 16
+
+#ifdef DEBUG
+
+void dxgk_validate_ioctls(void);
+
+#define DXG_TRACE(fmt, ...) do { \
+ trace_printk(dev_fmt(fmt) "\n", ##__VA_ARGS__); \
+} while (0)
+
+#define DXG_ERR(fmt, ...) do { \
+ dev_err(DXGDEV, fmt, ##__VA_ARGS__); \
+ trace_printk("*** dxgkerror *** " dev_fmt(fmt) "\n", ##__VA_ARGS__); \
+} while (0)
+
+#else
+
+#define DXG_TRACE(...)
+#define DXG_ERR(fmt, ...) do { \
+ dev_err(DXGDEV, fmt, ##__VA_ARGS__); \
+} while (0)
+
+#endif /* DEBUG */
+
+#endif
diff --git a/drivers/hv/dxgkrnl/dxgmodule.c b/drivers/hv/dxgkrnl/dxgmodule.c
new file mode 100644
index 000000000000..de02edc4d023
--- /dev/null
+++ b/drivers/hv/dxgkrnl/dxgmodule.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * Interface with Linux kernel, PCI driver and the VM bus driver
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/eventfd.h>
+#include <linux/hyperv.h>
+#include <linux/pci.h>
+#include "dxgkrnl.h"
+
+#define PCI_VENDOR_ID_MICROSOFT 0x1414
+#define PCI_DEVICE_ID_VIRTUAL_RENDER 0x008E
+
+#undef pr_fmt
+#define pr_fmt(fmt) "dxgk: " fmt
+
+/*
+ * Interface from dxgglobal
+ */
+
+struct vmbus_channel *dxgglobal_get_vmbus(void)
+{
+ return dxggbl()->channel.channel;
+}
+
+struct dxgvmbuschannel *dxgglobal_get_dxgvmbuschannel(void)
+{
+ return &dxggbl()->channel;
+}
+
+/*
+ * Take the global VM bus channel lock for reading.
+ *
+ * Returns 0 with channel_lock held for read when the global channel exists;
+ * the caller pairs it with dxgglobal_release_channel_lock().
+ * Returns -ENODEV when there is no global channel. The lock is dropped again
+ * before returning in that case, so a failed acquire must not be followed by
+ * a release.
+ */
+int dxgglobal_acquire_channel_lock(void)
+{
+	struct dxgglobal *dxgglobal = dxggbl();
+
+	down_read(&dxgglobal->channel_lock);
+	if (dxgglobal->channel.channel == NULL) {
+		/* Do not leave the semaphore held on the failure path */
+		up_read(&dxgglobal->channel_lock);
+		DXG_ERR("Failed to acquire global channel lock");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+void dxgglobal_release_channel_lock(void)
+{
+ up_read(&dxggbl()->channel_lock);
+}
+
+const struct file_operations dxgk_fops = {
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Interface with the PCI driver
+ */
+
+/*
+ * Part of the PCI config space of the compute device is used for
+ * configuration data. Reading/writing of the PCI config space is forwarded
+ * to the host.
+ *
+ * Below are offsets in the PCI config spaces for various configuration values.
+ */
+
+/* Compute device VM bus channel instance ID */
+#define DXGK_VMBUS_CHANNEL_ID_OFFSET 192
+
+/* DXGK_VMBUS_INTERFACE_VERSION (u32) */
+#define DXGK_VMBUS_VERSION_OFFSET (DXGK_VMBUS_CHANNEL_ID_OFFSET + \
+ sizeof(guid_t))
+
+/* Luid of the virtual GPU on the host (struct winluid) */
+#define DXGK_VMBUS_VGPU_LUID_OFFSET (DXGK_VMBUS_VERSION_OFFSET + \
+ sizeof(u32))
+
+/* The guest writes its capabilities to this address */
+#define DXGK_VMBUS_GUESTCAPS_OFFSET (DXGK_VMBUS_VERSION_OFFSET + \
+ sizeof(u32))
+
+/* Capabilities of the guest driver, reported to the host */
+struct dxgk_vmbus_guestcaps {
+ union {
+ struct {
+ u32 wsl2 : 1;
+ u32 reserved : 31;
+ };
+ u32 guest_caps;
+ };
+};
+
+/*
+ * A helper function to read PCI config space.
+ *
+ * Reads @size bytes starting at config space @offset into @val, one 32-bit
+ * dword at a time. Both @offset and @size must be non-negative multiples
+ * of 4. Returns 0 on success or a negative errno.
+ */
+static int dxg_pci_read_dwords(struct pci_dev *dev, int offset, int size,
+			       void *val)
+{
+	int off = offset;
+	int ret;
+	int i;
+
+	/*
+	 * A negative size would be converted to a huge unsigned count by the
+	 * division in the loop bound, so reject it up front.
+	 */
+	if (offset < 0 || size < 0)
+		return -EINVAL;
+
+	/* Make sure the offset and size are 32 bit aligned */
+	if (offset & 3 || size & 3)
+		return -EINVAL;
+
+	for (i = 0; i < size / (int)sizeof(int); i++) {
+		ret = pci_read_config_dword(dev, off, &((int *)val)[i]);
+		if (ret) {
+			DXG_ERR("Failed to read PCI config: %d", off);
+			/*
+			 * The config accessors report failures as positive
+			 * PCIBIOS_* codes; hand callers a negative errno like
+			 * the -EINVAL paths above.
+			 */
+			return pcibios_err_to_errno(ret);
+		}
+		off += sizeof(int);
+	}
+	return 0;
+}
+
+/*
+ * PCI probe callback for a virtual compute device.
+ *
+ * On the first probed device (vmbus_ver still 0) the guest capabilities are
+ * written to the config space and the VM bus interface version is negotiated.
+ * For every device the VM bus channel instance id is read and, when the
+ * negotiated version is at least DXGK_VMBUS_INTERFACE_VERSION, the host vGPU
+ * luid as well.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int dxg_pci_probe_device(struct pci_dev *dev,
+				const struct pci_device_id *id)
+{
+	int ret;
+	guid_t guid;
+	u32 vmbus_interface_ver = DXGK_VMBUS_INTERFACE_VERSION;
+	struct winluid vgpu_luid = {};
+	struct dxgk_vmbus_guestcaps guest_caps = {.wsl2 = 1};
+	struct dxgglobal *dxgglobal = dxggbl();
+
+	mutex_lock(&dxgglobal->device_mutex);
+
+	if (dxgglobal->vmbus_ver == 0) {
+		/* Report capabilities to the host */
+
+		ret = pci_write_config_dword(dev, DXGK_VMBUS_GUESTCAPS_OFFSET,
+					     guest_caps.guest_caps);
+		if (ret)
+			goto cleanup;
+
+		/* Negotiate the VM bus version */
+
+		ret = pci_read_config_dword(dev, DXGK_VMBUS_VERSION_OFFSET,
+					    &vmbus_interface_ver);
+		if (ret == 0 && vmbus_interface_ver != 0)
+			dxgglobal->vmbus_ver = vmbus_interface_ver;
+		else
+			dxgglobal->vmbus_ver = DXGK_VMBUS_INTERFACE_VERSION_OLD;
+
+		/* An older host: skip writing back the guest version */
+		if (dxgglobal->vmbus_ver < DXGK_VMBUS_INTERFACE_VERSION)
+			goto read_channel_id;
+
+		ret = pci_write_config_dword(dev, DXGK_VMBUS_VERSION_OFFSET,
+					     DXGK_VMBUS_INTERFACE_VERSION);
+		if (ret)
+			goto cleanup;
+
+		/* Never use a version newer than the one the guest supports */
+		if (dxgglobal->vmbus_ver > DXGK_VMBUS_INTERFACE_VERSION)
+			dxgglobal->vmbus_ver = DXGK_VMBUS_INTERFACE_VERSION;
+	}
+
+read_channel_id:
+
+	/* Get the VM bus channel ID for the virtual GPU */
+	ret = dxg_pci_read_dwords(dev, DXGK_VMBUS_CHANNEL_ID_OFFSET,
+				  sizeof(guid), (int *)&guid);
+	if (ret)
+		goto cleanup;
+
+	if (dxgglobal->vmbus_ver >= DXGK_VMBUS_INTERFACE_VERSION) {
+		ret = dxg_pci_read_dwords(dev, DXGK_VMBUS_VGPU_LUID_OFFSET,
+					  sizeof(vgpu_luid), &vgpu_luid);
+		if (ret)
+			goto cleanup;
+	}
+
+	DXG_TRACE("Adapter channel: %pUb", &guid);
+	DXG_TRACE("Vmbus interface version: %d", dxgglobal->vmbus_ver);
+	DXG_TRACE("Host luid: %x-%x", vgpu_luid.b, vgpu_luid.a);
+
+cleanup:
+
+	mutex_unlock(&dxgglobal->device_mutex);
+
+	/*
+	 * The PCI config accessors report failures as positive PCIBIOS_*
+	 * codes, while a probe callback has to return a negative errno.
+	 * pcibios_err_to_errno() passes zero and negative values through
+	 * unchanged, so it is safe to apply to every exit path.
+	 */
+	ret = pcibios_err_to_errno(ret);
+	if (ret)
+		DXG_TRACE("err: %d", ret);
+	return ret;
+}
+
+static void dxg_pci_remove_device(struct pci_dev *dev)
+{
+ /* Placeholder */
+}
+
+static struct pci_device_id dxg_pci_id_table[] = {
+ {
+ .vendor = PCI_VENDOR_ID_MICROSOFT,
+ .device = PCI_DEVICE_ID_VIRTUAL_RENDER,
+ .subvendor = PCI_ANY_ID,
+ .subdevice = PCI_ANY_ID
+ },
+ { 0 }
+};
+
+/*
+ * Interface with the VM bus driver
+ */
+
+static int dxgglobal_getiospace(struct dxgglobal *dxgglobal)
+{
+ /* Get mmio space for the global channel */
+ struct hv_device *hdev = dxgglobal->hdev;
+ struct vmbus_channel *channel = hdev->channel;
+ resource_size_t pot_start = 0;
+ resource_size_t pot_end = -1;
+ int ret;
+
+ dxgglobal->mmiospace_size = channel->offermsg.offer.mmio_megabytes;
+ if (dxgglobal->mmiospace_size == 0) {
+ DXG_TRACE("Zero mmio space is offered");
+ return -ENOMEM;
+ }
+ dxgglobal->mmiospace_size <<= 20;
+ DXG_TRACE("mmio offered: %llx", dxgglobal->mmiospace_size);
+
+ ret = vmbus_allocate_mmio(&dxgglobal->mem, hdev, pot_start, pot_end,
+ dxgglobal->mmiospace_size, 0x10000, false);
+ if (ret) {
+ DXG_ERR("Unable to allocate mmio memory: %d", ret);
+ return ret;
+ }
+ dxgglobal->mmiospace_size = dxgglobal->mem->end -
+ dxgglobal->mem->start + 1;
+ dxgglobal->mmiospace_base = dxgglobal->mem->start;
+ DXG_TRACE("mmio allocated %llx %llx %llx %llx",
+ dxgglobal->mmiospace_base, dxgglobal->mmiospace_size,
+ dxgglobal->mem->start, dxgglobal->mem->end);
+
+ return 0;
+}
+
+int dxgglobal_init_global_channel(void)
+{
+ int ret = 0;
+ struct dxgglobal *dxgglobal = dxggbl();
+
+ ret = dxgvmbuschannel_init(&dxgglobal->channel, dxgglobal->hdev);
+ if (ret) {
+ DXG_ERR("dxgvmbuschannel_init failed: %d", ret);
+ goto error;
+ }
+
+ ret = dxgglobal_getiospace(dxgglobal);
+ if (ret) {
+ DXG_ERR("getiospace failed: %d", ret);
+ goto error;
+ }
+
+ hv_set_drvdata(dxgglobal->hdev, dxgglobal);
+
+error:
+ return ret;
+}
+
+void dxgglobal_destroy_global_channel(void)
+{
+ struct dxgglobal *dxgglobal = dxggbl();
+
+ down_write(&dxgglobal->channel_lock);
+
+ dxgglobal->global_channel_initialized = false;
+
+ if (dxgglobal->mem) {
+ vmbus_free_mmio(dxgglobal->mmiospace_base,
+ dxgglobal->mmiospace_size);
+ dxgglobal->mem = NULL;
+ }
+
+ dxgvmbuschannel_destroy(&dxgglobal->channel);
+
+ if (dxgglobal->hdev) {
+ hv_set_drvdata(dxgglobal->hdev, NULL);
+ dxgglobal->hdev = NULL;
+ }
+
+ up_write(&dxgglobal->channel_lock);
+}
+
+/*
+ * VM bus device types handled by the driver.
+ * dxg_probe_vmbus() and dxg_remove_vmbus() refer to the entries by index:
+ * [0] is the per-vGPU channel and [1] is the global channel, so the order
+ * of the entries must not change.
+ */
+static const struct hv_vmbus_device_id dxg_vmbus_id_table[] = {
+ /* Per GPU Device GUID */
+ { HV_GPUP_DXGK_VGPU_GUID },
+ /* Global Dxgkrnl channel for the virtual machine */
+ { HV_GPUP_DXGK_GLOBAL_GUID },
+ { }
+};
+
+/*
+ * VM bus probe callback.
+ *
+ * A per-vGPU channel is recorded in dxgglobal->vgpu_ch_list_head together
+ * with the adapter luid taken from the channel instance id. The global
+ * channel device is remembered in dxgglobal->hdev and may appear only once.
+ *
+ * Returns 0 on success, -ENOMEM when the channel record cannot be allocated
+ * and -ENODEV for a duplicate global channel or an unknown device type.
+ */
+static int dxg_probe_vmbus(struct hv_device *hdev,
+			   const struct hv_vmbus_device_id *dev_id)
+{
+	int ret = 0;
+	struct winluid luid;
+	struct dxgvgpuchannel *vgpuch;
+	struct dxgglobal *dxgglobal = dxggbl();
+
+	mutex_lock(&dxgglobal->device_mutex);
+
+	if (guid_equal(&hdev->dev_type, &dxg_vmbus_id_table[0].guid)) {
+		/* This is a new virtual GPU channel */
+		guid_to_luid(&hdev->channel->offermsg.offer.if_instance, &luid);
+		DXG_TRACE("vGPU channel: %pUb",
+			  &hdev->channel->offermsg.offer.if_instance);
+		vgpuch = kzalloc(sizeof(*vgpuch), GFP_KERNEL);
+		if (vgpuch == NULL) {
+			ret = -ENOMEM;
+			goto error;
+		}
+		vgpuch->adapter_luid = luid;
+		vgpuch->hdev = hdev;
+		list_add_tail(&vgpuch->vgpu_ch_list_entry,
+			      &dxgglobal->vgpu_ch_list_head);
+	} else if (guid_equal(&hdev->dev_type,
+			      &dxg_vmbus_id_table[1].guid)) {
+		/* This is the global Dxgkrnl channel */
+		DXG_TRACE("Global channel: %pUb",
+			  &hdev->channel->offermsg.offer.if_instance);
+		if (dxgglobal->hdev) {
+			/* This device should appear only once */
+			DXG_ERR("global channel already exists");
+			ret = -ENODEV;
+			goto error;
+		}
+		dxgglobal->hdev = hdev;
+	} else {
+		/* Unknown device type */
+		DXG_ERR("Unknown VM bus device type");
+		ret = -ENODEV;
+	}
+
+error:
+
+	mutex_unlock(&dxgglobal->device_mutex);
+
+	return ret;
+}
+
+/*
+ * VM bus remove callback.
+ *
+ * For a per-vGPU channel the matching dxgvgpuchannel record is unlinked from
+ * dxgglobal->vgpu_ch_list_head and freed. For the global channel device the
+ * global channel is destroyed.
+ *
+ * Returns 0, or -ENODEV for an unknown device type.
+ */
+static int dxg_remove_vmbus(struct hv_device *hdev)
+{
+	int ret = 0;
+	struct dxgvgpuchannel *vgpu_channel;
+	struct dxgglobal *dxgglobal = dxggbl();
+
+	mutex_lock(&dxgglobal->device_mutex);
+
+	if (guid_equal(&hdev->dev_type, &dxg_vmbus_id_table[0].guid)) {
+		DXG_TRACE("Remove virtual GPU channel");
+		list_for_each_entry(vgpu_channel,
+				    &dxgglobal->vgpu_ch_list_head,
+				    vgpu_ch_list_entry) {
+			if (vgpu_channel->hdev == hdev) {
+				/*
+				 * Deleting inside the non-safe iterator is
+				 * fine only because the loop is left
+				 * immediately afterwards.
+				 */
+				list_del(&vgpu_channel->vgpu_ch_list_entry);
+				kfree(vgpu_channel);
+				break;
+			}
+		}
+	} else if (guid_equal(&hdev->dev_type,
+			      &dxg_vmbus_id_table[1].guid)) {
+		DXG_TRACE("Remove global channel device");
+		dxgglobal_destroy_global_channel();
+	} else {
+		/* Unknown device type */
+		DXG_ERR("Unknown device type");
+		ret = -ENODEV;
+	}
+
+	mutex_unlock(&dxgglobal->device_mutex);
+
+	return ret;
+}
+
+MODULE_DEVICE_TABLE(vmbus, dxg_vmbus_id_table);
+MODULE_DEVICE_TABLE(pci, dxg_pci_id_table);
+
+/*
+ * Global driver data
+ */
+
+struct dxgdriver dxgdrv = {
+ .vmbus_drv.name = KBUILD_MODNAME,
+ .vmbus_drv.id_table = dxg_vmbus_id_table,
+ .vmbus_drv.probe = dxg_probe_vmbus,
+ .vmbus_drv.remove = dxg_remove_vmbus,
+ .vmbus_drv.driver = {
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ },
+ .pci_drv.name = KBUILD_MODNAME,
+ .pci_drv.id_table = dxg_pci_id_table,
+ .pci_drv.probe = dxg_pci_probe_device,
+ .pci_drv.remove = dxg_pci_remove_device
+};
+
+static struct dxgglobal *dxgglobal_create(void)
+{
+ struct dxgglobal *dxgglobal;
+
+ dxgglobal = kzalloc(sizeof(struct dxgglobal), GFP_KERNEL);
+ if (!dxgglobal)
+ return NULL;
+
+ mutex_init(&dxgglobal->device_mutex);
+
+ INIT_LIST_HEAD(&dxgglobal->vgpu_ch_list_head);
+
+ init_rwsem(&dxgglobal->channel_lock);
+
+ return dxgglobal;
+}
+
+static void dxgglobal_destroy(struct dxgglobal *dxgglobal)
+{
+ if (dxgglobal) {
+ mutex_lock(&dxgglobal->device_mutex);
+ dxgglobal_destroy_global_channel();
+ mutex_unlock(&dxgglobal->device_mutex);
+
+ if (dxgglobal->vmbus_registered)
+ vmbus_driver_unregister(&dxgdrv.vmbus_drv);
+
+ dxgglobal_destroy_global_channel();
+
+ if (dxgglobal->pci_registered)
+ pci_unregister_driver(&dxgdrv.pci_drv);
+
+ if (dxgglobal->misc_registered)
+ misc_deregister(&dxgglobal->dxgdevice);
+
+ dxgglobal->drvdata->dxgdev = NULL;
+
+ kfree(dxgglobal);
+ dxgglobal = NULL;
+ }
+}
+
+/*
+ * Module entry point.
+ *
+ * Allocates the global driver state, registers the "dxg" misc device and then
+ * the VM bus and PCI drivers. On any failure everything that was set up so
+ * far is undone by dxgglobal_destroy().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init dxg_drv_init(void)
+{
+	int ret;
+	struct dxgglobal *dxgglobal = NULL;
+
+	dxgglobal = dxgglobal_create();
+	if (dxgglobal == NULL) {
+		pr_err("dxgglobal_init failed");
+		ret = -ENOMEM;
+		goto error;
+	}
+	dxgglobal->drvdata = &dxgdrv;
+
+	/*
+	 * Publish the global state before the first step that can fail.
+	 * The cleanup in dxgglobal_destroy() calls
+	 * dxgglobal_destroy_global_channel(), which looks the state up through
+	 * dxggbl(); with the pointer still NULL a misc_register() failure
+	 * would dereference NULL there.
+	 */
+	dxgdrv.dxgglobal = dxgglobal;
+
+	dxgglobal->dxgdevice.minor = MISC_DYNAMIC_MINOR;
+	dxgglobal->dxgdevice.name = "dxg";
+	dxgglobal->dxgdevice.fops = &dxgk_fops;
+	dxgglobal->dxgdevice.mode = 0666;
+	ret = misc_register(&dxgglobal->dxgdevice);
+	if (ret) {
+		pr_err("misc_register failed: %d", ret);
+		goto error;
+	}
+	dxgglobal->misc_registered = true;
+	dxgdrv.dxgdev = dxgglobal->dxgdevice.this_device;
+
+	ret = vmbus_driver_register(&dxgdrv.vmbus_drv);
+	if (ret) {
+		DXG_ERR("vmbus_driver_register failed: %d", ret);
+		goto error;
+	}
+	dxgglobal->vmbus_registered = true;
+
+	ret = pci_register_driver(&dxgdrv.pci_drv);
+	if (ret) {
+		DXG_ERR("pci_driver_register failed: %d", ret);
+		goto error;
+	}
+	dxgglobal->pci_registered = true;
+
+	return 0;
+
+error:
+	/* This function does the cleanup */
+	dxgglobal_destroy(dxgglobal);
+	dxgdrv.dxgglobal = NULL;
+
+	return ret;
+}
+
+static void __exit dxg_drv_exit(void)
+{
+ dxgglobal_destroy(dxgdrv.dxgglobal);
+}
+
+module_init(dxg_drv_init);
+module_exit(dxg_drv_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Microsoft Dxgkrnl virtual compute device Driver");
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c
new file mode 100644
index 000000000000..deb880e34377
--- /dev/null
+++ b/drivers/hv/dxgkrnl/dxgvmbus.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * VM bus interface implementation
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/eventfd.h>
+#include <linux/hyperv.h>
+#include <linux/mman.h>
+#include <linux/delay.h>
+#include <linux/pagemap.h>
+#include "dxgkrnl.h"
+#include "dxgvmbus.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) "dxgk: " fmt
+
+#define RING_BUFSIZE (256 * 1024)
+
+/*
+ * The structure is used to track VM bus packets, waiting for completion.
+ */
+struct dxgvmbuspacket {
+ struct list_head packet_list_entry;
+ u64 request_id;
+ struct completion wait;
+ void *buffer;
+ u32 buffer_length;
+ int status;
+ bool completed;
+};
+
+/*
+ * Initialize a VM bus channel wrapper and open the underlying channel.
+ *
+ * Sets up the packet list and the request id counter, creates the slab cache
+ * for dxgvmbuspacket objects and opens hdev->channel with
+ * dxgvmbuschannel_receive() as the callback.
+ *
+ * Returns 0 on success or a negative errno. On failure no packet cache is
+ * left allocated and ch->channel is not set.
+ */
+int dxgvmbuschannel_init(struct dxgvmbuschannel *ch, struct hv_device *hdev)
+{
+	int ret;
+
+	ch->hdev = hdev;
+	spin_lock_init(&ch->packet_list_mutex);
+	INIT_LIST_HEAD(&ch->packet_list_head);
+	atomic64_set(&ch->packet_request_id, 0);
+
+	ch->packet_cache = kmem_cache_create("DXGK packet cache",
+					     sizeof(struct dxgvmbuspacket), 0,
+					     0, NULL);
+	if (ch->packet_cache == NULL) {
+		DXG_ERR("packet_cache alloc failed");
+		return -ENOMEM;
+	}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,15,0)
+	hdev->channel->max_pkt_size = DXG_MAX_VM_BUS_PACKET_SIZE;
+#endif
+	ret = vmbus_open(hdev->channel, RING_BUFSIZE, RING_BUFSIZE,
+			 NULL, 0, dxgvmbuschannel_receive, ch);
+	if (ret) {
+		DXG_ERR("vmbus_open failed: %d", ret);
+		goto err_destroy_cache;
+	}
+
+	ch->channel = hdev->channel;
+
+	return 0;
+
+err_destroy_cache:
+	/*
+	 * Release the cache here instead of relying on a later
+	 * dxgvmbuschannel_destroy(); the pointer is cleared so that such a
+	 * call does not destroy the cache a second time.
+	 */
+	kmem_cache_destroy(ch->packet_cache);
+	ch->packet_cache = NULL;
+
+	return ret;
+}
+
+void dxgvmbuschannel_destroy(struct dxgvmbuschannel *ch)
+{
+ kmem_cache_destroy(ch->packet_cache);
+ ch->packet_cache = NULL;
+
+ if (ch->channel) {
+ vmbus_close(ch->channel);
+ ch->channel = NULL;
+ }
+}
+
+/* Receive callback for messages from the host */
+void dxgvmbuschannel_receive(void *ctx)
+{
+}
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h
new file mode 100644
index 000000000000..6cdca5e03d1f
--- /dev/null
+++ b/drivers/hv/dxgkrnl/dxgvmbus.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2022, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * VM bus interface with the host definitions
+ *
+ */
+
+#ifndef _DXGVMBUS_H
+#define _DXGVMBUS_H
+
+#define DXG_MAX_VM_BUS_PACKET_SIZE (1024 * 128)
+
+#endif /* _DXGVMBUS_H */
diff --git a/include/uapi/misc/d3dkmthk.h b/include/uapi/misc/d3dkmthk.h
new file mode 100644
index 000000000000..5d973604400c
--- /dev/null
+++ b/include/uapi/misc/d3dkmthk.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+/*
+ * Copyright (c) 2019, Microsoft Corporation.
+ *
+ * Author:
+ * Iouri Tarassov <iourit@linux.microsoft.com>
+ *
+ * Dxgkrnl Graphics Driver
+ * User mode WDDM interface definitions
+ *
+ */
+
+#ifndef _D3DKMTHK_H
+#define _D3DKMTHK_H
+
+/*
+ * Matches the Windows LUID definition.
+ * LUID is a locally unique identifier (similar to GUID, but not global),
+ * which is guaranteed to be unique until the computer is rebooted.
+ */
+struct winluid {
+ __u32 a;
+ __u32 b;
+};
+
+#endif /* _D3DKMTHK_H */
^ permalink raw reply related
* [PATCH v4 00/55] drivers: hv: dxgkrnl: Driver for Hyper-V virtual compute device
From: Eric Curtin @ 2026-03-19 20:24 UTC (permalink / raw)
To: linux-hyperv; +Cc: linux-kernel, iourit, wei.liu, decui, haiyangz
This patch series introduces the dxgkrnl driver, which provides Linux
support for virtual compute devices (vGPUs) paravirtualized by a Windows
Hyper-V host. The primary use case is Windows Subsystem for Linux (WSL2),
where the driver enables GPU-accelerated workloads inside Linux containers
running on Windows.
The driver creates /dev/dxg, a miscdevice that user-space API libraries
(such as the open-source libdxg) use to communicate with virtual GPU
adapters via IOCTLs implementing a subset of the WDDM/MCDM D3DKMT
interface. Physical GPU access is performed entirely on the Windows host;
the guest driver communicates over Hyper-V VMBus channels.
Key characteristics:
- Self-contained under drivers/hv/dxgkrnl/
- Depends only on CONFIG_HYPERV, DMA_SHARED_BUFFER, SYNC_FILE
- Supports multiple vGPU adapters per VM
- DMA fence integration via dxgsyncfile (SYNC_FILE)
- Supports compute-only accelerators (AI/ML workloads) as well as
full graphics adapters
Changes since v3 (posted March 2022):
- Replace deprecated one-element arrays [1] with C99 flexible arrays []
- Replace %px with %p in trace macros
- Remove unnecessary braces from single-statement if blocks
- Remove LINUX_VERSION_CODE guard for max_pkt_size (added in 5.15,
well before any target kernel for this submission)
- Remove linux/version.h include (no longer needed)
- Fix whitespace issues flagged by checkpatch
- Replace non-debug DXG_ERR do{}while(0) macro with direct dev_err call
- Change -EBADE to -ENODEV for global channel duplicate detection
(as requested by Greg KH in v3 review)
- Remove MODULE_VERSION (not recommended for in-tree drivers)
- Add explanatory comment to guid_to_luid() cast
- Additional features and fixes developed in the WSL2 fork:
* dxgsyncfile: DMA fence / sync file integration
* D3DKMTEnumProcesses, D3DDKMTIsFeatureEnabled, D3DKMTInvalidateCache
* Compute-only adapter support
* pin_user_pages for DMA-accessible memory
* Retry logic for VMBus ring buffer full condition
* Various synchronization and memory safety fixes
Regarding the dxgglobal singleton raised in v3 review:
The design reflects a host architecture constraint: each Hyper-V VM has
exactly one global VMBus channel offered by the host, regardless of how
many vGPU adapters are present. The dxgglobal structure encapsulates this
VM-level state (global channel, adapter list, process list, host event
tracking). Per-adapter state is separately managed in dxgadapter objects.
This design was previously explained in the v3 thread; the architecture
matches the Hyper-V GPU-PV protocol which is fixed by the host side.
The patches apply on top of v6.6-lts. The user-space library (libdxg)
that communicates with this driver is available at:
https://github.com/microsoft/libdxg
The full WDDM compute stack (OpenCL, oneAPI, OpenVINO) is available
open-source via Intel's compute-runtime project.
Iouri Tarassov (iourit@linux.microsoft.com) is the primary author and
maintainer of this driver.
Eric Curtin (1):
drivers: hv: dxgkrnl: Fix checkpatch issues and address reviewer
feedback
Hideyuki Nagase (1):
drivers: hv: dxgkrnl: Fix crash at hmgrtable_free_handle
Iouri Tarassov (53):
drivers: hv: dxgkrnl: Driver initialization and loading
drivers: hv: dxgkrnl: Add VMBus message support, initialize VMBus
channels.
drivers: hv: dxgkrnl: Creation of dxgadapter object
drivers: hv: dxgkrnl: Opening of /dev/dxg device and dxgprocess
creation
drivers: hv: dxgkrnl: Enumerate and open dxgadapter objects
drivers: hv: dxgkrnl: Creation of dxgdevice objects
drivers: hv: dxgkrnl: Creation of dxgcontext objects
drivers: hv: dxgkrnl: Creation of compute device allocations and
resources
drivers: hv: dxgkrnl: Creation of compute device sync objects
drivers: hv: dxgkrnl: Operations using sync objects
drivers: hv: dxgkrnl: Sharing of dxgresource objects
drivers: hv: dxgkrnl: Sharing of sync objects
drivers: hv: dxgkrnl: Creation of paging queue objects.
drivers: hv: dxgkrnl: Submit execution commands to the compute device
drivers: hv: dxgkrnl: Share objects with the host
drivers: hv: dxgkrnl: Query the dxgdevice state
drivers: hv: dxgkrnl: Map(unmap) CPU address to device allocation
drivers: hv: dxgkrnl: Manage device allocation properties
drivers: hv: dxgkrnl: Flush heap transitions
drivers: hv: dxgkrnl: Query video memory information
drivers: hv: dxgkrnl: The escape ioctl
drivers: hv: dxgkrnl: Ioctl to put device to error state
drivers: hv: dxgkrnl: Ioctls to query statistics and clock calibration
drivers: hv: dxgkrnl: Offer and reclaim allocations
drivers: hv: dxgkrnl: Ioctls to manage scheduling priority
drivers: hv: dxgkrnl: Manage residency of allocations
drivers: hv: dxgkrnl: Manage compute device virtual addresses
drivers: hv: dxgkrnl: Add support to map guest pages by host
drivers: hv: dxgkrnl: Removed struct vmbus_gpadl, which was defined in
the main linux branch
drivers: hv: dxgkrnl: Remove dxgk_init_ioctls
drivers: hv: dxgkrnl: Creation of dxgsyncfile objects
drivers: hv: dxgkrnl: Use tracing instead of dev_dbg
drivers: hv: dxgkrnl: Implement D3DKMTWaitSyncFile
drivers: hv: dxgkrnl: Improve tracing and return values from copy from
user
drivers: hv: dxgkrnl: Fix synchronization locks
drivers: hv: dxgkrnl: Close shared file objects in case of a failure
drivers: hv: dxgkrnl: Added missed NULL check for resource object
drivers: hv: dxgkrnl: Fixed dxgkrnl to build for the 6.1 kernel
drivers: hv: dxgkrnl: Added support for compute only adapters
drivers: hv: dxgkrnl: Added implementation for D3DKMTInvalidateCache
drivers: hv: dxgkrnl: Handle process ID in D3DKMTQueryStatistics
drivers: hv: dxgkrnl: Implement the D3DKMTEnumProcesses API
drivers: hv: dxgkrnl: Implement D3DDKMTIsFeatureEnabled API
drivers: hv: dxgkrnl: Implement known escapes
drivers: hv: dxgkrnl: Fixed coding style issues
drivers: hv: dxgkrnl: Fixed the implementation of
D3DKMTQueryClockCalibration
drivers: hv: dxgkrnl: Retry sending a VM bus packet when there is no
place in the ring buffer
drivers: hv: dxgkrnl: Add support for locking a shared allocation by
not the owner
drivers: hv: dxgkrnl: Fix build breaks when switching to 6.6 kernel
due to hv_driver remove callback change.
drivers: hv: dxgkrnl: Fix build breaks when switching to 6.6 kernel
due to removed uuid_le_cmp
drivers: hv: dxgkrnl: Implement D3DKMTEnumProcesses to match the
Windows implementation
drivers: hv: dxgkrnl: Use pin_user_pages instead of get_user_pages for
DMA accessible memory
drivers: hv: dxgkrnl: Do not print error messages when virtual GPU is
not present
MAINTAINERS | 7 +
drivers/hv/Kconfig | 2 +
drivers/hv/Makefile | 1 +
drivers/hv/dxgkrnl/Kconfig | 28 +
drivers/hv/dxgkrnl/Makefile | 5 +
drivers/hv/dxgkrnl/dxgadapter.c | 1367 ++++++++
drivers/hv/dxgkrnl/dxgkrnl.h | 1042 ++++++
drivers/hv/dxgkrnl/dxgmodule.c | 971 +++++
drivers/hv/dxgkrnl/dxgprocess.c | 348 ++
drivers/hv/dxgkrnl/dxgsyncfile.c | 481 +++
drivers/hv/dxgkrnl/dxgsyncfile.h | 33 +
drivers/hv/dxgkrnl/dxgvmbus.c | 3992 +++++++++++++++++++++
drivers/hv/dxgkrnl/dxgvmbus.h | 910 +++++
drivers/hv/dxgkrnl/hmgr.c | 567 +++
drivers/hv/dxgkrnl/hmgr.h | 112 +
drivers/hv/dxgkrnl/ioctl.c | 5648 ++++++++++++++++++++++++++++++
drivers/hv/dxgkrnl/misc.c | 38 +
drivers/hv/dxgkrnl/misc.h | 96 +
include/uapi/misc/d3dkmthk.h | 1794 ++++++++++
19 files changed, 17442 insertions(+)
create mode 100644 drivers/hv/dxgkrnl/Kconfig
create mode 100644 drivers/hv/dxgkrnl/Makefile
create mode 100644 drivers/hv/dxgkrnl/dxgadapter.c
create mode 100644 drivers/hv/dxgkrnl/dxgkrnl.h
create mode 100644 drivers/hv/dxgkrnl/dxgmodule.c
create mode 100644 drivers/hv/dxgkrnl/dxgprocess.c
create mode 100644 drivers/hv/dxgkrnl/dxgsyncfile.c
create mode 100644 drivers/hv/dxgkrnl/dxgsyncfile.h
create mode 100644 drivers/hv/dxgkrnl/dxgvmbus.c
create mode 100644 drivers/hv/dxgkrnl/dxgvmbus.h
create mode 100644 drivers/hv/dxgkrnl/hmgr.c
create mode 100644 drivers/hv/dxgkrnl/hmgr.h
create mode 100644 drivers/hv/dxgkrnl/ioctl.c
create mode 100644 drivers/hv/dxgkrnl/misc.c
create mode 100644 drivers/hv/dxgkrnl/misc.h
create mode 100644 include/uapi/misc/d3dkmthk.h
^ permalink raw reply
* [PATCH v3 16/16] mm: on remap assert that input range within the proposed VMA
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
Now we have range_in_vma_desc(), update remap_pfn_range_prepare() to check
whether the input range is contained within the specified VMA, so we can
fail at prepare time if an invalid range is specified.
This covers the I/O remap mmap actions also which ultimately call into
this function, and other mmap action types either already span the full
VMA or check this already.
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
mm/memory.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/mm/memory.c b/mm/memory.c
index 53ef8ef3d04a..68cc592ff0ba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3142,6 +3142,9 @@ int remap_pfn_range_prepare(struct vm_area_desc *desc)
const bool is_cow = vma_desc_is_cow_mapping(desc);
int err;
+ if (!range_in_vma_desc(desc, start, end))
+ return -EFAULT;
+
err = get_remap_pgoff(is_cow, start, end, desc->start, desc->end, pfn,
&desc->pgoff);
if (err)
--
2.53.0
^ permalink raw reply related
* [PATCH v3 15/16] mm: add mmap_action_map_kernel_pages[_full]()
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
A user can invoke mmap_action_map_kernel_pages() to specify that the
mapping should map a given number of kernel pages, supplied in an array,
starting from a specified virtual address.
In order to implement this, adjust mmap_action_prepare() to be able to
return an error code, as it makes sense to assert that the specified
parameters are valid as quickly as possible as well as updating the VMA
flags to include VMA_MIXEDMAP_BIT as necessary.
This provides an mmap_prepare equivalent of vm_insert_pages(). We
additionally update the existing vm_insert_pages() code to use
range_in_vma() and add a new range_in_vma_desc() helper function for the
mmap_prepare case, sharing the code between the two in range_is_subset().
We add both mmap_action_map_kernel_pages() and
mmap_action_map_kernel_pages_full() to allow for both partial and full VMA
mappings.
We update the documentation to reflect the new features.
Finally, we update the VMA tests accordingly to reflect the changes.
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
Documentation/filesystems/mmap_prepare.rst | 8 ++
include/linux/mm.h | 95 +++++++++++++++++++++-
include/linux/mm_types.h | 7 ++
mm/memory.c | 42 +++++++++-
mm/util.c | 6 ++
tools/testing/vma/include/dup.h | 7 ++
6 files changed, 159 insertions(+), 6 deletions(-)
diff --git a/Documentation/filesystems/mmap_prepare.rst b/Documentation/filesystems/mmap_prepare.rst
index be76ae475b9c..e810aa4134eb 100644
--- a/Documentation/filesystems/mmap_prepare.rst
+++ b/Documentation/filesystems/mmap_prepare.rst
@@ -156,5 +156,13 @@ pointer. These are:
* mmap_action_simple_ioremap() - Sets up an I/O remap from a specified
physical address and over a specified length.
+* mmap_action_map_kernel_pages() - Maps a specified array of `struct page`
+ pointers in the VMA from a specific offset.
+
+* mmap_action_map_kernel_pages_full() - Maps a specified array of `struct
+ page` pointers over the entire VMA. The caller must ensure there are
+ sufficient entries in the page array to cover the entire range of the
+ described VMA.
+
**NOTE:** The ``action`` field should never normally be manipulated directly,
rather you ought to use one of these helpers.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ef2e4dccfe8e..8aadf115278e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2912,7 +2912,7 @@ static inline bool folio_maybe_mapped_shared(struct folio *folio)
* The caller must add any reference (e.g., from folio_try_get()) it might be
* holding itself to the result.
*
- * Returns the expected folio refcount.
+ * Returns: the expected folio refcount.
*/
static inline int folio_expected_ref_count(const struct folio *folio)
{
@@ -4364,6 +4364,45 @@ static inline void mmap_action_simple_ioremap(struct vm_area_desc *desc,
action->type = MMAP_SIMPLE_IO_REMAP;
}
+/**
+ * mmap_action_map_kernel_pages - helper for mmap_prepare hook to specify that
+ * @nr_pages kernel pages contained in the @pages array should be mapped to userland
+ * starting at virtual address @start.
+ * @desc: The VMA descriptor for the VMA requiring kernel pages to be mapped.
+ * @start: The virtual address from which to map them.
+ * @pages: An array of struct page pointers describing the memory to map.
+ * @nr_pages: The number of entries in the @pages array.
+ */
+static inline void mmap_action_map_kernel_pages(struct vm_area_desc *desc,
+ unsigned long start, struct page **pages,
+ unsigned long nr_pages)
+{
+ struct mmap_action *action = &desc->action;
+
+ action->type = MMAP_MAP_KERNEL_PAGES;
+ action->map_kernel.start = start;
+ action->map_kernel.pages = pages;
+ action->map_kernel.nr_pages = nr_pages;
+ action->map_kernel.pgoff = desc->pgoff;
+}
+
+/**
+ * mmap_action_map_kernel_pages_full - helper for mmap_prepare hook to specify that
+ * kernel pages contained in the @pages array should be mapped to userland
+ * from @desc->start to @desc->end.
+ * @desc: The VMA descriptor for the VMA requiring kernel pages to be mapped.
+ * @pages: An array of struct page pointers describing the memory to map.
+ *
+ * The caller must ensure that @pages contains sufficient entries to cover the
+ * entire range described by @desc.
+ */
+static inline void mmap_action_map_kernel_pages_full(struct vm_area_desc *desc,
+ struct page **pages)
+{
+ mmap_action_map_kernel_pages(desc, desc->start, pages,
+ vma_desc_pages(desc));
+}
+
int mmap_action_prepare(struct vm_area_desc *desc);
int mmap_action_complete(struct vm_area_struct *vma,
struct mmap_action *action,
@@ -4381,10 +4420,59 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
return vma;
}
+/**
+ * range_is_subset - Is the specified inner range a subset of the outer range?
+ * @outer_start: The start of the outer range.
+ * @outer_end: The exclusive end of the outer range.
+ * @inner_start: The start of the inner range.
+ * @inner_end: The exclusive end of the inner range.
+ *
+ * Returns: %true if [inner_start, inner_end) is a subset of [outer_start,
+ * outer_end), otherwise %false.
+ */
+static inline bool range_is_subset(unsigned long outer_start,
+ unsigned long outer_end,
+ unsigned long inner_start,
+ unsigned long inner_end)
+{
+ return outer_start <= inner_start && inner_end <= outer_end;
+}
+
+/**
+ * range_in_vma - is the specified [@start, @end) range a subset of the VMA?
+ * @vma: The VMA against which we want to check [@start, @end).
+ * @start: The start of the range we wish to check.
+ * @end: The exclusive end of the range we wish to check.
+ *
+ * Returns: %true if [@start, @end) is a subset of [@vma->vm_start,
+ * @vma->vm_end), %false otherwise.
+ */
static inline bool range_in_vma(const struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- return (vma && vma->vm_start <= start && end <= vma->vm_end);
+ if (!vma)
+ return false;
+
+ return range_is_subset(vma->vm_start, vma->vm_end, start, end);
+}
+
+/**
+ * range_in_vma_desc - is the specified [@start, @end) range a subset of the VMA
+ * described by @desc, a VMA descriptor?
+ * @desc: The VMA descriptor against which we want to check [@start, @end).
+ * @start: The start of the range we wish to check.
+ * @end: The exclusive end of the range we wish to check.
+ *
+ * Returns: %true if [@start, @end) is a subset of [@desc->start, @desc->end),
+ * %false otherwise.
+ */
+static inline bool range_in_vma_desc(const struct vm_area_desc *desc,
+ unsigned long start, unsigned long end)
+{
+ if (!desc)
+ return false;
+
+ return range_is_subset(desc->start, desc->end, start, end);
}
#ifdef CONFIG_MMU
@@ -4428,6 +4516,9 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num);
+int map_kernel_pages_prepare(struct vm_area_desc *desc);
+int map_kernel_pages_complete(struct vm_area_struct *vma,
+ struct mmap_action *action);
int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
unsigned long num);
int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7538d64f8848..c46224020a46 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -815,6 +815,7 @@ enum mmap_action_type {
MMAP_REMAP_PFN, /* Remap PFN range. */
MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
MMAP_SIMPLE_IO_REMAP, /* I/O remap with guardrails. */
+ MMAP_MAP_KERNEL_PAGES, /* Map kernel page range from array. */
};
/*
@@ -833,6 +834,12 @@ struct mmap_action {
phys_addr_t start_phys_addr;
unsigned long size;
} simple_ioremap;
+ struct {
+ unsigned long start;
+ struct page **pages;
+ unsigned long nr_pages;
+ pgoff_t pgoff;
+ } map_kernel;
};
enum mmap_action_type type;
diff --git a/mm/memory.c b/mm/memory.c
index b3bcc21af20a..53ef8ef3d04a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2484,13 +2484,14 @@ static int insert_pages(struct vm_area_struct *vma, unsigned long addr,
int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num)
{
- const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1;
+ const unsigned long nr_pages = *num;
+ const unsigned long end = addr + PAGE_SIZE * nr_pages;
- if (addr < vma->vm_start || end_addr >= vma->vm_end)
+ if (!range_in_vma(vma, addr, end))
return -EFAULT;
if (!(vma->vm_flags & VM_MIXEDMAP)) {
- BUG_ON(mmap_read_trylock(vma->vm_mm));
- BUG_ON(vma->vm_flags & VM_PFNMAP);
+ VM_WARN_ON_ONCE(mmap_read_trylock(vma->vm_mm));
+ VM_WARN_ON_ONCE(vma->vm_flags & VM_PFNMAP);
vm_flags_set(vma, VM_MIXEDMAP);
}
/* Defer page refcount checking till we're about to map that page. */
@@ -2498,6 +2499,39 @@ int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
}
EXPORT_SYMBOL(vm_insert_pages);
+int map_kernel_pages_prepare(struct vm_area_desc *desc)
+{
+ const struct mmap_action *action = &desc->action;
+ const unsigned long addr = action->map_kernel.start;
+ unsigned long nr_pages, end;
+
+ if (!vma_desc_test(desc, VMA_MIXEDMAP_BIT)) {
+ VM_WARN_ON_ONCE(mmap_read_trylock(desc->mm));
+ VM_WARN_ON_ONCE(vma_desc_test(desc, VMA_PFNMAP_BIT));
+ vma_desc_set_flags(desc, VMA_MIXEDMAP_BIT);
+ }
+
+ nr_pages = action->map_kernel.nr_pages;
+ end = addr + PAGE_SIZE * nr_pages;
+ if (!range_in_vma_desc(desc, addr, end))
+ return -EFAULT;
+
+ return 0;
+}
+EXPORT_SYMBOL(map_kernel_pages_prepare);
+
+int map_kernel_pages_complete(struct vm_area_struct *vma,
+ struct mmap_action *action)
+{
+ unsigned long nr_pages;
+
+ nr_pages = action->map_kernel.nr_pages;
+ return insert_pages(vma, action->map_kernel.start,
+ action->map_kernel.pages,
+ &nr_pages, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(map_kernel_pages_complete);
+
/**
* vm_insert_page - insert single page into user vma
* @vma: user vma to map to
diff --git a/mm/util.c b/mm/util.c
index 8cf59267a9ac..682d0d24e1c6 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1446,6 +1446,8 @@ int mmap_action_prepare(struct vm_area_desc *desc)
return io_remap_pfn_range_prepare(desc);
case MMAP_SIMPLE_IO_REMAP:
return simple_ioremap_prepare(desc);
+ case MMAP_MAP_KERNEL_PAGES:
+ return map_kernel_pages_prepare(desc);
}
WARN_ON_ONCE(1);
@@ -1476,6 +1478,9 @@ int mmap_action_complete(struct vm_area_struct *vma,
case MMAP_REMAP_PFN:
err = remap_pfn_range_complete(vma, action);
break;
+ case MMAP_MAP_KERNEL_PAGES:
+ err = map_kernel_pages_complete(vma, action);
+ break;
case MMAP_IO_REMAP_PFN:
case MMAP_SIMPLE_IO_REMAP:
/* Should have been delegated. */
@@ -1497,6 +1502,7 @@ int mmap_action_prepare(struct vm_area_desc *desc)
case MMAP_REMAP_PFN:
case MMAP_IO_REMAP_PFN:
case MMAP_SIMPLE_IO_REMAP:
+ case MMAP_MAP_KERNEL_PAGES:
WARN_ON_ONCE(1); /* nommu cannot handle these. */
break;
}
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 1f123704078e..6392e7835f89 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -454,6 +454,7 @@ enum mmap_action_type {
MMAP_REMAP_PFN, /* Remap PFN range. */
MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
MMAP_SIMPLE_IO_REMAP, /* I/O remap with guardrails. */
+ MMAP_MAP_KERNEL_PAGES, /* Map kernel page range from an array. */
};
/*
@@ -472,6 +473,12 @@ struct mmap_action {
phys_addr_t start_phys_addr;
unsigned long size;
} simple_ioremap;
+ struct {
+ unsigned long start;
+ struct page **pages;
+ unsigned long nr_pages;
+ pgoff_t pgoff;
+ } map_kernel;
};
enum mmap_action_type type;
--
2.53.0
^ permalink raw reply related
* [PATCH v3 14/16] uio: replace deprecated mmap hook with mmap_prepare in uio_info
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
The f_op->mmap interface is deprecated, so update uio_info to use its
successor, mmap_prepare.
Therefore, replace the uio_info->mmap hook with a new
uio_info->mmap_prepare hook, and update its one user, target_core_user,
to both specify this new mmap_prepare hook and also to use the new
vm_ops->mapped() hook to continue to maintain a correct udev->kref
refcount.
Then update uio_mmap() to utilise the mmap_prepare compatibility layer to
invoke this callback from the uio mmap invocation.
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
drivers/target/target_core_user.c | 26 ++++++++++++++++++--------
drivers/uio/uio.c | 10 ++++++++--
include/linux/uio_driver.h | 4 ++--
3 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index af95531ddd35..edc2afd5f4ee 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1860,6 +1860,17 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi)
return NULL;
}
+static int tcmu_vma_mapped(unsigned long start, unsigned long end, pgoff_t pgoff,
+ const struct file *file, void **vm_private_data)
+{
+ struct tcmu_dev *udev = *vm_private_data;
+
+ pr_debug("vma_mapped\n");
+
+ kref_get(&udev->kref);
+ return 0;
+}
+
static void tcmu_vma_open(struct vm_area_struct *vma)
{
struct tcmu_dev *udev = vma->vm_private_data;
@@ -1919,26 +1930,25 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
}
static const struct vm_operations_struct tcmu_vm_ops = {
+ .mapped = tcmu_vma_mapped,
.open = tcmu_vma_open,
.close = tcmu_vma_close,
.fault = tcmu_vma_fault,
};
-static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
+static int tcmu_mmap_prepare(struct uio_info *info, struct vm_area_desc *desc)
{
struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
- vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
- vma->vm_ops = &tcmu_vm_ops;
+ vma_desc_set_flags(desc, VMA_DONTEXPAND_BIT, VMA_DONTDUMP_BIT);
+ desc->vm_ops = &tcmu_vm_ops;
- vma->vm_private_data = udev;
+ desc->private_data = udev;
/* Ensure the mmap is exactly the right size */
- if (vma_pages(vma) != udev->mmap_pages)
+ if (vma_desc_pages(desc) != udev->mmap_pages)
return -EINVAL;
- tcmu_vma_open(vma);
-
return 0;
}
@@ -2253,7 +2263,7 @@ static int tcmu_configure_device(struct se_device *dev)
info->irqcontrol = tcmu_irqcontrol;
info->irq = UIO_IRQ_CUSTOM;
- info->mmap = tcmu_mmap;
+ info->mmap_prepare = tcmu_mmap_prepare;
info->open = tcmu_open;
info->release = tcmu_release;
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 5a4998e2caf8..1e4ade78ed84 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -850,8 +850,14 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
goto out;
}
- if (idev->info->mmap) {
- ret = idev->info->mmap(idev->info, vma);
+ if (idev->info->mmap_prepare) {
+ struct vm_area_desc desc;
+
+ compat_set_desc_from_vma(&desc, filep, vma);
+ ret = idev->info->mmap_prepare(idev->info, &desc);
+ if (ret)
+ goto out;
+ ret = __compat_vma_mmap(&desc, vma);
goto out;
}
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 334641e20fb1..02eaac47ac44 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -97,7 +97,7 @@ struct uio_device {
* @irq_flags: flags for request_irq()
* @priv: optional private data
* @handler: the device's irq handler
- * @mmap: mmap operation for this uio device
+ * @mmap_prepare: mmap_prepare operation for this uio device
* @open: open operation for this uio device
* @release: release operation for this uio device
* @irqcontrol: disable/enable irqs when 0/1 is written to /dev/uioX
@@ -112,7 +112,7 @@ struct uio_info {
unsigned long irq_flags;
void *priv;
irqreturn_t (*handler)(int irq, struct uio_info *dev_info);
- int (*mmap)(struct uio_info *info, struct vm_area_struct *vma);
+ int (*mmap_prepare)(struct uio_info *info, struct vm_area_desc *desc);
int (*open)(struct uio_info *info, struct inode *inode);
int (*release)(struct uio_info *info, struct inode *inode);
int (*irqcontrol)(struct uio_info *info, s32 irq_on);
--
2.53.0
^ permalink raw reply related
* [PATCH v3 13/16] drivers: hv: vmbus: replace deprecated mmap hook with mmap_prepare
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
The f_op->mmap interface is deprecated, so update the vmbus driver to use
its successor, mmap_prepare.
This updates all callbacks which referenced the function pointer
hv_mmap_ring_buffer to instead reference hv_mmap_prepare_ring_buffer,
utilising the newly introduced compat_set_desc_from_vma() and
__compat_vma_mmap() to be able to implement this change.
The UIO HV generic driver is the only user of hv_create_ring_sysfs(),
which is the only function which references
vmbus_channel->mmap_prepare_ring_buffer which, in turn, is the only
external interface to hv_mmap_prepare_ring_buffer.
This patch therefore updates this caller to use mmap_prepare instead. The
caller previously used vm_iomap_memory(), so this change replaces it with
its mmap_prepare equivalent, mmap_action_simple_ioremap().
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
drivers/hv/hyperv_vmbus.h | 4 ++--
drivers/hv/vmbus_drv.c | 31 +++++++++++++++++++------------
drivers/uio/uio_hv_generic.c | 11 ++++++-----
include/linux/hyperv.h | 4 ++--
4 files changed, 29 insertions(+), 21 deletions(-)
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 7bd8f8486e85..31f576464f18 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -545,8 +545,8 @@ static inline int hv_debug_add_dev_dir(struct hv_device *dev)
/* Create and remove sysfs entry for memory mapped ring buffers for a channel */
int hv_create_ring_sysfs(struct vmbus_channel *channel,
- int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
- struct vm_area_struct *vma));
+ int (*hv_mmap_prepare_ring_buffer)(struct vmbus_channel *channel,
+ struct vm_area_desc *desc));
int hv_remove_ring_sysfs(struct vmbus_channel *channel);
#endif /* _HYPERV_VMBUS_H */
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index bc4fc1951ae1..45625487ba36 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1951,12 +1951,19 @@ static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj,
struct vm_area_struct *vma)
{
struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj);
+ struct vm_area_desc desc;
+ int err;
/*
- * hv_(create|remove)_ring_sysfs implementation ensures that mmap_ring_buffer
- * is not NULL.
+ * hv_(create|remove)_ring_sysfs implementation ensures that
+ * mmap_prepare_ring_buffer is not NULL.
*/
- return channel->mmap_ring_buffer(channel, vma);
+ compat_set_desc_from_vma(&desc, filp, vma);
+ err = channel->mmap_prepare_ring_buffer(channel, &desc);
+ if (err)
+ return err;
+
+ return __compat_vma_mmap(&desc, vma);
}
static struct bin_attribute chan_attr_ring_buffer = {
@@ -2048,13 +2055,13 @@ static const struct kobj_type vmbus_chan_ktype = {
/**
* hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring buffers for a channel.
* @channel: Pointer to vmbus_channel structure
- * @hv_mmap_ring_buffer: function pointer for initializing the function to be called on mmap of
+ * @hv_mmap_prepare_ring_buffer: function pointer for initializing the function to be called on mmap
* channel's "ring" sysfs node, which is for the ring buffer of that channel.
* Function pointer is of below type:
- * int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
- * struct vm_area_struct *vma))
- * This has a pointer to the channel and a pointer to vm_area_struct,
- * used for mmap, as arguments.
+ * int (*hv_mmap_prepare_ring_buffer)(struct vmbus_channel *channel,
+ * struct vm_area_desc *desc))
+ * This has a pointer to the channel and a pointer to vm_area_desc,
+ * used for mmap_prepare, as arguments.
*
* Sysfs node for ring buffer of a channel is created along with other fields, however its
* visibility is disabled by default. Sysfs creation needs to be controlled when the use-case
@@ -2071,12 +2078,12 @@ static const struct kobj_type vmbus_chan_ktype = {
* Returns 0 on success or error code on failure.
*/
int hv_create_ring_sysfs(struct vmbus_channel *channel,
- int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
- struct vm_area_struct *vma))
+ int (*hv_mmap_prepare_ring_buffer)(struct vmbus_channel *channel,
+ struct vm_area_desc *desc))
{
struct kobject *kobj = &channel->kobj;
- channel->mmap_ring_buffer = hv_mmap_ring_buffer;
+ channel->mmap_prepare_ring_buffer = hv_mmap_prepare_ring_buffer;
channel->ring_sysfs_visible = true;
return sysfs_update_group(kobj, &vmbus_chan_group);
@@ -2098,7 +2105,7 @@ int hv_remove_ring_sysfs(struct vmbus_channel *channel)
channel->ring_sysfs_visible = false;
ret = sysfs_update_group(kobj, &vmbus_chan_group);
- channel->mmap_ring_buffer = NULL;
+ channel->mmap_prepare_ring_buffer = NULL;
return ret;
}
EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs);
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 3f8e2e27697f..29ec2d15ada8 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -154,15 +154,16 @@ static void hv_uio_rescind(struct vmbus_channel *channel)
* The ring buffer is allocated as contiguous memory by vmbus_open
*/
static int
-hv_uio_ring_mmap(struct vmbus_channel *channel, struct vm_area_struct *vma)
+hv_uio_ring_mmap_prepare(struct vmbus_channel *channel, struct vm_area_desc *desc)
{
void *ring_buffer = page_address(channel->ringbuffer_page);
if (channel->state != CHANNEL_OPENED_STATE)
return -ENODEV;
- return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
- channel->ringbuffer_pagecount << PAGE_SHIFT);
+ mmap_action_simple_ioremap(desc, virt_to_phys(ring_buffer),
+ channel->ringbuffer_pagecount << PAGE_SHIFT);
+ return 0;
}
/* Callback from VMBUS subsystem when new channel created. */
@@ -183,7 +184,7 @@ hv_uio_new_channel(struct vmbus_channel *new_sc)
}
set_channel_read_mode(new_sc, HV_CALL_ISR);
- ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap);
+ ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap_prepare);
if (ret) {
dev_err(device, "sysfs create ring bin file failed; %d\n", ret);
vmbus_close(new_sc);
@@ -366,7 +367,7 @@ hv_uio_probe(struct hv_device *dev,
* or decoupled from uio_hv_generic probe. Userspace programs can make use of inotify
* APIs to make sure that ring is created.
*/
- hv_create_ring_sysfs(channel, hv_uio_ring_mmap);
+ hv_create_ring_sysfs(channel, hv_uio_ring_mmap_prepare);
hv_set_drvdata(dev, pdata);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index dfc516c1c719..3a721b1853a4 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1015,8 +1015,8 @@ struct vmbus_channel {
/* The max size of a packet on this channel */
u32 max_pkt_size;
- /* function to mmap ring buffer memory to the channel's sysfs ring attribute */
- int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma);
+ /* function to mmap_prepare ring buffer memory to the channel's sysfs ring attribute */
+ int (*mmap_prepare_ring_buffer)(struct vmbus_channel *channel, struct vm_area_desc *desc);
/* boolean to control visibility of sysfs for ring buffer */
bool ring_sysfs_visible;
--
2.53.0
^ permalink raw reply related
* [PATCH v3 12/16] mm: allow handling of stacked mmap_prepare hooks in more drivers
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
While the conversion of mmap hooks to mmap_prepare is underway, we will
encounter situations where mmap hooks need to invoke nested mmap_prepare
hooks.
The nesting of mmap hooks is termed 'stacking'. In order to flexibly
facilitate the conversion of custom mmap hooks in drivers which stack, we
must split up the existing __compat_vma_mmap() function into two separate
functions:
* compat_set_desc_from_vma() - This allows the setting of a vm_area_desc
object's fields to the relevant fields of a VMA.
* __compat_vma_mmap() - Once an mmap_prepare hook has been executed upon a
vm_area_desc object, this function performs any mmap actions specified by
the mmap_prepare hook and then invokes the vm_ops->mapped() hook if one
was specified.
In ordinary cases, where a file's f_op->mmap_prepare() hook simply needs
to be invoked in a stacked mmap() hook, compat_vma_mmap() can be used.
However some drivers define their own nested hooks, which are invoked in
turn by another hook.
A concrete example is vmbus_channel->mmap_ring_buffer(), which is invoked
in turn by bin_attribute->mmap():
vmbus_channel->mmap_ring_buffer() has a signature of:
int (*mmap_ring_buffer)(struct vmbus_channel *channel,
struct vm_area_struct *vma);
And bin_attribute->mmap() has a signature of:
int (*mmap)(struct file *, struct kobject *,
const struct bin_attribute *attr,
struct vm_area_struct *vma);
And so compat_vma_mmap() cannot be used here for incremental conversion of
hooks from mmap() to mmap_prepare().
There are many instances like this, where conversion to mmap_prepare
would otherwise cascade to a huge change set due to nesting of this kind.
The changes in this patch mean we could now instead convert
vmbus_channel->mmap_ring_buffer() to
vmbus_channel->mmap_prepare_ring_buffer(), and implement something like:
struct vm_area_desc desc;
int err;
compat_set_desc_from_vma(&desc, file, vma);
err = channel->mmap_prepare_ring_buffer(channel, &desc);
if (err)
return err;
return __compat_vma_mmap(&desc, vma);
Allowing us to incrementally update this logic, and other logic like it.
Unfortunately, as part of this change, we need to be able to flexibly
assign to the VMA descriptor, so have to remove some of the const
declarations within the structure.
Also update the VMA tests to reflect the changes.
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
include/linux/fs.h | 3 +
include/linux/mm_types.h | 4 +-
mm/util.c | 112 ++++++++++++++++++++++---------
mm/vma.h | 2 +-
tools/testing/vma/include/dup.h | 114 +++++++++++++++++++++-----------
5 files changed, 162 insertions(+), 73 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c390f5c667e3..0bdccfa70b44 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2058,6 +2058,9 @@ static inline bool can_mmap_file(struct file *file)
return true;
}
+void compat_set_desc_from_vma(struct vm_area_desc *desc, const struct file *file,
+ const struct vm_area_struct *vma);
+int __compat_vma_mmap(struct vm_area_desc *desc, struct vm_area_struct *vma);
int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
int __vma_check_mmap_hook(struct vm_area_struct *vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 50685cf29792..7538d64f8848 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -891,8 +891,8 @@ static __always_inline bool vma_flags_empty(vma_flags_t *flags)
*/
struct vm_area_desc {
/* Immutable state. */
- const struct mm_struct *const mm;
- struct file *const file; /* May vary from vm_file in stacked callers. */
+ struct mm_struct *mm;
+ struct file *file; /* May vary from vm_file in stacked callers. */
unsigned long start;
unsigned long end;
diff --git a/mm/util.c b/mm/util.c
index 879ba62b5f0c..8cf59267a9ac 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1163,34 +1163,40 @@ void flush_dcache_folio(struct folio *folio)
EXPORT_SYMBOL(flush_dcache_folio);
#endif
-static int __compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
+/**
+ * compat_set_desc_from_vma() - assigns VMA descriptor @desc fields from a VMA.
+ * @desc: A VMA descriptor whose fields need to be set.
+ * @file: The file object describing the file being mmap()'d.
+ * @vma: The VMA whose fields we wish to assign to @desc.
+ *
+ * This is a compatibility function to allow an mmap() hook to call
+ * mmap_prepare() hooks when drivers nest these. This function specifically
+ * allows the construction of a vm_area_desc value, @desc, from a VMA @vma for
+ * the purposes of doing this.
+ *
+ * Once the conversion of drivers is complete this function will no longer be
+ * required and will be removed.
+ */
+void compat_set_desc_from_vma(struct vm_area_desc *desc,
+ const struct file *file,
+ const struct vm_area_struct *vma)
{
- struct vm_area_desc desc = {
- .mm = vma->vm_mm,
- .file = file,
- .start = vma->vm_start,
- .end = vma->vm_end,
+ memset(desc, 0, sizeof(*desc));
- .pgoff = vma->vm_pgoff,
- .vm_file = vma->vm_file,
- .vma_flags = vma->flags,
- .page_prot = vma->vm_page_prot,
-
- .action.type = MMAP_NOTHING, /* Default */
- };
- int err;
+ desc->mm = vma->vm_mm;
+ desc->file = (struct file *)file;
+ desc->start = vma->vm_start;
+ desc->end = vma->vm_end;
- err = vfs_mmap_prepare(file, &desc);
- if (err)
- return err;
+ desc->pgoff = vma->vm_pgoff;
+ desc->vm_file = vma->vm_file;
+ desc->vma_flags = vma->flags;
+ desc->page_prot = vma->vm_page_prot;
- err = mmap_action_prepare(&desc);
- if (err)
- return err;
-
- set_vma_from_desc(vma, &desc);
- return mmap_action_complete(vma, &desc.action, /*rmap_lock_held=*/false);
+ /* Default. */
+ desc->action.type = MMAP_NOTHING;
}
+EXPORT_SYMBOL(compat_set_desc_from_vma);
static int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma)
{
@@ -1211,6 +1217,50 @@ static int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma)
return err;
}
+/**
+ * __compat_vma_mmap() - Similar to compat_vma_mmap(), only it allows
+ * flexibility as to how the mmap_prepare callback is invoked, which is useful
+ * for drivers which invoke nested mmap_prepare callbacks in an mmap() hook.
+ * @desc: A VMA descriptor upon which an mmap_prepare() hook has already been
+ * executed.
+ * @vma: The VMA to which @desc should be applied.
+ *
+ * The function assumes that you have obtained a VMA descriptor @desc from
+ * compat_set_desc_from_vma(), and already executed the mmap_prepare() hook upon
+ * it.
+ *
+ * It then performs any specified mmap actions, and invokes the vm_ops->mapped()
+ * hook if one is present.
+ *
+ * See the description of compat_vma_mmap() for more details.
+ *
+ * Once the conversion of drivers is complete this function will no longer be
+ * required and will be removed.
+ *
+ * Returns: 0 on success or error.
+ */
+int __compat_vma_mmap(struct vm_area_desc *desc,
+ struct vm_area_struct *vma)
+{
+ int err;
+
+ /* Perform any preparatory tasks for mmap action. */
+ err = mmap_action_prepare(desc);
+ if (err)
+ return err;
+ /* Update the VMA from the descriptor. */
+ compat_set_vma_from_desc(vma, desc);
+ /* Complete any specified mmap actions. */
+ err = mmap_action_complete(vma, &desc->action,
+ /*rmap_lock_held=*/false);
+ if (err)
+ return err;
+
+ /* Invoke vm_ops->mapped callback. */
+ return __compat_vma_mapped(desc->file, vma);
+}
+EXPORT_SYMBOL(__compat_vma_mmap);
+
/**
* compat_vma_mmap() - Apply the file's .mmap_prepare() hook to an
* existing VMA and execute any requested actions.
@@ -1218,10 +1268,10 @@ static int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma)
* @vma: The VMA to apply the .mmap_prepare() hook to.
*
* Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain
- * stacked filesystems invoke a nested mmap hook of an underlying file.
+ * stacked drivers invoke a nested mmap hook of an underlying file.
*
- * Until all filesystems are converted to use .mmap_prepare(), we must be
- * conservative and continue to invoke these stacked filesystems using the
+ * Until all drivers are converted to use .mmap_prepare(), we must be
+ * conservative and continue to invoke these stacked drivers using the
* deprecated .mmap() hook.
*
* However we have a problem if the underlying file system possesses an
@@ -1232,20 +1282,22 @@ static int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma)
* establishes a struct vm_area_desc descriptor, passes to the underlying
* .mmap_prepare() hook and applies any changes performed by it.
*
- * Once the conversion of filesystems is complete this function will no longer
- * be required and will be removed.
+ * Once the conversion of drivers is complete this function will no longer be
+ * required and will be removed.
*
* Returns: 0 on success or error.
*/
int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct vm_area_desc desc;
int err;
- err = __compat_vma_mmap(file, vma);
+ compat_set_desc_from_vma(&desc, file, vma);
+ err = vfs_mmap_prepare(file, &desc);
if (err)
return err;
- return __compat_vma_mapped(file, vma);
+ return __compat_vma_mmap(&desc, vma);
}
EXPORT_SYMBOL(compat_vma_mmap);
diff --git a/mm/vma.h b/mm/vma.h
index adc18f7dd9f1..a76046c39b14 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -300,7 +300,7 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
* f_op->mmap() but which might have an underlying file system which implements
* f_op->mmap_prepare().
*/
-static inline void set_vma_from_desc(struct vm_area_struct *vma,
+static inline void compat_set_vma_from_desc(struct vm_area_struct *vma,
struct vm_area_desc *desc)
{
/*
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 1b86c34e1158..1f123704078e 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -519,8 +519,8 @@ enum vma_operation {
*/
struct vm_area_desc {
/* Immutable state. */
- const struct mm_struct *const mm;
- struct file *const file; /* May vary from vm_file in stacked callers. */
+ struct mm_struct *mm;
+ struct file *file; /* May vary from vm_file in stacked callers. */
unsigned long start;
unsigned long end;
@@ -1272,43 +1272,95 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
}
/* Declared in vma.h. */
-static inline void set_vma_from_desc(struct vm_area_struct *vma,
+static inline void compat_set_vma_from_desc(struct vm_area_struct *vma,
struct vm_area_desc *desc);
-static inline int __compat_vma_mmap(const struct file_operations *f_op,
- struct file *file, struct vm_area_struct *vma)
+static inline void compat_set_desc_from_vma(struct vm_area_desc *desc,
+ const struct file *file,
+ const struct vm_area_struct *vma)
{
- struct vm_area_desc desc = {
- .mm = vma->vm_mm,
- .file = file,
- .start = vma->vm_start,
- .end = vma->vm_end,
+ memset(desc, 0, sizeof(*desc));
- .pgoff = vma->vm_pgoff,
- .vm_file = vma->vm_file,
- .vma_flags = vma->flags,
- .page_prot = vma->vm_page_prot,
+ desc->mm = vma->vm_mm;
+ desc->file = (struct file *)file;
+ desc->start = vma->vm_start;
+ desc->end = vma->vm_end;
- .action.type = MMAP_NOTHING, /* Default */
- };
+ desc->pgoff = vma->vm_pgoff;
+ desc->vm_file = vma->vm_file;
+ desc->vma_flags = vma->flags;
+ desc->page_prot = vma->vm_page_prot;
+
+ /* Default. */
+ desc->action.type = MMAP_NOTHING;
+}
+
+static inline unsigned long vma_pages(const struct vm_area_struct *vma)
+{
+ return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+}
+
+static inline void unmap_vma_locked(struct vm_area_struct *vma)
+{
+ const size_t len = vma_pages(vma) << PAGE_SHIFT;
+
+ mmap_assert_write_locked(vma->vm_mm);
+ do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
+}
+
+static inline int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma)
+{
+ const struct vm_operations_struct *vm_ops = vma->vm_ops;
int err;
- err = f_op->mmap_prepare(&desc);
+ if (!vm_ops->mapped)
+ return 0;
+
+ err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff, file,
+ &vma->vm_private_data);
if (err)
- return err;
+ unmap_vma_locked(vma);
+ return err;
+}
- err = mmap_action_prepare(&desc);
+static inline int __compat_vma_mmap(struct vm_area_desc *desc,
+ struct vm_area_struct *vma)
+{
+ int err;
+
+ /* Perform any preparatory tasks for mmap action. */
+ err = mmap_action_prepare(desc);
+ if (err)
+ return err;
+ /* Update the VMA from the descriptor. */
+ compat_set_vma_from_desc(vma, desc);
+ /* Complete any specified mmap actions. */
+ err = mmap_action_complete(vma, &desc->action,
+ /*rmap_lock_held=*/false);
if (err)
return err;
- set_vma_from_desc(vma, &desc);
- return mmap_action_complete(vma, &desc.action, /*rmap_lock_held=*/false);
+ /* Invoke vm_ops->mapped callback. */
+ return __compat_vma_mapped(desc->file, vma);
+}
+
+static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
+{
+ return file->f_op->mmap_prepare(desc);
}
static inline int compat_vma_mmap(struct file *file,
struct vm_area_struct *vma)
{
- return __compat_vma_mmap(file->f_op, file, vma);
+ struct vm_area_desc desc;
+ int err;
+
+ compat_set_desc_from_vma(&desc, file, vma);
+ err = vfs_mmap_prepare(file, &desc);
+ if (err)
+ return err;
+
+ return __compat_vma_mmap(&desc, vma);
}
@@ -1318,11 +1370,6 @@ static inline void vma_iter_init(struct vma_iterator *vmi,
mas_init(&vmi->mas, &mm->mm_mt, addr);
}
-static inline unsigned long vma_pages(struct vm_area_struct *vma)
-{
- return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-}
-
static inline void mmap_assert_locked(struct mm_struct *);
static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
unsigned long start_addr,
@@ -1492,11 +1539,6 @@ static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
return file->f_op->mmap(file, vma);
}
-static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
-{
- return file->f_op->mmap_prepare(desc);
-}
-
static inline void vma_set_file(struct vm_area_struct *vma, struct file *file)
{
/* Changing an anonymous vma with this is illegal */
@@ -1521,11 +1563,3 @@ static inline pgprot_t vma_get_page_prot(vma_flags_t vma_flags)
return vm_get_page_prot(vm_flags);
}
-
-static inline void unmap_vma_locked(struct vm_area_struct *vma)
-{
- const size_t len = vma_pages(vma) << PAGE_SHIFT;
-
- mmap_assert_write_locked(vma->vm_mm);
- do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
-}
--
2.53.0
^ permalink raw reply related
* [PATCH v3 11/16] staging: vme_user: replace deprecated mmap hook with mmap_prepare
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
The f_op->mmap interface is deprecated, so update the driver to use its
successor, mmap_prepare.
The driver previously used vm_iomap_memory(), so this change replaces it
with its mmap_prepare equivalent, mmap_action_simple_ioremap().
Functions that wrap mmap() are also converted to wrap mmap_prepare()
instead.
Also update the documentation accordingly.
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
Documentation/driver-api/vme.rst | 2 +-
drivers/staging/vme_user/vme.c | 20 +++++------
drivers/staging/vme_user/vme.h | 2 +-
drivers/staging/vme_user/vme_user.c | 51 +++++++++++++++++------------
4 files changed, 42 insertions(+), 33 deletions(-)
diff --git a/Documentation/driver-api/vme.rst b/Documentation/driver-api/vme.rst
index c0b475369de0..7111999abc14 100644
--- a/Documentation/driver-api/vme.rst
+++ b/Documentation/driver-api/vme.rst
@@ -107,7 +107,7 @@ The function :c:func:`vme_master_read` can be used to read from and
In addition to simple reads and writes, :c:func:`vme_master_rmw` is provided to
do a read-modify-write transaction. Parts of a VME window can also be mapped
-into user space memory using :c:func:`vme_master_mmap`.
+into user space memory using :c:func:`vme_master_mmap_prepare`.
Slave windows
diff --git a/drivers/staging/vme_user/vme.c b/drivers/staging/vme_user/vme.c
index f10a00c05f12..7220aba7b919 100644
--- a/drivers/staging/vme_user/vme.c
+++ b/drivers/staging/vme_user/vme.c
@@ -735,9 +735,9 @@ unsigned int vme_master_rmw(struct vme_resource *resource, unsigned int mask,
EXPORT_SYMBOL(vme_master_rmw);
/**
- * vme_master_mmap - Mmap region of VME master window.
+ * vme_master_mmap_prepare - Mmap region of VME master window.
* @resource: Pointer to VME master resource.
- * @vma: Pointer to definition of user mapping.
+ * @desc: Pointer to descriptor of user mapping.
*
* Memory map a region of the VME master window into user space.
*
@@ -745,12 +745,13 @@ EXPORT_SYMBOL(vme_master_rmw);
* resource or -EFAULT if map exceeds window size. Other generic mmap
* errors may also be returned.
*/
-int vme_master_mmap(struct vme_resource *resource, struct vm_area_struct *vma)
+int vme_master_mmap_prepare(struct vme_resource *resource,
+ struct vm_area_desc *desc)
{
+ const unsigned long vma_size = vma_desc_size(desc);
struct vme_bridge *bridge = find_bridge(resource);
struct vme_master_resource *image;
phys_addr_t phys_addr;
- unsigned long vma_size;
if (resource->type != VME_MASTER) {
dev_err(bridge->parent, "Not a master resource\n");
@@ -758,19 +759,18 @@ int vme_master_mmap(struct vme_resource *resource, struct vm_area_struct *vma)
}
image = list_entry(resource->entry, struct vme_master_resource, list);
- phys_addr = image->bus_resource.start + (vma->vm_pgoff << PAGE_SHIFT);
- vma_size = vma->vm_end - vma->vm_start;
+ phys_addr = image->bus_resource.start + (desc->pgoff << PAGE_SHIFT);
if (phys_addr + vma_size > image->bus_resource.end + 1) {
dev_err(bridge->parent, "Map size cannot exceed the window size\n");
return -EFAULT;
}
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- return vm_iomap_memory(vma, phys_addr, vma->vm_end - vma->vm_start);
+ desc->page_prot = pgprot_noncached(desc->page_prot);
+ mmap_action_simple_ioremap(desc, phys_addr, vma_size);
+ return 0;
}
-EXPORT_SYMBOL(vme_master_mmap);
+EXPORT_SYMBOL(vme_master_mmap_prepare);
/**
* vme_master_free - Free VME master window
diff --git a/drivers/staging/vme_user/vme.h b/drivers/staging/vme_user/vme.h
index 797e9940fdd1..b6413605ea49 100644
--- a/drivers/staging/vme_user/vme.h
+++ b/drivers/staging/vme_user/vme.h
@@ -151,7 +151,7 @@ ssize_t vme_master_read(struct vme_resource *resource, void *buf, size_t count,
ssize_t vme_master_write(struct vme_resource *resource, void *buf, size_t count, loff_t offset);
unsigned int vme_master_rmw(struct vme_resource *resource, unsigned int mask, unsigned int compare,
unsigned int swap, loff_t offset);
-int vme_master_mmap(struct vme_resource *resource, struct vm_area_struct *vma);
+int vme_master_mmap_prepare(struct vme_resource *resource, struct vm_area_desc *desc);
void vme_master_free(struct vme_resource *resource);
struct vme_resource *vme_dma_request(struct vme_dev *vdev, u32 route);
diff --git a/drivers/staging/vme_user/vme_user.c b/drivers/staging/vme_user/vme_user.c
index d95dd7d9190a..11e25c2f6b0a 100644
--- a/drivers/staging/vme_user/vme_user.c
+++ b/drivers/staging/vme_user/vme_user.c
@@ -446,24 +446,14 @@ static void vme_user_vm_close(struct vm_area_struct *vma)
kfree(vma_priv);
}
-static const struct vm_operations_struct vme_user_vm_ops = {
- .open = vme_user_vm_open,
- .close = vme_user_vm_close,
-};
-
-static int vme_user_master_mmap(unsigned int minor, struct vm_area_struct *vma)
+static int vme_user_vm_mapped(unsigned long start, unsigned long end, pgoff_t pgoff,
+ const struct file *file, void **vm_private_data)
{
- int err;
+ const unsigned int minor = iminor(file_inode(file));
struct vme_user_vma_priv *vma_priv;
mutex_lock(&image[minor].mutex);
- err = vme_master_mmap(image[minor].resource, vma);
- if (err) {
- mutex_unlock(&image[minor].mutex);
- return err;
- }
-
vma_priv = kmalloc_obj(*vma_priv);
if (!vma_priv) {
mutex_unlock(&image[minor].mutex);
@@ -472,22 +462,41 @@ static int vme_user_master_mmap(unsigned int minor, struct vm_area_struct *vma)
vma_priv->minor = minor;
refcount_set(&vma_priv->refcnt, 1);
- vma->vm_ops = &vme_user_vm_ops;
- vma->vm_private_data = vma_priv;
-
+ *vm_private_data = vma_priv;
image[minor].mmap_count++;
mutex_unlock(&image[minor].mutex);
-
return 0;
}
-static int vme_user_mmap(struct file *file, struct vm_area_struct *vma)
+static const struct vm_operations_struct vme_user_vm_ops = {
+ .mapped = vme_user_vm_mapped,
+ .open = vme_user_vm_open,
+ .close = vme_user_vm_close,
+};
+
+static int vme_user_master_mmap_prepare(unsigned int minor,
+ struct vm_area_desc *desc)
+{
+ int err;
+
+ mutex_lock(&image[minor].mutex);
+
+ err = vme_master_mmap_prepare(image[minor].resource, desc);
+ if (!err)
+ desc->vm_ops = &vme_user_vm_ops;
+
+ mutex_unlock(&image[minor].mutex);
+ return err;
+}
+
+static int vme_user_mmap_prepare(struct vm_area_desc *desc)
{
- unsigned int minor = iminor(file_inode(file));
+ const struct file *file = desc->file;
+ const unsigned int minor = iminor(file_inode(file));
if (type[minor] == MASTER_MINOR)
- return vme_user_master_mmap(minor, vma);
+ return vme_user_master_mmap_prepare(minor, desc);
return -ENODEV;
}
@@ -498,7 +507,7 @@ static const struct file_operations vme_user_fops = {
.llseek = vme_user_llseek,
.unlocked_ioctl = vme_user_unlocked_ioctl,
.compat_ioctl = compat_ptr_ioctl,
- .mmap = vme_user_mmap,
+ .mmap_prepare = vme_user_mmap_prepare,
};
static int vme_user_match(struct vme_dev *vdev)
--
2.53.0
^ permalink raw reply related
* [PATCH v3 10/16] stm: replace deprecated mmap hook with mmap_prepare
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
The f_op->mmap interface is deprecated, so update the driver to use its
successor, mmap_prepare.
The driver previously used vm_iomap_memory(), so this change replaces it
with its mmap_prepare equivalent, mmap_action_simple_ioremap().
Also, in order to correctly maintain reference counting, add a
vm_ops->mapped callback to increment the reference count when successfully
mapped.
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
drivers/hwtracing/stm/core.c | 31 +++++++++++++++++++++----------
1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
index 37584e786bb5..f48c6a8a0654 100644
--- a/drivers/hwtracing/stm/core.c
+++ b/drivers/hwtracing/stm/core.c
@@ -666,6 +666,16 @@ static ssize_t stm_char_write(struct file *file, const char __user *buf,
return count;
}
+static int stm_mmap_mapped(unsigned long start, unsigned long end, pgoff_t pgoff,
+ const struct file *file, void **vm_private_data)
+{
+ struct stm_file *stmf = file->private_data;
+ struct stm_device *stm = stmf->stm;
+
+ pm_runtime_get_sync(&stm->dev);
+ return 0;
+}
+
static void stm_mmap_open(struct vm_area_struct *vma)
{
struct stm_file *stmf = vma->vm_file->private_data;
@@ -684,12 +694,14 @@ static void stm_mmap_close(struct vm_area_struct *vma)
}
static const struct vm_operations_struct stm_mmap_vmops = {
+ .mapped = stm_mmap_mapped,
.open = stm_mmap_open,
.close = stm_mmap_close,
};
-static int stm_char_mmap(struct file *file, struct vm_area_struct *vma)
+static int stm_char_mmap_prepare(struct vm_area_desc *desc)
{
+ struct file *file = desc->file;
struct stm_file *stmf = file->private_data;
struct stm_device *stm = stmf->stm;
unsigned long size, phys;
@@ -697,10 +709,10 @@ static int stm_char_mmap(struct file *file, struct vm_area_struct *vma)
if (!stm->data->mmio_addr)
return -EOPNOTSUPP;
- if (vma->vm_pgoff)
+ if (desc->pgoff)
return -EINVAL;
- size = vma->vm_end - vma->vm_start;
+ size = vma_desc_size(desc);
if (stmf->output.nr_chans * stm->data->sw_mmiosz != size)
return -EINVAL;
@@ -712,13 +724,12 @@ static int stm_char_mmap(struct file *file, struct vm_area_struct *vma)
if (!phys)
return -EINVAL;
- pm_runtime_get_sync(&stm->dev);
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP);
- vma->vm_ops = &stm_mmap_vmops;
- vm_iomap_memory(vma, phys, size);
+ desc->page_prot = pgprot_noncached(desc->page_prot);
+ vma_desc_set_flags(desc, VMA_IO_BIT, VMA_DONTEXPAND_BIT,
+ VMA_DONTDUMP_BIT);
+ desc->vm_ops = &stm_mmap_vmops;
+ mmap_action_simple_ioremap(desc, phys, size);
return 0;
}
@@ -836,7 +847,7 @@ static const struct file_operations stm_fops = {
.open = stm_char_open,
.release = stm_char_release,
.write = stm_char_write,
- .mmap = stm_char_mmap,
+ .mmap_prepare = stm_char_mmap_prepare,
.unlocked_ioctl = stm_char_ioctl,
.compat_ioctl = compat_ptr_ioctl,
};
--
2.53.0
^ permalink raw reply related
* [PATCH v3 09/16] mtdchar: replace deprecated mmap hook with mmap_prepare, clean up
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
Replace the deprecated mmap callback with mmap_prepare.
Commit f5cf8f07423b ("mtd: Disable mtdchar mmap on MMU systems") commented
out the CONFIG_MMU part of this function back in 2012, so after ~14 years
it's probably reasonable to remove this altogether rather than updating
dead code.
Acked-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
drivers/mtd/mtdchar.c | 21 +++------------------
1 file changed, 3 insertions(+), 18 deletions(-)
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 55a43682c567..bf01e6ac7293 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1376,27 +1376,12 @@ static unsigned mtdchar_mmap_capabilities(struct file *file)
/*
* set up a mapping for shared memory segments
*/
-static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma)
+static int mtdchar_mmap_prepare(struct vm_area_desc *desc)
{
#ifdef CONFIG_MMU
- struct mtd_file_info *mfi = file->private_data;
- struct mtd_info *mtd = mfi->mtd;
- struct map_info *map = mtd->priv;
-
- /* This is broken because it assumes the MTD device is map-based
- and that mtd->priv is a valid struct map_info. It should be
- replaced with something that uses the mtd_get_unmapped_area()
- operation properly. */
- if (0 /*mtd->type == MTD_RAM || mtd->type == MTD_ROM*/) {
-#ifdef pgprot_noncached
- if (file->f_flags & O_DSYNC || map->phys >= __pa(high_memory))
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-#endif
- return vm_iomap_memory(vma, map->phys, map->size);
- }
return -ENODEV;
#else
- return vma->vm_flags & VM_SHARED ? 0 : -EACCES;
+ return vma_desc_test(desc, VMA_SHARED_BIT) ? 0 : -EACCES;
#endif
}
@@ -1411,7 +1396,7 @@ static const struct file_operations mtd_fops = {
#endif
.open = mtdchar_open,
.release = mtdchar_close,
- .mmap = mtdchar_mmap,
+ .mmap_prepare = mtdchar_mmap_prepare,
#ifndef CONFIG_MMU
.get_unmapped_area = mtdchar_get_unmapped_area,
.mmap_capabilities = mtdchar_mmap_capabilities,
--
2.53.0
^ permalink raw reply related
* [PATCH v3 08/16] hpet: replace deprecated mmap hook with mmap_prepare
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
The f_op->mmap interface is deprecated, so update the driver to use its
successor, mmap_prepare.
The driver previously used vm_iomap_memory(), so this change replaces it
with its mmap_prepare equivalent, mmap_action_simple_ioremap().
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
drivers/char/hpet.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 60dd09a56f50..8f128cc40147 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -354,8 +354,9 @@ static __init int hpet_mmap_enable(char *str)
}
__setup("hpet_mmap=", hpet_mmap_enable);
-static int hpet_mmap(struct file *file, struct vm_area_struct *vma)
+static int hpet_mmap_prepare(struct vm_area_desc *desc)
{
+ struct file *file = desc->file;
struct hpet_dev *devp;
unsigned long addr;
@@ -368,11 +369,12 @@ static int hpet_mmap(struct file *file, struct vm_area_struct *vma)
if (addr & (PAGE_SIZE - 1))
return -ENOSYS;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- return vm_iomap_memory(vma, addr, PAGE_SIZE);
+ desc->page_prot = pgprot_noncached(desc->page_prot);
+ mmap_action_simple_ioremap(desc, addr, PAGE_SIZE);
+ return 0;
}
#else
-static int hpet_mmap(struct file *file, struct vm_area_struct *vma)
+static int hpet_mmap_prepare(struct vm_area_desc *desc)
{
return -ENOSYS;
}
@@ -710,7 +712,7 @@ static const struct file_operations hpet_fops = {
.open = hpet_open,
.release = hpet_release,
.fasync = hpet_fasync,
- .mmap = hpet_mmap,
+ .mmap_prepare = hpet_mmap_prepare,
};
static int hpet_is_known(struct hpet_data *hdp)
--
2.53.0
^ permalink raw reply related
* [PATCH v3 07/16] misc: open-dice: replace deprecated mmap hook with mmap_prepare
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
The f_op->mmap interface is deprecated, so update the driver to use its
successor, mmap_prepare.
The driver previously used vm_iomap_memory(), so this change replaces it
with its mmap_prepare equivalent, mmap_action_simple_ioremap().
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
drivers/misc/open-dice.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c
index 24c29e0f00ef..45060fb4ea27 100644
--- a/drivers/misc/open-dice.c
+++ b/drivers/misc/open-dice.c
@@ -86,29 +86,32 @@ static ssize_t open_dice_write(struct file *filp, const char __user *ptr,
/*
* Creates a mapping of the reserved memory region in user address space.
*/
-static int open_dice_mmap(struct file *filp, struct vm_area_struct *vma)
+static int open_dice_mmap_prepare(struct vm_area_desc *desc)
{
+ struct file *filp = desc->file;
struct open_dice_drvdata *drvdata = to_open_dice_drvdata(filp);
- if (vma->vm_flags & VM_MAYSHARE) {
+ if (vma_desc_test(desc, VMA_MAYSHARE_BIT)) {
/* Do not allow userspace to modify the underlying data. */
- if (vma->vm_flags & VM_WRITE)
+ if (vma_desc_test(desc, VMA_WRITE_BIT))
return -EPERM;
/* Ensure userspace cannot acquire VM_WRITE later. */
- vm_flags_clear(vma, VM_MAYWRITE);
+ vma_desc_clear_flags(desc, VMA_MAYWRITE_BIT);
}
/* Create write-combine mapping so all clients observe a wipe. */
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP);
- return vm_iomap_memory(vma, drvdata->rmem->base, drvdata->rmem->size);
+ desc->page_prot = pgprot_writecombine(desc->page_prot);
+ vma_desc_set_flags(desc, VMA_DONTCOPY_BIT, VMA_DONTDUMP_BIT);
+ mmap_action_simple_ioremap(desc, drvdata->rmem->base,
+ drvdata->rmem->size);
+ return 0;
}
static const struct file_operations open_dice_fops = {
.owner = THIS_MODULE,
.read = open_dice_read,
.write = open_dice_write,
- .mmap = open_dice_mmap,
+ .mmap_prepare = open_dice_mmap_prepare,
};
static int __init open_dice_probe(struct platform_device *pdev)
--
2.53.0
^ permalink raw reply related
* [PATCH v3 06/16] mm: add mmap_action_simple_ioremap()
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
Currently drivers use vm_iomap_memory() as a simple helper function for
I/O remapping a range of memory that starts at a specified physical address
and spans a specified length.
In order to utilise this from mmap_prepare, separate out the core logic
into __simple_ioremap_prep(), update vm_iomap_memory() to use it, and add
simple_ioremap_prepare() to do the same with a VMA descriptor object.
We also add MMAP_SIMPLE_IO_REMAP and relevant fields to the struct
mmap_action type to permit this operation.
We use mmap_action_ioremap() to set up the actual I/O remap operation once
we have checked and figured out the parameters, which makes
simple_ioremap_prepare() easy to implement.
We then add mmap_action_simple_ioremap() to allow drivers to make use of
this mode.
We update the mmap_prepare documentation to describe this mode. Finally,
we update the VMA tests to reflect this change.
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
Documentation/filesystems/mmap_prepare.rst | 3 +
include/linux/mm.h | 24 +++++-
include/linux/mm_types.h | 6 +-
mm/internal.h | 1 +
mm/memory.c | 85 +++++++++++++++-------
mm/util.c | 5 ++
tools/testing/vma/include/dup.h | 6 +-
7 files changed, 102 insertions(+), 28 deletions(-)
diff --git a/Documentation/filesystems/mmap_prepare.rst b/Documentation/filesystems/mmap_prepare.rst
index 20db474915da..be76ae475b9c 100644
--- a/Documentation/filesystems/mmap_prepare.rst
+++ b/Documentation/filesystems/mmap_prepare.rst
@@ -153,5 +153,8 @@ pointer. These are:
* mmap_action_ioremap_full() - Same as mmap_action_ioremap(), only remaps
the entire mapping from ``start_pfn`` onward.
+* mmap_action_simple_ioremap() - Sets up an I/O remap from a specified
+ physical address and over a specified length.
+
**NOTE:** The ``action`` field should never normally be manipulated directly,
rather you ought to use one of these helpers.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 68dee1101313..ef2e4dccfe8e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4337,11 +4337,33 @@ static inline void mmap_action_ioremap(struct vm_area_desc *desc,
* @start_pfn: The first PFN in the range to remap.
*/
static inline void mmap_action_ioremap_full(struct vm_area_desc *desc,
- unsigned long start_pfn)
+ unsigned long start_pfn)
{
mmap_action_ioremap(desc, desc->start, start_pfn, vma_desc_size(desc));
}
+/**
+ * mmap_action_simple_ioremap - helper for mmap_prepare hook to specify that the
+ * physical range in [start_phys_addr, start_phys_addr + size) should be I/O
+ * remapped.
+ * @desc: The VMA descriptor for the VMA requiring remap.
+ * @start_phys_addr: Start of the physical memory to be mapped.
+ * @size: Size of the area to map.
+ *
+ * NOTE: Some drivers might want to tweak desc->page_prot for purposes of
+ * write-combine or similar.
+ */
+static inline void mmap_action_simple_ioremap(struct vm_area_desc *desc,
+ phys_addr_t start_phys_addr,
+ unsigned long size)
+{
+ struct mmap_action *action = &desc->action;
+
+ action->simple_ioremap.start_phys_addr = start_phys_addr;
+ action->simple_ioremap.size = size;
+ action->type = MMAP_SIMPLE_IO_REMAP;
+}
+
int mmap_action_prepare(struct vm_area_desc *desc);
int mmap_action_complete(struct vm_area_struct *vma,
struct mmap_action *action,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4a229cc0a06b..50685cf29792 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -814,6 +814,7 @@ enum mmap_action_type {
MMAP_NOTHING, /* Mapping is complete, no further action. */
MMAP_REMAP_PFN, /* Remap PFN range. */
MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
+ MMAP_SIMPLE_IO_REMAP, /* I/O remap with guardrails. */
};
/*
@@ -822,13 +823,16 @@ enum mmap_action_type {
*/
struct mmap_action {
union {
- /* Remap range. */
struct {
unsigned long start;
unsigned long start_pfn;
unsigned long size;
pgprot_t pgprot;
} remap;
+ struct {
+ phys_addr_t start_phys_addr;
+ unsigned long size;
+ } simple_ioremap;
};
enum mmap_action_type type;
diff --git a/mm/internal.h b/mm/internal.h
index e0f554178143..2aa04d87ac10 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1804,6 +1804,7 @@ int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm);
int remap_pfn_range_prepare(struct vm_area_desc *desc);
int remap_pfn_range_complete(struct vm_area_struct *vma,
struct mmap_action *action);
+int simple_ioremap_prepare(struct vm_area_desc *desc);
static inline int io_remap_pfn_range_prepare(struct vm_area_desc *desc)
{
diff --git a/mm/memory.c b/mm/memory.c
index 9dec67a18116..b3bcc21af20a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3170,6 +3170,58 @@ int remap_pfn_range_complete(struct vm_area_struct *vma,
return do_remap_pfn_range(vma, start, pfn, size, prot);
}
+static int __simple_ioremap_prep(unsigned long vm_len, pgoff_t vm_pgoff,
+ phys_addr_t start_phys, unsigned long size,
+ unsigned long *pfnp)
+{
+ unsigned long pfn, pages;
+
+ /* Check that the physical memory area passed in looks valid */
+ if (start_phys + size < start_phys)
+ return -EINVAL;
+ /*
+ * You *really* shouldn't map things that aren't page-aligned,
+ * but we've historically allowed it because IO memory might
+ * just have smaller alignment.
+ */
+ size += start_phys & ~PAGE_MASK;
+ pfn = start_phys >> PAGE_SHIFT;
+ pages = (size + ~PAGE_MASK) >> PAGE_SHIFT;
+ if (pfn + pages < pfn)
+ return -EINVAL;
+
+ /* We start the mapping 'vm_pgoff' pages into the area */
+ if (vm_pgoff > pages)
+ return -EINVAL;
+ pfn += vm_pgoff;
+ pages -= vm_pgoff;
+
+ /* Can we fit all of the mapping? */
+ if ((vm_len >> PAGE_SHIFT) > pages)
+ return -EINVAL;
+
+ *pfnp = pfn;
+ return 0;
+}
+
+int simple_ioremap_prepare(struct vm_area_desc *desc)
+{
+ struct mmap_action *action = &desc->action;
+ const phys_addr_t start = action->simple_ioremap.start_phys_addr;
+ const unsigned long size = action->simple_ioremap.size;
+ unsigned long pfn;
+ int err;
+
+ err = __simple_ioremap_prep(vma_desc_size(desc), desc->pgoff,
+ start, size, &pfn);
+ if (err)
+ return err;
+
+ /* The I/O remap logic does the heavy lifting. */
+ mmap_action_ioremap_full(desc, pfn);
+ return io_remap_pfn_range_prepare(desc);
+}
+
/**
* vm_iomap_memory - remap memory to userspace
* @vma: user vma to map to
@@ -3187,32 +3239,15 @@ int remap_pfn_range_complete(struct vm_area_struct *vma,
*/
int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len)
{
- unsigned long vm_len, pfn, pages;
-
- /* Check that the physical memory area passed in looks valid */
- if (start + len < start)
- return -EINVAL;
- /*
- * You *really* shouldn't map things that aren't page-aligned,
- * but we've historically allowed it because IO memory might
- * just have smaller alignment.
- */
- len += start & ~PAGE_MASK;
- pfn = start >> PAGE_SHIFT;
- pages = (len + ~PAGE_MASK) >> PAGE_SHIFT;
- if (pfn + pages < pfn)
- return -EINVAL;
-
- /* We start the mapping 'vm_pgoff' pages into the area */
- if (vma->vm_pgoff > pages)
- return -EINVAL;
- pfn += vma->vm_pgoff;
- pages -= vma->vm_pgoff;
+ const unsigned long vm_start = vma->vm_start;
+ const unsigned long vm_end = vma->vm_end;
+ const unsigned long vm_len = vm_end - vm_start;
+ unsigned long pfn;
+ int err;
- /* Can we fit all of the mapping? */
- vm_len = vma->vm_end - vma->vm_start;
- if (vm_len >> PAGE_SHIFT > pages)
- return -EINVAL;
+ err = __simple_ioremap_prep(vm_len, vma->vm_pgoff, start, len, &pfn);
+ if (err)
+ return err;
/* Ok, let it rip */
return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot);
diff --git a/mm/util.c b/mm/util.c
index fc1bd8a8f3ea..879ba62b5f0c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1392,6 +1392,8 @@ int mmap_action_prepare(struct vm_area_desc *desc)
return remap_pfn_range_prepare(desc);
case MMAP_IO_REMAP_PFN:
return io_remap_pfn_range_prepare(desc);
+ case MMAP_SIMPLE_IO_REMAP:
+ return simple_ioremap_prepare(desc);
}
WARN_ON_ONCE(1);
@@ -1423,6 +1425,7 @@ int mmap_action_complete(struct vm_area_struct *vma,
err = remap_pfn_range_complete(vma, action);
break;
case MMAP_IO_REMAP_PFN:
+ case MMAP_SIMPLE_IO_REMAP:
/* Should have been delegated. */
WARN_ON_ONCE(1);
err = -EINVAL;
@@ -1441,6 +1444,7 @@ int mmap_action_prepare(struct vm_area_desc *desc)
break;
case MMAP_REMAP_PFN:
case MMAP_IO_REMAP_PFN:
+ case MMAP_SIMPLE_IO_REMAP:
WARN_ON_ONCE(1); /* nommu cannot handle these. */
break;
}
@@ -1460,6 +1464,7 @@ int mmap_action_complete(struct vm_area_struct *vma,
break;
case MMAP_REMAP_PFN:
case MMAP_IO_REMAP_PFN:
+ case MMAP_SIMPLE_IO_REMAP:
WARN_ON_ONCE(1); /* nommu cannot handle this. */
err = -EINVAL;
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index aa34966cbc62..1b86c34e1158 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -453,6 +453,7 @@ enum mmap_action_type {
MMAP_NOTHING, /* Mapping is complete, no further action. */
MMAP_REMAP_PFN, /* Remap PFN range. */
MMAP_IO_REMAP_PFN, /* I/O remap PFN range. */
+ MMAP_SIMPLE_IO_REMAP, /* I/O remap with guardrails. */
};
/*
@@ -461,13 +462,16 @@ enum mmap_action_type {
*/
struct mmap_action {
union {
- /* Remap range. */
struct {
unsigned long start;
unsigned long start_pfn;
unsigned long size;
pgprot_t pgprot;
} remap;
+ struct {
+ phys_addr_t start_phys_addr;
+ unsigned long size;
+ } simple_ioremap;
};
enum mmap_action_type type;
--
2.53.0
^ permalink raw reply related
* [PATCH v3 05/16] fs: afs: correctly drop reference count on mapping failure
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
Commit 9d5403b1036c ("fs: convert most other generic_file_*mmap() users to
.mmap_prepare()") updated AFS to use the mmap_prepare callback in favour
of the deprecated mmap callback.
However, it did not account for the fact that mmap_prepare is called
pre-merge, so the VMA may subsequently be merged, nor that the mapping can
still fail after mmap_prepare, for instance due to an out of memory error.
Both of those are cases in which we should not be incrementing a reference
count.
With the newly added vm_ops->mapped callback available, we can simply
defer this operation to that callback which is only invoked once the
mapping is successfully in place (but not yet visible to userspace as the
mmap and VMA write locks are held).
Therefore add afs_mapped() to implement this callback for AFS, and remove
the code doing so in afs_mmap_prepare().
Also update afs_vm_open(), afs_vm_close() and afs_vm_map_pages() to be
consistent in how the vnode is accessed.
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
fs/afs/file.c | 36 ++++++++++++++++++++++++++----------
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/fs/afs/file.c b/fs/afs/file.c
index f609366fd2ac..85696ac984cc 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -28,6 +28,8 @@ static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
static void afs_vm_open(struct vm_area_struct *area);
static void afs_vm_close(struct vm_area_struct *area);
static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff);
+static int afs_mapped(unsigned long start, unsigned long end, pgoff_t pgoff,
+ const struct file *file, void **vm_private_data);
const struct file_operations afs_file_operations = {
.open = afs_open,
@@ -61,6 +63,7 @@ const struct address_space_operations afs_file_aops = {
};
static const struct vm_operations_struct afs_vm_ops = {
+ .mapped = afs_mapped,
.open = afs_vm_open,
.close = afs_vm_close,
.fault = filemap_fault,
@@ -494,32 +497,45 @@ static void afs_drop_open_mmap(struct afs_vnode *vnode)
*/
static int afs_file_mmap_prepare(struct vm_area_desc *desc)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(desc->file));
int ret;
- afs_add_open_mmap(vnode);
-
ret = generic_file_mmap_prepare(desc);
- if (ret == 0)
- desc->vm_ops = &afs_vm_ops;
- else
- afs_drop_open_mmap(vnode);
+ if (ret)
+ return ret;
+
+ desc->vm_ops = &afs_vm_ops;
return ret;
}
+static int afs_mapped(unsigned long start, unsigned long end, pgoff_t pgoff,
+ const struct file *file, void **vm_private_data)
+{
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+
+ afs_add_open_mmap(vnode);
+ return 0;
+}
+
static void afs_vm_open(struct vm_area_struct *vma)
{
- afs_add_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
+ struct file *file = vma->vm_file;
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+
+ afs_add_open_mmap(vnode);
}
static void afs_vm_close(struct vm_area_struct *vma)
{
- afs_drop_open_mmap(AFS_FS_I(file_inode(vma->vm_file)));
+ struct file *file = vma->vm_file;
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+
+ afs_drop_open_mmap(vnode);
}
static vm_fault_t afs_vm_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff)
{
- struct afs_vnode *vnode = AFS_FS_I(file_inode(vmf->vma->vm_file));
+ struct file *file = vmf->vma->vm_file;
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
if (afs_check_validity(vnode))
return filemap_map_pages(vmf, start_pgoff, end_pgoff);
--
2.53.0
^ permalink raw reply related
* [PATCH v3 04/16] mm: add vm_ops->mapped hook
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
Previously, when a driver needed to do something like establish a
reference count, it could do so in the mmap hook in the knowledge that the
mapping would succeed.
With the introduction of f_op->mmap_prepare this is no longer the case, as
it is invoked prior to actually establishing the mapping.
mmap_prepare is not appropriate for this kind of thing as it is called
before any merge might take place, and after which an error might occur
meaning resources could be leaked.
To take this into account, introduce a new vm_ops->mapped callback which
is invoked when the VMA is first mapped (though notably - not when it is
merged - which is correct and mirrors existing mmap/open/close behaviour).
We do better than vm_ops->open() here, as this callback can return an
error, at which point the VMA will be unmapped.
Note that vm_ops->mapped() is invoked after any mmap action is complete
(such as I/O remapping).
We intentionally do not expose the VMA at this point, exposing only the
fields that could be used, and an output parameter in case the operation
needs to update the vma->vm_private_data field.
In order to deal with stacked filesystems which invoke inner filesystem's
mmap() invocations, add __compat_vma_mapped() and invoke it on vfs_mmap()
(via compat_vma_mmap()) to ensure that the mapped callback is handled when
an mmap() caller invokes a nested filesystem's mmap_prepare() callback.
We can now also remove call_action_complete() and invoke
mmap_action_complete() directly, as we separate out the rmap lock logic.
The rmap lock logic, which was added in order to keep hugetlb working (!)
by allowing the rmap lock to be held longer, needs to be propagated to the
mmap complete and mapped hook error paths.
This is because do_munmap() might otherwise deadlock with the rmap being
held, so instead we unlock at the point of unmap.
This is fine as any reliance on the rmap being held is irrelevant on error.
While we're here, refactor mmap_action_finish() to avoid a big if (err)
branch.
We also abstract unmapping of a VMA on mmap action completion into its own
helper function, unmap_vma_locked().
Update the mmap_prepare documentation to describe the mapped hook and make
it clear what its intended use is.
Additionally, update VMA userland test headers to reflect the change.
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
Documentation/filesystems/mmap_prepare.rst | 15 +++
include/linux/fs.h | 9 +-
include/linux/mm.h | 20 +++-
mm/internal.h | 8 ++
mm/util.c | 129 ++++++++++++++-------
mm/vma.c | 35 +++---
tools/testing/vma/include/dup.h | 27 ++++-
tools/testing/vma/include/stubs.h | 3 +-
8 files changed, 186 insertions(+), 60 deletions(-)
diff --git a/Documentation/filesystems/mmap_prepare.rst b/Documentation/filesystems/mmap_prepare.rst
index 65a1f094e469..20db474915da 100644
--- a/Documentation/filesystems/mmap_prepare.rst
+++ b/Documentation/filesystems/mmap_prepare.rst
@@ -25,6 +25,21 @@ That is - no resources should be allocated nor state updated to reflect that a
mapping has been established, as the mapping may either be merged, or fail to be
mapped after the callback is complete.
+Mapped callback
+---------------
+
+If resources need to be allocated per-mapping, or state such as a reference
+count needs to be manipulated, this should be done using the ``vm_ops->mapped``
+hook, which itself should be set by the ``f_op->mmap_prepare`` hook.
+
+This callback is only invoked if a new mapping has been established and was not
+merged with any other, and is invoked at a point where no error may occur before
+the mapping is established.
+
+You may return an error from the callback itself, which will cause the mapping to
+become unmapped and an error returned to the mmap() caller. This is useful if
+resources need to be allocated, and that allocation might fail.
+
How To Use
==========
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a2628a12bd2b..c390f5c667e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2059,13 +2059,20 @@ static inline bool can_mmap_file(struct file *file)
}
int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
+int __vma_check_mmap_hook(struct vm_area_struct *vma);
static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
{
+ int err;
+
if (file->f_op->mmap_prepare)
return compat_vma_mmap(file, vma);
- return file->f_op->mmap(file, vma);
+ err = file->f_op->mmap(file, vma);
+ if (err)
+ return err;
+
+ return __vma_check_mmap_hook(vma);
}
static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index da94edb287cd..68dee1101313 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -777,6 +777,23 @@ struct vm_operations_struct {
* Context: User context. May sleep. Caller holds mmap_lock.
*/
void (*close)(struct vm_area_struct *vma);
+ /**
+ * @mapped: Called when the VMA is first mapped in the MM. Not called if
+ * the new VMA is merged with an adjacent VMA.
+ *
+ * The @vm_private_data field is an output field allowing the user to
+ * modify vma->vm_private_data as necessary.
+ *
+ * ONLY valid if set from f_op->mmap_prepare. Will result in an error if
+ * set from f_op->mmap.
+ *
+ * Returns %0 on success, or an error otherwise. On error, the VMA will
+ * be unmapped.
+ *
+ * Context: User context. May sleep. Caller holds mmap_lock.
+ */
+ int (*mapped)(unsigned long start, unsigned long end, pgoff_t pgoff,
+ const struct file *file, void **vm_private_data);
/* Called any time before splitting to check if it's allowed */
int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
int (*mremap)(struct vm_area_struct *vma);
@@ -4327,7 +4344,8 @@ static inline void mmap_action_ioremap_full(struct vm_area_desc *desc,
int mmap_action_prepare(struct vm_area_desc *desc);
int mmap_action_complete(struct vm_area_struct *vma,
- struct mmap_action *action);
+ struct mmap_action *action,
+ bool rmap_lock_held);
/* Look up the first VMA which exactly match the interval vm_start ... vm_end */
static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
diff --git a/mm/internal.h b/mm/internal.h
index 0256ca44115a..e0f554178143 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -202,6 +202,14 @@ static inline void vma_close(struct vm_area_struct *vma)
/* unmap_vmas is in mm/memory.c */
void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap);
+static inline void unmap_vma_locked(struct vm_area_struct *vma)
+{
+ const size_t len = vma_pages(vma) << PAGE_SHIFT;
+
+ mmap_assert_write_locked(vma->vm_mm);
+ do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
+}
+
#ifdef CONFIG_MMU
static inline void get_anon_vma(struct anon_vma *anon_vma)
diff --git a/mm/util.c b/mm/util.c
index 73c97a748d8e..fc1bd8a8f3ea 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1163,6 +1163,54 @@ void flush_dcache_folio(struct folio *folio)
EXPORT_SYMBOL(flush_dcache_folio);
#endif
+static int __compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct vm_area_desc desc = {
+ .mm = vma->vm_mm,
+ .file = file,
+ .start = vma->vm_start,
+ .end = vma->vm_end,
+
+ .pgoff = vma->vm_pgoff,
+ .vm_file = vma->vm_file,
+ .vma_flags = vma->flags,
+ .page_prot = vma->vm_page_prot,
+
+ .action.type = MMAP_NOTHING, /* Default */
+ };
+ int err;
+
+ err = vfs_mmap_prepare(file, &desc);
+ if (err)
+ return err;
+
+ err = mmap_action_prepare(&desc);
+ if (err)
+ return err;
+
+ set_vma_from_desc(vma, &desc);
+ return mmap_action_complete(vma, &desc.action, /*rmap_lock_held=*/false);
+}
+
+static int __compat_vma_mapped(struct file *file, struct vm_area_struct *vma)
+{
+ const struct vm_operations_struct *vm_ops = vma->vm_ops;
+ void *vm_private_data = vma->vm_private_data;
+ int err;
+
+ if (!vm_ops || !vm_ops->mapped)
+ return 0;
+
+ err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff, file,
+ &vm_private_data);
+ if (err)
+ unmap_vma_locked(vma);
+ else if (vm_private_data != vma->vm_private_data)
+ vma->vm_private_data = vm_private_data;
+
+ return err;
+}
+
/**
* compat_vma_mmap() - Apply the file's .mmap_prepare() hook to an
* existing VMA and execute any requested actions.
@@ -1191,34 +1239,26 @@ EXPORT_SYMBOL(flush_dcache_folio);
*/
int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
{
- struct vm_area_desc desc = {
- .mm = vma->vm_mm,
- .file = file,
- .start = vma->vm_start,
- .end = vma->vm_end,
-
- .pgoff = vma->vm_pgoff,
- .vm_file = vma->vm_file,
- .vma_flags = vma->flags,
- .page_prot = vma->vm_page_prot,
-
- .action.type = MMAP_NOTHING, /* Default */
- };
int err;
- err = vfs_mmap_prepare(file, &desc);
+ err = __compat_vma_mmap(file, vma);
if (err)
return err;
- err = mmap_action_prepare(&desc);
- if (err)
- return err;
-
- set_vma_from_desc(vma, &desc);
- return mmap_action_complete(vma, &desc.action);
+ return __compat_vma_mapped(file, vma);
}
EXPORT_SYMBOL(compat_vma_mmap);
+int __vma_check_mmap_hook(struct vm_area_struct *vma)
+{
+ /* vm_ops->mapped is not valid if mmap() is specified. */
+ if (vma->vm_ops && WARN_ON_ONCE(vma->vm_ops->mapped))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(__vma_check_mmap_hook);
+
static void set_ps_flags(struct page_snapshot *ps, const struct folio *folio,
const struct page *page)
{
@@ -1308,32 +1348,31 @@ void snapshot_page(struct page_snapshot *ps, const struct page *page)
}
static int mmap_action_finish(struct vm_area_struct *vma,
- struct mmap_action *action, int err)
+ struct mmap_action *action, int err,
+ bool rmap_lock_held)
{
+ if (rmap_lock_held)
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+
+ if (!err) {
+ if (action->success_hook)
+ return action->success_hook(vma);
+ return 0;
+ }
+
/*
* If an error occurs, unmap the VMA altogether and return an error. We
* only clear the newly allocated VMA, since this function is only
* invoked if we do NOT merge, so we only clean up the VMA we created.
*/
- if (err) {
- const size_t len = vma_pages(vma) << PAGE_SHIFT;
-
- do_munmap(current->mm, vma->vm_start, len, NULL);
-
- if (action->error_hook) {
- /* We may want to filter the error. */
- err = action->error_hook(err);
-
- /* The caller should not clear the error. */
- VM_WARN_ON_ONCE(!err);
- }
- return err;
+ unmap_vma_locked(vma);
+ if (action->error_hook) {
+ /* We may want to filter the error. */
+ err = action->error_hook(err);
+ /* The caller should not clear the error. */
+ VM_WARN_ON_ONCE(!err);
}
-
- if (action->success_hook)
- return action->success_hook(vma);
-
- return 0;
+ return err;
}
#ifdef CONFIG_MMU
@@ -1364,13 +1403,15 @@ EXPORT_SYMBOL(mmap_action_prepare);
* mmap_action_complete - Execute VMA descriptor action.
* @vma: The VMA to perform the action upon.
* @action: The action to perform.
+ * @rmap_lock_held: Is the file rmap lock held?
*
* Similar to mmap_action_prepare().
*
* Return: 0 on success, or error, at which point the VMA will be unmapped.
*/
int mmap_action_complete(struct vm_area_struct *vma,
- struct mmap_action *action)
+ struct mmap_action *action,
+ bool rmap_lock_held)
{
int err = 0;
@@ -1388,7 +1429,8 @@ int mmap_action_complete(struct vm_area_struct *vma,
break;
}
- return mmap_action_finish(vma, action, err);
+ return mmap_action_finish(vma, action, err,
+ rmap_lock_held);
}
EXPORT_SYMBOL(mmap_action_complete);
#else
@@ -1408,7 +1450,8 @@ int mmap_action_prepare(struct vm_area_desc *desc)
EXPORT_SYMBOL(mmap_action_prepare);
int mmap_action_complete(struct vm_area_struct *vma,
- struct mmap_action *action)
+ struct mmap_action *action,
+ bool rmap_lock_held)
{
int err = 0;
@@ -1423,7 +1466,7 @@ int mmap_action_complete(struct vm_area_struct *vma,
break;
}
- return mmap_action_finish(vma, action, err);
+ return mmap_action_finish(vma, action, err, rmap_lock_held);
}
EXPORT_SYMBOL(mmap_action_complete);
#endif
diff --git a/mm/vma.c b/mm/vma.c
index 2a86c7575000..a27d1278ea6d 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2731,21 +2731,28 @@ static bool can_set_ksm_flags_early(struct mmap_state *map)
return false;
}
-static int call_action_complete(struct mmap_state *map,
- struct mmap_action *action,
- struct vm_area_struct *vma)
+static int call_mapped_hook(struct mmap_state *map,
+ struct vm_area_struct *vma)
{
- int ret;
-
- ret = mmap_action_complete(vma, action);
+ const struct vm_operations_struct *vm_ops = vma->vm_ops;
+ void *vm_private_data = vma->vm_private_data;
+ int err;
- /* If we held the file rmap we need to release it. */
- if (map->hold_file_rmap_lock) {
- struct file *file = vma->vm_file;
+ if (!vm_ops || !vm_ops->mapped)
+ return 0;
+ err = vm_ops->mapped(vma->vm_start, vma->vm_end, vma->vm_pgoff,
+ vma->vm_file, &vm_private_data);
+ if (err) {
+ if (map->hold_file_rmap_lock)
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
- i_mmap_unlock_write(file->f_mapping);
+ unmap_vma_locked(vma);
+ return err;
}
- return ret;
+ /* Update private data if changed. */
+ if (vm_private_data != vma->vm_private_data)
+ vma->vm_private_data = vm_private_data;
+ return 0;
}
static unsigned long __mmap_region(struct file *file, unsigned long addr,
@@ -2799,8 +2806,10 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
__mmap_complete(&map, vma);
if (have_mmap_prepare && allocated_new) {
- error = call_action_complete(&map, &desc.action, vma);
-
+ error = mmap_action_complete(vma, &desc.action,
+ map.hold_file_rmap_lock);
+ if (!error)
+ error = call_mapped_hook(&map, vma);
if (error)
return error;
}
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 8ae525ed1738..aa34966cbc62 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -643,6 +643,23 @@ struct vm_operations_struct {
* Context: User context. May sleep. Caller holds mmap_lock.
*/
void (*close)(struct vm_area_struct *vma);
+ /**
+ * @mapped: Called when the VMA is first mapped in the MM. Not called if
+ * the new VMA is merged with an adjacent VMA.
+ *
+ * The @vm_private_data field is an output field allowing the user to
+ * modify vma->vm_private_data as necessary.
+ *
+ * ONLY valid if set from f_op->mmap_prepare. Will result in an error if
+ * set from f_op->mmap.
+ *
+ * Returns %0 on success, or an error otherwise. On error, the VMA will
+ * be unmapped.
+ *
+ * Context: User context. May sleep. Caller holds mmap_lock.
+ */
+ int (*mapped)(unsigned long start, unsigned long end, pgoff_t pgoff,
+ const struct file *file, void **vm_private_data);
/* Called any time before splitting to check if it's allowed */
int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
int (*mremap)(struct vm_area_struct *vma);
@@ -1281,7 +1298,7 @@ static inline int __compat_vma_mmap(const struct file_operations *f_op,
return err;
set_vma_from_desc(vma, &desc);
- return mmap_action_complete(vma, &desc.action);
+ return mmap_action_complete(vma, &desc.action, /*rmap_lock_held=*/false);
}
static inline int compat_vma_mmap(struct file *file,
@@ -1500,3 +1517,11 @@ static inline pgprot_t vma_get_page_prot(vma_flags_t vma_flags)
return vm_get_page_prot(vm_flags);
}
+
+static inline void unmap_vma_locked(struct vm_area_struct *vma)
+{
+ const size_t len = vma_pages(vma) << PAGE_SHIFT;
+
+ mmap_assert_write_locked(vma->vm_mm);
+ do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
+}
diff --git a/tools/testing/vma/include/stubs.h b/tools/testing/vma/include/stubs.h
index a30b8bc84955..d1c3d4ddb5e9 100644
--- a/tools/testing/vma/include/stubs.h
+++ b/tools/testing/vma/include/stubs.h
@@ -87,7 +87,8 @@ static inline int mmap_action_prepare(struct vm_area_desc *desc)
}
static inline int mmap_action_complete(struct vm_area_struct *vma,
- struct mmap_action *action)
+ struct mmap_action *action,
+ bool rmap_lock_held)
{
return 0;
}
--
2.53.0
^ permalink raw reply related
* [PATCH v3 03/16] mm: document vm_operations_struct->open the same as close()
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
Describe when the operation is invoked and the context in which it is
invoked, matching the description already added for vm_ops->close().
While we're here, update all outdated references to an 'area' field for
VMAs to the more consistent 'vma'.
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
include/linux/mm.h | 15 ++++++++++-----
tools/testing/vma/include/dup.h | 15 ++++++++++-----
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1e63b3a44a47..da94edb287cd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -766,15 +766,20 @@ struct vm_uffd_ops;
* to the functions called when a no-page or a wp-page exception occurs.
*/
struct vm_operations_struct {
- void (*open)(struct vm_area_struct * area);
+ /**
+ * @open: Called when a VMA is remapped, split or forked. Not called
+ * upon first mapping a VMA.
+ * Context: User context. May sleep. Caller holds mmap_lock.
+ */
+ void (*open)(struct vm_area_struct *vma);
/**
* @close: Called when the VMA is being removed from the MM.
* Context: User context. May sleep. Caller holds mmap_lock.
*/
- void (*close)(struct vm_area_struct * area);
+ void (*close)(struct vm_area_struct *vma);
/* Called any time before splitting to check if it's allowed */
- int (*may_split)(struct vm_area_struct *area, unsigned long addr);
- int (*mremap)(struct vm_area_struct *area);
+ int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
+ int (*mremap)(struct vm_area_struct *vma);
/*
* Called by mprotect() to make driver-specific permission
* checks before mprotect() is finalised. The VMA must not
@@ -786,7 +791,7 @@ struct vm_operations_struct {
vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
- unsigned long (*pagesize)(struct vm_area_struct * area);
+ unsigned long (*pagesize)(struct vm_area_struct *vma);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 9eada1e0949c..8ae525ed1738 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -632,15 +632,20 @@ struct vm_area_struct {
} __randomize_layout;
struct vm_operations_struct {
- void (*open)(struct vm_area_struct * area);
+ /**
+ * @open: Called when a VMA is remapped, split or forked. Not called
+ * upon first mapping a VMA.
+ * Context: User context. May sleep. Caller holds mmap_lock.
+ */
+ void (*open)(struct vm_area_struct *vma);
/**
* @close: Called when the VMA is being removed from the MM.
* Context: User context. May sleep. Caller holds mmap_lock.
*/
- void (*close)(struct vm_area_struct * area);
+ void (*close)(struct vm_area_struct *vma);
/* Called any time before splitting to check if it's allowed */
- int (*may_split)(struct vm_area_struct *area, unsigned long addr);
- int (*mremap)(struct vm_area_struct *area);
+ int (*may_split)(struct vm_area_struct *vma, unsigned long addr);
+ int (*mremap)(struct vm_area_struct *vma);
/*
* Called by mprotect() to make driver-specific permission
* checks before mprotect() is finalised. The VMA must not
@@ -652,7 +657,7 @@ struct vm_operations_struct {
vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order);
vm_fault_t (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
- unsigned long (*pagesize)(struct vm_area_struct * area);
+ unsigned long (*pagesize)(struct vm_area_struct *vma);
/* notification that a previously read-only page is about to become
* writable, if an error is returned it will cause a SIGBUS */
--
2.53.0
^ permalink raw reply related
* [PATCH v3 02/16] mm: add documentation for the mmap_prepare file operation callback
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
This documentation makes it easier for a driver/file system implementer to
correctly use this callback.
It covers the fundamentals, whilst intentionally leaving the less lovely
possible actions one might take undocumented (for instance - the
success_hook, error_hook fields in mmap_action).
The document also covers the new VMA flags implementation which is the
only one which will work correctly with mmap_prepare.
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
Documentation/filesystems/index.rst | 1 +
Documentation/filesystems/mmap_prepare.rst | 142 +++++++++++++++++++++
2 files changed, 143 insertions(+)
create mode 100644 Documentation/filesystems/mmap_prepare.rst
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index f4873197587d..6cbc3e0292ae 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -29,6 +29,7 @@ algorithms work.
fiemap
files
locks
+ mmap_prepare
multigrain-ts
mount_api
quota
diff --git a/Documentation/filesystems/mmap_prepare.rst b/Documentation/filesystems/mmap_prepare.rst
new file mode 100644
index 000000000000..65a1f094e469
--- /dev/null
+++ b/Documentation/filesystems/mmap_prepare.rst
@@ -0,0 +1,142 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+mmap_prepare callback HOWTO
+===========================
+
+Introduction
+============
+
+The ``struct file->f_op->mmap()`` callback has been deprecated as it is both a
+stability and security risk, and doesn't always permit the merging of adjacent
+mappings resulting in unnecessary memory fragmentation.
+
+It has been replaced with the ``file->f_op->mmap_prepare()`` callback which
+solves these problems.
+
+This hook is called right at the beginning of setting up the mapping, and
+importantly it is invoked *before* any merging of adjacent mappings has taken
+place.
+
+If an error arises upon mapping, it might arise after this callback has been
+invoked, therefore it should be treated as effectively stateless.
+
+That is - no resources should be allocated nor state updated to reflect that a
+mapping has been established, as the mapping may either be merged, or fail to be
+mapped after the callback is complete.
+
+How To Use
+==========
+
+In your driver's struct file_operations, specify an ``mmap_prepare``
+callback rather than an ``mmap`` one, e.g. for ext4:
+
+.. code-block:: C
+
+ const struct file_operations ext4_file_operations = {
+ ...
+ .mmap_prepare = ext4_file_mmap_prepare,
+ };
+
+This has a signature of ``int (*mmap_prepare)(struct vm_area_desc *)``.
+
+Examining the struct vm_area_desc type:
+
+.. code-block:: C
+
+ struct vm_area_desc {
+ /* Immutable state. */
+ const struct mm_struct *const mm;
+ struct file *const file; /* May vary from vm_file in stacked callers. */
+ unsigned long start;
+ unsigned long end;
+
+ /* Mutable fields. Populated with initial state. */
+ pgoff_t pgoff;
+ struct file *vm_file;
+ vma_flags_t vma_flags;
+ pgprot_t page_prot;
+
+ /* Write-only fields. */
+ const struct vm_operations_struct *vm_ops;
+ void *private_data;
+
+ /* Take further action? */
+ struct mmap_action action;
+ };
+
+This is straightforward - you have all the fields you need to set up the
+mapping, and you can update the mutable and writable fields, for instance:
+
+.. code-block:: C
+
+ static int ext4_file_mmap_prepare(struct vm_area_desc *desc)
+ {
+ int ret;
+ struct file *file = desc->file;
+ struct inode *inode = file->f_mapping->host;
+
+ ...
+
+ file_accessed(file);
+ if (IS_DAX(file_inode(file))) {
+ desc->vm_ops = &ext4_dax_vm_ops;
+ vma_desc_set_flags(desc, VMA_HUGEPAGE_BIT);
+ } else {
+ desc->vm_ops = &ext4_file_vm_ops;
+ }
+ return 0;
+ }
+
+Importantly, you no longer have to dance around with reference counts or locks
+when updating these fields - **you can simply go ahead and change them**.
+
+Everything is taken care of by the mapping code.
+
+VMA Flags
+---------
+
+Along with ``mmap_prepare``, VMA flags have undergone an overhaul. Where before
+you would invoke one of vm_flags_init(), vm_flags_reset(), vm_flags_set(),
+vm_flags_clear(), and vm_flags_mod() to modify flags (and to have the
+locking done correctly for you), this is no longer necessary.
+
+Also, the legacy approach of specifying VMA flags via ``VM_READ``, ``VM_WRITE``,
+etc. - i.e. using a ``VM_xxx`` macro - has changed too.
+
+When implementing mmap_prepare(), reference flags by their bit number, defined
+as a ``VMA_xxx_BIT`` macro, e.g. ``VMA_READ_BIT``, ``VMA_WRITE_BIT`` etc.,
+and use one of (where ``desc`` is a pointer to struct vm_area_desc):
+
+* ``vma_desc_test_flags(desc, ...)`` - Specify a comma-separated list of flags
+ you wish to test for (whether *any* are set), e.g. - ``vma_desc_test_flags(
+ desc, VMA_WRITE_BIT, VMA_MAYWRITE_BIT)`` - returns ``true`` if either are set,
+ otherwise ``false``.
+* ``vma_desc_set_flags(desc, ...)`` - Update the VMA descriptor flags to set
+ additional flags specified by a comma-separated list,
+ e.g. - ``vma_desc_set_flags(desc, VMA_PFNMAP_BIT, VMA_IO_BIT)``.
+* ``vma_desc_clear_flags(desc, ...)`` - Update the VMA descriptor flags to clear
+ flags specified by a comma-separated list, e.g. - ``vma_desc_clear_flags(
+ desc, VMA_WRITE_BIT, VMA_MAYWRITE_BIT)``.
+
+Actions
+=======
+
+You can now very easily have actions be performed upon a mapping once set up by
+utilising simple helper functions invoked upon the struct vm_area_desc
+pointer. These are:
+
+* mmap_action_remap() - Remaps a range consisting only of PFNs for a specific
+ range starting at a virtual address and PFN number of a set size.
+
+* mmap_action_remap_full() - Same as mmap_action_remap(), only remaps the
+ entire mapping from ``start_pfn`` onward.
+
+* mmap_action_ioremap() - Same as mmap_action_remap(), only performs an I/O
+ remap.
+
+* mmap_action_ioremap_full() - Same as mmap_action_ioremap(), only remaps
+ the entire mapping from ``start_pfn`` onward.
+
+**NOTE:** The ``action`` field should never normally be manipulated directly,
+rather you ought to use one of these helpers.
--
2.53.0
^ permalink raw reply related
* [PATCH v3 01/16] mm: various small mmap_prepare cleanups
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
In-Reply-To: <cover.1773944114.git.ljs@kernel.org>
Rather than passing arbitrary fields, pass a vm_area_desc pointer to the
mmap prepare functions, and an action and vma pointer to the mmap complete
functions, in order to put all the action-specific logic in the function
actually doing the work.
Additionally, allow mmap prepare functions to return an error so we can
error out as soon as possible if there is something logically incorrect in
the input.
Update remap_pfn_range_prepare() to properly check the input range for the
CoW case.
Also remove io_remap_pfn_range_complete(), as we can simply set up the
fields correctly in io_remap_pfn_range_prepare() and use
remap_pfn_range_complete() for this.
While we're here, make remap_pfn_range_prepare_vma() a little neater, and
pass mmap_action directly to call_action_complete().
Then, update compat_vma_mmap() to perform its logic directly, as
__compat_vma_map() is not used by anything so we don't need to export it.
Also update compat_vma_mmap() to use vfs_mmap_prepare() rather than
calling the mmap_prepare op directly.
Finally, update the VMA userland tests to reflect the changes.
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
include/linux/fs.h | 2 -
include/linux/mm.h | 7 +-
mm/internal.h | 32 ++++----
mm/memory.c | 45 +++++++----
mm/util.c | 121 +++++++++++++-----------------
mm/vma.c | 24 +++---
tools/testing/vma/include/dup.h | 7 +-
tools/testing/vma/include/stubs.h | 8 +-
8 files changed, 126 insertions(+), 120 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25e..a2628a12bd2b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2058,8 +2058,6 @@ static inline bool can_mmap_file(struct file *file)
return true;
}
-int __compat_vma_mmap(const struct file_operations *f_op,
- struct file *file, struct vm_area_struct *vma);
int compat_vma_mmap(struct file *file, struct vm_area_struct *vma);
static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 42cc40aa63d9..1e63b3a44a47 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4320,10 +4320,9 @@ static inline void mmap_action_ioremap_full(struct vm_area_desc *desc,
mmap_action_ioremap(desc, desc->start, start_pfn, vma_desc_size(desc));
}
-void mmap_action_prepare(struct mmap_action *action,
- struct vm_area_desc *desc);
-int mmap_action_complete(struct mmap_action *action,
- struct vm_area_struct *vma);
+int mmap_action_prepare(struct vm_area_desc *desc);
+int mmap_action_complete(struct vm_area_struct *vma,
+ struct mmap_action *action);
/* Look up the first VMA which exactly match the interval vm_start ... vm_end */
static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
diff --git a/mm/internal.h b/mm/internal.h
index 708d240b4198..0256ca44115a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1793,26 +1793,28 @@ int walk_page_range_debug(struct mm_struct *mm, unsigned long start,
void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm);
int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm);
-void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn);
-int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn, unsigned long size, pgprot_t pgprot);
+int remap_pfn_range_prepare(struct vm_area_desc *desc);
+int remap_pfn_range_complete(struct vm_area_struct *vma,
+ struct mmap_action *action);
-static inline void io_remap_pfn_range_prepare(struct vm_area_desc *desc,
- unsigned long orig_pfn, unsigned long size)
+static inline int io_remap_pfn_range_prepare(struct vm_area_desc *desc)
{
+ struct mmap_action *action = &desc->action;
+ const unsigned long orig_pfn = action->remap.start_pfn;
+ const pgprot_t orig_pgprot = action->remap.pgprot;
+ const unsigned long size = action->remap.size;
const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size);
+ int err;
- return remap_pfn_range_prepare(desc, pfn);
-}
+ action->remap.start_pfn = pfn;
+ action->remap.pgprot = pgprot_decrypted(orig_pgprot);
+ err = remap_pfn_range_prepare(desc);
+ if (err)
+ return err;
-static inline int io_remap_pfn_range_complete(struct vm_area_struct *vma,
- unsigned long addr, unsigned long orig_pfn, unsigned long size,
- pgprot_t orig_prot)
-{
- const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size);
- const pgprot_t prot = pgprot_decrypted(orig_prot);
-
- return remap_pfn_range_complete(vma, addr, pfn, size, prot);
+ /* Remap does the actual work. */
+ action->type = MMAP_REMAP_PFN;
+ return 0;
}
#ifdef CONFIG_MMU_NOTIFIER
diff --git a/mm/memory.c b/mm/memory.c
index 219b9bf6cae0..9dec67a18116 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3099,26 +3099,34 @@ static int do_remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
}
#endif
-void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn)
+int remap_pfn_range_prepare(struct vm_area_desc *desc)
{
- /*
- * We set addr=VMA start, end=VMA end here, so this won't fail, but we
- * check it again on complete and will fail there if specified addr is
- * invalid.
- */
- get_remap_pgoff(vma_desc_is_cow_mapping(desc), desc->start, desc->end,
- desc->start, desc->end, pfn, &desc->pgoff);
+ const struct mmap_action *action = &desc->action;
+ const unsigned long start = action->remap.start;
+ const unsigned long end = start + action->remap.size;
+ const unsigned long pfn = action->remap.start_pfn;
+ const bool is_cow = vma_desc_is_cow_mapping(desc);
+ int err;
+
+ err = get_remap_pgoff(is_cow, start, end, desc->start, desc->end, pfn,
+ &desc->pgoff);
+ if (err)
+ return err;
+
vma_desc_set_flags_mask(desc, VMA_REMAP_FLAGS);
+ return 0;
}
-static int remap_pfn_range_prepare_vma(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn, unsigned long size)
+static int remap_pfn_range_prepare_vma(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long pfn,
+ unsigned long size)
{
- unsigned long end = addr + PAGE_ALIGN(size);
+ const unsigned long end = addr + PAGE_ALIGN(size);
+ const bool is_cow = is_cow_mapping(vma->vm_flags);
int err;
- err = get_remap_pgoff(is_cow_mapping(vma->vm_flags), addr, end,
- vma->vm_start, vma->vm_end, pfn, &vma->vm_pgoff);
+ err = get_remap_pgoff(is_cow, addr, end, vma->vm_start, vma->vm_end,
+ pfn, &vma->vm_pgoff);
if (err)
return err;
@@ -3151,10 +3159,15 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
}
EXPORT_SYMBOL(remap_pfn_range);
-int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn, unsigned long size, pgprot_t prot)
+int remap_pfn_range_complete(struct vm_area_struct *vma,
+ struct mmap_action *action)
{
- return do_remap_pfn_range(vma, addr, pfn, size, prot);
+ const unsigned long start = action->remap.start;
+ const unsigned long pfn = action->remap.start_pfn;
+ const unsigned long size = action->remap.size;
+ const pgprot_t prot = action->remap.pgprot;
+
+ return do_remap_pfn_range(vma, start, pfn, size, prot);
}
/**
diff --git a/mm/util.c b/mm/util.c
index ce7ae80047cf..73c97a748d8e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1163,43 +1163,6 @@ void flush_dcache_folio(struct folio *folio)
EXPORT_SYMBOL(flush_dcache_folio);
#endif
-/**
- * __compat_vma_mmap() - See description for compat_vma_mmap()
- * for details. This is the same operation, only with a specific file operations
- * struct which may or may not be the same as vma->vm_file->f_op.
- * @f_op: The file operations whose .mmap_prepare() hook is specified.
- * @file: The file which backs or will back the mapping.
- * @vma: The VMA to apply the .mmap_prepare() hook to.
- * Returns: 0 on success or error.
- */
-int __compat_vma_mmap(const struct file_operations *f_op,
- struct file *file, struct vm_area_struct *vma)
-{
- struct vm_area_desc desc = {
- .mm = vma->vm_mm,
- .file = file,
- .start = vma->vm_start,
- .end = vma->vm_end,
-
- .pgoff = vma->vm_pgoff,
- .vm_file = vma->vm_file,
- .vma_flags = vma->flags,
- .page_prot = vma->vm_page_prot,
-
- .action.type = MMAP_NOTHING, /* Default */
- };
- int err;
-
- err = f_op->mmap_prepare(&desc);
- if (err)
- return err;
-
- mmap_action_prepare(&desc.action, &desc);
- set_vma_from_desc(vma, &desc);
- return mmap_action_complete(&desc.action, vma);
-}
-EXPORT_SYMBOL(__compat_vma_mmap);
-
/**
* compat_vma_mmap() - Apply the file's .mmap_prepare() hook to an
* existing VMA and execute any requested actions.
@@ -1228,7 +1191,31 @@ EXPORT_SYMBOL(__compat_vma_mmap);
*/
int compat_vma_mmap(struct file *file, struct vm_area_struct *vma)
{
- return __compat_vma_mmap(file->f_op, file, vma);
+ struct vm_area_desc desc = {
+ .mm = vma->vm_mm,
+ .file = file,
+ .start = vma->vm_start,
+ .end = vma->vm_end,
+
+ .pgoff = vma->vm_pgoff,
+ .vm_file = vma->vm_file,
+ .vma_flags = vma->flags,
+ .page_prot = vma->vm_page_prot,
+
+ .action.type = MMAP_NOTHING, /* Default */
+ };
+ int err;
+
+ err = vfs_mmap_prepare(file, &desc);
+ if (err)
+ return err;
+
+ err = mmap_action_prepare(&desc);
+ if (err)
+ return err;
+
+ set_vma_from_desc(vma, &desc);
+ return mmap_action_complete(vma, &desc.action);
}
EXPORT_SYMBOL(compat_vma_mmap);
@@ -1320,8 +1307,8 @@ void snapshot_page(struct page_snapshot *ps, const struct page *page)
}
}
-static int mmap_action_finish(struct mmap_action *action,
- const struct vm_area_struct *vma, int err)
+static int mmap_action_finish(struct vm_area_struct *vma,
+ struct mmap_action *action, int err)
{
/*
* If an error occurs, unmap the VMA altogether and return an error. We
@@ -1353,37 +1340,38 @@ static int mmap_action_finish(struct mmap_action *action,
/**
* mmap_action_prepare - Perform preparatory setup for an VMA descriptor
* action which need to be performed.
- * @desc: The VMA descriptor to prepare for @action.
- * @action: The action to perform.
+ * @desc: The VMA descriptor to prepare for its @desc->action.
+ *
+ * Returns: %0 on success, otherwise error.
*/
-void mmap_action_prepare(struct mmap_action *action,
- struct vm_area_desc *desc)
+int mmap_action_prepare(struct vm_area_desc *desc)
{
- switch (action->type) {
+ switch (desc->action.type) {
case MMAP_NOTHING:
- break;
+ return 0;
case MMAP_REMAP_PFN:
- remap_pfn_range_prepare(desc, action->remap.start_pfn);
- break;
+ return remap_pfn_range_prepare(desc);
case MMAP_IO_REMAP_PFN:
- io_remap_pfn_range_prepare(desc, action->remap.start_pfn,
- action->remap.size);
- break;
+ return io_remap_pfn_range_prepare(desc);
}
+
+ WARN_ON_ONCE(1);
+ return -EINVAL;
}
EXPORT_SYMBOL(mmap_action_prepare);
/**
* mmap_action_complete - Execute VMA descriptor action.
- * @action: The action to perform.
* @vma: The VMA to perform the action upon.
+ * @action: The action to perform.
*
* Similar to mmap_action_prepare().
*
* Return: 0 on success, or error, at which point the VMA will be unmapped.
*/
-int mmap_action_complete(struct mmap_action *action,
- struct vm_area_struct *vma)
+int mmap_action_complete(struct vm_area_struct *vma,
+ struct mmap_action *action)
+
{
int err = 0;
@@ -1391,25 +1379,22 @@ int mmap_action_complete(struct mmap_action *action,
case MMAP_NOTHING:
break;
case MMAP_REMAP_PFN:
- err = remap_pfn_range_complete(vma, action->remap.start,
- action->remap.start_pfn, action->remap.size,
- action->remap.pgprot);
+ err = remap_pfn_range_complete(vma, action);
break;
case MMAP_IO_REMAP_PFN:
- err = io_remap_pfn_range_complete(vma, action->remap.start,
- action->remap.start_pfn, action->remap.size,
- action->remap.pgprot);
+ /* Should have been delegated. */
+ WARN_ON_ONCE(1);
+ err = -EINVAL;
break;
}
- return mmap_action_finish(action, vma, err);
+ return mmap_action_finish(vma, action, err);
}
EXPORT_SYMBOL(mmap_action_complete);
#else
-void mmap_action_prepare(struct mmap_action *action,
- struct vm_area_desc *desc)
+int mmap_action_prepare(struct vm_area_desc *desc)
{
- switch (action->type) {
+ switch (desc->action.type) {
case MMAP_NOTHING:
break;
case MMAP_REMAP_PFN:
@@ -1417,11 +1402,13 @@ void mmap_action_prepare(struct mmap_action *action,
WARN_ON_ONCE(1); /* nommu cannot handle these. */
break;
}
+
+ return 0;
}
EXPORT_SYMBOL(mmap_action_prepare);
-int mmap_action_complete(struct mmap_action *action,
- struct vm_area_struct *vma)
+int mmap_action_complete(struct vm_area_struct *vma,
+ struct mmap_action *action)
{
int err = 0;
@@ -1436,7 +1423,7 @@ int mmap_action_complete(struct mmap_action *action,
break;
}
- return mmap_action_finish(action, vma, err);
+ return mmap_action_finish(vma, action, err);
}
EXPORT_SYMBOL(mmap_action_complete);
#endif
diff --git a/mm/vma.c b/mm/vma.c
index c1f183235756..2a86c7575000 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2640,15 +2640,18 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
vma_set_page_prot(vma);
}
-static void call_action_prepare(struct mmap_state *map,
- struct vm_area_desc *desc)
+static int call_action_prepare(struct mmap_state *map,
+ struct vm_area_desc *desc)
{
- struct mmap_action *action = &desc->action;
+ int err;
- mmap_action_prepare(action, desc);
+ err = mmap_action_prepare(desc);
+ if (err)
+ return err;
- if (action->hide_from_rmap_until_complete)
+ if (desc->action.hide_from_rmap_until_complete)
map->hold_file_rmap_lock = true;
+ return 0;
}
/*
@@ -2672,7 +2675,9 @@ static int call_mmap_prepare(struct mmap_state *map,
if (err)
return err;
- call_action_prepare(map, desc);
+ err = call_action_prepare(map, desc);
+ if (err)
+ return err;
/* Update fields permitted to be changed. */
map->pgoff = desc->pgoff;
@@ -2727,13 +2732,12 @@ static bool can_set_ksm_flags_early(struct mmap_state *map)
}
static int call_action_complete(struct mmap_state *map,
- struct vm_area_desc *desc,
+ struct mmap_action *action,
struct vm_area_struct *vma)
{
- struct mmap_action *action = &desc->action;
int ret;
- ret = mmap_action_complete(action, vma);
+ ret = mmap_action_complete(vma, action);
/* If we held the file rmap we need to release it. */
if (map->hold_file_rmap_lock) {
@@ -2795,7 +2799,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
__mmap_complete(&map, vma);
if (have_mmap_prepare && allocated_new) {
- error = call_action_complete(&map, &desc, vma);
+ error = call_action_complete(&map, &desc.action, vma);
if (error)
return error;
diff --git a/tools/testing/vma/include/dup.h b/tools/testing/vma/include/dup.h
index 999357e18eb0..9eada1e0949c 100644
--- a/tools/testing/vma/include/dup.h
+++ b/tools/testing/vma/include/dup.h
@@ -1271,9 +1271,12 @@ static inline int __compat_vma_mmap(const struct file_operations *f_op,
if (err)
return err;
- mmap_action_prepare(&desc.action, &desc);
+ err = mmap_action_prepare(&desc);
+ if (err)
+ return err;
+
set_vma_from_desc(vma, &desc);
- return mmap_action_complete(&desc.action, vma);
+ return mmap_action_complete(vma, &desc.action);
}
static inline int compat_vma_mmap(struct file *file,
diff --git a/tools/testing/vma/include/stubs.h b/tools/testing/vma/include/stubs.h
index 5afb0afe2d48..a30b8bc84955 100644
--- a/tools/testing/vma/include/stubs.h
+++ b/tools/testing/vma/include/stubs.h
@@ -81,13 +81,13 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma)
{
}
-static inline void mmap_action_prepare(struct mmap_action *action,
- struct vm_area_desc *desc)
+static inline int mmap_action_prepare(struct vm_area_desc *desc)
{
+ return 0;
}
-static inline int mmap_action_complete(struct mmap_action *action,
- struct vm_area_struct *vma)
+static inline int mmap_action_complete(struct vm_area_struct *vma,
+ struct mmap_action *action)
{
return 0;
}
--
2.53.0
^ permalink raw reply related
* [PATCH v3 00/16] mm: expand mmap_prepare functionality and usage
From: Lorenzo Stoakes (Oracle) @ 2026-03-19 18:23 UTC (permalink / raw)
To: Andrew Morton
Cc: Jonathan Corbet, Clemens Ladisch, Arnd Bergmann,
Greg Kroah-Hartman, K . Y . Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Long Li, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, Miquel Raynal, Richard Weinberger,
Vignesh Raghavendra, Bodo Stroesser, Martin K . Petersen,
David Howells, Marc Dionne, Alexander Viro, Christian Brauner,
Jan Kara, David Hildenbrand, Liam R . Howlett, Vlastimil Babka,
Mike Rapoport, Suren Baghdasaryan, Michal Hocko, Jann Horn,
Pedro Falcato, linux-kernel, linux-doc, linux-hyperv, linux-stm32,
linux-arm-kernel, linux-mtd, linux-staging, linux-scsi,
target-devel, linux-afs, linux-fsdevel, linux-mm, Ryan Roberts
This series expands the mmap_prepare functionality, which is intended to
replace the deprecated f_op->mmap hook which has been the source of bugs
and security issues for some time.
This series starts with some cleanup of existing mmap_prepare logic, then
adds documentation for the mmap_prepare call to make it easier for
filesystem and driver writers to understand how it works.
It then importantly adds a vm_ops->mapped hook, a key feature that was
missing from mmap_prepare previously - this is invoked when a driver which
specifies mmap_prepare has successfully been mapped but not merged with
another VMA.
mmap_prepare is invoked prior to a merge being attempted, so you cannot
manipulate state such as reference counts as if it were a new mapping.
The vm_ops->mapped hook allows a driver to perform tasks required at this
stage, and provides symmetry against subsequent vm_ops->open,close calls.
The series uses this to correct the afs implementation, which wrongly
manipulated the reference count at mmap_prepare time.
It then adds an mmap_prepare equivalent of vm_iomap_memory() -
mmap_action_simple_ioremap(), then uses this to update a number of drivers.
It then splits out the mmap_prepare compatibility layer (which allows for
invocation of mmap_prepare hooks in an mmap() hook) in such a way as to
allow for more incremental implementation of mmap_prepare hooks.
It then uses this to extend mmap_prepare usage in drivers.
Finally it adds an mmap_prepare equivalent of vm_map_pages(), which lays
the foundation for future work which will extend mmap_prepare to DMA
coherent mappings.
v3:
* Propagated tags (thanks Suren, Richard!)
* Updated 12/16 to correctly clear the vm_area_desc data structure in
set_desc_from_vma() as per Joshua Hahn (thanks! :)
* Fixed type in 12/16 as per Suren (cheers!)
* Fixed up 6/16 to use mmap_action_ioremap_full() in simple_ioremap_prepare() as
suggested by Suren.
* Also fixed up 6/16 to call io_remap_pfn_range_prepare() directly rather than
mmap_action_prepare() as per Suren.
* Also fixed up 6/16 to pass vm_len rather than vm_[start, end] to
__simple_ioremap_prep() as per Suren (thanks for all the above! :)
* Fixed an issue with the rmap lock being held - we were referencing
vma->vm_file after the VMA was unmapped, i.e. a use-after-free, so avoid that.
Also, do_munmap() relies on the rmap lock NOT being held or it may deadlock, so
extend the functionality to ensure we drop the lock on error paths when held.
* Updated 'area' -> 'vma' variable in 3/16 in VMA test dup.h.
* Fixed up reference to __compat_vma_mmap() in 12/16 commit message.
* Updated 1/16 to no longer duplicatively apply io_remap_pfn_range_pfn().
* Updated 1/16 to delegate I/O remap complete to remap complete logic.
* Fixed various typos in 12/16.
* Fixed stale comment typos in 13/16.
* Fixed commit msg and comment typos in 14/16.
* Removed accidental sneak peek at future functionality in 15/16 commit message
:).
* Fixed up field names to be identical in VMA tests + mm_types.h in 6/16,
15/16.
v2:
* Rebased on
https://lore.kernel.org/all/cover.1773665966.git.ljs@kernel.org/ to make
Andrew's life easier :)
* Folded all interim fixes into series (thanks Randy for many doc fixes!)
* As per Suren, removed a comment about allocations too small to fail.
* As per Randy, fixed up typo in documentation for vm_area_desc.
* Fixed mmap_action_prepare() not returning if invalid action->type
specified, as updated from Andrew's interim fix (thanks!) and also
reported by kernel test bot.
* Updated mmap_action_prepare() and specific prepare functions to only
pass vm_area_desc parameter as per Suren.
* Fixed up whitespace as per Suren.
* Updated vm_ops->open comment in vm_operations_struct to reference forking
as per Suren.
* Added a commit to check that input range is within VMA on remap as per
Suren (this also covers I/O remap and all other cases already asserted).
* Updated AFS to not incorrectly reference count on mmap prepare as per
Usama.
* Also updated various static AFS functions to be consistent with each
other.
* Updated AFS commit message to reflect mmap_prepare being before any VMA
merging as per Suren.
* Updated __compat_vma_mapped() to check for NULL vm_ops as per Usama.
* Updated __compat_vma_mapped() to not reference an unmapped VMA's fields
as per Usama.
* Updated __vma_check_mmap_hook() to check for NULL vm_ops as per Usama.
* Dropped comment about preferring mmap_prepare as it seems overly confusing,
as per Suren.
* Updated the mmap lock assert in unmap_vma_locked() to a write lock assert
as per Suren.
* Copied vm_ops->open comment over to VMA tests in appropriate patch as per
Suren.
* Updated mmap_prepare documentation to reflect the fact that no resources
should be allocated upon mmap_prepare.
* Updated mmap_prepare documentation to reference the vm_ops->mapped
callback.
* Fixed stray markdown '## How to use' in documentation.
* Fixed bug reported by kernel test bot re: overlooked
vma_desc_test_flags() -> vma_desc_test() in MTD driver for nommu.
https://lore.kernel.org/linux-mm/cover.1773695307.git.ljs@kernel.org/
v1:
https://lore.kernel.org/linux-mm/cover.1773346620.git.ljs@kernel.org/
Lorenzo Stoakes (Oracle) (16):
mm: various small mmap_prepare cleanups
mm: add documentation for the mmap_prepare file operation callback
mm: document vm_operations_struct->open the same as close()
mm: add vm_ops->mapped hook
fs: afs: correctly drop reference count on mapping failure
mm: add mmap_action_simple_ioremap()
misc: open-dice: replace deprecated mmap hook with mmap_prepare
hpet: replace deprecated mmap hook with mmap_prepare
mtdchar: replace deprecated mmap hook with mmap_prepare, clean up
stm: replace deprecated mmap hook with mmap_prepare
staging: vme_user: replace deprecated mmap hook with mmap_prepare
mm: allow handling of stacked mmap_prepare hooks in more drivers
drivers: hv: vmbus: replace deprecated mmap hook with mmap_prepare
uio: replace deprecated mmap hook with mmap_prepare in uio_info
mm: add mmap_action_map_kernel_pages[_full]()
mm: on remap assert that input range within the proposed VMA
Documentation/driver-api/vme.rst | 2 +-
Documentation/filesystems/index.rst | 1 +
Documentation/filesystems/mmap_prepare.rst | 168 ++++++++++++++
drivers/char/hpet.c | 12 +-
drivers/hv/hyperv_vmbus.h | 4 +-
drivers/hv/vmbus_drv.c | 31 ++-
drivers/hwtracing/stm/core.c | 31 ++-
drivers/misc/open-dice.c | 19 +-
drivers/mtd/mtdchar.c | 21 +-
drivers/staging/vme_user/vme.c | 20 +-
drivers/staging/vme_user/vme.h | 2 +-
drivers/staging/vme_user/vme_user.c | 51 +++--
drivers/target/target_core_user.c | 26 ++-
drivers/uio/uio.c | 10 +-
drivers/uio/uio_hv_generic.c | 11 +-
fs/afs/file.c | 36 ++-
include/linux/fs.h | 14 +-
include/linux/hyperv.h | 4 +-
include/linux/mm.h | 159 ++++++++++++-
include/linux/mm_types.h | 17 +-
include/linux/uio_driver.h | 4 +-
mm/internal.h | 41 ++--
mm/memory.c | 175 ++++++++++----
mm/util.c | 251 ++++++++++++++-------
mm/vma.c | 53 +++--
mm/vma.h | 2 +-
tools/testing/vma/include/dup.h | 152 ++++++++++---
tools/testing/vma/include/stubs.h | 9 +-
28 files changed, 990 insertions(+), 336 deletions(-)
create mode 100644 Documentation/filesystems/mmap_prepare.rst
--
2.53.0
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox