* [PATCH 32/83] hsa/radeon: implementing IOCTL for clock counters
[not found] ` <1405029279-6894-1-git-send-email-oded.gabbay-5C7GfCeVMHo@public.gmane.org>
@ 2014-07-10 21:53 ` Oded Gabbay
2014-07-11 20:34 ` Jerome Glisse
0 siblings, 1 reply; 9+ messages in thread
From: Oded Gabbay @ 2014-07-10 21:53 UTC (permalink / raw)
To: David Airlie, Alex Deucher, Jerome Glisse
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, John Bridgman,
Andrew Lewycky, Joerg Roedel, Evgeny Pinchuk, Oded Gabbay,
Ben Goz, Alexey Skidanov, linux-api-u79uwXL29TY76Z2rM5mHXA
From: Evgeny Pinchuk <evgeny.pinchuk-5C7GfCeVMHo@public.gmane.org>
Implemented new IOCTL to query the CPU and GPU clock counters.
Signed-off-by: Evgeny Pinchuk <evgeny.pinchuk-5C7GfCeVMHo@public.gmane.org>
Signed-off-by: Oded Gabbay <oded.gabbay-5C7GfCeVMHo@public.gmane.org>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 37 ++++++++++++++++++++++++++++++++++++
include/uapi/linux/kfd_ioctl.h | 9 +++++++++
2 files changed, 46 insertions(+)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index ddaf357..d6fa980 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <uapi/linux/kfd_ioctl.h>
+#include <linux/time.h>
#include "kfd_priv.h"
#include "kfd_scheduler.h"
@@ -284,6 +285,38 @@ out:
return err;
}
+/*
+ * KFD_IOC_GET_CLOCK_COUNTERS handler.
+ *
+ * Copies a struct kfd_ioctl_get_clock_counters_args in from @arg, looks up
+ * the device for args.gpu_id, fills in the GPU, CPU and system clock
+ * counters and copies the structure back out to @arg.
+ *
+ * The CPU and system counters are reported in nanoseconds, which is why
+ * system_clock_freq is set to 1 GHz.
+ *
+ * Returns 0 on success, -EFAULT if @arg cannot be copied in or out, or
+ * -EINVAL if radeon_kfd_device_by_id() finds no device for args.gpu_id.
+ */
+static long
+kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void __user *arg)
+{
+	struct kfd_ioctl_get_clock_counters_args args;
+	struct kfd_dev *dev;
+	struct timespec time;
+
+	if (copy_from_user(&args, arg, sizeof(args)))
+		return -EFAULT;
+
+	dev = radeon_kfd_device_by_id(args.gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	/* Reading GPU clock counter from KGD */
+	args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);
+
+	/*
+	 * No access to rdtsc. Using raw monotonic time.
+	 * Convert the whole timespec: tv_nsec on its own is only the
+	 * sub-second remainder and wraps back to 0 every second.
+	 */
+	getrawmonotonic(&time);
+	args.cpu_clock_counter = (uint64_t)timespec_to_ns(&time);
+
+	get_monotonic_boottime(&time);
+	args.system_clock_counter = (uint64_t)timespec_to_ns(&time);
+
+	/* Since the counter is in nano-seconds we use 1GHz frequency */
+	args.system_clock_freq = 1000000000;
+
+	if (copy_to_user(arg, &args, sizeof(args)))
+		return -EFAULT;
+
+	return 0;
+}
static long
kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
@@ -312,6 +345,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
err = kfd_ioctl_set_memory_policy(filep, process, (void __user *)arg);
break;
+ case KFD_IOC_GET_CLOCK_COUNTERS:
+ err = kfd_ioctl_get_clock_counters(filep, process, (void __user *)arg);
+ break;
+
default:
dev_err(kfd_device,
"unknown ioctl cmd 0x%x, arg 0x%lx)\n",
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 928e628..5b9517e 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -70,12 +70,21 @@ struct kfd_ioctl_set_memory_policy_args {
uint64_t alternate_aperture_size; /* to KFD */
};
+/*
+ * Argument block for KFD_IOC_GET_CLOCK_COUNTERS. Userspace supplies gpu_id;
+ * KFD fills in every other field.
+ * NOTE(review): the 32-bit gpu_id is followed directly by 64-bit fields, so
+ * the in-memory layout depends on the surrounding #pragma pack setting -
+ * confirm 32-bit and 64-bit userspace agree on it.
+ */
+struct kfd_ioctl_get_clock_counters_args {
+ uint32_t gpu_id; /* to KFD */
+ uint64_t gpu_clock_counter; /* from KFD */
+ uint64_t cpu_clock_counter; /* from KFD */
+ uint64_t system_clock_counter; /* from KFD */
+ uint64_t system_clock_freq; /* from KFD */
+};
+
#define KFD_IOC_MAGIC 'K'
#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
#define KFD_IOC_CREATE_QUEUE _IOWR(KFD_IOC_MAGIC, 2, struct kfd_ioctl_create_queue_args)
#define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
#define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
+#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
#pragma pack(pop)
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 42/83] hsa/radeon: 32-bit processes support
[not found] <1405029279-6894-1-git-send-email-oded.gabbay@amd.com>
[not found] ` <1405029279-6894-1-git-send-email-oded.gabbay-5C7GfCeVMHo@public.gmane.org>
@ 2014-07-10 21:53 ` Oded Gabbay
2014-07-10 21:54 ` [PATCH 44/83] hsa/radeon: HSA64/HSA32 modes support Oded Gabbay
` (4 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Oded Gabbay @ 2014-07-10 21:53 UTC (permalink / raw)
To: David Airlie, Alex Deucher, Jerome Glisse
Cc: linux-kernel, dri-devel, John Bridgman, Andrew Lewycky,
Joerg Roedel, Alexey Skidanov, Oded Gabbay, Ben Goz,
Evgeny Pinchuk, linux-api
From: Alexey Skidanov <Alexey.Skidanov@amd.com>
Initialize compat_ioctl properly. All ioctl argument structures are packed.
Signed-off-by: Alexey Skidanov <Alexey.Skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 7 +++++--
drivers/gpu/hsa/radeon/kfd_priv.h | 4 ++++
include/uapi/linux/kfd_ioctl.h | 2 +-
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 75fe11f..e95d597 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -27,6 +27,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include "kfd_priv.h"
@@ -41,6 +42,7 @@ static const char kfd_dev_name[] = "kfd";
static const struct file_operations kfd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = kfd_ioctl,
+ .compat_ioctl = kfd_ioctl,
.open = kfd_open,
.mmap = kfd_mmap,
};
@@ -105,8 +107,9 @@ kfd_open(struct inode *inode, struct file *filep)
process = radeon_kfd_create_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
-
- pr_debug("\nkfd: process %d opened dev/kfd", process->pasid);
+ process->is_32bit_user_mode = is_compat_task();
+ dev_info(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
+ process->pasid, process->is_32bit_user_mode);
return 0;
}
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 8b877ca..9d3b1fc 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -194,6 +194,10 @@ struct kfd_process {
size_t queue_array_size;
struct kfd_queue **queues; /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
unsigned long allocated_queue_bitmap[DIV_ROUND_UP(MAX_PROCESS_QUEUES, BITS_PER_LONG)];
+
+ /*Is the user space process 32 bit?*/
+ bool is_32bit_user_mode;
+
};
struct kfd_process *radeon_kfd_create_process(const struct task_struct *);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 5b9517e..a7c3abd 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -29,7 +29,7 @@
#define KFD_IOCTL_CURRENT_VERSION 1
/* The 64-bit ABI is the authoritative version. */
-#pragma pack(push, 8)
+#pragma pack(push, 1)
struct kfd_ioctl_get_version_args {
uint32_t min_supported_version; /* from KFD */
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 44/83] hsa/radeon: HSA64/HSA32 modes support
[not found] <1405029279-6894-1-git-send-email-oded.gabbay@amd.com>
[not found] ` <1405029279-6894-1-git-send-email-oded.gabbay-5C7GfCeVMHo@public.gmane.org>
2014-07-10 21:53 ` [PATCH 42/83] hsa/radeon: 32-bit processes support Oded Gabbay
@ 2014-07-10 21:54 ` Oded Gabbay
[not found] ` <1405029279-6894-16-git-send-email-oded.gabbay-5C7GfCeVMHo@public.gmane.org>
2014-07-10 21:54 ` [PATCH 54/83] hsa/radeon: Switch to new queue scheduler Oded Gabbay
` (3 subsequent siblings)
6 siblings, 1 reply; 9+ messages in thread
From: Oded Gabbay @ 2014-07-10 21:54 UTC (permalink / raw)
To: David Airlie, Alex Deucher, Jerome Glisse
Cc: linux-kernel, dri-devel, John Bridgman, Andrew Lewycky,
Joerg Roedel, Alexey Skidanov, Oded Gabbay, Evgeny Pinchuk,
Ben Goz, linux-api
From: Alexey Skidanov <Alexey.Skidanov@amd.com>
Added apertures initialization and appropriate ioctl
Signed-off-by: Alexey Skidanov <Alexey.Skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/hsa/radeon/Makefile | 2 +-
drivers/gpu/hsa/radeon/kfd_aperture.c | 124 ++++++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_chardev.c | 58 +++++++++++-
drivers/gpu/hsa/radeon/kfd_priv.h | 18 ++++
drivers/gpu/hsa/radeon/kfd_process.c | 17 ++++
drivers/gpu/hsa/radeon/kfd_sched_cik_static.c | 3 +-
drivers/gpu/hsa/radeon/kfd_topology.c | 27 ++++++
include/uapi/linux/kfd_ioctl.h | 18 ++++
8 files changed, 264 insertions(+), 3 deletions(-)
create mode 100644 drivers/gpu/hsa/radeon/kfd_aperture.c
diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
index 5422e6a..813b31f 100644
--- a/drivers/gpu/hsa/radeon/Makefile
+++ b/drivers/gpu/hsa/radeon/Makefile
@@ -5,6 +5,6 @@
radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
- kfd_vidmem.o kfd_interrupt.o
+ kfd_vidmem.o kfd_interrupt.o kfd_aperture.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
diff --git a/drivers/gpu/hsa/radeon/kfd_aperture.c b/drivers/gpu/hsa/radeon/kfd_aperture.c
new file mode 100644
index 0000000..9e2d6da
--- /dev/null
+++ b/drivers/gpu/hsa/radeon/kfd_aperture.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
+#include <uapi/linux/kfd_ioctl.h>
+#include <linux/time.h>
+#include "kfd_priv.h"
+#include "kfd_scheduler.h"
+#include <linux/mm.h>
+#include <uapi/asm-generic/mman-common.h>
+#include <asm/processor.h>
+
+
+/*
+ * Aperture address helpers.
+ *
+ * The *_BASE macros place gpu_num in bits 63:61 of a 64-bit address and add
+ * a fixed per-aperture offset. The *_LIMIT macros keep the upper bits of a
+ * base address and set all lower bits to one (the low 40 bits for GPUVM,
+ * the low 32 bits for scratch and LDS).
+ *
+ * Every macro argument is parenthesized so that an expression argument is
+ * cast as a whole.
+ */
+#define MAKE_GPUVM_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x1000000000000)
+#define MAKE_GPUVM_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFF0000000000) | 0xFFFFFFFFFF)
+#define MAKE_SCRATCH_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x100000000)
+#define MAKE_SCRATCH_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFFFF00000000) | 0xFFFFFFFF)
+#define MAKE_LDS_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x0)
+#define MAKE_LDS_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFFFF00000000) | 0xFFFFFFFF)
+
+#define HSA_32BIT_LDS_APP_SIZE 0x10000
+#define HSA_32BIT_LDS_APP_ALIGNMENT 0x10000
+
+/*
+ * Reserve @len bytes of address space at the lowest free address that is a
+ * multiple of @alignment, by placing a PROT_NONE private anonymous mapping
+ * there.
+ *
+ * @process is currently unused.
+ * NOTE(review): vm_mmap() presumably maps into the calling task's address
+ * space rather than @process's - confirm the two are always the same here.
+ *
+ * Returns the reserved address, or 0 if no suitable address was found.
+ */
+static unsigned long kfd_reserve_aperture(struct kfd_process *process, unsigned long len, unsigned long alignment)
+{
+	unsigned long addr;
+	unsigned long start_address;
+
+	/*
+	 * A zero alignment would never advance the scan below, and one that
+	 * is not smaller than TASK_SIZE would make the loop bound wrap.
+	 */
+	if (alignment == 0 || alignment >= TASK_SIZE)
+		return 0;
+
+	/*
+	 * Go bottom up and find the first available aligned address.
+	 * We may narrow space to scan by getting mmap range limits.
+	 */
+	for (start_address = alignment; start_address < (TASK_SIZE - alignment); start_address += alignment) {
+		addr = vm_mmap(NULL, start_address, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0);
+		if (IS_ERR_VALUE(addr))
+			continue;
+
+		if (addr == start_address)
+			return addr;
+
+		/* Mapped somewhere other than the requested address; undo it. */
+		vm_munmap(addr, len);
+	}
+
+	return 0;
+}
+
+/*
+ * Set up the LDS, GPUVM and scratch aperture ranges in @process's per-device
+ * data for each device returned by kfd_topology_enum_kfd_devices(), stopping
+ * at the first NULL device or after NUM_OF_SUPPORTED_GPUS devices.
+ *
+ * Runs with process->mutex held for the whole loop. Always returns 0.
+ */
+int kfd_init_apertures(struct kfd_process *process)
+{
+ uint8_t id = 0;
+ struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
+
+ mutex_lock(&process->mutex);
+
+ /*
+ * Iterate over all devices. The enumeration call is evaluated before
+ * the id bound, so once the bound is reached the enumeration is still
+ * called one more time before the loop exits.
+ */
+ while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && id < NUM_OF_SUPPORTED_GPUS) {
+
+ /* NOTE(review): pdd is dereferenced below without a NULL check - confirm radeon_kfd_get_process_device_data() cannot fail. */
+ pdd = radeon_kfd_get_process_device_data(dev, process);
+
+ /*
+ * For a 64 bit process the apertures are statically placed in the non canonical part of the address space;
+ * for a 32 bit process the aperture is reserved inside the process address space.
+ */
+ if (process->is_32bit_user_mode) {
+ /* Try to reserve the aperture. On failure carry on and leave the aperture empty (base and limit 0). */
+ pdd->lds_base = kfd_reserve_aperture(
+ process,
+ HSA_32BIT_LDS_APP_SIZE,
+ HSA_32BIT_LDS_APP_ALIGNMENT);
+
+ if (pdd->lds_base)
+ pdd->lds_limit = pdd->lds_base + HSA_32BIT_LDS_APP_SIZE - 1;
+ else
+ pdd->lds_limit = 0;
+
+ /* GPUVM and scratch apertures are not supported for 32 bit processes */
+ pdd->gpuvm_base = pdd->gpuvm_limit = pdd->scratch_base = pdd->scratch_limit = 0;
+ } else {
+ /* Use id + 1 as the node id so the three MSBs of each aperture address are never 0 */
+ pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
+ pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+ pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
+ pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
+ pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+ }
+
+ dev_dbg(kfd_device, "node id %u, gpu id %u, lds_base %llX lds_limit %llX gpuvm_base %llX gpuvm_limit %llX scratch_base %llX scratch_limit %llX",
+ id, pdd->dev->id, pdd->lds_base, pdd->lds_limit, pdd->gpuvm_base, pdd->gpuvm_limit, pdd->scratch_base, pdd->scratch_limit);
+
+ id++;
+ }
+
+ mutex_unlock(&process->mutex);
+
+ return 0;
+}
+
+
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index e95d597..07cac88 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -32,6 +32,9 @@
#include <linux/time.h>
#include "kfd_priv.h"
#include "kfd_scheduler.h"
+#include <linux/mm.h>
+#include <uapi/asm-generic/mman-common.h>
+#include <asm/processor.h>
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -107,9 +110,13 @@ kfd_open(struct inode *inode, struct file *filep)
process = radeon_kfd_create_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
+
process->is_32bit_user_mode = is_compat_task();
+
dev_info(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
- process->pasid, process->is_32bit_user_mode);
+ process->pasid, process->is_32bit_user_mode);
+
+ kfd_init_apertures(process);
return 0;
}
@@ -321,6 +328,51 @@ kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void __u
return 0;
}
+
+/*
+ * KFD_IOC_GET_PROCESS_APERTURES handler.
+ *
+ * Walks @p's per-device data under p->mutex and reports one aperture entry
+ * per device, up to NUM_OF_SUPPORTED_GPUS entries, together with the number
+ * of entries written (num_of_nodes).
+ *
+ * Returns 0 on success or -EFAULT if @arg cannot be copied in or out.
+ */
+static int kfd_ioctl_get_process_apertures(struct file *filp, struct kfd_process *p, void __user *arg)
+{
+	struct kfd_ioctl_get_process_apertures_args args;
+	struct kfd_process_device *pdd;
+	uint8_t node;
+
+	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
+
+	if (copy_from_user(&args, arg, sizeof(args)))
+		return -EFAULT;
+
+	args.num_of_nodes = 0;
+
+	mutex_lock(&p->mutex);
+
+	/* With an empty per-device list there is nothing to report. */
+	pdd = kfd_has_process_device_data(p) ?
+		kfd_get_first_process_device_data(p) : NULL;
+
+	while (pdd != NULL) {
+		node = args.num_of_nodes;
+
+		args.process_apertures[node].gpu_id = pdd->dev->id;
+		args.process_apertures[node].lds_base = pdd->lds_base;
+		args.process_apertures[node].lds_limit = pdd->lds_limit;
+		args.process_apertures[node].gpuvm_base = pdd->gpuvm_base;
+		args.process_apertures[node].gpuvm_limit = pdd->gpuvm_limit;
+		args.process_apertures[node].scratch_base = pdd->scratch_base;
+		args.process_apertures[node].scratch_limit = pdd->scratch_limit;
+
+		dev_dbg(kfd_device, "node id %u, gpu id %u, lds_base %llX lds_limit %llX gpuvm_base %llX gpuvm_limit %llX scratch_base %llX scratch_limit %llX",
+			node, pdd->dev->id, pdd->lds_base, pdd->lds_limit, pdd->gpuvm_base, pdd->gpuvm_limit, pdd->scratch_base, pdd->scratch_limit);
+
+		args.num_of_nodes++;
+
+		/* Advance first, then stop once the output array is full. */
+		pdd = kfd_get_next_process_device_data(p, pdd);
+		if (args.num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
+			break;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	if (copy_to_user(arg, &args, sizeof(args)))
+		return -EFAULT;
+
+	return 0;
+}
+
+
static long
kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
@@ -352,6 +404,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
err = kfd_ioctl_get_clock_counters(filep, process, (void __user *)arg);
break;
+ case KFD_IOC_GET_PROCESS_APERTURES:
+ err = kfd_ioctl_get_process_apertures(filep, process, (void __user *)arg);
+ break;
+
default:
dev_err(kfd_device,
"unknown ioctl cmd 0x%x, arg 0x%lx)\n",
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 9d3b1fc..28155bc 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -171,6 +171,16 @@ struct kfd_process_device {
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
bool bound;
+
+ /*Apertures*/
+ uint64_t lds_base;
+ uint64_t lds_limit;
+ uint64_t gpuvm_base;
+ uint64_t gpuvm_limit;
+ uint64_t scratch_base;
+ uint64_t scratch_limit;
+
+
};
/* Process data */
@@ -212,6 +222,10 @@ void radeon_kfd_install_queue(struct kfd_process *p, unsigned int queue_id, stru
void radeon_kfd_remove_queue(struct kfd_process *p, unsigned int queue_id);
struct kfd_queue *radeon_kfd_get_queue(struct kfd_process *p, unsigned int queue_id);
+/* Process device data iterator */
+struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
+struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd);
+bool kfd_has_process_device_data(struct kfd_process *p);
/* PASIDs */
int radeon_kfd_pasid_init(void);
@@ -237,6 +251,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);
struct kfd_dev *radeon_kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *radeon_kfd_device_by_pci_dev(const struct pci_dev *pdev);
+struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
/* MMIO registers */
#define WRITE_REG(dev, reg, value) radeon_kfd_write_reg((dev), (reg), (value))
@@ -253,4 +268,7 @@ void kgd2kfd_interrupt(struct kfd_dev *dev, const void *ih_ring_entry);
void kgd2kfd_suspend(struct kfd_dev *dev);
int kgd2kfd_resume(struct kfd_dev *dev);
+/*HSA apertures*/
+int kfd_init_apertures(struct kfd_process *process);
+
#endif
diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
index f89f855..80136e6 100644
--- a/drivers/gpu/hsa/radeon/kfd_process.c
+++ b/drivers/gpu/hsa/radeon/kfd_process.c
@@ -397,3 +397,20 @@ struct kfd_queue *radeon_kfd_get_queue(struct kfd_process *p, unsigned int queue
test_bit(queue_id, p->allocated_queue_bitmap)) ?
p->queues[queue_id] : NULL;
}
+
+/*
+ * Return the first entry of @p's per-device data list, or NULL when the
+ * list is empty.
+ */
+struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
+{
+	/*
+	 * list_first_entry() on an empty list would hand back a pointer
+	 * derived from the list head itself, not a real kfd_process_device.
+	 */
+	return list_first_entry_or_null(&p->per_device_data, struct kfd_process_device, per_device_list);
+}
+
+/*
+ * Return the entry that follows @pdd in @p's per-device data list, or NULL
+ * when @pdd is the last entry.
+ */
+struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd)
+{
+	struct list_head *node = &pdd->per_device_list;
+
+	return list_is_last(node, &p->per_device_data) ?
+		NULL : list_next_entry(pdd, per_device_list);
+}
+
+/* Report whether @p's per-device data list holds at least one entry. */
+bool kfd_has_process_device_data(struct kfd_process *p)
+{
+	if (list_empty(&p->per_device_data))
+		return false;
+
+	return true;
+}
diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
index 7ee8125..30561a6 100644
--- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
+++ b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
@@ -627,7 +627,8 @@ static void cik_static_deregister_process(struct kfd_scheduler *scheduler,
struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
struct cik_static_process *pp = kfd_process_to_private(scheduler_process);
- if (priv && pp) {
+
+ if (priv && pp) {
release_vmid(priv, pp->vmid);
kfree(pp);
}
diff --git a/drivers/gpu/hsa/radeon/kfd_topology.c b/drivers/gpu/hsa/radeon/kfd_topology.c
index 21bb66e..213ae7b 100644
--- a/drivers/gpu/hsa/radeon/kfd_topology.c
+++ b/drivers/gpu/hsa/radeon/kfd_topology.c
@@ -1201,3 +1201,30 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
return res;
}
+
+/*
+ * Return the gpu pointer of the idx-th entry of topology_device_list,
+ * walking the list with topology_lock held for read.
+ * When idx is out of bounds, the function will return NULL.
+ * NOTE(review): NULL is also returned if the matching entry's gpu pointer
+ * is itself NULL - confirm whether such entries can exist, since callers
+ * treat NULL as the end of the enumeration.
+ */
+struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
+{
+	struct kfd_topology_device *entry;
+	struct kfd_dev *found = NULL;
+	uint8_t remaining = idx;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(entry, &topology_device_list, list) {
+		/* Count down instead of up: zero left means this is entry idx. */
+		if (remaining == 0) {
+			found = entry->gpu;
+			break;
+		}
+
+		remaining--;
+	}
+
+	up_read(&topology_lock);
+
+	return found;
+}
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index a7c3abd..e5fcb8b 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -78,6 +78,23 @@ struct kfd_ioctl_get_clock_counters_args {
uint64_t system_clock_freq; /* from KFD */
};
+#define NUM_OF_SUPPORTED_GPUS 7
+
+/*
+ * Aperture address ranges reported by KFD for one device. Each aperture is
+ * described by a base and a limit address; gpu_id names the device the
+ * entry belongs to. All fields are written by KFD.
+ */
+struct kfd_process_device_apertures {
+ uint64_t lds_base;/* from KFD */
+ uint64_t lds_limit;/* from KFD */
+ uint64_t scratch_base;/* from KFD */
+ uint64_t scratch_limit;/* from KFD */
+ uint64_t gpuvm_base;/* from KFD */
+ uint64_t gpuvm_limit;/* from KFD */
+ uint32_t gpu_id;/* from KFD */
+};
+
+/*
+ * Argument block for KFD_IOC_GET_PROCESS_APERTURES: a fixed-size array of
+ * per-device aperture entries plus the number of entries KFD filled in.
+ * NOTE(review): the handler in this series starts num_of_nodes at 0 and
+ * increments it once per device, so 0 looks possible when the process has
+ * no per-device data - confirm against the documented range below.
+ */
+struct kfd_ioctl_get_process_apertures_args {
+ struct kfd_process_device_apertures process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
+ uint8_t num_of_nodes; /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS]*/
+};
+
#define KFD_IOC_MAGIC 'K'
#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
@@ -85,6 +102,7 @@ struct kfd_ioctl_get_clock_counters_args {
#define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
#define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
+#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
#pragma pack(pop)
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 54/83] hsa/radeon: Switch to new queue scheduler
[not found] <1405029279-6894-1-git-send-email-oded.gabbay@amd.com>
` (2 preceding siblings ...)
2014-07-10 21:54 ` [PATCH 44/83] hsa/radeon: HSA64/HSA32 modes support Oded Gabbay
@ 2014-07-10 21:54 ` Oded Gabbay
2014-07-10 21:54 ` [PATCH 55/83] hsa/radeon: Add IOCTL for update queue Oded Gabbay
` (2 subsequent siblings)
6 siblings, 0 replies; 9+ messages in thread
From: Oded Gabbay @ 2014-07-10 21:54 UTC (permalink / raw)
To: David Airlie, Alex Deucher, Jerome Glisse
Cc: linux-kernel, dri-devel, John Bridgman, Andrew Lewycky,
Joerg Roedel, Ben Goz, Oded Gabbay, Evgeny Pinchuk,
Alexey Skidanov, linux-api
From: Ben Goz <ben.goz@amd.com>
This patch switches from the old KFD queue scheduler to the new KFD
queue scheduler. The new scheduler supports H/W CP scheduling, over-subscription
of queues and pre-emption of queues.
Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/hsa/radeon/kfd_aperture.c | 1 -
drivers/gpu/hsa/radeon/kfd_chardev.c | 107 +++++++++++++++------------------
drivers/gpu/hsa/radeon/kfd_device.c | 31 ++++++----
drivers/gpu/hsa/radeon/kfd_interrupt.c | 4 +-
drivers/gpu/hsa/radeon/kfd_priv.h | 2 +
drivers/gpu/hsa/radeon/kfd_process.c | 56 ++++-------------
include/uapi/linux/kfd_ioctl.h | 4 +-
7 files changed, 88 insertions(+), 117 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_aperture.c b/drivers/gpu/hsa/radeon/kfd_aperture.c
index 9e2d6da..2c72b21 100644
--- a/drivers/gpu/hsa/radeon/kfd_aperture.c
+++ b/drivers/gpu/hsa/radeon/kfd_aperture.c
@@ -32,7 +32,6 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
#include <linux/mm.h>
#include <uapi/asm-generic/mman-common.h>
#include <asm/processor.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 07cac88..bb2ef02 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -31,10 +31,11 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
#include <linux/mm.h>
#include <uapi/asm-generic/mman-common.h>
#include <asm/processor.h>
+#include "kfd_hw_pointer_store.h"
+#include "kfd_device_queue_manager.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -128,24 +129,36 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
struct kfd_dev *dev;
int err = 0;
unsigned int queue_id;
- struct kfd_queue *queue;
struct kfd_process_device *pdd;
+ struct queue_properties q_properties;
+
+ memset(&q_properties, 0, sizeof(struct queue_properties));
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
- dev = radeon_kfd_device_by_id(args.gpu_id);
- if (dev == NULL)
- return -EINVAL;
+ /* need to validate parameters */
+
+ q_properties.is_interop = false;
+ q_properties.queue_percent = args.queue_percentage;
+ q_properties.priority = args.queue_priority;
+ q_properties.queue_address = args.ring_base_address;
+ q_properties.queue_size = args.ring_size;
- queue = kzalloc(
- offsetof(struct kfd_queue, scheduler_queue) + dev->device_info->scheduler_class->queue_size,
- GFP_KERNEL);
- if (!queue)
- return -ENOMEM;
+ pr_debug("%s Arguments: Queue Percentage (%d, %d)\n"
+ "Queue Priority (%d, %d)\n"
+ "Queue Address (0x%llX, 0x%llX)\n"
+ "Queue Size (%u64, %ll)\n",
+ __func__,
+ q_properties.queue_percent, args.queue_percentage,
+ q_properties.priority, args.queue_priority,
+ q_properties.queue_address, args.ring_base_address,
+ q_properties.queue_size, args.ring_size);
- queue->dev = dev;
+ dev = radeon_kfd_device_by_id(args.gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
mutex_lock(&p->mutex);
@@ -159,23 +172,14 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
p->pasid,
dev->id);
- if (!radeon_kfd_allocate_queue_id(p, &queue_id))
- goto err_allocate_queue_id;
-
- err = dev->device_info->scheduler_class->create_queue(dev->scheduler, pdd->scheduler_process,
- &queue->scheduler_queue,
- (void __user *)args.ring_base_address,
- args.ring_size,
- (void __user *)args.read_pointer_address,
- (void __user *)args.write_pointer_address,
- radeon_kfd_queue_id_to_doorbell(dev, p, queue_id));
- if (err)
+ err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0, KFD_QUEUE_TYPE_COMPUTE, &queue_id);
+ if (err != 0)
goto err_create_queue;
- radeon_kfd_install_queue(p, queue_id, queue);
-
args.queue_id = queue_id;
- args.doorbell_address = (uint64_t)(uintptr_t)radeon_kfd_get_doorbell(filep, p, dev, queue_id);
+ args.read_pointer_address = (uint64_t)q_properties.read_ptr;
+ args.write_pointer_address = (uint64_t)q_properties.write_ptr;
+ args.doorbell_address = (uint64_t)q_properties.doorbell_ptr;
if (copy_to_user(arg, &args, sizeof(args))) {
err = -EFAULT;
@@ -198,12 +202,9 @@ kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void __user *a
return 0;
err_copy_args_out:
- dev->device_info->scheduler_class->destroy_queue(dev->scheduler, &queue->scheduler_queue);
+ pqm_destroy_queue(&p->pqm, queue_id);
err_create_queue:
- radeon_kfd_remove_queue(p, queue_id);
-err_allocate_queue_id:
err_bind_process:
- kfree(queue);
mutex_unlock(&p->mutex);
return err;
}
@@ -211,36 +212,25 @@ err_bind_process:
static int
kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, void __user *arg)
{
+ int retval;
struct kfd_ioctl_destroy_queue_args args;
- struct kfd_queue *queue;
- struct kfd_dev *dev;
if (copy_from_user(&args, arg, sizeof(args)))
return -EFAULT;
- mutex_lock(&p->mutex);
-
- queue = radeon_kfd_get_queue(p, args.queue_id);
- if (!queue) {
- mutex_unlock(&p->mutex);
- return -EINVAL;
- }
-
- dev = queue->dev;
-
pr_debug("kfd: destroying queue id %d for PASID %d\n",
- args.queue_id,
- p->pasid);
+ args.queue_id,
+ p->pasid);
- radeon_kfd_remove_queue(p, args.queue_id);
- dev->device_info->scheduler_class->destroy_queue(dev->scheduler, &queue->scheduler_queue);
+ mutex_lock(&p->mutex);
- kfree(queue);
+ retval = pqm_destroy_queue(&p->pqm, args.queue_id);
mutex_unlock(&p->mutex);
- return 0;
+ return retval;
}
+
static long
kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void __user *arg)
{
@@ -281,12 +271,12 @@ kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void __us
alternate_policy = (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
- if (!dev->device_info->scheduler_class->set_cache_policy(dev->scheduler,
- pdd->scheduler_process,
- default_policy,
- alternate_policy,
- (void __user *)args.alternate_aperture_base,
- args.alternate_aperture_size))
+ if (!dev->dqm->set_cache_memory_policy(dev->dqm,
+ &pdd->qpd,
+ default_policy,
+ alternate_policy,
+ (void __user *)args.alternate_aperture_base,
+ args.alternate_aperture_size))
err = -EINVAL;
out:
@@ -432,11 +422,14 @@ kfd_mmap(struct file *filp, struct vm_area_struct *vma)
if (IS_ERR(process))
return PTR_ERR(process);
- if (pgoff < KFD_MMAP_DOORBELL_START)
- return -EINVAL;
-
- if (pgoff < KFD_MMAP_DOORBELL_END)
+ if (pgoff >= KFD_MMAP_DOORBELL_START && pgoff < KFD_MMAP_DOORBELL_END)
return radeon_kfd_doorbell_mmap(process, vma);
+ if (pgoff >= KFD_MMAP_RPTR_START && pgoff < KFD_MMAP_RPTR_END)
+ return radeon_kfd_hw_pointer_store_mmap(&process->read_ptr, vma);
+
+ if (pgoff >= KFD_MMAP_WPTR_START && pgoff < KFD_MMAP_WPTR_END)
+ return radeon_kfd_hw_pointer_store_mmap(&process->write_ptr, vma);
+
return -EINVAL;
}
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index 82febf4..c602e16 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -25,10 +25,9 @@
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
+#include "kfd_device_queue_manager.h"
static const struct kfd_device_info kaveri_device_info = {
- .scheduler_class = &radeon_kfd_cik_static_scheduler_class,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t)
};
@@ -121,7 +120,11 @@ device_iommu_pasid_init(struct kfd_dev *kfd)
}
pasid_limit = min_t(pasid_t, (pasid_t)1 << kfd->device_info->max_pasid_bits, iommu_info.max_pasids);
- pasid_limit = min_t(pasid_t, pasid_limit, kfd->doorbell_process_limit);
+ /*
+ * last pasid is used for kernel queues doorbells
+ * in the future the last pasid might be used for a kernel thread.
+ */
+ pasid_limit = min_t(pasid_t, pasid_limit, kfd->doorbell_process_limit - 1);
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
if (err < 0) {
@@ -168,17 +171,26 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback);
- if (kfd->device_info->scheduler_class->create(kfd, &kfd->scheduler)) {
+ kfd->dqm = device_queue_manager_init(kfd);
+ if (!kfd->dqm) {
+ kfd_topology_remove_device(kfd);
amd_iommu_free_device(kfd->pdev);
return false;
}
- kfd->device_info->scheduler_class->start(kfd->scheduler);
+ if (kfd->dqm->start(kfd->dqm) != 0) {
+ device_queue_manager_uninit(kfd->dqm);
+ kfd_topology_remove_device(kfd);
+ amd_iommu_free_device(kfd->pdev);
+ return false;
+ }
kfd->init_complete = true;
dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
kfd->pdev->device);
+ pr_debug("kfd: Starting kfd with the following scheduling policy %d\n", sched_policy);
+
return true;
}
@@ -188,13 +200,10 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
BUG_ON(err != 0);
- if (kfd->init_complete)
- kfd->device_info->scheduler_class->stop(kfd->scheduler);
-
radeon_kfd_interrupt_exit(kfd);
if (kfd->init_complete) {
- kfd->device_info->scheduler_class->destroy(kfd->scheduler);
+ device_queue_manager_uninit(kfd->dqm);
amd_iommu_free_device(kfd->pdev);
}
@@ -206,7 +215,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
BUG_ON(kfd == NULL);
if (kfd->init_complete) {
- kfd->device_info->scheduler_class->stop(kfd->scheduler);
+ kfd->dqm->stop(kfd->dqm);
amd_iommu_free_device(kfd->pdev);
}
}
@@ -225,7 +234,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
if (err < 0)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, iommu_pasid_shutdown_callback);
- kfd->device_info->scheduler_class->start(kfd->scheduler);
+ kfd->dqm->start(kfd->dqm);
}
return 0;
diff --git a/drivers/gpu/hsa/radeon/kfd_interrupt.c b/drivers/gpu/hsa/radeon/kfd_interrupt.c
index 2179780..1c9ad46 100644
--- a/drivers/gpu/hsa/radeon/kfd_interrupt.c
+++ b/drivers/gpu/hsa/radeon/kfd_interrupt.c
@@ -43,7 +43,6 @@
#include <linux/slab.h>
#include <linux/device.h>
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
#define KFD_INTERRUPT_RING_SIZE 256
@@ -162,7 +161,7 @@ static void interrupt_wq(struct work_struct *work)
uint32_t ih_ring_entry[DIV_ROUND_UP(dev->device_info->ih_ring_entry_size, sizeof(uint32_t))];
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
- dev->device_info->scheduler_class->interrupt_wq(dev->scheduler, ih_ring_entry);
+ ;
}
/* This is called directly from KGD at ISR. */
@@ -171,7 +170,6 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_lock(&kfd->interrupt_lock);
if (kfd->interrupts_active
- && kfd->device_info->scheduler_class->interrupt_isr(kfd->scheduler, ih_ring_entry)
&& enqueue_ih_ring_entry(kfd, ih_ring_entry))
schedule_work(&kfd->interrupt_work);
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 0af4c71..049671b 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -441,6 +441,8 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);
+int get_vmid_from_pasid(struct kfd_dev *dev, pasid_t pasid , unsigned int *vmid);
+
/* Process Queue Manager */
struct process_queue_node {
struct queue *q;
diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
index 80136e6..f967c15 100644
--- a/drivers/gpu/hsa/radeon/kfd_process.c
+++ b/drivers/gpu/hsa/radeon/kfd_process.c
@@ -29,7 +29,6 @@
struct mm_struct;
#include "kfd_priv.h"
-#include "kfd_scheduler.h"
/* Initial size for the array of queues.
* The allocated size is doubled each time it is exceeded up to MAX_PROCESS_QUEUES. */
@@ -91,52 +90,15 @@ radeon_kfd_get_process(const struct task_struct *thread)
return process;
}
-/* Assumes that the kfd_process mutex is held.
- * (Or that it doesn't need to be held because the process is exiting.)
- *
- * dev_filter can be set to only destroy queues for one device.
- * Otherwise all queues for the process are destroyed.
- */
-static void
-destroy_queues(struct kfd_process *p, struct kfd_dev *dev_filter)
-{
- unsigned long queue_id;
-
- for_each_set_bit(queue_id, p->allocated_queue_bitmap, MAX_PROCESS_QUEUES) {
-
- struct kfd_queue *queue = radeon_kfd_get_queue(p, queue_id);
- struct kfd_dev *dev;
-
- BUG_ON(queue == NULL);
-
- dev = queue->dev;
-
- if (!dev_filter || dev == dev_filter) {
- struct kfd_process_device *pdd = radeon_kfd_get_process_device_data(dev, p);
-
- BUG_ON(pdd == NULL); /* A queue exists so pdd must. */
-
- radeon_kfd_remove_queue(p, queue_id);
- dev->device_info->scheduler_class->destroy_queue(dev->scheduler, &queue->scheduler_queue);
-
- kfree(queue);
- }
- }
-}
-
static void free_process(struct kfd_process *p)
{
struct kfd_process_device *pdd, *temp;
BUG_ON(p == NULL);
- destroy_queues(p, NULL);
-
/* doorbell mappings: automatic */
list_for_each_entry_safe(pdd, temp, &p->per_device_data, per_device_list) {
- pdd->dev->device_info->scheduler_class->deregister_process(pdd->dev->scheduler, pdd->scheduler_process);
- pdd->scheduler_process = NULL;
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
list_del(&pdd->per_device_list);
kfree(pdd);
@@ -202,8 +164,17 @@ static struct kfd_process *create_process(const struct task_struct *thread)
INIT_LIST_HEAD(&process->per_device_data);
+ process->read_ptr.page_mapping = process->write_ptr.page_mapping = NULL;
+ err = pqm_init(&process->pqm, process);
+ if (err != 0)
+ goto err_process_pqm_init;
+
return process;
+err_process_pqm_init:
+ radeon_kfd_pasid_free(process->pasid);
+ list_del(&process->processes_list);
+ thread->mm->kfd_process = NULL;
err_alloc:
kfree(process->queues);
kfree(process);
@@ -222,6 +193,9 @@ radeon_kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p)
pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
if (pdd != NULL) {
pdd->dev = dev;
+ INIT_LIST_HEAD(&pdd->qpd.queues_list);
+ INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+ pdd->qpd.dqm = dev->dqm;
list_add(&pdd->per_device_list, &p->per_device_data);
}
@@ -248,7 +222,6 @@ struct kfd_process_device *radeon_kfd_bind_process_to_device(struct kfd_dev *dev
if (err < 0)
return ERR_PTR(err);
- err = dev->device_info->scheduler_class->register_process(dev->scheduler, p, &pdd->scheduler_process);
if (err < 0) {
amd_iommu_unbind_pasid(dev->pdev, p->pasid);
return ERR_PTR(err);
@@ -282,10 +255,7 @@ void radeon_kfd_unbind_process_from_device(struct kfd_dev *dev, pasid_t pasid)
mutex_lock(&p->mutex);
- destroy_queues(p, dev);
-
- dev->device_info->scheduler_class->deregister_process(dev->scheduler, pdd->scheduler_process);
- pdd->scheduler_process = NULL;
+ pqm_uninit(&p->pqm);
/*
* Just mark pdd as unbound, because we still need it to call
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index e5fcb8b..5134880 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -47,9 +47,9 @@ struct kfd_ioctl_create_queue_args {
uint32_t queue_type; /* to KFD */
uint32_t queue_percentage; /* to KFD */
uint32_t queue_priority; /* to KFD */
- uint64_t write_pointer_address; /* to KFD */
- uint64_t read_pointer_address; /* to KFD */
+ uint64_t write_pointer_address; /* from KFD */
+ uint64_t read_pointer_address; /* from KFD */
uint64_t doorbell_address; /* from KFD */
uint32_t queue_id; /* from KFD */
};
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 55/83] hsa/radeon: Add IOCTL for update queue
[not found] <1405029279-6894-1-git-send-email-oded.gabbay@amd.com>
` (3 preceding siblings ...)
2014-07-10 21:54 ` [PATCH 54/83] hsa/radeon: Switch to new queue scheduler Oded Gabbay
@ 2014-07-10 21:54 ` Oded Gabbay
2014-07-10 21:54 ` [PATCH 59/83] hsa/radeon: Exclusive access for perf. counters Oded Gabbay
2014-07-10 21:54 ` [PATCH 60/83] hsa/radeon: Rearrange structures in kfd_ioctl.h Oded Gabbay
6 siblings, 0 replies; 9+ messages in thread
From: Oded Gabbay @ 2014-07-10 21:54 UTC (permalink / raw)
To: David Airlie, Alex Deucher, Jerome Glisse
Cc: linux-kernel, dri-devel, John Bridgman, Andrew Lewycky,
Joerg Roedel, Ben Goz, Oded Gabbay, Evgeny Pinchuk,
Alexey Skidanov, linux-api
From: Ben Goz <ben.goz@amd.com>
This patch adds a new IOCTL that enables the user to update an existing HSA
queue.
Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/hsa/radeon/cik_mqds.h | 1 -
drivers/gpu/hsa/radeon/kfd_chardev.c | 29 ++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_device_queue_manager.c | 1 -
drivers/gpu/hsa/radeon/kfd_device_queue_manager.h | 1 -
drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c | 1 -
drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h | 1 -
drivers/gpu/hsa/radeon/kfd_kernel_queue.c | 1 -
drivers/gpu/hsa/radeon/kfd_kernel_queue.h | 1 -
drivers/gpu/hsa/radeon/kfd_mqd_manager.c | 1 -
drivers/gpu/hsa/radeon/kfd_mqd_manager.h | 1 -
drivers/gpu/hsa/radeon/kfd_packet_manager.c | 23 ++++++++++++++---
drivers/gpu/hsa/radeon/kfd_process_queue_manager.c | 1 -
drivers/gpu/hsa/radeon/kfd_queue.c | 1 -
include/uapi/linux/kfd_ioctl.h | 9 +++++++
14 files changed, 58 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/cik_mqds.h b/drivers/gpu/hsa/radeon/cik_mqds.h
index 58945c8..35a35b4 100644
--- a/drivers/gpu/hsa/radeon/cik_mqds.h
+++ b/drivers/gpu/hsa/radeon/cik_mqds.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef CIK_MQDS_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index bb2ef02..9a77332 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -230,6 +230,31 @@ kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, void __user *a
return retval;
}
+static int
+kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, void __user *arg)
+{
+ int retval;
+ struct kfd_ioctl_update_queue_args args;
+ struct queue_properties properties;
+
+ if (copy_from_user(&args, arg, sizeof(args)))
+ return -EFAULT;
+
+ properties.queue_address = args.ring_base_address;
+ properties.queue_size = args.ring_size;
+ properties.queue_percent = args.queue_percentage;
+ properties.priority = args.queue_priority;
+
+ pr_debug("kfd: updating queue id %d for PASID %d\n", args.queue_id, p->pasid);
+
+ mutex_lock(&p->mutex);
+
+ retval = pqm_update_queue(&p->pqm, args.queue_id, &properties);
+
+ mutex_unlock(&p->mutex);
+
+ return retval;
+}
static long
kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void __user *arg)
@@ -398,6 +423,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
err = kfd_ioctl_get_process_apertures(filep, process, (void __user *)arg);
break;
+ case KFD_IOC_UPDATE_QUEUE:
+ err = kfd_ioctl_update_queue(filep, process, (void __user *)arg);
+ break;
+
default:
dev_err(kfd_device,
"unknown ioctl cmd 0x%x, arg 0x%lx)\n",
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
index 9e21074..c2d91c9 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/slab.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
index 0529a96..fe9ef10 100644
--- a/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
+++ b/drivers/gpu/hsa/radeon/kfd_device_queue_manager.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef DEVICE_QUEUE_MANAGER_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
index 1372fb2..4e71f7d 100644
--- a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
+++ b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/types.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
index be1d6cb..f384b7f 100644
--- a/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
+++ b/drivers/gpu/hsa/radeon/kfd_hw_pointer_store.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef HW_POINTER_STORE_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
index 61f420f..aa64693e 100644
--- a/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/types.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_kernel_queue.h b/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
index 339376c..963e861 100644
--- a/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
+++ b/drivers/gpu/hsa/radeon/kfd_kernel_queue.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef KERNEL_QUEUE_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
index 14b248f..a3e9f7c 100644
--- a/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/printk.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
index e7b39ee..8e7a5fd 100644
--- a/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
+++ b/drivers/gpu/hsa/radeon/kfd_mqd_manager.h
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#ifndef MQD_MANAGER_H_
diff --git a/drivers/gpu/hsa/radeon/kfd_packet_manager.c b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
index 4967b7c..3fc8c34 100644
--- a/drivers/gpu/hsa/radeon/kfd_packet_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_packet_manager.c
@@ -1,9 +1,26 @@
/*
- * packet_manager.c
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
*
- * Created on: Mar 16, 2014
- * Author: ben
*/
+
#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
diff --git a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
index 6e38ca4..fe74dd7 100644
--- a/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
+++ b/drivers/gpu/hsa/radeon/kfd_process_queue_manager.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/slab.h>
diff --git a/drivers/gpu/hsa/radeon/kfd_queue.c b/drivers/gpu/hsa/radeon/kfd_queue.c
index 78fe180..2d22cc1 100644
--- a/drivers/gpu/hsa/radeon/kfd_queue.c
+++ b/drivers/gpu/hsa/radeon/kfd_queue.c
@@ -19,7 +19,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Author: Ben Goz
*/
#include <linux/slab.h>
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 5134880..d58231d 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -58,6 +58,14 @@ struct kfd_ioctl_destroy_queue_args {
uint32_t queue_id; /* to KFD */
};
+struct kfd_ioctl_update_queue_args {
+ uint32_t queue_id; /* to KFD */
+ uint64_t ring_base_address; /* to KFD */
+ uint32_t ring_size; /* to KFD */
+ uint32_t queue_percentage; /* to KFD */
+ uint32_t queue_priority; /* to KFD */
+};
+
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@@ -103,6 +111,7 @@ struct kfd_ioctl_get_process_apertures_args {
#define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
+#define KFD_IOC_UPDATE_QUEUE _IOW(KFD_IOC_MAGIC, 7, struct kfd_ioctl_update_queue_args)
#pragma pack(pop)
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 59/83] hsa/radeon: Exclusive access for perf. counters
[not found] <1405029279-6894-1-git-send-email-oded.gabbay@amd.com>
` (4 preceding siblings ...)
2014-07-10 21:54 ` [PATCH 55/83] hsa/radeon: Add IOCTL for update queue Oded Gabbay
@ 2014-07-10 21:54 ` Oded Gabbay
2014-07-10 21:54 ` [PATCH 60/83] hsa/radeon: Rearrange structures in kfd_ioctl.h Oded Gabbay
6 siblings, 0 replies; 9+ messages in thread
From: Oded Gabbay @ 2014-07-10 21:54 UTC (permalink / raw)
To: David Airlie, Alex Deucher, Jerome Glisse
Cc: linux-kernel, dri-devel, John Bridgman, Andrew Lewycky,
Joerg Roedel, Evgeny Pinchuk, Oded Gabbay, Ben Goz,
Alexey Skidanov, linux-api
From: Evgeny Pinchuk <evgeny.pinchuk@amd.com>
Introducing IOCTL implementation for controlling exclusive access to performance counters.
The exclusive access is per GPU device.
Signed-off-by: Evgeny Pinchuk <evgeny.pinchuk@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
drivers/gpu/hsa/radeon/kfd_chardev.c | 61 ++++++++++++++++++++++++++++++++++++
drivers/gpu/hsa/radeon/kfd_device.c | 2 ++
drivers/gpu/hsa/radeon/kfd_priv.h | 5 +++
drivers/gpu/hsa/radeon/kfd_process.c | 8 +++--
include/uapi/linux/kfd_ioctl.h | 12 +++++++
5 files changed, 86 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
index 80b702e..b39df68 100644
--- a/drivers/gpu/hsa/radeon/kfd_chardev.c
+++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
@@ -387,6 +387,59 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, struct kfd_process
return 0;
}
+static long
+kfd_ioctl_pmc_acquire_access(struct file *filp, struct kfd_process *p, void __user *arg)
+{
+ struct kfd_ioctl_pmc_acquire_access_args args;
+ struct kfd_dev *dev;
+ int err = -EBUSY;
+
+ if (copy_from_user(&args, arg, sizeof(args)))
+ return -EFAULT;
+
+ dev = radeon_kfd_device_by_id(args.gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ spin_lock(&dev->pmc_access_lock);
+ if (dev->pmc_locking_process == NULL) {
+ dev->pmc_locking_process = p;
+ dev->pmc_locking_trace = args.trace_id;
+ err = 0;
+ } else if (dev->pmc_locking_process == p && dev->pmc_locking_trace == args.trace_id) {
+ /* Same trace already has an access. Returning success */
+ err = 0;
+ }
+
+ spin_unlock(&dev->pmc_access_lock);
+
+ return err;
+}
+
+static long
+kfd_ioctl_pmc_release_access(struct file *filp, struct kfd_process *p, void __user *arg)
+{
+ struct kfd_ioctl_pmc_release_access_args args;
+ struct kfd_dev *dev;
+ int err = -EINVAL;
+
+ if (copy_from_user(&args, arg, sizeof(args)))
+ return -EFAULT;
+
+ dev = radeon_kfd_device_by_id(args.gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ spin_lock(&dev->pmc_access_lock);
+ if (dev->pmc_locking_process == p && dev->pmc_locking_trace == args.trace_id) {
+ dev->pmc_locking_process = NULL;
+ dev->pmc_locking_trace = 0;
+ err = 0;
+ }
+ spin_unlock(&dev->pmc_access_lock);
+
+ return err;
+}
static long
kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
@@ -427,6 +480,14 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
err = kfd_ioctl_update_queue(filep, process, (void __user *)arg);
break;
+ case KFD_IOC_PMC_ACQUIRE_ACCESS:
+ err = kfd_ioctl_pmc_acquire_access(filep, process, (void __user *) arg);
+ break;
+
+ case KFD_IOC_PMC_RELEASE_ACCESS:
+ err = kfd_ioctl_pmc_release_access(filep, process, (void __user *) arg);
+ break;
+
default:
dev_err(kfd_device,
"unknown ioctl cmd 0x%x, arg 0x%lx)\n",
diff --git a/drivers/gpu/hsa/radeon/kfd_device.c b/drivers/gpu/hsa/radeon/kfd_device.c
index c602e16..9af812b 100644
--- a/drivers/gpu/hsa/radeon/kfd_device.c
+++ b/drivers/gpu/hsa/radeon/kfd_device.c
@@ -185,6 +185,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
return false;
}
+ spin_lock_init(&kfd->pmc_access_lock);
+
kfd->init_complete = true;
dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
kfd->pdev->device);
diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
index 049671b..e6d4993 100644
--- a/drivers/gpu/hsa/radeon/kfd_priv.h
+++ b/drivers/gpu/hsa/radeon/kfd_priv.h
@@ -135,6 +135,11 @@ struct kfd_dev {
/* QCM Device instance */
struct device_queue_manager *dqm;
+
+ /* Performance counters exclusivity lock */
+ spinlock_t pmc_access_lock;
+ struct kfd_process *pmc_locking_process;
+ uint64_t pmc_locking_trace;
};
/* KGD2KFD callbacks */
diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
index f967c15..9bb5cab 100644
--- a/drivers/gpu/hsa/radeon/kfd_process.c
+++ b/drivers/gpu/hsa/radeon/kfd_process.c
@@ -96,9 +96,13 @@ static void free_process(struct kfd_process *p)
BUG_ON(p == NULL);
- /* doorbell mappings: automatic */
-
list_for_each_entry_safe(pdd, temp, &p->per_device_data, per_device_list) {
+ spin_lock(&pdd->dev->pmc_access_lock);
+ if (pdd->dev->pmc_locking_process == p) {
+ pdd->dev->pmc_locking_process = NULL;
+ pdd->dev->pmc_locking_trace = 0;
+ }
+ spin_unlock(&pdd->dev->pmc_access_lock);
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
list_del(&pdd->per_device_list);
kfree(pdd);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index d58231d..509c4a0 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -103,6 +103,16 @@ struct kfd_ioctl_get_process_apertures_args {
uint8_t num_of_nodes; /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS]*/
};
+struct kfd_ioctl_pmc_acquire_access_args {
+ uint32_t gpu_id; /* to KFD */
+ uint64_t trace_id; /* to KFD */
+};
+
+struct kfd_ioctl_pmc_release_access_args {
+ uint32_t gpu_id; /* to KFD */
+ uint64_t trace_id; /* to KFD */
+};
+
#define KFD_IOC_MAGIC 'K'
#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
@@ -112,6 +122,8 @@ struct kfd_ioctl_get_process_apertures_args {
#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
#define KFD_IOC_UPDATE_QUEUE _IOW(KFD_IOC_MAGIC, 7, struct kfd_ioctl_update_queue_args)
+#define KFD_IOC_PMC_ACQUIRE_ACCESS _IOW(KFD_IOC_MAGIC, 12, struct kfd_ioctl_pmc_acquire_access_args)
+#define KFD_IOC_PMC_RELEASE_ACCESS _IOW(KFD_IOC_MAGIC, 13, struct kfd_ioctl_pmc_release_access_args)
#pragma pack(pop)
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 60/83] hsa/radeon: Rearrange structures in kfd_ioctl.h
[not found] <1405029279-6894-1-git-send-email-oded.gabbay@amd.com>
` (5 preceding siblings ...)
2014-07-10 21:54 ` [PATCH 59/83] hsa/radeon: Exclusive access for perf. counters Oded Gabbay
@ 2014-07-10 21:54 ` Oded Gabbay
6 siblings, 0 replies; 9+ messages in thread
From: Oded Gabbay @ 2014-07-10 21:54 UTC (permalink / raw)
To: David Airlie, Alex Deucher, Jerome Glisse
Cc: linux-kernel, dri-devel, John Bridgman, Andrew Lewycky,
Joerg Roedel, Oded Gabbay, Ben Goz, Evgeny Pinchuk,
Alexey Skidanov, linux-api
This patch rearranges the structures defined in kfd_ioctl.h so that
all the uint64_t variables are located at the start of each structure,
followed by all the uint32_t variables.
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
include/uapi/linux/kfd_ioctl.h | 51 ++++++++++++++++++++++--------------------
1 file changed, 27 insertions(+), 24 deletions(-)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 509c4a0..3cedd1a 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -42,15 +42,15 @@ struct kfd_ioctl_get_version_args {
struct kfd_ioctl_create_queue_args {
uint64_t ring_base_address; /* to KFD */
+ uint64_t write_pointer_address; /* from KFD */
+ uint64_t read_pointer_address; /* from KFD */
+ uint64_t doorbell_address; /* from KFD */
+
uint32_t ring_size; /* to KFD */
uint32_t gpu_id; /* to KFD */
uint32_t queue_type; /* to KFD */
uint32_t queue_percentage; /* to KFD */
uint32_t queue_priority; /* to KFD */
-
- uint64_t write_pointer_address; /* from KFD */
- uint64_t read_pointer_address; /* from KFD */
- uint64_t doorbell_address; /* from KFD */
uint32_t queue_id; /* from KFD */
};
@@ -59,8 +59,9 @@ struct kfd_ioctl_destroy_queue_args {
};
struct kfd_ioctl_update_queue_args {
- uint32_t queue_id; /* to KFD */
uint64_t ring_base_address; /* to KFD */
+
+ uint32_t queue_id; /* to KFD */
uint32_t ring_size; /* to KFD */
uint32_t queue_percentage; /* to KFD */
uint32_t queue_priority; /* to KFD */
@@ -71,31 +72,33 @@ struct kfd_ioctl_update_queue_args {
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
struct kfd_ioctl_set_memory_policy_args {
+ uint64_t alternate_aperture_base; /* to KFD */
+ uint64_t alternate_aperture_size; /* to KFD */
+
uint32_t gpu_id; /* to KFD */
uint32_t default_policy; /* to KFD */
uint32_t alternate_policy; /* to KFD */
- uint64_t alternate_aperture_base; /* to KFD */
- uint64_t alternate_aperture_size; /* to KFD */
};
struct kfd_ioctl_get_clock_counters_args {
- uint32_t gpu_id; /* to KFD */
uint64_t gpu_clock_counter; /* from KFD */
uint64_t cpu_clock_counter; /* from KFD */
uint64_t system_clock_counter; /* from KFD */
uint64_t system_clock_freq; /* from KFD */
+
+ uint32_t gpu_id; /* to KFD */
};
#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_process_device_apertures {
- uint64_t lds_base;/* from KFD */
- uint64_t lds_limit;/* from KFD */
- uint64_t scratch_base;/* from KFD */
- uint64_t scratch_limit;/* from KFD */
- uint64_t gpuvm_base;/* from KFD */
- uint64_t gpuvm_limit;/* from KFD */
- uint32_t gpu_id;/* from KFD */
+ uint64_t lds_base; /* from KFD */
+ uint64_t lds_limit; /* from KFD */
+ uint64_t scratch_base; /* from KFD */
+ uint64_t scratch_limit; /* from KFD */
+ uint64_t gpuvm_base; /* from KFD */
+ uint64_t gpuvm_limit; /* from KFD */
+ uint32_t gpu_id; /* from KFD */
};
struct kfd_ioctl_get_process_apertures_args {
@@ -104,24 +107,24 @@ struct kfd_ioctl_get_process_apertures_args {
};
struct kfd_ioctl_pmc_acquire_access_args {
- uint32_t gpu_id; /* to KFD */
- uint64_t trace_id; /* to KFD */
+ uint64_t trace_id; /* to KFD */
+ uint32_t gpu_id; /* to KFD */
};
struct kfd_ioctl_pmc_release_access_args {
- uint32_t gpu_id; /* to KFD */
- uint64_t trace_id; /* to KFD */
+ uint64_t trace_id; /* to KFD */
+ uint32_t gpu_id; /* to KFD */
};
#define KFD_IOC_MAGIC 'K'
-#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
-#define KFD_IOC_CREATE_QUEUE _IOWR(KFD_IOC_MAGIC, 2, struct kfd_ioctl_create_queue_args)
-#define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
+#define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
+#define KFD_IOC_CREATE_QUEUE _IOWR(KFD_IOC_MAGIC, 2, struct kfd_ioctl_create_queue_args)
+#define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
#define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
-#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
-#define KFD_IOC_UPDATE_QUEUE _IOW(KFD_IOC_MAGIC, 7, struct kfd_ioctl_update_queue_args)
+#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
+#define KFD_IOC_UPDATE_QUEUE _IOW(KFD_IOC_MAGIC, 7, struct kfd_ioctl_update_queue_args)
#define KFD_IOC_PMC_ACQUIRE_ACCESS _IOW(KFD_IOC_MAGIC, 12, struct kfd_ioctl_pmc_acquire_access_args)
#define KFD_IOC_PMC_RELEASE_ACCESS _IOW(KFD_IOC_MAGIC, 13, struct kfd_ioctl_pmc_release_access_args)
--
1.9.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 32/83] hsa/radeon: implementing IOCTL for clock counters
2014-07-10 21:53 ` [PATCH 32/83] hsa/radeon: implementing IOCTL for clock counters Oded Gabbay
@ 2014-07-11 20:34 ` Jerome Glisse
0 siblings, 0 replies; 9+ messages in thread
From: Jerome Glisse @ 2014-07-11 20:34 UTC (permalink / raw)
To: Oded Gabbay
Cc: Andrew Lewycky, Ben Goz, linux-kernel, dri-devel, Evgeny Pinchuk,
Alexey Skidanov, linux-api, Alex Deucher
On Fri, Jul 11, 2014 at 12:53:48AM +0300, Oded Gabbay wrote:
> From: Evgeny Pinchuk <evgeny.pinchuk@amd.com>
>
> Implemented new IOCTL to query the CPU and GPU clock counters.
>
> Signed-off-by: Evgeny Pinchuk <evgeny.pinchuk@amd.com>
> Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
> ---
> drivers/gpu/hsa/radeon/kfd_chardev.c | 37 ++++++++++++++++++++++++++++++++++++
> include/uapi/linux/kfd_ioctl.h | 9 +++++++++
> 2 files changed, 46 insertions(+)
>
> diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
> index ddaf357..d6fa980 100644
> --- a/drivers/gpu/hsa/radeon/kfd_chardev.c
> +++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
> @@ -28,6 +28,7 @@
> #include <linux/slab.h>
> #include <linux/uaccess.h>
> #include <uapi/linux/kfd_ioctl.h>
> +#include <linux/time.h>
> #include "kfd_priv.h"
> #include "kfd_scheduler.h"
>
> @@ -284,6 +285,38 @@ out:
> return err;
> }
>
> +static long
> +kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void __user *arg)
> +{
> + struct kfd_ioctl_get_clock_counters_args args;
> + struct kfd_dev *dev;
> + struct timespec time;
> +
> + if (copy_from_user(&args, arg, sizeof(args)))
> + return -EFAULT;
> +
> + dev = radeon_kfd_device_by_id(args.gpu_id);
> + if (dev == NULL)
> + return -EINVAL;
> +
> + /* Reading GPU clock counter from KGD */
> + args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);
> +
> + /* No access to rdtsc. Using raw monotonic time */
> + getrawmonotonic(&time);
> + args.cpu_clock_counter = time.tv_nsec;
Is the GPU clock counter monotonic too, even after a GPU reset (hard reset
included)? What could go wrong if it rolls back?
> +
> + get_monotonic_boottime(&time);
> + args.system_clock_counter = time.tv_nsec;
> +
> + /* Since the counter is in nano-seconds we use 1GHz frequency */
> + args.system_clock_freq = 1000000000;
> +
> + if (copy_to_user(arg, &args, sizeof(args)))
> + return -EFAULT;
> +
> + return 0;
> +}
>
> static long
> kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> @@ -312,6 +345,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> err = kfd_ioctl_set_memory_policy(filep, process, (void __user *)arg);
> break;
>
> + case KFD_IOC_GET_CLOCK_COUNTERS:
> + err = kfd_ioctl_get_clock_counters(filep, process, (void __user *)arg);
> + break;
> +
> default:
> dev_err(kfd_device,
> "unknown ioctl cmd 0x%x, arg 0x%lx)\n",
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 928e628..5b9517e 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -70,12 +70,21 @@ struct kfd_ioctl_set_memory_policy_args {
> uint64_t alternate_aperture_size; /* to KFD */
> };
>
> +struct kfd_ioctl_get_clock_counters_args {
> + uint32_t gpu_id; /* to KFD */
> + uint64_t gpu_clock_counter; /* from KFD */
> + uint64_t cpu_clock_counter; /* from KFD */
> + uint64_t system_clock_counter; /* from KFD */
> + uint64_t system_clock_freq; /* from KFD */
> +};
> +
> #define KFD_IOC_MAGIC 'K'
>
> #define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
> #define KFD_IOC_CREATE_QUEUE _IOWR(KFD_IOC_MAGIC, 2, struct kfd_ioctl_create_queue_args)
> #define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
> #define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
> +#define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
>
> #pragma pack(pop)
>
> --
> 1.9.1
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 44/83] hsa/radeon: HSA64/HSA32 modes support
[not found] ` <1405029279-6894-16-git-send-email-oded.gabbay-5C7GfCeVMHo@public.gmane.org>
@ 2014-07-11 20:41 ` Jerome Glisse
0 siblings, 0 replies; 9+ messages in thread
From: Jerome Glisse @ 2014-07-11 20:41 UTC (permalink / raw)
To: Oded Gabbay
Cc: David Airlie, Alex Deucher, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, John Bridgman,
Andrew Lewycky, Joerg Roedel, Alexey Skidanov, Oded Gabbay,
Evgeny Pinchuk, Ben Goz, linux-api-u79uwXL29TY76Z2rM5mHXA
On Fri, Jul 11, 2014 at 12:54:00AM +0300, Oded Gabbay wrote:
> From: Alexey Skidanov <Alexey.Skidanov-5C7GfCeVMHo@public.gmane.org>
>
> Added apertures initialization and appropriate ioctl
What is a process aperture and what is it used for? This is a very
cryptic commit message.
Cheers,
Jérôme
>
> Signed-off-by: Alexey Skidanov <Alexey.Skidanov-5C7GfCeVMHo@public.gmane.org>
> Signed-off-by: Oded Gabbay <oded.gabbay-5C7GfCeVMHo@public.gmane.org>
> ---
> drivers/gpu/hsa/radeon/Makefile | 2 +-
> drivers/gpu/hsa/radeon/kfd_aperture.c | 124 ++++++++++++++++++++++++++
> drivers/gpu/hsa/radeon/kfd_chardev.c | 58 +++++++++++-
> drivers/gpu/hsa/radeon/kfd_priv.h | 18 ++++
> drivers/gpu/hsa/radeon/kfd_process.c | 17 ++++
> drivers/gpu/hsa/radeon/kfd_sched_cik_static.c | 3 +-
> drivers/gpu/hsa/radeon/kfd_topology.c | 27 ++++++
> include/uapi/linux/kfd_ioctl.h | 18 ++++
> 8 files changed, 264 insertions(+), 3 deletions(-)
> create mode 100644 drivers/gpu/hsa/radeon/kfd_aperture.c
>
> diff --git a/drivers/gpu/hsa/radeon/Makefile b/drivers/gpu/hsa/radeon/Makefile
> index 5422e6a..813b31f 100644
> --- a/drivers/gpu/hsa/radeon/Makefile
> +++ b/drivers/gpu/hsa/radeon/Makefile
> @@ -5,6 +5,6 @@
> radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
> kfd_pasid.o kfd_topology.o kfd_process.o \
> kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
> - kfd_vidmem.o kfd_interrupt.o
> + kfd_vidmem.o kfd_interrupt.o kfd_aperture.o
>
> obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
> diff --git a/drivers/gpu/hsa/radeon/kfd_aperture.c b/drivers/gpu/hsa/radeon/kfd_aperture.c
> new file mode 100644
> index 0000000..9e2d6da
> --- /dev/null
> +++ b/drivers/gpu/hsa/radeon/kfd_aperture.c
> @@ -0,0 +1,124 @@
> +/*
> + * Copyright 2014 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include <linux/device.h>
> +#include <linux/export.h>
> +#include <linux/err.h>
> +#include <linux/fs.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/compat.h>
> +#include <uapi/linux/kfd_ioctl.h>
> +#include <linux/time.h>
> +#include "kfd_priv.h"
> +#include "kfd_scheduler.h"
> +#include <linux/mm.h>
> +#include <uapi/asm-generic/mman-common.h>
> +#include <asm/processor.h>
> +
> +
> +#define MAKE_GPUVM_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x1000000000000)
> +#define MAKE_GPUVM_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFF0000000000) | 0xFFFFFFFFFF)
> +#define MAKE_SCRATCH_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x100000000)
> +#define MAKE_SCRATCH_APP_LIMIT(base) (((uint64_t)base & 0xFFFFFFFF00000000) | 0xFFFFFFFF)
> +#define MAKE_LDS_APP_BASE(gpu_num) (((uint64_t)(gpu_num) << 61) + 0x0)
> +#define MAKE_LDS_APP_LIMIT(base) (((uint64_t)(base) & 0xFFFFFFFF00000000) | 0xFFFFFFFF)
> +
> +#define HSA_32BIT_LDS_APP_SIZE 0x10000
> +#define HSA_32BIT_LDS_APP_ALIGNMENT 0x10000
> +
> +static unsigned long kfd_reserve_aperture(struct kfd_process *process, unsigned long len, unsigned long alignment)
> +{
> +
> + unsigned long addr = 0;
> + unsigned long start_address;
> +
> + /*
> + * Go bottom up and find the first available aligned address.
> + * We may narrow space to scan by getting mmap range limits.
> + */
> + for (start_address = alignment; start_address < (TASK_SIZE - alignment); start_address += alignment) {
> + addr = vm_mmap(NULL, start_address, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0);
> + if (!IS_ERR_VALUE(addr)) {
> + if (addr == start_address)
> + return addr;
> + vm_munmap(addr, len);
> + }
> + }
> + return 0;
> +
> +}
> +
> +int kfd_init_apertures(struct kfd_process *process)
> +{
> + uint8_t id = 0;
> + struct kfd_dev *dev;
> + struct kfd_process_device *pdd;
> +
> + mutex_lock(&process->mutex);
> +
> + /*Iterating over all devices*/
> + while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && id < NUM_OF_SUPPORTED_GPUS) {
> +
> + pdd = radeon_kfd_get_process_device_data(dev, process);
> +
> + /*for 64 bit process aperture will be statically reserved in the non canonical process address space
> + *for 32 bit process the aperture will be reserved in the process address space
> + */
> + if (process->is_32bit_user_mode) {
> + /*try to reserve aperture. continue on failure, just put the aperture size to be 0*/
> + pdd->lds_base = kfd_reserve_aperture(
> + process,
> + HSA_32BIT_LDS_APP_SIZE,
> + HSA_32BIT_LDS_APP_ALIGNMENT);
> +
> + if (pdd->lds_base)
> + pdd->lds_limit = pdd->lds_base + HSA_32BIT_LDS_APP_SIZE - 1;
> + else
> + pdd->lds_limit = 0;
> +
> + /*GPUVM and Scratch apertures are not supported*/
> + pdd->gpuvm_base = pdd->gpuvm_limit = pdd->scratch_base = pdd->scratch_limit = 0;
> + } else {
> + /*node id couldn't be 0 - the three MSB bits of aperture shoudn't be 0*/
> + pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
> + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
> + pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
> + pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
> + pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
> + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
> + }
> +
> + dev_dbg(kfd_device, "node id %u, gpu id %u, lds_base %llX lds_limit %llX gpuvm_base %llX gpuvm_limit %llX scratch_base %llX scratch_limit %llX",
> + id, pdd->dev->id, pdd->lds_base, pdd->lds_limit, pdd->gpuvm_base, pdd->gpuvm_limit, pdd->scratch_base, pdd->scratch_limit);
> +
> + id++;
> + }
> +
> + mutex_unlock(&process->mutex);
> +
> + return 0;
> +}
> +
> +
> diff --git a/drivers/gpu/hsa/radeon/kfd_chardev.c b/drivers/gpu/hsa/radeon/kfd_chardev.c
> index e95d597..07cac88 100644
> --- a/drivers/gpu/hsa/radeon/kfd_chardev.c
> +++ b/drivers/gpu/hsa/radeon/kfd_chardev.c
> @@ -32,6 +32,9 @@
> #include <linux/time.h>
> #include "kfd_priv.h"
> #include "kfd_scheduler.h"
> +#include <linux/mm.h>
> +#include <uapi/asm-generic/mman-common.h>
> +#include <asm/processor.h>
>
> static long kfd_ioctl(struct file *, unsigned int, unsigned long);
> static int kfd_open(struct inode *, struct file *);
> @@ -107,9 +110,13 @@ kfd_open(struct inode *inode, struct file *filep)
> process = radeon_kfd_create_process(current);
> if (IS_ERR(process))
> return PTR_ERR(process);
> +
> process->is_32bit_user_mode = is_compat_task();
> +
> dev_info(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
> - process->pasid, process->is_32bit_user_mode);
> + process->pasid, process->is_32bit_user_mode);
> +
> + kfd_init_apertures(process);
>
> return 0;
> }
> @@ -321,6 +328,51 @@ kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void __u
> return 0;
> }
>
> +
> +static int kfd_ioctl_get_process_apertures(struct file *filp, struct kfd_process *p, void __user *arg)
> +{
> + struct kfd_ioctl_get_process_apertures_args args;
> + struct kfd_process_device *pdd;
> +
> + dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
> +
> + if (copy_from_user(&args, arg, sizeof(args)))
> + return -EFAULT;
> +
> + args.num_of_nodes = 0;
> +
> + mutex_lock(&p->mutex);
> +
> + /*if the process-device list isn't empty*/
> + if (kfd_has_process_device_data(p)) {
> + /* Run over all pdd of the process */
> + pdd = kfd_get_first_process_device_data(p);
> + do {
> +
> + args.process_apertures[args.num_of_nodes].gpu_id = pdd->dev->id;
> + args.process_apertures[args.num_of_nodes].lds_base = pdd->lds_base;
> + args.process_apertures[args.num_of_nodes].lds_limit = pdd->lds_limit;
> + args.process_apertures[args.num_of_nodes].gpuvm_base = pdd->gpuvm_base;
> + args.process_apertures[args.num_of_nodes].gpuvm_limit = pdd->gpuvm_limit;
> + args.process_apertures[args.num_of_nodes].scratch_base = pdd->scratch_base;
> + args.process_apertures[args.num_of_nodes].scratch_limit = pdd->scratch_limit;
> +
> + dev_dbg(kfd_device, "node id %u, gpu id %u, lds_base %llX lds_limit %llX gpuvm_base %llX gpuvm_limit %llX scratch_base %llX scratch_limit %llX",
> + args.num_of_nodes, pdd->dev->id, pdd->lds_base, pdd->lds_limit, pdd->gpuvm_base, pdd->gpuvm_limit, pdd->scratch_base, pdd->scratch_limit);
> + args.num_of_nodes++;
> + } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
> + (args.num_of_nodes < NUM_OF_SUPPORTED_GPUS));
> + }
> +
> + mutex_unlock(&p->mutex);
> +
> + if (copy_to_user(arg, &args, sizeof(args)))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> +
> static long
> kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> {
> @@ -352,6 +404,10 @@ kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
> err = kfd_ioctl_get_clock_counters(filep, process, (void __user *)arg);
> break;
>
> + case KFD_IOC_GET_PROCESS_APERTURES:
> + err = kfd_ioctl_get_process_apertures(filep, process, (void __user *)arg);
> + break;
> +
> default:
> dev_err(kfd_device,
> "unknown ioctl cmd 0x%x, arg 0x%lx)\n",
> diff --git a/drivers/gpu/hsa/radeon/kfd_priv.h b/drivers/gpu/hsa/radeon/kfd_priv.h
> index 9d3b1fc..28155bc 100644
> --- a/drivers/gpu/hsa/radeon/kfd_priv.h
> +++ b/drivers/gpu/hsa/radeon/kfd_priv.h
> @@ -171,6 +171,16 @@ struct kfd_process_device {
>
> /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
> bool bound;
> +
> + /*Apertures*/
> + uint64_t lds_base;
> + uint64_t lds_limit;
> + uint64_t gpuvm_base;
> + uint64_t gpuvm_limit;
> + uint64_t scratch_base;
> + uint64_t scratch_limit;
> +
> +
> };
>
> /* Process data */
> @@ -212,6 +222,10 @@ void radeon_kfd_install_queue(struct kfd_process *p, unsigned int queue_id, stru
> void radeon_kfd_remove_queue(struct kfd_process *p, unsigned int queue_id);
> struct kfd_queue *radeon_kfd_get_queue(struct kfd_process *p, unsigned int queue_id);
>
> +/* Process device data iterator */
> +struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
> +struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd);
> +bool kfd_has_process_device_data(struct kfd_process *p);
>
> /* PASIDs */
> int radeon_kfd_pasid_init(void);
> @@ -237,6 +251,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu);
> int kfd_topology_remove_device(struct kfd_dev *gpu);
> struct kfd_dev *radeon_kfd_device_by_id(uint32_t gpu_id);
> struct kfd_dev *radeon_kfd_device_by_pci_dev(const struct pci_dev *pdev);
> +struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx);
>
> /* MMIO registers */
> #define WRITE_REG(dev, reg, value) radeon_kfd_write_reg((dev), (reg), (value))
> @@ -253,4 +268,7 @@ void kgd2kfd_interrupt(struct kfd_dev *dev, const void *ih_ring_entry);
> void kgd2kfd_suspend(struct kfd_dev *dev);
> int kgd2kfd_resume(struct kfd_dev *dev);
>
> +/*HSA apertures*/
> +int kfd_init_apertures(struct kfd_process *process);
> +
> #endif
> diff --git a/drivers/gpu/hsa/radeon/kfd_process.c b/drivers/gpu/hsa/radeon/kfd_process.c
> index f89f855..80136e6 100644
> --- a/drivers/gpu/hsa/radeon/kfd_process.c
> +++ b/drivers/gpu/hsa/radeon/kfd_process.c
> @@ -397,3 +397,20 @@ struct kfd_queue *radeon_kfd_get_queue(struct kfd_process *p, unsigned int queue
> test_bit(queue_id, p->allocated_queue_bitmap)) ?
> p->queues[queue_id] : NULL;
> }
> +
> +struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
> +{
> + return list_first_entry(&p->per_device_data, struct kfd_process_device, per_device_list);
> +}
> +
> +struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, struct kfd_process_device *pdd)
> +{
> + if (list_is_last(&pdd->per_device_list, &p->per_device_data))
> + return NULL;
> + return list_next_entry(pdd, per_device_list);
> +}
> +
> +bool kfd_has_process_device_data(struct kfd_process *p)
> +{
> + return !(list_empty(&p->per_device_data));
> +}
> diff --git a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
> index 7ee8125..30561a6 100644
> --- a/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
> +++ b/drivers/gpu/hsa/radeon/kfd_sched_cik_static.c
> @@ -627,7 +627,8 @@ static void cik_static_deregister_process(struct kfd_scheduler *scheduler,
> struct cik_static_private *priv = kfd_scheduler_to_private(scheduler);
> struct cik_static_process *pp = kfd_process_to_private(scheduler_process);
>
> - if (priv && pp) {
> +
> + if (priv && pp) {
> release_vmid(priv, pp->vmid);
> kfree(pp);
> }
> diff --git a/drivers/gpu/hsa/radeon/kfd_topology.c b/drivers/gpu/hsa/radeon/kfd_topology.c
> index 21bb66e..213ae7b 100644
> --- a/drivers/gpu/hsa/radeon/kfd_topology.c
> +++ b/drivers/gpu/hsa/radeon/kfd_topology.c
> @@ -1201,3 +1201,30 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
>
> return res;
> }
> +
> +/*
> + * When idx is out of bounds, the function will return NULL
> + */
> +struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx)
> +{
> +
> + struct kfd_topology_device *top_dev;
> + struct kfd_dev *device = NULL;
> + uint8_t device_idx = 0;
> +
> + down_read(&topology_lock);
> +
> + list_for_each_entry(top_dev, &topology_device_list, list) {
> + if (device_idx == idx) {
> + device = top_dev->gpu;
> + break;
> + }
> +
> + device_idx++;
> + }
> +
> + up_read(&topology_lock);
> +
> + return device;
> +
> +}
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index a7c3abd..e5fcb8b 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -78,6 +78,23 @@ struct kfd_ioctl_get_clock_counters_args {
> uint64_t system_clock_freq; /* from KFD */
> };
>
> +#define NUM_OF_SUPPORTED_GPUS 7
> +
> +struct kfd_process_device_apertures {
> + uint64_t lds_base;/* from KFD */
> + uint64_t lds_limit;/* from KFD */
> + uint64_t scratch_base;/* from KFD */
> + uint64_t scratch_limit;/* from KFD */
> + uint64_t gpuvm_base;/* from KFD */
> + uint64_t gpuvm_limit;/* from KFD */
> + uint32_t gpu_id;/* from KFD */
> +};
> +
> +struct kfd_ioctl_get_process_apertures_args {
> + struct kfd_process_device_apertures process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
> + uint8_t num_of_nodes; /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS]*/
> +};
> +
> #define KFD_IOC_MAGIC 'K'
>
> #define KFD_IOC_GET_VERSION _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args)
> @@ -85,6 +102,7 @@ struct kfd_ioctl_get_clock_counters_args {
> #define KFD_IOC_DESTROY_QUEUE _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args)
> #define KFD_IOC_SET_MEMORY_POLICY _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args)
> #define KFD_IOC_GET_CLOCK_COUNTERS _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args)
> +#define KFD_IOC_GET_PROCESS_APERTURES _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args)
>
> #pragma pack(pop)
>
> --
> 1.9.1
>
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2014-07-11 20:41 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <1405029279-6894-1-git-send-email-oded.gabbay@amd.com>
[not found] ` <1405029279-6894-1-git-send-email-oded.gabbay-5C7GfCeVMHo@public.gmane.org>
2014-07-10 21:53 ` [PATCH 32/83] hsa/radeon: implementing IOCTL for clock counters Oded Gabbay
2014-07-11 20:34 ` Jerome Glisse
2014-07-10 21:53 ` [PATCH 42/83] hsa/radeon: 32-bit processes support Oded Gabbay
2014-07-10 21:54 ` [PATCH 44/83] hsa/radeon: HSA64/HSA32 modes support Oded Gabbay
[not found] ` <1405029279-6894-16-git-send-email-oded.gabbay-5C7GfCeVMHo@public.gmane.org>
2014-07-11 20:41 ` Jerome Glisse
2014-07-10 21:54 ` [PATCH 54/83] hsa/radeon: Switch to new queue scheduler Oded Gabbay
2014-07-10 21:54 ` [PATCH 55/83] hsa/radeon: Add IOCTL for update queue Oded Gabbay
2014-07-10 21:54 ` [PATCH 59/83] hsa/radeon: Exclusive access for perf. counters Oded Gabbay
2014-07-10 21:54 ` [PATCH 60/83] hsa/radeon: Rearrange structures in kfd_ioctl.h Oded Gabbay
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).