* [tip:irq/core] PCI: Remove the irq_affinity mask from struct pci_dev
From: tip-bot for Christoph Hellwig @ 2016-11-09 7:51 UTC (permalink / raw)
To: linux-tip-commits
Cc: bhelgaas, hch, tglx, mingo, jthumshirn, hpa, axboe, linux-kernel,
hare
In-Reply-To: <1478654107-7384-7-git-send-email-hch@lst.de>
Commit-ID: 0cf71b04467bc34063cecae577f12481da6cc565
Gitweb: http://git.kernel.org/tip/0cf71b04467bc34063cecae577f12481da6cc565
Author: Christoph Hellwig <hch@lst.de>
AuthorDate: Tue, 8 Nov 2016 17:15:06 -0800
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 9 Nov 2016 08:25:10 +0100
PCI: Remove the irq_affinity mask from struct pci_dev
This has never been used, and now is totally unreferenced. Nuke it.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-7-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/pci.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 7090f5f..f2ba6ac 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -333,7 +333,6 @@ struct pci_dev {
* directly, use the values stored here. They might be different!
*/
unsigned int irq;
- struct cpumask *irq_affinity;
struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
bool match_driver; /* Skip attaching driver */
^ permalink raw reply related
* RE: [PATCH] mwifiex: printk() overflow with 32-byte SSIDs
From: Amitkumar Karwar @ 2016-11-09 7:51 UTC (permalink / raw)
To: Brian Norris, Nishant Sarmukadam, Kalle Valo
Cc: linux-kernel@vger.kernel.org, linux-wireless@vger.kernel.org,
Cathy Luo, security@kernel.org, stable@vger.kernel.org
In-Reply-To: <1478658504-31045-1-git-send-email-briannorris@chromium.org>
> From: Brian Norris [mailto:briannorris@chromium.org]
> Sent: Wednesday, November 09, 2016 7:58 AM
> To: Amitkumar Karwar; Nishant Sarmukadam; Kalle Valo
> Cc: linux-kernel@vger.kernel.org; linux-wireless@vger.kernel.org; Cathy
> Luo; security@kernel.org; stable@vger.kernel.org; Brian Norris
> Subject: [PATCH] mwifiex: printk() overflow with 32-byte SSIDs
>
> SSIDs aren't guaranteed to be 0-terminated. Let's cap the max length
> when we print them out.
>
> This can be easily noticed by connecting to a network with a 32-octet
> SSID:
>
> [ 3903.502925] mwifiex_pcie 0000:01:00.0: info: trying to associate to
> '0123456789abcdef0123456789abcdef <uninitialized mem>' bssid
> xx:xx:xx:xx:xx:xx
>
> Fixes: 5e6e3a92b9a4 ("wireless: mwifiex: initial commit for Marvell
> mwifiex driver")
> Signed-off-by: Brian Norris <briannorris@chromium.org>
> Cc: <stable@vger.kernel.org>
> ---
> drivers/net/wireless/marvell/mwifiex/cfg80211.c | 13 +++++++------
> 1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
> b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
> index 39ce76ad00bc..16241d21727b 100644
> --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
> +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
> @@ -2222,8 +2222,9 @@ mwifiex_cfg80211_assoc(struct mwifiex_private
> *priv, size_t ssid_len,
> is_scanning_required = 1;
> } else {
> mwifiex_dbg(priv->adapter, MSG,
> - "info: trying to associate to '%s' bssid
> %pM\n",
> - (char *)req_ssid.ssid, bss->bssid);
> + "info: trying to associate to '%.*s' bssid
> %pM\n",
> + req_ssid.ssid_len, (char *)req_ssid.ssid,
> + bss->bssid);
> memcpy(&priv->cfg_bssid, bss->bssid, ETH_ALEN);
> break;
> }
> @@ -2283,8 +2284,8 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy,
> struct net_device *dev,
> }
>
> mwifiex_dbg(adapter, INFO,
> - "info: Trying to associate to %s and bssid %pM\n",
> - (char *)sme->ssid, sme->bssid);
> + "info: Trying to associate to %.*s and bssid %pM\n",
> + (int)sme->ssid_len, (char *)sme->ssid, sme->bssid);
>
> if (!mwifiex_stop_bg_scan(priv))
> cfg80211_sched_scan_stopped_rtnl(priv->wdev.wiphy);
> @@ -2417,8 +2418,8 @@ mwifiex_cfg80211_join_ibss(struct wiphy *wiphy,
> struct net_device *dev,
> }
>
> mwifiex_dbg(priv->adapter, MSG,
> - "info: trying to join to %s and bssid %pM\n",
> - (char *)params->ssid, params->bssid);
> + "info: trying to join to %.*s and bssid %pM\n",
> + params->ssid_len, (char *)params->ssid, params->bssid);
>
> mwifiex_set_ibss_params(priv, params);
>
> --
> 2.8.0.rc3.226.g39d4020
Thanks for fixing this.
Acked-by: Amitkumar Karwar <akarwar@marvell.com>
Regards,
Amitkumar
^ permalink raw reply
* [tip:irq/core] PCI/MSI: Provide pci_alloc_irq_vectors_affinity()
From: tip-bot for Christoph Hellwig @ 2016-11-09 7:50 UTC (permalink / raw)
To: linux-tip-commits
Cc: hch, hpa, hare, jthumshirn, linux-kernel, bhelgaas, axboe, mingo,
tglx
In-Reply-To: <1478654107-7384-6-git-send-email-hch@lst.de>
Commit-ID: 402723ad5c625ee052432698ae5e56b02d38d4ec
Gitweb: http://git.kernel.org/tip/402723ad5c625ee052432698ae5e56b02d38d4ec
Author: Christoph Hellwig <hch@lst.de>
AuthorDate: Tue, 8 Nov 2016 17:15:05 -0800
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 9 Nov 2016 08:25:10 +0100
PCI/MSI: Provide pci_alloc_irq_vectors_affinity()
This is a variant of pci_alloc_irq_vectors() that allows passing a struct
irq_affinity to provide fine-grained IRQ affinity control.
For now this means being able to exclude vectors at the beginning or end of
the MSI vector space, but it could also be used for any other quirks needed
in the future (e.g. more vectors than CPUs, or excluding CPUs from the
spreading).
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-6-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/pci/msi.c | 20 +++++++++++++-------
include/linux/pci.h | 24 +++++++++++++++++++-----
2 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 512f388..dd27f73 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1179,11 +1179,12 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
EXPORT_SYMBOL(pci_enable_msix_range);
/**
- * pci_alloc_irq_vectors - allocate multiple IRQs for a device
+ * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device
* @dev: PCI device to operate on
* @min_vecs: minimum number of vectors required (must be >= 1)
* @max_vecs: maximum (desired) number of vectors
* @flags: flags or quirks for the allocation
+ * @affd: optional description of the affinity requirements
*
* Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
* vectors if available, and fall back to a single legacy vector
@@ -1195,15 +1196,20 @@ EXPORT_SYMBOL(pci_enable_msix_range);
* To get the Linux IRQ number used for a vector that can be passed to
* request_irq() use the pci_irq_vector() helper.
*/
-int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
- unsigned int max_vecs, unsigned int flags)
+int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+ unsigned int max_vecs, unsigned int flags,
+ const struct irq_affinity *affd)
{
static const struct irq_affinity msi_default_affd;
- const struct irq_affinity *affd = NULL;
int vecs = -ENOSPC;
- if (flags & PCI_IRQ_AFFINITY)
- affd = &msi_default_affd;
+ if (flags & PCI_IRQ_AFFINITY) {
+ if (!affd)
+ affd = &msi_default_affd;
+ } else {
+ if (WARN_ON(affd))
+ affd = NULL;
+ }
if (flags & PCI_IRQ_MSIX) {
vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
@@ -1226,7 +1232,7 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
return vecs;
}
-EXPORT_SYMBOL(pci_alloc_irq_vectors);
+EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity);
/**
* pci_free_irq_vectors - free previously allocated IRQs for a device
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0e49f70..7090f5f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -244,6 +244,7 @@ struct pci_cap_saved_state {
struct pci_cap_saved_data cap;
};
+struct irq_affinity;
struct pcie_link_state;
struct pci_vpd;
struct pci_sriov;
@@ -1310,8 +1311,10 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
return rc;
return 0;
}
-int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
- unsigned int max_vecs, unsigned int flags);
+int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+ unsigned int max_vecs, unsigned int flags,
+ const struct irq_affinity *affd);
+
void pci_free_irq_vectors(struct pci_dev *dev);
int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
@@ -1339,14 +1342,17 @@ static inline int pci_enable_msix_range(struct pci_dev *dev,
static inline int pci_enable_msix_exact(struct pci_dev *dev,
struct msix_entry *entries, int nvec)
{ return -ENOSYS; }
-static inline int pci_alloc_irq_vectors(struct pci_dev *dev,
- unsigned int min_vecs, unsigned int max_vecs,
- unsigned int flags)
+
+static inline int
+pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+ unsigned int max_vecs, unsigned int flags,
+ const struct irq_affinity *aff_desc)
{
if (min_vecs > 1)
return -EINVAL;
return 1;
}
+
static inline void pci_free_irq_vectors(struct pci_dev *dev)
{
}
@@ -1364,6 +1370,14 @@ static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
}
#endif
+static inline int
+pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+ unsigned int max_vecs, unsigned int flags)
+{
+ return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
+ NULL);
+}
+
#ifdef CONFIG_PCIEPORTBUS
extern bool pcie_ports_disabled;
extern bool pcie_ports_auto;
^ permalink raw reply related
* [tip:irq/core] PCI/MSI: Propagate IRQ affinity description through the MSI code
From: tip-bot for Christoph Hellwig @ 2016-11-09 7:50 UTC (permalink / raw)
To: linux-tip-commits
Cc: bhelgaas, tglx, hare, mingo, hch, axboe, linux-kernel, hpa,
jthumshirn
In-Reply-To: <1478654107-7384-5-git-send-email-hch@lst.de>
Commit-ID: 61e1c5905290efe48bacda5e342d4af4cb1b923b
Gitweb: http://git.kernel.org/tip/61e1c5905290efe48bacda5e342d4af4cb1b923b
Author: Christoph Hellwig <hch@lst.de>
AuthorDate: Tue, 8 Nov 2016 17:15:04 -0800
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 9 Nov 2016 08:25:09 +0100
PCI/MSI: Propagate IRQ affinity description through the MSI code
No API change yet, just pass it down all the way from
pci_alloc_irq_vectors() to the core MSI code.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-5-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/pci/msi.c | 66 +++++++++++++++++++++++++++----------------------------
1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index f4a108b..512f388 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -551,15 +551,14 @@ error_attrs:
}
static struct msi_desc *
-msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity)
+msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
{
- static const struct irq_affinity default_affd;
struct cpumask *masks = NULL;
struct msi_desc *entry;
u16 control;
- if (affinity) {
- masks = irq_create_affinity_masks(nvec, &default_affd);
+ if (affd) {
+ masks = irq_create_affinity_masks(nvec, affd);
if (!masks)
pr_err("Unable to allocate affinity masks, ignoring\n");
}
@@ -619,7 +618,8 @@ static int msi_verify_entries(struct pci_dev *dev)
* an error, and a positive return value indicates the number of interrupts
* which could have been allocated.
*/
-static int msi_capability_init(struct pci_dev *dev, int nvec, bool affinity)
+static int msi_capability_init(struct pci_dev *dev, int nvec,
+ const struct irq_affinity *affd)
{
struct msi_desc *entry;
int ret;
@@ -627,7 +627,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, bool affinity)
pci_msi_set_enable(dev, 0); /* Disable MSI during set up */
- entry = msi_setup_entry(dev, nvec, affinity);
+ entry = msi_setup_entry(dev, nvec, affd);
if (!entry)
return -ENOMEM;
@@ -691,15 +691,14 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
struct msix_entry *entries, int nvec,
- bool affinity)
+ const struct irq_affinity *affd)
{
- static const struct irq_affinity default_affd;
struct cpumask *curmsk, *masks = NULL;
struct msi_desc *entry;
int ret, i;
- if (affinity) {
- masks = irq_create_affinity_masks(nvec, &default_affd);
+ if (affd) {
+ masks = irq_create_affinity_masks(nvec, affd);
if (!masks)
pr_err("Unable to allocate affinity masks, ignoring\n");
}
@@ -755,14 +754,14 @@ static void msix_program_entries(struct pci_dev *dev,
* @dev: pointer to the pci_dev data structure of MSI-X device function
* @entries: pointer to an array of struct msix_entry entries
* @nvec: number of @entries
- * @affinity: flag to indicate cpu irq affinity mask should be set
+ * @affd: Optional pointer to enable automatic affinity assignment
*
* Setup the MSI-X capability structure of device function with a
* single MSI-X irq. A return of zero indicates the successful setup of
* requested MSI-X entries with allocated irqs or non-zero for otherwise.
**/
static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
- int nvec, bool affinity)
+ int nvec, const struct irq_affinity *affd)
{
int ret;
u16 control;
@@ -777,7 +776,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
if (!base)
return -ENOMEM;
- ret = msix_setup_entries(dev, base, entries, nvec, affinity);
+ ret = msix_setup_entries(dev, base, entries, nvec, affd);
if (ret)
return ret;
@@ -958,7 +957,7 @@ int pci_msix_vec_count(struct pci_dev *dev)
EXPORT_SYMBOL(pci_msix_vec_count);
static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
- int nvec, bool affinity)
+ int nvec, const struct irq_affinity *affd)
{
int nr_entries;
int i, j;
@@ -990,7 +989,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
return -EINVAL;
}
- return msix_capability_init(dev, entries, nvec, affinity);
+ return msix_capability_init(dev, entries, nvec, affd);
}
/**
@@ -1010,7 +1009,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
**/
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
{
- return __pci_enable_msix(dev, entries, nvec, false);
+ return __pci_enable_msix(dev, entries, nvec, NULL);
}
EXPORT_SYMBOL(pci_enable_msix);
@@ -1061,10 +1060,8 @@ int pci_msi_enabled(void)
EXPORT_SYMBOL(pci_msi_enabled);
static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
- unsigned int flags)
+ const struct irq_affinity *affd)
{
- static const struct irq_affinity default_affd;
- bool affinity = flags & PCI_IRQ_AFFINITY;
int nvec;
int rc;
@@ -1093,13 +1090,13 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
nvec = maxvec;
for (;;) {
- if (affinity) {
- nvec = irq_calc_affinity_vectors(nvec, &default_affd);
+ if (affd) {
+ nvec = irq_calc_affinity_vectors(nvec, affd);
if (nvec < minvec)
return -ENOSPC;
}
- rc = msi_capability_init(dev, nvec, affinity);
+ rc = msi_capability_init(dev, nvec, affd);
if (rc == 0)
return nvec;
@@ -1126,29 +1123,27 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
**/
int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
{
- return __pci_enable_msi_range(dev, minvec, maxvec, 0);
+ return __pci_enable_msi_range(dev, minvec, maxvec, NULL);
}
EXPORT_SYMBOL(pci_enable_msi_range);
static int __pci_enable_msix_range(struct pci_dev *dev,
- struct msix_entry *entries, int minvec, int maxvec,
- unsigned int flags)
+ struct msix_entry *entries, int minvec,
+ int maxvec, const struct irq_affinity *affd)
{
- static const struct irq_affinity default_affd;
- bool affinity = flags & PCI_IRQ_AFFINITY;
int rc, nvec = maxvec;
if (maxvec < minvec)
return -ERANGE;
for (;;) {
- if (affinity) {
- nvec = irq_calc_affinity_vectors(nvec, &default_affd);
+ if (affd) {
+ nvec = irq_calc_affinity_vectors(nvec, affd);
if (nvec < minvec)
return -ENOSPC;
}
- rc = __pci_enable_msix(dev, entries, nvec, affinity);
+ rc = __pci_enable_msix(dev, entries, nvec, affd);
if (rc == 0)
return nvec;
@@ -1179,7 +1174,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
int minvec, int maxvec)
{
- return __pci_enable_msix_range(dev, entries, minvec, maxvec, 0);
+ return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL);
}
EXPORT_SYMBOL(pci_enable_msix_range);
@@ -1203,17 +1198,22 @@ EXPORT_SYMBOL(pci_enable_msix_range);
int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
unsigned int max_vecs, unsigned int flags)
{
+ static const struct irq_affinity msi_default_affd;
+ const struct irq_affinity *affd = NULL;
int vecs = -ENOSPC;
+ if (flags & PCI_IRQ_AFFINITY)
+ affd = &msi_default_affd;
+
if (flags & PCI_IRQ_MSIX) {
vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
- flags);
+ affd);
if (vecs > 0)
return vecs;
}
if (flags & PCI_IRQ_MSI) {
- vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags);
+ vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd);
if (vecs > 0)
return vecs;
}
^ permalink raw reply related
* Re: [PATCH 1/3] drm/amd/amdgpu: add SI defines/registers
From: Christian König @ 2016-11-09 7:50 UTC (permalink / raw)
To: Alex Deucher, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Cc: Tom St Denis, Alex Deucher
In-Reply-To: <1478638958-25286-1-git-send-email-alexander.deucher-5C7GfCeVMHo@public.gmane.org>
Am 08.11.2016 um 22:02 schrieb Alex Deucher:
> From: Tom St Denis <tom.stdenis@amd.com>
>
> Add missing gca MMIO registers and defines necessary for the
> next patch which re-works a lot of gfx v6 to use the new SI
> headers.
>
> Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
> ---
>
> The patch which adds the new register headers for SI is here:
> https://cgit.freedesktop.org/~agd5f/linux/commit/?h=drm-next-4.10-wip&id=d193cf2f61fab6f4d7de39ce130804b729223d57
> It's kind of giant for the mailing list.
>
> drivers/gpu/drm/amd/amdgpu/si_enums.h | 186 +++++++++++++++++++++
> .../gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h | 24 +++
> .../gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h | 3 +
> 3 files changed, 213 insertions(+)
> create mode 100644 drivers/gpu/drm/amd/amdgpu/si_enums.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h
> new file mode 100644
> index 0000000..63c057f
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
> @@ -0,0 +1,186 @@
> +/*
> + * Copyright 2016 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +#ifndef SI_ENUMS_H
> +#define SI_ENUMS_H
> +
> +#define DMA0_REGISTER_OFFSET 0x000
> +#define DMA1_REGISTER_OFFSET 0x200
> +#define ES_AND_GS_AUTO 3
> +#define RADEON_PACKET_TYPE3 3
> +#define CE_PARTITION_BASE 3
> +#define BUF_SWAP_32BIT (2 << 16)
> +
> +#define GFX_POWER_STATUS (1 << 1)
> +#define GFX_CLOCK_STATUS (1 << 2)
> +#define GFX_LS_STATUS (1 << 3)
> +#define RLC_BUSY_STATUS (1 << 0)
> +
> +#define RLC_PUD(x) ((x) << 0)
> +#define RLC_PUD_MASK (0xff << 0)
> +#define RLC_PDD(x) ((x) << 8)
> +#define RLC_PDD_MASK (0xff << 8)
> +#define RLC_TTPD(x) ((x) << 16)
> +#define RLC_TTPD_MASK (0xff << 16)
> +#define RLC_MSD(x) ((x) << 24)
> +#define RLC_MSD_MASK (0xff << 24)
> +#define WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
> +#define WRITE_DATA_DST_SEL(x) ((x) << 8)
> +#define EVENT_TYPE(x) ((x) << 0)
> +#define EVENT_INDEX(x) ((x) << 8)
> +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4)
> +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0)
> +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8)
> +
> +#define GFX6_NUM_GFX_RINGS 1
> +#define GFX6_NUM_COMPUTE_RINGS 2
> +#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
> +#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
> +
> +#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003
> +#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002
> +#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001
> +
> +#define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \
> + (((op) & 0xFF) << 8) | \
> + ((n) & 0x3FFF) << 16)
> +#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
> +#define PACKET3_NOP 0x10
> +#define PACKET3_SET_BASE 0x11
> +#define PACKET3_BASE_INDEX(x) ((x) << 0)
> +#define PACKET3_CLEAR_STATE 0x12
> +#define PACKET3_INDEX_BUFFER_SIZE 0x13
> +#define PACKET3_DISPATCH_DIRECT 0x15
> +#define PACKET3_DISPATCH_INDIRECT 0x16
> +#define PACKET3_ALLOC_GDS 0x1B
> +#define PACKET3_WRITE_GDS_RAM 0x1C
> +#define PACKET3_ATOMIC_GDS 0x1D
> +#define PACKET3_ATOMIC 0x1E
> +#define PACKET3_OCCLUSION_QUERY 0x1F
> +#define PACKET3_SET_PREDICATION 0x20
> +#define PACKET3_REG_RMW 0x21
> +#define PACKET3_COND_EXEC 0x22
> +#define PACKET3_PRED_EXEC 0x23
> +#define PACKET3_DRAW_INDIRECT 0x24
> +#define PACKET3_DRAW_INDEX_INDIRECT 0x25
> +#define PACKET3_INDEX_BASE 0x26
> +#define PACKET3_DRAW_INDEX_2 0x27
> +#define PACKET3_CONTEXT_CONTROL 0x28
> +#define PACKET3_INDEX_TYPE 0x2A
> +#define PACKET3_DRAW_INDIRECT_MULTI 0x2C
> +#define PACKET3_DRAW_INDEX_AUTO 0x2D
> +#define PACKET3_DRAW_INDEX_IMMD 0x2E
> +#define PACKET3_NUM_INSTANCES 0x2F
> +#define PACKET3_DRAW_INDEX_MULTI_AUTO 0x30
> +#define PACKET3_INDIRECT_BUFFER_CONST 0x31
> +#define PACKET3_INDIRECT_BUFFER 0x3F
> +#define PACKET3_STRMOUT_BUFFER_UPDATE 0x34
> +#define PACKET3_DRAW_INDEX_OFFSET_2 0x35
> +#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36
> +#define PACKET3_WRITE_DATA 0x37
> +#define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38
> +#define PACKET3_MEM_SEMAPHORE 0x39
> +#define PACKET3_MPEG_INDEX 0x3A
> +#define PACKET3_COPY_DW 0x3B
> +#define PACKET3_WAIT_REG_MEM 0x3C
> +#define PACKET3_MEM_WRITE 0x3D
> +#define PACKET3_COPY_DATA 0x40
> +#define PACKET3_CP_DMA 0x41
> +# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
> +# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
> +# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
> +# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
> +# define PACKET3_CP_DMA_DIS_WC (1 << 21)
> +# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 22)
> +# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
> +# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
> +# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
> +# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
> +# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
> +# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
> +#define PACKET3_PFP_SYNC_ME 0x42
> +#define PACKET3_SURFACE_SYNC 0x43
> +# define PACKET3_DEST_BASE_0_ENA (1 << 0)
> +# define PACKET3_DEST_BASE_1_ENA (1 << 1)
> +# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
> +# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
> +# define PACKET3_CB2_DEST_BASE_ENA (1 << 8)
> +# define PACKET3_CB3_DEST_BASE_ENA (1 << 9)
> +# define PACKET3_CB4_DEST_BASE_ENA (1 << 10)
> +# define PACKET3_CB5_DEST_BASE_ENA (1 << 11)
> +# define PACKET3_CB6_DEST_BASE_ENA (1 << 12)
> +# define PACKET3_CB7_DEST_BASE_ENA (1 << 13)
> +# define PACKET3_DB_DEST_BASE_ENA (1 << 14)
> +# define PACKET3_DEST_BASE_2_ENA (1 << 19)
> +# define PACKET3_DEST_BASE_3_ENA (1 << 21)
> +# define PACKET3_TCL1_ACTION_ENA (1 << 22)
> +# define PACKET3_TC_ACTION_ENA (1 << 23)
> +# define PACKET3_CB_ACTION_ENA (1 << 25)
> +# define PACKET3_DB_ACTION_ENA (1 << 26)
> +# define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
> +# define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
> +#define PACKET3_ME_INITIALIZE 0x44
> +#define PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
> +#define PACKET3_COND_WRITE 0x45
> +#define PACKET3_EVENT_WRITE 0x46
> +#define PACKET3_EVENT_WRITE_EOP 0x47
> +#define PACKET3_EVENT_WRITE_EOS 0x48
> +#define PACKET3_PREAMBLE_CNTL 0x4A
> +# define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE (2 << 28)
> +# define PACKET3_PREAMBLE_END_CLEAR_STATE (3 << 28)
> +#define PACKET3_ONE_REG_WRITE 0x57
> +#define PACKET3_LOAD_CONFIG_REG 0x5F
> +#define PACKET3_LOAD_CONTEXT_REG 0x60
> +#define PACKET3_LOAD_SH_REG 0x61
> +#define PACKET3_SET_CONFIG_REG 0x68
> +#define PACKET3_SET_CONFIG_REG_START 0x00002000
> +#define PACKET3_SET_CONFIG_REG_END 0x00002c00
> +#define PACKET3_SET_CONTEXT_REG 0x69
> +#define PACKET3_SET_CONTEXT_REG_START 0x000a000
> +#define PACKET3_SET_CONTEXT_REG_END 0x000a400
> +#define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73
> +#define PACKET3_SET_RESOURCE_INDIRECT 0x74
> +#define PACKET3_SET_SH_REG 0x76
> +#define PACKET3_SET_SH_REG_START 0x00002c00
> +#define PACKET3_SET_SH_REG_END 0x00003000
> +#define PACKET3_SET_SH_REG_OFFSET 0x77
> +#define PACKET3_ME_WRITE 0x7A
> +#define PACKET3_SCRATCH_RAM_WRITE 0x7D
> +#define PACKET3_SCRATCH_RAM_READ 0x7E
> +#define PACKET3_CE_WRITE 0x7F
> +#define PACKET3_LOAD_CONST_RAM 0x80
> +#define PACKET3_WRITE_CONST_RAM 0x81
> +#define PACKET3_WRITE_CONST_RAM_OFFSET 0x82
> +#define PACKET3_DUMP_CONST_RAM 0x83
> +#define PACKET3_INCREMENT_CE_COUNTER 0x84
> +#define PACKET3_INCREMENT_DE_COUNTER 0x85
> +#define PACKET3_WAIT_ON_CE_COUNTER 0x86
> +#define PACKET3_WAIT_ON_DE_COUNTER 0x87
> +#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
> +#define PACKET3_SET_CE_DE_COUNTERS 0x89
> +#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
> +#define PACKET3_SWITCH_BUFFER 0x8B
> +#define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12)
> +#define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
> +#define PACKET3_SEM_SEL_WAIT (0x7 << 29)
> +
> +#endif
> diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h
> index b2829d0..c75aee2 100644
> --- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h
> +++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_6_0_d.h
> @@ -1757,4 +1757,28 @@
> #define mmVGT_VTX_CNT_EN 0xA2AE
> #define mmVGT_VTX_VECT_EJECT_REG 0x222C
>
> +/* manually added from old sid.h */
> +#define mmCB_PERFCOUNTER0_SELECT0 0x2688
> +#define mmCB_PERFCOUNTER1_SELECT0 0x268A
> +#define mmCB_PERFCOUNTER1_SELECT1 0x268B
> +#define mmCB_PERFCOUNTER2_SELECT0 0x268C
> +#define mmCB_PERFCOUNTER2_SELECT1 0x268D
> +#define mmCB_PERFCOUNTER3_SELECT0 0x268E
> +#define mmCB_PERFCOUNTER3_SELECT1 0x268F
> +#define mmCP_COHER_CNTL2 0x217A
> +#define mmCP_DEBUG 0x307F
> +#define mmRLC_SERDES_MASTER_BUSY_0 0x3119
> +#define mmRLC_SERDES_MASTER_BUSY_1 0x311A
> +#define mmRLC_RL_BASE 0x30C1
> +#define mmRLC_RL_SIZE 0x30C2
> +#define mmRLC_UCODE_ADDR 0x30CB
> +#define mmRLC_UCODE_DATA 0x30CC
> +#define mmRLC_GCPM_GENERAL_3 0x311E
> +#define mmRLC_SERDES_WR_MASTER_MASK_0 0x3115
> +#define mmRLC_SERDES_WR_MASTER_MASK_1 0x3116
> +#define mmRLC_TTOP_D 0x3105
> +#define mmRLC_CLEAR_STATE_RESTORE_BASE 0x30C8
> +#define mmRLC_PG_AO_CU_MASK 0x310B
> +#define mmSPI_STATIC_THREAD_MGMT_3 0x243A
> +
> #endif
> diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h
> index e5c817d..edc8a79 100644
> --- a/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h
> +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/oss_1_0_d.h
> @@ -269,4 +269,7 @@
> #define mmVCE_CONFIG 0x0F94
> #define mmXDMA_MSTR_MEM_OVERFLOW_CNTL 0x03F8
>
> +/* from the old sid.h */
> +#define mmDMA_TILING_CONFIG 0x342E
> +
> #endif
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply
* [tip:irq/core] genirq/affinity: Handle pre/post vectors in irq_create_affinity_masks()
From: tip-bot for Christoph Hellwig @ 2016-11-09 7:49 UTC (permalink / raw)
To: linux-tip-commits
Cc: mingo, bhelgaas, linux-kernel, tglx, hare, axboe, hpa, hch
In-Reply-To: <1478654107-7384-4-git-send-email-hch@lst.de>
Commit-ID: 67c93c218dc5d1b45d547771f1fdb44a381e1faf
Gitweb: http://git.kernel.org/tip/67c93c218dc5d1b45d547771f1fdb44a381e1faf
Author: Christoph Hellwig <hch@lst.de>
AuthorDate: Tue, 8 Nov 2016 17:15:03 -0800
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 9 Nov 2016 08:25:09 +0100
genirq/affinity: Handle pre/post vectors in irq_create_affinity_masks()
Only calculate the affinity for the main I/O vectors, and skip the
pre or post vectors specified by struct irq_affinity.
Also remove the irq_affinity cpumask argument that has never been used.
If we ever need it in the future we can pass it through struct
irq_affinity.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-4-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/pci/msi.c | 6 ++++--
include/linux/interrupt.h | 4 ++--
kernel/irq/affinity.c | 46 +++++++++++++++++++++++++---------------------
3 files changed, 31 insertions(+), 25 deletions(-)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index dad2da7..f4a108b 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -553,12 +553,13 @@ error_attrs:
static struct msi_desc *
msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity)
{
+ static const struct irq_affinity default_affd;
struct cpumask *masks = NULL;
struct msi_desc *entry;
u16 control;
if (affinity) {
- masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+ masks = irq_create_affinity_masks(nvec, &default_affd);
if (!masks)
pr_err("Unable to allocate affinity masks, ignoring\n");
}
@@ -692,12 +693,13 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
struct msix_entry *entries, int nvec,
bool affinity)
{
+ static const struct irq_affinity default_affd;
struct cpumask *curmsk, *masks = NULL;
struct msi_desc *entry;
int ret, i;
if (affinity) {
- masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+ masks = irq_create_affinity_masks(nvec, &default_affd);
if (!masks)
pr_err("Unable to allocate affinity masks, ignoring\n");
}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9081f23..53144e7 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -290,7 +290,7 @@ extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
extern int
irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
-struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec);
+struct cpumask *irq_create_affinity_masks(int nvec, const struct irq_affinity *affd);
int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd);
#else /* CONFIG_SMP */
@@ -325,7 +325,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
}
static inline struct cpumask *
-irq_create_affinity_masks(const struct cpumask *affinity, int nvec)
+irq_create_affinity_masks(int nvec, const struct irq_affinity *affd)
{
return NULL;
}
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 8d92597..17360bd 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -51,16 +51,16 @@ static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
/**
* irq_create_affinity_masks - Create affinity masks for multiqueue spreading
- * @affinity: The affinity mask to spread. If NULL cpu_online_mask
- * is used
- * @nvecs: The number of vectors
+ * @nvecs: The total number of vectors
+ * @affd: Description of the affinity requirements
*
* Returns the masks pointer or NULL if allocation failed.
*/
-struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
- int nvec)
+struct cpumask *
+irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
{
- int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0;
+ int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec;
+ int affv = nvecs - affd->pre_vectors - affd->post_vectors;
nodemask_t nodemsk = NODE_MASK_NONE;
struct cpumask *masks;
cpumask_var_t nmsk;
@@ -68,46 +68,46 @@ struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return NULL;
- masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL);
+ masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
if (!masks)
goto out;
+ /* Fill out vectors at the beginning that don't need affinity */
+ for (curvec = 0; curvec < affd->pre_vectors; curvec++)
+ cpumask_copy(masks + curvec, cpu_possible_mask);
+
/* Stabilize the cpumasks */
get_online_cpus();
- /* If the supplied affinity mask is NULL, use cpu online mask */
- if (!affinity)
- affinity = cpu_online_mask;
-
- nodes = get_nodes_in_cpumask(affinity, &nodemsk);
+ nodes = get_nodes_in_cpumask(cpu_online_mask, &nodemsk);
/*
* If the number of nodes in the mask is less than or equal the
* number of vectors we just spread the vectors across the nodes.
*/
- if (nvec <= nodes) {
+ if (affv <= nodes) {
for_each_node_mask(n, nodemsk) {
cpumask_copy(masks + curvec, cpumask_of_node(n));
- if (++curvec == nvec)
+ if (++curvec == affv)
break;
}
- goto outonl;
+ goto done;
}
/* Spread the vectors per node */
- vecs_per_node = nvec / nodes;
+ vecs_per_node = affv / nodes;
/* Account for rounding errors */
- extra_vecs = nvec - (nodes * vecs_per_node);
+ extra_vecs = affv - (nodes * vecs_per_node);
for_each_node_mask(n, nodemsk) {
int ncpus, v, vecs_to_assign = vecs_per_node;
/* Get the cpus on this node which are in the mask */
- cpumask_and(nmsk, affinity, cpumask_of_node(n));
+ cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
/* Calculate the number of cpus per vector */
ncpus = cpumask_weight(nmsk);
- for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) {
+ for (v = 0; curvec < affv && v < vecs_to_assign; curvec++, v++) {
cpus_per_vec = ncpus / vecs_to_assign;
/* Account for extra vectors to compensate rounding errors */
@@ -119,12 +119,16 @@ struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
}
- if (curvec >= nvec)
+ if (curvec >= affv)
break;
}
-outonl:
+done:
put_online_cpus();
+
+ /* Fill out vectors at the end that don't need affinity */
+ for (; curvec < nvecs; curvec++)
+ cpumask_copy(masks + curvec, cpu_possible_mask);
out:
free_cpumask_var(nmsk);
return masks;
^ permalink raw reply related
* [tip:irq/core] genirq/affinity: Introduce struct irq_affinity
From: tip-bot for Christoph Hellwig @ 2016-11-09 7:48 UTC (permalink / raw)
To: linux-tip-commits
Cc: tglx, hpa, mingo, linux-kernel, hch, axboe, hare, jthumshirn
In-Reply-To: <1478654107-7384-2-git-send-email-hch@lst.de>
Commit-ID: 20e407e195b29a4f5a18d713a61f54a75f992bd5
Gitweb: http://git.kernel.org/tip/20e407e195b29a4f5a18d713a61f54a75f992bd5
Author: Christoph Hellwig <hch@lst.de>
AuthorDate: Tue, 8 Nov 2016 17:15:01 -0800
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 9 Nov 2016 08:25:08 +0100
genirq/affinity: Introduce struct irq_affinity
Some drivers (various network and RDMA adapters, for example) have an MSI-X
vector layout where most of the vectors are used for I/O queues and should
have CPU affinity assigned to them, but some (usually 1 but sometimes more)
at the beginning or end are used for low-performance admin or configuration
work and should not have any explicit affinity assigned to them.
Add a new irq_affinity structure, which will be passed through a variant of
pci_irq_alloc_vectors that allows specifying these requirements (and is
extensible to any future quirks in that area) so that the core IRQ affinity
algorithm can take these quirks into account.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-2-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/interrupt.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 72f0721..6b52686 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -232,6 +232,18 @@ struct irq_affinity_notify {
void (*release)(struct kref *ref);
};
+/**
+ * struct irq_affinity - Description for automatic irq affinity assignements
+ * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of
+ * the MSI(-X) vector space
+ * @post_vectors: Don't apply affinity to @post_vectors at end of
+ * the MSI(-X) vector space
+ */
+struct irq_affinity {
+ int pre_vectors;
+ int post_vectors;
+};
+
#if defined(CONFIG_SMP)
extern cpumask_var_t irq_default_affinity;
^ permalink raw reply related
* [tip:irq/core] genirq/affinity: Handle pre/post vectors in irq_calc_affinity_vectors()
From: tip-bot for Christoph Hellwig @ 2016-11-09 7:49 UTC (permalink / raw)
To: linux-tip-commits; +Cc: linux-kernel, axboe, hpa, hch, hare, tglx, mingo
In-Reply-To: <1478654107-7384-3-git-send-email-hch@lst.de>
Commit-ID: 212bd846223c718b6577d4df16fd8d05a55ad914
Gitweb: http://git.kernel.org/tip/212bd846223c718b6577d4df16fd8d05a55ad914
Author: Christoph Hellwig <hch@lst.de>
AuthorDate: Tue, 8 Nov 2016 17:15:02 -0800
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 9 Nov 2016 08:25:08 +0100
genirq/affinity: Handle pre/post vectors in irq_calc_affinity_vectors()
Only calculate the affinity for the main I/O vectors, and skip the pre or
post vectors specified by struct irq_affinity.
Also remove the irq_affinity cpumask argument that has never been used. If
we ever need it in the future we can pass it through struct irq_affinity.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-pci@vger.kernel.org
Link: http://lkml.kernel.org/r/1478654107-7384-3-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/pci/msi.c | 8 ++++----
include/linux/interrupt.h | 4 ++--
kernel/irq/affinity.c | 24 ++++++++++--------------
3 files changed, 16 insertions(+), 20 deletions(-)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index ad70507..dad2da7 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1061,6 +1061,7 @@ EXPORT_SYMBOL(pci_msi_enabled);
static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
unsigned int flags)
{
+ static const struct irq_affinity default_affd;
bool affinity = flags & PCI_IRQ_AFFINITY;
int nvec;
int rc;
@@ -1091,8 +1092,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
for (;;) {
if (affinity) {
- nvec = irq_calc_affinity_vectors(dev->irq_affinity,
- nvec);
+ nvec = irq_calc_affinity_vectors(nvec, &default_affd);
if (nvec < minvec)
return -ENOSPC;
}
@@ -1132,6 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
struct msix_entry *entries, int minvec, int maxvec,
unsigned int flags)
{
+ static const struct irq_affinity default_affd;
bool affinity = flags & PCI_IRQ_AFFINITY;
int rc, nvec = maxvec;
@@ -1140,8 +1141,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
for (;;) {
if (affinity) {
- nvec = irq_calc_affinity_vectors(dev->irq_affinity,
- nvec);
+ nvec = irq_calc_affinity_vectors(nvec, &default_affd);
if (nvec < minvec)
return -ENOSPC;
}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 6b52686..9081f23 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -291,7 +291,7 @@ extern int
irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec);
-int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec);
+int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd);
#else /* CONFIG_SMP */
@@ -331,7 +331,7 @@ irq_create_affinity_masks(const struct cpumask *affinity, int nvec)
}
static inline int
-irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
{
return maxvec;
}
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 17f51d63..8d92597 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -131,24 +131,20 @@ out:
}
/**
- * irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask
- * @affinity: The affinity mask to spread. If NULL cpu_online_mask
- * is used
- * @maxvec: The maximum number of vectors available
+ * irq_calc_affinity_vectors - Calculate the optimal number of vectors
+ * @maxvec: The maximum number of vectors available
+ * @affd: Description of the affinity requirements
*/
-int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
{
- int cpus, ret;
+ int resv = affd->pre_vectors + affd->post_vectors;
+ int vecs = maxvec - resv;
+ int cpus;
/* Stabilize the cpumasks */
get_online_cpus();
- /* If the supplied affinity mask is NULL, use cpu online mask */
- if (!affinity)
- affinity = cpu_online_mask;
-
- cpus = cpumask_weight(affinity);
- ret = (cpus < maxvec) ? cpus : maxvec;
-
+ cpus = cpumask_weight(cpu_online_mask);
put_online_cpus();
- return ret;
+
+ return min(cpus, vecs) + resv;
}
^ permalink raw reply related
* [U-Boot] [PATCH 0/7] sunxi: Add support for the CHIP Pro
From: Heiko Schocher @ 2016-11-09 7:47 UTC (permalink / raw)
To: u-boot
In-Reply-To: <cover.db1b1205c7bcffae9553c604d8b6e3e7ee39dca4.1478621974.git-series.maxime.ripard@free-electrons.com>
Hello Maxime,
Am 08.11.2016 um 17:21 schrieb Maxime Ripard:
> The CHIP Pro is a SoM made by NextThing Co, and that embeds a GR8 SIP, an
> AXP209 PMIC, a WiFi BT chip and a 512MB SLC NAND.
>
> Since this is the first Allwinner device coming with an SLC NAND that doesn't have
> the shortcomings (and breakages) the MLC NAND has, we can finally enable
> the NAND support on a board by default.
>
> This is the occasion to introduce a bunch of additions needed imo to be
> able to come up with a sane NAND support for our users.
>
> The biggest pain point is that the BROM uses a different ECC and randomizer
> configuration than for the rest of the NAND. In order to lessen the number
> of bitflips, you also need to pad with random data the SPL image.
>
> Since it's quite tedious to do right (and most users won't be able to
> figure it out) and since if it is not done right, it will eventually turn
> into an unusable system (which is bad UX), we think that the best solution
> is to generate an SPL image that already embeds all this. We'll possibly
> have to do the same thing for the U-Boot image (at least for the random
> padding) on MLC NANDs.
>
> The only drawback from that is that you need to flash it raw, instead of
> using the usual nand write, but it's just a different command, nothing
> major anyway.
>
> In order to flash it, from a device switched in FEL, on your host:
> sunxi-fel spl spl/sunxi-spl.bin
> sunxi-fel write 0x4a000000 u-boot-dtb.bin
> sunxi-fel write 0x43000000 spl/sunxi-spl-with-ecc.bin
> sunxi-fel exe 0x4a000000
>
> And on the board, once u-boot is running (assuming the NAND is already
> erased):
>
> nand write.raw.noverify 0x43000000 0 40
> nand write.raw.noverify 0x43000000 0x400000 40
>
> nand write 0x4a000000 0x800000 0xc0000
>
> I also encountered some weird bug in the private libgcc that prevents
> U-Boot from loading. Disabling CONFIG_USE_PRIVATE_LIBGCC fixes that.
What was the problem?
>
> Let me know what you think,
> Maxime
>
> Boris Brezillon (1):
> mtd: nand: add support for the TC58NVG2S0H chip
>
> Hans de Goede (1):
> sunxi: Enable UBI and NAND support
>
> Maxime Ripard (5):
> sunxi: Sync GR8 DTS and AXP209 with the kernel
> tools: sunxi: Add spl image builder
> nand: sunxi: Add options for the SPL NAND configuration
> scripts: sunxi: Build an raw SPL image
> sunxi: Add support for the CHIP Pro
>
> Makefile | 3 +-
> arch/arm/dts/Makefile | 1 +-
> arch/arm/dts/axp209.dtsi | 6 +-
> arch/arm/dts/ntc-gr8-chip-pro.dts | 266 +++++++-
> arch/arm/dts/ntc-gr8.dtsi | 1132 ++++++++++++++++++++++++++++++-
> configs/CHIP_pro_defconfig | 27 +-
> drivers/mtd/nand/Kconfig | 16 +-
> drivers/mtd/nand/nand_ids.c | 3 +-
> include/configs/sunxi-common.h | 26 +-
> scripts/Makefile.spl | 12 +-
> tools/.gitignore | 1 +-
> tools/Makefile | 1 +-
> tools/sunxi-spl-image-builder.c | 1113 +++++++++++++++++++++++++++++-
> 13 files changed, 2603 insertions(+), 4 deletions(-)
> create mode 100644 arch/arm/dts/ntc-gr8-chip-pro.dts
> create mode 100644 arch/arm/dts/ntc-gr8.dtsi
> create mode 100644 configs/CHIP_pro_defconfig
> create mode 100644 tools/sunxi-spl-image-builder.c
>
> base-commit: d8bdfc80da39211d95f10d24e79f2e867305f71b
Can you please add a README file, where the above things are explained?
Thanks!
bye,
Heiko
--
DENX Software Engineering GmbH, Managing Director: Wolfgang Denk
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
^ permalink raw reply
* Re: [Qemu-devel] [PATCH for-2.8] migration: Fix return code of ram_save_iterate()
From: Thomas Huth @ 2016-11-09 7:46 UTC (permalink / raw)
To: Amit Shah; +Cc: Juan Quintela, qemu-devel, Dr. David Alan Gilbert, David Gibson
In-Reply-To: <20161109071800.GA1888@amit-lp.rh>
On 09.11.2016 08:18, Amit Shah wrote:
> On (Fri) 04 Nov 2016 [14:10:17], Thomas Huth wrote:
>> qemu_savevm_state_iterate() expects the iterators to return 1
>> when they are done, and 0 if there is still something left to do.
>> However, ram_save_iterate() does not obey this rule and returns
>> the number of saved pages instead. This causes a fatal hang with
>> ppc64 guests when you run QEMU like this (also works with TCG):
>
> "works with" -- does that mean reproduces with?
Yes, that's what I meant: You can reproduce it with TCG (e.g. running
on a x86 system), too, there's no need for a real POWER machine with KVM
here.
>> qemu-img create -f qcow2 /tmp/test.qcow2 1M
>> qemu-system-ppc64 -nographic -nodefaults -m 256 \
>> -hda /tmp/test.qcow2 -serial mon:stdio
>>
>> ... then switch to the monitor by pressing CTRL-a c and try to
>> save a snapshot with "savevm test1" for example.
>>
>> After the first iteration, ram_save_iterate() always returns 0 here,
>> so that qemu_savevm_state_iterate() hangs in an endless loop and you
>> can only "kill -9" the QEMU process.
>> Fix it by using proper return values in ram_save_iterate().
>>
>> Signed-off-by: Thomas Huth <thuth@redhat.com>
>> ---
>> migration/ram.c | 6 +++---
>> 1 file changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index fb9252d..a1c8089 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1987,7 +1987,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>> int ret;
>> int i;
>> int64_t t0;
>> - int pages_sent = 0;
>> + int done = 0;
>>
>> rcu_read_lock();
>> if (ram_list.version != last_version) {
>> @@ -2007,9 +2007,9 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>> pages = ram_find_and_save_block(f, false, &bytes_transferred);
>> /* no more pages to sent */
>> if (pages == 0) {
>> + done = 1;
>> break;
>> }
>> - pages_sent += pages;
>> acct_info.iterations++;
>>
>> /* we want to check in the 1st loop, just in case it was the 1st time
>> @@ -2044,7 +2044,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>> return ret;
>> }
>>
>> - return pages_sent;
>> + return done;
>> }
>
> I agree with David, we can just remove the return value. The first
> patch of the series can do that; and this one could become the 2nd
> patch. Should be OK for the soft freeze.
Sorry, I still did not quite get it - if I'd change the return type of
ram_save_iterate() and the other iterate functions to "void", how is
qemu_savevm_state_iterate() supposed to know whether all iterators are
done or not? And other iterators also use negative return values to
signal errors - should that then be handled via an "Error **" parameter
instead? ... my gut feeling still says that such a bigger rework (we've
got to touch all iterators for this!) should rather not be done right in
the middle of the freeze period...
Thomas
^ permalink raw reply
* Re: [Qemu-devel] virsh dump (qemu guest memory dump?): KASLR enabled linux guest support
From: Wen Congyang @ 2016-11-09 7:42 UTC (permalink / raw)
To: Dave Young, anderson; +Cc: lersek, qemu-devel, bhe
In-Reply-To: <20161109050248.GA4431@dhcp-128-65.nay.redhat.com>
On 11/09/2016 01:02 PM, Dave Young wrote:
> On 11/09/16 at 11:58am, Wen Congyang wrote:
>> On 11/09/2016 11:17 AM, Dave Young wrote:
>>> Drop qiaonuohan, seems the mail address is wrong..
>>>
>>> On 11/09/16 at 11:01am, Dave Young wrote:
>>>> Hi,
>>>>
>>>> The latest Linux kernel enabled KASLR to randomize phys/virt memory
>>>> addresses; we made some effort to support kexec/kdump so that the crash
>>>> utility can still work in case the crashed kernel has KASLR enabled.
>>>>
>>>> But according to Dave Anderson virsh dump does not work, quoted messages
>>>> from Dave below:
>>>>
>>>> """
>>>> with virsh dump, there's no way of even knowing that KASLR
>>>> has randomized the kernel __START_KERNEL_map region, because there is no
>>>> virtual address information -- e.g., like "SYMBOL(_stext)" in the kdump
>>>> vmcoreinfo data to compare against the vmlinux file symbol value.
>>>> Unless virsh dump can export some basic virtual memory data, which
>>>> they say it can't, I don't see how KASLR can ever be supported.
>>>> """
>>>>
>>>> I assume virsh dump is using qemu guest memory dump facility so it
>>>> should be first addressed in qemu. Thus post this query to qemu devel
>>>> list. If this is not correct please let me know.
>>
>> IIRC, 'virsh dump --memory-only' uses dump-guest-memory, and 'virsh dump'
>> uses migration to dump.
>
> Do they need different fixes? Dave, I guess you mean --memory-only, but
> could you clarify and confirm it?
>
>>
>> I think I should study kaslr first...
>
> Thanks for taking care of it.
Can you give me the patch for kexec/kdump? I want to know what I need to do
for dump-guest-memory.
Thanks
Wen Congyang
>
>>
>> Thanks
>> Wen Congyang
>>
>>>>
>>>> Could you qemu dump people make it work? Or we can not support virt dump
>>>> as long as KASLR being enabled. Latest Fedora kernel has enabled it in x86_64.
>>>>
>>>> Thanks
>>>> Dave
>>>
>>>
>>>
>>
>>
>>
>
>
> .
>
^ permalink raw reply
* Re: [PATCH 6/9 v2] arm64: dts: m3ulcb: enable SDHI0
From: Simon Horman @ 2016-11-09 7:44 UTC (permalink / raw)
To: Vladimir Barinov
Cc: Magnus Damm, Rob Herring, Mark Rutland,
devicetree-u79uwXL29TY76Z2rM5mHXA,
linux-renesas-soc-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1478614461-9918-1-git-send-email-vladimir.barinov-M4DtvfQ/ZS1MRgGoP+s0PdBPR1lH4CV8@public.gmane.org>
On Tue, Nov 08, 2016 at 05:14:21PM +0300, Vladimir Barinov wrote:
> This supports SDHI0 on M3ULCB board SD card slot
>
> Signed-off-by: Vladimir Barinov <vladimir.barinov-M4DtvfQ/ZS1MRgGoP+s0PdBPR1lH4CV8@public.gmane.org>
> Reviewed-off-by: Simon Horman <horms+renesas-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>
Thanks Vladimir,
I have queued up the following patches:
arm64: dts: h3ulcb: rename SDHI0 pins
arm64: dts: h3ulcb: enable SDHI2
arm64: dts: m3ulcb: enable SDHI2
arm64: dts: m3ulcb: enable SDHI0
For reference I would, however, like to make some comments regarding the
way you have submitted these:
1. I did not provide a Reviewed-off-by tag or any other tag as far as I
recall. So it's not appropriate for you to add one when posting patches.
I have removed it.
2. Notwithstanding the above, Reviewed-off-by is an invalid tag.
Perhaps you mean Reviewed-by.
3. When you repost patches I have a slight preference for you to repost
them in a fresh thread. And if the patchset has more than one patch then
with a fresh cover letter. This makes it a little easier for me
to see what is going on. And gives a more natural place for
me to respond to a patchset.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH 6/9 v2] arm64: dts: m3ulcb: enable SDHI0
From: Simon Horman @ 2016-11-09 7:44 UTC (permalink / raw)
To: Vladimir Barinov
Cc: Magnus Damm, Rob Herring, Mark Rutland, devicetree,
linux-renesas-soc
In-Reply-To: <1478614461-9918-1-git-send-email-vladimir.barinov@cogentembedded.com>
On Tue, Nov 08, 2016 at 05:14:21PM +0300, Vladimir Barinov wrote:
> This supports SDHI0 on M3ULCB board SD card slot
>
> Signed-off-by: Vladimir Barinov <vladimir.barinov@cogentembedded.com>
> Reviewed-off-by: Simon Horman <horms+renesas@verge.net.au>
Thanks Vladimir,
I have queued up the following patches:
arm64: dts: h3ulcb: rename SDHI0 pins
arm64: dts: h3ulcb: enable SDHI2
arm64: dts: m3ulcb: enable SDHI2
arm64: dts: m3ulcb: enable SDHI0
For reference I would, however, like to make some comments regarding the
way you have submitted these:
1. I did not provide a Reviewed-off-by tag or any other tag as far as I
recall. So it's not appropriate for you to add one when posting patches.
I have removed it.
2. Notwithstanding the above, Reviewed-off-by is an invalid tag.
Perhaps you mean Reviewed-by.
3. When you repost patches I have a slight preference for you to repost
them in a fresh thread. And if the patchset has more than one patch then
with a fresh cover letter. This makes it a little easier for me
to see what is going on. And gives a more natural place for
me to respond to a patchset.
^ permalink raw reply
* Re: Summary of LPC guest MSI discussion in Santa Fe
From: Auger Eric @ 2016-11-09 7:43 UTC (permalink / raw)
To: Don Dutile, Will Deacon
Cc: drjones, christoffer.dall, jason, kvm, marc.zyngier, benh, joro,
punit.agrawal, linux-kernel, iommu, diana.craciun,
Alex Williamson, pranav.sawargaonkar, arnd, dwmw, jcm, tglx,
robin.murphy, linux-arm-kernel, eric.auger.pro
In-Reply-To: <5822214F.2070500@redhat.com>
Hi Will,
On 08/11/2016 20:02, Don Dutile wrote:
> On 11/08/2016 12:54 PM, Will Deacon wrote:
>> On Tue, Nov 08, 2016 at 03:27:23PM +0100, Auger Eric wrote:
>>> On 08/11/2016 03:45, Will Deacon wrote:
>>>> Rather than treat these as separate problems, a better interface is to
>>>> tell userspace about a set of reserved regions, and have this include
>>>> the MSI doorbell, irrespective of whether or not it can be remapped.
>>>> Don suggested that we statically pick an address for the doorbell in a
>>>> similar way to x86, and have the kernel map it there. We could even
>>>> pick
>>>> 0xfee00000. If it conflicts with a reserved region on the platform (due
>>>> to (4)), then we'd obviously have to (deterministically?) allocate it
>>>> somewhere else, but probably within the bottom 4G.
>>> This is tentatively achieved now with
>>> [1] [RFC v2 0/8] KVM PCIe/MSI passthrough on ARM/ARM64 - Alt II
>>> (http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1264506.html)
>>>
>> Yup, I saw that fly by. Hopefully some of the internals can be reused
>> with the current thinking on user ABI.
>>
>>>> The next question is how to tell userspace about all of the reserved
>>>> regions. Initially, the idea was to extend VFIO, however Alex pointed
>>>> out a horrible scenario:
>>>>
>>>> 1. QEMU spawns a VM on system 0
>>>> 2. VM is migrated to system 1
>>>> 3. QEMU attempts to passthrough a device using PCI hotplug
>>>>
>>>> In this scenario, the guest memory map is chosen at step (1), yet there
>>>> is no VFIO fd available to determine the reserved regions. Furthermore,
>>>> the reserved regions may vary between system 0 and system 1. This
>>>> pretty
>>>> much rules out using VFIO to determine the reserved regions. Alex
>>>> suggested
>>>> that the SMMU driver can advertise the regions via
>>>> /sys/class/iommu/. This
>>>> would solve part of the problem, but migration between systems with
>>>> different memory maps can still cause problems if the reserved regions
>>>> of the new system conflict with the guest memory map chosen by QEMU.
>>>
>>> OK so I understand we do not want anymore the VFIO chain capability API
>>> (patch 5 of above series) but we prefer a sysfs approach instead.
>> Right.
>>
>>> I understand the sysfs approach which allows the userspace to get the
>>> info earlier and independently on VFIO. Keeping in mind current QEMU
>>> virt - which is not the only userspace - will not do much from this info
>>> until we bring upheavals in virt address space management. So if I am
>>> not wrong, at the moment the main action to be undertaken is the
>>> rejection of the PCI hotplug in case we detect a collision.
>> I don't think so; it should be up to userspace to reject the hotplug.
>> If userspace doesn't have support for the regions, then that's fine --
>> you just end up in a situation where the CPU page table maps memory
>> somewhere that the device can't see. In other words, you'll end up with
>> spurious DMA failures, but that's exactly what happens with current
>> systems
>> if you passthrough an overlapping region (Robin demonstrated this on
>> Juno).
>>
>> Additionally, you can imagine some future support where you can tell the
>> guest not to use certain regions of its memory for DMA. In this case, you
>> wouldn't want to refuse the hotplug in the case of overlapping regions.
>>
>> Really, I think the kernel side just needs to enumerate the fixed
>> reserved
>> regions, place the doorbell at a fixed address and then advertise these
>> via sysfs.
>>
>>> I can respin [1]
>>> - studying and taking into account Robin's comments about dm_regions
>>> similarities
>>> - removing the VFIO capability chain and replacing this by a sysfs API
>> Ideally, this would be reusable between different SMMU drivers so the
>> sysfs
>> entries have the same format etc.
>>
>>> Would that be OK?
>> Sounds good to me. Are you in a position to prototype something on the
>> qemu
>> side once we've got kernel-side agreement?
yes sure.
>>
>>> What about Alex comments who wanted to report the usable memory ranges
>>> instead of unusable memory ranges?
>>>
>>> Also did you have a chance to discuss the following items:
>>> 1) the VFIO irq safety assessment
>> The discussion really focussed on system topology, as opposed to
>> properties
>> of the doorbell. Regardless of how the device talks to the doorbell, if
>> the doorbell can't protect against things like MSI spoofing, then it's
>> unsafe. My opinion is that we shouldn't allow passthrough by default on
>> systems with unsafe doorbells (we could piggyback on
>> allow_unsafe_interrupts
>> cmdline option to VFIO).
OK.
>>
>> A first step would be making all this opt-in, and only supporting GICv3
>> ITS for now.
> You're trying to support a config that is < GICv3 and no ITS ? ...
> That would be the equiv. of x86 pre-intr-remap, and that's why
> allow_unsafe_interrupts
> hook was created ... to enable devel/kick-the-tires.
>>> 2) the MSI reserved size computation (is an arbitrary size OK?)
>> If we fix the base address, we could fix a size too. However, we'd still
>> need to enumerate the doorbells to check that they fit in the region we
>> have. If not, then we can warn during boot and treat it the same way as
>> a resource conflict (that is, reallocate the region in some deterministic
>> way).
OK
Thanks
Eric
>>
>> Will
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply
* Summary of LPC guest MSI discussion in Santa Fe
From: Auger Eric @ 2016-11-09 7:43 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <5822214F.2070500@redhat.com>
Hi Will,
On 08/11/2016 20:02, Don Dutile wrote:
> On 11/08/2016 12:54 PM, Will Deacon wrote:
>> On Tue, Nov 08, 2016 at 03:27:23PM +0100, Auger Eric wrote:
>>> On 08/11/2016 03:45, Will Deacon wrote:
>>>> Rather than treat these as separate problems, a better interface is to
>>>> tell userspace about a set of reserved regions, and have this include
>>>> the MSI doorbell, irrespective of whether or not it can be remapped.
>>>> Don suggested that we statically pick an address for the doorbell in a
>>>> similar way to x86, and have the kernel map it there. We could even
>>>> pick
>>>> 0xfee00000. If it conflicts with a reserved region on the platform (due
>>>> to (4)), then we'd obviously have to (deterministically?) allocate it
>>>> somewhere else, but probably within the bottom 4G.
>>> This is tentatively achieved now with
>>> [1] [RFC v2 0/8] KVM PCIe/MSI passthrough on ARM/ARM64 - Alt II
>>> (http://www.mail-archive.com/linux-kernel at vger.kernel.org/msg1264506.html)
>>>
>> Yup, I saw that fly by. Hopefully some of the internals can be reused
>> with the current thinking on user ABI.
>>
>>>> The next question is how to tell userspace about all of the reserved
>>>> regions. Initially, the idea was to extend VFIO, however Alex pointed
>>>> out a horrible scenario:
>>>>
>>>> 1. QEMU spawns a VM on system 0
>>>> 2. VM is migrated to system 1
>>>> 3. QEMU attempts to passthrough a device using PCI hotplug
>>>>
>>>> In this scenario, the guest memory map is chosen at step (1), yet there
>>>> is no VFIO fd available to determine the reserved regions. Furthermore,
>>>> the reserved regions may vary between system 0 and system 1. This
>>>> pretty
>>>> much rules out using VFIO to determine the reserved regions. Alex
>>>> suggested
>>>> that the SMMU driver can advertise the regions via
>>>> /sys/class/iommu/. This
>>>> would solve part of the problem, but migration between systems with
>>>> different memory maps can still cause problems if the reserved regions
>>>> of the new system conflict with the guest memory map chosen by QEMU.
>>>
>>> OK so I understand we do not want anymore the VFIO chain capability API
>>> (patch 5 of above series) but we prefer a sysfs approach instead.
>> Right.
>>
>>> I understand the sysfs approach which allows the userspace to get the
>>> info earlier and independently on VFIO. Keeping in mind current QEMU
>>> virt - which is not the only userspace - will not do much from this info
>>> until we bring upheavals in virt address space management. So if I am
>>> not wrong, at the moment the main action to be undertaken is the
>>> rejection of the PCI hotplug in case we detect a collision.
>> I don't think so; it should be up to userspace to reject the hotplug.
>> If userspace doesn't have support for the regions, then that's fine --
>> you just end up in a situation where the CPU page table maps memory
>> somewhere that the device can't see. In other words, you'll end up with
>> spurious DMA failures, but that's exactly what happens with current
>> systems
>> if you passthrough an overlapping region (Robin demonstrated this on
>> Juno).
>>
>> Additionally, you can imagine some future support where you can tell the
>> guest not to use certain regions of its memory for DMA. In this case, you
>> wouldn't want to refuse the hotplug in the case of overlapping regions.
>>
>> Really, I think the kernel side just needs to enumerate the fixed
>> reserved
>> regions, place the doorbell at a fixed address and then advertise these
>> via sysfs.
>>
>>> I can respin [1]
>>> - studying and taking into account Robin's comments about dm_regions
>>> similarities
>>> - removing the VFIO capability chain and replacing this by a sysfs API
>> Ideally, this would be reusable between different SMMU drivers so the
>> sysfs
>> entries have the same format etc.
>>
>>> Would that be OK?
>> Sounds good to me. Are you in a position to prototype something on the
>> qemu
>> side once we've got kernel-side agreement?
yes sure.
>>
>>> What about Alex comments who wanted to report the usable memory ranges
>>> instead of unusable memory ranges?
>>>
>>> Also did you have a chance to discuss the following items:
>>> 1) the VFIO irq safety assessment
>> The discussion really focussed on system topology, as opposed to
>> properties
>> of the doorbell. Regardless of how the device talks to the doorbell, if
>> the doorbell can't protect against things like MSI spoofing, then it's
>> unsafe. My opinion is that we shouldn't allow passthrough by default on
>> systems with unsafe doorbells (we could piggyback on
>> allow_unsafe_interrupts
>> cmdline option to VFIO).
OK.
>>
>> A first step would be making all this opt-in, and only supporting GICv3
>> ITS for now.
> You're trying to support a config that is < GICv3 and no ITS ? ...
> That would be the equiv. of x86 pre-intr-remap, and that's why
> allow_unsafe_interrupts
> hook was created ... to enable devel/kick-the-tires.
>>> 2) the MSI reserved size computation (is an arbitrary size OK?)
>> If we fix the base address, we could fix a size too. However, we'd still
>> need to enumerate the doorbells to check that they fit in the region we
>> have. If not, then we can warn during boot and treat it the same way as
>> a resource conflict (that is, reallocate the region in some deterministic
>> way).
OK
Thanks
Eric
>>
>> Will
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply
* Re: Summary of LPC guest MSI discussion in Santa Fe
From: Auger Eric @ 2016-11-09 7:43 UTC (permalink / raw)
To: Don Dutile, Will Deacon
Cc: linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
drjones-H+wXaHxf7aLQT0dZR+AlfA, jason-NLaQJdtUoK4Be96aLqz0jA,
kvm-u79uwXL29TY76Z2rM5mHXA, marc.zyngier-5wv7dgnIgG8,
benh-XVmvHMARGAS8U2dJNN8I7kB+6BGkLq7r, punit.agrawal-5wv7dgnIgG8,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
pranav.sawargaonkar-Re5JQEeQqe8AvxtiuMwx3w, arnd-r2nGTMty4D4,
dwmw-vV1OtcyAfmbQXOPxS62xeg, jcm-H+wXaHxf7aLQT0dZR+AlfA,
tglx-hfZtesqFncYOwBW4kG4KsQ,
christoffer.dall-QSEj5FYQhm4dnm+yROfE0A,
eric.auger.pro-Re5JQEeQqe8AvxtiuMwx3w
In-Reply-To: <5822214F.2070500-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Hi Will,
On 08/11/2016 20:02, Don Dutile wrote:
> On 11/08/2016 12:54 PM, Will Deacon wrote:
>> On Tue, Nov 08, 2016 at 03:27:23PM +0100, Auger Eric wrote:
>>> On 08/11/2016 03:45, Will Deacon wrote:
>>>> Rather than treat these as separate problems, a better interface is to
>>>> tell userspace about a set of reserved regions, and have this include
>>>> the MSI doorbell, irrespective of whether or not it can be remapped.
>>>> Don suggested that we statically pick an address for the doorbell in a
>>>> similar way to x86, and have the kernel map it there. We could even
>>>> pick
>>>> 0xfee00000. If it conflicts with a reserved region on the platform (due
>>>> to (4)), then we'd obviously have to (deterministically?) allocate it
>>>> somewhere else, but probably within the bottom 4G.
>>> This is tentatively achieved now with
>>> [1] [RFC v2 0/8] KVM PCIe/MSI passthrough on ARM/ARM64 - Alt II
>>> (http://www.mail-archive.com/linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org/msg1264506.html)
>>>
>> Yup, I saw that fly by. Hopefully some of the internals can be reused
>> with the current thinking on user ABI.
>>
>>>> The next question is how to tell userspace about all of the reserved
>>>> regions. Initially, the idea was to extend VFIO, however Alex pointed
>>>> out a horrible scenario:
>>>>
>>>> 1. QEMU spawns a VM on system 0
>>>> 2. VM is migrated to system 1
>>>> 3. QEMU attempts to passthrough a device using PCI hotplug
>>>>
>>>> In this scenario, the guest memory map is chosen at step (1), yet there
>>>> is no VFIO fd available to determine the reserved regions. Furthermore,
>>>> the reserved regions may vary between system 0 and system 1. This
>>>> pretty
>>>> much rules out using VFIO to determine the reserved regions. Alex
>>>> suggested
>>>> that the SMMU driver can advertise the regions via
>>>> /sys/class/iommu/. This
>>>> would solve part of the problem, but migration between systems with
>>>> different memory maps can still cause problems if the reserved regions
>>>> of the new system conflict with the guest memory map chosen by QEMU.
>>>
>>> OK so I understand we do not want anymore the VFIO chain capability API
>>> (patch 5 of above series) but we prefer a sysfs approach instead.
>> Right.
>>
>>> I understand the sysfs approach which allows the userspace to get the
>>> info earlier and independently of VFIO. Keeping in mind current QEMU
>>> virt - which is not the only userspace - will not do much from this info
>>> until we bring upheavals in virt address space management. So if I am
>>> not wrong, at the moment the main action to be undertaken is the
>>> rejection of the PCI hotplug in case we detect a collision.
>> I don't think so; it should be up to userspace to reject the hotplug.
>> If userspace doesn't have support for the regions, then that's fine --
>> you just end up in a situation where the CPU page table maps memory
>> somewhere that the device can't see. In other words, you'll end up with
>> spurious DMA failures, but that's exactly what happens with current
>> systems
>> if you passthrough an overlapping region (Robin demonstrated this on
>> Juno).
>>
>> Additionally, you can imagine some future support where you can tell the
>> guest not to use certain regions of its memory for DMA. In this case, you
>> wouldn't want to refuse the hotplug in the case of overlapping regions.
>>
>> Really, I think the kernel side just needs to enumerate the fixed
>> reserved
>> regions, place the doorbell at a fixed address and then advertise these
>> via sysfs.
>>
>>> I can respin [1]
>>> - studying and taking into account Robin's comments about dm_regions
>>> similarities
>>> - removing the VFIO capability chain and replacing this by a sysfs API
>> Ideally, this would be reusable between different SMMU drivers so the
>> sysfs
>> entries have the same format etc.
>>
>>> Would that be OK?
>> Sounds good to me. Are you in a position to prototype something on the
>> qemu
>> side once we've got kernel-side agreement?
yes sure.
>>
>>> What about Alex comments who wanted to report the usable memory ranges
>>> instead of unusable memory ranges?
>>>
>>> Also did you have a chance to discuss the following items:
>>> 1) the VFIO irq safety assessment
>> The discussion really focussed on system topology, as opposed to
>> properties
>> of the doorbell. Regardless of how the device talks to the doorbell, if
>> the doorbell can't protect against things like MSI spoofing, then it's
>> unsafe. My opinion is that we shouldn't allow passthrough by default on
>> systems with unsafe doorbells (we could piggyback on
>> allow_unsafe_interrupts
>> cmdline option to VFIO).
OK.
>>
>> A first step would be making all this opt-in, and only supporting GICv3
>> ITS for now.
> You're trying to support a config that is < GICv3 and no ITS ? ...
> That would be the equiv. of x86 pre-intr-remap, and that's why
> allow_unsafe_interrupts
> hook was created ... to enable devel/kick-the-tires.
>>> 2) the MSI reserved size computation (is an arbitrary size OK?)
>> If we fix the base address, we could fix a size too. However, we'd still
>> need to enumerate the doorbells to check that they fit in the region we
>> have. If not, then we can warn during boot and treat it the same way as
>> a resource conflict (that is, reallocate the region in some deterministic
>> way).
OK
Thanks
Eric
>>
>> Will
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply
* Re: [PATCH 1/1] fanotify_mark.2: mention FAN_Q_OVERFLOW
From: Michael Kerrisk (man-pages) @ 2016-11-09 7:42 UTC (permalink / raw)
To: Heinrich Schuchardt
Cc: mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w,
linux-man-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20161108221338.9611-1-xypron.glpk-Mmb7MZpHnFY@public.gmane.org>
Hello Heinrich,
On 11/08/2016 11:13 PM, Heinrich Schuchardt wrote:
> To receive overflow events it is necessary to set this bit
> in fanotify_mark().
Thanks! Patch applied.
Cheers,
Michael
> Signed-off-by: Heinrich Schuchardt <xypron.glpk-Mmb7MZpHnFY@public.gmane.org>
> ---
> man2/fanotify_mark.2 | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/man2/fanotify_mark.2 b/man2/fanotify_mark.2
> index 88aa17e..4933ecc 100644
> --- a/man2/fanotify_mark.2
> +++ b/man2/fanotify_mark.2
> @@ -149,6 +149,13 @@ Create an event when a read-only file or directory is closed.
> .B FAN_OPEN
> Create an event when a file or directory is opened.
> .TP
> +.B FAN_Q_OVERFLOW
> +Create an event when an overflow of the event queue occurs.
> +The size of the event queue is limited to 16384 entries if
> +.B FAN_UNLIMITED_QUEUE
> +is not set in
> +.BR fanotify_init (2).
> +.TP
> .B FAN_OPEN_PERM
> Create an event when a permission to open a file or directory is requested.
> An fanotify file descriptor created with
>
--
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH 1/6] mm: khugepaged: fix radix tree node leak in shmem collapse error path
From: Jan Kara @ 2016-11-09 7:41 UTC (permalink / raw)
To: Johannes Weiner
Cc: Jan Kara, Andrew Morton, Linus Torvalds, Kirill A. Shutemov,
linux-mm, linux-kernel, kernel-team
In-Reply-To: <20161108161245.GA4020@cmpxchg.org>
On Tue 08-11-16 11:12:45, Johannes Weiner wrote:
> On Tue, Nov 08, 2016 at 10:53:52AM +0100, Jan Kara wrote:
> > On Mon 07-11-16 14:07:36, Johannes Weiner wrote:
> > > The radix tree counts valid entries in each tree node. Entries stored
> > > in the tree cannot be removed by simply storing NULL in the slot or
> > > the internal counters will be off and the node never gets freed again.
> > >
> > > When collapsing a shmem page fails, restore the holes that were filled
> > > with radix_tree_insert() with a proper radix tree deletion.
> > >
> > > Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
> > > Reported-by: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> > > ---
> > > mm/khugepaged.c | 3 ++-
> > > 1 file changed, 2 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > index 728d7790dc2d..eac6f0580e26 100644
> > > --- a/mm/khugepaged.c
> > > +++ b/mm/khugepaged.c
> > > @@ -1520,7 +1520,8 @@ static void collapse_shmem(struct mm_struct *mm,
> > > if (!nr_none)
> > > break;
> > > /* Put holes back where they were */
> > > - radix_tree_replace_slot(slot, NULL);
> > > + radix_tree_delete(&mapping->page_tree,
> > > + iter.index);
> >
> > Hum, but this is inside radix_tree_for_each_slot() iteration. And
> > radix_tree_delete() may end up freeing nodes resulting in invalidating
> > current slot pointer and the iteration code will do use-after-free.
>
> Good point, we need to do another tree lookup after the deletion.
>
> But there are other instances in the code, where we drop the lock
> temporarily and somebody else could delete the node from under us.
>
> In the main collapse path, I *think* this is prevented by the fact
> that when we drop the tree lock we still hold the page lock of the
> regular page that's in the tree while we isolate and unmap it, thus
> pin the node. Even so, it would seem a little hairy to rely on that.
Yeah, I think that is mostly right but I'm not sure whether shrinking of
radix tree into direct pointer cannot bite us here as well. Generally that
relies on internal implementation of the radix tree and its iterator
so what you did makes sense to me.
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply
* Re: [PATCH 1/6] mm: khugepaged: fix radix tree node leak in shmem collapse error path
From: Jan Kara @ 2016-11-09 7:41 UTC (permalink / raw)
To: Johannes Weiner
Cc: Jan Kara, Andrew Morton, Linus Torvalds, Kirill A. Shutemov,
linux-mm, linux-kernel, kernel-team
In-Reply-To: <20161108161245.GA4020@cmpxchg.org>
On Tue 08-11-16 11:12:45, Johannes Weiner wrote:
> On Tue, Nov 08, 2016 at 10:53:52AM +0100, Jan Kara wrote:
> > On Mon 07-11-16 14:07:36, Johannes Weiner wrote:
> > > The radix tree counts valid entries in each tree node. Entries stored
> > > in the tree cannot be removed by simply storing NULL in the slot or
> > > the internal counters will be off and the node never gets freed again.
> > >
> > > When collapsing a shmem page fails, restore the holes that were filled
> > > with radix_tree_insert() with a proper radix tree deletion.
> > >
> > > Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages")
> > > Reported-by: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> > > ---
> > > mm/khugepaged.c | 3 ++-
> > > 1 file changed, 2 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > index 728d7790dc2d..eac6f0580e26 100644
> > > --- a/mm/khugepaged.c
> > > +++ b/mm/khugepaged.c
> > > @@ -1520,7 +1520,8 @@ static void collapse_shmem(struct mm_struct *mm,
> > > if (!nr_none)
> > > break;
> > > /* Put holes back where they were */
> > > - radix_tree_replace_slot(slot, NULL);
> > > + radix_tree_delete(&mapping->page_tree,
> > > + iter.index);
> >
> > Hum, but this is inside radix_tree_for_each_slot() iteration. And
> > radix_tree_delete() may end up freeing nodes resulting in invalidating
> > current slot pointer and the iteration code will do use-after-free.
>
> Good point, we need to do another tree lookup after the deletion.
>
> But there are other instances in the code, where we drop the lock
> temporarily and somebody else could delete the node from under us.
>
> In the main collapse path, I *think* this is prevented by the fact
> that when we drop the tree lock we still hold the page lock of the
> regular page that's in the tree while we isolate and unmap it, thus
> pin the node. Even so, it would seem a little hairy to rely on that.
Yeah, I think that is mostly right but I'm not sure whether shrinking of
radix tree into direct pointer cannot bite us here as well. Generally that
relies on internal implementation of the radix tree and its iterator
so what you did makes sense to me.
Honza
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
* [PATCH 5/5] drm/amdgpu: enable uvd mgcg for Fiji.
From: Rex Zhu @ 2016-11-09 7:41 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Rex Zhu
In-Reply-To: <1478677305-12579-1-git-send-email-Rex.Zhu-5C7GfCeVMHo@public.gmane.org>
Change-Id: I96b937c8b97589d1f98a8351f3653b89163c84a0
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/vi.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index a230b39..d09c25a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -927,7 +927,8 @@ static int vi_common_early_init(void *handle)
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_ROM_MGCG |
AMD_CG_SUPPORT_MC_MGCG |
- AMD_CG_SUPPORT_MC_LS;
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_UVD_MGCG;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x3c;
break;
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related
* [PATCH 4/5] drm/amdgpu: refine uvd 6.0 clock gate feature.
From: Rex Zhu @ 2016-11-09 7:41 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Rex Zhu
In-Reply-To: <1478677305-12579-1-git-send-email-Rex.Zhu-5C7GfCeVMHo@public.gmane.org>
Change-Id: I3b665f26689dd35750e1a6521cd5fac5456f7556
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 112 ++++++++++++++++++++++++++++------
1 file changed, 92 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 00fad69..c697a73 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -42,6 +42,10 @@ static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v6_0_start(struct amdgpu_device *adev);
static void uvd_v6_0_stop(struct amdgpu_device *adev);
static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev);
+static int uvd_v6_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state);
+static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
+ bool enable);
/**
* uvd_v6_0_ring_get_rptr - get read pointer
@@ -151,8 +155,6 @@ static int uvd_v6_0_hw_init(void *handle)
uint32_t tmp;
int r;
- amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
-
r = uvd_v6_0_start(adev);
if (r)
goto done;
@@ -395,11 +397,11 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
lmi_swap_cntl = 0;
mp_swap_cntl = 0;
+ amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
+ uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v6_0_enable_mgcg(adev, true);
uvd_v6_0_mc_resume(adev);
- /* disable clock gating */
- WREG32_FIELD(UVD_CGC_CTRL, DYN_CLOCK_MODE, 0);
-
/* disable interupt */
WREG32_FIELD(UVD_MASTINT_EN, VCPU_EN, 0);
@@ -838,22 +840,69 @@ static int uvd_v6_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
+static void uvd_v6_0_enable_clock_gating(struct amdgpu_device *adev, bool enable)
+{
+ uint32_t data1, data3;
+
+ data1 = RREG32(mmUVD_SUVD_CGC_GATE);
+ data3 = RREG32(mmUVD_CGC_GATE);
+
+ data1 |= UVD_SUVD_CGC_GATE__SRE_MASK |
+ UVD_SUVD_CGC_GATE__SIT_MASK |
+ UVD_SUVD_CGC_GATE__SMP_MASK |
+ UVD_SUVD_CGC_GATE__SCM_MASK |
+ UVD_SUVD_CGC_GATE__SDB_MASK |
+ UVD_SUVD_CGC_GATE__SRE_H264_MASK |
+ UVD_SUVD_CGC_GATE__SRE_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SIT_H264_MASK |
+ UVD_SUVD_CGC_GATE__SIT_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SCM_H264_MASK |
+ UVD_SUVD_CGC_GATE__SCM_HEVC_MASK |
+ UVD_SUVD_CGC_GATE__SDB_H264_MASK |
+ UVD_SUVD_CGC_GATE__SDB_HEVC_MASK;
+
+ if (enable) {
+ data3 |= (UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+ UVD_CGC_GATE__MPEG2_MASK |
+ UVD_CGC_GATE__RBC_MASK |
+ UVD_CGC_GATE__LMI_MC_MASK |
+ UVD_CGC_GATE__LMI_UMC_MASK |
+ UVD_CGC_GATE__IDCT_MASK |
+ UVD_CGC_GATE__MPRD_MASK |
+ UVD_CGC_GATE__MPC_MASK |
+ UVD_CGC_GATE__LBSI_MASK |
+ UVD_CGC_GATE__LRBBM_MASK |
+ UVD_CGC_GATE__UDEC_RE_MASK |
+ UVD_CGC_GATE__UDEC_CM_MASK |
+ UVD_CGC_GATE__UDEC_IT_MASK |
+ UVD_CGC_GATE__UDEC_DB_MASK |
+ UVD_CGC_GATE__UDEC_MP_MASK |
+ UVD_CGC_GATE__WCB_MASK |
+ UVD_CGC_GATE__VCPU_MASK |
+ UVD_CGC_GATE__JPEG_MASK |
+ UVD_CGC_GATE__SCPU_MASK |
+ UVD_CGC_GATE__JPEG2_MASK);
+ data3 &= ~UVD_CGC_GATE__REGS_MASK;
+ } else {
+ data3 = 0;
+ }
+
+ WREG32(mmUVD_SUVD_CGC_GATE, data1);
+ WREG32(mmUVD_CGC_GATE, data3);
+}
+
static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev)
{
- uint32_t data, data1, data2, suvd_flags;
+ uint32_t data, data2;
data = RREG32(mmUVD_CGC_CTRL);
- data1 = RREG32(mmUVD_SUVD_CGC_GATE);
data2 = RREG32(mmUVD_SUVD_CGC_CTRL);
+
data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
- suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
- UVD_SUVD_CGC_GATE__SIT_MASK |
- UVD_SUVD_CGC_GATE__SMP_MASK |
- UVD_SUVD_CGC_GATE__SCM_MASK |
- UVD_SUVD_CGC_GATE__SDB_MASK;
data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
(1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
@@ -886,11 +935,8 @@ static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev)
UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
- data1 |= suvd_flags;
WREG32(mmUVD_CGC_CTRL, data);
- WREG32(mmUVD_CGC_GATE, 0);
- WREG32(mmUVD_SUVD_CGC_GATE, data1);
WREG32(mmUVD_SUVD_CGC_CTRL, data2);
}
@@ -937,6 +983,32 @@ static void uvd_v6_0_set_hw_clock_gating(struct amdgpu_device *adev)
}
#endif
+static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data |= 0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ } else {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data &= ~0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ }
+}
+
static int uvd_v6_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
@@ -947,17 +1019,17 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
return 0;
if (enable) {
- /* disable HW gating and enable Sw gating */
- uvd_v6_0_set_sw_clock_gating(adev);
- } else {
/* wait for STATUS to clear */
if (uvd_v6_0_wait_for_idle(handle))
return -EBUSY;
-
+ uvd_v6_0_enable_clock_gating(adev, true);
/* enable HW gates because UVD is idle */
/* uvd_v6_0_set_hw_clock_gating(adev); */
+ } else {
+ /* disable HW gating and enable Sw gating */
+ uvd_v6_0_enable_clock_gating(adev, false);
}
-
+ uvd_v6_0_set_sw_clock_gating(adev);
return 0;
}
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related
* [PATCH 3/5] drm/amdgpu: refine uvd 5.0 clock gate feature.
From: Rex Zhu @ 2016-11-09 7:41 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Rex Zhu
In-Reply-To: <1478677305-12579-1-git-send-email-Rex.Zhu-5C7GfCeVMHo@public.gmane.org>
1. fix uvd cg status not correct.
2. fix uvd pg can't work on tonga.
3. enable uvd mgcg.
Change-Id: Ia3911f2bd0f982e2fd00a9041ec03f47ab5338ed
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tom St Denis <tom.stdenis@amd.com>
---
drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 106 ++++++++++++++++++++++++++--------
1 file changed, 83 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index dadb6ab..95cabea 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -40,7 +40,10 @@ static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev);
static int uvd_v5_0_start(struct amdgpu_device *adev);
static void uvd_v5_0_stop(struct amdgpu_device *adev);
-
+static int uvd_v5_0_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state);
+static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
+ bool enable);
/**
* uvd_v5_0_ring_get_rptr - get read pointer
*
@@ -149,9 +152,6 @@ static int uvd_v5_0_hw_init(void *handle)
uint32_t tmp;
int r;
- /* raise clocks while booting up the VCPU */
- amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
-
r = uvd_v5_0_start(adev);
if (r)
goto done;
@@ -189,11 +189,7 @@ static int uvd_v5_0_hw_init(void *handle)
amdgpu_ring_write(ring, 3);
amdgpu_ring_commit(ring);
-
done:
- /* lower clocks again */
- amdgpu_asic_set_uvd_clocks(adev, 0, 0);
-
if (!r)
DRM_INFO("UVD initialized successfully.\n");
@@ -226,6 +222,7 @@ static int uvd_v5_0_suspend(void *handle)
r = uvd_v5_0_hw_fini(adev);
if (r)
return r;
+ uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
r = amdgpu_uvd_suspend(adev);
if (r)
@@ -313,8 +310,9 @@ static int uvd_v5_0_start(struct amdgpu_device *adev)
uvd_v5_0_mc_resume(adev);
- /* disable clock gating */
- WREG32(mmUVD_CGC_GATE, 0);
+ amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
+ uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+ uvd_v5_0_enable_mgcg(adev, true);
/* disable interupt */
WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
@@ -628,16 +626,12 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev,
return 0;
}
-static void uvd_v5_0_set_sw_clock_gating(struct amdgpu_device *adev)
+static void uvd_v5_0_enable_clock_gating(struct amdgpu_device *adev, bool enable)
{
- uint32_t data, data1, data2, suvd_flags;
+ uint32_t data1, data3, suvd_flags;
- data = RREG32(mmUVD_CGC_CTRL);
data1 = RREG32(mmUVD_SUVD_CGC_GATE);
- data2 = RREG32(mmUVD_SUVD_CGC_CTRL);
-
- data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
- UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
+ data3 = RREG32(mmUVD_CGC_GATE);
suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK |
UVD_SUVD_CGC_GATE__SIT_MASK |
@@ -645,6 +639,49 @@ static void uvd_v5_0_set_sw_clock_gating(struct amdgpu_device *adev)
UVD_SUVD_CGC_GATE__SCM_MASK |
UVD_SUVD_CGC_GATE__SDB_MASK;
+ if (enable) {
+ data3 |= (UVD_CGC_GATE__SYS_MASK |
+ UVD_CGC_GATE__UDEC_MASK |
+ UVD_CGC_GATE__MPEG2_MASK |
+ UVD_CGC_GATE__RBC_MASK |
+ UVD_CGC_GATE__LMI_MC_MASK |
+ UVD_CGC_GATE__IDCT_MASK |
+ UVD_CGC_GATE__MPRD_MASK |
+ UVD_CGC_GATE__MPC_MASK |
+ UVD_CGC_GATE__LBSI_MASK |
+ UVD_CGC_GATE__LRBBM_MASK |
+ UVD_CGC_GATE__UDEC_RE_MASK |
+ UVD_CGC_GATE__UDEC_CM_MASK |
+ UVD_CGC_GATE__UDEC_IT_MASK |
+ UVD_CGC_GATE__UDEC_DB_MASK |
+ UVD_CGC_GATE__UDEC_MP_MASK |
+ UVD_CGC_GATE__WCB_MASK |
+ UVD_CGC_GATE__VCPU_MASK |
+ UVD_CGC_GATE__JPEG_MASK |
+ UVD_CGC_GATE__SCPU_MASK);
+ data3 &= ~UVD_CGC_GATE__REGS_MASK;
+ data1 |= suvd_flags;
+ } else {
+ data3 = 0;
+ data1 = 0;
+ }
+
+ WREG32(mmUVD_SUVD_CGC_GATE, data1);
+ WREG32(mmUVD_CGC_GATE, data3);
+}
+
+static void uvd_v5_0_set_sw_clock_gating(struct amdgpu_device *adev)
+{
+ uint32_t data, data2;
+
+ data = RREG32(mmUVD_CGC_CTRL);
+ data2 = RREG32(mmUVD_SUVD_CGC_CTRL);
+
+
+ data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
+ UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
+
+
data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK |
(1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) |
(4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY));
@@ -675,11 +712,8 @@ static void uvd_v5_0_set_sw_clock_gating(struct amdgpu_device *adev)
UVD_SUVD_CGC_CTRL__SMP_MODE_MASK |
UVD_SUVD_CGC_CTRL__SCM_MODE_MASK |
UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
- data1 |= suvd_flags;
WREG32(mmUVD_CGC_CTRL, data);
- WREG32(mmUVD_CGC_GATE, 0);
- WREG32(mmUVD_SUVD_CGC_GATE, data1);
WREG32(mmUVD_SUVD_CGC_CTRL, data2);
}
@@ -724,6 +758,31 @@ static void uvd_v5_0_set_hw_clock_gating(struct amdgpu_device *adev)
}
#endif
+static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 orig, data;
+
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data |= 0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ } else {
+ data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
+ data &= ~0xfff;
+ WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
+
+ orig = data = RREG32(mmUVD_CGC_CTRL);
+ data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+ if (orig != data)
+ WREG32(mmUVD_CGC_CTRL, data);
+ }
+}
static int uvd_v5_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
@@ -740,17 +799,18 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
curstate = state;
if (enable) {
- /* disable HW gating and enable Sw gating */
- uvd_v5_0_set_sw_clock_gating(adev);
- } else {
/* wait for STATUS to clear */
if (uvd_v5_0_wait_for_idle(handle))
return -EBUSY;
+ uvd_v5_0_enable_clock_gating(adev, true);
/* enable HW gates because UVD is idle */
/* uvd_v5_0_set_hw_clock_gating(adev); */
+ } else {
+ uvd_v5_0_enable_clock_gating(adev, false);
}
+ uvd_v5_0_set_sw_clock_gating(adev);
return 0;
}
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related
* [PATCH 2/5] drm/amd/powerplay: partial revert commit 01b0e7fb1.
From: Rex Zhu @ 2016-11-09 7:41 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Rex Zhu
In-Reply-To: <1478677305-12579-1-git-send-email-Rex.Zhu-5C7GfCeVMHo@public.gmane.org>
When uvd is idle, we gate the uvd clock,
and when uvd is busy, we ungate the uvd clock.
Change-Id: Ic2fa6149389b0113faf36ec7aad857e77d01af33
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
---
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
index cf2ee93..a1fc4fc 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
@@ -149,7 +149,7 @@ int smu7_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
if (bgate) {
cgs_set_clockgating_state(hwmgr->device,
AMD_IP_BLOCK_TYPE_UVD,
- AMD_CG_STATE_UNGATE);
+ AMD_CG_STATE_GATE);
cgs_set_powergating_state(hwmgr->device,
AMD_IP_BLOCK_TYPE_UVD,
AMD_PG_STATE_GATE);
@@ -162,7 +162,7 @@ int smu7_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
AMD_CG_STATE_UNGATE);
cgs_set_clockgating_state(hwmgr->device,
AMD_IP_BLOCK_TYPE_UVD,
- AMD_CG_STATE_GATE);
+ AMD_CG_STATE_UNGATE);
smu7_update_uvd_dpm(hwmgr, false);
}
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related
* [PATCH 1/5] drm/amdgpu: not set bypass mode for uvd5.0/uvd6.0
From: Rex Zhu @ 2016-11-09 7:41 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Rex Zhu
Change-Id: I99b307d2026d6fec0b5b18349455df2c38d78c6a
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 15 ---------------
drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 18 ++----------------
2 files changed, 2 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 95303e2..dadb6ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -724,19 +724,6 @@ static void uvd_v5_0_set_hw_clock_gating(struct amdgpu_device *adev)
}
#endif
-static void uvd_v5_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= (GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
- else
- tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
static int uvd_v5_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
@@ -745,8 +732,6 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
static int curstate = -1;
- uvd_v5_0_set_bypass_mode(adev, enable);
-
if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index a339b5c..00fad69 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -151,6 +151,8 @@ static int uvd_v6_0_hw_init(void *handle)
uint32_t tmp;
int r;
+ amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
+
r = uvd_v6_0_start(adev);
if (r)
goto done;
@@ -935,28 +937,12 @@ static void uvd_v6_0_set_hw_clock_gating(struct amdgpu_device *adev)
}
#endif
-static void uvd_v6_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
- u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
- if (enable)
- tmp |= (GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
- else
- tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK |
- GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK);
-
- WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
static int uvd_v6_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
- uvd_v6_0_set_bypass_mode(adev, enable);
-
if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
return 0;
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related
* [PATCH 3/3] vhost_net: tx support batching
From: Jason Wang @ 2016-11-09 7:38 UTC (permalink / raw)
To: mst, netdev, linux-kernel; +Cc: Jason Wang
In-Reply-To: <1478677113-13126-1-git-send-email-jasowang@redhat.com>
This patch tries to utilize tuntap rx batching by peeking at the tx
virtqueue during transmission: if there are more available buffers in
the virtqueue, set the MSG_MORE flag as a hint for tuntap to batch the
packets. The maximum number of batched tx packets is specified
through a module parameter: tx_batched.
When using 16 as tx_batched:
Pktgen test shows a 16% improvement in tx pps in the guest.
Netperf test does not show obvious regression.
For safety, 1 is used as the default value for tx_batched.
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/vhost/net.c | 15 ++++++++++++++-
drivers/vhost/vhost.c | 1 +
drivers/vhost/vhost.h | 1 +
3 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 5dc128a..51c378e 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -35,6 +35,10 @@ module_param(experimental_zcopytx, int, 0444);
MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
" 1 -Enable; 0 - Disable");
+static int tx_batched = 1;
+module_param(tx_batched, int, 0444);
+MODULE_PARM_DESC(tx_batched, "Number of patches batched in TX");
+
/* Max number of bytes transferred before requeueing the job.
* Using this limit prevents one virtqueue from starving others. */
#define VHOST_NET_WEIGHT 0x80000
@@ -454,6 +458,16 @@ static void handle_tx(struct vhost_net *net)
msg.msg_control = NULL;
ubufs = NULL;
}
+ total_len += len;
+ if (vq->delayed < tx_batched &&
+ total_len < VHOST_NET_WEIGHT &&
+ !vhost_vq_avail_empty(&net->dev, vq)) {
+ vq->delayed++;
+ msg.msg_flags |= MSG_MORE;
+ } else {
+ vq->delayed = 0;
+ msg.msg_flags &= ~MSG_MORE;
+ }
/* TODO: Check specific error and bomb out unless ENOBUFS? */
err = sock->ops->sendmsg(sock, &msg, len);
if (unlikely(err < 0)) {
@@ -472,7 +486,6 @@ static void handle_tx(struct vhost_net *net)
vhost_add_used_and_signal(&net->dev, vq, head, 0);
else
vhost_zerocopy_signal_used(net, vq);
- total_len += len;
vhost_net_tx_packet(net);
if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
vhost_poll_queue(&vq->poll);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index fdf4cdf..bc362c7 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -311,6 +311,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->busyloop_timeout = 0;
vq->umem = NULL;
vq->iotlb = NULL;
+ vq->delayed = 0;
}
static int vhost_worker(void *data)
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 78f3c5f..9f81a94 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -141,6 +141,7 @@ struct vhost_virtqueue {
bool user_be;
#endif
u32 busyloop_timeout;
+ int delayed;
};
struct vhost_msg_node {
--
2.7.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.