From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Russell King <rmk+kernel@armlinux.org.uk>,
Arnd Bergmann <arnd@arndb.de>,
Bjorn Helgaas <bhelgaas@google.com>,
Sasha Levin <sashal@kernel.org>,
linux-pci@vger.kernel.org
Subject: [PATCH AUTOSEL 5.12 14/43] PCI: Dynamically map ECAM regions
Date: Sat, 10 Jul 2021 19:48:46 -0400 [thread overview]
Message-ID: <20210710234915.3220342-14-sashal@kernel.org> (raw)
In-Reply-To: <20210710234915.3220342-1-sashal@kernel.org>
From: Russell King <rmk+kernel@armlinux.org.uk>
[ Upstream commit 8fe55ef23387ce3c7488375b1fd539420d7654bb ]
Attempting to boot 32-bit ARM kernels under QEMU's 3.x virt models fails
when we have more than 512M of RAM in the model as we run out of vmalloc
space for the PCI ECAM regions. This failure will be silent when running
libvirt, as the console in that situation is a PCI device.
In this configuration, the kernel maps the whole ECAM, which QEMU sets up
for 256 buses, even when maybe only seven buses are in use. Each bus uses
1M of ECAM space, and ioremap() adds an additional guard page between
allocations. The kernel vmap allocator will align these regions to 512K,
resulting in each mapping eating 1.5M of vmalloc space. This means we need
384M of vmalloc space just to map all of these, which is very wasteful of
resources.
Fix this by only mapping the ECAM for buses we are going to be using. In
my setups, this is around seven buses in most guests, which is 10.5M of
vmalloc space - way smaller than the 384M that would otherwise be required.
This also means that the kernel can boot without forcing extra RAM into
highmem with the vmalloc= argument, or decreasing the virtual RAM available
to the guest.
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/E1lhCAV-0002yb-50@rmk-PC.armlinux.org.uk
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/pci/ecam.c | 54 ++++++++++++++++++++++++++++++++++------
include/linux/pci-ecam.h | 1 +
2 files changed, 47 insertions(+), 8 deletions(-)
diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c
index d2a1920bb055..1c40d2506aef 100644
--- a/drivers/pci/ecam.c
+++ b/drivers/pci/ecam.c
@@ -32,7 +32,7 @@ struct pci_config_window *pci_ecam_create(struct device *dev,
struct pci_config_window *cfg;
unsigned int bus_range, bus_range_max, bsz;
struct resource *conflict;
- int i, err;
+ int err;
if (busr->start > busr->end)
return ERR_PTR(-EINVAL);
@@ -50,6 +50,7 @@ struct pci_config_window *pci_ecam_create(struct device *dev,
cfg->busr.start = busr->start;
cfg->busr.end = busr->end;
cfg->busr.flags = IORESOURCE_BUS;
+ cfg->bus_shift = bus_shift;
bus_range = resource_size(&cfg->busr);
bus_range_max = resource_size(cfgres) >> bus_shift;
if (bus_range > bus_range_max) {
@@ -77,13 +78,6 @@ struct pci_config_window *pci_ecam_create(struct device *dev,
cfg->winp = kcalloc(bus_range, sizeof(*cfg->winp), GFP_KERNEL);
if (!cfg->winp)
goto err_exit_malloc;
- for (i = 0; i < bus_range; i++) {
- cfg->winp[i] =
- pci_remap_cfgspace(cfgres->start + i * bsz,
- bsz);
- if (!cfg->winp[i])
- goto err_exit_iomap;
- }
} else {
cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz);
if (!cfg->win)
@@ -129,6 +123,44 @@ void pci_ecam_free(struct pci_config_window *cfg)
}
EXPORT_SYMBOL_GPL(pci_ecam_free);
+static int pci_ecam_add_bus(struct pci_bus *bus)
+{
+ struct pci_config_window *cfg = bus->sysdata;
+ unsigned int bsz = 1 << cfg->bus_shift;
+ unsigned int busn = bus->number;
+ phys_addr_t start;
+
+ if (!per_bus_mapping)
+ return 0;
+
+ if (busn < cfg->busr.start || busn > cfg->busr.end)
+ return -EINVAL;
+
+ busn -= cfg->busr.start;
+ start = cfg->res.start + busn * bsz;
+
+ cfg->winp[busn] = pci_remap_cfgspace(start, bsz);
+ if (!cfg->winp[busn])
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void pci_ecam_remove_bus(struct pci_bus *bus)
+{
+ struct pci_config_window *cfg = bus->sysdata;
+ unsigned int busn = bus->number;
+
+ if (!per_bus_mapping || busn < cfg->busr.start || busn > cfg->busr.end)
+ return;
+
+ busn -= cfg->busr.start;
+ if (cfg->winp[busn]) {
+ iounmap(cfg->winp[busn]);
+ cfg->winp[busn] = NULL;
+ }
+}
+
/*
* Function to implement the pci_ops ->map_bus method
*/
@@ -167,6 +199,8 @@ EXPORT_SYMBOL_GPL(pci_ecam_map_bus);
/* ECAM ops */
const struct pci_ecam_ops pci_generic_ecam_ops = {
.pci_ops = {
+ .add_bus = pci_ecam_add_bus,
+ .remove_bus = pci_ecam_remove_bus,
.map_bus = pci_ecam_map_bus,
.read = pci_generic_config_read,
.write = pci_generic_config_write,
@@ -178,6 +212,8 @@ EXPORT_SYMBOL_GPL(pci_generic_ecam_ops);
/* ECAM ops for 32-bit access only (non-compliant) */
const struct pci_ecam_ops pci_32b_ops = {
.pci_ops = {
+ .add_bus = pci_ecam_add_bus,
+ .remove_bus = pci_ecam_remove_bus,
.map_bus = pci_ecam_map_bus,
.read = pci_generic_config_read32,
.write = pci_generic_config_write32,
@@ -187,6 +223,8 @@ const struct pci_ecam_ops pci_32b_ops = {
/* ECAM ops for 32-bit read only (non-compliant) */
const struct pci_ecam_ops pci_32b_read_ops = {
.pci_ops = {
+ .add_bus = pci_ecam_add_bus,
+ .remove_bus = pci_ecam_remove_bus,
.map_bus = pci_ecam_map_bus,
.read = pci_generic_config_read32,
.write = pci_generic_config_write,
diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h
index 65d3d83015c3..944da75ff25c 100644
--- a/include/linux/pci-ecam.h
+++ b/include/linux/pci-ecam.h
@@ -55,6 +55,7 @@ struct pci_ecam_ops {
struct pci_config_window {
struct resource res;
struct resource busr;
+ unsigned int bus_shift;
void *priv;
const struct pci_ecam_ops *ops;
union {
--
2.30.2
next prev parent reply other threads:[~2021-07-10 23:49 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-10 23:48 [PATCH AUTOSEL 5.12 01/43] power: supply: sc27xx: Add missing MODULE_DEVICE_TABLE Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 02/43] power: supply: sc2731_charger: " Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 03/43] f2fs: fix to avoid racing on fsync_entry_slab by multi filesystem instances Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 04/43] pwm: spear: Don't modify HW state in .remove callback Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 05/43] PCI: ftpci100: Rename macro name collision Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 06/43] power: supply: ab8500: Avoid NULL pointers Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 07/43] PCI: hv: Fix a race condition when removing the device Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 08/43] power: supply: max17042: Do not enforce (incorrect) interrupt trigger type Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 09/43] power: reset: gpio-poweroff: add missing MODULE_DEVICE_TABLE Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 10/43] ARM: 9087/1: kprobes: test-thumb: fix for LLVM_IAS=1 Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 11/43] PCI/P2PDMA: Avoid pci_get_slot(), which may sleep Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 12/43] NFSv4: Fix delegation return in cases where we have to retry Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 13/43] PCI: pciehp: Ignore Link Down/Up caused by DPC Sasha Levin
2021-07-10 23:48 ` Sasha Levin [this message]
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 15/43] watchdog: Fix possible use-after-free in wdt_startup() Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 16/43] watchdog: sc520_wdt: Fix possible use-after-free in wdt_turnoff() Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 17/43] watchdog: Fix possible use-after-free by calling del_timer_sync() Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 18/43] watchdog: imx_sc_wdt: fix pretimeout Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 19/43] watchdog: iTCO_wdt: Account for rebooting on second timeout Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 20/43] x86/fpu: Return proper error codes from user access functions Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 21/43] remoteproc: core: Fix cdev remove and rproc del Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 22/43] PCI: tegra: Add missing MODULE_DEVICE_TABLE Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 23/43] orangefs: fix orangefs df output Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 24/43] ceph: remove bogus checks and WARN_ONs from ceph_set_page_dirty Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 25/43] drm/gma500: Add the missed drm_gem_object_put() in psb_user_framebuffer_create() Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 26/43] NFS: nfs_find_open_context() may only select open files Sasha Levin
2021-07-10 23:48 ` [PATCH AUTOSEL 5.12 27/43] power: reset: regulator-poweroff: add missing MODULE_DEVICE_TABLE Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 28/43] power: supply: charger-manager: " Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 29/43] power: supply: ab8500: " Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 30/43] power: supply: axp288_fuel_gauge: Make "T3 MRD" no_battery_list DMI entry more generic Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 31/43] drm/amdgpu: fix Navi1x tcp power gating hang when issuing lightweight invalidaiton Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 32/43] drm/amdkfd: fix sysfs kobj leak Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 33/43] pwm: img: Fix PM reference leak in img_pwm_enable() Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 34/43] pwm: tegra: Don't modify HW state in .remove callback Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 35/43] ACPI: AMBA: Fix resource name in /proc/iomem Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 36/43] ACPI: video: Add quirk for the Dell Vostro 3350 Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 37/43] PCI: rockchip: Register IRQ handlers after device and data are ready Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 38/43] ext4: fix WARN_ON_ONCE(!buffer_uptodate) after an error writing the superblock Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 39/43] virtio-blk: Fix memory leak among suspend/resume procedure Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 40/43] virtio_net: Fix error handling in virtnet_restore() Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 41/43] virtio_console: Assure used length from device is limited Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 42/43] virtio: fix up virtio_disable_cb Sasha Levin
2021-07-11 4:23 ` Michael S. Tsirkin
2021-07-18 1:41 ` Sasha Levin
2021-07-10 23:49 ` [PATCH AUTOSEL 5.12 43/43] block: fix the problem of io_ticks becoming smaller Sasha Levin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210710234915.3220342-14-sashal@kernel.org \
--to=sashal@kernel.org \
--cc=arnd@arndb.de \
--cc=bhelgaas@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=rmk+kernel@armlinux.org.uk \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.