From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org, akpm@linux-foundation.org,
torvalds@linux-foundation.org, stable@vger.kernel.org
Cc: lwn@lwn.net, jslaby@suse.cz,
Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Subject: Re: Linux 4.14.207
Date: Thu, 19 Nov 2020 13:14:47 +0100 [thread overview]
Message-ID: <160578808695214@kroah.com> (raw)
In-Reply-To: <160578808684233@kroah.com>
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0a59fcf934f4..e0ce14f028d8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5022,6 +5022,14 @@
Disables the PV optimizations forcing the HVM guest to
run as generic HVM guest with no PV drivers.
+ xen.event_eoi_delay= [XEN]
+ How long to delay EOI handling in case of event
+ storms (jiffies). Default is 10.
+
+ xen.event_loop_timeout= [XEN]
+ After which time (jiffies) the event handling loop
+ should start to delay EOI handling. Default is 2.
+
xirc2ps_cs= [NET,PCMCIA]
Format:
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
diff --git a/Makefile b/Makefile
index 2d5ec8b7bcf5..c4bb19c1e4c7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 14
-SUBLEVEL = 206
+SUBLEVEL = 207
EXTRAVERSION =
NAME = Petit Gorille
diff --git a/arch/Kconfig b/arch/Kconfig
index 77b3e21c4844..95567f683275 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -339,6 +339,13 @@ config HAVE_RCU_TABLE_FREE
config HAVE_RCU_TABLE_INVALIDATE
bool
+config ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+ bool
+ help
+ Temporary select until all architectures can be converted to have
+ irqs disabled over activate_mm. Architectures that do IPI based TLB
+ shootdowns should enable this.
+
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h
index 59655459da59..74cef57bf806 100644
--- a/arch/arm/include/asm/kprobes.h
+++ b/arch/arm/include/asm/kprobes.h
@@ -54,20 +54,20 @@ int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
/* optinsn template addresses */
-extern __visible kprobe_opcode_t optprobe_template_entry;
-extern __visible kprobe_opcode_t optprobe_template_val;
-extern __visible kprobe_opcode_t optprobe_template_call;
-extern __visible kprobe_opcode_t optprobe_template_end;
-extern __visible kprobe_opcode_t optprobe_template_sub_sp;
-extern __visible kprobe_opcode_t optprobe_template_add_sp;
-extern __visible kprobe_opcode_t optprobe_template_restore_begin;
-extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn;
-extern __visible kprobe_opcode_t optprobe_template_restore_end;
+extern __visible kprobe_opcode_t optprobe_template_entry[];
+extern __visible kprobe_opcode_t optprobe_template_val[];
+extern __visible kprobe_opcode_t optprobe_template_call[];
+extern __visible kprobe_opcode_t optprobe_template_end[];
+extern __visible kprobe_opcode_t optprobe_template_sub_sp[];
+extern __visible kprobe_opcode_t optprobe_template_add_sp[];
+extern __visible kprobe_opcode_t optprobe_template_restore_begin[];
+extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[];
+extern __visible kprobe_opcode_t optprobe_template_restore_end[];
#define MAX_OPTIMIZED_LENGTH 4
#define MAX_OPTINSN_SIZE \
- ((unsigned long)&optprobe_template_end - \
- (unsigned long)&optprobe_template_entry)
+ ((unsigned long)optprobe_template_end - \
+ (unsigned long)optprobe_template_entry)
#define RELATIVEJUMP_SIZE 4
struct arch_optimized_insn {
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index 0dc23fc227ed..cf08cb726767 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -98,21 +98,21 @@ asm (
"optprobe_template_end:\n");
#define TMPL_VAL_IDX \
- ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry)
#define TMPL_CALL_IDX \
- ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry)
#define TMPL_END_IDX \
- ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry)
#define TMPL_ADD_SP \
- ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry)
#define TMPL_SUB_SP \
- ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_BEGIN \
- ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_ORIGN_INSN \
- ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry)
#define TMPL_RESTORE_END \
- ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry)
+ ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry)
/*
* ARM can always optimize an instruction when using ARM ISA, except
@@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or
}
/* Copy arch-dep-instance from template. */
- memcpy(code, (unsigned long *)&optprobe_template_entry,
+ memcpy(code, (unsigned long *)optprobe_template_entry,
TMPL_END_IDX * sizeof(kprobe_opcode_t));
/* Adjust buffer according to instruction. */
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 81fd41d5a0d9..0661227d935c 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1190,7 +1190,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
if (!filter->range || !filter->size)
return -EOPNOTSUPP;
- if (!filter->inode) {
+ if (!filter->path.dentry) {
if (!valid_kernel_ip(filter->offset))
return -EINVAL;
@@ -1217,7 +1217,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
return;
list_for_each_entry(filter, &head->list, entry) {
- if (filter->inode && !offs[range]) {
+ if (filter->path.dentry && !offs[range]) {
msr_a = msr_b = 0;
} else {
/* apply the offset */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 245184152892..1dbc64bbd866 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1240,6 +1240,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
return 0;
}
+static bool is_spec_ib_user_controlled(void)
+{
+ return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP;
+}
+
static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
switch (ctrl) {
@@ -1247,17 +1255,26 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return 0;
- /*
- * Indirect branch speculation is always disabled in strict
- * mode. It can neither be enabled if it was force-disabled
- * by a previous prctl call.
+ /*
+ * With strict mode for both IBPB and STIBP, the instruction
+ * code paths avoid checking this task flag and instead,
+ * unconditionally run the instruction. However, STIBP and IBPB
+ * are independent and either can be set to conditionally
+ * enabled regardless of the mode of the other.
+ *
+ * If either is set to conditional, allow the task flag to be
+ * updated, unless it was force-disabled by a previous prctl
+ * call. Currently, this is possible on an AMD CPU which has the
+ * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the
+ * kernel is booted with 'spectre_v2_user=seccomp', then
+ * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and
+ * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED.
*/
- if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+ if (!is_spec_ib_user_controlled() ||
task_spec_ib_force_disable(task))
return -EPERM;
+
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
break;
@@ -1270,10 +1287,10 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return -EPERM;
- if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+
+ if (!is_spec_ib_user_controlled())
return 0;
+
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
task_set_spec_ib_force_disable(task);
@@ -1336,20 +1353,17 @@ static int ib_prctl_get(struct task_struct *task)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return PR_SPEC_ENABLE;
- else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
- return PR_SPEC_DISABLE;
- else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
- spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) {
+ else if (is_spec_ib_user_controlled()) {
if (task_spec_ib_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ib_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- } else
+ } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+ return PR_SPEC_DISABLE;
+ else
return PR_SPEC_NOT_AFFECTED;
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index cdf62fb94fb1..70ef826af7f8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -268,7 +268,7 @@ static void nbd_size_clear(struct nbd_device *nbd)
}
}
-static void nbd_size_update(struct nbd_device *nbd)
+static void nbd_size_update(struct nbd_device *nbd, bool start)
{
struct nbd_config *config = nbd->config;
struct block_device *bdev = bdget_disk(nbd->disk, 0);
@@ -279,7 +279,8 @@ static void nbd_size_update(struct nbd_device *nbd)
if (bdev) {
if (bdev->bd_disk) {
bd_set_size(bdev, config->bytesize);
- set_blocksize(bdev, config->blksize);
+ if (start)
+ set_blocksize(bdev, config->blksize);
} else
bdev->bd_invalidated = 1;
bdput(bdev);
@@ -294,7 +295,7 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize,
config->blksize = blocksize;
config->bytesize = blocksize * nr_blocks;
if (nbd->task_recv != NULL)
- nbd_size_update(nbd);
+ nbd_size_update(nbd, false);
}
static void nbd_complete_rq(struct request *req)
@@ -1231,7 +1232,7 @@ static int nbd_start_device(struct nbd_device *nbd)
args->index = i;
queue_work(nbd->recv_workq, &args->work);
}
- nbd_size_update(nbd);
+ nbd_size_update(nbd, true);
return error;
}
@@ -1431,6 +1432,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
bdev->bd_openers == 0)
nbd_disconnect_and_put(nbd);
+ bdput(bdev);
nbd_config_put(nbd);
nbd_put(nbd);
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index c1d1b94f71b5..04ae2474e334 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -183,7 +183,7 @@ static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
-static int do_block_io_op(struct xen_blkif_ring *ring);
+static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags);
static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req);
@@ -608,6 +608,8 @@ int xen_blkif_schedule(void *arg)
struct xen_vbd *vbd = &blkif->vbd;
unsigned long timeout;
int ret;
+ bool do_eoi;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
set_freezable();
while (!kthread_should_stop()) {
@@ -632,16 +634,23 @@ int xen_blkif_schedule(void *arg)
if (timeout == 0)
goto purge_gnt_list;
+ do_eoi = ring->waiting_reqs;
+
ring->waiting_reqs = 0;
smp_mb(); /* clear flag *before* checking for work */
- ret = do_block_io_op(ring);
+ ret = do_block_io_op(ring, &eoi_flags);
if (ret > 0)
ring->waiting_reqs = 1;
if (ret == -EACCES)
wait_event_interruptible(ring->shutdown_wq,
kthread_should_stop());
+ if (do_eoi && !ring->waiting_reqs) {
+ xen_irq_lateeoi(ring->irq, eoi_flags);
+ eoi_flags |= XEN_EOI_FLAG_SPURIOUS;
+ }
+
purge_gnt_list:
if (blkif->vbd.feature_gnt_persistent &&
time_after(jiffies, ring->next_lru)) {
@@ -1114,7 +1123,7 @@ static void end_block_io_op(struct bio *bio)
* and transmute it to the block API to hand it over to the proper block disk.
*/
static int
-__do_block_io_op(struct xen_blkif_ring *ring)
+__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
union blkif_back_rings *blk_rings = &ring->blk_rings;
struct blkif_request req;
@@ -1137,6 +1146,9 @@ __do_block_io_op(struct xen_blkif_ring *ring)
if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
break;
+ /* We've seen a request, so clear spurious eoi flag. */
+ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
if (kthread_should_stop()) {
more_to_do = 1;
break;
@@ -1195,13 +1207,13 @@ __do_block_io_op(struct xen_blkif_ring *ring)
}
static int
-do_block_io_op(struct xen_blkif_ring *ring)
+do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags)
{
union blkif_back_rings *blk_rings = &ring->blk_rings;
int more_to_do;
do {
- more_to_do = __do_block_io_op(ring);
+ more_to_do = __do_block_io_op(ring, eoi_flags);
if (more_to_do)
break;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index e9fa4a1fc791..d19adf1db1f1 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -236,9 +236,8 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
BUG();
}
- err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
- xen_blkif_be_int, 0,
- "blkif-backend", ring);
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid,
+ evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
if (err < 0) {
xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
ring->blk_rings.common.sring = NULL;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b202f66fc383..868d2620b7ac 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1246,7 +1246,6 @@ void add_interrupt_randomness(int irq, int irq_flags)
fast_mix(fast_pool);
add_interrupt_bench(cycles);
- this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]);
if (unlikely(crng_init == 0)) {
if ((fast_pool->count >= 64) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 11beef7c595f..d35e5d8e8a05 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1098,22 +1098,19 @@ static int cik_sdma_soft_reset(void *handle)
{
u32 srbm_soft_reset = 0;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- u32 tmp = RREG32(mmSRBM_STATUS2);
+ u32 tmp;
- if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
- /* sdma0 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
- tmp |= SDMA0_F32_CNTL__HALT_MASK;
- WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
- }
- if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
- /* sdma1 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
- tmp |= SDMA0_F32_CNTL__HALT_MASK;
- WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
- }
+ /* sdma0 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
+
+ /* sdma1 */
+ tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
+ tmp |= SDMA0_F32_CNTL__HALT_MASK;
+ WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
+ srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
if (srbm_soft_reset) {
tmp = RREG32(mmSRBM_SOFT_RESET);
diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c
index 78eb10902809..076b6da44f46 100644
--- a/drivers/gpu/drm/gma500/psb_irq.c
+++ b/drivers/gpu/drm/gma500/psb_irq.c
@@ -350,6 +350,7 @@ int psb_irq_postinstall(struct drm_device *dev)
{
struct drm_psb_private *dev_priv = dev->dev_private;
unsigned long irqflags;
+ unsigned int i;
spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags);
@@ -362,20 +363,12 @@ int psb_irq_postinstall(struct drm_device *dev)
PSB_WVDC32(dev_priv->vdc_irq_mask, PSB_INT_ENABLE_R);
PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM);
- if (dev->vblank[0].enabled)
- psb_enable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE);
- else
- psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE);
-
- if (dev->vblank[1].enabled)
- psb_enable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE);
- else
- psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE);
-
- if (dev->vblank[2].enabled)
- psb_enable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE);
- else
- psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE);
+ for (i = 0; i < dev->num_crtcs; ++i) {
+ if (dev->vblank[i].enabled)
+ psb_enable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE);
+ else
+ psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE);
+ }
if (dev_priv->ops->hotplug_enable)
dev_priv->ops->hotplug_enable(dev, true);
@@ -388,6 +381,7 @@ void psb_irq_uninstall(struct drm_device *dev)
{
struct drm_psb_private *dev_priv = dev->dev_private;
unsigned long irqflags;
+ unsigned int i;
spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags);
@@ -396,14 +390,10 @@ void psb_irq_uninstall(struct drm_device *dev)
PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM);
- if (dev->vblank[0].enabled)
- psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE);
-
- if (dev->vblank[1].enabled)
- psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE);
-
- if (dev->vblank[2].enabled)
- psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE);
+ for (i = 0; i < dev->num_crtcs; ++i) {
+ if (dev->vblank[i].enabled)
+ psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE);
+ }
dev_priv->vdc_irq_mask &= _PSB_IRQ_SGX_FLAG |
_PSB_IRQ_MSVDX_FLAG |
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 2d93c8f454bc..423754cc6c30 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1230,7 +1230,7 @@ static void balloon_up(struct work_struct *dummy)
/* Refuse to balloon below the floor. */
if (avail_pages < num_pages || avail_pages - num_pages < floor) {
- pr_warn("Balloon request will be partially fulfilled. %s\n",
+ pr_info("Balloon request will be partially fulfilled. %s\n",
avail_pages < num_pages ? "Not enough memory." :
"Balloon floor reached.");
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 74c8638aac2b..ac3cac052af9 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -404,7 +404,11 @@ extern bool amd_iommu_np_cache;
/* Only true if all IOMMUs support device IOTLBs */
extern bool amd_iommu_iotlb_sup;
-#define MAX_IRQS_PER_TABLE 256
+/*
+ * AMD IOMMU hardware only support 512 IRTEs despite
+ * the architectural limitation of 2048 entries.
+ */
+#define MAX_IRQS_PER_TABLE 512
#define IRQ_TABLE_ALIGNMENT 128
struct irq_remap_table {
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index 5371df4d8af3..be4fe2ea0afd 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -138,11 +138,11 @@ static inline u8 mei_cl_me_id(const struct mei_cl *cl)
*
* @cl: host client
*
- * Return: mtu
+ * Return: mtu or 0 if client is not connected
*/
static inline size_t mei_cl_mtu(const struct mei_cl *cl)
{
- return cl->me_cl->props.max_msg_length;
+ return cl->me_cl ? cl->me_cl->props.max_msg_length : 0;
}
/**
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 05ad5ed145a3..e79965a390aa 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -492,9 +492,13 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8
*/
struct sk_buff *skb = priv->echo_skb[idx];
struct canfd_frame *cf = (struct canfd_frame *)skb->data;
- u8 len = cf->len;
- *len_ptr = len;
+ /* get the real payload length for netdev statistics */
+ if (cf->can_id & CAN_RTR_FLAG)
+ *len_ptr = 0;
+ else
+ *len_ptr = cf->len;
+
priv->echo_skb[idx] = NULL;
return skb;
@@ -519,7 +523,11 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
if (!skb)
return 0;
- netif_rx(skb);
+ skb_get(skb);
+ if (netif_rx(skb) == NET_RX_SUCCESS)
+ dev_consume_skb_any(skb);
+ else
+ dev_kfree_skb_any(skb);
return len;
}
diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c
index ed8561d4a90f..a38dc6d9c978 100644
--- a/drivers/net/can/peak_canfd/peak_canfd.c
+++ b/drivers/net/can/peak_canfd/peak_canfd.c
@@ -256,8 +256,7 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv,
cf_len = get_can_dlc(pucan_msg_get_dlc(msg));
/* if this frame is an echo, */
- if ((rx_msg_flags & PUCAN_MSG_LOOPED_BACK) &&
- !(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE)) {
+ if (rx_msg_flags & PUCAN_MSG_LOOPED_BACK) {
unsigned long flags;
spin_lock_irqsave(&priv->echo_lock, flags);
@@ -271,7 +270,13 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv,
netif_wake_queue(priv->ndev);
spin_unlock_irqrestore(&priv->echo_lock, flags);
- return 0;
+
+ /* if this frame is only an echo, stop here. Otherwise,
+ * continue to push this application self-received frame into
+ * its own rx queue.
+ */
+ if (!(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE))
+ return 0;
}
/* otherwise, it should be pushed into rx fifo */
diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c
index 54ffd1e91a69..a04680183066 100644
--- a/drivers/net/can/rx-offload.c
+++ b/drivers/net/can/rx-offload.c
@@ -281,7 +281,7 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
if (skb_queue_len(&offload->skb_queue) >
offload->skb_queue_len_max) {
- kfree_skb(skb);
+ dev_kfree_skb_any(skb);
return -ENOBUFS;
}
@@ -326,7 +326,7 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload,
{
if (skb_queue_len(&offload->skb_queue) >
offload->skb_queue_len_max) {
- kfree_skb(skb);
+ dev_kfree_skb_any(skb);
return -ENOBUFS;
}
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 85d92f129af2..9d78ba777614 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -154,14 +154,55 @@ void peak_usb_get_ts_tv(struct peak_time_ref *time_ref, u32 ts,
/* protect from getting timeval before setting now */
if (time_ref->tv_host.tv_sec > 0) {
u64 delta_us;
+ s64 delta_ts = 0;
+
+ /* General case: dev_ts_1 < dev_ts_2 < ts, with:
+ *
+ * - dev_ts_1 = previous sync timestamp
+ * - dev_ts_2 = last sync timestamp
+ * - ts = event timestamp
+ * - ts_period = known sync period (theoretical)
+ * ~ dev_ts2 - dev_ts1
+ * *but*:
+ *
+ * - time counters wrap (see adapter->ts_used_bits)
+ * - sometimes, dev_ts_1 < ts < dev_ts2
+ *
+ * "normal" case (sync time counters increase):
+ * must take into account case when ts wraps (tsw)
+ *
+ * < ts_period > < >
+ * | | |
+ * ---+--------+----+-------0-+--+-->
+ * ts_dev_1 | ts_dev_2 |
+ * ts tsw
+ */
+ if (time_ref->ts_dev_1 < time_ref->ts_dev_2) {
+ /* case when event time (tsw) wraps */
+ if (ts < time_ref->ts_dev_1)
+ delta_ts = 1 << time_ref->adapter->ts_used_bits;
+
+ /* Otherwise, sync time counter (ts_dev_2) has wrapped:
+ * handle case when event time (tsn) hasn't.
+ *
+ * < ts_period > < >
+ * | | |
+ * ---+--------+--0-+---------+--+-->
+ * ts_dev_1 | ts_dev_2 |
+ * tsn ts
+ */
+ } else if (time_ref->ts_dev_1 < ts) {
+ delta_ts = -(1 << time_ref->adapter->ts_used_bits);
+ }
- delta_us = ts - time_ref->ts_dev_2;
- if (ts < time_ref->ts_dev_2)
- delta_us &= (1 << time_ref->adapter->ts_used_bits) - 1;
+ /* add delay between last sync and event timestamps */
+ delta_ts += (signed int)(ts - time_ref->ts_dev_2);
- delta_us += time_ref->ts_total;
+ /* add time from beginning to last sync */
+ delta_ts += time_ref->ts_total;
- delta_us *= time_ref->adapter->us_per_ts_scale;
+ /* convert ticks number into microseconds */
+ delta_us = delta_ts * time_ref->adapter->us_per_ts_scale;
delta_us >>= time_ref->adapter->us_per_ts_shift;
*tv = time_ref->tv_host_0;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 773fc15ac3ab..0d762bdac4f8 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -476,12 +476,18 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if,
struct pucan_msg *rx_msg)
{
struct pucan_rx_msg *rm = (struct pucan_rx_msg *)rx_msg;
- struct peak_usb_device *dev = usb_if->dev[pucan_msg_get_channel(rm)];
- struct net_device *netdev = dev->netdev;
+ struct peak_usb_device *dev;
+ struct net_device *netdev;
struct canfd_frame *cfd;
struct sk_buff *skb;
const u16 rx_msg_flags = le16_to_cpu(rm->flags);
+ if (pucan_msg_get_channel(rm) >= ARRAY_SIZE(usb_if->dev))
+ return -ENOMEM;
+
+ dev = usb_if->dev[pucan_msg_get_channel(rm)];
+ netdev = dev->netdev;
+
if (rx_msg_flags & PUCAN_MSG_EXT_DATA_LEN) {
/* CANFD frame case */
skb = alloc_canfd_skb(netdev, &cfd);
@@ -528,15 +534,21 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if,
struct pucan_msg *rx_msg)
{
struct pucan_status_msg *sm = (struct pucan_status_msg *)rx_msg;
- struct peak_usb_device *dev = usb_if->dev[pucan_stmsg_get_channel(sm)];
- struct pcan_usb_fd_device *pdev =
- container_of(dev, struct pcan_usb_fd_device, dev);
+ struct pcan_usb_fd_device *pdev;
enum can_state new_state = CAN_STATE_ERROR_ACTIVE;
enum can_state rx_state, tx_state;
- struct net_device *netdev = dev->netdev;
+ struct peak_usb_device *dev;
+ struct net_device *netdev;
struct can_frame *cf;
struct sk_buff *skb;
+ if (pucan_stmsg_get_channel(sm) >= ARRAY_SIZE(usb_if->dev))
+ return -ENOMEM;
+
+ dev = usb_if->dev[pucan_stmsg_get_channel(sm)];
+ pdev = container_of(dev, struct pcan_usb_fd_device, dev);
+ netdev = dev->netdev;
+
/* nothing should be sent while in BUS_OFF state */
if (dev->can.state == CAN_STATE_BUS_OFF)
return 0;
@@ -589,9 +601,14 @@ static int pcan_usb_fd_decode_error(struct pcan_usb_fd_if *usb_if,
struct pucan_msg *rx_msg)
{
struct pucan_error_msg *er = (struct pucan_error_msg *)rx_msg;
- struct peak_usb_device *dev = usb_if->dev[pucan_ermsg_get_channel(er)];
- struct pcan_usb_fd_device *pdev =
- container_of(dev, struct pcan_usb_fd_device, dev);
+ struct pcan_usb_fd_device *pdev;
+ struct peak_usb_device *dev;
+
+ if (pucan_ermsg_get_channel(er) >= ARRAY_SIZE(usb_if->dev))
+ return -EINVAL;
+
+ dev = usb_if->dev[pucan_ermsg_get_channel(er)];
+ pdev = container_of(dev, struct pcan_usb_fd_device, dev);
/* keep a trace of tx and rx error counters for later use */
pdev->bec.txerr = er->tx_err_cnt;
@@ -605,11 +622,17 @@ static int pcan_usb_fd_decode_overrun(struct pcan_usb_fd_if *usb_if,
struct pucan_msg *rx_msg)
{
struct pcan_ufd_ovr_msg *ov = (struct pcan_ufd_ovr_msg *)rx_msg;
- struct peak_usb_device *dev = usb_if->dev[pufd_omsg_get_channel(ov)];
- struct net_device *netdev = dev->netdev;
+ struct peak_usb_device *dev;
+ struct net_device *netdev;
struct can_frame *cf;
struct sk_buff *skb;
+ if (pufd_omsg_get_channel(ov) >= ARRAY_SIZE(usb_if->dev))
+ return -EINVAL;
+
+ dev = usb_if->dev[pufd_omsg_get_channel(ov)];
+ netdev = dev->netdev;
+
/* allocate an skb to store the error frame */
skb = alloc_can_err_skb(netdev, &cf);
if (!skb)
@@ -727,6 +750,9 @@ static int pcan_usb_fd_encode_msg(struct peak_usb_device *dev,
u16 tx_msg_size, tx_msg_flags;
u8 can_dlc;
+ if (cfd->len > CANFD_MAX_DLEN)
+ return -EINVAL;
+
tx_msg_size = ALIGN(sizeof(struct pucan_tx_msg) + cfd->len, 4);
tx_msg->size = cpu_to_le16(tx_msg_size);
tx_msg->type = cpu_to_le16(PUCAN_MSG_CAN_TX);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 72961082e398..530b8da11960 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7143,7 +7143,8 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
opts[1] |= transport_offset << TCPHO_SHIFT;
} else {
if (unlikely(rtl_test_hw_pad_bug(tp, skb)))
- return !eth_skb_pad(skb);
+ /* eth_skb_pad would free the skb on error */
+ return !__skb_put_padto(skb, ETH_ZLEN, false);
}
return true;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 811fe0bde8a3..6e58ee9f143e 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -334,8 +334,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
return ret;
}
-static int vrf_finish_direct(struct net *net, struct sock *sk,
- struct sk_buff *skb)
+static void vrf_finish_direct(struct sk_buff *skb)
{
struct net_device *vrf_dev = skb->dev;
@@ -354,7 +353,8 @@ static int vrf_finish_direct(struct net *net, struct sock *sk,
skb_pull(skb, ETH_HLEN);
}
- return 1;
+ /* reset skb device */
+ nf_reset(skb);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -433,15 +433,41 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
return skb;
}
+static int vrf_output6_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ vrf_finish_direct(skb);
+
+ return vrf_ip6_local_out(net, sk, skb);
+}
+
static int vrf_output6_direct(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
+ int err = 1;
+
skb->protocol = htons(ETH_P_IPV6);
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb->dev,
- vrf_finish_direct,
- !(IPCB(skb)->flags & IPSKB_REROUTED));
+ if (!(IPCB(skb)->flags & IPSKB_REROUTED))
+ err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, skb->dev, vrf_output6_direct_finish);
+
+ if (likely(err == 1))
+ vrf_finish_direct(skb);
+
+ return err;
+}
+
+static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = vrf_output6_direct(net, sk, skb);
+ if (likely(err == 1))
+ err = vrf_ip6_local_out(net, sk, skb);
+
+ return err;
}
static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
@@ -454,18 +480,15 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev;
err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk,
- skb, NULL, vrf_dev, vrf_output6_direct);
+ skb, NULL, vrf_dev, vrf_ip6_out_direct_finish);
if (likely(err == 1))
err = vrf_output6_direct(net, sk, skb);
- /* reset skb device */
if (likely(err == 1))
- nf_reset(skb);
- else
- skb = NULL;
+ return skb;
- return skb;
+ return NULL;
}
static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
@@ -649,15 +672,41 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
return skb;
}
+static int vrf_output_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ vrf_finish_direct(skb);
+
+ return vrf_ip_local_out(net, sk, skb);
+}
+
static int vrf_output_direct(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
+ int err = 1;
+
skb->protocol = htons(ETH_P_IP);
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb->dev,
- vrf_finish_direct,
- !(IPCB(skb)->flags & IPSKB_REROUTED));
+ if (!(IPCB(skb)->flags & IPSKB_REROUTED))
+ err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, skb->dev, vrf_output_direct_finish);
+
+ if (likely(err == 1))
+ vrf_finish_direct(skb);
+
+ return err;
+}
+
+static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = vrf_output_direct(net, sk, skb);
+ if (likely(err == 1))
+ err = vrf_ip_local_out(net, sk, skb);
+
+ return err;
}
static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
@@ -670,18 +719,15 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
skb->dev = vrf_dev;
err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
- skb, NULL, vrf_dev, vrf_output_direct);
+ skb, NULL, vrf_dev, vrf_ip_out_direct_finish);
if (likely(err == 1))
err = vrf_output_direct(net, sk, skb);
- /* reset skb device */
if (likely(err == 1))
- nf_reset(skb);
- else
- skb = NULL;
+ return skb;
- return skb;
+ return NULL;
}
static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 6ea16260ec76..1a1f2d1069fb 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -902,6 +902,7 @@ static ssize_t cosa_write(struct file *file,
chan->tx_status = 1;
spin_unlock_irqrestore(&cosa->lock, flags);
up(&chan->wsem);
+ kfree(kbuf);
return -ERESTARTSYS;
}
}
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index 11d06021b5e4..6782c3d0c333 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
@@ -973,7 +973,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
struct ath_htc_rx_status *rxstatus;
struct ath_rx_status rx_stats;
bool decrypt_error = false;
- __be16 rs_datalen;
+ u16 rs_datalen;
bool is_phyerr;
if (skb->len < HTC_RX_FRAME_HEADER_SIZE) {
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 5b1d2e8402d9..347c796afd4e 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -140,6 +140,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */
char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
struct xenvif *vif; /* Parent VIF */
+ /*
+ * TX/RX common EOI handling.
+ * When feature-split-event-channels = 0, interrupt handler sets
+ * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set
+ * by the RX and TX interrupt handlers.
+ * RX and TX handler threads will issue an EOI when either
+ * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or
+ * NETBK_TX_EOI) are set and they will reset those bits.
+ */
+ atomic_t eoi_pending;
+#define NETBK_RX_EOI 0x01
+#define NETBK_TX_EOI 0x02
+#define NETBK_COMMON_EOI 0x04
+
/* Use NAPI for guest TX */
struct napi_struct napi;
/* When feature-split-event-channels = 0, tx_irq = rx_irq. */
@@ -356,6 +370,7 @@ int xenvif_dealloc_kthread(void *data);
irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
+bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread);
void xenvif_rx_action(struct xenvif_queue *queue);
void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index b5fa910b47b7..007600b7b868 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -77,12 +77,28 @@ int xenvif_schedulable(struct xenvif *vif)
!vif->disabled;
}
+static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue)
+{
+ bool rc;
+
+ rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx);
+ if (rc)
+ napi_schedule(&queue->napi);
+ return rc;
+}
+
static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
+ int old;
- if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx))
- napi_schedule(&queue->napi);
+ old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending);
+ WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n");
+
+ if (!xenvif_handle_tx_interrupt(queue)) {
+ atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending);
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ }
return IRQ_HANDLED;
}
@@ -116,19 +132,46 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
return work_done;
}
+static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue)
+{
+ bool rc;
+
+ rc = xenvif_have_rx_work(queue, false);
+ if (rc)
+ xenvif_kick_thread(queue);
+ return rc;
+}
+
static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
+ int old;
- xenvif_kick_thread(queue);
+ old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending);
+ WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n");
+
+ if (!xenvif_handle_rx_interrupt(queue)) {
+ atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending);
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ }
return IRQ_HANDLED;
}
irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
- xenvif_tx_interrupt(irq, dev_id);
- xenvif_rx_interrupt(irq, dev_id);
+ struct xenvif_queue *queue = dev_id;
+ int old;
+
+ old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending);
+ WARN(old, "Interrupt while EOI pending\n");
+
+ /* Use bitwise or as we need to call both functions. */
+ if ((!xenvif_handle_tx_interrupt(queue) |
+ !xenvif_handle_rx_interrupt(queue))) {
+ atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending);
+ xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
+ }
return IRQ_HANDLED;
}
@@ -595,7 +638,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
shared = (struct xen_netif_ctrl_sring *)addr;
BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE);
- err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn);
if (err < 0)
goto err_unmap;
@@ -653,7 +696,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
queue->name, queue);
if (err < 0)
@@ -664,7 +707,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
/* feature-split-event-channels == 1 */
snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
"%s-tx", queue->name);
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
queue->tx_irq_name, queue);
if (err < 0)
@@ -674,7 +717,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
"%s-rx", queue->name);
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
queue->rx_irq_name, queue);
if (err < 0)
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index a871aa6418d0..b8100298017b 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -162,6 +162,10 @@ void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
if (more_to_do)
napi_schedule(&queue->napi);
+ else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI,
+ &queue->eoi_pending) &
+ (NETBK_TX_EOI | NETBK_COMMON_EOI))
+ xen_irq_lateeoi(queue->tx_irq, 0);
}
static void tx_add_credit(struct xenvif_queue *queue)
@@ -1615,9 +1619,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif)
irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
{
struct xenvif *vif = data;
+ unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS;
- while (xenvif_ctrl_work_todo(vif))
+ while (xenvif_ctrl_work_todo(vif)) {
xenvif_ctrl_action(vif);
+ eoi_flag = 0;
+ }
+
+ xen_irq_lateeoi(irq, eoi_flag);
return IRQ_HANDLED;
}
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index b1cf7c6f407a..f152246c7dfb 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -490,13 +490,13 @@ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
return queue->stalled && prod - cons >= 1;
}
-static bool xenvif_have_rx_work(struct xenvif_queue *queue)
+bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread)
{
return xenvif_rx_ring_slots_available(queue) ||
(queue->vif->stall_timeout &&
(xenvif_rx_queue_stalled(queue) ||
xenvif_rx_queue_ready(queue))) ||
- kthread_should_stop() ||
+ (test_kthread && kthread_should_stop()) ||
queue->vif->disabled;
}
@@ -527,15 +527,20 @@ static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
{
DEFINE_WAIT(wait);
- if (xenvif_have_rx_work(queue))
+ if (xenvif_have_rx_work(queue, true))
return;
for (;;) {
long ret;
prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
- if (xenvif_have_rx_work(queue))
+ if (xenvif_have_rx_work(queue, true))
break;
+ if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI,
+ &queue->eoi_pending) &
+ (NETBK_RX_EOI | NETBK_COMMON_EOI))
+ xen_irq_lateeoi(queue->rx_irq, 0);
+
ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
if (!ret)
break;
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 456339c19aed..94f4ff22044f 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -899,11 +899,13 @@ EXPORT_SYMBOL_GPL(of_dma_get_range);
*/
bool of_dma_is_coherent(struct device_node *np)
{
- struct device_node *node = of_node_get(np);
+ struct device_node *node;
if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT))
return true;
+ node = of_node_get(np);
+
while (node) {
if (of_property_read_bool(node, "dma-coherent")) {
of_node_put(node);
diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
index 7f13ce8450a3..5249033ed413 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
@@ -458,13 +458,14 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function,
static bool aspeed_expr_is_gpio(const struct aspeed_sig_expr *expr)
{
/*
- * The signal type is GPIO if the signal name has "GPIO" as a prefix.
+ * The signal type is GPIO if the signal name has "GPI" as a prefix.
* strncmp (rather than strcmp) is used to implement the prefix
* requirement.
*
- * expr->signal might look like "GPIOT3" in the GPIO case.
+ * expr->signal might look like "GPIOB1" in the GPIO case.
+ * expr->signal might look like "GPIT0" in the GPI case.
*/
- return strncmp(expr->signal, "GPIO", 4) == 0;
+ return strncmp(expr->signal, "GPI", 3) == 0;
}
static bool aspeed_gpio_in_exprs(const struct aspeed_sig_expr **exprs)
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 71df0f70b61f..45b062b0d418 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -602,6 +602,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned pin,
value |= PADCFG1_TERM_UP;
+ /* Set default strength value in case none is given */
+ if (arg == 1)
+ arg = 5000;
+
switch (arg) {
case 20000:
value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT;
@@ -624,6 +628,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned pin,
case PIN_CONFIG_BIAS_PULL_DOWN:
value &= ~(PADCFG1_TERM_UP | PADCFG1_TERM_MASK);
+ /* Set default strength value in case none is given */
+ if (arg == 1)
+ arg = 5000;
+
switch (arg) {
case 20000:
value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT;
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 7385cd81498c..d4cd24ad077c 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -144,7 +144,7 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset,
pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
} else if (debounce < 250000) {
- time = debounce / 15600;
+ time = debounce / 15625;
pin_reg |= time & DB_TMR_OUT_MASK;
pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF);
pin_reg |= BIT(DB_TMR_LARGE_OFF);
@@ -154,14 +154,14 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset,
pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
pin_reg |= BIT(DB_TMR_LARGE_OFF);
} else {
- pin_reg &= ~DB_CNTRl_MASK;
+ pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF);
ret = -EINVAL;
}
} else {
pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF);
pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
pin_reg &= ~DB_TMR_OUT_MASK;
- pin_reg &= ~DB_CNTRl_MASK;
+ pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF);
}
writel(pin_reg, gpio_dev->base + offset * 4);
raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a3c265177855..978e145b5c8f 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -3217,6 +3217,8 @@ static int _regulator_get_voltage(struct regulator_dev *rdev)
ret = rdev->desc->fixed_uV;
} else if (rdev->supply) {
ret = _regulator_get_voltage(rdev->supply->rdev);
+ } else if (rdev->supply_name) {
+ return -EPROBE_DEFER;
} else {
return -EINVAL;
}
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 135376ee2cbf..ba68454109ba 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -653,8 +653,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
rcu_read_lock();
list_for_each_entry_rcu(h,
&tmp_pg->dh_list, node) {
- /* h->sdev should always be valid */
- BUG_ON(!h->sdev);
+ if (!h->sdev)
+ continue;
h->sdev->access_state = desc[0];
}
rcu_read_unlock();
@@ -700,7 +700,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
pg->expiry = 0;
rcu_read_lock();
list_for_each_entry_rcu(h, &pg->dh_list, node) {
- BUG_ON(!h->sdev);
+ if (!h->sdev)
+ continue;
h->sdev->access_state =
(pg->state & SCSI_ACCESS_STATE_MASK);
if (pg->pref)
@@ -1138,7 +1139,6 @@ static void alua_bus_detach(struct scsi_device *sdev)
spin_lock(&h->pg_lock);
pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
rcu_assign_pointer(h->pg, NULL);
- h->sdev = NULL;
spin_unlock(&h->pg_lock);
if (pg) {
spin_lock_irq(&pg->lock);
@@ -1147,6 +1147,7 @@ static void alua_bus_detach(struct scsi_device *sdev)
kref_put(&pg->kref, release_port_group);
}
sdev->handler_data = NULL;
+ synchronize_rcu();
kfree(h);
}
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 3b892918d821..9ad9910cc085 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -8549,7 +8549,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* hook into SCSI subsystem */
rc = hpsa_scsi_add_host(h);
if (rc)
- goto clean7; /* perf, sg, cmd, irq, shost, pci, lu, aer/h */
+ goto clean8; /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */
/* Monitor the controller for firmware lockups */
h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
@@ -8564,6 +8564,8 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
HPSA_EVENT_MONITOR_INTERVAL);
return 0;
+clean8: /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */
+ kfree(h->lastlogicals);
clean7: /* perf, sg, cmd, irq, shost, pci, lu, aer/h */
hpsa_free_performant_mode(h);
h->access.set_intr_mask(h, HPSA_INTR_OFF);
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index af44e6e6b3bf..8b38e639e880 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -315,12 +315,23 @@ static int ring_request_msix(struct tb_ring *ring, bool no_suspend)
ring->vector = ret;
- ring->irq = pci_irq_vector(ring->nhi->pdev, ring->vector);
- if (ring->irq < 0)
- return ring->irq;
+ ret = pci_irq_vector(ring->nhi->pdev, ring->vector);
+ if (ret < 0)
+ goto err_ida_remove;
+
+ ring->irq = ret;
irqflags = no_suspend ? IRQF_NO_SUSPEND : 0;
- return request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring);
+ ret = request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring);
+ if (ret)
+ goto err_ida_remove;
+
+ return 0;
+
+err_ida_remove:
+ ida_simple_remove(&nhi->msix_ida, ring->vector);
+
+ return ret;
}
static void ring_release_msix(struct tb_ring *ring)
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 4fc94b5e15ef..be33a5ce3d8e 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -414,10 +414,10 @@ static int uio_get_minor(struct uio_device *idev)
return retval;
}
-static void uio_free_minor(struct uio_device *idev)
+static void uio_free_minor(unsigned long minor)
{
mutex_lock(&minor_lock);
- idr_remove(&uio_idr, idev->minor);
+ idr_remove(&uio_idr, minor);
mutex_unlock(&minor_lock);
}
@@ -989,7 +989,7 @@ int __uio_register_device(struct module *owner,
err_uio_dev_add_attributes:
device_del(&idev->dev);
err_device_create:
- uio_free_minor(idev);
+ uio_free_minor(idev->minor);
put_device(&idev->dev);
return ret;
}
@@ -1003,11 +1003,13 @@ EXPORT_SYMBOL_GPL(__uio_register_device);
void uio_unregister_device(struct uio_info *info)
{
struct uio_device *idev;
+ unsigned long minor;
if (!info || !info->uio_dev)
return;
idev = info->uio_dev;
+ minor = idev->minor;
mutex_lock(&idev->info_lock);
uio_dev_del_attributes(idev);
@@ -1020,7 +1022,7 @@ void uio_unregister_device(struct uio_info *info)
device_unregister(&idev->dev);
- uio_free_minor(idev);
+ uio_free_minor(minor);
return;
}
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index d07033d99c81..38f85b0e32ab 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1751,6 +1751,15 @@ static const struct usb_device_id acm_ids[] = {
{ USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */
.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
},
+ { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */
+ .driver_info = DISABLE_ECHO, /* Don't echo banner */
+ },
+ { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */
+ .driver_info = DISABLE_ECHO, /* Don't echo banner */
+ },
+ { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */
+ .driver_info = DISABLE_ECHO, /* Don't echo banner */
+ },
{ USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */
.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
},
diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c
index 8433c22900dc..0ed0d0365132 100644
--- a/drivers/usb/gadget/udc/goku_udc.c
+++ b/drivers/usb/gadget/udc/goku_udc.c
@@ -1772,6 +1772,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err;
}
+ pci_set_drvdata(pdev, dev);
spin_lock_init(&dev->lock);
dev->pdev = pdev;
dev->gadget.ops = &goku_ops;
@@ -1805,7 +1806,6 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
dev->regs = (struct goku_udc_regs __iomem *) base;
- pci_set_drvdata(pdev, dev);
INFO(dev, "%s\n", driver_desc);
INFO(dev, "version: " DRIVER_VERSION " %s\n", dmastr());
INFO(dev, "irq %d, pci mem %p\n", pdev->irq, base);
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index 8edef51c92e5..f026624898e7 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -91,6 +91,8 @@ static void evtchn_2l_unmask(unsigned port)
BUG_ON(!irqs_disabled());
+ smp_wmb(); /* All writes before unmask must be visible. */
+
if (unlikely((cpu != cpu_from_evtchn(port))))
do_hypercall = 1;
else {
@@ -159,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu,
* a bitset of words which contain pending event bits. The second
* level is a bitset of pending events themselves.
*/
-static void evtchn_2l_handle_events(unsigned cpu)
+static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl)
{
int irq;
xen_ulong_t pending_words;
@@ -240,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu)
/* Process port. */
port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
- irq = get_evtchn_to_irq(port);
-
- if (irq != -1)
- generic_handle_irq(irq);
+ handle_irq_for_port(port, ctrl);
bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index fdeeef2b9947..aca845675279 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -32,6 +32,10 @@
#include <linux/slab.h>
#include <linux/irqnr.h>
#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/cpuhotplug.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>
#ifdef CONFIG_X86
#include <asm/desc.h>
@@ -61,6 +65,15 @@
#include "events_internal.h"
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static uint __read_mostly event_loop_timeout = 2;
+module_param(event_loop_timeout, uint, 0644);
+
+static uint __read_mostly event_eoi_delay = 10;
+module_param(event_eoi_delay, uint, 0644);
+
const struct evtchn_ops *evtchn_ops;
/*
@@ -69,6 +82,24 @@ const struct evtchn_ops *evtchn_ops;
*/
static DEFINE_MUTEX(irq_mapping_update_lock);
+/*
+ * Lock protecting event handling loop against removing event channels.
+ * Adding of event channels is no issue as the associated IRQ becomes active
+ * only after everything is setup (before request_[threaded_]irq() the handler
+ * can't be entered for an event, as the event channel will be unmasked only
+ * then).
+ */
+static DEFINE_RWLOCK(evtchn_rwlock);
+
+/*
+ * Lock hierarchy:
+ *
+ * irq_mapping_update_lock
+ * evtchn_rwlock
+ * IRQ-desc lock
+ * percpu eoi_list_lock
+ */
+
static LIST_HEAD(xen_irq_list_head);
/* IRQ <-> VIRQ mapping. */
@@ -93,17 +124,20 @@ static bool (*pirq_needs_eoi)(unsigned irq);
static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
static struct irq_chip xen_dynamic_chip;
+static struct irq_chip xen_lateeoi_chip;
static struct irq_chip xen_percpu_chip;
static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
+static DEFINE_PER_CPU(unsigned int, irq_epoch);
+
static void clear_evtchn_to_irq_row(unsigned row)
{
unsigned col;
for (col = 0; col < EVTCHN_PER_ROW; col++)
- evtchn_to_irq[row][col] = -1;
+ WRITE_ONCE(evtchn_to_irq[row][col], -1);
}
static void clear_evtchn_to_irq_all(void)
@@ -140,7 +174,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
clear_evtchn_to_irq_row(row);
}
- evtchn_to_irq[row][col] = irq;
+ WRITE_ONCE(evtchn_to_irq[row][col], irq);
return 0;
}
@@ -150,7 +184,7 @@ int get_evtchn_to_irq(unsigned evtchn)
return -1;
if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
return -1;
- return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
+ return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
}
/* Get info for IRQ */
@@ -259,10 +293,14 @@ static void xen_irq_info_cleanup(struct irq_info *info)
*/
unsigned int evtchn_from_irq(unsigned irq)
{
- if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)))
+ const struct irq_info *info = NULL;
+
+ if (likely(irq < nr_irqs))
+ info = info_for_irq(irq);
+ if (!info)
return 0;
- return info_for_irq(irq)->evtchn;
+ return info->evtchn;
}
unsigned irq_from_evtchn(unsigned int evtchn)
@@ -373,9 +411,157 @@ void notify_remote_via_irq(int irq)
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
+struct lateeoi_work {
+ struct delayed_work delayed;
+ spinlock_t eoi_list_lock;
+ struct list_head eoi_list;
+};
+
+static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
+
+static void lateeoi_list_del(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+ list_del_init(&info->eoi_list);
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void lateeoi_list_add(struct irq_info *info)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+ struct irq_info *elem;
+ u64 now = get_jiffies_64();
+ unsigned long delay;
+ unsigned long flags;
+
+ if (now < info->eoi_time)
+ delay = info->eoi_time - now;
+ else
+ delay = 1;
+
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+
+ if (list_empty(&eoi->eoi_list)) {
+ list_add(&info->eoi_list, &eoi->eoi_list);
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, delay);
+ } else {
+ list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
+ if (elem->eoi_time <= info->eoi_time)
+ break;
+ }
+ list_add(&info->eoi_list, &elem->eoi_list);
+ }
+
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
+static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
+{
+ evtchn_port_t evtchn;
+ unsigned int cpu;
+ unsigned int delay = 0;
+
+ evtchn = info->evtchn;
+ if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
+ return;
+
+ if (spurious) {
+ if ((1 << info->spurious_cnt) < (HZ << 2))
+ info->spurious_cnt++;
+ if (info->spurious_cnt > 1) {
+ delay = 1 << (info->spurious_cnt - 2);
+ if (delay > HZ)
+ delay = HZ;
+ if (!info->eoi_time)
+ info->eoi_cpu = smp_processor_id();
+ info->eoi_time = get_jiffies_64() + delay;
+ }
+ } else {
+ info->spurious_cnt = 0;
+ }
+
+ cpu = info->eoi_cpu;
+ if (info->eoi_time &&
+ (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
+ lateeoi_list_add(info);
+ return;
+ }
+
+ info->eoi_time = 0;
+ unmask_evtchn(evtchn);
+}
+
+static void xen_irq_lateeoi_worker(struct work_struct *work)
+{
+ struct lateeoi_work *eoi;
+ struct irq_info *info;
+ u64 now = get_jiffies_64();
+ unsigned long flags;
+
+ eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
+
+ read_lock_irqsave(&evtchn_rwlock, flags);
+
+ while (true) {
+ spin_lock(&eoi->eoi_list_lock);
+
+ info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+ eoi_list);
+
+ if (info == NULL || now < info->eoi_time) {
+ spin_unlock(&eoi->eoi_list_lock);
+ break;
+ }
+
+ list_del_init(&info->eoi_list);
+
+ spin_unlock(&eoi->eoi_list_lock);
+
+ info->eoi_time = 0;
+
+ xen_irq_lateeoi_locked(info, false);
+ }
+
+ if (info)
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed, info->eoi_time - now);
+
+ read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+
+static void xen_cpu_init_eoi(unsigned int cpu)
+{
+ struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
+
+ INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
+ spin_lock_init(&eoi->eoi_list_lock);
+ INIT_LIST_HEAD(&eoi->eoi_list);
+}
+
+void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
+{
+ struct irq_info *info;
+ unsigned long flags;
+
+ read_lock_irqsave(&evtchn_rwlock, flags);
+
+ info = info_for_irq(irq);
+
+ if (info)
+ xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
+
+ read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
+
static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
+
#ifdef CONFIG_SMP
/* By default all event channels notify CPU#0. */
cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
@@ -390,6 +576,7 @@ static void xen_irq_init(unsigned irq)
set_info_for_irq(irq, info);
+ INIT_LIST_HEAD(&info->eoi_list);
list_add_tail(&info->list, &xen_irq_list_head);
}
@@ -438,16 +625,24 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
static void xen_free_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
+ unsigned long flags;
if (WARN_ON(!info))
return;
+ write_lock_irqsave(&evtchn_rwlock, flags);
+
+ if (!list_empty(&info->eoi_list))
+ lateeoi_list_del(info);
+
list_del(&info->list);
set_info_for_irq(irq, NULL);
WARN_ON(info->refcnt > 0);
+ write_unlock_irqrestore(&evtchn_rwlock, flags);
+
kfree(info);
/* Legacy IRQ descriptors are managed by the arch. */
@@ -839,7 +1034,7 @@ int xen_pirq_from_irq(unsigned irq)
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
-int bind_evtchn_to_irq(unsigned int evtchn)
+static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
{
int irq;
int ret;
@@ -856,7 +1051,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
if (irq < 0)
goto out;
- irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
+ irq_set_chip_and_handler_name(irq, chip,
handle_edge_irq, "event");
ret = xen_irq_info_evtchn_setup(irq, evtchn);
@@ -877,8 +1072,19 @@ int bind_evtchn_to_irq(unsigned int evtchn)
return irq;
}
+
+int bind_evtchn_to_irq(evtchn_port_t evtchn)
+{
+ return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn)
+{
+ return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi);
+
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
@@ -920,8 +1126,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
return irq;
}
-int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
- unsigned int remote_port)
+static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ struct irq_chip *chip)
{
struct evtchn_bind_interdomain bind_interdomain;
int err;
@@ -932,10 +1139,26 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
&bind_interdomain);
- return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
+ return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
+ chip);
+}
+
+int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+ evtchn_port_t remote_port)
+{
+ return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ &xen_dynamic_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq);
+int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port)
+{
+ return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
+
static int find_virq(unsigned int virq, unsigned int cpu)
{
struct evtchn_status status;
@@ -1031,14 +1254,15 @@ static void unbind_from_irq(unsigned int irq)
mutex_unlock(&irq_mapping_update_lock);
}
-int bind_evtchn_to_irqhandler(unsigned int evtchn,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname, void *dev_id)
+static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id,
+ struct irq_chip *chip)
{
int irq, retval;
- irq = bind_evtchn_to_irq(evtchn);
+ irq = bind_evtchn_to_irq_chip(evtchn, chip);
if (irq < 0)
return irq;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
@@ -1049,18 +1273,38 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn,
return irq;
}
+
+int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
+ devname, dev_id,
+ &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
-int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
- unsigned int remote_port,
- irq_handler_t handler,
- unsigned long irqflags,
- const char *devname,
- void *dev_id)
+int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname, void *dev_id)
+{
+ return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
+ devname, dev_id,
+ &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
+
+static int bind_interdomain_evtchn_to_irqhandler_chip(
+ unsigned int remote_domain, evtchn_port_t remote_port,
+ irq_handler_t handler, unsigned long irqflags,
+ const char *devname, void *dev_id, struct irq_chip *chip)
{
int irq, retval;
- irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+ irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ chip);
if (irq < 0)
return irq;
@@ -1072,8 +1316,33 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
return irq;
}
+
+int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
+ remote_port, handler, irqflags, devname,
+ dev_id, &xen_dynamic_chip);
+}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
+int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+{
+ return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
+ remote_port, handler, irqflags, devname,
+ dev_id, &xen_lateeoi_chip);
+}
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
+
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
unsigned long irqflags, const char *devname, void *dev_id)
@@ -1186,7 +1455,7 @@ int evtchn_get(unsigned int evtchn)
goto done;
err = -EINVAL;
- if (info->refcnt <= 0)
+ if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
goto done;
info->refcnt++;
@@ -1225,6 +1494,54 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
notify_remote_via_irq(irq);
}
+struct evtchn_loop_ctrl {
+ ktime_t timeout;
+ unsigned count;
+ bool defer_eoi;
+};
+
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+{
+ int irq;
+ struct irq_info *info;
+
+ irq = get_evtchn_to_irq(port);
+ if (irq == -1)
+ return;
+
+ /*
+ * Check for timeout every 256 events.
+ * We are setting the timeout value only after the first 256
+ * events in order to not hurt the common case of few loop
+ * iterations. The 256 is basically an arbitrary value.
+ *
+ * In case we are hitting the timeout we need to defer all further
+ * EOIs in order to ensure to leave the event handling loop rather
+ * sooner than later.
+ */
+ if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
+ ktime_t kt = ktime_get();
+
+ if (!ctrl->timeout) {
+ kt = ktime_add_ms(kt,
+ jiffies_to_msecs(event_loop_timeout));
+ ctrl->timeout = kt;
+ } else if (kt > ctrl->timeout) {
+ ctrl->defer_eoi = true;
+ }
+ }
+
+ info = info_for_irq(irq);
+
+ if (ctrl->defer_eoi) {
+ info->eoi_cpu = smp_processor_id();
+ info->irq_epoch = __this_cpu_read(irq_epoch);
+ info->eoi_time = get_jiffies_64() + event_eoi_delay;
+ }
+
+ generic_handle_irq(irq);
+}
+
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
static void __xen_evtchn_do_upcall(void)
@@ -1232,6 +1549,9 @@ static void __xen_evtchn_do_upcall(void)
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
int cpu = get_cpu();
unsigned count;
+ struct evtchn_loop_ctrl ctrl = { 0 };
+
+ read_lock(&evtchn_rwlock);
do {
vcpu_info->evtchn_upcall_pending = 0;
@@ -1239,7 +1559,7 @@ static void __xen_evtchn_do_upcall(void)
if (__this_cpu_inc_return(xed_nesting_count) - 1)
goto out;
- xen_evtchn_handle_events(cpu);
+ xen_evtchn_handle_events(cpu, &ctrl);
BUG_ON(!irqs_disabled());
@@ -1248,6 +1568,14 @@ static void __xen_evtchn_do_upcall(void)
} while (count != 1 || vcpu_info->evtchn_upcall_pending);
out:
+ read_unlock(&evtchn_rwlock);
+
+ /*
+ * Increment irq_epoch only now to defer EOIs only for
+ * xen_irq_lateeoi() invocations occurring from inside the loop
+ * above.
+ */
+ __this_cpu_inc(irq_epoch);
put_cpu();
}
@@ -1614,6 +1942,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
.irq_retrigger = retrigger_dynirq,
};
+static struct irq_chip xen_lateeoi_chip __read_mostly = {
+ /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
+ .name = "xen-dyn-lateeoi",
+
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,
+
+ .irq_ack = mask_ack_dynirq,
+ .irq_mask_ack = mask_ack_dynirq,
+
+ .irq_set_affinity = set_affinity_irq,
+ .irq_retrigger = retrigger_dynirq,
+};
+
static struct irq_chip xen_pirq_chip __read_mostly = {
.name = "xen-pirq",
@@ -1680,12 +2023,31 @@ void xen_callback_vector(void)
void xen_callback_vector(void) {}
#endif
-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "xen."
-
static bool fifo_events = true;
module_param(fifo_events, bool, 0);
+static int xen_evtchn_cpu_prepare(unsigned int cpu)
+{
+ int ret = 0;
+
+ xen_cpu_init_eoi(cpu);
+
+ if (evtchn_ops->percpu_init)
+ ret = evtchn_ops->percpu_init(cpu);
+
+ return ret;
+}
+
+static int xen_evtchn_cpu_dead(unsigned int cpu)
+{
+ int ret = 0;
+
+ if (evtchn_ops->percpu_deinit)
+ ret = evtchn_ops->percpu_deinit(cpu);
+
+ return ret;
+}
+
void __init xen_init_IRQ(void)
{
int ret = -EINVAL;
@@ -1696,6 +2058,12 @@ void __init xen_init_IRQ(void)
if (ret < 0)
xen_evtchn_2l_init();
+ xen_cpu_init_eoi(smp_processor_id());
+
+ cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
+ "xen/evtchn:prepare",
+ xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
+
evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
sizeof(*evtchn_to_irq), GFP_KERNEL);
BUG_ON(!evtchn_to_irq);
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index 76b318e88382..33462521bfd0 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -227,19 +227,25 @@ static bool evtchn_fifo_is_masked(unsigned port)
return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}
/*
- * Clear MASKED, spinning if BUSY is set.
+ * Clear MASKED if not PENDING, spinning if BUSY is set.
+ * Return true if mask was cleared.
*/
-static void clear_masked(volatile event_word_t *word)
+static bool clear_masked_cond(volatile event_word_t *word)
{
event_word_t new, old, w;
w = *word;
do {
+ if (w & (1 << EVTCHN_FIFO_PENDING))
+ return false;
+
old = w & ~(1 << EVTCHN_FIFO_BUSY);
new = old & ~(1 << EVTCHN_FIFO_MASKED);
w = sync_cmpxchg(word, old, new);
} while (w != old);
+
+ return true;
}
static void evtchn_fifo_unmask(unsigned port)
@@ -248,8 +254,7 @@ static void evtchn_fifo_unmask(unsigned port)
BUG_ON(!irqs_disabled());
- clear_masked(word);
- if (evtchn_fifo_is_pending(port)) {
+ if (!clear_masked_cond(word)) {
struct evtchn_unmask unmask = { .port = port };
(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
}
@@ -270,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word)
return w & EVTCHN_FIFO_LINK_MASK;
}
-static void handle_irq_for_port(unsigned port)
-{
- int irq;
-
- irq = get_evtchn_to_irq(port);
- if (irq != -1)
- generic_handle_irq(irq);
-}
-
-static void consume_one_event(unsigned cpu,
+static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
struct evtchn_fifo_control_block *control_block,
- unsigned priority, unsigned long *ready,
- bool drop)
+ unsigned priority, unsigned long *ready)
{
struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
uint32_t head;
@@ -315,16 +310,17 @@ static void consume_one_event(unsigned cpu,
clear_bit(priority, ready);
if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
- if (unlikely(drop))
+ if (unlikely(!ctrl))
pr_warn("Dropping pending event for port %u\n", port);
else
- handle_irq_for_port(port);
+ handle_irq_for_port(port, ctrl);
}
q->head[priority] = head;
}
-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
+static void __evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
struct evtchn_fifo_control_block *control_block;
unsigned long ready;
@@ -336,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
while (ready) {
q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
- consume_one_event(cpu, control_block, q, &ready, drop);
+ consume_one_event(cpu, ctrl, control_block, q, &ready);
ready |= xchg(&control_block->ready, 0);
}
}
-static void evtchn_fifo_handle_events(unsigned cpu)
+static void evtchn_fifo_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- __evtchn_fifo_handle_events(cpu, false);
+ __evtchn_fifo_handle_events(cpu, ctrl);
}
static void evtchn_fifo_resume(void)
@@ -380,21 +377,6 @@ static void evtchn_fifo_resume(void)
event_array_pages = 0;
}
-static const struct evtchn_ops evtchn_ops_fifo = {
- .max_channels = evtchn_fifo_max_channels,
- .nr_channels = evtchn_fifo_nr_channels,
- .setup = evtchn_fifo_setup,
- .bind_to_cpu = evtchn_fifo_bind_to_cpu,
- .clear_pending = evtchn_fifo_clear_pending,
- .set_pending = evtchn_fifo_set_pending,
- .is_pending = evtchn_fifo_is_pending,
- .test_and_set_mask = evtchn_fifo_test_and_set_mask,
- .mask = evtchn_fifo_mask,
- .unmask = evtchn_fifo_unmask,
- .handle_events = evtchn_fifo_handle_events,
- .resume = evtchn_fifo_resume,
-};
-
static int evtchn_fifo_alloc_control_block(unsigned cpu)
{
void *control_block = NULL;
@@ -417,19 +399,36 @@ static int evtchn_fifo_alloc_control_block(unsigned cpu)
return ret;
}
-static int xen_evtchn_cpu_prepare(unsigned int cpu)
+static int evtchn_fifo_percpu_init(unsigned int cpu)
{
if (!per_cpu(cpu_control_block, cpu))
return evtchn_fifo_alloc_control_block(cpu);
return 0;
}
-static int xen_evtchn_cpu_dead(unsigned int cpu)
+static int evtchn_fifo_percpu_deinit(unsigned int cpu)
{
- __evtchn_fifo_handle_events(cpu, true);
+ __evtchn_fifo_handle_events(cpu, NULL);
return 0;
}
+static const struct evtchn_ops evtchn_ops_fifo = {
+ .max_channels = evtchn_fifo_max_channels,
+ .nr_channels = evtchn_fifo_nr_channels,
+ .setup = evtchn_fifo_setup,
+ .bind_to_cpu = evtchn_fifo_bind_to_cpu,
+ .clear_pending = evtchn_fifo_clear_pending,
+ .set_pending = evtchn_fifo_set_pending,
+ .is_pending = evtchn_fifo_is_pending,
+ .test_and_set_mask = evtchn_fifo_test_and_set_mask,
+ .mask = evtchn_fifo_mask,
+ .unmask = evtchn_fifo_unmask,
+ .handle_events = evtchn_fifo_handle_events,
+ .resume = evtchn_fifo_resume,
+ .percpu_init = evtchn_fifo_percpu_init,
+ .percpu_deinit = evtchn_fifo_percpu_deinit,
+};
+
int __init xen_evtchn_fifo_init(void)
{
int cpu = smp_processor_id();
@@ -443,9 +442,5 @@ int __init xen_evtchn_fifo_init(void)
evtchn_ops = &evtchn_ops_fifo;
- cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
- "xen/evtchn:prepare",
- xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
-
return ret;
}
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
index 50c2050a1e32..b9b4f5919893 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -32,11 +32,16 @@ enum xen_irq_type {
*/
struct irq_info {
struct list_head list;
- int refcnt;
+ struct list_head eoi_list;
+ short refcnt;
+ short spurious_cnt;
enum xen_irq_type type; /* type */
unsigned irq;
unsigned int evtchn; /* event channel */
unsigned short cpu; /* cpu bound */
+ unsigned short eoi_cpu; /* EOI must happen on this cpu */
+ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
+ u64 eoi_time; /* Time in jiffies when to EOI. */
union {
unsigned short virq;
@@ -55,6 +60,8 @@ struct irq_info {
#define PIRQ_SHAREABLE (1 << 1)
#define PIRQ_MSI_GROUP (1 << 2)
+struct evtchn_loop_ctrl;
+
struct evtchn_ops {
unsigned (*max_channels)(void);
unsigned (*nr_channels)(void);
@@ -69,14 +76,18 @@ struct evtchn_ops {
void (*mask)(unsigned port);
void (*unmask)(unsigned port);
- void (*handle_events)(unsigned cpu);
+ void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
void (*resume)(void);
+
+ int (*percpu_init)(unsigned int cpu);
+ int (*percpu_deinit)(unsigned int cpu);
};
extern const struct evtchn_ops *evtchn_ops;
extern int **evtchn_to_irq;
int get_evtchn_to_irq(unsigned int evtchn);
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
struct irq_info *info_for_irq(unsigned irq);
unsigned cpu_from_irq(unsigned irq);
@@ -134,9 +145,10 @@ static inline void unmask_evtchn(unsigned port)
return evtchn_ops->unmask(port);
}
-static inline void xen_evtchn_handle_events(unsigned cpu)
+static inline void xen_evtchn_handle_events(unsigned cpu,
+ struct evtchn_loop_ctrl *ctrl)
{
- return evtchn_ops->handle_events(cpu);
+ return evtchn_ops->handle_events(cpu, ctrl);
}
static inline void xen_evtchn_resume(void)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 055123f48039..0770f427beb2 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -166,7 +166,6 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
"Interrupt for port %d, but apparently not enabled; per-user %p\n",
evtchn->port, u);
- disable_irq_nosync(irq);
evtchn->enabled = false;
spin_lock(&u->ring_prod_lock);
@@ -292,7 +291,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
evtchn = find_evtchn(u, port);
if (evtchn && !evtchn->enabled) {
evtchn->enabled = true;
- enable_irq(irq_from_evtchn(port));
+ xen_irq_lateeoi(irq_from_evtchn(port), 0);
}
}
@@ -392,8 +391,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
if (rc < 0)
goto err;
- rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
- u->name, evtchn);
+ rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0,
+ u->name, evtchn);
if (rc < 0)
goto err;
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 62a0c4111dc4..b13d03aba791 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -75,6 +75,7 @@ struct sock_mapping {
atomic_t write;
atomic_t io;
atomic_t release;
+ atomic_t eoi;
void (*saved_data_ready)(struct sock *sk);
struct pvcalls_ioworker ioworker;
};
@@ -96,7 +97,7 @@ static int pvcalls_back_release_active(struct xenbus_device *dev,
struct pvcalls_fedata *fedata,
struct sock_mapping *map);
-static void pvcalls_conn_back_read(void *opaque)
+static bool pvcalls_conn_back_read(void *opaque)
{
struct sock_mapping *map = (struct sock_mapping *)opaque;
struct msghdr msg;
@@ -116,17 +117,17 @@ static void pvcalls_conn_back_read(void *opaque)
virt_mb();
if (error)
- return;
+ return false;
size = pvcalls_queued(prod, cons, array_size);
if (size >= array_size)
- return;
+ return false;
spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
atomic_set(&map->read, 0);
spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
flags);
- return;
+ return true;
}
spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
wanted = array_size - size;
@@ -154,7 +155,7 @@ static void pvcalls_conn_back_read(void *opaque)
ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
WARN_ON(ret > wanted);
if (ret == -EAGAIN) /* shouldn't happen */
- return;
+ return true;
if (!ret)
ret = -ENOTCONN;
spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
@@ -173,10 +174,10 @@ static void pvcalls_conn_back_read(void *opaque)
virt_wmb();
notify_remote_via_irq(map->irq);
- return;
+ return true;
}
-static void pvcalls_conn_back_write(struct sock_mapping *map)
+static bool pvcalls_conn_back_write(struct sock_mapping *map)
{
struct pvcalls_data_intf *intf = map->ring;
struct pvcalls_data *data = &map->data;
@@ -193,7 +194,7 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
array_size = XEN_FLEX_RING_SIZE(map->ring_order);
size = pvcalls_queued(prod, cons, array_size);
if (size == 0)
- return;
+ return false;
memset(&msg, 0, sizeof(msg));
msg.msg_flags |= MSG_DONTWAIT;
@@ -215,12 +216,11 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
atomic_set(&map->write, 0);
ret = inet_sendmsg(map->sock, &msg, size);
- if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
+ if (ret == -EAGAIN) {
atomic_inc(&map->write);
atomic_inc(&map->io);
+ return true;
}
- if (ret == -EAGAIN)
- return;
/* write the data, then update the indexes */
virt_wmb();
@@ -233,9 +233,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
}
/* update the indexes, then notify the other end */
virt_wmb();
- if (prod != cons + ret)
+ if (prod != cons + ret) {
atomic_inc(&map->write);
+ atomic_inc(&map->io);
+ }
notify_remote_via_irq(map->irq);
+
+ return true;
}
static void pvcalls_back_ioworker(struct work_struct *work)
@@ -244,6 +248,7 @@ static void pvcalls_back_ioworker(struct work_struct *work)
struct pvcalls_ioworker, register_work);
struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
ioworker);
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
while (atomic_read(&map->io) > 0) {
if (atomic_read(&map->release) > 0) {
@@ -251,10 +256,18 @@ static void pvcalls_back_ioworker(struct work_struct *work)
return;
}
- if (atomic_read(&map->read) > 0)
- pvcalls_conn_back_read(map);
- if (atomic_read(&map->write) > 0)
- pvcalls_conn_back_write(map);
+ if (atomic_read(&map->read) > 0 &&
+ pvcalls_conn_back_read(map))
+ eoi_flags = 0;
+ if (atomic_read(&map->write) > 0 &&
+ pvcalls_conn_back_write(map))
+ eoi_flags = 0;
+
+ if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) {
+ atomic_set(&map->eoi, 0);
+ xen_irq_lateeoi(map->irq, eoi_flags);
+ eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+ }
atomic_dec(&map->io);
}
@@ -351,12 +364,9 @@ static struct sock_mapping *pvcalls_new_active_socket(
goto out;
map->bytes = page;
- ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
- evtchn,
- pvcalls_back_conn_event,
- 0,
- "pvcalls-backend",
- map);
+ ret = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+ fedata->dev->otherend_id, evtchn,
+ pvcalls_back_conn_event, 0, "pvcalls-backend", map);
if (ret < 0)
goto out;
map->irq = ret;
@@ -890,15 +900,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
{
struct xenbus_device *dev = dev_id;
struct pvcalls_fedata *fedata = NULL;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
- if (dev == NULL)
- return IRQ_HANDLED;
+ if (dev) {
+ fedata = dev_get_drvdata(&dev->dev);
+ if (fedata) {
+ pvcalls_back_work(fedata);
+ eoi_flags = 0;
+ }
+ }
- fedata = dev_get_drvdata(&dev->dev);
- if (fedata == NULL)
- return IRQ_HANDLED;
+ xen_irq_lateeoi(irq, eoi_flags);
- pvcalls_back_work(fedata);
return IRQ_HANDLED;
}
@@ -908,12 +921,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
struct pvcalls_ioworker *iow;
if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
- map->sock->sk->sk_user_data != map)
+ map->sock->sk->sk_user_data != map) {
+ xen_irq_lateeoi(irq, 0);
return IRQ_HANDLED;
+ }
iow = &map->ioworker;
atomic_inc(&map->write);
+ atomic_inc(&map->eoi);
atomic_inc(&map->io);
queue_work(iow->wq, &iow->register_work);
@@ -948,7 +964,7 @@ static int backend_connect(struct xenbus_device *dev)
goto error;
}
- err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn);
if (err < 0)
goto error;
fedata->irq = err;
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 8c250f4a3a97..0842b8e26107 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -733,10 +733,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
wmb();
notify_remote_via_irq(pdev->evtchn_irq);
+ /* Enable IRQ to signal "request done". */
+ xen_pcibk_lateeoi(pdev, 0);
+
ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
!(test_bit(_XEN_PCIB_active, (unsigned long *)
&sh_info->flags)), 300*HZ);
+ /* Enable IRQ for pcifront request if not already active. */
+ if (!test_bit(_PDEVF_op_active, &pdev->flags))
+ xen_pcibk_lateeoi(pdev, 0);
+
if (!ret) {
if (test_bit(_XEN_PCIB_active,
(unsigned long *)&sh_info->flags)) {
@@ -750,13 +757,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
}
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
- if (test_bit(_XEN_PCIF_active,
- (unsigned long *)&sh_info->flags)) {
- dev_dbg(&psdev->dev->dev,
- "schedule pci_conf service in " DRV_NAME "\n");
- xen_pcibk_test_and_schedule_op(psdev->pdev);
- }
-
res = (pci_ers_result_t)aer_op->err;
return res;
}
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index 263c059bff90..235cdfe13494 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/atomic.h>
+#include <xen/events.h>
#include <xen/interface/io/pciif.h>
#define DRV_NAME "xen-pciback"
@@ -27,6 +28,8 @@ struct pci_dev_entry {
#define PDEVF_op_active (1<<(_PDEVF_op_active))
#define _PCIB_op_pending (1)
#define PCIB_op_pending (1<<(_PCIB_op_pending))
+#define _EOI_pending (2)
+#define EOI_pending (1<<(_EOI_pending))
struct xen_pcibk_device {
void *pci_dev_data;
@@ -182,12 +185,17 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data);
+static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
+ unsigned int eoi_flag)
+{
+ if (test_and_clear_bit(_EOI_pending, &pdev->flags))
+ xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag);
+}
+
int xen_pcibk_xenbus_register(void);
void xen_pcibk_xenbus_unregister(void);
extern int verbose_request;
-
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
#endif
/* Handles shared IRQs that can to device domain and control domain. */
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 7af047008ea2..0273c52e1b11 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -297,26 +297,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
return 0;
}
#endif
+
+static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev)
+{
+ return test_bit(_XEN_PCIF_active,
+ (unsigned long *)&pdev->sh_info->flags) &&
+ !test_and_set_bit(_PDEVF_op_active, &pdev->flags);
+}
+
/*
* Now the same evtchn is used for both pcifront conf_read_write request
* as well as pcie aer front end ack. We use a new work_queue to schedule
* xen_pcibk conf_read_write service for avoiding confict with aer_core
* do_recovery job which also use the system default work_queue
*/
-void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
+static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
+ bool eoi = true;
+
/* Check that frontend is requesting an operation and that we are not
* already processing a request */
- if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
- && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
+ if (xen_pcibk_test_op_pending(pdev)) {
schedule_work(&pdev->op_work);
+ eoi = false;
}
/*_XEN_PCIB_active should have been cleared by pcifront. And also make
sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
&& test_bit(_PCIB_op_pending, &pdev->flags)) {
wake_up(&xen_pcibk_aer_wait_queue);
+ eoi = false;
}
+
+ /* EOI if there was nothing to do. */
+ if (eoi)
+ xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS);
}
/* Performing the configuration space reads/writes must not be done in atomic
@@ -324,10 +339,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
* use of semaphores). This function is intended to be called from a work
* queue in process context taking a struct xen_pcibk_device as a parameter */
-void xen_pcibk_do_op(struct work_struct *data)
+static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev)
{
- struct xen_pcibk_device *pdev =
- container_of(data, struct xen_pcibk_device, op_work);
struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->op;
@@ -400,16 +413,31 @@ void xen_pcibk_do_op(struct work_struct *data)
smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
clear_bit(_PDEVF_op_active, &pdev->flags);
smp_mb__after_atomic(); /* /before/ final check for work */
+}
- /* Check to see if the driver domain tried to start another request in
- * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
- */
- xen_pcibk_test_and_schedule_op(pdev);
+void xen_pcibk_do_op(struct work_struct *data)
+{
+ struct xen_pcibk_device *pdev =
+ container_of(data, struct xen_pcibk_device, op_work);
+
+ do {
+ xen_pcibk_do_one_op(pdev);
+ } while (xen_pcibk_test_op_pending(pdev));
+
+ xen_pcibk_lateeoi(pdev, 0);
}
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{
struct xen_pcibk_device *pdev = dev_id;
+ bool eoi;
+
+ /* IRQs might come in before pdev->evtchn_irq is written. */
+ if (unlikely(pdev->evtchn_irq != irq))
+ pdev->evtchn_irq = irq;
+
+ eoi = test_and_set_bit(_EOI_pending, &pdev->flags);
+ WARN(eoi, "IRQ while EOI pending\n");
xen_pcibk_test_and_schedule_op(pdev);
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 581c4e1a8b82..3bbed47da3fa 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
pdev->sh_info = vaddr;
- err = bind_interdomain_evtchn_to_irqhandler(
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
0, DRV_NAME, pdev);
if (err < 0) {
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index e2f3e8b0fba9..fd32c3459b66 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -91,7 +91,6 @@ struct vscsibk_info {
unsigned int irq;
struct vscsiif_back_ring ring;
- int ring_error;
spinlock_t ring_lock;
atomic_t nr_unreplied_reqs;
@@ -721,7 +720,8 @@ static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info,
return pending_req;
}
-static int scsiback_do_cmd_fn(struct vscsibk_info *info)
+static int scsiback_do_cmd_fn(struct vscsibk_info *info,
+ unsigned int *eoi_flags)
{
struct vscsiif_back_ring *ring = &info->ring;
struct vscsiif_request ring_req;
@@ -738,11 +738,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
rc = ring->rsp_prod_pvt;
pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n",
info->domid, rp, rc, rp - rc);
- info->ring_error = 1;
- return 0;
+ return -EINVAL;
}
while ((rc != rp)) {
+ *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS;
+
if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
break;
@@ -801,13 +802,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info)
static irqreturn_t scsiback_irq_fn(int irq, void *dev_id)
{
struct vscsibk_info *info = dev_id;
+ int rc;
+ unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;
- if (info->ring_error)
- return IRQ_HANDLED;
-
- while (scsiback_do_cmd_fn(info))
+ while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0)
cond_resched();
+ /* In case of a ring error we keep the event channel masked. */
+ if (!rc)
+ xen_irq_lateeoi(irq, eoi_flags);
+
return IRQ_HANDLED;
}
@@ -828,7 +832,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref,
sring = (struct vscsiif_sring *)area;
BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
- err = bind_interdomain_evtchn_to_irq(info->domid, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn);
if (err < 0)
goto unmap_page;
@@ -1251,7 +1255,6 @@ static int scsiback_probe(struct xenbus_device *dev,
info->domid = dev->otherend_id;
spin_lock_init(&info->ring_lock);
- info->ring_error = 0;
atomic_set(&info->nr_unreplied_reqs, 0);
init_waitqueue_head(&info->waiting_to_free);
info->dev = dev;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0ba338cffa93..fd56c22c12a0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3869,6 +3869,10 @@ int btree_write_cache_pages(struct address_space *mapping,
if (!ret) {
free_extent_buffer(eb);
continue;
+ } else if (ret < 0) {
+ done = 1;
+ free_extent_buffer(eb);
+ break;
}
ret = write_one_eb(eb, fs_info, wbc, &epd);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 56123ce3b9f0..d3f76e3efd44 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3863,6 +3863,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
ret = -EINTR;
goto out;
}
+
+ cond_resched();
}
ret = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f3cb042a28d5..3d92feff249a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2432,9 +2432,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
tmp = btrfs_super_num_devices(fs_info->super_copy);
btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);
- /* add sysfs device entry */
- btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
-
/*
* we've got more storage, clear any full flags on the space
* infos
@@ -2442,6 +2439,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
btrfs_clear_space_info_full(fs_info);
mutex_unlock(&fs_info->chunk_mutex);
+
+ /* Add sysfs device entry */
+ btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
+
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (seeding_dev) {
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index a2b2355e7f01..9986817532b1 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -501,7 +501,13 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
else if (map_chars == SFM_MAP_UNI_RSVD) {
bool end_of_string;
- if (i == srclen - 1)
+ /**
+ * Remap spaces and periods found at the end of every
+ * component of the path. The special cases of '.' and
+ * '..' do not need to be dealt with explicitly because
+ * they are addressed in namei.c:link_path_walk().
+ **/
+ if ((i == srclen - 1) || (source[i+1] == '\\'))
end_of_string = true;
else
end_of_string = false;
diff --git a/fs/exec.c b/fs/exec.c
index 92fd21f0b0ff..e8d1e6705977 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1024,10 +1024,23 @@ static int exec_mmap(struct mm_struct *mm)
}
}
task_lock(tsk);
+
+ local_irq_disable();
active_mm = tsk->active_mm;
- tsk->mm = mm;
tsk->active_mm = mm;
+ tsk->mm = mm;
+ /*
+ * This prevents preemption while active_mm is being loaded and
+ * it and mm are being updated, which could cause problems for
+ * lazy tlb mm refcounting when these are updated by context
+ * switches. Not all architectures can handle irqs off over
+ * activate_mm yet.
+ */
+ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
activate_mm(active_mm, mm);
+ if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
tsk->mm->vmacache_seqnum = 0;
vmacache_flush(tsk);
task_unlock(tsk);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 6064bcb8572b..89a4ac83cbcd 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1895,6 +1895,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
+ ext4_write_unlock_xattr(inode, &no_expand);
*has_inline = 0;
ext4_journal_stop(handle);
return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ec96466f2909..eaa8bcd59b6a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1679,8 +1679,8 @@ static const struct mount_opts {
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
MOPT_CLEAR | MOPT_Q},
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
+ {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+ {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
{Opt_offgrpjquota, 0, MOPT_Q},
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7cb0672294df..c4eb6a5fcea9 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -720,9 +720,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
}
gfs2_free_clones(rgd);
+ return_all_reservations(rgd);
kfree(rgd->rd_bits);
rgd->rd_bits = NULL;
- return_all_reservations(rgd);
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
}
}
@@ -1361,6 +1361,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ return -EROFS;
+
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c3f3f1ae4e1b..639e2c86758a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -924,6 +924,7 @@ static void gfs2_put_super(struct super_block *sb)
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp);
+ truncate_inode_pages_final(&sdp->sd_aspace);
gfs2_delete_debugfs_file(sdp);
/* Unmount the locking protocol */
gfs2_lm_unmount(sdp);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8216550faf45..f9deabfa303e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1733,6 +1733,7 @@ static void ocfs2_inode_init_once(void *data)
oi->ip_blkno = 0ULL;
oi->ip_clusters = 0;
+ oi->ip_next_orphan = NULL;
ocfs2_resv_init_once(&oi->ip_la_data_resv);
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 55c88a732690..1f16c2da2472 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -1319,7 +1319,7 @@ xfs_rmap_convert_shared(
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
+ error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext,
&PREV, &i);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 9d9c9192584c..cd689d21d3af 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -262,8 +262,8 @@ xfs_rmapbt_key_diff(
else if (y > x)
return -1;
- x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
- y = rec->rm_offset;
+ x = be64_to_cpu(kp->rm_offset);
+ y = xfs_rmap_irec_offset_pack(rec);
if (x > y)
return 1;
else if (y > x)
@@ -294,8 +294,8 @@ xfs_rmapbt_diff_two_keys(
else if (y > x)
return -1;
- x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
- y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
+ x = be64_to_cpu(kp1->rm_offset);
+ y = be64_to_cpu(kp2->rm_offset);
if (x > y)
return 1;
else if (y > x)
@@ -400,8 +400,8 @@ xfs_rmapbt_keys_inorder(
return 1;
else if (a > b)
return 0;
- a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
- b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
+ a = be64_to_cpu(k1->rmap.rm_offset);
+ b = be64_to_cpu(k2->rmap.rm_offset);
if (a <= b)
return 1;
return 0;
@@ -430,8 +430,8 @@ xfs_rmapbt_recs_inorder(
return 1;
else if (a > b)
return 0;
- a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
- b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
+ a = be64_to_cpu(r1->rmap.rm_offset);
+ b = be64_to_cpu(r2->rmap.rm_offset);
if (a <= b)
return 1;
return 0;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 4e6f2c8574f7..42c670a31351 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -879,6 +879,16 @@ xfs_setattr_size(
if (newsize > oldsize) {
error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
} else {
+ /*
+ * iomap won't detect a dirty page over an unwritten block (or a
+ * cow block over a hole) and subsequently skips zeroing the
+ * newly post-EOF portion of the page. Flush the new EOF to
+ * convert the block before the pagecache truncate.
+ */
+ error = filemap_write_and_wait_range(inode->i_mapping, newsize,
+ newsize);
+ if (error)
+ return error;
error = iomap_truncate_page(inode, newsize, &did_zeroing,
&xfs_iomap_ops);
}
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index aa6c5c193f45..8538916d255e 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -140,7 +140,7 @@ xfs_fs_map_blocks(
goto out_unlock;
error = invalidate_inode_pages2(inode->i_mapping);
if (WARN_ON_ONCE(error))
- return error;
+ goto out_unlock;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index b3379a97245c..a34694e675c9 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -61,21 +61,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
*/
static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
{
- if (skb_shared(skb)) {
- struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+ struct sk_buff *nskb;
- if (likely(nskb)) {
- can_skb_set_owner(nskb, skb->sk);
- consume_skb(skb);
- return nskb;
- } else {
- kfree_skb(skb);
- return NULL;
- }
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!nskb)) {
+ kfree_skb(skb);
+ return NULL;
}
- /* we can assume to have an unshared skb with proper owner */
- return skb;
+ can_skb_set_owner(nskb, skb->sk);
+ consume_skb(skb);
+ return nskb;
}
#endif /* !_CAN_SKB_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 956d76744c91..41a3307a971c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -466,7 +466,7 @@ struct pmu {
*/
struct perf_addr_filter {
struct list_head entry;
- struct inode *inode;
+ struct path path;
unsigned long offset;
unsigned long size;
unsigned int range : 1,
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index aa16e6468f91..cc1e71334e53 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -16,12 +16,44 @@ void prandom_bytes(void *buf, size_t nbytes);
void prandom_seed(u32 seed);
void prandom_reseed_late(void);
+#if BITS_PER_LONG == 64
+/*
+ * The core SipHash round function. Each line can be executed in
+ * parallel given enough CPU resources.
+ */
+#define PRND_SIPROUND(v0, v1, v2, v3) ( \
+ v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \
+ v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \
+ v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \
+ v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \
+)
+
+#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261)
+#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573)
+
+#elif BITS_PER_LONG == 32
+/*
+ * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash.
+ * This is weaker, but 32-bit machines are not used for high-traffic
+ * applications, so there is less output for an attacker to analyze.
+ */
+#define PRND_SIPROUND(v0, v1, v2, v3) ( \
+ v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \
+ v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \
+ v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \
+ v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \
+)
+#define PRND_K0 0x6c796765
+#define PRND_K1 0x74656462
+
+#else
+#error Unsupported BITS_PER_LONG
+#endif
+
struct rnd_state {
__u32 s1, s2, s3, s4;
};
-DECLARE_PER_CPU(struct rnd_state, net_rand_state);
-
u32 prandom_u32_state(struct rnd_state *state);
void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
diff --git a/include/linux/time64.h b/include/linux/time64.h
index ad33260618f7..99ab4a686c30 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -189,6 +189,10 @@ static inline bool timespec64_valid_strict(const struct timespec64 *ts)
*/
static inline s64 timespec64_to_ns(const struct timespec64 *ts)
{
+ /* Prevent multiplication overflow */
+ if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+ return KTIME_MAX;
+
return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
}
diff --git a/include/xen/events.h b/include/xen/events.h
index 1650d39decae..d8255ed2052c 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -14,11 +14,16 @@
unsigned xen_evtchn_nr_channels(void);
-int bind_evtchn_to_irq(unsigned int evtchn);
-int bind_evtchn_to_irqhandler(unsigned int evtchn,
+int bind_evtchn_to_irq(evtchn_port_t evtchn);
+int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
+int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
irq_handler_t handler,
unsigned long irqflags, const char *devname,
void *dev_id);
+int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
+ irq_handler_t handler,
+ unsigned long irqflags, const char *devname,
+ void *dev_id);
int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu);
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
@@ -31,13 +36,21 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
const char *devname,
void *dev_id);
int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
- unsigned int remote_port);
+ evtchn_port_t remote_port);
+int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port);
int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
- unsigned int remote_port,
+ evtchn_port_t remote_port,
irq_handler_t handler,
unsigned long irqflags,
const char *devname,
void *dev_id);
+int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
+ evtchn_port_t remote_port,
+ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
/*
* Common unbind function for all event sources. Takes IRQ to unbind from.
@@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
*/
void unbind_from_irqhandler(unsigned int irq, void *dev_id);
+/*
+ * Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi
+ * functions above.
+ */
+void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags);
+/* Signal an event was spurious, i.e. there was no action resulting from it. */
+#define XEN_EOI_FLAG_SPURIOUS 0x00000001
+
#define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX
#define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT
#define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5807fad2c405..75cf23232a92 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5207,11 +5207,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
-
struct ring_buffer *rb = ring_buffer_get(event);
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
+ bool detach_rest = false;
if (event->pmu->event_unmapped)
event->pmu->event_unmapped(event, vma->vm_mm);
@@ -5242,7 +5242,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
}
- atomic_dec(&rb->mmap_count);
+ if (atomic_dec_and_test(&rb->mmap_count))
+ detach_rest = true;
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
goto out_put;
@@ -5251,7 +5252,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
mutex_unlock(&event->mmap_mutex);
/* If there's still other mmap()s of this buffer, we're done. */
- if (atomic_read(&rb->mmap_count))
+ if (!detach_rest)
goto out_put;
/*
@@ -6450,7 +6451,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
raw_spin_lock_irqsave(&ifh->lock, flags);
list_for_each_entry(filter, &ifh->list, entry) {
- if (filter->inode) {
+ if (filter->path.dentry) {
event->addr_filters_offs[count] = 0;
restart++;
}
@@ -7124,7 +7125,11 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter,
struct file *file, unsigned long offset,
unsigned long size)
{
- if (filter->inode != file_inode(file))
+ /* d_inode(NULL) won't be equal to any mapped user-space file */
+ if (!filter->path.dentry)
+ return false;
+
+ if (d_inode(filter->path.dentry) != file_inode(file))
return false;
if (filter->offset > offset + size)
@@ -8345,8 +8350,7 @@ static void free_filters_list(struct list_head *filters)
struct perf_addr_filter *filter, *iter;
list_for_each_entry_safe(filter, iter, filters, entry) {
- if (filter->inode)
- iput(filter->inode);
+ path_put(&filter->path);
list_del(&filter->entry);
kfree(filter);
}
@@ -8443,7 +8447,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
* Adjust base offset if the filter is associated to a binary
* that needs to be mapped:
*/
- if (filter->inode)
+ if (filter->path.dentry)
event->addr_filters_offs[count] =
perf_addr_filter_apply(filter, mm);
@@ -8516,7 +8520,6 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
{
struct perf_addr_filter *filter = NULL;
char *start, *orig, *filename = NULL;
- struct path path;
substring_t args[MAX_OPT_ARGS];
int state = IF_STATE_ACTION, token;
unsigned int kernel = 0;
@@ -8579,6 +8582,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
int fpos = filter->range ? 2 : 1;
+ kfree(filename);
filename = match_strdup(&args[fpos]);
if (!filename) {
ret = -ENOMEM;
@@ -8617,22 +8621,18 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
*/
ret = -EOPNOTSUPP;
if (!event->ctx->task)
- goto fail_free_name;
+ goto fail;
/* look up the path and grab its inode */
- ret = kern_path(filename, LOOKUP_FOLLOW, &path);
+ ret = kern_path(filename, LOOKUP_FOLLOW,
+ &filter->path);
if (ret)
- goto fail_free_name;
-
- filter->inode = igrab(d_inode(path.dentry));
- path_put(&path);
- kfree(filename);
- filename = NULL;
+ goto fail;
ret = -EINVAL;
- if (!filter->inode ||
- !S_ISREG(filter->inode->i_mode))
- /* free_filters_list() will iput() */
+ if (!filter->path.dentry ||
+ !S_ISREG(d_inode(filter->path.dentry)
+ ->i_mode))
goto fail;
event->addr_filters.nr_file_filters++;
@@ -8647,13 +8647,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
if (state != IF_STATE_ACTION)
goto fail;
+ kfree(filename);
kfree(orig);
return 0;
-fail_free_name:
- kfree(filename);
fail:
+ kfree(filename);
free_filters_list(filters);
kfree(orig);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 09b1537ae06c..41317d04eeae 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -213,7 +213,7 @@ static inline int get_recursion_context(int *recursion)
rctx = 3;
else if (in_irq())
rctx = 2;
- else if (in_softirq())
+ else if (in_serving_softirq())
rctx = 1;
else
rctx = 0;
diff --git a/kernel/exit.c b/kernel/exit.c
index 7a7984d7a4d8..9e70577b818a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -516,7 +516,10 @@ static void exit_mm(void)
up_read(&mm->mmap_sem);
self.task = current;
- self.next = xchg(&core_state->dumper.next, &self);
+ if (self.task->flags & PF_SIGNALED)
+ self.next = xchg(&core_state->dumper.next, &self);
+ else
+ self.task = NULL;
/*
* Implies mb(), the result of xchg() must be visible
* to core_state->dumper.
diff --git a/kernel/futex.c b/kernel/futex.c
index 774de77bc502..322b9a840da6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -862,8 +862,9 @@ static void put_pi_state(struct futex_pi_state *pi_state)
*/
if (pi_state->owner) {
struct task_struct *owner;
+ unsigned long flags;
- raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
owner = pi_state->owner;
if (owner) {
raw_spin_lock(&owner->pi_lock);
@@ -871,7 +872,7 @@ static void put_pi_state(struct futex_pi_state *pi_state)
raw_spin_unlock(&owner->pi_lock);
}
rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
- raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
}
if (current->pi_state_cache) {
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index a117adf7084b..779d858da2b3 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -80,6 +80,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS
# Generic IRQ IPI support
config GENERIC_IRQ_IPI
bool
+ select IRQ_DOMAIN_HIERARCHY
# Generic MSI interrupt support
config GENERIC_MSI_IRQ
diff --git a/kernel/reboot.c b/kernel/reboot.c
index bd30a973fe94..2946ed1d99d4 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -512,22 +512,22 @@ static int __init reboot_setup(char *str)
break;
case 's':
- {
- int rc;
-
- if (isdigit(*(str+1))) {
- rc = kstrtoint(str+1, 0, &reboot_cpu);
- if (rc)
- return rc;
- } else if (str[1] == 'm' && str[2] == 'p' &&
- isdigit(*(str+3))) {
- rc = kstrtoint(str+3, 0, &reboot_cpu);
- if (rc)
- return rc;
- } else
+ if (isdigit(*(str+1)))
+ reboot_cpu = simple_strtoul(str+1, NULL, 0);
+ else if (str[1] == 'm' && str[2] == 'p' &&
+ isdigit(*(str+3)))
+ reboot_cpu = simple_strtoul(str+3, NULL, 0);
+ else
reboot_mode = REBOOT_SOFT;
+ if (reboot_cpu >= num_possible_cpus()) {
+ pr_err("Ignoring the CPU number in reboot= option. "
+ "CPU %d exceeds possible cpu number %d\n",
+ reboot_cpu, num_possible_cpus());
+ reboot_cpu = 0;
+ break;
+ }
break;
- }
+
case 'g':
reboot_mode = REBOOT_GPIO;
break;
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index f26acef5d7b4..7f718565507b 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -153,10 +153,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
- /*
- * Use the to_ktime conversion because that clamps the maximum
- * value to KTIME_MAX and avoid multiplication overflows.
- */
nval = ktime_to_ns(timeval_to_ktime(value->it_value));
ninterval = ktime_to_ns(timeval_to_ktime(value->it_interval));
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index d4bc272e03ee..99f885d2904a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1596,13 +1596,6 @@ void update_process_times(int user_tick)
scheduler_tick();
if (IS_ENABLED(CONFIG_POSIX_TIMERS))
run_posix_cpu_timers(p);
-
- /* The current CPU might make use of net randoms without receiving IRQs
- * to renew them often enough. Let's update the net_rand_state from a
- * non-constant value that's not affine to the number of calls to make
- * sure it's updated when there's some activity (we don't care in idle).
- */
- this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick);
}
/**
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b9b71e7fb697..8082328eb01a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -416,14 +416,16 @@ struct rb_event_info {
/*
* Used for which event context the event is in.
- * NMI = 0
- * IRQ = 1
- * SOFTIRQ = 2
- * NORMAL = 3
+ * TRANSITION = 0
+ * NMI = 1
+ * IRQ = 2
+ * SOFTIRQ = 3
+ * NORMAL = 4
*
* See trace_recursive_lock() comment below for more details.
*/
enum {
+ RB_CTX_TRANSITION,
RB_CTX_NMI,
RB_CTX_IRQ,
RB_CTX_SOFTIRQ,
@@ -2553,10 +2555,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* a bit of overhead in something as critical as function tracing,
* we use a bitmask trick.
*
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
+ * bit 1 = NMI context
+ * bit 2 = IRQ context
+ * bit 3 = SoftIRQ context
+ * bit 4 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -2579,6 +2581,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSTION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between context.
*/
static __always_inline int
@@ -2597,8 +2623,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
} else
bit = RB_CTX_NORMAL;
- if (unlikely(val & (1 << bit)))
- return 1;
+ if (unlikely(val & (1 << bit))) {
+ /*
+ * It is possible that this was called by transitioning
+ * between interrupt context, and preempt_count() has not
+ * been updated yet. In this case, use the TRANSITION bit.
+ */
+ bit = RB_CTX_TRANSITION;
+ if (val & (1 << bit))
+ return 1;
+ }
val |= (1 << bit);
cpu_buffer->current_context = val;
diff --git a/lib/random32.c b/lib/random32.c
index eb54663e9e94..f5e967f4adfa 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -40,16 +40,6 @@
#include <linux/sched.h>
#include <asm/unaligned.h>
-#ifdef CONFIG_RANDOM32_SELFTEST
-static void __init prandom_state_selftest(void);
-#else
-static inline void prandom_state_selftest(void)
-{
-}
-#endif
-
-DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy;
-
/**
* prandom_u32_state - seeded pseudo-random number generator.
* @state: pointer to state structure holding seeded state.
@@ -69,25 +59,6 @@ u32 prandom_u32_state(struct rnd_state *state)
}
EXPORT_SYMBOL(prandom_u32_state);
-/**
- * prandom_u32 - pseudo random number generator
- *
- * A 32 bit pseudo-random number is generated using a fast
- * algorithm suitable for simulation. This algorithm is NOT
- * considered safe for cryptographic use.
- */
-u32 prandom_u32(void)
-{
- struct rnd_state *state = &get_cpu_var(net_rand_state);
- u32 res;
-
- res = prandom_u32_state(state);
- put_cpu_var(net_rand_state);
-
- return res;
-}
-EXPORT_SYMBOL(prandom_u32);
-
/**
* prandom_bytes_state - get the requested number of pseudo-random bytes
*
@@ -119,20 +90,6 @@ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes)
}
EXPORT_SYMBOL(prandom_bytes_state);
-/**
- * prandom_bytes - get the requested number of pseudo-random bytes
- * @buf: where to copy the pseudo-random bytes to
- * @bytes: the requested number of bytes
- */
-void prandom_bytes(void *buf, size_t bytes)
-{
- struct rnd_state *state = &get_cpu_var(net_rand_state);
-
- prandom_bytes_state(state, buf, bytes);
- put_cpu_var(net_rand_state);
-}
-EXPORT_SYMBOL(prandom_bytes);
-
static void prandom_warmup(struct rnd_state *state)
{
/* Calling RNG ten times to satisfy recurrence condition */
@@ -148,96 +105,6 @@ static void prandom_warmup(struct rnd_state *state)
prandom_u32_state(state);
}
-static u32 __extract_hwseed(void)
-{
- unsigned int val = 0;
-
- (void)(arch_get_random_seed_int(&val) ||
- arch_get_random_int(&val));
-
- return val;
-}
-
-static void prandom_seed_early(struct rnd_state *state, u32 seed,
- bool mix_with_hwseed)
-{
-#define LCG(x) ((x) * 69069U) /* super-duper LCG */
-#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
- state->s1 = __seed(HWSEED() ^ LCG(seed), 2U);
- state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U);
- state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U);
- state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
-}
-
-/**
- * prandom_seed - add entropy to pseudo random number generator
- * @seed: seed value
- *
- * Add some additional seeding to the prandom pool.
- */
-void prandom_seed(u32 entropy)
-{
- int i;
- /*
- * No locking on the CPUs, but then somewhat random results are, well,
- * expected.
- */
- for_each_possible_cpu(i) {
- struct rnd_state *state = &per_cpu(net_rand_state, i);
-
- state->s1 = __seed(state->s1 ^ entropy, 2U);
- prandom_warmup(state);
- }
-}
-EXPORT_SYMBOL(prandom_seed);
-
-/*
- * Generate some initially weak seeding values to allow
- * to start the prandom_u32() engine.
- */
-static int __init prandom_init(void)
-{
- int i;
-
- prandom_state_selftest();
-
- for_each_possible_cpu(i) {
- struct rnd_state *state = &per_cpu(net_rand_state, i);
- u32 weak_seed = (i + jiffies) ^ random_get_entropy();
-
- prandom_seed_early(state, weak_seed, true);
- prandom_warmup(state);
- }
-
- return 0;
-}
-core_initcall(prandom_init);
-
-static void __prandom_timer(unsigned long dontcare);
-
-static DEFINE_TIMER(seed_timer, __prandom_timer, 0, 0);
-
-static void __prandom_timer(unsigned long dontcare)
-{
- u32 entropy;
- unsigned long expires;
-
- get_random_bytes(&entropy, sizeof(entropy));
- prandom_seed(entropy);
-
- /* reseed every ~60 seconds, in [40 .. 80) interval with slack */
- expires = 40 + prandom_u32_max(40);
- seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC);
-
- add_timer(&seed_timer);
-}
-
-static void __init __prandom_start_seed_timer(void)
-{
- seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
- add_timer(&seed_timer);
-}
-
void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state)
{
int i;
@@ -257,51 +124,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state)
}
EXPORT_SYMBOL(prandom_seed_full_state);
-/*
- * Generate better values after random number generator
- * is fully initialized.
- */
-static void __prandom_reseed(bool late)
-{
- unsigned long flags;
- static bool latch = false;
- static DEFINE_SPINLOCK(lock);
-
- /* Asking for random bytes might result in bytes getting
- * moved into the nonblocking pool and thus marking it
- * as initialized. In this case we would double back into
- * this function and attempt to do a late reseed.
- * Ignore the pointless attempt to reseed again if we're
- * already waiting for bytes when the nonblocking pool
- * got initialized.
- */
-
- /* only allow initial seeding (late == false) once */
- if (!spin_trylock_irqsave(&lock, flags))
- return;
-
- if (latch && !late)
- goto out;
-
- latch = true;
- prandom_seed_full_state(&net_rand_state);
-out:
- spin_unlock_irqrestore(&lock, flags);
-}
-
-void prandom_reseed_late(void)
-{
- __prandom_reseed(true);
-}
-
-static int __init prandom_reseed(void)
-{
- __prandom_reseed(false);
- __prandom_start_seed_timer();
- return 0;
-}
-late_initcall(prandom_reseed);
-
#ifdef CONFIG_RANDOM32_SELFTEST
static struct prandom_test1 {
u32 seed;
@@ -421,7 +243,28 @@ static struct prandom_test2 {
{ 407983964U, 921U, 728767059U },
};
-static void __init prandom_state_selftest(void)
+static u32 __extract_hwseed(void)
+{
+ unsigned int val = 0;
+
+ (void)(arch_get_random_seed_int(&val) ||
+ arch_get_random_int(&val));
+
+ return val;
+}
+
+static void prandom_seed_early(struct rnd_state *state, u32 seed,
+ bool mix_with_hwseed)
+{
+#define LCG(x) ((x) * 69069U) /* super-duper LCG */
+#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
+ state->s1 = __seed(HWSEED() ^ LCG(seed), 2U);
+ state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U);
+ state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U);
+ state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
+}
+
+static int __init prandom_state_selftest(void)
{
int i, j, errors = 0, runs = 0;
bool error = false;
@@ -461,5 +304,266 @@ static void __init prandom_state_selftest(void)
pr_warn("prandom: %d/%d self tests failed\n", errors, runs);
else
pr_info("prandom: %d self tests passed\n", runs);
+ return 0;
}
+core_initcall(prandom_state_selftest);
#endif
+
+/*
+ * The prandom_u32() implementation is now completely separate from the
+ * prandom_state() functions, which are retained (for now) for compatibility.
+ *
+ * Because of (ab)use in the networking code for choosing random TCP/UDP port
+ * numbers, which open DoS possibilities if guessable, we want something
+ * stronger than a standard PRNG. But the performance requirements of
+ * the network code do not allow robust crypto for this application.
+ *
+ * So this is a homebrew Junior Spaceman implementation, based on the
+ * lowest-latency trustworthy crypto primitive available, SipHash.
+ * (The authors of SipHash have not been consulted about this abuse of
+ * their work.)
+ *
+ * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to
+ * one word of output. This abbreviated version uses 2 rounds per word
+ * of output.
+ */
+
+struct siprand_state {
+ unsigned long v0;
+ unsigned long v1;
+ unsigned long v2;
+ unsigned long v3;
+};
+
+static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
+
+/*
+ * This is the core CPRNG function. As "pseudorandom", this is not used
+ * for truly valuable things, just intended to be a PITA to guess.
+ * For maximum speed, we do just two SipHash rounds per word. This is
+ * the same rate as 4 rounds per 64 bits that SipHash normally uses,
+ * so hopefully it's reasonably secure.
+ *
+ * There are two changes from the official SipHash finalization:
+ * - We omit some constants XORed with v2 in the SipHash spec as irrelevant;
+ * they are there only to make the output rounds distinct from the input
+ * rounds, and this application has no input rounds.
+ * - Rather than returning v0^v1^v2^v3, return v1+v3.
+ * If you look at the SipHash round, the last operation on v3 is
+ * "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time.
+ * Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but
+ * it still cancels out half of the bits in v2 for no benefit.)
+ * Second, since the last combining operation was xor, continue the
+ * pattern of alternating xor/add for a tiny bit of extra non-linearity.
+ */
+static inline u32 siprand_u32(struct siprand_state *s)
+{
+ unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3;
+
+ PRND_SIPROUND(v0, v1, v2, v3);
+ PRND_SIPROUND(v0, v1, v2, v3);
+ s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3;
+ return v1 + v3;
+}
+
+
+/**
+ * prandom_u32 - pseudo random number generator
+ *
+ * A 32 bit pseudo-random number is generated using a fast
+ * algorithm suitable for simulation. This algorithm is NOT
+ * considered safe for cryptographic use.
+ */
+u32 prandom_u32(void)
+{
+ struct siprand_state *state = get_cpu_ptr(&net_rand_state);
+ u32 res = siprand_u32(state);
+
+ put_cpu_ptr(&net_rand_state);
+ return res;
+}
+EXPORT_SYMBOL(prandom_u32);
+
+/**
+ * prandom_bytes - get the requested number of pseudo-random bytes
+ * @buf: where to copy the pseudo-random bytes to
+ * @bytes: the requested number of bytes
+ */
+void prandom_bytes(void *buf, size_t bytes)
+{
+ struct siprand_state *state = get_cpu_ptr(&net_rand_state);
+ u8 *ptr = buf;
+
+ while (bytes >= sizeof(u32)) {
+ put_unaligned(siprand_u32(state), (u32 *)ptr);
+ ptr += sizeof(u32);
+ bytes -= sizeof(u32);
+ }
+
+ if (bytes > 0) {
+ u32 rem = siprand_u32(state);
+
+ do {
+ *ptr++ = (u8)rem;
+ rem >>= BITS_PER_BYTE;
+ } while (--bytes > 0);
+ }
+ put_cpu_ptr(&net_rand_state);
+}
+EXPORT_SYMBOL(prandom_bytes);
+
+/**
+ * prandom_seed - add entropy to pseudo random number generator
+ * @entropy: entropy value
+ *
+ * Add some additional seed material to the prandom pool.
+ * The "entropy" is actually our IP address (the only caller is
+ * the network code), not for unpredictability, but to ensure that
+ * different machines are initialized differently.
+ */
+void prandom_seed(u32 entropy)
+{
+ int i;
+
+ add_device_randomness(&entropy, sizeof(entropy));
+
+ for_each_possible_cpu(i) {
+ struct siprand_state *state = per_cpu_ptr(&net_rand_state, i);
+ unsigned long v0 = state->v0, v1 = state->v1;
+ unsigned long v2 = state->v2, v3 = state->v3;
+
+ do {
+ v3 ^= entropy;
+ PRND_SIPROUND(v0, v1, v2, v3);
+ PRND_SIPROUND(v0, v1, v2, v3);
+ v0 ^= entropy;
+ } while (unlikely(!v0 || !v1 || !v2 || !v3));
+
+ WRITE_ONCE(state->v0, v0);
+ WRITE_ONCE(state->v1, v1);
+ WRITE_ONCE(state->v2, v2);
+ WRITE_ONCE(state->v3, v3);
+ }
+}
+EXPORT_SYMBOL(prandom_seed);
+
+/*
+ * Generate some initially weak seeding values to allow
+ * the prandom_u32() engine to be started.
+ */
+static int __init prandom_init_early(void)
+{
+ int i;
+ unsigned long v0, v1, v2, v3;
+
+ if (!arch_get_random_long(&v0))
+ v0 = jiffies;
+ if (!arch_get_random_long(&v1))
+ v1 = random_get_entropy();
+ v2 = v0 ^ PRND_K0;
+ v3 = v1 ^ PRND_K1;
+
+ for_each_possible_cpu(i) {
+ struct siprand_state *state;
+
+ v3 ^= i;
+ PRND_SIPROUND(v0, v1, v2, v3);
+ PRND_SIPROUND(v0, v1, v2, v3);
+ v0 ^= i;
+
+ state = per_cpu_ptr(&net_rand_state, i);
+ state->v0 = v0; state->v1 = v1;
+ state->v2 = v2; state->v3 = v3;
+ }
+
+ return 0;
+}
+core_initcall(prandom_init_early);
+
+
+/* Stronger reseeding when available, and periodically thereafter. */
+static void prandom_reseed(unsigned long dontcare);
+
+static DEFINE_TIMER(seed_timer, prandom_reseed, 0, 0);
+
+static void prandom_reseed(unsigned long dontcare)
+{
+ unsigned long expires;
+ int i;
+
+ /*
+ * Reinitialize each CPU's PRNG with 128 bits of key.
+ * No locking on the CPUs, but then somewhat random results are,
+ * well, expected.
+ */
+ for_each_possible_cpu(i) {
+ struct siprand_state *state;
+ unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0;
+ unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1;
+#if BITS_PER_LONG == 32
+ int j;
+
+ /*
+ * On 32-bit machines, hash in two extra words to
+ * approximate 128-bit key length. Not that the hash
+ * has that much security, but this prevents a trivial
+ * 64-bit brute force.
+ */
+ for (j = 0; j < 2; j++) {
+ unsigned long m = get_random_long();
+
+ v3 ^= m;
+ PRND_SIPROUND(v0, v1, v2, v3);
+ PRND_SIPROUND(v0, v1, v2, v3);
+ v0 ^= m;
+ }
+#endif
+ /*
+ * Probably impossible in practice, but there is a
+ * theoretical risk that a race between this reseeding
+ * and the target CPU writing its state back could
+ * create the all-zero SipHash fixed point.
+ *
+ * To ensure that never happens, ensure the state
+ * we write contains no zero words.
+ */
+ state = per_cpu_ptr(&net_rand_state, i);
+ WRITE_ONCE(state->v0, v0 ? v0 : -1ul);
+ WRITE_ONCE(state->v1, v1 ? v1 : -1ul);
+ WRITE_ONCE(state->v2, v2 ? v2 : -1ul);
+ WRITE_ONCE(state->v3, v3 ? v3 : -1ul);
+ }
+
+ /* reseed every ~60 seconds, in [40 .. 80) interval with slack */
+ expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ));
+ mod_timer(&seed_timer, expires);
+}
+
+/*
+ * The random ready callback can be called from almost any interrupt.
+ * To avoid worrying about whether it's safe to delay that interrupt
+ * long enough to seed all CPUs, just schedule an immediate timer event.
+ */
+static void prandom_timer_start(struct random_ready_callback *unused)
+{
+ mod_timer(&seed_timer, jiffies);
+}
+
+/*
+ * Start periodic full reseeding as soon as strong
+ * random numbers are available.
+ */
+static int __init prandom_init_late(void)
+{
+ static struct random_ready_callback random_ready = {
+ .func = prandom_timer_start
+ };
+ int ret = add_random_ready_callback(&random_ready);
+
+ if (ret == -EALREADY) {
+ prandom_timer_start(&random_ready);
+ ret = 0;
+ }
+ return ret;
+}
+late_initcall(prandom_init_late);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index b4c768de3344..e73617b11af1 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -254,6 +254,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
+ no_iotlb_memory = false;
if (verbose)
swiotlb_print_info();
@@ -285,9 +286,11 @@ swiotlb_init(int verbose)
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
return;
- if (io_tlb_start)
+ if (io_tlb_start) {
memblock_free_early(io_tlb_start,
PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+ io_tlb_start = 0;
+ }
pr_warn("Cannot allocate buffer");
no_iotlb_memory = true;
}
@@ -390,6 +393,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
}
io_tlb_index = 0;
+ no_iotlb_memory = false;
swiotlb_print_info();
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d857e4770cc8..4e30d23943d5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -496,7 +496,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
int ret;
- pte_t *pte;
+ pte_t *pte, *mapped_pte;
spinlock_t *ptl;
ptl = pmd_trans_huge_lock(pmd, vma);
@@ -511,7 +511,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
if (pmd_trans_unstable(pmd))
return 0;
retry:
- pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
if (!pte_present(*pte))
continue;
@@ -549,7 +549,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
} else
break;
}
- pte_unmap_unlock(pte - 1, ptl);
+ pte_unmap_unlock(mapped_pte, ptl);
cond_resched();
return addr != end ? -EIO : 0;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2f871424925e..bf8df824bae3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -296,7 +296,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct rtable *rt;
__u8 rcv_wscale;
struct flowi4 fl4;
@@ -389,8 +389,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ /* limit the window selection if the user enforce a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ tcp_select_initial_window(full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index fb3f917db57a..85b18319e2d8 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1073,7 +1073,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
if (tdev && !netif_is_l3_master(tdev)) {
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
dev->mtu = tdev->mtu - t_hlen;
if (dev->mtu < IPV6_MIN_MTU)
dev->mtu = IPV6_MIN_MTU;
@@ -1363,7 +1362,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->priv_destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
- dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
dev->min_mtu = IPV6_MIN_MTU;
dev->max_mtu = IP6_MAX_MTU - t_hlen;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 0fe8ffcda1d7..d1bfdb9c2f2d 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -141,7 +141,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct dst_entry *dst;
__u8 rcv_wscale;
u32 tsoff = 0;
@@ -244,7 +244,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
}
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ /* limit the window selection if the user enforce a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
+
+ tcp_select_initial_window(full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 7a9cbc9502d9..91235769c1b7 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1552,7 +1552,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
break;
}
- if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
+ if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) &&
+ sk->sk_state == IUCV_CONNECTED) {
if (iucv->transport == AF_IUCV_TRANS_IUCV) {
txmsg.class = 0;
txmsg.tag = 0;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1b1f2d6cb3f4..0ab710576673 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1851,19 +1851,24 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
/* device xmit handlers */
+enum ieee80211_encrypt {
+ ENCRYPT_NO,
+ ENCRYPT_MGMT,
+ ENCRYPT_DATA,
+};
+
static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
- int head_need, bool may_encrypt)
+ int head_need,
+ enum ieee80211_encrypt encrypt)
{
struct ieee80211_local *local = sdata->local;
- struct ieee80211_hdr *hdr;
bool enc_tailroom;
int tail_need = 0;
- hdr = (struct ieee80211_hdr *) skb->data;
- enc_tailroom = may_encrypt &&
- (sdata->crypto_tx_tailroom_needed_cnt ||
- ieee80211_is_mgmt(hdr->frame_control));
+ enc_tailroom = encrypt == ENCRYPT_MGMT ||
+ (encrypt == ENCRYPT_DATA &&
+ sdata->crypto_tx_tailroom_needed_cnt);
if (enc_tailroom) {
tail_need = IEEE80211_ENCRYPT_TAILROOM;
@@ -1896,21 +1901,27 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
int headroom;
- bool may_encrypt;
+ enum ieee80211_encrypt encrypt;
- may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT);
+ if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)
+ encrypt = ENCRYPT_NO;
+ else if (ieee80211_is_mgmt(hdr->frame_control))
+ encrypt = ENCRYPT_MGMT;
+ else
+ encrypt = ENCRYPT_DATA;
headroom = local->tx_headroom;
- if (may_encrypt)
+ if (encrypt != ENCRYPT_NO)
headroom += sdata->encrypt_headroom;
headroom -= skb_headroom(skb);
headroom = max_t(int, 0, headroom);
- if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) {
+ if (ieee80211_skb_resize(sdata, skb, headroom, encrypt)) {
ieee80211_free_txskb(&local->hw, skb);
return;
}
+ /* reload after potential resize */
hdr = (struct ieee80211_hdr *) skb->data;
info->control.vif = &sdata->vif;
@@ -2692,7 +2703,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
head_need += sdata->encrypt_headroom;
head_need += local->tx_headroom;
head_need = max_t(int, 0, head_need);
- if (ieee80211_skb_resize(sdata, skb, head_need, true)) {
+ if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) {
ieee80211_free_txskb(&local->hw, skb);
skb = NULL;
return ERR_PTR(-ENOMEM);
@@ -3352,7 +3363,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
if (unlikely(ieee80211_skb_resize(sdata, skb,
max_t(int, extra_head + hw_headroom -
skb_headroom(skb), 0),
- false))) {
+ ENCRYPT_NO))) {
kfree_skb(skb);
return true;
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 9eb9d34cef7b..db8cc505caf7 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2846,7 +2846,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
power_rule = ®_rule->power_rule;
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
- snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
+ snprintf(bw, sizeof(bw), "%d KHz, %u KHz AUTO",
freq_range->max_bandwidth_khz,
reg_get_max_bandwidth(rd, reg_rule));
else
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index f4fa33b84cde..790b79647004 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -823,7 +823,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTED;
rc = 0;
out_put_neigh:
- if (rc) {
+ if (rc && x25->neighbour) {
read_lock_bh(&x25_list_lock);
x25_neigh_put(x25->neighbour);
x25->neighbour = NULL;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 05c275a712f1..5164dfe0aa09 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1783,6 +1783,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
int err = -ENOENT;
__be32 minspi = htonl(low);
__be32 maxspi = htonl(high);
+ __be32 newspi = 0;
u32 mark = x->mark.v & x->mark.m;
spin_lock_bh(&x->lock);
@@ -1801,21 +1802,22 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
xfrm_state_put(x0);
goto unlock;
}
- x->id.spi = minspi;
+ newspi = minspi;
} else {
u32 spi = 0;
for (h = 0; h < high-low+1; h++) {
spi = low + prandom_u32()%(high-low+1);
x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
if (x0 == NULL) {
- x->id.spi = htonl(spi);
+ newspi = htonl(spi);
break;
}
xfrm_state_put(x0);
}
}
- if (x->id.spi) {
+ if (newspi) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ x->id.spi = newspi;
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c
index e3614ee5f1c0..36e61f622b5a 100644
--- a/security/selinux/ibpkey.c
+++ b/security/selinux/ibpkey.c
@@ -160,8 +160,10 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid)
* is valid, it just won't be added to the cache.
*/
new = kzalloc(sizeof(*new), GFP_ATOMIC);
- if (!new)
+ if (!new) {
+ ret = -ENOMEM;
goto out;
+ }
new->psec.subnet_prefix = subnet_prefix;
new->psec.pkey = pkey_num;
diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c
index 84f3b8168716..b679d5f37e4d 100644
--- a/sound/hda/ext/hdac_ext_controller.c
+++ b/sound/hda/ext/hdac_ext_controller.c
@@ -155,6 +155,8 @@ struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_ext_bus *ebus,
return NULL;
if (ebus->idx != bus_idx)
return NULL;
+ if (addr < 0 || addr > 31)
+ return NULL;
list_for_each_entry(hlink, &ebus->hlink_list, list) {
for (i = 0; i < HDA_MAX_CODECS; i++) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c49e8ea1a42c..decd5d147e81 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -492,6 +492,7 @@ static void perf_event__mmap2_swap(union perf_event *event,
event->mmap2.maj = bswap_32(event->mmap2.maj);
event->mmap2.min = bswap_32(event->mmap2.min);
event->mmap2.ino = bswap_64(event->mmap2.ino);
+ event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
if (sample_id_all) {
void *data = &event->mmap2.filename;
prev parent reply other threads:[~2020-11-19 12:15 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-19 12:14 Linux 4.14.207 Greg Kroah-Hartman
2020-11-19 12:14 ` Greg Kroah-Hartman [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=160578808695214@kroah.com \
--to=gregkh@linuxfoundation.org \
--cc=akpm@linux-foundation.org \
--cc=jslaby@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=lwn@lwn.net \
--cc=stable@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.