From: "Sharma, Nishit" <nishit.sharma@intel.com>
To: Sobin Thomas <sobin.thomas@intel.com>,
<igt-dev@lists.freedesktop.org>, <thomas.hellstrom@intel.com>
Subject: Re: [PATCH v2 i-g-t 1/1] test/intel/xe_vm: Add oversubscribe concurrent bind stress subtest
Date: Tue, 10 Mar 2026 11:45:25 +0530 [thread overview]
Message-ID: <12a73707-f04e-4c25-80f0-502200f249fd@intel.com> (raw)
In-Reply-To: <20260309115416.1704717-2-sobin.thomas@intel.com>
On 3/9/2026 5:24 PM, Sobin Thomas wrote:
> Add a test for oversubscribing VRAM in a multi-process environment
> that creates VMs, binds large BOs, and submits workloads nearly
> simultaneously.
>
> Previous coverage lacked a scenario combining multi-process bind
> with VRAM oversubscription. This generates memory pressure with
> multi-process VM Bind activity and concurrent submission, exercising
> the bind pipeline under eviction pressure.
>
> Signed-off-by: Sobin Thomas <sobin.thomas@intel.com>
> ---
> tests/intel/xe_vm.c | 438 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 438 insertions(+)
>
> diff --git a/tests/intel/xe_vm.c b/tests/intel/xe_vm.c
> index ccff8f804..ed46f4bee 100644
> --- a/tests/intel/xe_vm.c
> +++ b/tests/intel/xe_vm.c
> @@ -19,8 +19,176 @@
> #include "xe/xe_ioctl.h"
> #include "xe/xe_query.h"
> #include "xe/xe_spin.h"
> +#include <inttypes.h>
> #include <string.h>
>
> +#define MI_BB_END (0 << 29 | 0x0A << 23 | 0)
> +#define MI_LOAD_REG_MEM (0 << 29 | 0x29 << 23 | 0 << 22 | 0 << 21 | 1 << 19 | 2)
> +#define MI_STORE_REG_MEM (0 << 29 | 0x24 << 23 | 0 << 22 | 0 << 21 | 1 << 19 | 2)
> +#define MI_MATH_R(length) (0 << 29 | 0x1A << 23 | ((length) & 0xFF))
> +#define GPR_RX_ADDR(x) (0x600 + (x) * 8)
> +#define ALU_LOAD(dst, src) (0x080 << 20 | ((dst) << 10) | (src))
> +#define ALU_STORE(dst, src) (0x180 << 20 | (dst) << 10 | (src))
> +#define ALU_ADD (0x100 << 20)
> +#define ALU_RX(x) (x)
> +#define ALU_SRCA 0x20
> +#define ALU_SRCB 0x21
> +#define ALU_ACCU 0x31
> +#define GB(x) (1024ULL * 1024ULL * 1024ULL * (x))
> +
> +struct gem_bo {
> + uint32_t handle;
> + uint64_t size;
> + int *ptr;
> + uint64_t addr;
> +};
> +
> +struct xe_test_ctx {
> + int fd;
> + uint32_t vm_id;
> + uint32_t exec_queue_id;
> + uint16_t sram_instance;
> + uint16_t vram_instance;
> + bool has_vram;
> +};
> +
> +static uint64_t align_to_page_size(uint64_t size)
> +{
> + return (size + 4095UL) & ~4095UL;
> +}
> +
> +static void create_exec_queue(int fd, struct xe_test_ctx *ctx)
> +{
> + struct drm_xe_engine_class_instance *hwe;
> + struct drm_xe_engine_class_instance eci = {
> + .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
> + };
> +
> + /* Find first render engine */
> + xe_for_each_engine(fd, hwe) {
> + if (hwe->engine_class == DRM_XE_ENGINE_CLASS_RENDER) {
> + eci = *hwe;
> + break;
> + }
> + }
> + ctx->exec_queue_id = xe_exec_queue_create(fd, ctx->vm_id, &eci, 0);
> +}
> +
> +static void vm_bind_gem_bo(int fd, struct xe_test_ctx *ctx, uint32_t handle,
> + uint64_t addr, uint64_t size)
> +{
> + int rc;
> + uint64_t timeline_val = 1;
> + uint32_t syncobj_handle = syncobj_create(fd, 0);
> +
> + struct drm_xe_sync bind_sync = {
> + .extensions = 0,
> + .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
> + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> + .handle = syncobj_handle,
> + .timeline_value = timeline_val,
> + };
> + struct drm_xe_vm_bind vm_bind = {
> + .extensions = 0,
> + .vm_id = ctx->vm_id,
> + .exec_queue_id = 0,
> + .num_binds = 1,
> + .bind = {
> + .obj = handle,
> + .obj_offset = 0,
> + .range = size,
> + .addr = addr,
> + .op = DRM_XE_VM_BIND_OP_MAP,
> + .flags = 0,
> + },
> + .num_syncs = 1,
> + .syncs = (uintptr_t)&bind_sync,
> + };
> + rc = igt_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &vm_bind);
> +
> + igt_assert(rc == 0);
> +
> + /* The right way to do this in the real world is to not wait for the
> + * syncobj here - since that just makes everything synchronous - but
> + * instead pass the syncobj as a 'wait'-type object to the exec
> + * ioctl. We do it here just to keep the example simple.
> + */
> + igt_assert(syncobj_timeline_wait(fd, &syncobj_handle, &timeline_val,
> + 1, INT64_MAX, 0, NULL));
> +
> + syncobj_destroy(fd, syncobj_handle);
> +}
> +
> +static uint32_t
> +vm_bind_gem_bos(int fd, struct xe_test_ctx *ctx, struct gem_bo *bos, int size)
> +{
> + int rc;
> + uint32_t syncobj_handle = syncobj_create(fd, 0);
> + uint64_t timeline_val = 1;
> + struct drm_xe_sync bind_sync = {
> + .extensions = 0,
> + .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
> + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> + .handle = syncobj_handle,
> + .timeline_value = timeline_val,
> + };
> + struct drm_xe_vm_bind_op binds[size];
> + struct drm_xe_vm_bind vm_bind = {
> + .extensions = 0,
> + .vm_id = ctx->vm_id,
> + .exec_queue_id = 0,
> + .num_binds = size,
> + .vector_of_binds = (uintptr_t)binds,
> + .num_syncs = 1,
> + .syncs = (uintptr_t)&bind_sync,
> + };
> +
> + /* Need to call the ioctl differently when size is 1. */
> + igt_assert(size != 1);
How is the ioctl called when size is 1? As written, this helper simply
asserts and terminates the test instead of handling the single-bind
case.
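For reference, a minimal sketch of handling both cases in one helper
(assuming the uapi convention the patch already relies on: for
num_binds == 1 the kernel reads the inline 'bind' member, for
num_binds > 1 it reads 'vector_of_binds'):

	/* after populating binds[] below */
	vm_bind.num_binds = size;
	if (size == 1)
		vm_bind.bind = binds[0];	/* single op goes inline */
	else
		vm_bind.vector_of_binds = (uintptr_t)binds;
	rc = igt_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &vm_bind);
	igt_assert(rc == 0);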
> +
> + for (int i = 0; i < size; i++) {
> + binds[i] = (struct drm_xe_vm_bind_op) {
> + .extensions = 0,
> + .obj = bos[i].handle,
> + .pat_index = 0,
> + .pad = 0,
> + .obj_offset = 0,
> + .range = bos[i].size,
> + .addr = bos[i].addr,
> + .op = DRM_XE_VM_BIND_OP_MAP,
> + .flags = 0,
> + .prefetch_mem_region_instance = 0,
> + .pad2 = 0,
> + };
> + }
> + rc = igt_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &vm_bind);
> + igt_assert(rc == 0);
> +
> + return syncobj_handle;
> +}
> +
> +static void query_mem_info(int fd, struct xe_test_ctx *ctx)
> +{
> + uint64_t vram_reg, sys_reg;
> + struct drm_xe_mem_region *region;
> +
> + ctx->has_vram = xe_has_vram(fd);
> + if (ctx->has_vram) {
> + /* Get VRAM instance - vram_memory returns a bitmask,
> + * so we extract the instance from it
> + */
> + vram_reg = vram_memory(fd, 0);
> + region = xe_mem_region(fd, vram_reg);
> + ctx->vram_instance = region->instance;
> + }
> +
> + /* Get SRAM instance */
> + sys_reg = system_memory(fd);
> + region = xe_mem_region(fd, sys_reg);
> + ctx->sram_instance = region->instance;
> + igt_debug("has_vram: %d\n", ctx->has_vram);
> +}
> +
> static uint32_t
> addr_low(uint64_t addr)
> {
> @@ -2450,6 +2618,271 @@ static void test_oom(int fd)
> }
> }
>
> +/**
> + * SUBTEST: oversubscribe-concurrent-bind
> + * Description: Test for oversubscribing the VM with multiple processes
> + * doing binds at the same time, and ensure they all complete successfully.
> + * Functionality: This check is for a specific bug where if multiple processes
> + * oversubscribe the VM, some of the binds may fail with ENOMEM due to
> + * deadlock in the bind code.
> + * Test category: stress test
> + */
> +static void test_vm_oversubscribe_concurrent_bind(int fd)
> +{
> + uint64_t vram_size = xe_visible_vram_size(fd, 0);
Shouldn't this be the *available* VRAM on the xe device?
xe_visible_vram_size() reports the total VRAM present on the device,
but some of that is already in use for other operations, so
xe_visible_vram_size(fd, 0) returns a size greater than what
xe_visible_available_vram_size()/xe_available_vram_size() report.
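Something like this, for example (a sketch, assuming
xe_visible_available_vram_size() takes the same fd/gt arguments as
xe_visible_vram_size()):

	uint64_t vram_size = xe_visible_available_vram_size(fd, 0);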
> + uint64_t sram_avail = (uint64_t)igt_get_avail_ram_mb() << 20;
> + const uint64_t buf_size = GB(1);
> + uint64_t target_vram = vram_size * 2; /* 2x VRAM */
> + uint64_t target_sram = sram_avail * 60 / 100; /* 60% system RAM */
> +
> + int total_vram_bufs = target_vram / buf_size;
> + int total_sram_bufs = target_sram / buf_size;
> +
> + /* determine concurrency from memory pressure */
> + int max_proc_vram = total_vram_bufs / 2;
> + int max_proc_sram = total_sram_bufs;
> + int n_proc = min(max_proc_vram, max_proc_sram);
> + int n_vram_bufs = max(2, total_vram_bufs / n_proc);
> + int n_sram_bufs = max(2, total_sram_bufs / n_proc);
> + uint64_t total_vram_demand = (uint64_t)n_proc * n_vram_bufs * buf_size;
> + pthread_barrier_t *barrier;
> + pthread_barrierattr_t attr;
> +
> + igt_debug("VRAM demand: %" PRIu64 "GB (%.2fx oversubscription)\n",
> + total_vram_demand >> 30, (double)total_vram_demand / vram_size);
> + igt_debug("Processes=%d VRAM_bufs=%d SRAM_bufs=%d\n", n_proc,
> + n_vram_bufs, n_sram_bufs);
> +
> + /* Create shared memory barrier for process synchronization */
> + barrier = mmap(NULL, sizeof(pthread_barrier_t), PROT_READ | PROT_WRITE,
> + MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> + igt_assert(barrier != MAP_FAILED);
> + pthread_barrierattr_init(&attr);
> + pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
> + pthread_barrier_init(barrier, &attr, n_proc);
> + igt_fork(child, n_proc) {
> + struct xe_test_ctx ctx = {0};
> + int rc;
> + uint64_t addr = GB(1);
This addr should be a GPU VA, correct? If so, why is it assigned a
size (1GB) rather than an explicit GPU VA base?
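A sketch of what I mean (USER_VA_BASE is a hypothetical name, the
value is up to you):

	#define USER_VA_BASE GB(1)	/* start of the per-process VA range */
	...
	uint64_t addr = USER_VA_BASE;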
> + struct timespec start;
> + uint32_t vram_binds_syncobj, sram_binds_syncobj;
> + struct gem_bo *vram_bufs;
> + struct gem_bo *sram_bufs;
> + int expected_result = 0;
> + int ints_to_add = 4;
> + int gpu_result;
> + int retries;
> + int max_retries = 1024;
> + uint32_t batch_syncobj;
> + /* integers_bo contains the integers we're going to add. */
> + struct gem_bo integers_bo, result_bo, batch_bo;
> + uint64_t tmp_addr;
> + struct drm_xe_sync batch_syncs[3];
> + int n_batch_syncs = 0;
> + int pos = 0;
> + uint64_t timeline_val = 1;
> + struct drm_xe_exec exec;
> +
> + if (n_vram_bufs == 0 && n_sram_bufs == 0)
> + return;
This should be checked above, where n_vram_bufs and n_sram_bufs are
computed outside the fork; otherwise system resources are already
tied up by the forked children before you return from here.
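For example (a sketch):

	igt_require(n_vram_bufs || n_sram_bufs);
	igt_fork(child, n_proc) {
		...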
> +
> + vram_bufs = (struct gem_bo *)calloc(n_vram_bufs, sizeof(struct gem_bo));
> + sram_bufs = (struct gem_bo *)calloc(n_sram_bufs, sizeof(struct gem_bo));
> +
> + if (!vram_bufs || !sram_bufs)
> + igt_assert_f(0, "Failed to allocate memory for buffer objects\n");
These two checks can be clubbed together, e.g.
igt_assert(vram_bufs && sram_bufs).
> +
> + rc = clock_gettime(CLOCK_MONOTONIC, &start);
> + igt_assert(rc == 0);
> + ctx.vm_id = xe_vm_create(fd, DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE, 0);
> + query_mem_info(fd, &ctx);
> + create_exec_queue(fd, &ctx);
> + for (int i = 0; i < n_vram_bufs; i++) {
> + struct gem_bo *bo = &vram_bufs[i];
> +
> + bo->size = GB(1);
> + bo->handle = xe_bo_create_caching(fd, ctx.vm_id, vram_bufs[i].size,
> + vram_memory(fd, 0), 0,
> + DRM_XE_GEM_CPU_CACHING_WC);
> + bo->ptr = NULL;
> + bo->addr = addr;
This should be a valid GPU address, not a size.
> + addr += bo->size;
Basically addr starts out holding a size, and then another size is
added to it. It should be a base offset (a valid GPU VA) plus
bo->size.
> + igt_debug("vram buffer %d created at 0x%016lx\n",
> + i, bo->addr);
> + }
> + for (int i = 0; i < n_sram_bufs; i++) {
> + struct gem_bo *bo = &sram_bufs[i];
> +
> + bo->size = GB(1);
> + bo->handle = xe_bo_create_caching(fd, ctx.vm_id, sram_bufs[i].size,
> + system_memory(fd), 0,
> + DRM_XE_GEM_CPU_CACHING_WC);
> + bo->ptr = NULL;
> + bo->addr = addr;
Same as above.
> + addr += bo->size;
Same as above.
> + igt_debug("sram buffer %d created at 0x%016lx\n", i, bo->addr);
> + }
> + pthread_barrier_wait(barrier);
> +
> + if (n_vram_bufs)
> + vram_binds_syncobj = vm_bind_gem_bos(fd, &ctx, vram_bufs, n_vram_bufs);
> +
> + if (n_sram_bufs)
> + sram_binds_syncobj = vm_bind_gem_bos(fd, &ctx, sram_bufs, n_sram_bufs);
> +
> + integers_bo.size = align_to_page_size(sizeof(int) * ints_to_add);
> + integers_bo.handle = xe_bo_create_caching(fd, ctx.vm_id, integers_bo.size,
> + system_memory(fd), 0,
> + DRM_XE_GEM_CPU_CACHING_WC);
> + integers_bo.ptr = (int *)xe_bo_map(fd, integers_bo.handle, integers_bo.size);
> +
> + integers_bo.addr = 0x100000;
> +
> + for (int i = 0; i < ints_to_add; i++) {
> + int random_int = rand() % 8;
> +
> + integers_bo.ptr[i] = random_int;
> + expected_result += random_int;
> +
> + }
> + igt_assert_eq(munmap(integers_bo.ptr, integers_bo.size), 0);
> + integers_bo.ptr = NULL;
Do the munmap last, once the test has completely executed. If the
buffer is unmapped here and something later tries to access the
mapping, that can lead to unwanted results.
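i.e. move it down next to the other cleanup (a sketch):

	/* after syncobj_timeline_wait() and the result checks */
	igt_assert_eq(munmap(integers_bo.ptr, integers_bo.size), 0);
	integers_bo.ptr = NULL;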
> +
> + result_bo.size = align_to_page_size(sizeof(int));
> + result_bo.handle = xe_bo_create_caching(fd, ctx.vm_id, result_bo.size,
> + system_memory(fd), 0,
> + DRM_XE_GEM_CPU_CACHING_WC);
> + result_bo.ptr = NULL;
> + result_bo.addr = 0x200000;
> + /* batch_bo contains the commands the GPU will run. */
> +
> + batch_bo.size = 4096;
> + batch_bo.handle = xe_bo_create_caching(fd, ctx.vm_id, batch_bo.size,
> + system_memory(fd), 0,
> + DRM_XE_GEM_CPU_CACHING_WC);
> +
> + batch_bo.ptr = (int *)xe_bo_map(fd, batch_bo.handle, batch_bo.size);
> + batch_bo.addr = 0x300000;
> +
> + /* r0 = integers_bo[0] */
> + batch_bo.ptr[pos++] = MI_LOAD_REG_MEM;
> + batch_bo.ptr[pos++] = GPR_RX_ADDR(0);
> + tmp_addr = integers_bo.addr + 0 * sizeof(uint32_t);
> + batch_bo.ptr[pos++] = tmp_addr & 0xFFFFFFFF;
> + batch_bo.ptr[pos++] = (tmp_addr >> 32) & 0xFFFFFFFF;
> + for (int i = 1; i < ints_to_add; i++) {
> + /* r1 = integers_bo[i] */
> + batch_bo.ptr[pos++] = MI_LOAD_REG_MEM;
> + batch_bo.ptr[pos++] = GPR_RX_ADDR(1);
> + tmp_addr = integers_bo.addr + i * sizeof(uint32_t);
> + batch_bo.ptr[pos++] = tmp_addr & 0xFFFFFFFF;
> + batch_bo.ptr[pos++] = (tmp_addr >> 32) & 0xFFFFFFFF;
> + /* r0 = r0 + r1 */
> + batch_bo.ptr[pos++] = MI_MATH_R(3);
> + batch_bo.ptr[pos++] = ALU_LOAD(ALU_SRCA, ALU_RX(0));
> + batch_bo.ptr[pos++] = ALU_LOAD(ALU_SRCB, ALU_RX(1));
> + batch_bo.ptr[pos++] = ALU_ADD;
> + batch_bo.ptr[pos++] = ALU_STORE(ALU_RX(0), ALU_ACCU);
> + }
> + /* result_bo[0] = r0 */
> + batch_bo.ptr[pos++] = MI_STORE_REG_MEM;
> + batch_bo.ptr[pos++] = GPR_RX_ADDR(0);
> + tmp_addr = result_bo.addr + 0 * sizeof(uint32_t);
> + batch_bo.ptr[pos++] = tmp_addr & 0xFFFFFFFF;
> + batch_bo.ptr[pos++] = (tmp_addr >> 32) & 0xFFFFFFFF;
> +
> + batch_bo.ptr[pos++] = MI_BB_END;
> + while (pos % 4 != 0)
> + batch_bo.ptr[pos++] = MI_NOOP;
> +
> + igt_assert(pos * sizeof(int) <= batch_bo.size);
> +
> + vm_bind_gem_bo(fd, &ctx, integers_bo.handle, integers_bo.addr, integers_bo.size);
> + vm_bind_gem_bo(fd, &ctx, result_bo.handle, result_bo.addr, result_bo.size);
> + vm_bind_gem_bo(fd, &ctx, batch_bo.handle, batch_bo.addr, batch_bo.size);
> +
> + /* Now we do the actual batch submission to the GPU. */
> + batch_syncobj = syncobj_create(fd, 0);
> +
> + igt_assert_eq(rc, 0);
> + batch_syncs[n_batch_syncs++] = (struct drm_xe_sync) {
> + .extensions = 0,
> + .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
> + .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> + .handle = batch_syncobj,
> + .timeline_value = timeline_val,
> + };
> + if (n_vram_bufs) {
> + batch_syncs[n_batch_syncs++] = (struct drm_xe_sync) {
> + .extensions = 0,
> + .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
> + .flags = 0, /* wait */
> + .handle = vram_binds_syncobj,
> + .timeline_value = 1,
> + };
> + }
> + if (n_sram_bufs) {
> + batch_syncs[n_batch_syncs++] = (struct drm_xe_sync) {
> + .extensions = 0,
> + .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
> + .flags = 0, /* wait */
> + .handle = sram_binds_syncobj,
> + .timeline_value = 1,
> + };
> + }
> + exec = (struct drm_xe_exec) {
> + .exec_queue_id = ctx.exec_queue_id,
> + .num_syncs = n_batch_syncs,
> + .syncs = (uintptr_t)batch_syncs,
> + .address = batch_bo.addr,
> + .num_batch_buffer = 1,
> + };
> + for (retries = 0; retries < max_retries; retries++) {
> + rc = igt_ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
> + if (!(rc && errno == ENOMEM))
> + break;
> +
> + usleep(100 * retries);
> + if (retries == 0)
> + igt_warn("got ENOMEM\n");
> + }
> + if (retries == max_retries)
> + igt_warn("gave up after %d retries\n", retries);
> +
> + if (rc) {
> + igt_warn("errno: %d (%s)\n", errno, strerror(errno));
> + perror(__func__);
> + }
> + igt_assert_eq(rc, 0);
> +
> + if (retries)
> + igt_debug("!!!!!! succeeded after %d retries !!!!!!\n",
> + retries);
> +
> + /* We need to wait for the GPU to finish. */
> + igt_assert(syncobj_timeline_wait(fd, &batch_syncobj,
> + &timeline_val, 1, INT64_MAX, 0, NULL));
> + result_bo.ptr = (int *)xe_bo_map(fd, result_bo.handle, result_bo.size);
> + gpu_result = result_bo.ptr[0];
> + igt_debug("gpu_result = %d\n", gpu_result);
> + igt_debug("expected_result = %d\n", expected_result);
The map and the check can be combined directly, e.g.:

result_bo.ptr = (int *)xe_bo_map(fd, result_bo.handle, result_bo.size);
igt_assert_eq(result_bo.ptr[0], expected_result);
> +
> + igt_assert_eq(gpu_result, expected_result);
> + igt_assert_eq(munmap(result_bo.ptr, result_bo.size), 0);
Missing munmap for batch_bo?
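i.e. (a sketch, mirroring the result_bo cleanup):

	igt_assert_eq(munmap(batch_bo.ptr, batch_bo.size), 0);
	batch_bo.ptr = NULL;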
> + result_bo.ptr = NULL;
> +
> + gem_close(fd, batch_bo.handle);
> + gem_close(fd, result_bo.handle);
> + gem_close(fd, integers_bo.handle);
> +
> + xe_vm_destroy(fd, ctx.vm_id);
> + close(fd);
> + }
> + igt_waitchildren();
> + pthread_barrier_destroy(barrier);
> + munmap(barrier, sizeof(pthread_barrier_t));
> +}
> +
> int igt_main()
> {
> struct drm_xe_engine_class_instance *hwe, *hwe_non_copy = NULL;
> @@ -2849,6 +3282,11 @@ int igt_main()
> igt_assert(xe_visible_vram_size(fd, 0));
> test_oom(fd);
> }
> + igt_subtest("oversubscribe-concurrent-bind")
> + {
> + igt_require(xe_has_vram(fd));
> + test_vm_oversubscribe_concurrent_bind(fd);
> + }
>
> igt_fixture()
> drm_close_driver(fd);