Fix a vreq with error -EBUSY failing 100 times too soon, which returns EIO to the td device layer. If this td
device request carries filesystem metadata, the result is not good at all.
To reproduce it, run "./iozone -I -s 2G -r 512k -r 1m -r 2m -r 4m -i 0 -i 1 -f /data/iozone_test.tmp";
the result is an input/output error and iozone stops.
From ccc0ce3aede1f5c920036847ddb16b25969be7ba Mon Sep 17 00:00:00 2001
Date: Fri, 28 Feb 2014 03:01:19 -0500
Subject: [PATCH] fix vreq with error -EBUSY failing 100 times too soon and
returning EIO to td device layer.
1) the whole free list is consumed by vreqs in one or two blocks that
are set in the bat entry.
2) a vreq in the fail queue gets a chance to run again quickly, e.g. after 90us.
---
tools/blktap2/drivers/tapdisk-vbd.c | 17 ++++++++++++++---
tools/blktap2/drivers/tapdisk-vbd.h | 1 +
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/tools/blktap2/drivers/tapdisk-vbd.c b/tools/blktap2/drivers/tapdisk-vbd.c
index c665f27..529eb91 100644
--- a/tools/blktap2/drivers/tapdisk-vbd.c
+++ b/tools/blktap2/drivers/tapdisk-vbd.c
@@ -1081,7 +1081,7 @@ tapdisk_vbd_check_state(td_vbd_t *vbd)
td_vbd_request_t *vreq, *tmp;
tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests)
- if (vreq->num_retries >= TD_VBD_MAX_RETRIES)
+ if (vreq->num_retries >= TD_VBD_MAX_RETRIES && vreq->busy_looping != 1 )
tapdisk_vbd_complete_vbd_request(vbd, vreq);
if (!list_empty(&vbd->new_requests) ||
@@ -1168,7 +1168,7 @@ tapdisk_vbd_complete_vbd_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
{
if (!vreq->submitting && !vreq->secs_pending) {
if (vreq->status == BLKIF_RSP_ERROR &&
- vreq->num_retries < TD_VBD_MAX_RETRIES &&
+ (vreq->num_retries < TD_VBD_MAX_RETRIES || vreq->busy_looping == 1)&&
!td_flag_test(vbd->state, TD_VBD_DEAD) &&
!td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
tapdisk_vbd_move_request(vreq, &vbd->failed_requests);
@@ -1450,17 +1450,28 @@ tapdisk_vbd_reissue_failed_requests(td_vbd_t *vbd)
gettimeofday(&now, NULL);
tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) {
+ uint64_t delta = 0;
+
if (vreq->secs_pending)
continue;
if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
goto fail;
+ if (vreq->num_retries > TD_VBD_MAX_RETRIES - 10) {
+ delta = (now.tv_sec - vreq->last_try.tv_sec) * 1000000\
+ + now.tv_usec - vreq->last_try.tv_usec;
+ if (delta * vreq->num_retries < TD_VBD_RETRY_INTERVAL)
+ vreq->busy_looping = 1;
+ else
+ vreq->busy_looping = 0;
+ }
+
if (vreq->error != -EBUSY &&
now.tv_sec - vreq->last_try.tv_sec < TD_VBD_RETRY_INTERVAL)
continue;
- if (vreq->num_retries >= TD_VBD_MAX_RETRIES) {
+ if (vreq->num_retries >= TD_VBD_MAX_RETRIES && vreq->busy_looping != 1) {
fail:
DBG(TLOG_INFO, "req %"PRIu64"retried %d times\n",
vreq->
req.id, vreq->num_retries);
diff --git a/tools/blktap2/drivers/tapdisk-vbd.h b/tools/blktap2/drivers/tapdisk-vbd.h
index be084b2..9e5f5f6 100644
--- a/tools/blktap2/drivers/tapdisk-vbd.h
+++ b/tools/blktap2/drivers/tapdisk-vbd.h
@@ -73,6 +73,7 @@ struct td_vbd_request {
int submitting;
int secs_pending;
int num_retries;
+ int busy_looping;
struct timeval last_try;
td_vbd_t *vbd;
--
1.8.3.1