From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:55609) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1W64Cl-0006fM-05 for qemu-devel@nongnu.org; Wed, 22 Jan 2014 15:15:24 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1W64Ce-0006oS-Cv for qemu-devel@nongnu.org; Wed, 22 Jan 2014 15:15:18 -0500 Received: from paradis.irqsave.net ([62.212.105.220]:48079) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1W64Cd-0006oK-SR for qemu-devel@nongnu.org; Wed, 22 Jan 2014 15:15:12 -0500 Date: Wed, 22 Jan 2014 21:15:10 +0100 From: =?iso-8859-1?Q?Beno=EEt?= Canet Message-ID: <20140122201510.GD3053@irqsave.net> References: <1389968119-24771-1-git-send-email-kwolf@redhat.com> <1389968119-24771-19-git-send-email-kwolf@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline In-Reply-To: <1389968119-24771-19-git-send-email-kwolf@redhat.com> Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v3 18/29] block: Make overlap range for serialisation dynamic List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Kevin Wolf Cc: pl@kamp.de, qemu-devel@nongnu.org, mreitz@redhat.com, stefanha@redhat.com, pbonzini@redhat.com, xiawenc@linux.vnet.ibm.com Le Friday 17 Jan 2014 =E0 15:15:08 (+0100), Kevin Wolf a =E9crit : > Copy on Read wants to serialise with all requests touching the same > cluster, so wait_serialising_requests() rounded to cluster boundaries. > Other users like alignment RMW will have different requirements, though > (requests touching the same sector), so make it dynamic. >=20 > Signed-off-by: Kevin Wolf > Reviewed-by: Max Reitz > --- > block.c | 53 ++++++++++++++++++++++++---------------= -------- > include/block/block_int.h | 4 ++++ > 2 files changed, 31 insertions(+), 26 deletions(-) >=20 > diff --git a/block.c b/block.c > index efa8979..e72966a 100644 > --- a/block.c > +++ b/block.c > @@ -2051,6 +2051,8 @@ static void tracked_request_begin(BdrvTrackedRequ= est *req, > .is_write =3D is_write, > .co =3D qemu_coroutine_self(), > .serialising =3D false, > + .overlap_offset =3D offset, > + .overlap_bytes =3D bytes, > }; > =20 > qemu_co_queue_init(&req->wait_queue); > @@ -2058,12 +2060,19 @@ static void tracked_request_begin(BdrvTrackedRe= quest *req, > QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); > } > =20 > -static void mark_request_serialising(BdrvTrackedRequest *req) > +static void mark_request_serialising(BdrvTrackedRequest *req, size_t a= lign) > { > + int64_t overlap_offset =3D req->offset & ~(align - 1); > + int overlap_bytes =3D ROUND_UP(req->offset + req->bytes, align) > + - overlap_offset; > + > if (!req->serialising) { > req->bs->serialising_in_flight++; > req->serialising =3D true; > } > + > + req->overlap_offset =3D MIN(req->overlap_offset, overlap_offset); > + req->overlap_bytes =3D MAX(req->overlap_bytes, overlap_bytes); > } > =20 > /** > @@ -2087,20 +2096,16 @@ void bdrv_round_to_clusters(BlockDriverState *b= s, > } > } > =20 > -static void round_bytes_to_clusters(BlockDriverState *bs, > - int64_t offset, unsigned int bytes= , > - int64_t *cluster_offset, > - unsigned int *cluster_bytes) > +static int bdrv_get_cluster_size(BlockDriverState *bs) > { > BlockDriverInfo bdi; > + int ret; > =20 > - if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size =3D=3D 0) { > - *cluster_offset =3D offset; > - *cluster_bytes =3D bytes; > + ret =3D bdrv_get_info(bs, &bdi); > + if (ret < 0 || bdi.cluster_size =3D=3D 0) { > + return bs->request_alignment; > } else { > - *cluster_offset =3D QEMU_ALIGN_DOWN(offset, bdi.cluster_size); > - *cluster_bytes =3D QEMU_ALIGN_UP(offset - *cluster_offset + by= tes, > - bdi.cluster_size); > + return bdi.cluster_size; > } > } > =20 > @@ -2108,11 +2113,11 @@ static bool tracked_request_overlaps(BdrvTracke= dRequest *req, > int64_t offset, unsigned int byte= s) > { > /* aaaa bbbb */ > - if (offset >=3D req->offset + req->bytes) { > + if (offset >=3D req->overlap_offset + req->overlap_bytes) { > return false; > } > /* bbbb aaaa */ > - if (req->offset >=3D offset + bytes) { > + if (req->overlap_offset >=3D offset + bytes) { > return false; > } > return true; > @@ -2122,30 +2127,21 @@ static void coroutine_fn wait_serialising_reque= sts(BdrvTrackedRequest *self) > { > BlockDriverState *bs =3D self->bs; > BdrvTrackedRequest *req; > - int64_t cluster_offset; > - unsigned int cluster_bytes; > bool retry; > =20 > if (!bs->serialising_in_flight) { > return; > } > =20 > - /* If we touch the same cluster it counts as an overlap. This gua= rantees > - * that allocating writes will be serialized and not race with eac= h other > - * for the same cluster. For example, in copy-on-read it ensures = that the > - * CoR read and write operations are atomic and guest writes canno= t > - * interleave between them. > - */ > - round_bytes_to_clusters(bs, self->offset, self->bytes, > - &cluster_offset, &cluster_bytes); > - > do { > retry =3D false; > QLIST_FOREACH(req, &bs->tracked_requests, list) { > if (req =3D=3D self || (!req->serialising && !self->serial= ising)) { > continue; > } > - if (tracked_request_overlaps(req, cluster_offset, cluster_= bytes)) { > + if (tracked_request_overlaps(req, self->overlap_offset, > + self->overlap_bytes)) > + { > /* Hitting this means there was a reentrant request, f= or > * example, a block driver issuing nested requests. T= his must > * never happen since it means deadlock. > @@ -2761,7 +2757,12 @@ static int coroutine_fn bdrv_aligned_preadv(Bloc= kDriverState *bs, > =20 > /* Handle Copy on Read and associated serialisation */ > if (flags & BDRV_REQ_COPY_ON_READ) { > - mark_request_serialising(req); > + /* If we touch the same cluster it counts as an overlap. This > + * guarantees that allocating writes will be serialized and no= t race > + * with each other for the same cluster. For example, in copy= -on-read > + * it ensures that the CoR read and write operations are atomi= c and > + * guest writes cannot interleave between them. */ > + mark_request_serialising(req, bdrv_get_cluster_size(bs)); > } > =20 > wait_serialising_requests(req); > diff --git a/include/block/block_int.h b/include/block/block_int.h > index d8443df..ccd2c68 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -60,7 +60,11 @@ typedef struct BdrvTrackedRequest { > int64_t offset; > unsigned int bytes; > bool is_write; > + > bool serialising; > + int64_t overlap_offset; > + unsigned int overlap_bytes; > + > QLIST_ENTRY(BdrvTrackedRequest) list; > Coroutine *co; /* owner, used for deadlock detection */ > CoQueue wait_queue; /* coroutines blocked on this request */ > --=20 > 1.8.1.4 >=20 >=20 Reviewed-by: Benoit Canet