* [PATCH 1/7] libceph: kill off osd data write_request parameters
2013-04-16 3:35 [PATCH 0/7] rbd: existence check or layered writes Alex Elder
@ 2013-04-16 3:36 ` Alex Elder
2013-04-16 3:37 ` [PATCH 2/7] libceph: clean up osd data field access functions Alex Elder
` (6 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Alex Elder @ 2013-04-16 3:36 UTC (permalink / raw)
To: ceph-devel
In the incremental move toward supporting distinct data items in an
osd request some of the functions had "write_request" parameters to
indicate, basically, whether the data belonged to in_data or the
out_data. Now that we maintain the data fields in the op structure
there is no need to indicate the direction, so get rid of the
"write_request" parameters.
Signed-off-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 4 ++--
fs/ceph/addr.c | 9 ++++-----
fs/ceph/file.c | 4 ++--
include/linux/ceph/osd_client.h | 8 ++++----
net/ceph/osd_client.c | 25 +++++++++++--------------
5 files changed, 23 insertions(+), 27 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 13a381b..8e8b876 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1779,7 +1779,7 @@ static int rbd_img_request_fill_bio(struct
rbd_img_request *img_request,
osd_req_op_extent_init(osd_req, 0, opcode, offset, length,
0, 0);
- osd_req_op_extent_osd_data_bio(osd_req, 0, write_request,
+ osd_req_op_extent_osd_data_bio(osd_req, 0,
obj_request->bio_list, obj_request->length);
rbd_osd_req_format(obj_request, write_request);
@@ -2281,7 +2281,7 @@ static int rbd_obj_read_sync(struct rbd_device
*rbd_dev,
osd_req_op_extent_init(obj_request->osd_req, 0, CEPH_OSD_OP_READ,
offset, length, 0, 0);
- osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0, false,
+ osd_req_op_extent_osd_data_pages(obj_request->osd_req, 0,
obj_request->pages,
obj_request->length,
obj_request->offset & ~PAGE_MASK,
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 068d2c8..68c187b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -245,7 +245,7 @@ static void finish_read(struct ceph_osd_request
*req, struct ceph_msg *msg)
dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
/* unlock all pages, zeroing any data we didn't read */
- osd_data = osd_req_op_extent_osd_data(req, 0, false);
+ osd_data = osd_req_op_extent_osd_data(req, 0);
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
num_pages = calc_pages_for((u64)osd_data->alignment,
(u64)osd_data->length);
@@ -343,8 +343,7 @@ static int start_read(struct inode *inode, struct
list_head *page_list, int max)
}
pages[i] = page;
}
- osd_req_op_extent_osd_data_pages(req, 0, false, pages, len, 0,
- false, false);
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
req->r_callback = finish_read;
req->r_inode = inode;
@@ -571,7 +570,7 @@ static void writepages_finish(struct
ceph_osd_request *req,
long writeback_stat;
unsigned issued = ceph_caps_issued(ci);
- osd_data = osd_req_op_extent_osd_data(req, 0, true);
+ osd_data = osd_req_op_extent_osd_data(req, 0);
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
num_pages = calc_pages_for((u64)osd_data->alignment,
(u64)osd_data->length);
@@ -916,7 +915,7 @@ get_more_pages:
dout("writepages got %d pages at %llu~%llu\n",
locked_pages, offset, len);
- osd_req_op_extent_osd_data_pages(req, 0, true, pages, len, 0,
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
!!pool, false);
pages = NULL; /* request message now owns the pages array */
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b7e6caa..4c87e17 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -574,8 +574,8 @@ more:
own_pages = true;
}
}
- osd_req_op_extent_osd_data_pages(req, 0, true, pages, len,
- page_align, false, own_pages);
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
+ false, own_pages);
/* BUG_ON(vino.snap != CEPH_NOSNAP); */
ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
diff --git a/include/linux/ceph/osd_client.h
b/include/linux/ceph/osd_client.h
index 2a68a74..26d29cd 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -239,22 +239,22 @@ extern void osd_req_op_extent_update(struct
ceph_osd_request *osd_req,
extern struct ceph_osd_data *osd_req_op_extent_osd_data(
struct ceph_osd_request *osd_req,
- unsigned int which, bool write_request);
+ unsigned int which);
extern struct ceph_osd_data *osd_req_op_cls_response_data(
struct ceph_osd_request *osd_req,
unsigned int which);
extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
- unsigned int which, bool write_request,
+ unsigned int which,
struct page **pages, u64 length,
u32 alignment, bool pages_from_pool,
bool own_pages);
extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
- unsigned int which, bool write_request,
+ unsigned int which,
struct ceph_pagelist *pagelist);
#ifdef CONFIG_BLOCK
extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
- unsigned int which, bool write_request,
+ unsigned int which,
struct bio *bio, size_t bio_length);
#endif /* CONFIG_BLOCK */
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 939be67..b76e416 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -117,7 +117,7 @@ static void ceph_osd_data_bio_init(struct
ceph_osd_data *osd_data,
struct ceph_osd_data *
osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
- unsigned int which, bool write_request)
+ unsigned int which)
{
BUG_ON(which >= osd_req->r_num_ops);
@@ -156,37 +156,34 @@ osd_req_op_cls_response_data(struct
ceph_osd_request *osd_req,
EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? */
void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
- unsigned int which, bool write_request,
- struct page **pages, u64 length, u32 alignment,
+ unsigned int which, struct page **pages,
+ u64 length, u32 alignment,
bool pages_from_pool, bool own_pages)
{
struct ceph_osd_data *osd_data;
- osd_data = osd_req_op_extent_osd_data(osd_req, which, write_request);
+ osd_data = osd_req_op_extent_osd_data(osd_req, which);
ceph_osd_data_pages_init(osd_data, pages, length, alignment,
pages_from_pool, own_pages);
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages);
void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
- unsigned int which, bool write_request,
- struct ceph_pagelist *pagelist)
+ unsigned int which, struct ceph_pagelist *pagelist)
{
struct ceph_osd_data *osd_data;
- osd_data = osd_req_op_extent_osd_data(osd_req, which, write_request);
+ osd_data = osd_req_op_extent_osd_data(osd_req, which);
ceph_osd_data_pagelist_init(osd_data, pagelist);
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
#ifdef CONFIG_BLOCK
void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
- unsigned int which, bool write_request,
- struct bio *bio, size_t bio_length)
+ unsigned int which, struct bio *bio, size_t bio_length)
{
struct ceph_osd_data *osd_data;
-
- osd_data = osd_req_op_extent_osd_data(osd_req, which, write_request);
+ osd_data = osd_req_op_extent_osd_data(osd_req, which);
ceph_osd_data_bio_init(osd_data, bio, bio_length);
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
@@ -2278,7 +2275,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
/* it may be a short read due to an object boundary */
- osd_req_op_extent_osd_data_pages(req, 0, false,
+ osd_req_op_extent_osd_data_pages(req, 0,
pages, *plen, page_align, false, false);
dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
@@ -2321,7 +2318,7 @@ int ceph_osdc_writepages(struct ceph_osd_client
*osdc, struct ceph_vino vino,
return PTR_ERR(req);
/* it may be a short write due to an object boundary */
- osd_req_op_extent_osd_data_pages(req, 0, true, pages, len, page_align,
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
false, false);
dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);
@@ -2422,7 +2419,7 @@ static struct ceph_msg *get_reply(struct
ceph_connection *con,
* XXX page data. Probably OK for reads, but this
* XXX ought to be done more generally.
*/
- osd_data = osd_req_op_extent_osd_data(req, 0, false);
+ osd_data = osd_req_op_extent_osd_data(req, 0);
if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
if (osd_data->pages &&
unlikely(osd_data->length < data_len)) {
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 2/7] libceph: clean up osd data field access functions
2013-04-16 3:35 [PATCH 0/7] rbd: existence check or layered writes Alex Elder
2013-04-16 3:36 ` [PATCH 1/7] libceph: kill off osd data write_request parameters Alex Elder
@ 2013-04-16 3:37 ` Alex Elder
2013-04-16 3:38 ` [PATCH 3/7] libceph: support raw data requests Alex Elder
` (5 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Alex Elder @ 2013-04-16 3:37 UTC (permalink / raw)
To: ceph-devel
There are a bunch of functions defined to encapsulate getting the
address of a data field for a particular op in an osd request.
They're all defined the same way, so create a macro to take the
place of all of them.
Two of these are used outside the osd client code, so preserve them
(but convert them to use the new macro internally). Stop exporting
the ones that aren't used elsewhere.
Signed-off-by: Alex Elder <elder@inktank.com>
---
net/ceph/osd_client.c | 48
+++++++++++++++---------------------------------
1 file changed, 15 insertions(+), 33 deletions(-)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b76e416..104353a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -115,43 +115,25 @@ static void ceph_osd_data_bio_init(struct
ceph_osd_data *osd_data,
}
#endif /* CONFIG_BLOCK */
+#define osd_req_op_data(oreq, whch, typ, fld) \
+ ({ \
+ BUG_ON(whch >= (oreq)->r_num_ops); \
+ &(oreq)->r_ops[whch].typ.fld; \
+ })
+
struct ceph_osd_data *
osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
unsigned int which)
{
- BUG_ON(which >= osd_req->r_num_ops);
-
- return &osd_req->r_ops[which].extent.osd_data;
+ return osd_req_op_data(osd_req, which, extent, osd_data);
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data);
struct ceph_osd_data *
-osd_req_op_cls_request_info(struct ceph_osd_request *osd_req,
- unsigned int which)
-{
- BUG_ON(which >= osd_req->r_num_ops);
-
- return &osd_req->r_ops[which].cls.request_info;
-}
-EXPORT_SYMBOL(osd_req_op_cls_request_info); /* ??? */
-
-struct ceph_osd_data *
-osd_req_op_cls_request_data(struct ceph_osd_request *osd_req,
- unsigned int which)
-{
- BUG_ON(which >= osd_req->r_num_ops);
-
- return &osd_req->r_ops[which].cls.request_data;
-}
-EXPORT_SYMBOL(osd_req_op_cls_request_data); /* ??? */
-
-struct ceph_osd_data *
osd_req_op_cls_response_data(struct ceph_osd_request *osd_req,
unsigned int which)
{
- BUG_ON(which >= osd_req->r_num_ops);
-
- return &osd_req->r_ops[which].cls.response_data;
+ return osd_req_op_data(osd_req, which, cls, response_data);
}
EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? */
@@ -162,7 +144,7 @@ void osd_req_op_extent_osd_data_pages(struct
ceph_osd_request *osd_req,
{
struct ceph_osd_data *osd_data;
- osd_data = osd_req_op_extent_osd_data(osd_req, which);
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
ceph_osd_data_pages_init(osd_data, pages, length, alignment,
pages_from_pool, own_pages);
}
@@ -173,7 +155,7 @@ void osd_req_op_extent_osd_data_pagelist(struct
ceph_osd_request *osd_req,
{
struct ceph_osd_data *osd_data;
- osd_data = osd_req_op_extent_osd_data(osd_req, which);
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
ceph_osd_data_pagelist_init(osd_data, pagelist);
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
@@ -183,7 +165,8 @@ void osd_req_op_extent_osd_data_bio(struct
ceph_osd_request *osd_req,
unsigned int which, struct bio *bio, size_t bio_length)
{
struct ceph_osd_data *osd_data;
- osd_data = osd_req_op_extent_osd_data(osd_req, which);
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
ceph_osd_data_bio_init(osd_data, bio, bio_length);
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
@@ -195,7 +178,7 @@ static void osd_req_op_cls_request_info_pagelist(
{
struct ceph_osd_data *osd_data;
- osd_data = osd_req_op_cls_request_info(osd_req, which);
+ osd_data = osd_req_op_data(osd_req, which, cls, request_info);
ceph_osd_data_pagelist_init(osd_data, pagelist);
}
@@ -205,7 +188,7 @@ void osd_req_op_cls_request_data_pagelist(
{
struct ceph_osd_data *osd_data;
- osd_data = osd_req_op_cls_request_data(osd_req, which);
+ osd_data = osd_req_op_data(osd_req, which, cls, request_data);
ceph_osd_data_pagelist_init(osd_data, pagelist);
}
EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist);
@@ -216,7 +199,7 @@ void osd_req_op_cls_response_data_pages(struct
ceph_osd_request *osd_req,
{
struct ceph_osd_data *osd_data;
- osd_data = osd_req_op_cls_response_data(osd_req, which);
+ osd_data = osd_req_op_data(osd_req, which, cls, response_data);
ceph_osd_data_pages_init(osd_data, pages, length, alignment,
pages_from_pool, own_pages);
}
@@ -241,7 +224,6 @@ static u64 ceph_osd_data_length(struct ceph_osd_data
*osd_data)
}
}
-
static void ceph_osd_data_release(struct ceph_osd_data *osd_data)
{
if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) {
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 3/7] libceph: support raw data requests
2013-04-16 3:35 [PATCH 0/7] rbd: existence check or layered writes Alex Elder
2013-04-16 3:36 ` [PATCH 1/7] libceph: kill off osd data write_request parameters Alex Elder
2013-04-16 3:37 ` [PATCH 2/7] libceph: clean up osd data field access functions Alex Elder
@ 2013-04-16 3:38 ` Alex Elder
2013-04-16 3:38 ` [PATCH 4/7] rbd: adjust image object request ref counting Alex Elder
` (4 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Alex Elder @ 2013-04-16 3:38 UTC (permalink / raw)
To: ceph-devel
Allow osd request ops that aren't otherwise structured (not class,
extent, or watch ops) to specify "raw" data to be used to hold
incoming data for the op. Make use of this capability for the osd
STAT op.
Prefix the name of the private function osd_req_op_init() with "_",
and expose a new function by that (earlier) name whose purpose is to
initialize osd ops with (only) implied data.
For now we'll just support the use of a page array for an osd op
with incoming raw data.
Signed-off-by: Alex Elder <elder@inktank.com>
---
include/linux/ceph/osd_client.h | 10 ++++++++++
net/ceph/osd_client.c | 38
++++++++++++++++++++++++++++++++++----
2 files changed, 44 insertions(+), 4 deletions(-)
diff --git a/include/linux/ceph/osd_client.h
b/include/linux/ceph/osd_client.h
index 26d29cd..7d4fd18 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -83,6 +83,7 @@ struct ceph_osd_req_op {
u16 op; /* CEPH_OSD_OP_* */
u32 payload_len;
union {
+ struct ceph_osd_data raw_data_in;
struct {
u64 offset, length;
u64 truncate_size;
@@ -230,6 +231,15 @@ extern void ceph_osdc_handle_reply(struct
ceph_osd_client *osdc,
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
struct ceph_msg *msg);
+extern void osd_req_op_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode);
+
+extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
+ unsigned int which,
+ struct page **pages, u64 length,
+ u32 alignment, bool pages_from_pool,
+ bool own_pages);
+
extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
u64 offset, u64 length,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 104353a..74ef9fd 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -121,6 +121,14 @@ static void ceph_osd_data_bio_init(struct
ceph_osd_data *osd_data,
&(oreq)->r_ops[whch].typ.fld; \
})
+static struct ceph_osd_data *
+osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int
which)
+{
+ BUG_ON(which >= osd_req->r_num_ops);
+
+ return &osd_req->r_ops[which].raw_data_in;
+}
+
struct ceph_osd_data *
osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
unsigned int which)
@@ -137,6 +145,19 @@ osd_req_op_cls_response_data(struct
ceph_osd_request *osd_req,
}
EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? */
+void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
+ unsigned int which, struct page **pages,
+ u64 length, u32 alignment,
+ bool pages_from_pool, bool own_pages)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_raw_data_in(osd_req, which);
+ ceph_osd_data_pages_init(osd_data, pages, length, alignment,
+ pages_from_pool, own_pages);
+}
+EXPORT_SYMBOL(osd_req_op_raw_data_in_pages);
+
void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
unsigned int which, struct page **pages,
u64 length, u32 alignment,
@@ -437,7 +458,7 @@ static bool osd_req_opcode_valid(u16 opcode)
* common init routine for all the other init functions, below.
*/
static struct ceph_osd_req_op *
-osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
+_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
u16 opcode)
{
struct ceph_osd_req_op *op;
@@ -452,12 +473,19 @@ osd_req_op_init(struct ceph_osd_request *osd_req,
unsigned int which,
return op;
}
+void osd_req_op_init(struct ceph_osd_request *osd_req,
+ unsigned int which, u16 opcode)
+{
+ (void)_osd_req_op_init(osd_req, which, opcode);
+}
+EXPORT_SYMBOL(osd_req_op_init);
+
void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
u64 offset, u64 length,
u64 truncate_size, u32 truncate_seq)
{
- struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, opcode);
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
size_t payload_len = 0;
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
@@ -495,7 +523,7 @@ EXPORT_SYMBOL(osd_req_op_extent_update);
void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int
which,
u16 opcode, const char *class, const char *method)
{
- struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, opcode);
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
struct ceph_pagelist *pagelist;
size_t payload_len = 0;
size_t size;
@@ -532,7 +560,7 @@ void osd_req_op_watch_init(struct ceph_osd_request
*osd_req,
unsigned int which, u16 opcode,
u64 cookie, u64 version, int flag)
{
- struct ceph_osd_req_op *op = osd_req_op_init(osd_req, which, opcode);
+ struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
@@ -584,6 +612,8 @@ static u64 osd_req_encode_op(struct ceph_osd_request
*req,
switch (src->op) {
case CEPH_OSD_OP_STAT:
+ osd_data = &src->raw_data_in;
+ ceph_osdc_msg_data_add(req->r_reply, osd_data);
break;
case CEPH_OSD_OP_READ:
case CEPH_OSD_OP_WRITE:
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 4/7] rbd: adjust image object request ref counting
2013-04-16 3:35 [PATCH 0/7] rbd: existence check or layered writes Alex Elder
` (2 preceding siblings ...)
2013-04-16 3:38 ` [PATCH 3/7] libceph: support raw data requests Alex Elder
@ 2013-04-16 3:38 ` Alex Elder
2013-04-16 3:38 ` [PATCH 5/7] rbd: always check IMG_DATA flag Alex Elder
` (3 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Alex Elder @ 2013-04-16 3:38 UTC (permalink / raw)
To: ceph-devel
An extra reference is taken when an object request is added as one
of the requests making up an image request. A reference is dropped
again when the image's object requests get submitted.
The original reference for the object request will remain throughout
this period, so we don't need to add and then take away an extra
one.
This can be interpreted as the image request inheriting the original
object request's reference.
Signed-off-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 8e8b876..81751cd 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1167,7 +1167,7 @@ static inline void rbd_img_obj_request_add(struct
rbd_img_request *img_request,
{
rbd_assert(obj_request->img_request == NULL);
- rbd_obj_request_get(obj_request);
+ /* Image request now owns object's original reference */
obj_request->img_request = img_request;
obj_request->which = img_request->obj_request_count;
rbd_assert(!obj_request_img_data_test(obj_request));
@@ -1815,12 +1815,6 @@ static int rbd_img_request_submit(struct
rbd_img_request *img_request)
ret = rbd_obj_request_submit(osdc, obj_request);
if (ret)
return ret;
- /*
- * The image request has its own reference to each
- * of its object requests, so we can safely drop the
- * initial one here.
- */
- rbd_obj_request_put(obj_request);
}
return 0;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 5/7] rbd: always check IMG_DATA flag
2013-04-16 3:35 [PATCH 0/7] rbd: existence check or layered writes Alex Elder
` (3 preceding siblings ...)
2013-04-16 3:38 ` [PATCH 4/7] rbd: adjust image object request ref counting Alex Elder
@ 2013-04-16 3:38 ` Alex Elder
2013-04-16 3:38 ` [PATCH 6/7] rbd: add target object existence flags Alex Elder
` (2 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Alex Elder @ 2013-04-16 3:38 UTC (permalink / raw)
To: ceph-devel
In a few spots, whether an object request's img_request pointer
is null is used to determine whether an object request is being done
as part of an image data request.
Stop doing that, and instead always use the object request IMG_DATA
flag for that purpose. Swap the order of the definition of the
IMG_DATA and DONE flag helpers, because obj_request_done_set() now
refers to obj_request_img_data_test() when getting its rbd_dev value.
This will become important because the img_request pointer is
about to become part of a union.
Signed-off-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 51
++++++++++++++++++++++++++++++---------------------
1 file changed, 30 insertions(+), 21 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 81751cd..211baa7f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1094,40 +1094,39 @@ out_err:
* each flag, once its value is set to 1 it is never reset to 0
* again.
*/
-static void obj_request_done_set(struct rbd_obj_request *obj_request)
+static void obj_request_img_data_set(struct rbd_obj_request *obj_request)
{
- if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) {
- struct rbd_img_request *img_request = obj_request->img_request;
+ if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) {
struct rbd_device *rbd_dev;
- rbd_dev = img_request ? img_request->rbd_dev : NULL;
- rbd_warn(rbd_dev, "obj_request %p already marked done\n",
+ rbd_dev = obj_request->img_request->rbd_dev;
+ rbd_warn(rbd_dev, "obj_request %p already marked img_data\n",
obj_request);
}
}
-static bool obj_request_done_test(struct rbd_obj_request *obj_request)
+static bool obj_request_img_data_test(struct rbd_obj_request *obj_request)
{
smp_mb();
- return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0;
+ return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0;
}
-static void obj_request_img_data_set(struct rbd_obj_request *obj_request)
+static void obj_request_done_set(struct rbd_obj_request *obj_request)
{
- if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) {
- struct rbd_img_request *img_request = obj_request->img_request;
- struct rbd_device *rbd_dev;
+ if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) {
+ struct rbd_device *rbd_dev = NULL;
- rbd_dev = img_request ? img_request->rbd_dev : NULL;
- rbd_warn(rbd_dev, "obj_request %p already marked img_data\n",
+ if (obj_request_img_data_test(obj_request))
+ rbd_dev = obj_request->img_request->rbd_dev;
+ rbd_warn(rbd_dev, "obj_request %p already marked done\n",
obj_request);
}
}
-static bool obj_request_img_data_test(struct rbd_obj_request *obj_request)
+static bool obj_request_done_test(struct rbd_obj_request *obj_request)
{
smp_mb();
- return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0;
+ return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0;
}
static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
@@ -1338,8 +1337,16 @@ static void rbd_osd_trivial_callback(struct
rbd_obj_request *obj_request)
static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
{
- struct rbd_img_request *img_request = obj_request->img_request;
- bool layered = img_request && img_request_layered_test(img_request);
+ struct rbd_img_request *img_request = NULL;
+ bool layered = false;
+
+ if (obj_request_img_data_test(obj_request)) {
+ img_request = obj_request->img_request;
+ layered = img_request && img_request_layered_test(img_request);
+ } else {
+ img_request = NULL;
+ layered = false;
+ }
dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
obj_request, img_request, obj_request->result,
@@ -1382,10 +1389,12 @@ static void rbd_osd_req_callback(struct
ceph_osd_request *osd_req,
dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg);
rbd_assert(osd_req == obj_request->osd_req);
- rbd_assert(obj_request_img_data_test(obj_request) ^
- !obj_request->img_request);
- rbd_assert(obj_request_img_data_test(obj_request) ^
- (obj_request->which == BAD_WHICH));
+ if (obj_request_img_data_test(obj_request)) {
+ rbd_assert(obj_request->img_request);
+ rbd_assert(obj_request->which != BAD_WHICH);
+ } else {
+ rbd_assert(obj_request->which == BAD_WHICH);
+ }
if (osd_req->r_result < 0)
obj_request->result = osd_req->r_result;
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 6/7] rbd: add target object existence flags
2013-04-16 3:35 [PATCH 0/7] rbd: existence check or layered writes Alex Elder
` (4 preceding siblings ...)
2013-04-16 3:38 ` [PATCH 5/7] rbd: always check IMG_DATA flag Alex Elder
@ 2013-04-16 3:38 ` Alex Elder
2013-04-16 3:38 ` [PATCH 7/7] rbd: issue stat request before layered write Alex Elder
2013-04-20 3:04 ` [PATCH 0/7] rbd: existence check or layered writes Josh Durgin
7 siblings, 0 replies; 11+ messages in thread
From: Alex Elder @ 2013-04-16 3:38 UTC (permalink / raw)
To: ceph-devel
This creates two new flags for object requests to indicate what is
known about the existence of the object to which a request is to be
sent. The KNOWN flag will be true if the EXISTS flag is
meaningful. That is:
KNOWN EXISTS
----- ------
0 0 don't know whether the object exists
0 1 (not used/invalid)
1 0 object is known to not exist
1 1 object is known to exist
This will be used in determining how to handle write requests for
data objects for layered rbd images.
Signed-off-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 211baa7f..b1b8ef8 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -173,6 +173,8 @@ enum obj_request_type {
enum obj_req_flags {
OBJ_REQ_DONE, /* completion flag: not done = 0, done = 1 */
OBJ_REQ_IMG_DATA, /* object usage: standalone = 0, image = 1 */
+ OBJ_REQ_KNOWN, /* EXISTS flag valid: no = 0, yes = 1 */
+ OBJ_REQ_EXISTS, /* target exists: no = 0, yes = 1 */
};
struct rbd_obj_request {
@@ -1129,6 +1131,37 @@ static bool obj_request_done_test(struct
rbd_obj_request *obj_request)
return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0;
}
+/*
+ * This sets the KNOWN flag after (possibly) setting the EXISTS
+ * flag. The latter is set based on the "exists" value provided.
+ *
+ * Note that for our purposes once an object exists it never goes
+ * away again. It's possible that the response from two existence
+ * checks are separated by the creation of the target object, and
+ * the first ("doesn't exist") response arrives *after* the second
+ * ("does exist"). In that case we ignore the late "doesn't exist" one.
+ */
+static void obj_request_existence_set(struct rbd_obj_request *obj_request,
+ bool exists)
+{
+ if (exists)
+ set_bit(OBJ_REQ_EXISTS, &obj_request->flags);
+ set_bit(OBJ_REQ_KNOWN, &obj_request->flags);
+ smp_mb();
+}
+
+static bool obj_request_known_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_KNOWN, &obj_request->flags) != 0;
+}
+
+static bool obj_request_exists_test(struct rbd_obj_request *obj_request)
+{
+ smp_mb();
+ return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0;
+}
+
static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
{
dout("%s: obj %p (was %d)\n", __func__, obj_request,
@@ -1623,6 +1656,10 @@ static struct rbd_img_request
*rbd_img_request_create(
INIT_LIST_HEAD(&img_request->obj_requests);
kref_init(&img_request->kref);
+ (void) obj_request_existence_set;
+ (void) obj_request_known_test;
+ (void) obj_request_exists_test;
+
rbd_img_request_get(img_request); /* Avoid a warning */
rbd_img_request_put(img_request); /* TEMPORARY */
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 7/7] rbd: issue stat request before layered write
2013-04-16 3:35 [PATCH 0/7] rbd: existence check or layered writes Alex Elder
` (5 preceding siblings ...)
2013-04-16 3:38 ` [PATCH 6/7] rbd: add target object existence flags Alex Elder
@ 2013-04-16 3:38 ` Alex Elder
2013-04-18 12:44 ` [PATCH 7/7, v2] " Alex Elder
2013-04-20 3:04 ` [PATCH 0/7] rbd: existence check or layered writes Josh Durgin
7 siblings, 1 reply; 11+ messages in thread
From: Alex Elder @ 2013-04-16 3:38 UTC (permalink / raw)
To: ceph-devel
This is a step toward fully implementing layered writes.
Add checks before request submission for the object(s) associated
with an image request. For write requests, if we don't know that
the target object exists, issue a STAT request to find out. When
that request completes, mark the known and exists flags for the
original object request accordingly and re-submit the object
request. (Note that this still does the existence check only; the
copyup operation is not yet done.)
A new object request is created to perform the existence check. A
pointer to the original request is added to that object request to
allow the stat request to re-issue the original request after
updating its flags. If there is a failure with the stat request
the error code is stored with the original request, which is then
completed.
This resolves:
http://tracker.ceph.com/issues/3418
Signed-off-by: Alex Elder <elder@inktank.com>
---
drivers/block/rbd.c | 155
++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 147 insertions(+), 8 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b1b8ef8..f3a4a74 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -183,9 +183,31 @@ struct rbd_obj_request {
u64 length; /* bytes from offset */
unsigned long flags;
- struct rbd_img_request *img_request;
- u64 img_offset; /* image relative offset */
- struct list_head links; /* img_request->obj_requests */
+ /*
+ * An object request associated with an image will have its
+ * img_data flag set; a standalone object request will not.
+ *
+ * A standalone object request will have which == BAD_WHICH
+ * and a null obj_request pointer.
+ *
+ * An object request initiated in support of a layered image
+ * object (to check for its existence before a write) will
+ * have which == BAD_WHICH and a non-null obj_request pointer.
+ *
+ * Finally, an object request for rbd image data will have
+ * which != BAD_WHICH, and will have a non-null img_request
+ * pointer. The value of which will be in the range
+ * 0..(img_request->obj_request_count-1).
+ */
+ union {
+ struct rbd_obj_request *obj_request; /* STAT op */
+ struct {
+ struct rbd_img_request *img_request;
+ u64 img_offset;
+ /* links for img_request->obj_requests list */
+ struct list_head links;
+ };
+ };
u32 which; /* posn image request list */
enum obj_request_type type;
@@ -1656,10 +1678,6 @@ static struct rbd_img_request *rbd_img_request_create(
INIT_LIST_HEAD(&img_request->obj_requests);
kref_init(&img_request->kref);
- (void) obj_request_existence_set;
- (void) obj_request_known_test;
- (void) obj_request_exists_test;
-
rbd_img_request_get(img_request); /* Avoid a warning */
rbd_img_request_put(img_request); /* TEMPORARY */
@@ -1847,18 +1865,139 @@ out_unwind:
return -ENOMEM;
}
+static void rbd_img_obj_exists_callback(struct rbd_obj_request
*obj_request)
+{
+ struct rbd_device *rbd_dev;
+ struct ceph_osd_client *osdc;
+ struct rbd_obj_request *orig_request;
+
+ rbd_assert(!obj_request_img_data_test(obj_request));
+ orig_request = obj_request->obj_request;
+ obj_request->obj_request = NULL;;
+
+ dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
+ obj_request, orig_request, obj_request->result,
+ obj_request->xferred, obj_request->length);
+
+ rbd_assert(orig_request);
+ rbd_assert(orig_request->img_request);
+ rbd_dev = orig_request->img_request->rbd_dev;
+ osdc = &rbd_dev->rbd_client->client->osdc;
+
+ /*
+ * Our only purpose here is to determine whether the object
+ * exists, and we don't want to treat the non-existence as
+ * an error. If something else comes back, transfer the
+ * error to the original request and complete it now.
+ */
+ if (!obj_request->result) {
+ obj_request_existence_set(orig_request, true);
+ } else if (obj_request->result == -ENOENT) {
+ obj_request_existence_set(orig_request, false);
+ obj_request->result = 0;
+ } else if (obj_request->result) {
+ orig_request->result = obj_request->result;
+ goto out_err;
+ }
+
+ /* Done with the stat request */
+
+ rbd_obj_request_put(obj_request);
+
+ /*
+ * Resubmit the original request now that we have recorded
+ * whether the target object exists.
+ */
+ orig_request->result = rbd_obj_request_submit(osdc, orig_request);
+out_err:
+ if (orig_request->result)
+ rbd_obj_request_complete(orig_request);
+ rbd_obj_request_put(orig_request);
+}
+
+static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
+{
+ struct rbd_obj_request *stat_request;
+ struct rbd_device *rbd_dev;
+ struct ceph_osd_client *osdc;
+ struct page **pages = NULL;
+ u32 page_count;
+ size_t size;
+ int ret;
+
+ /*
+ * The response data for a STAT call consists of:
+ * le64 length;
+ * struct {
+ * le32 tv_sec;
+ * le32 tv_nsec;
+ * } mtime;
+ */
+ size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
+ page_count = (u32)calc_pages_for(0, size);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = -ENOMEM;
+ stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
+ OBJ_REQUEST_PAGES);
+ if (!stat_request)
+ goto out;
+
+ rbd_obj_request_get(obj_request);
+ stat_request->obj_request = obj_request;
+ stat_request->pages = pages;
+ stat_request->page_count = page_count;
+
+ rbd_assert(obj_request->img_request);
+ rbd_dev = obj_request->img_request->rbd_dev;
+ stat_request->osd_req = rbd_osd_req_create(rbd_dev, false,
+ stat_request);
+ if (!stat_request->osd_req)
+ goto out;
+ stat_request->callback = rbd_img_obj_exists_callback;
+
+ osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
+ osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
+ false, false);
+ rbd_osd_req_format(stat_request, false);
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ ret = rbd_obj_request_submit(osdc, stat_request);
+out:
+ if (ret)
+ rbd_obj_request_put(obj_request);
+
+ return ret;
+}
+
static int rbd_img_request_submit(struct rbd_img_request *img_request)
{
struct rbd_device *rbd_dev = img_request->rbd_dev;
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
struct rbd_obj_request *obj_request;
struct rbd_obj_request *next_obj_request;
+ bool write_request = img_request_write_test(img_request);
+ bool layered = img_request_layered_test(img_request);
dout("%s: img %p\n", __func__, img_request);
for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
+ bool known;
+ bool object_exists;
int ret;
- ret = rbd_obj_request_submit(osdc, obj_request);
+ /*
+ * We need to know whether the target object exists
+ * for a layered write. Issue an existence check
+ * first if we need to.
+ */
+ known = obj_request_known_test(obj_request);
+ object_exists = known && obj_request_exists_test(obj_request);
+ if (!write_request || !layered || object_exists)
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ else
+ ret = rbd_img_obj_exists_submit(obj_request);
if (ret)
return ret;
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 7/7, v2] rbd: issue stat request before layered write
2013-04-16 3:38 ` [PATCH 7/7] rbd: issue stat request before layered write Alex Elder
@ 2013-04-18 12:44 ` Alex Elder
2013-04-20 3:03 ` Josh Durgin
0 siblings, 1 reply; 11+ messages in thread
From: Alex Elder @ 2013-04-18 12:44 UTC (permalink / raw)
To: ceph-devel
(Since this hasn't been reviewed I have updated it slightly.
I rebased the series onto the current testing branch. The patches
are all available in the "review/wip-4679-3" branch of the ceph-client
git repository. I also made some minor changes in the definition
of rbd_img_obj_exists_callback()).
This is a step toward fully implementing layered writes.
Add checks before request submission for the object(s) associated
with an image request. For write requests, if we don't know that
the target object exists, issue a STAT request to find out. When
that request completes, mark the known and exists flags for the
original object request accordingly and re-submit the object
request. (Note that this still does the existence check only; the
copyup operation is not yet done.)
A new object request is created to perform the existence check. A
pointer to the original request is added to that object request to
allow the stat request to re-issue the original request after
updating its flags. If there is a failure with the stat request
the error code is stored with the original request, which is then
completed.
This resolves:
http://tracker.ceph.com/issues/3418
Signed-off-by: Alex Elder <elder@inktank.com>
---
v2: rebased to testing; small cleanup in rbd_img_obj_exists_callback()
drivers/block/rbd.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 155 insertions(+), 8 deletions(-)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b1b8ef8..ce2fb3a 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -183,9 +183,31 @@ struct rbd_obj_request {
u64 length; /* bytes from offset */
unsigned long flags;
- struct rbd_img_request *img_request;
- u64 img_offset; /* image relative offset */
- struct list_head links; /* img_request->obj_requests */
+ /*
+ * An object request associated with an image will have its
+ * img_data flag set; a standalone object request will not.
+ *
+ * A standalone object request will have which == BAD_WHICH
+ * and a null obj_request pointer.
+ *
+ * An object request initiated in support of a layered image
+ * object (to check for its existence before a write) will
+ * have which == BAD_WHICH and a non-null obj_request pointer.
+ *
+ * Finally, an object request for rbd image data will have
+ * which != BAD_WHICH, and will have a non-null img_request
+ * pointer. The value of which will be in the range
+ * 0..(img_request->obj_request_count-1).
+ */
+ union {
+ struct rbd_obj_request *obj_request; /* STAT op */
+ struct {
+ struct rbd_img_request *img_request;
+ u64 img_offset;
+ /* links for img_request->obj_requests list */
+ struct list_head links;
+ };
+ };
u32 which; /* posn image request list */
enum obj_request_type type;
@@ -1656,10 +1678,6 @@ static struct rbd_img_request *rbd_img_request_create(
INIT_LIST_HEAD(&img_request->obj_requests);
kref_init(&img_request->kref);
- (void) obj_request_existence_set;
- (void) obj_request_known_test;
- (void) obj_request_exists_test;
-
rbd_img_request_get(img_request); /* Avoid a warning */
rbd_img_request_put(img_request); /* TEMPORARY */
@@ -1847,18 +1865,147 @@ out_unwind:
return -ENOMEM;
}
+static void rbd_img_obj_exists_callback(struct rbd_obj_request
*obj_request)
+{
+ struct rbd_device *rbd_dev;
+ struct ceph_osd_client *osdc;
+ struct rbd_obj_request *orig_request;
+ int result;
+
+ rbd_assert(!obj_request_img_data_test(obj_request));
+
+ /*
+ * All we need from the object request is the original
+ * request and the result of the STAT op. Grab those, then
+ * we're done with the request.
+ */
+ orig_request = obj_request->obj_request;
+ obj_request->obj_request = NULL;
+ rbd_assert(orig_request);
+ rbd_assert(orig_request->img_request);
+
+ result = obj_request->result;
+ obj_request->result = 0;
+
+ dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
+ obj_request, orig_request, result,
+ obj_request->xferred, obj_request->length);
+ rbd_obj_request_put(obj_request);
+
+ rbd_assert(orig_request);
+ rbd_assert(orig_request->img_request);
+ rbd_dev = orig_request->img_request->rbd_dev;
+ osdc = &rbd_dev->rbd_client->client->osdc;
+
+ /*
+ * Our only purpose here is to determine whether the object
+ * exists, and we don't want to treat the non-existence as
+ * an error. If something else comes back, transfer the
+ * error to the original request and complete it now.
+ */
+ if (!result) {
+ obj_request_existence_set(orig_request, true);
+ } else if (result == -ENOENT) {
+ obj_request_existence_set(orig_request, false);
+ } else if (result) {
+ orig_request->result = result;
+ goto out_err;
+ }
+
+ /*
+ * Resubmit the original request now that we have recorded
+ * whether the target object exists.
+ */
+ orig_request->result = rbd_obj_request_submit(osdc, orig_request);
+out_err:
+ if (orig_request->result)
+ rbd_obj_request_complete(orig_request);
+ rbd_obj_request_put(orig_request);
+}
+
+static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
+{
+ struct rbd_obj_request *stat_request;
+ struct rbd_device *rbd_dev;
+ struct ceph_osd_client *osdc;
+ struct page **pages = NULL;
+ u32 page_count;
+ size_t size;
+ int ret;
+
+ /*
+ * The response data for a STAT call consists of:
+ * le64 length;
+ * struct {
+ * le32 tv_sec;
+ * le32 tv_nsec;
+ * } mtime;
+ */
+ size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
+ page_count = (u32)calc_pages_for(0, size);
+ pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ ret = -ENOMEM;
+ stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
+ OBJ_REQUEST_PAGES);
+ if (!stat_request)
+ goto out;
+
+ rbd_obj_request_get(obj_request);
+ stat_request->obj_request = obj_request;
+ stat_request->pages = pages;
+ stat_request->page_count = page_count;
+
+ rbd_assert(obj_request->img_request);
+ rbd_dev = obj_request->img_request->rbd_dev;
+ stat_request->osd_req = rbd_osd_req_create(rbd_dev, false,
+ stat_request);
+ if (!stat_request->osd_req)
+ goto out;
+ stat_request->callback = rbd_img_obj_exists_callback;
+
+ osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
+ osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
+ false, false);
+ rbd_osd_req_format(stat_request, false);
+
+ osdc = &rbd_dev->rbd_client->client->osdc;
+ ret = rbd_obj_request_submit(osdc, stat_request);
+out:
+ if (ret)
+ rbd_obj_request_put(obj_request);
+
+ return ret;
+}
+
static int rbd_img_request_submit(struct rbd_img_request *img_request)
{
struct rbd_device *rbd_dev = img_request->rbd_dev;
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
struct rbd_obj_request *obj_request;
struct rbd_obj_request *next_obj_request;
+ bool write_request = img_request_write_test(img_request);
+ bool layered = img_request_layered_test(img_request);
dout("%s: img %p\n", __func__, img_request);
for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
+ bool known;
+ bool object_exists;
int ret;
- ret = rbd_obj_request_submit(osdc, obj_request);
+ /*
+ * We need to know whether the target object exists
+ * for a layered write. Issue an existence check
+ * first if we need to.
+ */
+ known = obj_request_known_test(obj_request);
+ object_exists = known && obj_request_exists_test(obj_request);
+ if (!write_request || !layered || object_exists)
+ ret = rbd_obj_request_submit(osdc, obj_request);
+ else
+ ret = rbd_img_obj_exists_submit(obj_request);
if (ret)
return ret;
}
--
1.7.9.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 7/7, v2] rbd: issue stat request before layered write
2013-04-18 12:44 ` [PATCH 7/7, v2] " Alex Elder
@ 2013-04-20 3:03 ` Josh Durgin
0 siblings, 0 replies; 11+ messages in thread
From: Josh Durgin @ 2013-04-20 3:03 UTC (permalink / raw)
To: Alex Elder; +Cc: ceph-devel
On 04/18/2013 05:44 AM, Alex Elder wrote:
> (Since this hasn't been reviewed I have updated it slightly.
> I rebased the series onto the current testing branch. They
> are all available in the "review/wip-4679-3" in the ceph-client
> git repository. I also made some minor changes in the definition
> of rbd_img_obj_exists_callback()).
>
>
> This is a step toward fully implementing layered writes.
>
> Add checks before request submission for the object(s) associated
> with an image request. For write requests, if we don't know that
> the target object exists, issue a STAT request to find out. When
> that request completes, mark the known and exists flags for the
> original object request accordingly and re-submit the object
> request. (Note that this still does the existence check only; the
> copyup operation is not yet done.)
>
> A new object request is created to perform the existence check. A
> pointer to the original request is added to that object request to
> allow the stat request to re-issue the original request after
> updating its flags. If there is a failure with the stat request
> the error code is stored with the original request, which is then
> completed.
>
> This resolves:
> http://tracker.ceph.com/issues/3418
>
> Signed-off-by: Alex Elder <elder@inktank.com>
> ---
> v2: rebased to testing; small cleanup in rbd_img_obj_exists_callback()
>
> drivers/block/rbd.c | 163
> ++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 155 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index b1b8ef8..ce2fb3a 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -183,9 +183,31 @@ struct rbd_obj_request {
> u64 length; /* bytes from offset */
> unsigned long flags;
>
> - struct rbd_img_request *img_request;
> - u64 img_offset; /* image relative offset */
> - struct list_head links; /* img_request->obj_requests */
> + /*
> + * An object request associated with an image will have its
> + * img_data flag set; a standlone object request will not.
s/standlone/standalone/
> + *
> + * A standalone object request will have which == BAD_WHICH
> + * and a null obj_request pointer.
> + *
> + * An object request initiated in support of a layered image
> + * object (to check for its existence before a write) will
> + * have which == BAD_WHICH and a non-null obj_request pointer.
> + *
> + * Finally, an object request for rbd image data will have
> + * which != BAD_WHICH, and will have a non-null img_request
> + * pointer. The value of which will be in the range
> + * 0..(img_request->obj_request_count-1).
> + */
> + union {
> + struct rbd_obj_request *obj_request; /* STAT op */
> + struct {
> + struct rbd_img_request *img_request;
> + u64 img_offset;
> + /* links for img_request->obj_requests list */
> + struct list_head links;
> + };
> + };
A future optimization could do the stat and the write in the same
rbd_obj_request, so that a write to an extant object would take only one
request instead of two.
> u32 which; /* posn image request list */
>
> enum obj_request_type type;
> @@ -1656,10 +1678,6 @@ static struct rbd_img_request
> *rbd_img_request_create(
> INIT_LIST_HEAD(&img_request->obj_requests);
> kref_init(&img_request->kref);
>
> - (void) obj_request_existence_set;
> - (void) obj_request_known_test;
> - (void) obj_request_exists_test;
> -
> rbd_img_request_get(img_request); /* Avoid a warning */
> rbd_img_request_put(img_request); /* TEMPORARY */
>
> @@ -1847,18 +1865,147 @@ out_unwind:
> return -ENOMEM;
> }
>
> +static void rbd_img_obj_exists_callback(struct rbd_obj_request
> *obj_request)
> +{
> + struct rbd_device *rbd_dev;
> + struct ceph_osd_client *osdc;
> + struct rbd_obj_request *orig_request;
> + int result;
> +
> + rbd_assert(!obj_request_img_data_test(obj_request));
> +
> + /*
> + * All we need from the object request is the original
> + * request and the result of the STAT op. Grab those, then
> + * we're done with the request.
> + */
> + orig_request = obj_request->obj_request;
> + obj_request->obj_request = NULL;
> + rbd_assert(orig_request);
> + rbd_assert(orig_request->img_request);
> +
> + result = obj_request->result;
> + obj_request->result = 0;
> +
> + dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
> + obj_request, orig_request, result,
> + obj_request->xferred, obj_request->length);
> + rbd_obj_request_put(obj_request);
> +
> + rbd_assert(orig_request);
> + rbd_assert(orig_request->img_request);
> + rbd_dev = orig_request->img_request->rbd_dev;
> + osdc = &rbd_dev->rbd_client->client->osdc;
> +
> + /*
> + * Our only purpose here is to determine whether the object
> + * exists, and we don't want to treat the non-existence as
> + * an error. If something else comes back, transfer the
> + * error to the original request and complete it now.
> + */
> + if (!result) {
> + obj_request_existence_set(orig_request, true);
> + } else if (result == -ENOENT) {
> + obj_request_existence_set(orig_request, false);
> + } else if (result) {
> + orig_request->result = result;
> + goto out_err;
> + }
> +
> + /*
> + * Resubmit the original request now that we have recorded
> + * whether the target object exists.
> + */
> + orig_request->result = rbd_obj_request_submit(osdc, orig_request);
> +out_err:
> + if (orig_request->result)
> + rbd_obj_request_complete(orig_request);
> + rbd_obj_request_put(orig_request);
> +}
> +
> +static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
> +{
> + struct rbd_obj_request *stat_request;
> + struct rbd_device *rbd_dev;
> + struct ceph_osd_client *osdc;
> + struct page **pages = NULL;
> + u32 page_count;
> + size_t size;
> + int ret;
> +
> + /*
> + * The response data for a STAT call consists of:
> + * le64 length;
> + * struct {
> + * le32 tv_sec;
> + * le32 tv_nsec;
> + * } mtime;
> + */
> + size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
> + page_count = (u32)calc_pages_for(0, size);
> + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL);
> + if (IS_ERR(pages))
> + return PTR_ERR(pages);
> +
> + ret = -ENOMEM;
> + stat_request = rbd_obj_request_create(obj_request->object_name, 0, 0,
> + OBJ_REQUEST_PAGES);
> + if (!stat_request)
> + goto out;
> +
> + rbd_obj_request_get(obj_request);
> + stat_request->obj_request = obj_request;
> + stat_request->pages = pages;
> + stat_request->page_count = page_count;
> +
> + rbd_assert(obj_request->img_request);
> + rbd_dev = obj_request->img_request->rbd_dev;
> + stat_request->osd_req = rbd_osd_req_create(rbd_dev, false,
> + stat_request);
> + if (!stat_request->osd_req)
> + goto out;
> + stat_request->callback = rbd_img_obj_exists_callback;
> +
> + osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
> + osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
> + false, false);
> + rbd_osd_req_format(stat_request, false);
> +
> + osdc = &rbd_dev->rbd_client->client->osdc;
> + ret = rbd_obj_request_submit(osdc, stat_request);
> +out:
> + if (ret)
> + rbd_obj_request_put(obj_request);
> +
> + return ret;
> +}
> +
> static int rbd_img_request_submit(struct rbd_img_request *img_request)
> {
> struct rbd_device *rbd_dev = img_request->rbd_dev;
> struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
> struct rbd_obj_request *obj_request;
> struct rbd_obj_request *next_obj_request;
> + bool write_request = img_request_write_test(img_request);
> + bool layered = img_request_layered_test(img_request);
>
> dout("%s: img %p\n", __func__, img_request);
> for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
> + bool known;
> + bool object_exists;
> int ret;
>
> - ret = rbd_obj_request_submit(osdc, obj_request);
> + /*
> + * We need to know whether the target object exists
> + * for a layered write. Issue an existence check
> + * first if we need to.
> + */
> + known = obj_request_known_test(obj_request);
> + object_exists = known && obj_request_exists_test(obj_request);
> + if (!write_request || !layered || object_exists)
> + ret = rbd_obj_request_submit(osdc, obj_request);
> + else
> + ret = rbd_img_obj_exists_submit(obj_request);
> if (ret)
> return ret;
> }
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 0/7] rbd: existence check or layered writes
2013-04-16 3:35 [PATCH 0/7] rbd: existence check or layered writes Alex Elder
` (6 preceding siblings ...)
2013-04-16 3:38 ` [PATCH 7/7] rbd: issue stat request before layered write Alex Elder
@ 2013-04-20 3:04 ` Josh Durgin
7 siblings, 0 replies; 11+ messages in thread
From: Josh Durgin @ 2013-04-20 3:04 UTC (permalink / raw)
To: Alex Elder; +Cc: ceph-devel
On 04/15/2013 08:35 PM, Alex Elder wrote:
> I've broken these patches into three groups.
>
> The first two are sort of cleanup from prior patches.
>
> The next one adds support for simple ops that contain
> a single data item, which I've termed "raw data."
>
> The next two are some more refinements but they
> were done intentionally to prepare for the last
> one.
>
> The sixth puts in place flags on object requests
> that allow the last patch to determine whether it
> needs to issue a STAT call for a target object
> before doing a layered write request.
>
> The layered write request ends up being fairly
> simple. Existence flags are recorded for the
> target of object requests. If it is unknown
> whether the object exists, a new standalone
> object request is created to do a STAT on the
> object. The result (success or ENOENT) is
> recorded in a flag in the original request,
> at which point the original request is
> re-submitted.
>
> This is the first step in doing layered writes.
>
> -Alex
>
> [PATCH 1/7] libceph: kill off osd data write_request parameters
> [PATCH 2/7] libceph: clean up osd data field access functions
>
> [PATCH 3/7] libceph: support raw data requests
>
> [PATCH 4/7] rbd: adjust image object request ref counting
> [PATCH 5/7] rbd: always check IMG_DATA flag
>
> [PATCH 6/7] rbd: add target object existence flags
> [PATCH 7/7] rbd: issue stat request before layered write
These look good. Just a couple comments on the last one.
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
^ permalink raw reply [flat|nested] 11+ messages in thread