-/*
- * Completion callback for a copyup object request.
- *
- * After sanity-checking the request (its type, that it carries image
- * data, and that its image request and device are set), a request that
- * finished with a zero result has its transfer count set to the full
- * requested length.  The request is then marked done in either case.
- */
-static void
-rbd_osd_copyup_callback(struct rbd_obj_request *obj_request)
-{
-	struct rbd_img_request *img_req;
-	struct rbd_device *dev;
-
-	dout("%s: obj %p\n", __func__, obj_request);
-
-	rbd_assert(obj_request->type == OBJ_REQUEST_BIO ||
-		   obj_request->type == OBJ_REQUEST_NODATA);
-	rbd_assert(obj_request_img_data_test(obj_request));
-
-	/* Both the owning image request and its device must be present. */
-	img_req = obj_request->img_request;
-	rbd_assert(img_req);
-	dev = img_req->rbd_dev;
-	rbd_assert(dev);
-
-	/*
-	 * A write is never partially successful, so on success report
-	 * the whole originally-requested length as transferred.
-	 */
-	if (obj_request->result == 0)
-		obj_request->xferred = obj_request->length;
-
-	obj_request_done_set(obj_request);
-}
-/*
- * Completion callback for the parent image read set up by
- * rbd_img_obj_parent_read_full(), which installs this function as the
- * callback of the parent image request.
- *
- * The original object request, the read result and the read length
- * are taken from @img_request, after which @img_request is put.  Then:
- *  - if the device's parent overlap is now zero, the original request
- *    is submitted unchanged;
- *  - if the read failed, or a copyup osd request cannot be created,
- *    the original request is failed via rbd_obj_request_error();
- *  - otherwise the original request's osd request is replaced by a
- *    copyup request (an "rbd" class "copyup" call carrying the
- *    copyup_bvecs data, followed by the op(s) for the original
- *    operation) and the original request is submitted.
- */
-static void
-rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
-{
- struct rbd_obj_request *orig_request;
- struct ceph_osd_request *osd_req;
- struct rbd_device *rbd_dev;
- enum obj_operation_type op_type;
- int img_result;
- u64 parent_length;
-
- rbd_assert(img_request_child_test(img_request));
-
- /*
- * First get what we need from the image request: the original
- * object request, the result of the read and its length.
- */
-
- orig_request = img_request->obj_request;
- rbd_assert(orig_request != NULL);
- rbd_assert(obj_request_type_valid(orig_request->type));
- img_result = img_request->result;
- parent_length = img_request->length;
- rbd_assert(img_result || parent_length == img_request->xferred); /* a read without error must be complete */
- rbd_img_request_put(img_request); /* img_request is not touched after this point */
-
- rbd_assert(orig_request->img_request);
- rbd_dev = orig_request->img_request->rbd_dev;
- rbd_assert(rbd_dev);
-
- /*
- * If the overlap has become 0 (most likely because the
- * image has been flattened) no copyup is needed: re-submit
- * the original write request as it is.
- *
- * NOTE(review): nothing is freed on this path; the copyup
- * buffers are presumably released together with the original
- * request -- confirm.
- */
- if (!rbd_dev->parent_overlap) {
- rbd_obj_request_submit(orig_request);
- return;
- }
-
- if (img_result)
- goto out_err;
-
- /*
- * The original osd request is of no use to us any more.
- * We need a new one that can hold the three ops in a copyup
- * request. Allocate the new copyup osd request for the
- * original request, and release the old one.
- */
- img_result = -ENOMEM; /* reported through out_err if the allocation below fails */
- osd_req = rbd_osd_req_create_copyup(orig_request);
- if (!osd_req)
- goto out_err;
- rbd_osd_req_destroy(orig_request->osd_req);
- orig_request->osd_req = osd_req;
-
- /*
- * Initialize the copyup op (op 0): a call to method "copyup" of
- * class "rbd", with the data read from the parent as its input.
- */
-
- osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
- osd_req_op_cls_request_data_bvecs(osd_req, 0, orig_request->copyup_bvecs,
- parent_length);
-
- /*
- * Add the other op(s) for the original operation (the trailing 1
- * is presumably the index of the first op to fill -- confirm).
- */
-
- op_type = rbd_img_request_op_type(orig_request->img_request);
- rbd_img_obj_request_fill(orig_request, osd_req, op_type, 1);
-
- /* All set, send it off. */
-
- rbd_obj_request_submit(orig_request);
- return;
-
-out_err:
- rbd_obj_request_error(orig_request, img_result);
-}
-
-static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap); /* forward declaration; the definition is not in this section */
-
-/*
- * Read from the parent image the range of data that covers the
- * entire target of the given object request, limited to the parent
- * overlap. This is used for satisfying a layered image write request
- * when the target of an object request from the image request does
- * not exist.
- *
- * Buffers for the returned data are set up on the object request by
- * setup_copyup_bvecs() and supplied to rbd_img_request_fill() through
- * a bvec iterator over obj_request->copyup_bvecs. When the read
- * completes, rbd_img_obj_parent_read_full_callback() is invoked and
- * uses those bvecs for the copyup operation.
- *
- * Returns 0 once the parent read has been submitted. Otherwise the
- * error of the failing step is returned to the caller (-ENOMEM if the
- * parent request cannot be created), and the parent request, if one
- * was created, is put.
- *
- * NOTE(review): the copyup bvecs are not released on the error path
- * here; presumably they are freed along with the object request --
- * confirm.
- */
-static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
-{
- struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
- struct rbd_img_request *parent_request = NULL;
- struct ceph_bvec_iter bvec_it = { 0 };
- u64 img_offset;
- u64 length;
- int result;
-
- rbd_assert(rbd_dev->parent != NULL);
-
- /*
- * Determine the byte range covered by the object in the
- * child image to which the original request was to be sent:
- * it starts at the image offset of the object's first byte and
- * spans one full object.
- */
- img_offset = obj_request->img_offset - obj_request->offset;
- length = rbd_obj_bytes(&rbd_dev->header);
-
- /*
- * There is no defined parent data beyond the parent
- * overlap, so limit what we read at that boundary if
- * necessary.
- */
- if (img_offset + length > rbd_dev->parent_overlap) {
- rbd_assert(img_offset < rbd_dev->parent_overlap);
- length = rbd_dev->parent_overlap - img_offset;
- }
-
- /*
- * Set up the copyup bvecs that will receive the `length` bytes
- * read from the parent (presumably allocated by
- * setup_copyup_bvecs() -- confirm against its definition).
- */
- result = setup_copyup_bvecs(obj_request, length);
- if (result)
- goto out_err;
-
- result = -ENOMEM; /* returned if the parent request cannot be created */
- parent_request = rbd_parent_request_create(obj_request,
- img_offset, length);
- if (!parent_request)
- goto out_err;
-
- bvec_it.bvecs = obj_request->copyup_bvecs;
- bvec_it.iter.bi_size = length; /* iterate over exactly the bytes to be read */
- result = rbd_img_request_fill(parent_request, OBJ_REQUEST_BVECS,
- &bvec_it);
- if (result)
- goto out_err;
-
- parent_request->callback = rbd_img_obj_parent_read_full_callback;
-
- result = rbd_img_request_submit(parent_request);
- if (!result)
- return 0;
-
-out_err:
- if (parent_request)
- rbd_img_request_put(parent_request);
- return result;
-}
-
-/*
- * Completion callback for the STAT request submitted by
- * rbd_img_obj_exists_submit().
- *
- * Records on the original object request whether its target object
- * exists (STAT result 0) or not (-ENOENT), then resubmits the original
- * request through rbd_img_obj_request_submit().  Any other STAT
- * result, or a failure to resubmit, is reported on the original
- * request with rbd_obj_request_error().  If the parent overlap has
- * become zero the original request is simply submitted as it is.
- *
- * Both the reference on the original request taken in
- * rbd_img_obj_exists_submit() and a reference on the STAT request
- * itself are dropped here.
- */
-static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
-{
-	struct rbd_obj_request *orig_request;
-	struct rbd_device *rbd_dev;
-	int result;
-
-	rbd_assert(!obj_request_img_data_test(obj_request));
-
-	/*
-	 * All we need from the object request is the original
-	 * request and the result of the STAT op.  Grab those, then
-	 * we're done with the request.
-	 *
-	 * The original request is validated before its reference is
-	 * dropped, so a NULL pointer trips the assertion instead of
-	 * being handed to rbd_obj_request_put().
-	 *
-	 * NOTE(review): orig_request is still used after the put below;
-	 * this relies on another reference (presumably the one held
-	 * through its image request) keeping it alive -- confirm.
-	 */
-	orig_request = obj_request->obj_request;
-	rbd_assert(orig_request);
-	rbd_assert(orig_request->img_request);
-	obj_request->obj_request = NULL;
-	rbd_obj_request_put(orig_request);
-
-	result = obj_request->result;
-	obj_request->result = 0;
-
-	dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
-		obj_request, orig_request, result,
-		obj_request->xferred, obj_request->length);
-	rbd_obj_request_put(obj_request);
-
-	/*
-	 * If the overlap has become 0 (most likely because the
-	 * image has been flattened) we need to re-submit the
-	 * original request.
-	 */
-	rbd_dev = orig_request->img_request->rbd_dev;
-	if (!rbd_dev->parent_overlap) {
-		rbd_obj_request_submit(orig_request);
-		return;
-	}
-
-	/*
-	 * Our only purpose here is to determine whether the object
-	 * exists, and we don't want to treat the non-existence as
-	 * an error.  If something else comes back, transfer the
-	 * error to the original request and complete it now.
-	 */
-	if (!result) {
-		obj_request_existence_set(orig_request, true);
-	} else if (result == -ENOENT) {
-		obj_request_existence_set(orig_request, false);
-	} else {
-		goto fail_orig_request;
-	}
-
-	/*
-	 * Resubmit the original request now that we have recorded
-	 * whether the target object exists.
-	 */
-	result = rbd_img_obj_request_submit(orig_request);
-	if (result)
-		goto fail_orig_request;
-
-	return;
-
-fail_orig_request:
-	rbd_obj_request_error(orig_request, result);
-}
-
-/*
- * Submit a STAT request for the object targeted by @obj_request.
- *
- * A data-less object request with a single-op osd request is created
- * for the same object number and given one page to receive the STAT
- * reply.  A reference to @obj_request is taken and stored in the STAT
- * request, whose completion is handled by
- * rbd_img_obj_exists_callback().
- *
- * Returns 0 once the STAT request has been submitted, -ENOMEM if the
- * STAT request or its osd request cannot be created, or the error
- * from ceph_alloc_page_vector().  On failure the partially built STAT
- * request is put.
- */
-static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
-{
-	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-	struct rbd_obj_request *stat_req;
-	struct page **reply_pages;
-	int err = -ENOMEM;
-
-	stat_req = rbd_obj_request_create(OBJ_REQUEST_NODATA);
-	if (!stat_req)
-		return err;
-
-	stat_req->object_no = obj_request->object_no;
-
-	stat_req->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
-					       stat_req);
-	if (!stat_req->osd_req)
-		goto out_put;
-
-	/*
-	 * A STAT reply is laid out as:
-	 *	le64 length;
-	 *	struct {
-	 *		le32 tv_sec;
-	 *		le32 tv_nsec;
-	 *	} mtime;
-	 */
-	reply_pages = ceph_alloc_page_vector(1, GFP_NOIO);
-	if (IS_ERR(reply_pages)) {
-		err = PTR_ERR(reply_pages);
-		goto out_put;
-	}
-
-	osd_req_op_init(stat_req->osd_req, 0, CEPH_OSD_OP_STAT, 0);
-	osd_req_op_raw_data_in_pages(stat_req->osd_req, 0, reply_pages,
-				     8 + sizeof(struct ceph_timespec),
-				     0, false, true);
-
-	/* The callback drops this reference on the original request. */
-	rbd_obj_request_get(obj_request);
-	stat_req->obj_request = obj_request;
-	stat_req->callback = rbd_img_obj_exists_callback;
-
-	rbd_obj_request_submit(stat_req);
-	return 0;
-
-out_put:
-	rbd_obj_request_put(stat_req);
-	return err;
-}
-
-/*
- * Decide whether an object request can be sent to its target object
- * directly, with no existence check or parent read first.
- *
- * Returns true for:
- *  - requests that are neither writes nor discards (reads);
- *  - requests belonging to a non-layered image request;
- *  - requests that do not overlap the parent;
- *  - requests covering the entire object (offset 0 and a length equal
- *    to the object size);
- *  - requests whose target object is known to exist.
- * Returns false otherwise.
- */
-static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
-{
-	struct rbd_img_request *img_req = obj_request->img_request;
-	struct rbd_device *dev = img_req->rbd_dev;
-	bool whole_object;
-
-	/* Anything that is not a write or a discard is a read. */
-	if (!(img_request_write_test(img_req) ||
-	      img_request_discard_test(img_req)))
-		return true;
-
-	/* Non-layered writes need no special treatment. */
-	if (!img_request_layered_test(img_req))
-		return true;
-
-	/* Outside the parent overlap nothing is shared with the parent. */
-	if (!obj_request_overlaps_parent(obj_request))
-		return true;
-
-	/* A whole-object write replaces any parent data anyway. */
-	whole_object = obj_request->offset == 0 &&
-		       obj_request->length == rbd_obj_bytes(&dev->header);
-	if (whole_object)
-		return true;
-
-	/* An object known to exist already has its parent data copied. */
-	return obj_request_known_test(obj_request) &&
-	       obj_request_exists_test(obj_request);
-}
-
-/*
- * Submit an object request that belongs to an image request.
- *
- * Simple requests (see img_obj_request_simple()) are submitted
- * directly and 0 is returned.  Any other request is a layered write:
- * if the existence of the target object has already been determined
- * (and it therefore does not exist, or the request would have been
- * simple), the full object range is first read from the parent for a
- * copyup; if it is still unknown, a STAT request is issued to find
- * out.  In both of those cases the helper's return value is passed on.
- */
-static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
-{
-	rbd_assert(obj_request_img_data_test(obj_request));
-	rbd_assert(obj_request_type_valid(obj_request->type));
-	rbd_assert(obj_request->img_request);
-
-	if (!img_obj_request_simple(obj_request)) {
-		/* Known not to exist: fetch the parent data for a copyup. */
-		if (obj_request_known_test(obj_request))
-			return rbd_img_obj_parent_read_full(obj_request);
-
-		/* Existence of the target is unknown: go find out. */
-		return rbd_img_obj_exists_submit(obj_request);
-	}
-
-	rbd_obj_request_submit(obj_request);
-	return 0;
-}
-