// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RDMA Network Block Driver
 *
 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
 */

#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt

#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/scatterlist.h>
#include <linux/idr.h>

#include "rnbd-clt.h"

MODULE_DESCRIPTION("RDMA Network Block Device Client");
MODULE_LICENSE("GPL");

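/*
 * Module-wide state: rnbd_client_major is the block major shared by all
 * rnbd devices, index_ida hands out per-device indices (used to derive
 * minor numbers), and sess_list holds the active sessions under sess_lock.
 */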
static int rnbd_client_major;
static DEFINE_IDA(index_ida);
static DEFINE_MUTEX(sess_lock);
static LIST_HEAD(sess_list);

/*
 * Maximum number of partitions an instance can have.
 * 6 bits = 64 minors = 63 partitions (one minor is used for the device itself)
 */
#define RNBD_PART_BITS		6

static inline bool rnbd_clt_get_sess(struct rnbd_clt_session *sess)
{
	return refcount_inc_not_zero(&sess->refcount);
}

static void free_sess(struct rnbd_clt_session *sess);

static void rnbd_clt_put_sess(struct rnbd_clt_session *sess)
{
	might_sleep();

	if (refcount_dec_and_test(&sess->refcount))
		free_sess(sess);
}

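/* Drop a device reference; the last put releases all device resources. */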
static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev)
{
	might_sleep();

	if (!refcount_dec_and_test(&dev->refcount))
		return;

	ida_free(&index_ida, dev->clt_device_id);
	kfree(dev->hw_queues);
	kfree(dev->pathname);
	rnbd_clt_put_sess(dev->sess);
	mutex_destroy(&dev->lock);
	kfree(dev);
}

static inline bool rnbd_clt_get_dev(struct rnbd_clt_dev *dev)
{
	return refcount_inc_not_zero(&dev->refcount);
}

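/* Copy the block-device attributes the server reported in its open response. */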
static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
				 const struct rnbd_msg_open_rsp *rsp)
{
	struct rnbd_clt_session *sess = dev->sess;

	if (!rsp->logical_block_size)
		return -EINVAL;

	dev->device_id = le32_to_cpu(rsp->device_id);
	dev->nsectors = le64_to_cpu(rsp->nsectors);
	dev->logical_block_size = le16_to_cpu(rsp->logical_block_size);
	dev->physical_block_size = le16_to_cpu(rsp->physical_block_size);
	dev->max_discard_sectors = le32_to_cpu(rsp->max_discard_sectors);
	dev->discard_granularity = le32_to_cpu(rsp->discard_granularity);
	dev->discard_alignment = le32_to_cpu(rsp->discard_alignment);
	dev->secure_discard = le16_to_cpu(rsp->secure_discard);
	dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
	dev->fua = !!(rsp->cache_policy & RNBD_FUA);

	dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
	dev->max_segments = sess->max_segments;

	return 0;
}

static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
				    size_t new_nsectors)
{
	rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n",
		      dev->nsectors, new_nsectors);
	dev->nsectors = new_nsectors;
	set_capacity_and_notify(dev->gd, dev->nsectors);
	return 0;
}

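/*
 * Handle the server's open response: revalidate the size of a remapped
 * device if it changed, and move the device to the MAPPED state.
 */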
static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
				struct rnbd_msg_open_rsp *rsp)
{
	struct kobject *gd_kobj;
	int err = 0;

	mutex_lock(&dev->lock);
	if (dev->dev_state == DEV_STATE_UNMAPPED) {
		rnbd_clt_info(dev,
			      "Ignoring Open-Response message from server for unmapped device\n");
		err = -ENOENT;
		goto out;
	}
	if (dev->dev_state == DEV_STATE_MAPPED_DISCONNECTED) {
		u64 nsectors = le64_to_cpu(rsp->nsectors);

		/*
		 * If the device was remapped and the size changed in the
		 * meantime we need to revalidate it
		 */
		if (dev->nsectors != nsectors)
			rnbd_clt_change_capacity(dev, nsectors);
		gd_kobj = &disk_to_dev(dev->gd)->kobj;
		kobject_uevent(gd_kobj, KOBJ_ONLINE);
		rnbd_clt_info(dev, "Device online, device remapped successfully\n");
	}
	err = rnbd_clt_set_dev_attr(dev, rsp);
	if (err)
		goto out;
	dev->dev_state = DEV_STATE_MAPPED;

out:
	mutex_unlock(&dev->lock);

	return err;
}

int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize)
{
	int ret = 0;

	mutex_lock(&dev->lock);
	if (dev->dev_state != DEV_STATE_MAPPED) {
		pr_err("Failed to set new size of the device, device is not opened\n");
		ret = -ENOENT;
		goto out;
	}
	ret = rnbd_clt_change_capacity(dev, newsize);

out:
	mutex_unlock(&dev->lock);

	return ret;
}

static inline void rnbd_clt_dev_requeue(struct rnbd_queue *q)
{
	if (WARN_ON(!q->hctx))
		return;

	/* We can come here from interrupt, thus async=true */
	blk_mq_run_hw_queue(q->hctx, true);
}

enum {
	RNBD_DELAY_IFBUSY = -1,
};

/**
 * rnbd_get_cpu_qlist() - finds a list with HW queues to be rerun
 * @sess:	Session to find a queue for
 * @cpu:	Cpu to start the search from
 *
 * Description:
 *	Each CPU has a list of HW queues, which needs to be rerun.  If a list
 *	is not empty - it is marked with a bit.  This function finds the first
 *	set bit in the bitmap and returns the corresponding CPU list.
 */
static struct rnbd_cpu_qlist *
rnbd_get_cpu_qlist(struct rnbd_clt_session *sess, int cpu)
{
	int bit;

	/* Search from cpu to nr_cpu_ids */
	bit = find_next_bit(sess->cpu_queues_bm, nr_cpu_ids, cpu);
	if (bit < nr_cpu_ids) {
		return per_cpu_ptr(sess->cpu_queues, bit);
	} else if (cpu != 0) {
		/* Search from 0 to cpu */
		bit = find_first_bit(sess->cpu_queues_bm, cpu);
		if (bit < cpu)
			return per_cpu_ptr(sess->cpu_queues, bit);
	}

	return NULL;
}

static inline int nxt_cpu(int cpu)
{
	return (cpu + 1) % nr_cpu_ids;
}

/**
 * rnbd_rerun_if_needed() - rerun next queue marked as stopped
 * @sess:	Session to rerun a queue on
 *
 * Description:
 *	Each CPU has its own list of HW queues, which should be rerun.
 *	This function finds such a list with HW queues, takes the list lock,
 *	picks up the first HW queue out of the list and requeues it.
 *
 * Return:
 *	True if the queue was requeued, false otherwise.
 *
 * Context:
 *	Does not matter.
 */
static bool rnbd_rerun_if_needed(struct rnbd_clt_session *sess)
{
	struct rnbd_queue *q = NULL;
	struct rnbd_cpu_qlist *cpu_q;
	unsigned long flags;
	int *cpup;

	/*
	 * To keep fairness and not to let other queues starve we always
	 * try to wake up someone else in round-robin manner. That of course
	 * increases latency but queues always have a chance to be executed.
	 */
	cpup = get_cpu_ptr(sess->cpu_rr);
	for (cpu_q = rnbd_get_cpu_qlist(sess, nxt_cpu(*cpup)); cpu_q;
	     cpu_q = rnbd_get_cpu_qlist(sess, nxt_cpu(cpu_q->cpu))) {
		if (!spin_trylock_irqsave(&cpu_q->requeue_lock, flags))
			continue;
		if (!test_bit(cpu_q->cpu, sess->cpu_queues_bm))
			goto unlock;
		q = list_first_entry_or_null(&cpu_q->requeue_list,
					     typeof(*q), requeue_list);
		if (WARN_ON(!q))
			goto clear_bit;
		list_del_init(&q->requeue_list);
		clear_bit_unlock(0, &q->in_list);

		if (list_empty(&cpu_q->requeue_list)) {
			/* Clear bit if nothing is left */
clear_bit:
			clear_bit(cpu_q->cpu, sess->cpu_queues_bm);
		}
unlock:
		spin_unlock_irqrestore(&cpu_q->requeue_lock, flags);

		if (q)
			break;
	}

	/*
	 * Saves the CPU that is going to be requeued on the per-cpu var. Just
	 * incrementing it doesn't work because rnbd_get_cpu_qlist() will
	 * always return the first CPU with something on the queue list when
	 * the value stored on the var is greater than the last CPU with
	 * something on the list.
	 */
	if (cpu_q)
		*cpup = cpu_q->cpu;
	put_cpu_ptr(sess->cpu_rr);

	if (q)
		rnbd_clt_dev_requeue(q);

	return q;
}

/**
 * rnbd_rerun_all_if_idle() - rerun all queues left in the list if
 *				 session is idling (there are no requests
 *				 in-flight).
 * @sess:	Session to rerun the queues on
 *
 * Description:
 *	This function tries to rerun all stopped queues if there are no
 *	requests in-flight anymore.  It tries to solve an obvious problem,
 *	when the number of tags is less than the number of queues (hctx)
 *	that are stopped and put to sleep.  If the last permit, which has
 *	just been put, does not wake up all the queues that are left, IO
 *	requests hang forever.
 *
 *	That can happen when all N permits have been exhausted from one CPU
 *	and we have many block devices per session, say M.  Each block
 *	device has its own queue (hctx) for each CPU, so eventually we can
 *	put that number of queues (hctxs) to sleep: M x nr_cpu_ids.  If the
 *	number of permits N < M x nr_cpu_ids, we eventually get an IO hang.
 *
 *	To avoid this hang, the last caller of rnbd_put_permit() (the one
 *	who observes sess->busy == 0) must wake up all remaining queues.
 *
 * Context:
 *	Does not matter.
 */
static void rnbd_rerun_all_if_idle(struct rnbd_clt_session *sess)
{
	bool requeued;

	do {
		requeued = rnbd_rerun_if_needed(sess);
	} while (atomic_read(&sess->busy) == 0 && requeued);
}

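/*
 * Permit accounting: sess->busy counts outstanding permits, so that
 * rnbd_put_permit() can detect an idle session and rerun stopped queues.
 */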
static struct rtrs_permit *rnbd_get_permit(struct rnbd_clt_session *sess,
					   enum rtrs_clt_con_type con_type,
					   enum wait_type wait)
{
	struct rtrs_permit *permit;

	permit = rtrs_clt_get_permit(sess->rtrs, con_type, wait);
	if (permit)
		/* We have a subtle rare case here, when all permits can be
		 * consumed before the busy counter is increased.  This is
		 * safe, because the loser will get NULL as a permit, observe
		 * 0 busy counter and immediately restart the queue itself.
		 */
		atomic_inc(&sess->busy);

	return permit;
}

static void rnbd_put_permit(struct rnbd_clt_session *sess,
			    struct rtrs_permit *permit)
{
	rtrs_clt_put_permit(sess->rtrs, permit);
	atomic_dec(&sess->busy);
	/* Paired with rnbd_clt_dev_add_to_requeue().  Decrement first
	 * and then check queue bits.
	 */
	smp_mb__after_atomic();
	rnbd_rerun_all_if_idle(sess);
}

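/*
 * Allocate an iu for a user (admin) message together with a permit and
 * a single-entry sg table for the response buffer.
 */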
static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess,
				   enum rtrs_clt_con_type con_type,
				   enum wait_type wait)
{
	struct rnbd_iu *iu;
	struct rtrs_permit *permit;

	iu = kzalloc(sizeof(*iu), GFP_KERNEL);
	if (!iu)
		return NULL;

	permit = rnbd_get_permit(sess, con_type, wait);
	if (!permit) {
		kfree(iu);
		return NULL;
	}

	iu->permit = permit;
	/*
	 * 1st reference is dropped after finishing sending a "user" message,
	 * 2nd reference is dropped after confirmation with the response is
	 * returned.
	 * 1st and 2nd can happen in any order, so the rnbd_iu should be
	 * released (rtrs_permit returned to rtrs) only after both
	 * are finished.
	 */
	atomic_set(&iu->refcount, 2);
	init_waitqueue_head(&iu->comp.wait);
	iu->comp.errno = INT_MAX;

	if (sg_alloc_table(&iu->sgt, 1, GFP_KERNEL)) {
		rnbd_put_permit(sess, permit);
		kfree(iu);
		return NULL;
	}

	return iu;
}

static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu)
{
	if (atomic_dec_and_test(&iu->refcount)) {
		sg_free_table(&iu->sgt);
		rnbd_put_permit(sess, iu->permit);
		kfree(iu);
	}
}

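/* Request completion path: free per-request resources and end the request. */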
static void rnbd_softirq_done_fn(struct request *rq)
{
	struct rnbd_clt_dev *dev = rq->q->disk->private_data;
	struct rnbd_clt_session *sess = dev->sess;
	struct rnbd_iu *iu;

	iu = blk_mq_rq_to_pdu(rq);
	sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT);
	rnbd_put_permit(sess, iu->permit);
	blk_mq_end_request(rq, errno_to_blk_status(iu->errno));
}

static void msg_io_conf(void *priv, int errno)
{
	struct rnbd_iu *iu = priv;
	struct rnbd_clt_dev *dev = iu->dev;
	struct request *rq = iu->rq;
	int rw = rq_data_dir(rq);

	iu->errno = errno;

	blk_mq_complete_request(rq);

	if (errno)
		rnbd_clt_info_rl(dev, "%s I/O failed with err: %d\n",
				 rw == READ ? "read" : "write", errno);
}

static void wake_up_iu_comp(struct rnbd_iu *iu, int errno)
{
	iu->comp.errno = errno;
	wake_up(&iu->comp.wait);
}

static void msg_conf(void *priv, int errno)
{
	struct rnbd_iu *iu = priv;

	iu->errno = errno;
	schedule_work(&iu->work);
}

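/*
 * Send a user (admin) message over RTRS.  The confirmation runs from a
 * workqueue (see msg_conf()); with a nonzero @wait this blocks until the
 * response arrives and passes its error code back via @errno.
 */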
static int send_usr_msg(struct rtrs_clt_sess *rtrs, int dir,
			struct rnbd_iu *iu, struct kvec *vec,
			size_t len, struct scatterlist *sg, unsigned int sg_len,
			void (*conf)(struct work_struct *work),
			int *errno, int wait)
{
	int err;
	struct rtrs_clt_req_ops req_ops;

	INIT_WORK(&iu->work, conf);
	req_ops = (struct rtrs_clt_req_ops) {
		.priv = iu,
		.conf_fn = msg_conf,
	};
	err = rtrs_clt_request(dir, &req_ops, rtrs, iu->permit,
			       vec, 1, len, sg, sg_len);
	if (!err && wait) {
		wait_event(iu->comp.wait, iu->comp.errno != INT_MAX);
		*errno = iu->comp.errno;
	} else {
		*errno = 0;
	}

	return err;
}

static void msg_close_conf(struct work_struct *work)
{
	struct rnbd_iu *iu = container_of(work, struct rnbd_iu, work);
	struct rnbd_clt_dev *dev = iu->dev;

	wake_up_iu_comp(iu, iu->errno);
	rnbd_put_iu(dev->sess, iu);
	rnbd_clt_put_dev(dev);
}

static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id,
			  enum wait_type wait)
{
	struct rnbd_clt_session *sess = dev->sess;
	struct rnbd_msg_close msg;
	struct rnbd_iu *iu;
	struct kvec vec = {
		.iov_base = &msg,
		.iov_len = sizeof(msg)
	};
	int err, errno;

	iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT);
	if (!iu)
		return -ENOMEM;

	iu->buf = NULL;
	iu->dev = dev;

	msg.hdr.type = cpu_to_le16(RNBD_MSG_CLOSE);
	msg.device_id = cpu_to_le32(device_id);

	WARN_ON(!rnbd_clt_get_dev(dev));
	err = send_usr_msg(sess->rtrs, WRITE, iu, &vec, 0, NULL, 0,
			   msg_close_conf, &errno, wait);
	if (err) {
		rnbd_clt_put_dev(dev);
		rnbd_put_iu(sess, iu);
	} else {
		err = errno;
	}

	rnbd_put_iu(sess, iu);
	return err;
}

static void msg_open_conf(struct work_struct *work)
{
	struct rnbd_iu *iu = container_of(work, struct rnbd_iu, work);
	struct rnbd_msg_open_rsp *rsp = iu->buf;
	struct rnbd_clt_dev *dev = iu->dev;
	int errno = iu->errno;

	if (errno) {
		rnbd_clt_err(dev,
			     "Opening failed, server responded: %d\n",
			     errno);
	} else {
		errno = process_msg_open_rsp(dev, rsp);
		if (errno) {
			u32 device_id = le32_to_cpu(rsp->device_id);
			/*
			 * If the server thinks it's fine, but we fail to
			 * process the response, then be nice and send a
			 * close to the server.
			 */
			send_msg_close(dev, device_id, RTRS_PERMIT_NOWAIT);
		}
	}
	kfree(rsp);
	wake_up_iu_comp(iu, errno);
	rnbd_put_iu(dev->sess, iu);
	rnbd_clt_put_dev(dev);
}

static void msg_sess_info_conf(struct work_struct *work)
{
	struct rnbd_iu *iu = container_of(work, struct rnbd_iu, work);
	struct rnbd_msg_sess_info_rsp *rsp = iu->buf;
	struct rnbd_clt_session *sess = iu->sess;

	if (!iu->errno)
		sess->ver = min_t(u8, rsp->ver, RNBD_PROTO_VER_MAJOR);

	kfree(rsp);
	wake_up_iu_comp(iu, iu->errno);
	rnbd_put_iu(sess, iu);
	rnbd_clt_put_sess(sess);
}

static int send_msg_open(struct rnbd_clt_dev *dev, enum wait_type wait)
{
	struct rnbd_clt_session *sess = dev->sess;
	struct rnbd_msg_open_rsp *rsp;
	struct rnbd_msg_open msg;
	struct rnbd_iu *iu;
	struct kvec vec = {
		.iov_base = &msg,
		.iov_len = sizeof(msg)
	};
	int err, errno;

	rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT);
	if (!iu) {
		kfree(rsp);
		return -ENOMEM;
	}

	iu->buf = rsp;
	iu->dev = dev;

	sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));

	msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN);
	msg.access_mode = dev->access_mode;
	strscpy(msg.dev_name, dev->pathname, sizeof(msg.dev_name));

	WARN_ON(!rnbd_clt_get_dev(dev));
	err = send_usr_msg(sess->rtrs, READ, iu,
			   &vec, sizeof(*rsp), iu->sgt.sgl, 1,
			   msg_open_conf, &errno, wait);
	if (err) {
		rnbd_clt_put_dev(dev);
		rnbd_put_iu(sess, iu);
		kfree(rsp);
	} else {
		err = errno;
	}

	rnbd_put_iu(sess, iu);
	return err;
}

static int send_msg_sess_info(struct rnbd_clt_session *sess, enum wait_type wait)
{
	struct rnbd_msg_sess_info_rsp *rsp;
	struct rnbd_msg_sess_info msg;
	struct rnbd_iu *iu;
	struct kvec vec = {
		.iov_base = &msg,
		.iov_len = sizeof(msg)
	};
	int err, errno;

	rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
	if (!rsp)
		return -ENOMEM;

	iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT);
	if (!iu) {
		kfree(rsp);
		return -ENOMEM;
	}

	iu->buf = rsp;
	iu->sess = sess;
	sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));

	msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO);
	msg.ver = RNBD_PROTO_VER_MAJOR;

	if (!rnbd_clt_get_sess(sess)) {
		/*
		 * That can happen only in one case: RTRS has reestablished
		 * the connection and link_ev() is called, but the session is
		 * almost dead, the last reference on it has been put and the
		 * caller is waiting for RTRS to close everything.
		 */
		err = -ENODEV;
		goto put_iu;
	}
	err = send_usr_msg(sess->rtrs, READ, iu,
			   &vec, sizeof(*rsp), iu->sgt.sgl, 1,
			   msg_sess_info_conf, &errno, wait);
	if (err) {
		rnbd_clt_put_sess(sess);
put_iu:
		rnbd_put_iu(sess, iu);
		kfree(rsp);
	} else {
		err = errno;
	}
	rnbd_put_iu(sess, iu);
	return err;
}

static void set_dev_states_to_disconnected(struct rnbd_clt_session *sess)
{
	struct rnbd_clt_dev *dev;
	struct kobject *gd_kobj;

	mutex_lock(&sess->lock);
	list_for_each_entry(dev, &sess->devs_list, list) {
		rnbd_clt_err(dev, "Device disconnected.\n");

		mutex_lock(&dev->lock);
		if (dev->dev_state == DEV_STATE_MAPPED) {
			dev->dev_state = DEV_STATE_MAPPED_DISCONNECTED;
			gd_kobj = &disk_to_dev(dev->gd)->kobj;
			kobject_uevent(gd_kobj, KOBJ_OFFLINE);
		}
		mutex_unlock(&dev->lock);
	}
	mutex_unlock(&sess->lock);
}

static void remap_devs(struct rnbd_clt_session *sess)
{
	struct rnbd_clt_dev *dev;
	struct rtrs_attrs attrs;
	int err;

	/*
	 * Careful here: we are called from an RTRS link event directly,
	 * thus we can't send any RTRS request and wait for a response
	 * or RTRS will not be able to complete the request with failure
	 * if something goes wrong (failing of outstanding requests
	 * happens exactly from the context where we are blocking now).
	 *
	 * So to avoid deadlocks each usr message sent from here must
	 * be asynchronous.
	 */

	err = send_msg_sess_info(sess, RTRS_PERMIT_NOWAIT);
	if (err) {
		pr_err("send_msg_sess_info(\"%s\"): %d\n", sess->sessname, err);
		return;
	}

	err = rtrs_clt_query(sess->rtrs, &attrs);
	if (err) {
		pr_err("rtrs_clt_query(\"%s\"): %d\n", sess->sessname, err);
		return;
	}
	mutex_lock(&sess->lock);
	sess->max_io_size = attrs.max_io_size;

	list_for_each_entry(dev, &sess->devs_list, list) {
		bool skip;

		mutex_lock(&dev->lock);
		skip = (dev->dev_state == DEV_STATE_INIT);
		mutex_unlock(&dev->lock);
		if (skip)
			/*
			 * When the device is establishing a connection for
			 * the first time - do not remap, it will be closed
			 * soon.
			 */
			continue;

		rnbd_clt_info(dev, "session reconnected, remapping device\n");
		err = send_msg_open(dev, RTRS_PERMIT_NOWAIT);
		if (err) {
			rnbd_clt_err(dev, "send_msg_open(): %d\n", err);
			break;
		}
	}
	mutex_unlock(&sess->lock);
}

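/* RTRS link-event callback: flip device states on disconnect/reconnect. */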
static void rnbd_clt_link_ev(void *priv, enum rtrs_clt_link_ev ev)
{
	struct rnbd_clt_session *sess = priv;

	switch (ev) {
	case RTRS_CLT_LINK_EV_DISCONNECTED:
		set_dev_states_to_disconnected(sess);
		break;
	case RTRS_CLT_LINK_EV_RECONNECTED:
		remap_devs(sess);
		break;
	default:
		pr_err("Unknown session event received (%d), session: %s\n",
		       ev, sess->sessname);
	}
}

static void rnbd_init_cpu_qlists(struct rnbd_cpu_qlist __percpu *cpu_queues)
{
	unsigned int cpu;
	struct rnbd_cpu_qlist *cpu_q;

	for_each_possible_cpu(cpu) {
		cpu_q = per_cpu_ptr(cpu_queues, cpu);

		cpu_q->cpu = cpu;
		INIT_LIST_HEAD(&cpu_q->requeue_list);
		spin_lock_init(&cpu_q->requeue_lock);
	}
}

static void destroy_mq_tags(struct rnbd_clt_session *sess)
{
	if (sess->tag_set.tags)
		blk_mq_free_tag_set(&sess->tag_set);
}

static inline void wake_up_rtrs_waiters(struct rnbd_clt_session *sess)
{
	sess->rtrs_ready = true;
	wake_up_all(&sess->rtrs_waitq);
}

static void close_rtrs(struct rnbd_clt_session *sess)
{
	might_sleep();

	if (!IS_ERR_OR_NULL(sess->rtrs)) {
		rtrs_clt_close(sess->rtrs);
		sess->rtrs = NULL;
		wake_up_rtrs_waiters(sess);
	}
}

static void free_sess(struct rnbd_clt_session *sess)
{
	WARN_ON(!list_empty(&sess->devs_list));

	might_sleep();

	close_rtrs(sess);
	destroy_mq_tags(sess);
	if (!list_empty(&sess->list)) {
		mutex_lock(&sess_lock);
		list_del(&sess->list);
		mutex_unlock(&sess_lock);
	}
	free_percpu(sess->cpu_queues);
	free_percpu(sess->cpu_rr);
	mutex_destroy(&sess->lock);
	kfree(sess);
}

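/* Allocate and initialize a session, including the per-CPU requeue machinery. */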
static struct rnbd_clt_session *alloc_sess(const char *sessname)
{
	struct rnbd_clt_session *sess;
	int err, cpu;

	sess = kzalloc_node(sizeof(*sess), GFP_KERNEL, NUMA_NO_NODE);
	if (!sess)
		return ERR_PTR(-ENOMEM);
	strscpy(sess->sessname, sessname, sizeof(sess->sessname));
	atomic_set(&sess->busy, 0);
	mutex_init(&sess->lock);
	INIT_LIST_HEAD(&sess->devs_list);
	INIT_LIST_HEAD(&sess->list);
	bitmap_zero(sess->cpu_queues_bm, num_possible_cpus());
	init_waitqueue_head(&sess->rtrs_waitq);
	refcount_set(&sess->refcount, 1);

	sess->cpu_queues = alloc_percpu(struct rnbd_cpu_qlist);
	if (!sess->cpu_queues) {
		err = -ENOMEM;
		goto err;
	}
	rnbd_init_cpu_qlists(sess->cpu_queues);

	/*
	 * That is a simple percpu variable which stores CPU indices, which
	 * are incremented on each access.  We need that for the sake of
	 * fairness to wake up queues in a round-robin manner.
	 */
	sess->cpu_rr = alloc_percpu(int);
	if (!sess->cpu_rr) {
		err = -ENOMEM;
		goto err;
	}
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(sess->cpu_rr, cpu) = cpu;

	return sess;

err:
	free_sess(sess);

	return ERR_PTR(err);
}

static int wait_for_rtrs_connection(struct rnbd_clt_session *sess)
{
	wait_event(sess->rtrs_waitq, sess->rtrs_ready);
	if (IS_ERR_OR_NULL(sess->rtrs))
		return -ECONNRESET;

	return 0;
}

static void wait_for_rtrs_disconnection(struct rnbd_clt_session *sess)
	__releases(&sess_lock)
	__acquires(&sess_lock)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&sess->rtrs_waitq, &wait, TASK_UNINTERRUPTIBLE);
	if (IS_ERR_OR_NULL(sess->rtrs)) {
		finish_wait(&sess->rtrs_waitq, &wait);
		return;
	}
	mutex_unlock(&sess_lock);
	/* Loop in the caller, see __find_and_get_sess().
	 * You can't leave the mutex locked and call schedule(): you would
	 * catch a deadlock with a caller of free_sess(), which has just put
	 * the last reference and is about to take sess_lock in order to
	 * delete the session from the list.
	 */
	schedule();
	mutex_lock(&sess_lock);
}

static struct rnbd_clt_session *__find_and_get_sess(const char *sessname)
	__releases(&sess_lock)
	__acquires(&sess_lock)
{
	struct rnbd_clt_session *sess, *sn;
	int err;

again:
	list_for_each_entry_safe(sess, sn, &sess_list, list) {
		if (strcmp(sessname, sess->sessname))
			continue;

		if (sess->rtrs_ready && IS_ERR_OR_NULL(sess->rtrs))
			/*
			 * No RTRS connection, session is dying.
			 */
			continue;

		if (rnbd_clt_get_sess(sess)) {
			/*
			 * Alive session is found, wait for RTRS connection.
			 */
			mutex_unlock(&sess_lock);
			err = wait_for_rtrs_connection(sess);
			if (err)
				rnbd_clt_put_sess(sess);
			mutex_lock(&sess_lock);

			if (err)
				/* Session is dying, repeat the loop */
				goto again;

			return sess;
		}
		/*
		 * Ref is 0, session is dying, wait for RTRS disconnect
		 * in order to avoid session name clashes.
		 */
		wait_for_rtrs_disconnection(sess);
		/*
		 * RTRS is disconnected and soon the session will be freed,
		 * so repeat the loop.
		 */
		goto again;
	}

	return NULL;
}

/* caller is responsible for initializing 'first' to false */
static struct rnbd_clt_session *
find_or_create_sess(const char *sessname, bool *first)
{
	struct rnbd_clt_session *sess = NULL;

	mutex_lock(&sess_lock);
	sess = __find_and_get_sess(sessname);
	if (!sess) {
		sess = alloc_sess(sessname);
		if (IS_ERR(sess)) {
			mutex_unlock(&sess_lock);
			return sess;
		}
		list_add(&sess->list, &sess_list);
		*first = true;
	}
	mutex_unlock(&sess_lock);

	return sess;
}

static int rnbd_client_open(struct block_device *block_device, fmode_t mode)
{
	struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;

	if (dev->read_only && (mode & FMODE_WRITE))
		return -EPERM;

	if (dev->dev_state == DEV_STATE_UNMAPPED ||
	    !rnbd_clt_get_dev(dev))
		return -EIO;

	return 0;
}

static void rnbd_client_release(struct gendisk *gen, fmode_t mode)
{
	struct rnbd_clt_dev *dev = gen->private_data;

	rnbd_clt_put_dev(dev);
}

static int rnbd_client_getgeo(struct block_device *block_device,
			      struct hd_geometry *geo)
{
	u64 size;
	struct rnbd_clt_dev *dev;

	dev = block_device->bd_disk->private_data;
	size = dev->size * (dev->logical_block_size / SECTOR_SIZE);
	geo->cylinders = size >> 6;	/* size/64 */
	geo->heads = 4;
	geo->sectors = 16;
	geo->start = 0;

	return 0;
}

static const struct block_device_operations rnbd_client_ops = {
	.owner = THIS_MODULE,
	.open = rnbd_client_open,
	.release = rnbd_client_release,
	.getgeo = rnbd_client_getgeo
};

/* The amount of data that belongs to an I/O and the amount of data that
 * should be read or written to the disk (bi_size) can differ.
 *
 * E.g. when WRITE_SAME is used, only a small amount of data is
 * transferred that is then written repeatedly over a lot of sectors.
 *
 * Get the size of data to be transferred via RTRS by summing up the size
 * of the scatter-gather list entries.
 */
static size_t rnbd_clt_get_sg_size(struct scatterlist *sglist, u32 len)
{
	struct scatterlist *sg;
	size_t tsize = 0;
	int i;

	for_each_sg(sglist, sg, len, i)
		tsize += sg->length;
	return tsize;
}

static int rnbd_client_xfer_request(struct rnbd_clt_dev *dev,
				    struct request *rq,
				    struct rnbd_iu *iu)
{
	struct rtrs_clt_sess *rtrs = dev->sess->rtrs;
	struct rtrs_permit *permit = iu->permit;
	struct rnbd_msg_io msg;
	struct rtrs_clt_req_ops req_ops;
	unsigned int sg_cnt = 0;
	struct kvec vec;
	size_t size;
	int err;

	iu->rq = rq;
	iu->dev = dev;
	msg.sector = cpu_to_le64(blk_rq_pos(rq));
	msg.bi_size = cpu_to_le32(blk_rq_bytes(rq));
	msg.rw = cpu_to_le32(rq_to_rnbd_flags(rq));
	msg.prio = cpu_to_le16(req_get_ioprio(rq));

	/*
	 * We only support discards with a single segment for now.
	 * See queue limits.
	 */
	if (req_op(rq) != REQ_OP_DISCARD)
		sg_cnt = blk_rq_map_sg(dev->queue, rq, iu->sgt.sgl);

	if (sg_cnt == 0)
		sg_mark_end(&iu->sgt.sgl[0]);

	msg.hdr.type = cpu_to_le16(RNBD_MSG_IO);
	msg.device_id = cpu_to_le32(dev->device_id);

	vec = (struct kvec) {
		.iov_base = &msg,
		.iov_len = sizeof(msg)
	};
	size = rnbd_clt_get_sg_size(iu->sgt.sgl, sg_cnt);
	req_ops = (struct rtrs_clt_req_ops) {
		.priv = iu,
		.conf_fn = msg_io_conf,
	};
	err = rtrs_clt_request(rq_data_dir(rq), &req_ops, rtrs, permit,
			       &vec, 1, size, iu->sgt.sgl, sg_cnt);
	if (err) {
		rnbd_clt_err_rl(dev, "RTRS failed to transfer IO, err: %d\n",
				err);
		return err;
	}

	return 0;
}

/**
 * rnbd_clt_dev_add_to_requeue() - add device to requeue if session is busy
 * @dev:	Device to be checked
 * @q:		Queue to be added to the requeue list if required
 *
 * Description:
 *	If the session is busy, that means someone will requeue us when
 *	resources are freed.  If the session is not doing anything - the
 *	device is not added to the list and false is returned.
 */
static bool rnbd_clt_dev_add_to_requeue(struct rnbd_clt_dev *dev,
					struct rnbd_queue *q)
{
	struct rnbd_clt_session *sess = dev->sess;
	struct rnbd_cpu_qlist *cpu_q;
	unsigned long flags;
	bool added = true;
	bool need_set;

	cpu_q = get_cpu_ptr(sess->cpu_queues);
	spin_lock_irqsave(&cpu_q->requeue_lock, flags);

	if (!test_and_set_bit_lock(0, &q->in_list)) {
		if (WARN_ON(!list_empty(&q->requeue_list)))
			goto unlock;

		need_set = !test_bit(cpu_q->cpu, sess->cpu_queues_bm);
		if (need_set) {
			set_bit(cpu_q->cpu, sess->cpu_queues_bm);
			/* Paired with rnbd_put_permit().  Set a bit first
			 * and then observe the busy counter.
			 */
			smp_mb__before_atomic();
		}
		if (atomic_read(&sess->busy)) {
			list_add_tail(&q->requeue_list, &cpu_q->requeue_list);
		} else {
			/* Very unlikely, but possible: the busy counter was
			 * observed as zero.  Drop all bits and return
			 * false to restart the queue by ourselves.
			 */
			if (need_set)
				clear_bit(cpu_q->cpu, sess->cpu_queues_bm);
			clear_bit_unlock(0, &q->in_list);
			added = false;
		}
	}
unlock:
	spin_unlock_irqrestore(&cpu_q->requeue_lock, flags);
	put_cpu_ptr(sess->cpu_queues);

	return added;
}

static void rnbd_clt_dev_kick_mq_queue(struct rnbd_clt_dev *dev,
				       struct blk_mq_hw_ctx *hctx,
				       int delay)
{
	struct rnbd_queue *q = hctx->driver_data;

	if (delay != RNBD_DELAY_IFBUSY)
		blk_mq_delay_run_hw_queue(hctx, delay);
	else if (!rnbd_clt_dev_add_to_requeue(dev, q))
		/*
		 * If the session is not busy we have to restart
		 * the queue ourselves.
		 */
		blk_mq_delay_run_hw_queue(hctx, 10/*ms*/);
}

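/*
 * blk-mq .queue_rq: grab a permit, map the request segments and hand
 * the IO over to RTRS.
 */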
static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
				  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct rnbd_clt_dev *dev = rq->q->disk->private_data;
	struct rnbd_iu *iu = blk_mq_rq_to_pdu(rq);
	int err;
	blk_status_t ret = BLK_STS_IOERR;

	if (dev->dev_state != DEV_STATE_MAPPED)
		return BLK_STS_IOERR;

	iu->permit = rnbd_get_permit(dev->sess, RTRS_IO_CON,
				     RTRS_PERMIT_NOWAIT);
	if (!iu->permit) {
		rnbd_clt_dev_kick_mq_queue(dev, hctx, RNBD_DELAY_IFBUSY);
		return BLK_STS_RESOURCE;
	}

	iu->sgt.sgl = iu->first_sgl;
	err = sg_alloc_table_chained(&iu->sgt,
				     /* Even if the request has no segment,
				      * sglist must have one entry at least.
				      */
				     blk_rq_nr_phys_segments(rq) ?: 1,
				     iu->sgt.sgl,
				     RNBD_INLINE_SG_CNT);
	if (err) {
		rnbd_clt_err_rl(dev, "sg_alloc_table_chained ret=%d\n", err);
		rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/);
		rnbd_put_permit(dev->sess, iu->permit);
		return BLK_STS_RESOURCE;
	}

	blk_mq_start_request(rq);
	err = rnbd_client_xfer_request(dev, rq, iu);
	if (err == 0)
		return BLK_STS_OK;
	if (err == -EAGAIN || err == -ENOMEM) {
		rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/);
		ret = BLK_STS_RESOURCE;
	}
	sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT);
	rnbd_put_permit(dev->sess, iu->permit);
	return ret;
}

static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
	struct rnbd_queue *q = hctx->driver_data;
	struct rnbd_clt_dev *dev = q->dev;
	int cnt;

	cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num);
	return cnt;
}

static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set)
{
	struct rnbd_clt_session *sess = set->driver_data;

	/* shared read/write queues */
	set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus();
	set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
	set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus();
	set->map[HCTX_TYPE_READ].queue_offset = 0;
	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);

	if (sess->nr_poll_queues) {
		/* dedicated queue for poll */
		set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues;
		set->map[HCTX_TYPE_POLL].queue_offset = set->map[HCTX_TYPE_READ].queue_offset +
			set->map[HCTX_TYPE_READ].nr_queues;
		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
		pr_info("[session=%s] mapped %d/%d/%d default/read/poll queues.\n",
			sess->sessname,
			set->map[HCTX_TYPE_DEFAULT].nr_queues,
			set->map[HCTX_TYPE_READ].nr_queues,
			set->map[HCTX_TYPE_POLL].nr_queues);
	} else {
		pr_info("[session=%s] mapped %d/%d default/read queues.\n",
			sess->sessname,
			set->map[HCTX_TYPE_DEFAULT].nr_queues,
			set->map[HCTX_TYPE_READ].nr_queues);
	}

	return 0;
}

static struct blk_mq_ops rnbd_mq_ops = {
	.queue_rq = rnbd_queue_rq,
	.complete = rnbd_softirq_done_fn,
	.map_queues = rnbd_rdma_map_queues,
	.poll = rnbd_rdma_poll,
};

static int setup_mq_tags(struct rnbd_clt_session *sess)
{
	struct blk_mq_tag_set *tag_set = &sess->tag_set;

	memset(tag_set, 0, sizeof(*tag_set));
	tag_set->ops = &rnbd_mq_ops;
	tag_set->queue_depth = sess->queue_depth;
	tag_set->numa_node = NUMA_NO_NODE;
	tag_set->flags = BLK_MQ_F_SHOULD_MERGE |
			 BLK_MQ_F_TAG_QUEUE_SHARED;
	tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE;

	/* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */
	tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2;
	/*
	 * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues
	 * and the others are for HCTX_TYPE_POLL.
	 */
	tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues;
	tag_set->driver_data = sess;

	return blk_mq_alloc_tag_set(tag_set);
}

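/*
 * Find an existing session by name or create a new one.  For a new
 * session this establishes the RTRS connection, queries its attributes
 * and sets up the shared blk-mq tag set.
 */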
static struct rnbd_clt_session *
find_and_get_or_create_sess(const char *sessname,
			    const struct rtrs_addr *paths,
			    size_t path_cnt, u16 port_nr, u32 nr_poll_queues)
{
	struct rnbd_clt_session *sess;
	struct rtrs_attrs attrs;
	int err;
	bool first = false;
	struct rtrs_clt_ops rtrs_ops;

	sess = find_or_create_sess(sessname, &first);
	if (sess == ERR_PTR(-ENOMEM)) {
		return ERR_PTR(-ENOMEM);
	} else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) {
		/*
		 * A device MUST have its own session to use the polling-mode.
		 * Mapping a new device with the same session must fail.
		 */
		err = -EINVAL;
		goto put_sess;
	}

	if (!first)
		return sess;

	if (!path_cnt) {
		pr_err("Session %s not found, and path parameter not given", sessname);
		err = -ENXIO;
		goto put_sess;
	}

	rtrs_ops = (struct rtrs_clt_ops) {
		.priv = sess,
		.link_ev = rnbd_clt_link_ev,
	};
	/*
	 * Nothing was found, establish rtrs connection and proceed further.
	 */
	sess->rtrs = rtrs_clt_open(&rtrs_ops, sessname,
				   paths, path_cnt, port_nr,
				   0, /* Do not use pdu of rtrs */
				   RECONNECT_DELAY,
				   MAX_RECONNECTS, nr_poll_queues);
	if (IS_ERR(sess->rtrs)) {
		err = PTR_ERR(sess->rtrs);
		goto wake_up_and_put;
	}

	err = rtrs_clt_query(sess->rtrs, &attrs);
	if (err)
		goto close_rtrs;

	sess->max_io_size = attrs.max_io_size;
	sess->queue_depth = attrs.queue_depth;
	sess->nr_poll_queues = nr_poll_queues;
	sess->max_segments = attrs.max_segments;

	err = setup_mq_tags(sess);
	if (err)
		goto close_rtrs;

	err = send_msg_sess_info(sess, RTRS_PERMIT_WAIT);
	if (err)
		goto close_rtrs;

	wake_up_rtrs_waiters(sess);

	return sess;

close_rtrs:
	close_rtrs(sess);
put_sess:
	rnbd_clt_put_sess(sess);

	return ERR_PTR(err);

wake_up_and_put:
	wake_up_rtrs_waiters(sess);
	goto put_sess;
}

static inline void rnbd_init_hw_queue(struct rnbd_clt_dev *dev,
				      struct rnbd_queue *q,
				      struct blk_mq_hw_ctx *hctx)
{
	INIT_LIST_HEAD(&q->requeue_list);
	q->dev = dev;
	q->hctx = hctx;
}

static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
{
	unsigned long i;
	struct blk_mq_hw_ctx *hctx;
	struct rnbd_queue *q;

	queue_for_each_hw_ctx(dev->queue, hctx, i) {
		q = &dev->hw_queues[i];
		rnbd_init_hw_queue(dev, q, hctx);
		hctx->driver_data = q;
	}
}

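/* Apply the negotiated device and session attributes to the queue limits. */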
static void setup_request_queue(struct rnbd_clt_dev *dev)
{
	blk_queue_logical_block_size(dev->queue, dev->logical_block_size);
	blk_queue_physical_block_size(dev->queue, dev->physical_block_size);
	blk_queue_max_hw_sectors(dev->queue, dev->max_hw_sectors);

	/*
	 * we don't support discards to "discontiguous" segments
	 * in one request
	 */
	blk_queue_max_discard_segments(dev->queue, 1);

	blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
	dev->queue->limits.discard_granularity = dev->discard_granularity;
	dev->queue->limits.discard_alignment = dev->discard_alignment;
	if (dev->secure_discard)
		blk_queue_max_secure_erase_sectors(dev->queue,
						   dev->max_discard_sectors);
	blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
	blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
	blk_queue_max_segments(dev->queue, dev->max_segments);
	blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
	blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
	blk_queue_write_cache(dev->queue, dev->wc, dev->fua);
}

static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
{
	int err;

	dev->gd->major = rnbd_client_major;
	dev->gd->first_minor = idx << RNBD_PART_BITS;
	dev->gd->minors = 1 << RNBD_PART_BITS;
	dev->gd->fops = &rnbd_client_ops;
	dev->gd->queue = dev->queue;
	dev->gd->private_data = dev;
	snprintf(dev->gd->disk_name, sizeof(dev->gd->disk_name), "rnbd%d",
		 idx);
	pr_debug("disk_name=%s, capacity=%zu\n",
		 dev->gd->disk_name,
		 dev->nsectors * (dev->logical_block_size / SECTOR_SIZE)
		 );

	set_capacity(dev->gd, dev->nsectors);

	if (dev->access_mode == RNBD_ACCESS_RO) {
		dev->read_only = true;
		set_disk_ro(dev->gd, true);
	} else {
		dev->read_only = false;
	}

	/*
	 * Network device does not need rotational
	 */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
	err = add_disk(dev->gd);
	if (err)
		blk_cleanup_disk(dev->gd);

	return err;
}

static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
{
	int idx = dev->clt_device_id;

	dev->size = dev->nsectors * dev->logical_block_size;

	dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev);
	if (IS_ERR(dev->gd))
		return PTR_ERR(dev->gd);
	dev->queue = dev->gd->queue;
	rnbd_init_mq_hw_queues(dev);

	setup_request_queue(dev);
	return rnbd_clt_setup_gen_disk(dev, idx);
}

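/* Allocate and initialize a client device; takes a session reference on success. */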
static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
				     enum rnbd_access_mode access_mode,
				     const char *pathname,
				     u32 nr_poll_queues)
{
	struct rnbd_clt_dev *dev;
	int ret;

	dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, NUMA_NO_NODE);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	/*
	 * nr_cpu_ids: the number of softirq queues
	 * nr_poll_queues: the number of polling queues
	 */
	dev->hw_queues = kcalloc(nr_cpu_ids + nr_poll_queues,
				 sizeof(*dev->hw_queues),
				 GFP_KERNEL);
	if (!dev->hw_queues) {
		ret = -ENOMEM;
		goto out_alloc;
	}

	ret = ida_alloc_max(&index_ida, 1 << (MINORBITS - RNBD_PART_BITS),
			    GFP_KERNEL);
	if (ret < 0) {
		pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n",
		       pathname, sess->sessname, ret);
		goto out_queues;
	}
	dev->clt_device_id = ret;

	dev->pathname = kstrdup(pathname, GFP_KERNEL);
	if (!dev->pathname) {
		ret = -ENOMEM;
		/* Don't leak the just-allocated ida index */
		goto out_ida;
	}

	dev->sess = sess;
	dev->access_mode = access_mode;
	dev->nr_poll_queues = nr_poll_queues;
	mutex_init(&dev->lock);
	refcount_set(&dev->refcount, 1);
	dev->dev_state = DEV_STATE_INIT;

	/*
	 * Here we called from sysfs entry, thus clt-sysfs is
	 * responsible that session will not disappear.
	 */
	WARN_ON(!rnbd_clt_get_sess(sess));

	return dev;

out_ida:
	ida_free(&index_ida, dev->clt_device_id);
out_queues:
	kfree(dev->hw_queues);
out_alloc:
	kfree(dev);
	return ERR_PTR(ret);
}

static bool __exists_dev(const char *pathname, const char *sessname)
{
	struct rnbd_clt_session *sess;
	struct rnbd_clt_dev *dev;
	bool found = false;

	list_for_each_entry(sess, &sess_list, list) {
		if (sessname && strncmp(sess->sessname, sessname,
					sizeof(sess->sessname)))
			continue;
		mutex_lock(&sess->lock);
		list_for_each_entry(dev, &sess->devs_list, list) {
			if (strlen(dev->pathname) == strlen(pathname) &&
			    !strcmp(dev->pathname, pathname)) {
				found = true;
				break;
			}
		}
		mutex_unlock(&sess->lock);
		if (found)
			break;
	}

	return found;
}

static bool exists_devpath(const char *pathname, const char *sessname)
{
	bool found;

	mutex_lock(&sess_lock);
	found = __exists_dev(pathname, sessname);
	mutex_unlock(&sess_lock);

	return found;
}

static bool insert_dev_if_not_exists_devpath(struct rnbd_clt_dev *dev)
{
	bool found;
	struct rnbd_clt_session *sess = dev->sess;

	mutex_lock(&sess_lock);
	found = __exists_dev(dev->pathname, sess->sessname);
	if (!found) {
		mutex_lock(&sess->lock);
		list_add_tail(&dev->list, &sess->devs_list);
		mutex_unlock(&sess->lock);
	}
	mutex_unlock(&sess_lock);

	return found;
}

static void delete_dev(struct rnbd_clt_dev *dev)
{
	struct rnbd_clt_session *sess = dev->sess;

	mutex_lock(&sess->lock);
	list_del(&dev->list);
	mutex_unlock(&sess->lock);
}

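/*
 * Map a remote device: find or create the session, register the device,
 * open it on the server and set up the local gendisk.
 */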
1555 | struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, | |
1556 | struct rtrs_addr *paths, | |
1557 | size_t path_cnt, u16 port_nr, | |
1558 | const char *pathname, | |
2958a995 GK |
1559 | enum rnbd_access_mode access_mode, |
1560 | u32 nr_poll_queues) | |
f7a7a5c2 JW |
1561 | { |
1562 | struct rnbd_clt_session *sess; | |
1563 | struct rnbd_clt_dev *dev; | |
1564 | int ret; | |
1565 | ||
1e31016b | 1566 | if (exists_devpath(pathname, sessname)) |
f7a7a5c2 JW |
1567 | return ERR_PTR(-EEXIST); |
1568 | ||
2958a995 | 1569 | sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues); |
f7a7a5c2 JW |
1570 | if (IS_ERR(sess)) |
1571 | return ERR_CAST(sess); | |
1572 | ||
2958a995 | 1573 | dev = init_dev(sess, access_mode, pathname, nr_poll_queues); |
f7a7a5c2 JW |
1574 | if (IS_ERR(dev)) { |
1575 | pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n", | |
1576 | pathname, sess->sessname, PTR_ERR(dev)); | |
1577 | ret = PTR_ERR(dev); | |
1578 | goto put_sess; | |
1579 | } | |
02ee80f5 | 1580 | if (insert_dev_if_not_exists_devpath(dev)) { |
f7a7a5c2 JW |
1581 | ret = -EEXIST; |
1582 | goto put_dev; | |
1583 | } | |
9f455eea | 1584 | ret = send_msg_open(dev, RTRS_PERMIT_WAIT); |
f7a7a5c2 JW |
1585 | if (ret) { |
1586 | rnbd_clt_err(dev, | |
1587 | "map_device: failed, can't open remote device, err: %d\n", | |
1588 | ret); | |
1589 | goto del_dev; | |
1590 | } | |
1591 | mutex_lock(&dev->lock); | |
1592 | pr_debug("Opened remote device: session=%s, path='%s'\n", | |
1593 | sess->sessname, pathname); | |
8b7f0511 | 1594 | ret = rnbd_client_setup_device(dev); |
f7a7a5c2 JW |
1595 | if (ret) { |
1596 | rnbd_clt_err(dev, | |
1597 | "map_device: Failed to configure device, err: %d\n", | |
1598 | ret); | |
1599 | mutex_unlock(&dev->lock); | |
47be77c2 | 1600 | goto send_close; |
f7a7a5c2 JW |
1601 | } |
1602 | ||
1603 | rnbd_clt_info(dev, | |
6f2689a7 | 1604 | "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n", |
f7a7a5c2 JW |
1605 | dev->gd->disk_name, dev->nsectors, |
1606 | dev->logical_block_size, dev->physical_block_size, | |
ebd04737 | 1607 | dev->max_discard_sectors, |
f7a7a5c2 JW |
1608 | dev->discard_granularity, dev->discard_alignment, |
1609 | dev->secure_discard, dev->max_segments, | |
030ce8ba | 1610 | dev->max_hw_sectors, dev->wc, dev->fua); |
f7a7a5c2 JW |
1611 | |
1612 | mutex_unlock(&dev->lock); | |
f7a7a5c2 JW |
1613 | rnbd_clt_put_sess(sess); |
1614 | ||
1615 | return dev; | |
1616 | ||
47be77c2 | 1617 | send_close: |
9f455eea | 1618 | send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT); |
f7a7a5c2 JW |
1619 | del_dev: |
1620 | delete_dev(dev); | |
1621 | put_dev: | |
1622 | rnbd_clt_put_dev(dev); | |
1623 | put_sess: | |
1624 | rnbd_clt_put_sess(sess); | |
1625 | ||
1626 | return ERR_PTR(ret); | |
1627 | } | |
1628 | ||
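 | /* | |
 | * Tear down the gendisk: del_gendisk() unhooks it from the block | |
 | * layer, blk_cleanup_disk() drops the queue and the disk reference. | |
 | */ | |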
1629 | static void destroy_gen_disk(struct rnbd_clt_dev *dev) | |
1630 | { | |
1631 | del_gendisk(dev->gd); | |
2c6ee0ae | 1632 | blk_cleanup_disk(dev->gd); |
f7a7a5c2 JW |
1633 | } |
1634 | ||
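 | /* | |
 | * Remove the device's sysfs entries. When called from a sysfs | |
 | * attribute (@sysfs_self != NULL), that file removes itself first, | |
 | * which avoids deadlocking against its own store() callback. | |
 | */ | |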
1635 | static void destroy_sysfs(struct rnbd_clt_dev *dev, | |
1636 | const struct attribute *sysfs_self) | |
1637 | { | |
1638 | rnbd_clt_remove_dev_symlink(dev); | |
1639 | if (dev->kobj.state_initialized) { | |
1640 | if (sysfs_self) | |
1642 | /* Remove the file itself first to avoid a deadlock */ | |
1642 | sysfs_remove_file_self(&dev->kobj, sysfs_self); | |
1643 | kobject_del(&dev->kobj); | |
1644 | kobject_put(&dev->kobj); | |
1645 | } | |
1646 | } | |
1647 | ||
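 | /* | |
 | * Unmap a device. Unless @force is set, unmapping fails with -EBUSY | |
 | * while the device is still in use, and with -EALREADY if an unmap is | |
 | * already in progress. RNBD_MSG_CLOSE is only sent to the server if | |
 | * the device was actually mapped and the transport still exists. | |
 | */ | |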
1648 | int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force, | |
1649 | const struct attribute *sysfs_self) | |
1650 | { | |
1651 | struct rnbd_clt_session *sess = dev->sess; | |
1652 | int refcount, ret = 0; | |
1653 | bool was_mapped; | |
1654 | ||
1655 | mutex_lock(&dev->lock); | |
1656 | if (dev->dev_state == DEV_STATE_UNMAPPED) { | |
1657 | rnbd_clt_info(dev, "Device is already being unmapped\n"); | |
1658 | ret = -EALREADY; | |
1659 | goto err; | |
1660 | } | |
1661 | refcount = refcount_read(&dev->refcount); | |
1662 | if (!force && refcount > 1) { | |
1663 | rnbd_clt_err(dev, | |
1664 | "Closing device failed, device is in use, (%d device users)\n", | |
1665 | refcount - 1); | |
1666 | ret = -EBUSY; | |
1667 | goto err; | |
1668 | } | |
1669 | was_mapped = (dev->dev_state == DEV_STATE_MAPPED); | |
1670 | dev->dev_state = DEV_STATE_UNMAPPED; | |
1671 | mutex_unlock(&dev->lock); | |
1672 | ||
1673 | delete_dev(dev); | |
1674 | destroy_sysfs(dev, sysfs_self); | |
1675 | destroy_gen_disk(dev); | |
1676 | if (was_mapped && sess->rtrs) | |
9f455eea | 1677 | send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT); |
f7a7a5c2 JW |
1678 | |
1679 | rnbd_clt_info(dev, "Device is unmapped\n"); | |
1680 | ||
1681 | /* Likely the last reference put */ | |
1682 | rnbd_clt_put_dev(dev); | |
1683 | ||
1684 | /* | |
1685 | * At this point the device and session may have vanished! | |
1686 | */ | |
1687 | ||
1688 | return 0; | |
1689 | err: | |
1690 | mutex_unlock(&dev->lock); | |
1691 | ||
1692 | return ret; | |
1693 | } | |
1694 | ||
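 | /* | |
 | * Re-open a device that lost its connection to the server | |
 | * (DEV_STATE_MAPPED_DISCONNECTED) by resending RNBD_MSG_OPEN. Any | |
 | * other state is an error: -ENODEV if already unmapped, -EALREADY if | |
 | * still mapped, -EBUSY otherwise. | |
 | */ | |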
1695 | int rnbd_clt_remap_device(struct rnbd_clt_dev *dev) | |
1696 | { | |
1697 | int err; | |
1698 | ||
1699 | mutex_lock(&dev->lock); | |
1700 | if (dev->dev_state == DEV_STATE_MAPPED_DISCONNECTED) | |
1701 | err = 0; | |
1702 | else if (dev->dev_state == DEV_STATE_UNMAPPED) | |
1703 | err = -ENODEV; | |
1704 | else if (dev->dev_state == DEV_STATE_MAPPED) | |
1705 | err = -EALREADY; | |
1706 | else | |
1707 | err = -EBUSY; | |
1708 | mutex_unlock(&dev->lock); | |
1709 | if (!err) { | |
1710 | rnbd_clt_info(dev, "Remapping device.\n"); | |
9f455eea | 1711 | err = send_msg_open(dev, RTRS_PERMIT_WAIT); |
f7a7a5c2 JW |
1712 | if (err) |
1713 | rnbd_clt_err(dev, "remap_device: %d\n", err); | |
1714 | } | |
1715 | ||
1716 | return err; | |
1717 | } | |
1718 | ||
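 | /* Work handler: force-unmaps one device during module unload. */ | |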
1719 | static void unmap_device_work(struct work_struct *work) | |
1720 | { | |
1721 | struct rnbd_clt_dev *dev; | |
1722 | ||
1723 | dev = container_of(work, typeof(*dev), unmap_on_rmmod_work); | |
1724 | rnbd_clt_unmap_device(dev, true, NULL); | |
1725 | } | |
1726 | ||
1727 | static void rnbd_destroy_sessions(void) | |
1728 | { | |
1729 | struct rnbd_clt_session *sess, *sn; | |
1730 | struct rnbd_clt_dev *dev, *tn; | |
1731 | ||
1732 | /* First, forbid access through the sysfs interface */ | |
f7a7a5c2 JW |
1733 | rnbd_clt_destroy_sysfs_files(); |
1734 | ||
1735 | /* | |
1736 | * At this point there is no concurrent access to the session list | |
1737 | * or the device lists: | |
3877ece0 | 1738 | * 1. No new session or device can be created - the session sysfs |
f7a7a5c2 JW |
1739 | * files have been removed. |
1740 | * 2. No device or session can be removed - a module reference is | |
1741 | * taken in the unmap device sysfs callback. | |
1742 | * 3. No IO requests are in flight - each open of a block device | |
1743 | * takes a module reference in get_disk(). | |
1744 | * | |
1745 | * However, user requests sent by the asynchronous send_msg_*() | |
1746 | * functions can still be in flight, so the RTRS session must be | |
1747 | * closed explicitly before the devices are unmapped. | |
1748 | */ | |
1749 | ||
1750 | list_for_each_entry_safe(sess, sn, &sess_list, list) { | |
3a21777c JW |
1751 | if (!rnbd_clt_get_sess(sess)) |
1752 | continue; | |
f7a7a5c2 JW |
1753 | close_rtrs(sess); |
1754 | list_for_each_entry_safe(dev, tn, &sess->devs_list, list) { | |
1755 | /* | |
1756 | * Unmapping happens in parallel for one reason only: | |
1757 | * blk_cleanup_queue() takes around half a second, so | |
1758 | * with a huge number of devices the whole module | |
1759 | * unload procedure would take minutes. | |
1760 | */ | |
1761 | INIT_WORK(&dev->unmap_on_rmmod_work, unmap_device_work); | |
1762 | queue_work(system_long_wq, &dev->unmap_on_rmmod_work); | |
1763 | } | |
1764 | rnbd_clt_put_sess(sess); | |
1765 | } | |
1766 | /* Wait for all scheduled unmap work items */ | |
1767 | flush_workqueue(system_long_wq); | |
1768 | WARN_ON(!list_empty(&sess_list)); | |
1769 | } | |
1770 | ||
1771 | static int __init rnbd_client_init(void) | |
1772 | { | |
1773 | int err = 0; | |
1774 | ||
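 | /* | |
 | * Pin the on-the-wire sizes of the protocol messages: an accidental | |
 | * change to a struct layout then breaks the build, not the protocol. | |
 | */ | |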
1775 | BUILD_BUG_ON(sizeof(struct rnbd_msg_hdr) != 4); | |
1776 | BUILD_BUG_ON(sizeof(struct rnbd_msg_sess_info) != 36); | |
1777 | BUILD_BUG_ON(sizeof(struct rnbd_msg_sess_info_rsp) != 36); | |
1778 | BUILD_BUG_ON(sizeof(struct rnbd_msg_open) != 264); | |
1779 | BUILD_BUG_ON(sizeof(struct rnbd_msg_close) != 8); | |
1780 | BUILD_BUG_ON(sizeof(struct rnbd_msg_open_rsp) != 56); | |
1781 | rnbd_client_major = register_blkdev(rnbd_client_major, "rnbd"); | |
1782 | if (rnbd_client_major <= 0) { | |
1783 | pr_err("Failed to load module, block device registration failed\n"); | |
1784 | return -EBUSY; | |
1785 | } | |
1786 | ||
1787 | err = rnbd_clt_create_sysfs_files(); | |
1788 | if (err) { | |
1789 | pr_err("Failed to load module, creating sysfs device files failed, err: %d\n", | |
1790 | err); | |
1791 | unregister_blkdev(rnbd_client_major, "rnbd"); | |
1792 | } | |
1793 | ||
1794 | return err; | |
1795 | } | |
1796 | ||
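 | /* | |
 | * Module unload: all sessions and their devices must be torn down | |
 | * before the block major is released and the ID allocator destroyed. | |
 | */ | |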
1797 | static void __exit rnbd_client_exit(void) | |
1798 | { | |
1799 | rnbd_destroy_sessions(); | |
1800 | unregister_blkdev(rnbd_client_major, "rnbd"); | |
1801 | ida_destroy(&index_ida); | |
1802 | } | |
1803 | ||
1804 | module_init(rnbd_client_init); | |
1805 | module_exit(rnbd_client_exit); |