Commit | Line | Data |
---|---|---|
7336d0e6 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
869d81df | 2 | /* |
2402211a | 3 | * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
869d81df DT |
4 | */ |
5 | ||
2402211a | 6 | #include <linux/fs.h> |
5970e15d | 7 | #include <linux/filelock.h> |
869d81df | 8 | #include <linux/miscdevice.h> |
bd01f843 | 9 | #include <linux/poll.h> |
2402211a DT |
10 | #include <linux/dlm.h> |
11 | #include <linux/dlm_plock.h> | |
5a0e3ad6 | 12 | #include <linux/slab.h> |
869d81df | 13 | |
2402211a DT |
14 | #include "dlm_internal.h" |
15 | #include "lockspace.h" | |
869d81df | 16 | |
/*
 * Global plock request state shared by the lock/unlock/get entry points
 * and the misc device handlers.  ops_lock protects both lists.
 */
static DEFINE_SPINLOCK(ops_lock);

/* ops queued by send_op(); dev_read() takes them from here */
static LIST_HEAD(send_list);
static DECLARE_WAIT_QUEUE_HEAD(send_wq);

/* ops handed to userspace by dev_read(); dev_write() matches results here */
static LIST_HEAD(recv_list);
static DECLARE_WAIT_QUEUE_HEAD(recv_wq);
869d81df | 22 | |
bcbb4ba6 | 23 | struct plock_async_data { |
586759f0 ME |
24 | void *fl; |
25 | void *file; | |
26 | struct file_lock flc; | |
bcbb4ba6 | 27 | int (*callback)(struct file_lock *fl, int result); |
586759f0 ME |
28 | }; |
29 | ||
bcbb4ba6 AA |
30 | struct plock_op { |
31 | struct list_head list; | |
32 | int done; | |
b92a4e3f AA |
33 | /* if lock op got interrupted while waiting dlm_controld reply */ |
34 | bool sigint; | |
bcbb4ba6 AA |
35 | struct dlm_plock_info info; |
36 | /* if set indicates async handling */ | |
37 | struct plock_async_data *data; | |
38 | }; | |
586759f0 | 39 | |
2402211a | 40 | static inline void set_version(struct dlm_plock_info *info) |
869d81df | 41 | { |
2402211a DT |
42 | info->version[0] = DLM_PLOCK_VERSION_MAJOR; |
43 | info->version[1] = DLM_PLOCK_VERSION_MINOR; | |
44 | info->version[2] = DLM_PLOCK_VERSION_PATCH; | |
869d81df DT |
45 | } |
46 | ||
2402211a | 47 | static int check_version(struct dlm_plock_info *info) |
869d81df | 48 | { |
2402211a DT |
49 | if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || |
50 | (DLM_PLOCK_VERSION_MINOR < info->version[1])) { | |
51 | log_print("plock device version mismatch: " | |
869d81df | 52 | "kernel (%u.%u.%u), user (%u.%u.%u)", |
2402211a DT |
53 | DLM_PLOCK_VERSION_MAJOR, |
54 | DLM_PLOCK_VERSION_MINOR, | |
55 | DLM_PLOCK_VERSION_PATCH, | |
869d81df DT |
56 | info->version[0], |
57 | info->version[1], | |
58 | info->version[2]); | |
59 | return -EINVAL; | |
60 | } | |
61 | return 0; | |
62 | } | |
63 | ||
bcbb4ba6 AA |
64 | static void dlm_release_plock_op(struct plock_op *op) |
65 | { | |
66 | kfree(op->data); | |
67 | kfree(op); | |
68 | } | |
69 | ||
869d81df DT |
70 | static void send_op(struct plock_op *op) |
71 | { | |
72 | set_version(&op->info); | |
869d81df DT |
73 | spin_lock(&ops_lock); |
74 | list_add_tail(&op->list, &send_list); | |
75 | spin_unlock(&ops_lock); | |
76 | wake_up(&send_wq); | |
77 | } | |
78 | ||
901025d2 DT |
79 | /* If a process was killed while waiting for the only plock on a file, |
80 | locks_remove_posix will not see any lock on the file so it won't | |
81 | send an unlock-close to us to pass on to userspace to clean up the | |
82 | abandoned waiter. So, we have to insert the unlock-close when the | |
83 | lock call is interrupted. */ | |
84 | ||
4d413ae9 | 85 | static void do_unlock_close(const struct dlm_plock_info *info) |
901025d2 DT |
86 | { |
87 | struct plock_op *op; | |
88 | ||
89 | op = kzalloc(sizeof(*op), GFP_NOFS); | |
90 | if (!op) | |
91 | return; | |
92 | ||
93 | op->info.optype = DLM_PLOCK_OP_UNLOCK; | |
4d413ae9 AA |
94 | op->info.pid = info->pid; |
95 | op->info.fsid = info->fsid; | |
96 | op->info.number = info->number; | |
901025d2 DT |
97 | op->info.start = 0; |
98 | op->info.end = OFFSET_MAX; | |
4d413ae9 | 99 | op->info.owner = info->owner; |
901025d2 DT |
100 | |
101 | op->info.flags |= DLM_PLOCK_FL_CLOSE; | |
102 | send_op(op); | |
103 | } | |
104 | ||
2402211a DT |
105 | int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
106 | int cmd, struct file_lock *fl) | |
869d81df | 107 | { |
bcbb4ba6 | 108 | struct plock_async_data *op_data; |
2402211a | 109 | struct dlm_ls *ls; |
869d81df DT |
110 | struct plock_op *op; |
111 | int rv; | |
112 | ||
2402211a DT |
113 | ls = dlm_find_lockspace_local(lockspace); |
114 | if (!ls) | |
115 | return -EINVAL; | |
116 | ||
bcbb4ba6 AA |
117 | op = kzalloc(sizeof(*op), GFP_NOFS); |
118 | if (!op) { | |
2402211a DT |
119 | rv = -ENOMEM; |
120 | goto out; | |
121 | } | |
869d81df | 122 | |
2402211a | 123 | op->info.optype = DLM_PLOCK_OP_LOCK; |
3a2a9c96 | 124 | op->info.pid = fl->fl_pid; |
869d81df DT |
125 | op->info.ex = (fl->fl_type == F_WRLCK); |
126 | op->info.wait = IS_SETLKW(cmd); | |
2402211a DT |
127 | op->info.fsid = ls->ls_global_id; |
128 | op->info.number = number; | |
869d81df DT |
129 | op->info.start = fl->fl_start; |
130 | op->info.end = fl->fl_end; | |
bcbb4ba6 | 131 | /* async handling */ |
8fb47a4f | 132 | if (fl->fl_lmops && fl->fl_lmops->lm_grant) { |
bcbb4ba6 AA |
133 | op_data = kzalloc(sizeof(*op_data), GFP_NOFS); |
134 | if (!op_data) { | |
135 | dlm_release_plock_op(op); | |
136 | rv = -ENOMEM; | |
137 | goto out; | |
138 | } | |
139 | ||
2066b58b DT |
140 | /* fl_owner is lockd which doesn't distinguish |
141 | processes on the nfs client */ | |
142 | op->info.owner = (__u64) fl->fl_pid; | |
bcbb4ba6 AA |
143 | op_data->callback = fl->fl_lmops->lm_grant; |
144 | locks_init_lock(&op_data->flc); | |
145 | locks_copy_lock(&op_data->flc, fl); | |
146 | op_data->fl = fl; | |
147 | op_data->file = file; | |
148 | ||
149 | op->data = op_data; | |
a800ba77 AA |
150 | |
151 | send_op(op); | |
152 | rv = FILE_LOCK_DEFERRED; | |
153 | goto out; | |
2066b58b DT |
154 | } else { |
155 | op->info.owner = (__u64)(long) fl->fl_owner; | |
2066b58b | 156 | } |
869d81df DT |
157 | |
158 | send_op(op); | |
586759f0 | 159 | |
a800ba77 AA |
160 | rv = wait_event_interruptible(recv_wq, (op->done != 0)); |
161 | if (rv == -ERESTARTSYS) { | |
a800ba77 | 162 | spin_lock(&ops_lock); |
b92a4e3f AA |
163 | /* recheck under ops_lock if we got a done != 0, |
164 | * if so this interrupt case should be ignored | |
165 | */ | |
166 | if (op->done != 0) { | |
167 | spin_unlock(&ops_lock); | |
168 | goto do_lock_wait; | |
169 | } | |
170 | ||
171 | op->sigint = true; | |
a800ba77 | 172 | spin_unlock(&ops_lock); |
ea06d4ca | 173 | log_debug(ls, "%s: wait interrupted %x %llx pid %d", |
bcfad426 | 174 | __func__, ls->ls_global_id, |
19d7ca05 | 175 | (unsigned long long)number, op->info.pid); |
2402211a DT |
176 | goto out; |
177 | } | |
869d81df | 178 | |
b92a4e3f AA |
179 | do_lock_wait: |
180 | ||
a559790c | 181 | WARN_ON(!list_empty(&op->list)); |
869d81df DT |
182 | |
183 | rv = op->info.rv; | |
184 | ||
185 | if (!rv) { | |
4f656367 | 186 | if (locks_lock_file_wait(file, fl) < 0) |
2402211a DT |
187 | log_error(ls, "dlm_posix_lock: vfs lock error %llx", |
188 | (unsigned long long)number); | |
869d81df DT |
189 | } |
190 | ||
bcbb4ba6 | 191 | dlm_release_plock_op(op); |
2402211a DT |
192 | out: |
193 | dlm_put_lockspace(ls); | |
586759f0 ME |
194 | return rv; |
195 | } | |
2402211a | 196 | EXPORT_SYMBOL_GPL(dlm_posix_lock); |
586759f0 | 197 | |
af901ca1 | 198 | /* Returns failure iff a successful lock operation should be canceled */ |
2402211a | 199 | static int dlm_plock_callback(struct plock_op *op) |
586759f0 | 200 | { |
bcbb4ba6 | 201 | struct plock_async_data *op_data = op->data; |
586759f0 ME |
202 | struct file *file; |
203 | struct file_lock *fl; | |
204 | struct file_lock *flc; | |
d0449b90 | 205 | int (*notify)(struct file_lock *fl, int result) = NULL; |
586759f0 ME |
206 | int rv = 0; |
207 | ||
a559790c | 208 | WARN_ON(!list_empty(&op->list)); |
586759f0 ME |
209 | |
210 | /* check if the following 2 are still valid or make a copy */ | |
bcbb4ba6 AA |
211 | file = op_data->file; |
212 | flc = &op_data->flc; | |
213 | fl = op_data->fl; | |
214 | notify = op_data->callback; | |
586759f0 ME |
215 | |
216 | if (op->info.rv) { | |
d0449b90 | 217 | notify(fl, op->info.rv); |
586759f0 ME |
218 | goto out; |
219 | } | |
220 | ||
221 | /* got fs lock; bookkeep locally as well: */ | |
222 | flc->fl_flags &= ~FL_SLEEP; | |
223 | if (posix_lock_file(file, flc, NULL)) { | |
224 | /* | |
225 | * This can only happen in the case of kmalloc() failure. | |
226 | * The filesystem's own lock is the authoritative lock, | |
227 | * so a failure to get the lock locally is not a disaster. | |
2402211a | 228 | * As long as the fs cannot reliably cancel locks (especially |
586759f0 ME |
229 | * in a low-memory situation), we're better off ignoring |
230 | * this failure than trying to recover. | |
231 | */ | |
2402211a DT |
232 | log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", |
233 | (unsigned long long)op->info.number, file, fl); | |
586759f0 ME |
234 | } |
235 | ||
d0449b90 | 236 | rv = notify(fl, 0); |
586759f0 ME |
237 | if (rv) { |
238 | /* XXX: We need to cancel the fs lock here: */ | |
2402211a DT |
239 | log_print("dlm_plock_callback: lock granted after lock request " |
240 | "failed; dangling lock!\n"); | |
586759f0 ME |
241 | goto out; |
242 | } | |
243 | ||
244 | out: | |
bcbb4ba6 | 245 | dlm_release_plock_op(op); |
869d81df DT |
246 | return rv; |
247 | } | |
248 | ||
2402211a DT |
249 | int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
250 | struct file_lock *fl) | |
869d81df | 251 | { |
2402211a | 252 | struct dlm_ls *ls; |
869d81df DT |
253 | struct plock_op *op; |
254 | int rv; | |
90008318 | 255 | unsigned char fl_flags = fl->fl_flags; |
869d81df | 256 | |
2402211a DT |
257 | ls = dlm_find_lockspace_local(lockspace); |
258 | if (!ls) | |
259 | return -EINVAL; | |
260 | ||
573c24c4 | 261 | op = kzalloc(sizeof(*op), GFP_NOFS); |
2402211a DT |
262 | if (!op) { |
263 | rv = -ENOMEM; | |
264 | goto out; | |
265 | } | |
869d81df | 266 | |
90008318 DT |
267 | /* cause the vfs unlock to return ENOENT if lock is not found */ |
268 | fl->fl_flags |= FL_EXISTS; | |
269 | ||
4f656367 | 270 | rv = locks_lock_file_wait(file, fl); |
90008318 DT |
271 | if (rv == -ENOENT) { |
272 | rv = 0; | |
273 | goto out_free; | |
274 | } | |
275 | if (rv < 0) { | |
276 | log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx", | |
277 | rv, (unsigned long long)number); | |
278 | } | |
869d81df | 279 | |
2402211a | 280 | op->info.optype = DLM_PLOCK_OP_UNLOCK; |
3a2a9c96 | 281 | op->info.pid = fl->fl_pid; |
2402211a DT |
282 | op->info.fsid = ls->ls_global_id; |
283 | op->info.number = number; | |
869d81df DT |
284 | op->info.start = fl->fl_start; |
285 | op->info.end = fl->fl_end; | |
8fb47a4f | 286 | if (fl->fl_lmops && fl->fl_lmops->lm_grant) |
2066b58b DT |
287 | op->info.owner = (__u64) fl->fl_pid; |
288 | else | |
289 | op->info.owner = (__u64)(long) fl->fl_owner; | |
869d81df | 290 | |
901025d2 DT |
291 | if (fl->fl_flags & FL_CLOSE) { |
292 | op->info.flags |= DLM_PLOCK_FL_CLOSE; | |
293 | send_op(op); | |
294 | rv = 0; | |
295 | goto out; | |
296 | } | |
297 | ||
869d81df DT |
298 | send_op(op); |
299 | wait_event(recv_wq, (op->done != 0)); | |
300 | ||
a559790c | 301 | WARN_ON(!list_empty(&op->list)); |
869d81df DT |
302 | |
303 | rv = op->info.rv; | |
304 | ||
586759f0 ME |
305 | if (rv == -ENOENT) |
306 | rv = 0; | |
307 | ||
90008318 | 308 | out_free: |
bcbb4ba6 | 309 | dlm_release_plock_op(op); |
2402211a DT |
310 | out: |
311 | dlm_put_lockspace(ls); | |
90008318 | 312 | fl->fl_flags = fl_flags; |
869d81df DT |
313 | return rv; |
314 | } | |
2402211a | 315 | EXPORT_SYMBOL_GPL(dlm_posix_unlock); |
869d81df | 316 | |
2402211a DT |
317 | int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
318 | struct file_lock *fl) | |
869d81df | 319 | { |
2402211a | 320 | struct dlm_ls *ls; |
869d81df DT |
321 | struct plock_op *op; |
322 | int rv; | |
323 | ||
2402211a DT |
324 | ls = dlm_find_lockspace_local(lockspace); |
325 | if (!ls) | |
326 | return -EINVAL; | |
327 | ||
573c24c4 | 328 | op = kzalloc(sizeof(*op), GFP_NOFS); |
2402211a DT |
329 | if (!op) { |
330 | rv = -ENOMEM; | |
331 | goto out; | |
332 | } | |
869d81df | 333 | |
2402211a | 334 | op->info.optype = DLM_PLOCK_OP_GET; |
3a2a9c96 | 335 | op->info.pid = fl->fl_pid; |
869d81df | 336 | op->info.ex = (fl->fl_type == F_WRLCK); |
2402211a DT |
337 | op->info.fsid = ls->ls_global_id; |
338 | op->info.number = number; | |
869d81df DT |
339 | op->info.start = fl->fl_start; |
340 | op->info.end = fl->fl_end; | |
8fb47a4f | 341 | if (fl->fl_lmops && fl->fl_lmops->lm_grant) |
2066b58b DT |
342 | op->info.owner = (__u64) fl->fl_pid; |
343 | else | |
344 | op->info.owner = (__u64)(long) fl->fl_owner; | |
586759f0 | 345 | |
869d81df DT |
346 | send_op(op); |
347 | wait_event(recv_wq, (op->done != 0)); | |
348 | ||
a559790c | 349 | WARN_ON(!list_empty(&op->list)); |
869d81df | 350 | |
a7a2ff8a DT |
351 | /* info.rv from userspace is 1 for conflict, 0 for no-conflict, |
352 | -ENOENT if there are no locks on the file */ | |
353 | ||
869d81df DT |
354 | rv = op->info.rv; |
355 | ||
586759f0 ME |
356 | fl->fl_type = F_UNLCK; |
357 | if (rv == -ENOENT) | |
358 | rv = 0; | |
a7a2ff8a | 359 | else if (rv > 0) { |
20d5a399 | 360 | locks_init_lock(fl); |
869d81df | 361 | fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; |
20d5a399 | 362 | fl->fl_flags = FL_POSIX; |
9d5b86ac | 363 | fl->fl_pid = -op->info.pid; |
869d81df DT |
364 | fl->fl_start = op->info.start; |
365 | fl->fl_end = op->info.end; | |
a7a2ff8a | 366 | rv = 0; |
869d81df DT |
367 | } |
368 | ||
bcbb4ba6 | 369 | dlm_release_plock_op(op); |
2402211a DT |
370 | out: |
371 | dlm_put_lockspace(ls); | |
869d81df DT |
372 | return rv; |
373 | } | |
2402211a | 374 | EXPORT_SYMBOL_GPL(dlm_posix_get); |
869d81df DT |
375 | |
376 | /* a read copies out one plock request from the send list */ | |
377 | static ssize_t dev_read(struct file *file, char __user *u, size_t count, | |
378 | loff_t *ppos) | |
379 | { | |
2402211a | 380 | struct dlm_plock_info info; |
869d81df DT |
381 | struct plock_op *op = NULL; |
382 | ||
383 | if (count < sizeof(info)) | |
384 | return -EINVAL; | |
385 | ||
386 | spin_lock(&ops_lock); | |
387 | if (!list_empty(&send_list)) { | |
976a0624 | 388 | op = list_first_entry(&send_list, struct plock_op, list); |
901025d2 DT |
389 | if (op->info.flags & DLM_PLOCK_FL_CLOSE) |
390 | list_del(&op->list); | |
391 | else | |
392 | list_move(&op->list, &recv_list); | |
869d81df DT |
393 | memcpy(&info, &op->info, sizeof(info)); |
394 | } | |
395 | spin_unlock(&ops_lock); | |
396 | ||
397 | if (!op) | |
398 | return -EAGAIN; | |
399 | ||
901025d2 DT |
400 | /* there is no need to get a reply from userspace for unlocks |
401 | that were generated by the vfs cleaning up for a close | |
402 | (the process did not make an unlock call). */ | |
403 | ||
404 | if (op->info.flags & DLM_PLOCK_FL_CLOSE) | |
bcbb4ba6 | 405 | dlm_release_plock_op(op); |
901025d2 | 406 | |
869d81df DT |
407 | if (copy_to_user(u, &info, sizeof(info))) |
408 | return -EFAULT; | |
409 | return sizeof(info); | |
410 | } | |
411 | ||
412 | /* a write copies in one plock result that should match a plock_op | |
413 | on the recv list */ | |
414 | static ssize_t dev_write(struct file *file, const char __user *u, size_t count, | |
415 | loff_t *ppos) | |
416 | { | |
dc1acd5c | 417 | struct plock_op *op = NULL, *iter; |
2402211a | 418 | struct dlm_plock_info info; |
dc1acd5c | 419 | int do_callback = 0; |
869d81df DT |
420 | |
421 | if (count != sizeof(info)) | |
422 | return -EINVAL; | |
423 | ||
424 | if (copy_from_user(&info, u, sizeof(info))) | |
425 | return -EFAULT; | |
426 | ||
427 | if (check_version(&info)) | |
428 | return -EINVAL; | |
429 | ||
430 | spin_lock(&ops_lock); | |
dc1acd5c JK |
431 | list_for_each_entry(iter, &recv_list, list) { |
432 | if (iter->info.fsid == info.fsid && | |
433 | iter->info.number == info.number && | |
434 | iter->info.owner == info.owner) { | |
b92a4e3f AA |
435 | if (iter->sigint) { |
436 | list_del(&iter->list); | |
437 | spin_unlock(&ops_lock); | |
438 | ||
439 | pr_debug("%s: sigint cleanup %x %llx pid %d", | |
440 | __func__, iter->info.fsid, | |
441 | (unsigned long long)iter->info.number, | |
442 | iter->info.pid); | |
443 | do_unlock_close(&iter->info); | |
444 | memcpy(&iter->info, &info, sizeof(info)); | |
445 | dlm_release_plock_op(iter); | |
446 | return count; | |
447 | } | |
dc1acd5c JK |
448 | list_del_init(&iter->list); |
449 | memcpy(&iter->info, &info, sizeof(info)); | |
450 | if (iter->data) | |
c78a87d0 DT |
451 | do_callback = 1; |
452 | else | |
dc1acd5c JK |
453 | iter->done = 1; |
454 | op = iter; | |
869d81df DT |
455 | break; |
456 | } | |
457 | } | |
458 | spin_unlock(&ops_lock); | |
459 | ||
dc1acd5c | 460 | if (op) { |
c78a87d0 | 461 | if (do_callback) |
817d10ba | 462 | dlm_plock_callback(op); |
586759f0 ME |
463 | else |
464 | wake_up(&recv_wq); | |
465 | } else | |
ea06d4ca | 466 | log_print("%s: no op %x %llx", __func__, |
bcfad426 | 467 | info.fsid, (unsigned long long)info.number); |
869d81df DT |
468 | return count; |
469 | } | |
470 | ||
076ccb76 | 471 | static __poll_t dev_poll(struct file *file, poll_table *wait) |
869d81df | 472 | { |
076ccb76 | 473 | __poll_t mask = 0; |
cee23c79 | 474 | |
869d81df DT |
475 | poll_wait(file, &send_wq, wait); |
476 | ||
477 | spin_lock(&ops_lock); | |
cee23c79 | 478 | if (!list_empty(&send_list)) |
a9a08845 | 479 | mask = EPOLLIN | EPOLLRDNORM; |
869d81df | 480 | spin_unlock(&ops_lock); |
cee23c79 DC |
481 | |
482 | return mask; | |
869d81df DT |
483 | } |
484 | ||
00977a59 | 485 | static const struct file_operations dev_fops = { |
869d81df DT |
486 | .read = dev_read, |
487 | .write = dev_write, | |
488 | .poll = dev_poll, | |
6038f373 AB |
489 | .owner = THIS_MODULE, |
490 | .llseek = noop_llseek, | |
869d81df DT |
491 | }; |
492 | ||
493 | static struct miscdevice plock_dev_misc = { | |
494 | .minor = MISC_DYNAMIC_MINOR, | |
2402211a | 495 | .name = DLM_PLOCK_MISC_NAME, |
869d81df DT |
496 | .fops = &dev_fops |
497 | }; | |
498 | ||
2402211a | 499 | int dlm_plock_init(void) |
869d81df DT |
500 | { |
501 | int rv; | |
502 | ||
869d81df DT |
503 | rv = misc_register(&plock_dev_misc); |
504 | if (rv) | |
2402211a | 505 | log_print("dlm_plock_init: misc_register failed %d", rv); |
869d81df DT |
506 | return rv; |
507 | } | |
508 | ||
2402211a | 509 | void dlm_plock_exit(void) |
869d81df | 510 | { |
f368ed60 | 511 | misc_deregister(&plock_dev_misc); |
869d81df DT |
512 | } |
513 |