Commit | Line | Data |
---|---|---|
7336d0e6 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
869d81df | 2 | /* |
2402211a | 3 | * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. |
869d81df DT |
4 | */ |
5 | ||
2402211a | 6 | #include <linux/fs.h> |
869d81df | 7 | #include <linux/miscdevice.h> |
bd01f843 | 8 | #include <linux/poll.h> |
2402211a DT |
9 | #include <linux/dlm.h> |
10 | #include <linux/dlm_plock.h> | |
5a0e3ad6 | 11 | #include <linux/slab.h> |
869d81df | 12 | |
2402211a DT |
13 | #include "dlm_internal.h" |
14 | #include "lockspace.h" | |
869d81df DT |
15 | |
/* Serializes every access to send_list and recv_list and to the ops on them. */
static spinlock_t ops_lock;
/* Requests queued by send_op() that dev_read() has not yet handed out. */
static struct list_head send_list;
/* Requests handed out by dev_read() that still await a dev_write() result. */
static struct list_head recv_list;
/* Woken by send_op(); dev_poll() registers on it to report readable data. */
static wait_queue_head_t send_wq;
/* Woken by dev_write(); synchronous callers sleep here until op->done is set. */
static wait_queue_head_t recv_wq;
21 | ||
/* One plock request travelling between the kernel and the userspace daemon. */
struct plock_op {
	struct list_head list;		/* linkage on send_list or recv_list */
	int done;			/* set by dev_write() once info holds the result */
	struct dlm_plock_info info;	/* request sent to / result received from userspace */
};
27 | ||
586759f0 ME |
/*
 * A lock request plus the state needed to complete it asynchronously.
 *
 * xop must stay the first member: dev_write() and dlm_plock_callback()
 * convert a struct plock_op pointer to a struct plock_xop pointer by cast.
 */
struct plock_xop {
	struct plock_op xop;
	/* lm_grant hook for asynchronous requests, NULL for synchronous ones */
	int (*callback)(struct file_lock *fl, int result);
	void *fl;		/* caller's file_lock, handed back to callback */
	void *file;		/* file the local posix lock is recorded against */
	struct file_lock flc;	/* private copy of the lock used for local bookkeeping */
};
35 | ||
36 | ||
2402211a | 37 | static inline void set_version(struct dlm_plock_info *info) |
869d81df | 38 | { |
2402211a DT |
39 | info->version[0] = DLM_PLOCK_VERSION_MAJOR; |
40 | info->version[1] = DLM_PLOCK_VERSION_MINOR; | |
41 | info->version[2] = DLM_PLOCK_VERSION_PATCH; | |
869d81df DT |
42 | } |
43 | ||
2402211a | 44 | static int check_version(struct dlm_plock_info *info) |
869d81df | 45 | { |
2402211a DT |
46 | if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || |
47 | (DLM_PLOCK_VERSION_MINOR < info->version[1])) { | |
48 | log_print("plock device version mismatch: " | |
869d81df | 49 | "kernel (%u.%u.%u), user (%u.%u.%u)", |
2402211a DT |
50 | DLM_PLOCK_VERSION_MAJOR, |
51 | DLM_PLOCK_VERSION_MINOR, | |
52 | DLM_PLOCK_VERSION_PATCH, | |
869d81df DT |
53 | info->version[0], |
54 | info->version[1], | |
55 | info->version[2]); | |
56 | return -EINVAL; | |
57 | } | |
58 | return 0; | |
59 | } | |
60 | ||
61 | static void send_op(struct plock_op *op) | |
62 | { | |
63 | set_version(&op->info); | |
64 | INIT_LIST_HEAD(&op->list); | |
65 | spin_lock(&ops_lock); | |
66 | list_add_tail(&op->list, &send_list); | |
67 | spin_unlock(&ops_lock); | |
68 | wake_up(&send_wq); | |
69 | } | |
70 | ||
901025d2 DT |
71 | /* If a process was killed while waiting for the only plock on a file, |
72 | locks_remove_posix will not see any lock on the file so it won't | |
73 | send an unlock-close to us to pass on to userspace to clean up the | |
74 | abandoned waiter. So, we have to insert the unlock-close when the | |
75 | lock call is interrupted. */ | |
76 | ||
77 | static void do_unlock_close(struct dlm_ls *ls, u64 number, | |
78 | struct file *file, struct file_lock *fl) | |
79 | { | |
80 | struct plock_op *op; | |
81 | ||
82 | op = kzalloc(sizeof(*op), GFP_NOFS); | |
83 | if (!op) | |
84 | return; | |
85 | ||
86 | op->info.optype = DLM_PLOCK_OP_UNLOCK; | |
87 | op->info.pid = fl->fl_pid; | |
88 | op->info.fsid = ls->ls_global_id; | |
89 | op->info.number = number; | |
90 | op->info.start = 0; | |
91 | op->info.end = OFFSET_MAX; | |
8fb47a4f | 92 | if (fl->fl_lmops && fl->fl_lmops->lm_grant) |
901025d2 DT |
93 | op->info.owner = (__u64) fl->fl_pid; |
94 | else | |
95 | op->info.owner = (__u64)(long) fl->fl_owner; | |
96 | ||
97 | op->info.flags |= DLM_PLOCK_FL_CLOSE; | |
98 | send_op(op); | |
99 | } | |
100 | ||
2402211a DT |
101 | int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
102 | int cmd, struct file_lock *fl) | |
869d81df | 103 | { |
2402211a | 104 | struct dlm_ls *ls; |
869d81df | 105 | struct plock_op *op; |
586759f0 | 106 | struct plock_xop *xop; |
869d81df DT |
107 | int rv; |
108 | ||
2402211a DT |
109 | ls = dlm_find_lockspace_local(lockspace); |
110 | if (!ls) | |
111 | return -EINVAL; | |
112 | ||
573c24c4 | 113 | xop = kzalloc(sizeof(*xop), GFP_NOFS); |
2402211a DT |
114 | if (!xop) { |
115 | rv = -ENOMEM; | |
116 | goto out; | |
117 | } | |
869d81df | 118 | |
586759f0 | 119 | op = &xop->xop; |
2402211a | 120 | op->info.optype = DLM_PLOCK_OP_LOCK; |
3a2a9c96 | 121 | op->info.pid = fl->fl_pid; |
869d81df DT |
122 | op->info.ex = (fl->fl_type == F_WRLCK); |
123 | op->info.wait = IS_SETLKW(cmd); | |
2402211a DT |
124 | op->info.fsid = ls->ls_global_id; |
125 | op->info.number = number; | |
869d81df DT |
126 | op->info.start = fl->fl_start; |
127 | op->info.end = fl->fl_end; | |
8fb47a4f | 128 | if (fl->fl_lmops && fl->fl_lmops->lm_grant) { |
2066b58b DT |
129 | /* fl_owner is lockd which doesn't distinguish |
130 | processes on the nfs client */ | |
131 | op->info.owner = (__u64) fl->fl_pid; | |
8fb47a4f | 132 | xop->callback = fl->fl_lmops->lm_grant; |
586759f0 ME |
133 | locks_init_lock(&xop->flc); |
134 | locks_copy_lock(&xop->flc, fl); | |
135 | xop->fl = fl; | |
136 | xop->file = file; | |
2066b58b DT |
137 | } else { |
138 | op->info.owner = (__u64)(long) fl->fl_owner; | |
586759f0 | 139 | xop->callback = NULL; |
2066b58b | 140 | } |
869d81df DT |
141 | |
142 | send_op(op); | |
586759f0 | 143 | |
901025d2 | 144 | if (xop->callback == NULL) { |
a6b1533e | 145 | rv = wait_event_interruptible(recv_wq, (op->done != 0)); |
901025d2 DT |
146 | if (rv == -ERESTARTSYS) { |
147 | log_debug(ls, "dlm_posix_lock: wait killed %llx", | |
148 | (unsigned long long)number); | |
149 | spin_lock(&ops_lock); | |
150 | list_del(&op->list); | |
151 | spin_unlock(&ops_lock); | |
152 | kfree(xop); | |
153 | do_unlock_close(ls, number, file, fl); | |
154 | goto out; | |
155 | } | |
156 | } else { | |
bde74e4b | 157 | rv = FILE_LOCK_DEFERRED; |
2402211a DT |
158 | goto out; |
159 | } | |
869d81df DT |
160 | |
161 | spin_lock(&ops_lock); | |
162 | if (!list_empty(&op->list)) { | |
2402211a DT |
163 | log_error(ls, "dlm_posix_lock: op on list %llx", |
164 | (unsigned long long)number); | |
869d81df DT |
165 | list_del(&op->list); |
166 | } | |
167 | spin_unlock(&ops_lock); | |
168 | ||
169 | rv = op->info.rv; | |
170 | ||
171 | if (!rv) { | |
4f656367 | 172 | if (locks_lock_file_wait(file, fl) < 0) |
2402211a DT |
173 | log_error(ls, "dlm_posix_lock: vfs lock error %llx", |
174 | (unsigned long long)number); | |
869d81df DT |
175 | } |
176 | ||
586759f0 | 177 | kfree(xop); |
2402211a DT |
178 | out: |
179 | dlm_put_lockspace(ls); | |
586759f0 ME |
180 | return rv; |
181 | } | |
2402211a | 182 | EXPORT_SYMBOL_GPL(dlm_posix_lock); |
586759f0 | 183 | |
af901ca1 | 184 | /* Returns failure iff a successful lock operation should be canceled */ |
2402211a | 185 | static int dlm_plock_callback(struct plock_op *op) |
586759f0 ME |
186 | { |
187 | struct file *file; | |
188 | struct file_lock *fl; | |
189 | struct file_lock *flc; | |
d0449b90 | 190 | int (*notify)(struct file_lock *fl, int result) = NULL; |
586759f0 ME |
191 | struct plock_xop *xop = (struct plock_xop *)op; |
192 | int rv = 0; | |
193 | ||
194 | spin_lock(&ops_lock); | |
195 | if (!list_empty(&op->list)) { | |
2402211a DT |
196 | log_print("dlm_plock_callback: op on list %llx", |
197 | (unsigned long long)op->info.number); | |
586759f0 ME |
198 | list_del(&op->list); |
199 | } | |
200 | spin_unlock(&ops_lock); | |
201 | ||
202 | /* check if the following 2 are still valid or make a copy */ | |
203 | file = xop->file; | |
204 | flc = &xop->flc; | |
205 | fl = xop->fl; | |
206 | notify = xop->callback; | |
207 | ||
208 | if (op->info.rv) { | |
d0449b90 | 209 | notify(fl, op->info.rv); |
586759f0 ME |
210 | goto out; |
211 | } | |
212 | ||
213 | /* got fs lock; bookkeep locally as well: */ | |
214 | flc->fl_flags &= ~FL_SLEEP; | |
215 | if (posix_lock_file(file, flc, NULL)) { | |
216 | /* | |
217 | * This can only happen in the case of kmalloc() failure. | |
218 | * The filesystem's own lock is the authoritative lock, | |
219 | * so a failure to get the lock locally is not a disaster. | |
2402211a | 220 | * As long as the fs cannot reliably cancel locks (especially |
586759f0 ME |
221 | * in a low-memory situation), we're better off ignoring |
222 | * this failure than trying to recover. | |
223 | */ | |
2402211a DT |
224 | log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", |
225 | (unsigned long long)op->info.number, file, fl); | |
586759f0 ME |
226 | } |
227 | ||
d0449b90 | 228 | rv = notify(fl, 0); |
586759f0 ME |
229 | if (rv) { |
230 | /* XXX: We need to cancel the fs lock here: */ | |
2402211a DT |
231 | log_print("dlm_plock_callback: lock granted after lock request " |
232 | "failed; dangling lock!\n"); | |
586759f0 ME |
233 | goto out; |
234 | } | |
235 | ||
236 | out: | |
237 | kfree(xop); | |
869d81df DT |
238 | return rv; |
239 | } | |
240 | ||
2402211a DT |
241 | int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
242 | struct file_lock *fl) | |
869d81df | 243 | { |
2402211a | 244 | struct dlm_ls *ls; |
869d81df DT |
245 | struct plock_op *op; |
246 | int rv; | |
90008318 | 247 | unsigned char fl_flags = fl->fl_flags; |
869d81df | 248 | |
2402211a DT |
249 | ls = dlm_find_lockspace_local(lockspace); |
250 | if (!ls) | |
251 | return -EINVAL; | |
252 | ||
573c24c4 | 253 | op = kzalloc(sizeof(*op), GFP_NOFS); |
2402211a DT |
254 | if (!op) { |
255 | rv = -ENOMEM; | |
256 | goto out; | |
257 | } | |
869d81df | 258 | |
90008318 DT |
259 | /* cause the vfs unlock to return ENOENT if lock is not found */ |
260 | fl->fl_flags |= FL_EXISTS; | |
261 | ||
4f656367 | 262 | rv = locks_lock_file_wait(file, fl); |
90008318 DT |
263 | if (rv == -ENOENT) { |
264 | rv = 0; | |
265 | goto out_free; | |
266 | } | |
267 | if (rv < 0) { | |
268 | log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx", | |
269 | rv, (unsigned long long)number); | |
270 | } | |
869d81df | 271 | |
2402211a | 272 | op->info.optype = DLM_PLOCK_OP_UNLOCK; |
3a2a9c96 | 273 | op->info.pid = fl->fl_pid; |
2402211a DT |
274 | op->info.fsid = ls->ls_global_id; |
275 | op->info.number = number; | |
869d81df DT |
276 | op->info.start = fl->fl_start; |
277 | op->info.end = fl->fl_end; | |
8fb47a4f | 278 | if (fl->fl_lmops && fl->fl_lmops->lm_grant) |
2066b58b DT |
279 | op->info.owner = (__u64) fl->fl_pid; |
280 | else | |
281 | op->info.owner = (__u64)(long) fl->fl_owner; | |
869d81df | 282 | |
901025d2 DT |
283 | if (fl->fl_flags & FL_CLOSE) { |
284 | op->info.flags |= DLM_PLOCK_FL_CLOSE; | |
285 | send_op(op); | |
286 | rv = 0; | |
287 | goto out; | |
288 | } | |
289 | ||
869d81df DT |
290 | send_op(op); |
291 | wait_event(recv_wq, (op->done != 0)); | |
292 | ||
293 | spin_lock(&ops_lock); | |
294 | if (!list_empty(&op->list)) { | |
2402211a DT |
295 | log_error(ls, "dlm_posix_unlock: op on list %llx", |
296 | (unsigned long long)number); | |
869d81df DT |
297 | list_del(&op->list); |
298 | } | |
299 | spin_unlock(&ops_lock); | |
300 | ||
301 | rv = op->info.rv; | |
302 | ||
586759f0 ME |
303 | if (rv == -ENOENT) |
304 | rv = 0; | |
305 | ||
90008318 | 306 | out_free: |
869d81df | 307 | kfree(op); |
2402211a DT |
308 | out: |
309 | dlm_put_lockspace(ls); | |
90008318 | 310 | fl->fl_flags = fl_flags; |
869d81df DT |
311 | return rv; |
312 | } | |
2402211a | 313 | EXPORT_SYMBOL_GPL(dlm_posix_unlock); |
869d81df | 314 | |
2402211a DT |
315 | int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, |
316 | struct file_lock *fl) | |
869d81df | 317 | { |
2402211a | 318 | struct dlm_ls *ls; |
869d81df DT |
319 | struct plock_op *op; |
320 | int rv; | |
321 | ||
2402211a DT |
322 | ls = dlm_find_lockspace_local(lockspace); |
323 | if (!ls) | |
324 | return -EINVAL; | |
325 | ||
573c24c4 | 326 | op = kzalloc(sizeof(*op), GFP_NOFS); |
2402211a DT |
327 | if (!op) { |
328 | rv = -ENOMEM; | |
329 | goto out; | |
330 | } | |
869d81df | 331 | |
2402211a | 332 | op->info.optype = DLM_PLOCK_OP_GET; |
3a2a9c96 | 333 | op->info.pid = fl->fl_pid; |
869d81df | 334 | op->info.ex = (fl->fl_type == F_WRLCK); |
2402211a DT |
335 | op->info.fsid = ls->ls_global_id; |
336 | op->info.number = number; | |
869d81df DT |
337 | op->info.start = fl->fl_start; |
338 | op->info.end = fl->fl_end; | |
8fb47a4f | 339 | if (fl->fl_lmops && fl->fl_lmops->lm_grant) |
2066b58b DT |
340 | op->info.owner = (__u64) fl->fl_pid; |
341 | else | |
342 | op->info.owner = (__u64)(long) fl->fl_owner; | |
586759f0 | 343 | |
869d81df DT |
344 | send_op(op); |
345 | wait_event(recv_wq, (op->done != 0)); | |
346 | ||
347 | spin_lock(&ops_lock); | |
348 | if (!list_empty(&op->list)) { | |
2402211a DT |
349 | log_error(ls, "dlm_posix_get: op on list %llx", |
350 | (unsigned long long)number); | |
869d81df DT |
351 | list_del(&op->list); |
352 | } | |
353 | spin_unlock(&ops_lock); | |
354 | ||
a7a2ff8a DT |
355 | /* info.rv from userspace is 1 for conflict, 0 for no-conflict, |
356 | -ENOENT if there are no locks on the file */ | |
357 | ||
869d81df DT |
358 | rv = op->info.rv; |
359 | ||
586759f0 ME |
360 | fl->fl_type = F_UNLCK; |
361 | if (rv == -ENOENT) | |
362 | rv = 0; | |
a7a2ff8a | 363 | else if (rv > 0) { |
20d5a399 | 364 | locks_init_lock(fl); |
869d81df | 365 | fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; |
20d5a399 | 366 | fl->fl_flags = FL_POSIX; |
9d5b86ac | 367 | fl->fl_pid = -op->info.pid; |
869d81df DT |
368 | fl->fl_start = op->info.start; |
369 | fl->fl_end = op->info.end; | |
a7a2ff8a | 370 | rv = 0; |
869d81df DT |
371 | } |
372 | ||
373 | kfree(op); | |
2402211a DT |
374 | out: |
375 | dlm_put_lockspace(ls); | |
869d81df DT |
376 | return rv; |
377 | } | |
2402211a | 378 | EXPORT_SYMBOL_GPL(dlm_posix_get); |
869d81df DT |
379 | |
380 | /* a read copies out one plock request from the send list */ | |
381 | static ssize_t dev_read(struct file *file, char __user *u, size_t count, | |
382 | loff_t *ppos) | |
383 | { | |
2402211a | 384 | struct dlm_plock_info info; |
869d81df DT |
385 | struct plock_op *op = NULL; |
386 | ||
387 | if (count < sizeof(info)) | |
388 | return -EINVAL; | |
389 | ||
390 | spin_lock(&ops_lock); | |
391 | if (!list_empty(&send_list)) { | |
392 | op = list_entry(send_list.next, struct plock_op, list); | |
901025d2 DT |
393 | if (op->info.flags & DLM_PLOCK_FL_CLOSE) |
394 | list_del(&op->list); | |
395 | else | |
396 | list_move(&op->list, &recv_list); | |
869d81df DT |
397 | memcpy(&info, &op->info, sizeof(info)); |
398 | } | |
399 | spin_unlock(&ops_lock); | |
400 | ||
401 | if (!op) | |
402 | return -EAGAIN; | |
403 | ||
901025d2 DT |
404 | /* there is no need to get a reply from userspace for unlocks |
405 | that were generated by the vfs cleaning up for a close | |
406 | (the process did not make an unlock call). */ | |
407 | ||
408 | if (op->info.flags & DLM_PLOCK_FL_CLOSE) | |
409 | kfree(op); | |
410 | ||
869d81df DT |
411 | if (copy_to_user(u, &info, sizeof(info))) |
412 | return -EFAULT; | |
413 | return sizeof(info); | |
414 | } | |
415 | ||
416 | /* a write copies in one plock result that should match a plock_op | |
417 | on the recv list */ | |
418 | static ssize_t dev_write(struct file *file, const char __user *u, size_t count, | |
419 | loff_t *ppos) | |
420 | { | |
2402211a | 421 | struct dlm_plock_info info; |
869d81df | 422 | struct plock_op *op; |
c78a87d0 | 423 | int found = 0, do_callback = 0; |
869d81df DT |
424 | |
425 | if (count != sizeof(info)) | |
426 | return -EINVAL; | |
427 | ||
428 | if (copy_from_user(&info, u, sizeof(info))) | |
429 | return -EFAULT; | |
430 | ||
431 | if (check_version(&info)) | |
432 | return -EINVAL; | |
433 | ||
434 | spin_lock(&ops_lock); | |
435 | list_for_each_entry(op, &recv_list, list) { | |
c78a87d0 DT |
436 | if (op->info.fsid == info.fsid && |
437 | op->info.number == info.number && | |
08eac93a | 438 | op->info.owner == info.owner) { |
c78a87d0 | 439 | struct plock_xop *xop = (struct plock_xop *)op; |
869d81df | 440 | list_del_init(&op->list); |
869d81df | 441 | memcpy(&op->info, &info, sizeof(info)); |
c78a87d0 DT |
442 | if (xop->callback) |
443 | do_callback = 1; | |
444 | else | |
445 | op->done = 1; | |
446 | found = 1; | |
869d81df DT |
447 | break; |
448 | } | |
449 | } | |
450 | spin_unlock(&ops_lock); | |
451 | ||
586759f0 | 452 | if (found) { |
c78a87d0 | 453 | if (do_callback) |
817d10ba | 454 | dlm_plock_callback(op); |
586759f0 ME |
455 | else |
456 | wake_up(&recv_wq); | |
457 | } else | |
2402211a DT |
458 | log_print("dev_write no op %x %llx", info.fsid, |
459 | (unsigned long long)info.number); | |
869d81df DT |
460 | return count; |
461 | } | |
462 | ||
076ccb76 | 463 | static __poll_t dev_poll(struct file *file, poll_table *wait) |
869d81df | 464 | { |
076ccb76 | 465 | __poll_t mask = 0; |
cee23c79 | 466 | |
869d81df DT |
467 | poll_wait(file, &send_wq, wait); |
468 | ||
469 | spin_lock(&ops_lock); | |
cee23c79 | 470 | if (!list_empty(&send_list)) |
a9a08845 | 471 | mask = EPOLLIN | EPOLLRDNORM; |
869d81df | 472 | spin_unlock(&ops_lock); |
cee23c79 DC |
473 | |
474 | return mask; | |
869d81df DT |
475 | } |
476 | ||
00977a59 | 477 | static const struct file_operations dev_fops = { |
869d81df DT |
478 | .read = dev_read, |
479 | .write = dev_write, | |
480 | .poll = dev_poll, | |
6038f373 AB |
481 | .owner = THIS_MODULE, |
482 | .llseek = noop_llseek, | |
869d81df DT |
483 | }; |
484 | ||
485 | static struct miscdevice plock_dev_misc = { | |
486 | .minor = MISC_DYNAMIC_MINOR, | |
2402211a | 487 | .name = DLM_PLOCK_MISC_NAME, |
869d81df DT |
488 | .fops = &dev_fops |
489 | }; | |
490 | ||
2402211a | 491 | int dlm_plock_init(void) |
869d81df DT |
492 | { |
493 | int rv; | |
494 | ||
495 | spin_lock_init(&ops_lock); | |
496 | INIT_LIST_HEAD(&send_list); | |
497 | INIT_LIST_HEAD(&recv_list); | |
498 | init_waitqueue_head(&send_wq); | |
499 | init_waitqueue_head(&recv_wq); | |
500 | ||
501 | rv = misc_register(&plock_dev_misc); | |
502 | if (rv) | |
2402211a | 503 | log_print("dlm_plock_init: misc_register failed %d", rv); |
869d81df DT |
504 | return rv; |
505 | } | |
506 | ||
2402211a | 507 | void dlm_plock_exit(void) |
869d81df | 508 | { |
f368ed60 | 509 | misc_deregister(&plock_dev_misc); |
869d81df DT |
510 | } |
511 |