Merge tag 'locking-core-2023-05-05' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-block.git] / fs / d_path.c
CommitLineData
7a5cf791
AV
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/syscalls.h>
3#include <linux/export.h>
4#include <linux/uaccess.h>
5#include <linux/fs_struct.h>
6#include <linux/fs.h>
7#include <linux/slab.h>
8#include <linux/prefetch.h>
9#include "mount.h"
10
ad08ae58
AV
/*
 * Cursor for building a string back-to-front.
 * @buf: points at the first byte written so far (moves towards lower
 *       addresses as data is prepended).
 * @len: bytes still free in front of @buf; a negative value marks the
 *       buffer as overflowed.
 */
struct prepend_buffer {
	char *buf;
	int len;
};

/*
 * Declare a prepend_buffer called @__name positioned at the END of the
 * @__len-byte area starting at @__buf, i.e. with nothing written yet.
 *
 * Arguments are parenthesized so that arbitrary expressions (conditional,
 * shift, ...) can be passed safely.  Note that @__len is evaluated twice,
 * so it must not have side effects.
 */
#define DECLARE_BUFFER(__name, __buf, __len) \
	struct prepend_buffer __name = {.buf = (__buf) + (__len), .len = (__len)}
17
18static char *extract_string(struct prepend_buffer *p)
7a5cf791 19{
ad08ae58
AV
20 if (likely(p->len >= 0))
21 return p->buf;
22 return ERR_PTR(-ENAMETOOLONG);
23}
24
b0cfcdd9 25static bool prepend_char(struct prepend_buffer *p, unsigned char c)
ad08ae58 26{
b0cfcdd9
LT
27 if (likely(p->len > 0)) {
28 p->len--;
29 *--p->buf = c;
30 return true;
31 }
32 p->len = -1;
33 return false;
34}
35
/*
 * The data being prepended may come from an optimistic, lockless load of
 * a dentry name and its length.  A concurrent rename can leave the length
 * and the name pointer out of sync, in which case the kernel copy may
 * fault.
 *
 * The caller's rename sequence count check will eventually sort that out,
 * but the fault itself has to be survived here: on a failed copy the
 * destination is filled with 'x' and false is returned.
 */
static bool prepend_copy(void *dst, const void *src, int len)
{
	if (likely(!copy_from_kernel_nofault(dst, src, len)))
		return true;

	memset(dst, 'x', len);
	return false;
}
55
56static bool prepend(struct prepend_buffer *p, const char *str, int namelen)
57{
58 // Already overflowed?
59 if (p->len < 0)
60 return false;
61
62 // Will overflow?
63 if (p->len < namelen) {
64 // Fill as much as possible from the end of the name
65 str += namelen - p->len;
66 p->buf -= p->len;
67 prepend_copy(p->buf, str, p->len);
68 p->len = -1;
69 return false;
70 }
71
72 // Fits fully
73 p->len -= namelen;
74 p->buf -= namelen;
75 return prepend_copy(p->buf, str, namelen);
7a5cf791
AV
76}
77
78/**
79 * prepend_name - prepend a pathname in front of current buffer pointer
d41b6035
JH
80 * @p: prepend buffer which contains buffer pointer and allocated length
81 * @name: name string and length qstr structure
7a5cf791
AV
82 *
83 * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
84 * make sure that either the old or the new name pointer and length are
85 * fetched. However, there may be mismatch between length and pointer.
b0cfcdd9
LT
86 * But since the length cannot be trusted, we need to copy the name very
87 * carefully when doing the prepend_copy(). It also prepends "/" at
7a5cf791
AV
88 * the beginning of the name. The sequence number check at the caller will
89 * retry it again when a d_move() does happen. So any garbage in the buffer
90 * due to mismatched pointer and length will be discarded.
91 *
b0cfcdd9
LT
92 * Load acquire is needed to make sure that we see the new name data even
93 * if we might get the length wrong.
7a5cf791 94 */
ad08ae58 95static bool prepend_name(struct prepend_buffer *p, const struct qstr *name)
7a5cf791
AV
96{
97 const char *dname = smp_load_acquire(&name->name); /* ^^^ */
98 u32 dlen = READ_ONCE(name->len);
7a5cf791 99
b0cfcdd9 100 return prepend(p, dname, dlen) && prepend_char(p, '/');
7a5cf791
AV
101}
102
008673ff
AV
103static int __prepend_path(const struct dentry *dentry, const struct mount *mnt,
104 const struct path *root, struct prepend_buffer *p)
105{
106 while (dentry != root->dentry || &mnt->mnt != root->mnt) {
107 const struct dentry *parent = READ_ONCE(dentry->d_parent);
108
109 if (dentry == mnt->mnt.mnt_root) {
110 struct mount *m = READ_ONCE(mnt->mnt_parent);
111 struct mnt_namespace *mnt_ns;
112
113 if (likely(mnt != m)) {
114 dentry = READ_ONCE(mnt->mnt_mountpoint);
115 mnt = m;
116 continue;
117 }
118 /* Global root */
119 mnt_ns = READ_ONCE(mnt->mnt_ns);
120 /* open-coded is_mounted() to use local mnt_ns */
121 if (!IS_ERR_OR_NULL(mnt_ns) && !is_anon_ns(mnt_ns))
122 return 1; // absolute root
123 else
124 return 2; // detached or not attached yet
125 }
126
127 if (unlikely(dentry == parent))
128 /* Escaped? */
129 return 3;
130
131 prefetch(parent);
132 if (!prepend_name(p, &dentry->d_name))
133 break;
134 dentry = parent;
135 }
136 return 0;
137}
138
7a5cf791
AV
139/**
140 * prepend_path - Prepend path string to a buffer
141 * @path: the dentry/vfsmount to report
142 * @root: root vfsmnt/dentry
d41b6035 143 * @p: prepend buffer which contains buffer pointer and allocated length
7a5cf791
AV
144 *
145 * The function will first try to write out the pathname without taking any
146 * lock other than the RCU read lock to make sure that dentries won't go away.
147 * It only checks the sequence number of the global rename_lock as any change
148 * in the dentry's d_seq will be preceded by changes in the rename_lock
149 * sequence number. If the sequence number had been changed, it will restart
150 * the whole pathname back-tracing sequence again by taking the rename_lock.
151 * In this case, there is no need to take the RCU read lock as the recursive
152 * parent pointer references will keep the dentry chain alive as long as no
153 * rename operation is performed.
154 */
155static int prepend_path(const struct path *path,
156 const struct path *root,
ad08ae58 157 struct prepend_buffer *p)
7a5cf791 158{
7a5cf791 159 unsigned seq, m_seq = 0;
ad08ae58 160 struct prepend_buffer b;
008673ff 161 int error;
7a5cf791
AV
162
163 rcu_read_lock();
164restart_mnt:
165 read_seqbegin_or_lock(&mount_lock, &m_seq);
166 seq = 0;
167 rcu_read_lock();
168restart:
ad08ae58 169 b = *p;
7a5cf791 170 read_seqbegin_or_lock(&rename_lock, &seq);
008673ff 171 error = __prepend_path(path->dentry, real_mount(path->mnt), root, &b);
7a5cf791
AV
172 if (!(seq & 1))
173 rcu_read_unlock();
174 if (need_seqretry(&rename_lock, seq)) {
175 seq = 1;
176 goto restart;
177 }
178 done_seqretry(&rename_lock, seq);
179
180 if (!(m_seq & 1))
181 rcu_read_unlock();
182 if (need_seqretry(&mount_lock, m_seq)) {
183 m_seq = 1;
184 goto restart_mnt;
185 }
186 done_seqretry(&mount_lock, m_seq);
187
2dac0ad1
AV
188 if (unlikely(error == 3))
189 b = *p;
190
ad08ae58 191 if (b.len == p->len)
b0cfcdd9 192 prepend_char(&b, '/');
01a4428e 193
ad08ae58 194 *p = b;
7a5cf791
AV
195 return error;
196}
197
198/**
199 * __d_path - return the path of a dentry
200 * @path: the dentry/vfsmount to report
201 * @root: root vfsmnt/dentry
202 * @buf: buffer to return value in
203 * @buflen: buffer length
204 *
205 * Convert a dentry into an ASCII path name.
206 *
207 * Returns a pointer into the buffer or an error code if the
208 * path was too long.
209 *
210 * "buflen" should be positive.
211 *
212 * If the path is not reachable from the supplied root, return %NULL.
213 */
214char *__d_path(const struct path *path,
215 const struct path *root,
216 char *buf, int buflen)
217{
ad08ae58 218 DECLARE_BUFFER(b, buf, buflen);
7a5cf791 219
b0cfcdd9 220 prepend_char(&b, 0);
cf4febc1 221 if (unlikely(prepend_path(path, root, &b) > 0))
7a5cf791 222 return NULL;
ad08ae58 223 return extract_string(&b);
7a5cf791
AV
224}
225
226char *d_absolute_path(const struct path *path,
227 char *buf, int buflen)
228{
229 struct path root = {};
ad08ae58 230 DECLARE_BUFFER(b, buf, buflen);
7a5cf791 231
b0cfcdd9 232 prepend_char(&b, 0);
cf4febc1 233 if (unlikely(prepend_path(path, &root, &b) > 1))
01a4428e 234 return ERR_PTR(-EINVAL);
ad08ae58 235 return extract_string(&b);
7a5cf791
AV
236}
237
7a5cf791
AV
238static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
239{
240 unsigned seq;
241
242 do {
243 seq = read_seqcount_begin(&fs->seq);
244 *root = fs->root;
245 } while (read_seqcount_retry(&fs->seq, seq));
246}
247
248/**
249 * d_path - return the path of a dentry
250 * @path: path to report
251 * @buf: buffer to return value in
252 * @buflen: buffer length
253 *
254 * Convert a dentry into an ASCII path name. If the entry has been deleted
255 * the string " (deleted)" is appended. Note that this is ambiguous.
256 *
257 * Returns a pointer into the buffer or an error code if the path was
258 * too long. Note: Callers should use the returned pointer, not the passed
259 * in buffer, to use the name! The implementation often starts at an offset
260 * into the buffer, and may leave 0 bytes at the start.
261 *
262 * "buflen" should be positive.
263 */
264char *d_path(const struct path *path, char *buf, int buflen)
265{
ad08ae58 266 DECLARE_BUFFER(b, buf, buflen);
7a5cf791 267 struct path root;
7a5cf791
AV
268
269 /*
270 * We have various synthetic filesystems that never get mounted. On
271 * these filesystems dentries are never used for lookup purposes, and
272 * thus don't need to be hashed. They also don't need a name until a
273 * user wants to identify the object in /proc/pid/fd/. The little hack
274 * below allows us to generate a name for these objects on demand:
275 *
276 * Some pseudo inodes are mountable. When they are mounted
277 * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
278 * and instead have d_path return the mounted path.
279 */
280 if (path->dentry->d_op && path->dentry->d_op->d_dname &&
281 (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
282 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
283
284 rcu_read_lock();
285 get_fs_root_rcu(current->fs, &root);
9024348f 286 if (unlikely(d_unlinked(path->dentry)))
ad08ae58 287 prepend(&b, " (deleted)", 11);
9024348f 288 else
b0cfcdd9 289 prepend_char(&b, 0);
ad08ae58 290 prepend_path(path, &root, &b);
7a5cf791
AV
291 rcu_read_unlock();
292
ad08ae58 293 return extract_string(&b);
7a5cf791
AV
294}
295EXPORT_SYMBOL(d_path);
296
/*
 * Helper function for dentry_operations.d_dname() members.
 *
 * Formats @fmt into a 64-byte scratch area and places the result,
 * including its terminating NUL, at the very end of @buffer.  Returns a
 * pointer to the start of the string inside @buffer, or
 * ERR_PTR(-ENAMETOOLONG) if the formatted name fits neither the scratch
 * area nor @buflen.
 */
char *dynamic_dname(char *buffer, int buflen, const char *fmt, ...)
{
	char scratch[64];
	va_list ap;
	int need;

	va_start(ap, fmt);
	/* +1 accounts for the terminating NUL */
	need = vsnprintf(scratch, sizeof(scratch), fmt, ap) + 1;
	va_end(ap);

	if (need > sizeof(scratch) || need > buflen)
		return ERR_PTR(-ENAMETOOLONG);

	return memcpy(buffer + (buflen - need), scratch, need);
}
316
317char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
318{
ad08ae58 319 DECLARE_BUFFER(b, buffer, buflen);
7a5cf791 320 /* these dentries are never renamed, so d_lock is not needed */
ad08ae58
AV
321 prepend(&b, " (deleted)", 11);
322 prepend(&b, dentry->d_name.name, dentry->d_name.len);
b0cfcdd9 323 prepend_char(&b, '/');
ad08ae58 324 return extract_string(&b);
7a5cf791 325}
7a5cf791
AV
326
327/*
328 * Write full pathname from the root of the filesystem into the buffer.
329 */
ad08ae58 330static char *__dentry_path(const struct dentry *d, struct prepend_buffer *p)
7a5cf791 331{
a2bbe664 332 const struct dentry *dentry;
ad08ae58
AV
333 struct prepend_buffer b;
334 int seq = 0;
7a5cf791
AV
335
336 rcu_read_lock();
337restart:
338 dentry = d;
ad08ae58 339 b = *p;
7a5cf791
AV
340 read_seqbegin_or_lock(&rename_lock, &seq);
341 while (!IS_ROOT(dentry)) {
a2bbe664 342 const struct dentry *parent = dentry->d_parent;
7a5cf791
AV
343
344 prefetch(parent);
ad08ae58 345 if (!prepend_name(&b, &dentry->d_name))
7a5cf791 346 break;
7a5cf791
AV
347 dentry = parent;
348 }
349 if (!(seq & 1))
350 rcu_read_unlock();
351 if (need_seqretry(&rename_lock, seq)) {
352 seq = 1;
353 goto restart;
354 }
355 done_seqretry(&rename_lock, seq);
ad08ae58 356 if (b.len == p->len)
b0cfcdd9 357 prepend_char(&b, '/');
ad08ae58 358 return extract_string(&b);
7a5cf791
AV
359}
360
a2bbe664 361char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
7a5cf791 362{
ad08ae58
AV
363 DECLARE_BUFFER(b, buf, buflen);
364
b0cfcdd9 365 prepend_char(&b, 0);
ad08ae58 366 return __dentry_path(dentry, &b);
7a5cf791
AV
367}
368EXPORT_SYMBOL(dentry_path_raw);
369
a2bbe664 370char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
7a5cf791 371{
ad08ae58 372 DECLARE_BUFFER(b, buf, buflen);
3a291c97
AV
373
374 if (unlikely(d_unlinked(dentry)))
ad08ae58 375 prepend(&b, "//deleted", 10);
3a291c97 376 else
b0cfcdd9 377 prepend_char(&b, 0);
ad08ae58 378 return __dentry_path(dentry, &b);
7a5cf791
AV
379}
380
381static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
382 struct path *pwd)
383{
384 unsigned seq;
385
386 do {
387 seq = read_seqcount_begin(&fs->seq);
388 *root = fs->root;
389 *pwd = fs->pwd;
390 } while (read_seqcount_retry(&fs->seq, seq));
391}
392
393/*
394 * NOTE! The user-level library version returns a
395 * character pointer. The kernel system call just
396 * returns the length of the buffer filled (which
397 * includes the ending '\0' character), or a negative
398 * error value. So libc would do something like
399 *
400 * char *getcwd(char * buf, size_t size)
401 * {
402 * int retval;
403 *
404 * retval = sys_getcwd(buf, size);
405 * if (retval >= 0)
406 * return buf;
407 * errno = -retval;
408 * return NULL;
409 * }
410 */
411SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
412{
413 int error;
414 struct path pwd, root;
415 char *page = __getname();
416
417 if (!page)
418 return -ENOMEM;
419
420 rcu_read_lock();
421 get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
422
e4b27553
AV
423 if (unlikely(d_unlinked(pwd.dentry))) {
424 rcu_read_unlock();
425 error = -ENOENT;
426 } else {
427 unsigned len;
ad08ae58 428 DECLARE_BUFFER(b, page, PATH_MAX);
7a5cf791 429
b0cfcdd9 430 prepend_char(&b, 0);
cf4febc1 431 if (unlikely(prepend_path(&pwd, &root, &b) > 0))
ad08ae58 432 prepend(&b, "(unreachable)", 13);
7a5cf791
AV
433 rcu_read_unlock();
434
ad08ae58 435 len = PATH_MAX - b.len;
e4b27553
AV
436 if (unlikely(len > PATH_MAX))
437 error = -ENAMETOOLONG;
438 else if (unlikely(len > size))
439 error = -ERANGE;
440 else if (copy_to_user(buf, b.buf, len))
441 error = -EFAULT;
442 else
7a5cf791 443 error = len;
7a5cf791 444 }
7a5cf791
AV
445 __putname(page);
446 return error;
447}