sysfs: Simplify readdir.
[linux-2.6-block.git] / fs / sysfs / dir.c
CommitLineData
1da177e4
LT
1/*
2 * dir.c - Operations for sysfs directories.
3 */
4
5#undef DEBUG
6
7#include <linux/fs.h>
8#include <linux/mount.h>
9#include <linux/module.h>
10#include <linux/kobject.h>
5f45f1a7 11#include <linux/namei.h>
2b611bb7 12#include <linux/idr.h>
8619f979 13#include <linux/completion.h>
869512ab 14#include <linux/mutex.h>
1da177e4
LT
15#include "sysfs.h"
16
3007e997 17DEFINE_MUTEX(sysfs_mutex);
5f995323 18spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
1da177e4 19
2b611bb7
TH
20static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
21static DEFINE_IDA(sysfs_ino_ida);
22
0c73f18b
TH
23/**
24 * sysfs_link_sibling - link sysfs_dirent into sibling list
25 * @sd: sysfs_dirent of interest
26 *
27 * Link @sd into its sibling list which starts from
28 * sd->s_parent->s_children.
29 *
30 * Locking:
3007e997 31 * mutex_lock(sysfs_mutex)
0c73f18b 32 */
41fc1c27 33static void sysfs_link_sibling(struct sysfs_dirent *sd)
0c73f18b
TH
34{
35 struct sysfs_dirent *parent_sd = sd->s_parent;
3efa65b9 36 struct sysfs_dirent **pos;
0c73f18b
TH
37
38 BUG_ON(sd->s_sibling);
3efa65b9
EB
39
40 /* Store directory entries in order by ino. This allows
41 * readdir to properly restart without having to add a
42 * cursor into the s_children list.
43 */
44 for (pos = &parent_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
45 if (sd->s_ino < (*pos)->s_ino)
46 break;
47 }
48 sd->s_sibling = *pos;
49 *pos = sd;
0c73f18b
TH
50}
51
52/**
53 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
54 * @sd: sysfs_dirent of interest
55 *
56 * Unlink @sd from its sibling list which starts from
57 * sd->s_parent->s_children.
58 *
59 * Locking:
3007e997 60 * mutex_lock(sysfs_mutex)
0c73f18b 61 */
41fc1c27 62static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
0c73f18b
TH
63{
64 struct sysfs_dirent **pos;
65
66 for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
67 if (*pos == sd) {
68 *pos = sd->s_sibling;
69 sd->s_sibling = NULL;
70 break;
71 }
72 }
73}
74
53e0ae92
TH
75/**
76 * sysfs_get_dentry - get dentry for the given sysfs_dirent
77 * @sd: sysfs_dirent of interest
78 *
79 * Get dentry for @sd. Dentry is looked up if currently not
80 * present. This function climbs sysfs_dirent tree till it
81 * reaches a sysfs_dirent with valid dentry attached and descends
82 * down from there looking up dentry for each step.
83 *
84 * LOCKING:
85 * Kernel thread context (may sleep)
86 *
87 * RETURNS:
88 * Pointer to found dentry on success, ERR_PTR() value on error.
89 */
90struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
91{
92 struct sysfs_dirent *cur;
93 struct dentry *parent_dentry, *dentry;
94 int i, depth;
95
96 /* Find the first parent which has valid s_dentry and get the
97 * dentry.
98 */
99 mutex_lock(&sysfs_mutex);
100 restart0:
101 spin_lock(&sysfs_assoc_lock);
102 restart1:
103 spin_lock(&dcache_lock);
104
105 dentry = NULL;
106 depth = 0;
107 cur = sd;
108 while (!cur->s_dentry || !cur->s_dentry->d_inode) {
109 if (cur->s_flags & SYSFS_FLAG_REMOVED) {
110 dentry = ERR_PTR(-ENOENT);
111 depth = 0;
112 break;
113 }
114 cur = cur->s_parent;
115 depth++;
116 }
117 if (!IS_ERR(dentry))
118 dentry = dget_locked(cur->s_dentry);
119
120 spin_unlock(&dcache_lock);
121 spin_unlock(&sysfs_assoc_lock);
122
123 /* from the found dentry, look up depth times */
124 while (depth--) {
125 /* find and get depth'th ancestor */
126 for (cur = sd, i = 0; cur && i < depth; i++)
127 cur = cur->s_parent;
128
129 /* This can happen if tree structure was modified due
130 * to move/rename. Restart.
131 */
132 if (i != depth) {
133 dput(dentry);
134 goto restart0;
135 }
136
137 sysfs_get(cur);
138
139 mutex_unlock(&sysfs_mutex);
140
141 /* look it up */
142 parent_dentry = dentry;
25328026 143 mutex_lock(&parent_dentry->d_inode->i_mutex);
53e0ae92
TH
144 dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
145 strlen(cur->s_name));
25328026 146 mutex_unlock(&parent_dentry->d_inode->i_mutex);
53e0ae92
TH
147 dput(parent_dentry);
148
149 if (IS_ERR(dentry)) {
150 sysfs_put(cur);
151 return dentry;
152 }
153
154 mutex_lock(&sysfs_mutex);
155 spin_lock(&sysfs_assoc_lock);
156
157 /* This, again, can happen if tree structure has
158 * changed and we looked up the wrong thing. Restart.
159 */
160 if (cur->s_dentry != dentry) {
161 dput(dentry);
162 sysfs_put(cur);
163 goto restart1;
164 }
165
166 spin_unlock(&sysfs_assoc_lock);
167
168 sysfs_put(cur);
169 }
170
171 mutex_unlock(&sysfs_mutex);
172 return dentry;
173}
174
b6b4a439
TH
175/**
176 * sysfs_get_active - get an active reference to sysfs_dirent
177 * @sd: sysfs_dirent to get an active reference to
178 *
179 * Get an active reference of @sd. This function is noop if @sd
180 * is NULL.
181 *
182 * RETURNS:
183 * Pointer to @sd on success, NULL on failure.
184 */
185struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
186{
8619f979
TH
187 if (unlikely(!sd))
188 return NULL;
189
190 while (1) {
191 int v, t;
192
193 v = atomic_read(&sd->s_active);
194 if (unlikely(v < 0))
195 return NULL;
196
197 t = atomic_cmpxchg(&sd->s_active, v, v + 1);
198 if (likely(t == v))
199 return sd;
200 if (t < 0)
201 return NULL;
202
203 cpu_relax();
b6b4a439 204 }
b6b4a439
TH
205}
206
207/**
208 * sysfs_put_active - put an active reference to sysfs_dirent
209 * @sd: sysfs_dirent to put an active reference to
210 *
211 * Put an active reference to @sd. This function is noop if @sd
212 * is NULL.
213 */
214void sysfs_put_active(struct sysfs_dirent *sd)
215{
8619f979
TH
216 struct completion *cmpl;
217 int v;
218
219 if (unlikely(!sd))
220 return;
221
222 v = atomic_dec_return(&sd->s_active);
223 if (likely(v != SD_DEACTIVATED_BIAS))
224 return;
225
226 /* atomic_dec_return() is a mb(), we'll always see the updated
0c73f18b 227 * sd->s_sibling.
8619f979 228 */
0c73f18b 229 cmpl = (void *)sd->s_sibling;
8619f979 230 complete(cmpl);
b6b4a439
TH
231}
232
233/**
234 * sysfs_get_active_two - get active references to sysfs_dirent and parent
235 * @sd: sysfs_dirent of interest
236 *
237 * Get active reference to @sd and its parent. Parent's active
238 * reference is grabbed first. This function is noop if @sd is
239 * NULL.
240 *
241 * RETURNS:
242 * Pointer to @sd on success, NULL on failure.
243 */
244struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
245{
246 if (sd) {
247 if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
248 return NULL;
249 if (unlikely(!sysfs_get_active(sd))) {
250 sysfs_put_active(sd->s_parent);
251 return NULL;
252 }
253 }
254 return sd;
255}
256
257/**
258 * sysfs_put_active_two - put active references to sysfs_dirent and parent
259 * @sd: sysfs_dirent of interest
260 *
261 * Put active references to @sd and its parent. This function is
262 * noop if @sd is NULL.
263 */
264void sysfs_put_active_two(struct sysfs_dirent *sd)
265{
266 if (sd) {
267 sysfs_put_active(sd);
268 sysfs_put_active(sd->s_parent);
269 }
270}
271
272/**
273 * sysfs_deactivate - deactivate sysfs_dirent
274 * @sd: sysfs_dirent to deactivate
275 *
8619f979 276 * Deny new active references and drain existing ones.
b6b4a439 277 */
fb6896da 278static void sysfs_deactivate(struct sysfs_dirent *sd)
b6b4a439 279{
8619f979
TH
280 DECLARE_COMPLETION_ONSTACK(wait);
281 int v;
b6b4a439 282
380e6fbb 283 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
0c73f18b 284 sd->s_sibling = (void *)&wait;
8619f979
TH
285
286 /* atomic_add_return() is a mb(), put_active() will always see
0c73f18b 287 * the updated sd->s_sibling.
b6b4a439 288 */
8619f979
TH
289 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
290
291 if (v != SD_DEACTIVATED_BIAS)
292 wait_for_completion(&wait);
293
0c73f18b 294 sd->s_sibling = NULL;
b6b4a439
TH
295}
296
42b37df6 297static int sysfs_alloc_ino(ino_t *pino)
2b611bb7
TH
298{
299 int ino, rc;
300
301 retry:
302 spin_lock(&sysfs_ino_lock);
303 rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
304 spin_unlock(&sysfs_ino_lock);
305
306 if (rc == -EAGAIN) {
307 if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
308 goto retry;
309 rc = -ENOMEM;
310 }
311
312 *pino = ino;
313 return rc;
314}
315
316static void sysfs_free_ino(ino_t ino)
317{
318 spin_lock(&sysfs_ino_lock);
319 ida_remove(&sysfs_ino_ida, ino);
320 spin_unlock(&sysfs_ino_lock);
321}
322
fa7f912a
TH
323void release_sysfs_dirent(struct sysfs_dirent * sd)
324{
13b3086d
TH
325 struct sysfs_dirent *parent_sd;
326
327 repeat:
3007e997
TH
328 /* Moving/renaming is always done while holding reference.
329 * sd->s_parent won't change beneath us.
330 */
13b3086d
TH
331 parent_sd = sd->s_parent;
332
b402d72c 333 if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
2b29ac25 334 sysfs_put(sd->s_elem.symlink.target_sd);
b402d72c 335 if (sysfs_type(sd) & SYSFS_COPY_NAME)
0c096b50 336 kfree(sd->s_name);
fa7f912a 337 kfree(sd->s_iattr);
2b611bb7 338 sysfs_free_ino(sd->s_ino);
fa7f912a 339 kmem_cache_free(sysfs_dir_cachep, sd);
13b3086d
TH
340
341 sd = parent_sd;
342 if (sd && atomic_dec_and_test(&sd->s_count))
343 goto repeat;
fa7f912a
TH
344}
345
1da177e4
LT
346static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
347{
348 struct sysfs_dirent * sd = dentry->d_fsdata;
349
350 if (sd) {
5f995323
TH
351 /* sd->s_dentry is protected with sysfs_assoc_lock.
352 * This allows sysfs_drop_dentry() to dereference it.
dd14cbc9 353 */
5f995323 354 spin_lock(&sysfs_assoc_lock);
dd14cbc9
TH
355
356 /* The dentry might have been deleted or another
357 * lookup could have happened updating sd->s_dentry to
358 * point the new dentry. Ignore if it isn't pointing
359 * to this dentry.
360 */
361 if (sd->s_dentry == dentry)
362 sd->s_dentry = NULL;
5f995323 363 spin_unlock(&sysfs_assoc_lock);
1da177e4
LT
364 sysfs_put(sd);
365 }
366 iput(inode);
367}
368
369static struct dentry_operations sysfs_dentry_ops = {
370 .d_iput = sysfs_d_iput,
371};
372
3e519038 373struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
1da177e4 374{
0c096b50 375 char *dup_name = NULL;
01da2425 376 struct sysfs_dirent *sd;
0c096b50
TH
377
378 if (type & SYSFS_COPY_NAME) {
379 name = dup_name = kstrdup(name, GFP_KERNEL);
380 if (!name)
01da2425 381 return NULL;
0c096b50 382 }
1da177e4 383
c3762229 384 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
1da177e4 385 if (!sd)
01da2425 386 goto err_out1;
1da177e4 387
0c096b50 388 if (sysfs_alloc_ino(&sd->s_ino))
01da2425 389 goto err_out2;
2b611bb7 390
1da177e4 391 atomic_set(&sd->s_count, 1);
8619f979 392 atomic_set(&sd->s_active, 0);