Commit | Line | Data |
---|---|---|
7b6be844 DW |
1 | /* |
2 | * Copyright(c) 2017 Intel Corporation. All rights reserved. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of version 2 of the GNU General Public License as | |
6 | * published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | */ | |
13 | #include <linux/pagemap.h> | |
14 | #include <linux/module.h> | |
15 | #include <linux/mount.h> | |
16 | #include <linux/magic.h> | |
17 | #include <linux/cdev.h> | |
18 | #include <linux/hash.h> | |
19 | #include <linux/slab.h> | |
20 | #include <linux/fs.h> | |
21 | ||
22 | static int nr_dax = CONFIG_NR_DEV_DAX; | |
23 | module_param(nr_dax, int, S_IRUGO); | |
24 | MODULE_PARM_DESC(nr_dax, "max number of dax device instances"); | |
25 | ||
26 | static dev_t dax_devt; | |
27 | DEFINE_STATIC_SRCU(dax_srcu); | |
28 | static struct vfsmount *dax_mnt; | |
29 | static DEFINE_IDA(dax_minor_ida); | |
30 | static struct kmem_cache *dax_cache __read_mostly; | |
31 | static struct super_block *dax_superblock __read_mostly; | |
32 | ||
72058005 DW |
33 | #define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head)) |
34 | static struct hlist_head dax_host_list[DAX_HASH_SIZE]; | |
35 | static DEFINE_SPINLOCK(dax_host_lock); | |
36 | ||
7b6be844 DW |
37 | int dax_read_lock(void) |
38 | { | |
39 | return srcu_read_lock(&dax_srcu); | |
40 | } | |
41 | EXPORT_SYMBOL_GPL(dax_read_lock); | |
42 | ||
43 | void dax_read_unlock(int id) | |
44 | { | |
45 | srcu_read_unlock(&dax_srcu, id); | |
46 | } | |
47 | EXPORT_SYMBOL_GPL(dax_read_unlock); | |
48 | ||
/**
 * struct dax_device - anchor object for dax services
 * @list: linkage into the dax_host_list bucket chosen from @host; added in
 *	dax_add_host() and removed in kill_dax()
 * @inode: core vfs
 * @cdev: optional character interface for "device dax"
 * @host: optional name for lookups where the device path is not available;
 *	a private copy made by alloc_dax() and freed in dax_i_callback()
 * @private: dax driver private data; cleared by kill_dax()
 * @alive: !alive + rcu grace period == no new operations / mappings
 */
struct dax_device {
	struct hlist_node list;
	struct inode inode;
	struct cdev cdev;
	const char *host;
	void *private;
	bool alive;
};
65 | ||
66 | bool dax_alive(struct dax_device *dax_dev) | |
67 | { | |
68 | lockdep_assert_held(&dax_srcu); | |
69 | return dax_dev->alive; | |
70 | } | |
71 | EXPORT_SYMBOL_GPL(dax_alive); | |
72 | ||
72058005 DW |
73 | static int dax_host_hash(const char *host) |
74 | { | |
75 | return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE; | |
76 | } | |
77 | ||
7b6be844 DW |
78 | /* |
79 | * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring | |
80 | * that any fault handlers or operations that might have seen | |
81 | * dax_alive(), have completed. Any operations that start after | |
82 | * synchronize_srcu() has run will abort upon seeing !dax_alive(). | |
83 | */ | |
84 | void kill_dax(struct dax_device *dax_dev) | |
85 | { | |
86 | if (!dax_dev) | |
87 | return; | |
88 | ||
89 | dax_dev->alive = false; | |
72058005 | 90 | |
7b6be844 | 91 | synchronize_srcu(&dax_srcu); |
72058005 DW |
92 | |
93 | spin_lock(&dax_host_lock); | |
94 | hlist_del_init(&dax_dev->list); | |
95 | spin_unlock(&dax_host_lock); | |
96 | ||
7b6be844 DW |
97 | dax_dev->private = NULL; |
98 | } | |
99 | EXPORT_SYMBOL_GPL(kill_dax); | |
100 | ||
101 | static struct inode *dax_alloc_inode(struct super_block *sb) | |
102 | { | |
103 | struct dax_device *dax_dev; | |
104 | ||
105 | dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL); | |
106 | return &dax_dev->inode; | |
107 | } | |
108 | ||
109 | static struct dax_device *to_dax_dev(struct inode *inode) | |
110 | { | |
111 | return container_of(inode, struct dax_device, inode); | |
112 | } | |
113 | ||
114 | static void dax_i_callback(struct rcu_head *head) | |
115 | { | |
116 | struct inode *inode = container_of(head, struct inode, i_rcu); | |
117 | struct dax_device *dax_dev = to_dax_dev(inode); | |
118 | ||
72058005 DW |
119 | kfree(dax_dev->host); |
120 | dax_dev->host = NULL; | |
7b6be844 DW |
121 | ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev)); |
122 | kmem_cache_free(dax_cache, dax_dev); | |
123 | } | |
124 | ||
125 | static void dax_destroy_inode(struct inode *inode) | |
126 | { | |
127 | struct dax_device *dax_dev = to_dax_dev(inode); | |
128 | ||
129 | WARN_ONCE(dax_dev->alive, | |
130 | "kill_dax() must be called before final iput()\n"); | |
131 | call_rcu(&inode->i_rcu, dax_i_callback); | |
132 | } | |
133 | ||
134 | static const struct super_operations dax_sops = { | |
135 | .statfs = simple_statfs, | |
136 | .alloc_inode = dax_alloc_inode, | |
137 | .destroy_inode = dax_destroy_inode, | |
138 | .drop_inode = generic_delete_inode, | |
139 | }; | |
140 | ||
141 | static struct dentry *dax_mount(struct file_system_type *fs_type, | |
142 | int flags, const char *dev_name, void *data) | |
143 | { | |
144 | return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC); | |
145 | } | |
146 | ||
147 | static struct file_system_type dax_fs_type = { | |
148 | .name = "dax", | |
149 | .mount = dax_mount, | |
150 | .kill_sb = kill_anon_super, | |
151 | }; | |
152 | ||
153 | static int dax_test(struct inode *inode, void *data) | |
154 | { | |
155 | dev_t devt = *(dev_t *) data; | |
156 | ||
157 | return inode->i_rdev == devt; | |
158 | } | |
159 | ||
160 | static int dax_set(struct inode *inode, void *data) | |
161 | { | |
162 | dev_t devt = *(dev_t *) data; | |
163 | ||
164 | inode->i_rdev = devt; | |
165 | return 0; | |
166 | } | |
167 | ||
168 | static struct dax_device *dax_dev_get(dev_t devt) | |
169 | { | |
170 | struct dax_device *dax_dev; | |
171 | struct inode *inode; | |
172 | ||
173 | inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31), | |
174 | dax_test, dax_set, &devt); | |
175 | ||
176 | if (!inode) | |
177 | return NULL; | |
178 | ||
179 | dax_dev = to_dax_dev(inode); | |
180 | if (inode->i_state & I_NEW) { | |
181 | dax_dev->alive = true; | |
182 | inode->i_cdev = &dax_dev->cdev; | |
183 | inode->i_mode = S_IFCHR; | |
184 | inode->i_flags = S_DAX; | |
185 | mapping_set_gfp_mask(&inode->i_data, GFP_USER); | |
186 | unlock_new_inode(inode); | |
187 | } | |
188 | ||
189 | return dax_dev; | |
190 | } | |
191 | ||
72058005 DW |
192 | static void dax_add_host(struct dax_device *dax_dev, const char *host) |
193 | { | |
194 | int hash; | |
195 | ||
196 | /* | |
197 | * Unconditionally init dax_dev since it's coming from a | |
198 | * non-zeroed slab cache | |
199 | */ | |
200 | INIT_HLIST_NODE(&dax_dev->list); | |
201 | dax_dev->host = host; | |
202 | if (!host) | |
203 | return; | |
204 | ||
205 | hash = dax_host_hash(host); | |
206 | spin_lock(&dax_host_lock); | |
207 | hlist_add_head(&dax_dev->list, &dax_host_list[hash]); | |
208 | spin_unlock(&dax_host_lock); | |
209 | } | |
210 | ||
211 | struct dax_device *alloc_dax(void *private, const char *__host) | |
7b6be844 DW |
212 | { |
213 | struct dax_device *dax_dev; | |
72058005 | 214 | const char *host; |
7b6be844 DW |
215 | dev_t devt; |
216 | int minor; | |
217 | ||
72058005 DW |
218 | host = kstrdup(__host, GFP_KERNEL); |
219 | if (__host && !host) | |
220 | return NULL; | |
221 | ||
7b6be844 DW |
222 | minor = ida_simple_get(&dax_minor_ida, 0, nr_dax, GFP_KERNEL); |
223 | if (minor < 0) | |
72058005 | 224 | goto err_minor; |
7b6be844 DW |
225 | |
226 | devt = MKDEV(MAJOR(dax_devt), minor); | |
227 | dax_dev = dax_dev_get(devt); | |
228 | if (!dax_dev) | |
72058005 | 229 | goto err_dev; |
7b6be844 | 230 | |
72058005 | 231 | dax_add_host(dax_dev, host); |
7b6be844 DW |
232 | dax_dev->private = private; |
233 | return dax_dev; | |
234 | ||
72058005 | 235 | err_dev: |
7b6be844 | 236 | ida_simple_remove(&dax_minor_ida, minor); |
72058005 DW |
237 | err_minor: |
238 | kfree(host); | |
7b6be844 DW |
239 | return NULL; |
240 | } | |
241 | EXPORT_SYMBOL_GPL(alloc_dax); | |
242 | ||
243 | void put_dax(struct dax_device *dax_dev) | |
244 | { | |
245 | if (!dax_dev) | |
246 | return; | |
247 | iput(&dax_dev->inode); | |
248 | } | |
249 | EXPORT_SYMBOL_GPL(put_dax); | |
250 | ||
72058005 DW |
251 | /** |
252 | * dax_get_by_host() - temporary lookup mechanism for filesystem-dax | |
253 | * @host: alternate name for the device registered by a dax driver | |
254 | */ | |
255 | struct dax_device *dax_get_by_host(const char *host) | |
256 | { | |
257 | struct dax_device *dax_dev, *found = NULL; | |
258 | int hash, id; | |
259 | ||
260 | if (!host) | |
261 | return NULL; | |
262 | ||
263 | hash = dax_host_hash(host); | |
264 | ||
265 | id = dax_read_lock(); | |
266 | spin_lock(&dax_host_lock); | |
267 | hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) { | |
268 | if (!dax_alive(dax_dev) | |
269 | || strcmp(host, dax_dev->host) != 0) | |
270 | continue; | |
271 | ||
272 | if (igrab(&dax_dev->inode)) | |
273 | found = dax_dev; | |
274 | break; | |
275 | } | |
276 | spin_unlock(&dax_host_lock); | |
277 | dax_read_unlock(id); | |
278 | ||
279 | return found; | |
280 | } | |
281 | EXPORT_SYMBOL_GPL(dax_get_by_host); | |
282 | ||
7b6be844 DW |
283 | /** |
284 | * inode_dax: convert a public inode into its dax_dev | |
285 | * @inode: An inode with i_cdev pointing to a dax_dev | |
286 | * | |
287 | * Note this is not equivalent to to_dax_dev() which is for private | |
288 | * internal use where we know the inode filesystem type == dax_fs_type. | |
289 | */ | |
290 | struct dax_device *inode_dax(struct inode *inode) | |
291 | { | |
292 | struct cdev *cdev = inode->i_cdev; | |
293 | ||
294 | return container_of(cdev, struct dax_device, cdev); | |
295 | } | |
296 | EXPORT_SYMBOL_GPL(inode_dax); | |
297 | ||
298 | struct inode *dax_inode(struct dax_device *dax_dev) | |
299 | { | |
300 | return &dax_dev->inode; | |
301 | } | |
302 | EXPORT_SYMBOL_GPL(dax_inode); | |
303 | ||
304 | void *dax_get_private(struct dax_device *dax_dev) | |
305 | { | |
306 | return dax_dev->private; | |
307 | } | |
308 | EXPORT_SYMBOL_GPL(dax_get_private); | |
309 | ||
310 | static void init_once(void *_dax_dev) | |
311 | { | |
312 | struct dax_device *dax_dev = _dax_dev; | |
313 | struct inode *inode = &dax_dev->inode; | |
314 | ||
315 | inode_init_once(inode); | |
316 | } | |
317 | ||
318 | static int __dax_fs_init(void) | |
319 | { | |
320 | int rc; | |
321 | ||
322 | dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0, | |
323 | (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| | |
324 | SLAB_MEM_SPREAD|SLAB_ACCOUNT), | |
325 | init_once); | |
326 | if (!dax_cache) | |
327 | return -ENOMEM; | |
328 | ||
329 | rc = register_filesystem(&dax_fs_type); | |
330 | if (rc) | |
331 | goto err_register_fs; | |
332 | ||
333 | dax_mnt = kern_mount(&dax_fs_type); | |
334 | if (IS_ERR(dax_mnt)) { | |
335 | rc = PTR_ERR(dax_mnt); | |
336 | goto err_mount; | |
337 | } | |
338 | dax_superblock = dax_mnt->mnt_sb; | |
339 | ||
340 | return 0; | |
341 | ||
342 | err_mount: | |
343 | unregister_filesystem(&dax_fs_type); | |
344 | err_register_fs: | |
345 | kmem_cache_destroy(dax_cache); | |
346 | ||
347 | return rc; | |
348 | } | |
349 | ||
350 | static void __dax_fs_exit(void) | |
351 | { | |
352 | kern_unmount(dax_mnt); | |
353 | unregister_filesystem(&dax_fs_type); | |
354 | kmem_cache_destroy(dax_cache); | |
355 | } | |
356 | ||
357 | static int __init dax_fs_init(void) | |
358 | { | |
359 | int rc; | |
360 | ||
361 | rc = __dax_fs_init(); | |
362 | if (rc) | |
363 | return rc; | |
364 | ||
365 | nr_dax = max(nr_dax, 256); | |
366 | rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax"); | |
367 | if (rc) | |
368 | __dax_fs_exit(); | |
369 | return rc; | |
370 | } | |
371 | ||
372 | static void __exit dax_fs_exit(void) | |
373 | { | |
374 | unregister_chrdev_region(dax_devt, nr_dax); | |
375 | ida_destroy(&dax_minor_ida); | |
376 | __dax_fs_exit(); | |
377 | } | |
378 | ||
379 | MODULE_AUTHOR("Intel Corporation"); | |
380 | MODULE_LICENSE("GPL v2"); | |
381 | subsys_initcall(dax_fs_init); | |
382 | module_exit(dax_fs_exit); |