Commit | Line | Data |
---|---|---|
09c434b8 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
16725b9d | 2 | |
3d14c5d2 | 3 | #include <linux/ceph/ceph_debug.h> |
16725b9d SW |
4 | |
5 | #include <linux/backing-dev.h> | |
c309f0ab | 6 | #include <linux/ctype.h> |
16725b9d SW |
7 | #include <linux/fs.h> |
8 | #include <linux/inet.h> | |
9 | #include <linux/in6.h> | |
10 | #include <linux/module.h> | |
11 | #include <linux/mount.h> | |
82995cc6 DH |
12 | #include <linux/fs_context.h> |
13 | #include <linux/fs_parser.h> | |
16725b9d SW |
14 | #include <linux/sched.h> |
15 | #include <linux/seq_file.h> | |
5a0e3ad6 | 16 | #include <linux/slab.h> |
16725b9d SW |
17 | #include <linux/statfs.h> |
18 | #include <linux/string.h> | |
16725b9d | 19 | |
16725b9d | 20 | #include "super.h" |
3d14c5d2 | 21 | #include "mds_client.h" |
99ccbd22 | 22 | #include "cache.h" |
3d14c5d2 | 23 | |
1fe60e51 | 24 | #include <linux/ceph/ceph_features.h> |
3d14c5d2 YS |
25 | #include <linux/ceph/decode.h> |
26 | #include <linux/ceph/mon_client.h> | |
27 | #include <linux/ceph/auth.h> | |
28 | #include <linux/ceph/debugfs.h> | |
16725b9d | 29 | |
18f473b3 XL |
30 | static DEFINE_SPINLOCK(ceph_fsc_lock); |
31 | static LIST_HEAD(ceph_fsc_list); | |
32 | ||
16725b9d SW |
33 | /* |
34 | * Ceph superblock operations | |
35 | * | |
36 | * Handle the basics of mounting, unmounting. | |
37 | */ | |
38 | ||
16725b9d SW |
39 | /* |
40 | * super ops | |
41 | */ | |
42 | static void ceph_put_super(struct super_block *s) | |
43 | { | |
3d14c5d2 | 44 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
16725b9d SW |
45 | |
46 | dout("put_super\n"); | |
3d14c5d2 | 47 | ceph_mdsc_close_sessions(fsc->mdsc); |
16725b9d SW |
48 | } |
49 | ||
50 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |
51 | { | |
2b0143b5 | 52 | struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); |
73fb0949 | 53 | struct ceph_mon_client *monc = &fsc->client->monc; |
16725b9d SW |
54 | struct ceph_statfs st; |
55 | u64 fsid; | |
56 | int err; | |
06d74376 DF |
57 | u64 data_pool; |
58 | ||
59 | if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { | |
60 | data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0]; | |
61 | } else { | |
62 | data_pool = CEPH_NOPOOL; | |
63 | } | |
16725b9d SW |
64 | |
65 | dout("statfs\n"); | |
73fb0949 | 66 | err = ceph_monc_do_statfs(monc, data_pool, &st); |
16725b9d SW |
67 | if (err < 0) |
68 | return err; | |
69 | ||
70 | /* fill in kstatfs */ | |
71 | buf->f_type = CEPH_SUPER_MAGIC; /* ?? */ | |
72 | ||
73 | /* | |
74 | * express utilization in terms of large blocks to avoid | |
75 | * overflow on 32-bit machines. | |
92a49fb0 SW |
76 | * |
77 | * NOTE: for the time being, we make bsize == frsize to humor | |
78 | * not-yet-ancient versions of glibc that are broken. | |
79 | * Someday, we will probably want to report a real block | |
80 | * size... whatever that may mean for a network file system! | |
16725b9d SW |
81 | */ |
82 | buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; | |
92a49fb0 | 83 | buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; |
9122eed5 LH |
84 | |
85 | /* | |
86 | * By default use root quota for stats; fallback to overall filesystem | |
87 | * usage if using 'noquotadf' mount option or if the root dir doesn't | |
88 | * have max_bytes quota set. | |
89 | */ | |
90 | if (ceph_test_mount_opt(fsc, NOQUOTADF) || | |
91 | !ceph_quota_update_statfs(fsc, buf)) { | |
92 | buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); | |
93 | buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); | |
94 | buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); | |
95 | } | |
16725b9d SW |
96 | |
97 | buf->f_files = le64_to_cpu(st.num_objects); | |
98 | buf->f_ffree = -1; | |
558d3499 | 99 | buf->f_namelen = NAME_MAX; |
16725b9d | 100 | |
080a330e | 101 | /* Must convert the fsid, for consistent values across arches */ |
73fb0949 LH |
102 | mutex_lock(&monc->mutex); |
103 | fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ | |
104 | le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); | |
105 | mutex_unlock(&monc->mutex); | |
106 | ||
6d1349c7 | 107 | buf->f_fsid = u64_to_fsid(fsid); |
16725b9d SW |
108 | |
109 | return 0; | |
110 | } | |
111 | ||
2d9c98ae | 112 | static int ceph_sync_fs(struct super_block *sb, int wait) |
16725b9d | 113 | { |
3d14c5d2 | 114 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
2d9c98ae SW |
115 | |
116 | if (!wait) { | |
117 | dout("sync_fs (non-blocking)\n"); | |
3d14c5d2 | 118 | ceph_flush_dirty_caps(fsc->mdsc); |
2d9c98ae SW |
119 | dout("sync_fs (non-blocking) done\n"); |
120 | return 0; | |
121 | } | |
122 | ||
123 | dout("sync_fs (blocking)\n"); | |
3d14c5d2 YS |
124 | ceph_osdc_sync(&fsc->client->osdc); |
125 | ceph_mdsc_sync(fsc->mdsc); | |
2d9c98ae | 126 | dout("sync_fs (blocking) done\n"); |
16725b9d SW |
127 | return 0; |
128 | } | |
129 | ||
16725b9d SW |
130 | /* |
131 | * mount options | |
132 | */ | |
/* Tokens for the entries of ceph_mount_parameters[]. */
enum {
	/* options taking an integer argument */
	Opt_wsize,
	Opt_rsize,
	Opt_rasize,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_caps_max,
	Opt_readdir_max_entries,
	Opt_readdir_max_bytes,
	Opt_congestion_kb,

	/* options taking a string argument */
	Opt_snapdirname,
	Opt_mds_namespace,
	Opt_recover_session,
	Opt_source,

	/* remaining options (flags; "fsc" also accepts a string value) */
	Opt_dirstat,
	Opt_rbytes,
	Opt_asyncreaddir,
	Opt_dcache,
	Opt_ino32,
	Opt_fscache,
	Opt_poolperm,
	Opt_require_active_mds,
	Opt_acl,
	Opt_quotadf,
	Opt_copyfrom,
	Opt_wsync,
};
162 | ||
82995cc6 DH |
/* Values accepted by the "recover_session" mount option. */
enum ceph_recover_session_mode {
	ceph_recover_session_no,	/* "no" */
	ceph_recover_session_clean	/* "clean" */
};
167 | ||
5eede625 | 168 | static const struct constant_table ceph_param_recover[] = { |
2710c957 AV |
169 | { "no", ceph_recover_session_no }, |
170 | { "clean", ceph_recover_session_clean }, | |
82995cc6 DH |
171 | {} |
172 | }; | |
173 | ||
d7167b14 | 174 | static const struct fs_parameter_spec ceph_mount_parameters[] = { |
82995cc6 DH |
175 | fsparam_flag_no ("acl", Opt_acl), |
176 | fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), | |
ad8c28a9 | 177 | fsparam_s32 ("caps_max", Opt_caps_max), |
82995cc6 DH |
178 | fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max), |
179 | fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min), | |
ad8c28a9 | 180 | fsparam_u32 ("write_congestion_kb", Opt_congestion_kb), |
82995cc6 DH |
181 | fsparam_flag_no ("copyfrom", Opt_copyfrom), |
182 | fsparam_flag_no ("dcache", Opt_dcache), | |
183 | fsparam_flag_no ("dirstat", Opt_dirstat), | |
48ce73b1 AV |
184 | fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc |
185 | fsparam_string ("fsc", Opt_fscache), // fsc=... | |
82995cc6 DH |
186 | fsparam_flag_no ("ino32", Opt_ino32), |
187 | fsparam_string ("mds_namespace", Opt_mds_namespace), | |
188 | fsparam_flag_no ("poolperm", Opt_poolperm), | |
189 | fsparam_flag_no ("quotadf", Opt_quotadf), | |
190 | fsparam_u32 ("rasize", Opt_rasize), | |
191 | fsparam_flag_no ("rbytes", Opt_rbytes), | |
ad8c28a9 JL |
192 | fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), |
193 | fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), | |
2710c957 | 194 | fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), |
82995cc6 DH |
195 | fsparam_flag_no ("require_active_mds", Opt_require_active_mds), |
196 | fsparam_u32 ("rsize", Opt_rsize), | |
197 | fsparam_string ("snapdirname", Opt_snapdirname), | |
198 | fsparam_string ("source", Opt_source), | |
199 | fsparam_u32 ("wsize", Opt_wsize), | |
2ccb4546 | 200 | fsparam_flag_no ("wsync", Opt_wsync), |
82995cc6 DH |
201 | {} |
202 | }; | |
203 | ||
82995cc6 DH |
/*
 * Option-parsing state; the parsing functions in this file read it from
 * fc->fs_private.
 */
struct ceph_parse_opts_ctx {
	struct ceph_options		*copts;	/* passed to ceph_parse_param() and ceph_parse_mon_ips() */
	struct ceph_mount_options	*opts;	/* ceph fs mount options being filled in */
};
208 | ||
b27a939e ID |
/*
 * Canonicalize @path in place: collapse every run of '/' into a single
 * '/', then strip one trailing '/' unless it is the only character left.
 *
 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
 */
static void canonicalize_path(char *path)
{
	const char *src;
	char *dst = path;

	for (src = path; *src != '\0'; src++) {
		/* skip a slash that directly follows an already-kept slash */
		if (*src == '/' && dst > path && dst[-1] == '/')
			continue;
		*dst++ = *src;
	}

	/* drop the trailing slash, but never turn "/" into "" */
	if (dst - path > 1 && dst[-1] == '/')
		dst--;
	*dst = '\0';
}
228 | ||
82995cc6 DH |
229 | /* |
230 | * Parse the source parameter. Distinguish the server list from the path. | |
82995cc6 DH |
231 | * |
232 | * The source will look like: | |
233 | * <server_spec>[,<server_spec>...]:[<path>] | |
234 | * where | |
235 | * <server_spec> is <ip>[:<port>] | |
236 | * <path> is optional, but if present must begin with '/' | |
237 | */ | |
238 | static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) | |
c309f0ab | 239 | { |
82995cc6 DH |
240 | struct ceph_parse_opts_ctx *pctx = fc->fs_private; |
241 | struct ceph_mount_options *fsopt = pctx->opts; | |
242 | char *dev_name = param->string, *dev_name_end; | |
243 | int ret; | |
3d14c5d2 | 244 | |
82995cc6 DH |
245 | dout("%s '%s'\n", __func__, dev_name); |
246 | if (!dev_name || !*dev_name) | |
d53d0f74 | 247 | return invalfc(fc, "Empty source"); |
3d14c5d2 | 248 | |
82995cc6 DH |
249 | dev_name_end = strchr(dev_name, '/'); |
250 | if (dev_name_end) { | |
4fbc0c71 XL |
251 | /* |
252 | * The server_path will include the whole chars from userland | |
253 | * including the leading '/'. | |
254 | */ | |
b27a939e | 255 | kfree(fsopt->server_path); |
4fbc0c71 XL |
256 | fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); |
257 | if (!fsopt->server_path) | |
258 | return -ENOMEM; | |
b27a939e ID |
259 | |
260 | canonicalize_path(fsopt->server_path); | |
3d14c5d2 | 261 | } else { |
82995cc6 | 262 | dev_name_end = dev_name + strlen(dev_name); |
c309f0ab SW |
263 | } |
264 | ||
82995cc6 DH |
265 | dev_name_end--; /* back up to ':' separator */ |
266 | if (dev_name_end < dev_name || *dev_name_end != ':') | |
d53d0f74 | 267 | return invalfc(fc, "No path or : separator in source"); |
82995cc6 DH |
268 | |
269 | dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); | |
270 | if (fsopt->server_path) | |
271 | dout("server path '%s'\n", fsopt->server_path); | |
272 | ||
273 | ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name, | |
cc3c0b53 | 274 | pctx->copts, fc->log.log); |
82995cc6 DH |
275 | if (ret) |
276 | return ret; | |
277 | ||
278 | fc->source = param->string; | |
279 | param->string = NULL; | |
280 | return 0; | |
281 | } | |
282 | ||
283 | static int ceph_parse_mount_param(struct fs_context *fc, | |
284 | struct fs_parameter *param) | |
285 | { | |
286 | struct ceph_parse_opts_ctx *pctx = fc->fs_private; | |
287 | struct ceph_mount_options *fsopt = pctx->opts; | |
288 | struct fs_parse_result result; | |
289 | unsigned int mode; | |
290 | int token, ret; | |
291 | ||
cc3c0b53 | 292 | ret = ceph_parse_param(param, pctx->copts, fc->log.log); |
82995cc6 DH |
293 | if (ret != -ENOPARAM) |
294 | return ret; | |
295 | ||
d7167b14 | 296 | token = fs_parse(fc, ceph_mount_parameters, param, &result); |
82995cc6 DH |
297 | dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); |
298 | if (token < 0) | |
299 | return token; | |
300 | ||
3d14c5d2 YS |
301 | switch (token) { |
302 | case Opt_snapdirname: | |
303 | kfree(fsopt->snapdir_name); | |
82995cc6 DH |
304 | fsopt->snapdir_name = param->string; |
305 | param->string = NULL; | |
3d14c5d2 | 306 | break; |
235a0982 | 307 | case Opt_mds_namespace: |
937441f3 | 308 | kfree(fsopt->mds_namespace); |
82995cc6 DH |
309 | fsopt->mds_namespace = param->string; |
310 | param->string = NULL; | |
235a0982 | 311 | break; |
131d7eb4 | 312 | case Opt_recover_session: |
82995cc6 DH |
313 | mode = result.uint_32; |
314 | if (mode == ceph_recover_session_no) | |
131d7eb4 | 315 | fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER; |
82995cc6 | 316 | else if (mode == ceph_recover_session_clean) |
131d7eb4 | 317 | fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER; |
82995cc6 DH |
318 | else |
319 | BUG(); | |
1d8f8360 | 320 | break; |
82995cc6 DH |
321 | case Opt_source: |
322 | if (fc->source) | |
d53d0f74 | 323 | return invalfc(fc, "Multiple sources specified"); |
82995cc6 | 324 | return ceph_parse_source(param, fc); |
3d14c5d2 | 325 | case Opt_wsize: |
82995cc6 DH |
326 | if (result.uint_32 < PAGE_SIZE || |
327 | result.uint_32 > CEPH_MAX_WRITE_SIZE) | |
328 | goto out_of_range; | |
329 | fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE); | |
3d14c5d2 YS |
330 | break; |
331 | case Opt_rsize: | |
82995cc6 DH |
332 | if (result.uint_32 < PAGE_SIZE || |
333 | result.uint_32 > CEPH_MAX_READ_SIZE) | |
334 | goto out_of_range; | |
335 | fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE); | |
3d14c5d2 | 336 | break; |
83817e35 | 337 | case Opt_rasize: |
82995cc6 | 338 | fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE); |
83817e35 | 339 | break; |
3d14c5d2 | 340 | case Opt_caps_wanted_delay_min: |
82995cc6 DH |
341 | if (result.uint_32 < 1) |
342 | goto out_of_range; | |
343 | fsopt->caps_wanted_delay_min = result.uint_32; | |
3d14c5d2 YS |
344 | break; |
345 | case Opt_caps_wanted_delay_max: | |
82995cc6 DH |
346 | if (result.uint_32 < 1) |
347 | goto out_of_range; | |
348 | fsopt->caps_wanted_delay_max = result.uint_32; | |
3d14c5d2 | 349 | break; |
fe33032d | 350 | case Opt_caps_max: |
ad8c28a9 JL |
351 | if (result.int_32 < 0) |
352 | goto out_of_range; | |
353 | fsopt->caps_max = result.int_32; | |
fe33032d | 354 | break; |
3d14c5d2 | 355 | case Opt_readdir_max_entries: |
82995cc6 DH |
356 | if (result.uint_32 < 1) |
357 | goto out_of_range; | |
358 | fsopt->max_readdir = result.uint_32; | |
3d14c5d2 YS |
359 | break; |
360 | case Opt_readdir_max_bytes: | |
82995cc6 DH |
361 | if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0) |
362 | goto out_of_range; | |
363 | fsopt->max_readdir_bytes = result.uint_32; | |
3d14c5d2 YS |
364 | break; |
365 | case Opt_congestion_kb: | |
82995cc6 DH |
366 | if (result.uint_32 < 1024) /* at least 1M */ |
367 | goto out_of_range; | |
368 | fsopt->congestion_kb = result.uint_32; | |
3d14c5d2 YS |
369 | break; |
370 | case Opt_dirstat: | |
82995cc6 DH |
371 | if (!result.negated) |
372 | fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; | |
373 | else | |
374 | fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; | |
3d14c5d2 YS |
375 | break; |
376 | case Opt_rbytes: | |
82995cc6 DH |
377 | if (!result.negated) |
378 | fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; | |
379 | else | |
380 | fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; | |
3d14c5d2 | 381 | break; |
cffaba15 | 382 | case Opt_asyncreaddir: |
82995cc6 DH |
383 | if (!result.negated) |
384 | fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR; | |
385 | else | |
386 | fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; | |
3d14c5d2 | 387 | break; |
a40dc6cc | 388 | case Opt_dcache: |
82995cc6 DH |
389 | if (!result.negated) |
390 | fsopt->flags |= CEPH_MOUNT_OPT_DCACHE; | |
391 | else | |
392 | fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE; | |
a40dc6cc | 393 | break; |
ad1fee96 | 394 | case Opt_ino32: |
82995cc6 DH |
395 | if (!result.negated) |
396 | fsopt->flags |= CEPH_MOUNT_OPT_INO32; | |
397 | else | |
398 | fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; | |
cffaba15 | 399 | break; |
82995cc6 | 400 | |
99ccbd22 | 401 | case Opt_fscache: |
ff29fde8 | 402 | #ifdef CONFIG_CEPH_FSCACHE |
7ae7a828 CX |
403 | kfree(fsopt->fscache_uniq); |
404 | fsopt->fscache_uniq = NULL; | |
82995cc6 DH |
405 | if (result.negated) { |
406 | fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; | |
407 | } else { | |
408 | fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; | |
409 | fsopt->fscache_uniq = param->string; | |
410 | param->string = NULL; | |
411 | } | |
99ccbd22 | 412 | break; |
ff29fde8 | 413 | #else |
d53d0f74 | 414 | return invalfc(fc, "fscache support is disabled"); |
ff29fde8 | 415 | #endif |
10183a69 | 416 | case Opt_poolperm: |
82995cc6 DH |
417 | if (!result.negated) |
418 | fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM; | |
419 | else | |
420 | fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; | |
10183a69 | 421 | break; |
e9e427f0 | 422 | case Opt_require_active_mds: |
82995cc6 DH |
423 | if (!result.negated) |
424 | fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT; | |
425 | else | |
426 | fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT; | |
e9e427f0 | 427 | break; |
9122eed5 | 428 | case Opt_quotadf: |
82995cc6 DH |
429 | if (!result.negated) |
430 | fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF; | |
431 | else | |
432 | fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF; | |
9122eed5 | 433 | break; |
ea4cdc54 | 434 | case Opt_copyfrom: |
82995cc6 DH |
435 | if (!result.negated) |
436 | fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM; | |
437 | else | |
438 | fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM; | |
ea4cdc54 | 439 | break; |
45195e42 | 440 | case Opt_acl: |
82995cc6 DH |
441 | if (!result.negated) { |
442 | #ifdef CONFIG_CEPH_FS_POSIX_ACL | |
443 | fc->sb_flags |= SB_POSIXACL; | |
444 | #else | |
d53d0f74 | 445 | return invalfc(fc, "POSIX ACL support is disabled"); |
45195e42 | 446 | #endif |
82995cc6 DH |
447 | } else { |
448 | fc->sb_flags &= ~SB_POSIXACL; | |
449 | } | |
45195e42 | 450 | break; |
2ccb4546 JL |
451 | case Opt_wsync: |
452 | if (!result.negated) | |
453 | fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS; | |
454 | else | |
455 | fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS; | |
456 | break; | |
3d14c5d2 | 457 | default: |
82995cc6 | 458 | BUG(); |
3d14c5d2 YS |
459 | } |
460 | return 0; | |
82995cc6 DH |
461 | |
462 | out_of_range: | |
d53d0f74 | 463 | return invalfc(fc, "%s out of range", param->key); |
c309f0ab | 464 | } |
16725b9d | 465 | |
3d14c5d2 | 466 | static void destroy_mount_options(struct ceph_mount_options *args) |
16725b9d | 467 | { |
3d14c5d2 | 468 | dout("destroy_mount_options %p\n", args); |
82995cc6 DH |
469 | if (!args) |
470 | return; | |
471 | ||
3d14c5d2 | 472 | kfree(args->snapdir_name); |
430afbad | 473 | kfree(args->mds_namespace); |
3f384954 | 474 | kfree(args->server_path); |
1d8f8360 | 475 | kfree(args->fscache_uniq); |
3d14c5d2 YS |
476 | kfree(args); |
477 | } | |
16725b9d | 478 | |
3d14c5d2 YS |
/*
 * strcmp() that tolerates NULL arguments.
 *
 * Two NULLs compare equal; if only @s2 is NULL the result is -1, if only
 * @s1 is NULL the result is 1.  Otherwise the strcmp() result is returned.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
16725b9d | 489 | |
3d14c5d2 YS |
490 | static int compare_mount_options(struct ceph_mount_options *new_fsopt, |
491 | struct ceph_options *new_opt, | |
492 | struct ceph_fs_client *fsc) | |
493 | { | |
494 | struct ceph_mount_options *fsopt1 = new_fsopt; | |
495 | struct ceph_mount_options *fsopt2 = fsc->mount_options; | |
496 | int ofs = offsetof(struct ceph_mount_options, snapdir_name); | |
497 | int ret; | |
16725b9d | 498 | |
3d14c5d2 YS |
499 | ret = memcmp(fsopt1, fsopt2, ofs); |
500 | if (ret) | |
501 | return ret; | |
502 | ||
503 | ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); | |
430afbad YZ |
504 | if (ret) |
505 | return ret; | |
b27a939e | 506 | |
430afbad | 507 | ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); |
3d14c5d2 YS |
508 | if (ret) |
509 | return ret; | |
4fbc0c71 | 510 | |
b27a939e | 511 | ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); |
1d8f8360 YZ |
512 | if (ret) |
513 | return ret; | |
4fbc0c71 | 514 | |
1d8f8360 | 515 | ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); |
3f384954 YZ |
516 | if (ret) |
517 | return ret; | |
518 | ||
3d14c5d2 YS |
519 | return ceph_compare_options(new_opt, fsc->client); |
520 | } | |
521 | ||
3d14c5d2 YS |
522 | /** |
523 | * ceph_show_options - Show mount options in /proc/mounts | |
524 | * @m: seq_file to write to | |
34c80b1d | 525 | * @root: root of that (sub)tree |
3d14c5d2 | 526 | */ |
34c80b1d | 527 | static int ceph_show_options(struct seq_file *m, struct dentry *root) |
16725b9d | 528 | { |
34c80b1d | 529 | struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb); |
3d14c5d2 | 530 | struct ceph_mount_options *fsopt = fsc->mount_options; |
ff40f9ae ID |
531 | size_t pos; |
532 | int ret; | |
533 | ||
534 | /* a comma between MNT/MS and client options */ | |
535 | seq_putc(m, ','); | |
536 | pos = m->count; | |
537 | ||
02b2f549 | 538 | ret = ceph_print_client_options(m, fsc->client, false); |
ff40f9ae ID |
539 | if (ret) |
540 | return ret; | |
541 | ||
542 | /* retract our comma if no client options */ | |
543 | if (m->count == pos) | |
544 | m->count--; | |
3d14c5d2 YS |
545 | |
546 | if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) | |
547 | seq_puts(m, ",dirstat"); | |
133e9156 YZ |
548 | if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES)) |
549 | seq_puts(m, ",rbytes"); | |
3d14c5d2 YS |
550 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) |
551 | seq_puts(m, ",noasyncreaddir"); | |
ff7eeb82 | 552 | if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0) |
a40dc6cc | 553 | seq_puts(m, ",nodcache"); |
3619aa8b CX |
554 | if (fsopt->flags & CEPH_MOUNT_OPT_INO32) |
555 | seq_puts(m, ",ino32"); | |
1d8f8360 | 556 | if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) { |
4d8969af | 557 | seq_show_option(m, "fsc", fsopt->fscache_uniq); |
1d8f8360 | 558 | } |
10183a69 YZ |
559 | if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM) |
560 | seq_puts(m, ",nopoolperm"); | |
9122eed5 LH |
561 | if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF) |
562 | seq_puts(m, ",noquotadf"); | |
3d14c5d2 | 563 | |
45195e42 | 564 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
82995cc6 | 565 | if (root->d_sb->s_flags & SB_POSIXACL) |
45195e42 SW |
566 | seq_puts(m, ",acl"); |
567 | else | |
568 | seq_puts(m, ",noacl"); | |
569 | #endif | |
570 | ||
6f9718fe LH |
571 | if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0) |
572 | seq_puts(m, ",copyfrom"); | |
ea4cdc54 | 573 | |
430afbad | 574 | if (fsopt->mds_namespace) |
4d8969af | 575 | seq_show_option(m, "mds_namespace", fsopt->mds_namespace); |
131d7eb4 YZ |
576 | |
577 | if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER) | |
578 | seq_show_option(m, "recover_session", "clean"); | |
579 | ||
2ccb4546 JL |
580 | if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) |
581 | seq_puts(m, ",nowsync"); | |
582 | ||
6dd4940b | 583 | if (fsopt->wsize != CEPH_MAX_WRITE_SIZE) |
ad8c28a9 | 584 | seq_printf(m, ",wsize=%u", fsopt->wsize); |
aa187926 | 585 | if (fsopt->rsize != CEPH_MAX_READ_SIZE) |
ad8c28a9 | 586 | seq_printf(m, ",rsize=%u", fsopt->rsize); |
83817e35 | 587 | if (fsopt->rasize != CEPH_RASIZE_DEFAULT) |
ad8c28a9 | 588 | seq_printf(m, ",rasize=%u", fsopt->rasize); |
3d14c5d2 | 589 | if (fsopt->congestion_kb != default_congestion_kb()) |
ad8c28a9 | 590 | seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb); |
fe33032d YZ |
591 | if (fsopt->caps_max) |
592 | seq_printf(m, ",caps_max=%d", fsopt->caps_max); | |
3d14c5d2 | 593 | if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) |
ad8c28a9 | 594 | seq_printf(m, ",caps_wanted_delay_min=%u", |
3d14c5d2 YS |
595 | fsopt->caps_wanted_delay_min); |
596 | if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | |
ad8c28a9 | 597 | seq_printf(m, ",caps_wanted_delay_max=%u", |
3d14c5d2 | 598 | fsopt->caps_wanted_delay_max); |
3d14c5d2 | 599 | if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) |
ad8c28a9 | 600 | seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir); |
3d14c5d2 | 601 | if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) |
ad8c28a9 | 602 | seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes); |
3d14c5d2 | 603 | if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) |
a068acf2 | 604 | seq_show_option(m, "snapdirname", fsopt->snapdir_name); |
ff40f9ae | 605 | |
3d14c5d2 | 606 | return 0; |
16725b9d SW |
607 | } |
608 | ||
609 | /* | |
3d14c5d2 YS |
610 | * handle any mon messages the standard library doesn't understand. |
611 | * return error if we don't either. | |
16725b9d | 612 | */ |
3d14c5d2 | 613 | static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) |
16725b9d | 614 | { |
3d14c5d2 YS |
615 | struct ceph_fs_client *fsc = client->private; |
616 | int type = le16_to_cpu(msg->hdr.type); | |
617 | ||
618 | switch (type) { | |
619 | case CEPH_MSG_MDS_MAP: | |
430afbad YZ |
620 | ceph_mdsc_handle_mdsmap(fsc->mdsc, msg); |
621 | return 0; | |
622 | case CEPH_MSG_FS_MAP_USER: | |
623 | ceph_mdsc_handle_fsmap(fsc->mdsc, msg); | |
3d14c5d2 | 624 | return 0; |
3d14c5d2 YS |
625 | default: |
626 | return -1; | |
627 | } | |
628 | } | |
629 | ||
630 | /* | |
631 | * create a new fs client | |
8aaff151 ID |
632 | * |
633 | * Success or not, this function consumes @fsopt and @opt. | |
3d14c5d2 | 634 | */ |
0c6d4b4e | 635 | static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, |
3d14c5d2 YS |
636 | struct ceph_options *opt) |
637 | { | |
638 | struct ceph_fs_client *fsc; | |
8aaff151 | 639 | int err; |
16725b9d | 640 | |
3d14c5d2 | 641 | fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); |
8aaff151 ID |
642 | if (!fsc) { |
643 | err = -ENOMEM; | |
644 | goto fail; | |
645 | } | |
16725b9d | 646 | |
74da4a0f | 647 | fsc->client = ceph_create_client(opt, fsc); |
3d14c5d2 YS |
648 | if (IS_ERR(fsc->client)) { |
649 | err = PTR_ERR(fsc->client); | |
650 | goto fail; | |
651 | } | |
8aaff151 | 652 | opt = NULL; /* fsc->client now owns this */ |
c843d13c | 653 | |
3d14c5d2 | 654 | fsc->client->extra_mon_dispatch = extra_mon_dispatch; |
02b2f549 | 655 | ceph_set_opt(fsc->client, ABORT_ON_FULL); |
430afbad | 656 | |
d37b1d99 | 657 | if (!fsopt->mds_namespace) { |
430afbad YZ |
658 | ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, |
659 | 0, true); | |
660 | } else { | |
661 | ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP, | |
662 | 0, false); | |
663 | } | |
16725b9d | 664 | |
3d14c5d2 | 665 | fsc->mount_options = fsopt; |
16725b9d | 666 | |
3d14c5d2 YS |
667 | fsc->sb = NULL; |
668 | fsc->mount_state = CEPH_MOUNT_MOUNTING; | |
81f148a9 | 669 | fsc->filp_gen = 1; |
78beb0ff | 670 | fsc->have_copy_from2 = true; |
16725b9d | 671 | |
3d14c5d2 | 672 | atomic_long_set(&fsc->writeback_count, 0); |
16725b9d SW |
673 | |
674 | err = -ENOMEM; | |
01e6acc4 TH |
675 | /* |
676 | * The number of concurrent works can be high but they don't need | |
677 | * to be processed in parallel, limit concurrency. | |
678 | */ | |
1cf89a8d YZ |
679 | fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); |
680 | if (!fsc->inode_wq) | |
09dc9fc2 | 681 | goto fail_client; |
e3ec8d68 YZ |
682 | fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); |
683 | if (!fsc->cap_wq) | |
1cf89a8d | 684 | goto fail_inode_wq; |
16725b9d | 685 | |
18f473b3 XL |
686 | spin_lock(&ceph_fsc_lock); |
687 | list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list); | |
688 | spin_unlock(&ceph_fsc_lock); | |
689 | ||
3d14c5d2 | 690 | return fsc; |
b9bfb93c | 691 | |
1cf89a8d YZ |
692 | fail_inode_wq: |
693 | destroy_workqueue(fsc->inode_wq); | |
3d14c5d2 YS |
694 | fail_client: |
695 | ceph_destroy_client(fsc->client); | |
16725b9d | 696 | fail: |
3d14c5d2 | 697 | kfree(fsc); |
8aaff151 ID |
698 | if (opt) |
699 | ceph_destroy_options(opt); | |
700 | destroy_mount_options(fsopt); | |
16725b9d SW |
701 | return ERR_PTR(err); |
702 | } | |
703 | ||
a57d9064 YZ |
704 | static void flush_fs_workqueues(struct ceph_fs_client *fsc) |
705 | { | |
1cf89a8d | 706 | flush_workqueue(fsc->inode_wq); |
e3ec8d68 | 707 | flush_workqueue(fsc->cap_wq); |
a57d9064 YZ |
708 | } |
709 | ||
0c6d4b4e | 710 | static void destroy_fs_client(struct ceph_fs_client *fsc) |
16725b9d | 711 | { |
3d14c5d2 | 712 | dout("destroy_fs_client %p\n", fsc); |
16725b9d | 713 | |
18f473b3 XL |
714 | spin_lock(&ceph_fsc_lock); |
715 | list_del(&fsc->metric_wakeup); | |
716 | spin_unlock(&ceph_fsc_lock); | |
717 | ||
3ee5a701 | 718 | ceph_mdsc_destroy(fsc); |
1cf89a8d | 719 | destroy_workqueue(fsc->inode_wq); |
e3ec8d68 | 720 | destroy_workqueue(fsc->cap_wq); |
16725b9d | 721 | |
3d14c5d2 | 722 | destroy_mount_options(fsc->mount_options); |
5dfc589a | 723 | |
3d14c5d2 | 724 | ceph_destroy_client(fsc->client); |
16725b9d | 725 | |
3d14c5d2 YS |
726 | kfree(fsc); |
727 | dout("destroy_fs_client %p done\n", fsc); | |
16725b9d SW |
728 | } |
729 | ||
0743304d | 730 | /* |
3d14c5d2 | 731 | * caches |
0743304d | 732 | */ |
3d14c5d2 YS |
733 | struct kmem_cache *ceph_inode_cachep; |
734 | struct kmem_cache *ceph_cap_cachep; | |
f66fd9f0 | 735 | struct kmem_cache *ceph_cap_flush_cachep; |
3d14c5d2 YS |
736 | struct kmem_cache *ceph_dentry_cachep; |
737 | struct kmem_cache *ceph_file_cachep; | |
bb48bd4d | 738 | struct kmem_cache *ceph_dir_file_cachep; |
058daab7 | 739 | struct kmem_cache *ceph_mds_request_cachep; |
a0102bda | 740 | mempool_t *ceph_wb_pagevec_pool; |
3d14c5d2 YS |
741 | |
742 | static void ceph_inode_init_once(void *foo) | |
0743304d | 743 | { |
3d14c5d2 YS |
744 | struct ceph_inode_info *ci = foo; |
745 | inode_init_once(&ci->vfs_inode); | |
746 | } | |
747 | ||
748 | static int __init init_caches(void) | |
749 | { | |
99ccbd22 MT |
750 | int error = -ENOMEM; |
751 | ||
3d14c5d2 YS |
752 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
753 | sizeof(struct ceph_inode_info), | |
754 | __alignof__(struct ceph_inode_info), | |
5d097056 VD |
755 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| |
756 | SLAB_ACCOUNT, ceph_inode_init_once); | |
d37b1d99 | 757 | if (!ceph_inode_cachep) |
3d14c5d2 YS |
758 | return -ENOMEM; |
759 | ||
bc4b5ad3 | 760 | ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); |
d37b1d99 | 761 | if (!ceph_cap_cachep) |
3d14c5d2 | 762 | goto bad_cap; |
f66fd9f0 YZ |
763 | ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, |
764 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | |
d37b1d99 | 765 | if (!ceph_cap_flush_cachep) |
f66fd9f0 | 766 | goto bad_cap_flush; |
3d14c5d2 YS |
767 | |
768 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | |
769 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | |
d37b1d99 | 770 | if (!ceph_dentry_cachep) |
3d14c5d2 YS |
771 | goto bad_dentry; |
772 | ||
6b1a9a6c | 773 | ceph_file_cachep = KMEM_CACHE(ceph_file_info, SLAB_MEM_SPREAD); |
d37b1d99 | 774 | if (!ceph_file_cachep) |
3d14c5d2 YS |
775 | goto bad_file; |
776 | ||
bb48bd4d CX |
777 | ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD); |
778 | if (!ceph_dir_file_cachep) | |
779 | goto bad_dir_file; | |
780 | ||
058daab7 JL |
781 | ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, SLAB_MEM_SPREAD); |
782 | if (!ceph_mds_request_cachep) | |
783 | goto bad_mds_req; | |
784 | ||
a0102bda JL |
785 | ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); |
786 | if (!ceph_wb_pagevec_pool) | |
787 | goto bad_pagevec_pool; | |
788 | ||
1c789249 CX |
789 | error = ceph_fscache_register(); |
790 | if (error) | |
791 | goto bad_fscache; | |
3d14c5d2 | 792 | |
99ccbd22 | 793 | return 0; |
1c789249 CX |
794 | |
795 | bad_fscache: | |
058daab7 | 796 | kmem_cache_destroy(ceph_mds_request_cachep); |
a0102bda JL |
797 | bad_pagevec_pool: |
798 | mempool_destroy(ceph_wb_pagevec_pool); | |
058daab7 | 799 | bad_mds_req: |
bb48bd4d CX |
800 | kmem_cache_destroy(ceph_dir_file_cachep); |
801 | bad_dir_file: | |
1c789249 | 802 | kmem_cache_destroy(ceph_file_cachep); |
3d14c5d2 YS |
803 | bad_file: |
804 | kmem_cache_destroy(ceph_dentry_cachep); | |
805 | bad_dentry: | |
f66fd9f0 YZ |
806 | kmem_cache_destroy(ceph_cap_flush_cachep); |
807 | bad_cap_flush: | |
3d14c5d2 YS |
808 | kmem_cache_destroy(ceph_cap_cachep); |
809 | bad_cap: | |
810 | kmem_cache_destroy(ceph_inode_cachep); | |
99ccbd22 | 811 | return error; |
0743304d SW |
812 | } |
813 | ||
3d14c5d2 YS |
814 | static void destroy_caches(void) |
815 | { | |
8c0a8537 KS |
816 | /* |
817 | * Make sure all delayed rcu free inodes are flushed before we | |
818 | * destroy cache. | |
819 | */ | |
820 | rcu_barrier(); | |
99ccbd22 | 821 | |
3d14c5d2 YS |
822 | kmem_cache_destroy(ceph_inode_cachep); |
823 | kmem_cache_destroy(ceph_cap_cachep); | |
f66fd9f0 | 824 | kmem_cache_destroy(ceph_cap_flush_cachep); |
3d14c5d2 YS |
825 | kmem_cache_destroy(ceph_dentry_cachep); |
826 | kmem_cache_destroy(ceph_file_cachep); | |
bb48bd4d | 827 | kmem_cache_destroy(ceph_dir_file_cachep); |
058daab7 | 828 | kmem_cache_destroy(ceph_mds_request_cachep); |
a0102bda | 829 | mempool_destroy(ceph_wb_pagevec_pool); |
99ccbd22 MT |
830 | |
831 | ceph_fscache_unregister(); | |
3d14c5d2 YS |
832 | } |
833 | ||
50c9132d JL |
834 | static void __ceph_umount_begin(struct ceph_fs_client *fsc) |
835 | { | |
836 | ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); | |
837 | ceph_mdsc_force_umount(fsc->mdsc); | |
838 | fsc->filp_gen++; // invalidate open files | |
839 | } | |
840 | ||
16725b9d | 841 | /* |
f1f565a2 | 842 | * ceph_umount_begin - initiate forced umount. Tear down the |
3d14c5d2 | 843 | * mount, skipping steps that may hang while waiting for server(s). |
16725b9d | 844 | */ |
3d14c5d2 | 845 | static void ceph_umount_begin(struct super_block *sb) |
16725b9d | 846 | { |
3d14c5d2 YS |
847 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
848 | ||
849 | dout("ceph_umount_begin - starting forced umount\n"); | |
850 | if (!fsc) | |
851 | return; | |
852 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; | |
50c9132d | 853 | __ceph_umount_begin(fsc); |
16725b9d SW |
854 | } |
855 | ||
3d14c5d2 YS |
856 | static const struct super_operations ceph_super_ops = { |
857 | .alloc_inode = ceph_alloc_inode, | |
cfa6d412 | 858 | .free_inode = ceph_free_inode, |
3d14c5d2 | 859 | .write_inode = ceph_write_inode, |
52dd0f1b | 860 | .drop_inode = generic_delete_inode, |
87bc5b89 | 861 | .evict_inode = ceph_evict_inode, |
3d14c5d2 YS |
862 | .sync_fs = ceph_sync_fs, |
863 | .put_super = ceph_put_super, | |
864 | .show_options = ceph_show_options, | |
865 | .statfs = ceph_statfs, | |
866 | .umount_begin = ceph_umount_begin, | |
867 | }; | |
868 | ||
16725b9d SW |
869 | /* |
870 | * Bootstrap mount by opening the root directory. Note the mount | |
871 | * @started time from caller, and time out if this takes too long. | |
872 | */ | |
3d14c5d2 | 873 | static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, |
16725b9d SW |
874 | const char *path, |
875 | unsigned long started) | |
876 | { | |
3d14c5d2 | 877 | struct ceph_mds_client *mdsc = fsc->mdsc; |
16725b9d SW |
878 | struct ceph_mds_request *req = NULL; |
879 | int err; | |
880 | struct dentry *root; | |
881 | ||
882 | /* open dir */ | |
883 | dout("open_root_inode opening '%s'\n", path); | |
884 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); | |
885 | if (IS_ERR(req)) | |
7e34bc52 | 886 | return ERR_CAST(req); |
16725b9d | 887 | req->r_path1 = kstrdup(path, GFP_NOFS); |
a149bb9a SK |
888 | if (!req->r_path1) { |
889 | root = ERR_PTR(-ENOMEM); | |
890 | goto out; | |
891 | } | |
892 | ||
16725b9d SW |
893 | req->r_ino1.ino = CEPH_INO_ROOT; |
894 | req->r_ino1.snap = CEPH_NOSNAP; | |
895 | req->r_started = started; | |
a319bf56 | 896 | req->r_timeout = fsc->client->options->mount_timeout; |
16725b9d SW |
897 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); |
898 | req->r_num_caps = 2; | |
899 | err = ceph_mdsc_do_request(mdsc, NULL, req); | |
900 | if (err == 0) { | |
3c5184ef AV |
901 | struct inode *inode = req->r_target_inode; |
902 | req->r_target_inode = NULL; | |
16725b9d | 903 | dout("open_root_inode success\n"); |
ce2728aa YZ |
904 | root = d_make_root(inode); |
905 | if (!root) { | |
906 | root = ERR_PTR(-ENOMEM); | |
907 | goto out; | |
774ac21d | 908 | } |
16725b9d SW |
909 | dout("open_root_inode success, root dentry is %p\n", root); |
910 | } else { | |
911 | root = ERR_PTR(err); | |
912 | } | |
3c5184ef | 913 | out: |
16725b9d SW |
914 | ceph_mdsc_put_request(req); |
915 | return root; | |
916 | } | |
917 | ||
918 | /* | |
919 | * mount: join the ceph cluster, and open root directory. | |
920 | */ | |
82995cc6 DH |
921 | static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc, |
922 | struct fs_context *fc) | |
16725b9d | 923 | { |
16725b9d | 924 | int err; |
16725b9d SW |
925 | unsigned long started = jiffies; /* note the start time */ |
926 | struct dentry *root; | |
927 | ||
132ca7e1 | 928 | dout("mount start %p\n", fsc); |
3d14c5d2 | 929 | mutex_lock(&fsc->client->mount_mutex); |
16725b9d | 930 | |
132ca7e1 | 931 | if (!fsc->sb->s_root) { |
b27a939e ID |
932 | const char *path = fsc->mount_options->server_path ? |
933 | fsc->mount_options->server_path + 1 : ""; | |
934 | ||
132ca7e1 YZ |
935 | err = __ceph_open_session(fsc->client, started); |
936 | if (err < 0) | |
937 | goto out; | |
16725b9d | 938 | |
1d8f8360 YZ |
939 | /* setup fscache */ |
940 | if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) { | |
82995cc6 | 941 | err = ceph_fscache_register_fs(fsc, fc); |
1d8f8360 YZ |
942 | if (err < 0) |
943 | goto out; | |
944 | } | |
945 | ||
4fbc0c71 | 946 | dout("mount opening path '%s'\n", path); |
18106734 | 947 | |
1a829ff2 | 948 | ceph_fs_debugfs_init(fsc); |
18106734 | 949 | |
ce2728aa | 950 | root = open_root_dentry(fsc, path, started); |
132ca7e1 YZ |
951 | if (IS_ERR(root)) { |
952 | err = PTR_ERR(root); | |
953 | goto out; | |
954 | } | |
ce2728aa | 955 | fsc->sb->s_root = dget(root); |
31ca5878 GU |
956 | } else { |
957 | root = dget(fsc->sb->s_root); | |
3d14c5d2 | 958 | } |
16725b9d | 959 | |
3d14c5d2 | 960 | fsc->mount_state = CEPH_MOUNT_MOUNTED; |
16725b9d | 961 | dout("mount success\n"); |
a7f9fb20 AV |
962 | mutex_unlock(&fsc->client->mount_mutex); |
963 | return root; | |
16725b9d | 964 | |
132ca7e1 YZ |
965 | out: |
966 | mutex_unlock(&fsc->client->mount_mutex); | |
967 | return ERR_PTR(err); | |
16725b9d SW |
968 | } |
969 | ||
82995cc6 | 970 | static int ceph_set_super(struct super_block *s, struct fs_context *fc) |
16725b9d | 971 | { |
82995cc6 | 972 | struct ceph_fs_client *fsc = s->s_fs_info; |
16725b9d SW |
973 | int ret; |
974 | ||
82995cc6 | 975 | dout("set_super %p\n", s); |
16725b9d | 976 | |
719784ba | 977 | s->s_maxbytes = MAX_LFS_FILESIZE; |
16725b9d | 978 | |
7221fe4c | 979 | s->s_xattr = ceph_xattr_handlers; |
3d14c5d2 | 980 | fsc->sb = s; |
719784ba | 981 | fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */ |
16725b9d SW |
982 | |
983 | s->s_op = &ceph_super_ops; | |
18fc8abd | 984 | s->s_d_op = &ceph_dentry_ops; |
16725b9d SW |
985 | s->s_export_op = &ceph_export_ops; |
986 | ||
0f7cf80a | 987 | s->s_time_gran = 1; |
028ca4db DD |
988 | s->s_time_min = 0; |
989 | s->s_time_max = U32_MAX; | |
16725b9d | 990 | |
82995cc6 | 991 | ret = set_anon_super_fc(s, fc); |
16725b9d | 992 | if (ret != 0) |
82995cc6 | 993 | fsc->sb = NULL; |
16725b9d SW |
994 | return ret; |
995 | } | |
996 | ||
997 | /* | |
998 | * share superblock if same fs AND options | |
999 | */ | |
82995cc6 | 1000 | static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) |
16725b9d | 1001 | { |
82995cc6 | 1002 | struct ceph_fs_client *new = fc->s_fs_info; |
3d14c5d2 YS |
1003 | struct ceph_mount_options *fsopt = new->mount_options; |
1004 | struct ceph_options *opt = new->client->options; | |
1005 | struct ceph_fs_client *other = ceph_sb_to_client(sb); | |
16725b9d SW |
1006 | |
1007 | dout("ceph_compare_super %p\n", sb); | |
3d14c5d2 YS |
1008 | |
1009 | if (compare_mount_options(fsopt, opt, other)) { | |
1010 | dout("monitor(s)/mount options don't match\n"); | |
1011 | return 0; | |
16725b9d | 1012 | } |
3d14c5d2 YS |
1013 | if ((opt->flags & CEPH_OPT_FSID) && |
1014 | ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { | |
1015 | dout("fsid doesn't match\n"); | |
1016 | return 0; | |
1017 | } | |
82995cc6 | 1018 | if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) { |
16725b9d SW |
1019 | dout("flags differ\n"); |
1020 | return 0; | |
1021 | } | |
1022 | return 1; | |
1023 | } | |
1024 | ||
1025 | /* | |
1026 | * construct our own bdi so we can control readahead, etc. | |
1027 | */ | |
00d5643e | 1028 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); |
31e0cf8f | 1029 | |
09dc9fc2 | 1030 | static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc) |
16725b9d SW |
1031 | { |
1032 | int err; | |
1033 | ||
09dc9fc2 JK |
1034 | err = super_setup_bdi_name(sb, "ceph-%ld", |
1035 | atomic_long_inc_return(&bdi_seq)); | |
1036 | if (err) | |
1037 | return err; | |
1038 | ||
83817e35 | 1039 | /* set ra_pages based on rasize mount option? */ |
4214fb15 | 1040 | sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT; |
e9852227 | 1041 | |
aa187926 YZ |
1042 | /* set io_pages based on max osd read size */ |
1043 | sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT; | |
7c94ba27 | 1044 | |
09dc9fc2 | 1045 | return 0; |
16725b9d SW |
1046 | } |
1047 | ||
82995cc6 | 1048 | static int ceph_get_tree(struct fs_context *fc) |
16725b9d | 1049 | { |
82995cc6 | 1050 | struct ceph_parse_opts_ctx *pctx = fc->fs_private; |
16725b9d | 1051 | struct super_block *sb; |
3d14c5d2 | 1052 | struct ceph_fs_client *fsc; |
a7f9fb20 | 1053 | struct dentry *res; |
82995cc6 DH |
1054 | int (*compare_super)(struct super_block *, struct fs_context *) = |
1055 | ceph_compare_super; | |
16725b9d | 1056 | int err; |
16725b9d | 1057 | |
82995cc6 DH |
1058 | dout("ceph_get_tree\n"); |
1059 | ||
1060 | if (!fc->source) | |
d53d0f74 | 1061 | return invalfc(fc, "No source"); |
45195e42 | 1062 | |
16725b9d | 1063 | /* create client (which we may/may not use) */ |
82995cc6 DH |
1064 | fsc = create_fs_client(pctx->opts, pctx->copts); |
1065 | pctx->opts = NULL; | |
1066 | pctx->copts = NULL; | |
3d14c5d2 | 1067 | if (IS_ERR(fsc)) { |
82995cc6 | 1068 | err = PTR_ERR(fsc); |
6b805185 SW |
1069 | goto out_final; |
1070 | } | |
16725b9d | 1071 | |
3d14c5d2 | 1072 | err = ceph_mdsc_init(fsc); |
82995cc6 | 1073 | if (err < 0) |
3d14c5d2 YS |
1074 | goto out; |
1075 | ||
1076 | if (ceph_test_opt(fsc->client, NOSHARE)) | |
16725b9d | 1077 | compare_super = NULL; |
82995cc6 DH |
1078 | |
1079 | fc->s_fs_info = fsc; | |
1080 | sb = sget_fc(fc, compare_super, ceph_set_super); | |
1081 | fc->s_fs_info = NULL; | |
16725b9d | 1082 | if (IS_ERR(sb)) { |
82995cc6 | 1083 | err = PTR_ERR(sb); |
16725b9d SW |
1084 | goto out; |
1085 | } | |
1086 | ||
3d14c5d2 | 1087 | if (ceph_sb_to_client(sb) != fsc) { |
3d14c5d2 YS |
1088 | destroy_fs_client(fsc); |
1089 | fsc = ceph_sb_to_client(sb); | |
1090 | dout("get_sb got existing client %p\n", fsc); | |
16725b9d | 1091 | } else { |
3d14c5d2 | 1092 | dout("get_sb using new client %p\n", fsc); |
09dc9fc2 | 1093 | err = ceph_setup_bdi(sb, fsc); |
82995cc6 | 1094 | if (err < 0) |
16725b9d SW |
1095 | goto out_splat; |
1096 | } | |
1097 | ||
82995cc6 DH |
1098 | res = ceph_real_mount(fsc, fc); |
1099 | if (IS_ERR(res)) { | |
1100 | err = PTR_ERR(res); | |
16725b9d | 1101 | goto out_splat; |
82995cc6 | 1102 | } |
a7f9fb20 | 1103 | dout("root %p inode %p ino %llx.%llx\n", res, |
2b0143b5 | 1104 | d_inode(res), ceph_vinop(d_inode(res))); |
82995cc6 DH |
1105 | fc->root = fsc->sb->s_root; |
1106 | return 0; | |
16725b9d SW |
1107 | |
1108 | out_splat: | |
97820058 XL |
1109 | if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { |
1110 | pr_info("No mds server is up or the cluster is laggy\n"); | |
1111 | err = -EHOSTUNREACH; | |
1112 | } | |
1113 | ||
3d14c5d2 | 1114 | ceph_mdsc_close_sessions(fsc->mdsc); |
3981f2e2 | 1115 | deactivate_locked_super(sb); |
16725b9d SW |
1116 | goto out_final; |
1117 | ||
1118 | out: | |
3d14c5d2 | 1119 | destroy_fs_client(fsc); |
16725b9d | 1120 | out_final: |
82995cc6 DH |
1121 | dout("ceph_get_tree fail %d\n", err); |
1122 | return err; | |
1123 | } | |
1124 | ||
1125 | static void ceph_free_fc(struct fs_context *fc) | |
1126 | { | |
1127 | struct ceph_parse_opts_ctx *pctx = fc->fs_private; | |
1128 | ||
1129 | if (pctx) { | |
1130 | destroy_mount_options(pctx->opts); | |
1131 | ceph_destroy_options(pctx->copts); | |
1132 | kfree(pctx); | |
1133 | } | |
1134 | } | |
1135 | ||
1136 | static int ceph_reconfigure_fc(struct fs_context *fc) | |
1137 | { | |
2ccb4546 JL |
1138 | struct ceph_parse_opts_ctx *pctx = fc->fs_private; |
1139 | struct ceph_mount_options *fsopt = pctx->opts; | |
1140 | struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb); | |
1141 | ||
1142 | if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS) | |
1143 | ceph_set_mount_opt(fsc, ASYNC_DIROPS); | |
1144 | else | |
1145 | ceph_clear_mount_opt(fsc, ASYNC_DIROPS); | |
1146 | ||
82995cc6 DH |
1147 | sync_filesystem(fc->root->d_sb); |
1148 | return 0; | |
1149 | } | |
1150 | ||
1151 | static const struct fs_context_operations ceph_context_ops = { | |
1152 | .free = ceph_free_fc, | |
1153 | .parse_param = ceph_parse_mount_param, | |
1154 | .get_tree = ceph_get_tree, | |
1155 | .reconfigure = ceph_reconfigure_fc, | |
1156 | }; | |
1157 | ||
1158 | /* | |
1159 | * Set up the filesystem mount context. | |
1160 | */ | |
1161 | static int ceph_init_fs_context(struct fs_context *fc) | |
1162 | { | |
1163 | struct ceph_parse_opts_ctx *pctx; | |
1164 | struct ceph_mount_options *fsopt; | |
1165 | ||
1166 | pctx = kzalloc(sizeof(*pctx), GFP_KERNEL); | |
1167 | if (!pctx) | |
1168 | return -ENOMEM; | |
1169 | ||
1170 | pctx->copts = ceph_alloc_options(); | |
1171 | if (!pctx->copts) | |
1172 | goto nomem; | |
1173 | ||
1174 | pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL); | |
1175 | if (!pctx->opts) | |
1176 | goto nomem; | |
1177 | ||
1178 | fsopt = pctx->opts; | |
1179 | fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; | |
1180 | ||
1181 | fsopt->wsize = CEPH_MAX_WRITE_SIZE; | |
1182 | fsopt->rsize = CEPH_MAX_READ_SIZE; | |
1183 | fsopt->rasize = CEPH_RASIZE_DEFAULT; | |
1184 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | |
1185 | if (!fsopt->snapdir_name) | |
1186 | goto nomem; | |
1187 | ||
1188 | fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; | |
1189 | fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | |
1190 | fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; | |
1191 | fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | |
1192 | fsopt->congestion_kb = default_congestion_kb(); | |
1193 | ||
3b20bc2f XL |
1194 | #ifdef CONFIG_CEPH_FS_POSIX_ACL |
1195 | fc->sb_flags |= SB_POSIXACL; | |
1196 | #endif | |
1197 | ||
82995cc6 DH |
1198 | fc->fs_private = pctx; |
1199 | fc->ops = &ceph_context_ops; | |
1200 | return 0; | |
1201 | ||
1202 | nomem: | |
1203 | destroy_mount_options(pctx->opts); | |
1204 | ceph_destroy_options(pctx->copts); | |
1205 | kfree(pctx); | |
1206 | return -ENOMEM; | |
16725b9d SW |
1207 | } |
1208 | ||
1209 | static void ceph_kill_sb(struct super_block *s) | |
1210 | { | |
3d14c5d2 | 1211 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
e4d27509 | 1212 | |
16725b9d | 1213 | dout("kill_sb %p\n", s); |
e4d27509 | 1214 | |
3d14c5d2 | 1215 | ceph_mdsc_pre_umount(fsc->mdsc); |
a57d9064 YZ |
1216 | flush_fs_workqueues(fsc); |
1217 | ||
470a5c77 | 1218 | kill_anon_super(s); |
62a65f36 YZ |
1219 | |
1220 | fsc->client->extra_mon_dispatch = NULL; | |
1221 | ceph_fs_debugfs_cleanup(fsc); | |
1222 | ||
1d8f8360 YZ |
1223 | ceph_fscache_unregister_fs(fsc); |
1224 | ||
3d14c5d2 | 1225 | destroy_fs_client(fsc); |
16725b9d SW |
1226 | } |
1227 | ||
1228 | static struct file_system_type ceph_fs_type = { | |
1229 | .owner = THIS_MODULE, | |
1230 | .name = "ceph", | |
82995cc6 | 1231 | .init_fs_context = ceph_init_fs_context, |
16725b9d SW |
1232 | .kill_sb = ceph_kill_sb, |
1233 | .fs_flags = FS_RENAME_DOES_D_MOVE, | |
1234 | }; | |
7f78e035 | 1235 | MODULE_ALIAS_FS("ceph"); |
16725b9d | 1236 | |
d468e729 YZ |
1237 | int ceph_force_reconnect(struct super_block *sb) |
1238 | { | |
1239 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); | |
1240 | int err = 0; | |
1241 | ||
50c9132d JL |
1242 | fsc->mount_state = CEPH_MOUNT_RECOVER; |
1243 | __ceph_umount_begin(fsc); | |
d468e729 YZ |
1244 | |
1245 | /* Make sure all page caches get invalidated. | |
1246 | * see remove_session_caps_cb() */ | |
1247 | flush_workqueue(fsc->inode_wq); | |
1248 | ||
0b98acd6 | 1249 | /* In case that we were blocklisted. This also reset |
d468e729 YZ |
1250 | * all mon/osd connections */ |
1251 | ceph_reset_client_addr(fsc->client); | |
1252 | ||
1253 | ceph_osdc_clear_abort_err(&fsc->client->osdc); | |
131d7eb4 | 1254 | |
0b98acd6 | 1255 | fsc->blocklisted = false; |
d468e729 YZ |
1256 | fsc->mount_state = CEPH_MOUNT_MOUNTED; |
1257 | ||
1258 | if (sb->s_root) { | |
1259 | err = __ceph_do_getattr(d_inode(sb->s_root), NULL, | |
1260 | CEPH_STAT_CAP_INODE, true); | |
1261 | } | |
1262 | return err; | |
1263 | } | |
1264 | ||
16725b9d SW |
1265 | static int __init init_ceph(void) |
1266 | { | |
3d14c5d2 | 1267 | int ret = init_caches(); |
16725b9d | 1268 | if (ret) |
3d14c5d2 | 1269 | goto out; |
16725b9d | 1270 | |
eb13e832 | 1271 | ceph_flock_init(); |
16725b9d SW |
1272 | ret = register_filesystem(&ceph_fs_type); |
1273 | if (ret) | |
d0f191d2 | 1274 | goto out_caches; |
16725b9d | 1275 | |
3d14c5d2 YS |
1276 | pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); |
1277 | ||
16725b9d SW |
1278 | return 0; |
1279 | ||
d0f191d2 | 1280 | out_caches: |
16725b9d | 1281 | destroy_caches(); |
16725b9d SW |
1282 | out: |
1283 | return ret; | |
1284 | } | |
1285 | ||
1286 | static void __exit exit_ceph(void) | |
1287 | { | |
1288 | dout("exit_ceph\n"); | |
1289 | unregister_filesystem(&ceph_fs_type); | |
16725b9d | 1290 | destroy_caches(); |
16725b9d SW |
1291 | } |
1292 | ||
18f473b3 XL |
1293 | static int param_set_metrics(const char *val, const struct kernel_param *kp) |
1294 | { | |
1295 | struct ceph_fs_client *fsc; | |
1296 | int ret; | |
1297 | ||
1298 | ret = param_set_bool(val, kp); | |
1299 | if (ret) { | |
1300 | pr_err("Failed to parse sending metrics switch value '%s'\n", | |
1301 | val); | |
1302 | return ret; | |
1303 | } else if (!disable_send_metrics) { | |
1304 | // wake up all the mds clients | |
1305 | spin_lock(&ceph_fsc_lock); | |
1306 | list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) { | |
1307 | metric_schedule_delayed(&fsc->mdsc->metric); | |
1308 | } | |
1309 | spin_unlock(&ceph_fsc_lock); | |
1310 | } | |
1311 | ||
1312 | return 0; | |
1313 | } | |
1314 | ||
1315 | static const struct kernel_param_ops param_ops_metrics = { | |
1316 | .set = param_set_metrics, | |
1317 | .get = param_get_bool, | |
1318 | }; | |
1319 | ||
1320 | bool disable_send_metrics = false; | |
1321 | module_param_cb(disable_send_metrics, ¶m_ops_metrics, &disable_send_metrics, 0644); | |
1322 | MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)"); | |
1323 | ||
16725b9d SW |
1324 | module_init(init_ceph); |
1325 | module_exit(exit_ceph); | |
1326 | ||
1327 | MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); | |
1328 | MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); | |
1329 | MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); | |
1330 | MODULE_DESCRIPTION("Ceph filesystem for Linux"); | |
1331 | MODULE_LICENSE("GPL"); |