Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
1dc563a6 | 30 | * Copyright (c) 2011, 2015, Intel Corporation. |
d7e09d03 PT |
31 | */ |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * Implementation of cl_io for VVP layer. | |
37 | * | |
38 | * Author: Nikita Danilov <nikita.danilov@sun.com> | |
39 | * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com> | |
40 | */ | |
41 | ||
42 | #define DEBUG_SUBSYSTEM S_LLITE | |
43 | ||
67a235f5 GKH |
44 | #include "../include/obd.h" |
45 | #include "../include/lustre_lite.h" | |
d7e09d03 | 46 | |
0d345656 | 47 | #include "llite_internal.h" |
d7e09d03 PT |
48 | #include "vvp_internal.h" |
49 | ||
50 | static struct vvp_io *cl2vvp_io(const struct lu_env *env, | |
51 | const struct cl_io_slice *slice); | |
52 | ||
53 | /** | |
74c0da19 | 54 | * True, if \a io is a normal io, False for splice_{read,write} |
d7e09d03 PT |
55 | */ |
56 | int cl_is_normalio(const struct lu_env *env, const struct cl_io *io) | |
57 | { | |
58 | struct vvp_io *vio = vvp_env_io(env); | |
59 | ||
60 | LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); | |
61 | ||
62 | return vio->cui_io_subtype == IO_NORMAL; | |
63 | } | |
64 | ||
65 | /** | |
66 | * For swapping layout. The file's layout may have changed. | |
67 | * To avoid populating pages to a wrong stripe, we have to verify the | |
68 | * correctness of layout. It works because swapping layout processes | |
69 | * have to acquire group lock. | |
70 | */ | |
71 | static bool can_populate_pages(const struct lu_env *env, struct cl_io *io, | |
e15ba45d | 72 | struct inode *inode) |
d7e09d03 PT |
73 | { |
74 | struct ll_inode_info *lli = ll_i2info(inode); | |
75 | struct ccc_io *cio = ccc_env_io(env); | |
76 | bool rc = true; | |
77 | ||
78 | switch (io->ci_type) { | |
79 | case CIT_READ: | |
80 | case CIT_WRITE: | |
81 | /* don't need lock here to check lli_layout_gen as we have held | |
c0894c6c OD |
82 | * extent lock and GROUP lock has to hold to swap layout |
83 | */ | |
09aed8a5 | 84 | if (ll_layout_version_get(lli) != cio->cui_layout_gen) { |
d7e09d03 PT |
85 | io->ci_need_restart = 1; |
86 | /* this will return application a short read/write */ | |
87 | io->ci_continue = 0; | |
88 | rc = false; | |
89 | } | |
90 | case CIT_FAULT: | |
91 | /* fault is okay because we've already had a page. */ | |
92 | default: | |
93 | break; | |
94 | } | |
95 | ||
96 | return rc; | |
97 | } | |
98 | ||
99 | /***************************************************************************** | |
100 | * | |
101 | * io operations. | |
102 | * | |
103 | */ | |
104 | ||
77605e41 JX |
105 | static int vvp_io_write_iter_init(const struct lu_env *env, |
106 | const struct cl_io_slice *ios) | |
107 | { | |
108 | struct ccc_io *cio = cl2ccc_io(env, ios); | |
109 | ||
110 | cl_page_list_init(&cio->u.write.cui_queue); | |
111 | cio->u.write.cui_written = 0; | |
112 | cio->u.write.cui_from = 0; | |
113 | cio->u.write.cui_to = PAGE_SIZE; | |
114 | ||
115 | return 0; | |
116 | } | |
117 | ||
118 | static void vvp_io_write_iter_fini(const struct lu_env *env, | |
119 | const struct cl_io_slice *ios) | |
120 | { | |
121 | struct ccc_io *cio = cl2ccc_io(env, ios); | |
122 | ||
123 | LASSERT(cio->u.write.cui_queue.pl_nr == 0); | |
124 | } | |
125 | ||
d7e09d03 PT |
126 | static int vvp_io_fault_iter_init(const struct lu_env *env, |
127 | const struct cl_io_slice *ios) | |
128 | { | |
129 | struct vvp_io *vio = cl2vvp_io(env, ios); | |
8c7b0e1a | 130 | struct inode *inode = vvp_object_inode(ios->cis_obj); |
d7e09d03 PT |
131 | |
132 | LASSERT(inode == | |
2a8a3597 | 133 | file_inode(cl2ccc_io(env, ios)->cui_fd->fd_file)); |
46c360f9 | 134 | vio->u.fault.ft_mtime = inode->i_mtime.tv_sec; |
d7e09d03 PT |
135 | return 0; |
136 | } | |
137 | ||
138 | static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios) | |
139 | { | |
140 | struct cl_io *io = ios->cis_io; | |
141 | struct cl_object *obj = io->ci_obj; | |
142 | struct ccc_io *cio = cl2ccc_io(env, ios); | |
143 | ||
8c7b0e1a | 144 | CLOBINVRNT(env, obj, vvp_object_invariant(obj)); |
d7e09d03 | 145 | |
5ea17d6c JL |
146 | CDEBUG(D_VFSTRACE, DFID |
147 | " ignore/verify layout %d/%d, layout version %d restore needed %d\n", | |
148 | PFID(lu_object_fid(&obj->co_lu)), | |
149 | io->ci_ignore_layout, io->ci_verify_layout, | |
150 | cio->cui_layout_gen, io->ci_restore_needed); | |
151 | ||
152 | if (io->ci_restore_needed == 1) { | |
153 | int rc; | |
154 | ||
155 | /* file was detected release, we need to restore it | |
156 | * before finishing the io | |
157 | */ | |
8c7b0e1a | 158 | rc = ll_layout_restore(vvp_object_inode(obj)); |
5ea17d6c | 159 | /* if restore registration failed, no restart, |
c0894c6c OD |
160 | * we will return -ENODATA |
161 | */ | |
5ea17d6c JL |
162 | /* The layout will change after restore, so we need to |
163 | * block on layout lock hold by the MDT | |
164 | * as MDT will not send new layout in lvb (see LU-3124) | |
165 | * we have to explicitly fetch it, all this will be done | |
166 | * by ll_layout_refresh() | |
167 | */ | |
168 | if (rc == 0) { | |
169 | io->ci_restore_needed = 0; | |
170 | io->ci_need_restart = 1; | |
171 | io->ci_verify_layout = 1; | |
172 | } else { | |
173 | io->ci_restore_needed = 1; | |
174 | io->ci_need_restart = 0; | |
175 | io->ci_verify_layout = 0; | |
176 | io->ci_result = rc; | |
177 | } | |
178 | } | |
d7e09d03 PT |
179 | |
180 | if (!io->ci_ignore_layout && io->ci_verify_layout) { | |
181 | __u32 gen = 0; | |
182 | ||
183 | /* check layout version */ | |
8c7b0e1a | 184 | ll_layout_refresh(vvp_object_inode(obj), &gen); |
d7e09d03 | 185 | io->ci_need_restart = cio->cui_layout_gen != gen; |
5ea17d6c JL |
186 | if (io->ci_need_restart) { |
187 | CDEBUG(D_VFSTRACE, | |
188 | DFID" layout changed from %d to %d.\n", | |
189 | PFID(lu_object_fid(&obj->co_lu)), | |
190 | cio->cui_layout_gen, gen); | |
c0894c6c | 191 | /* today successful restore is the only possible case */ |
5ea17d6c | 192 | /* restore was done, clear restoring state */ |
8c7b0e1a | 193 | ll_i2info(vvp_object_inode(obj))->lli_flags &= |
5ea17d6c JL |
194 | ~LLIF_FILE_RESTORING; |
195 | } | |
d7e09d03 PT |
196 | } |
197 | } | |
198 | ||
199 | static void vvp_io_fault_fini(const struct lu_env *env, | |
200 | const struct cl_io_slice *ios) | |
201 | { | |
202 | struct cl_io *io = ios->cis_io; | |
203 | struct cl_page *page = io->u.ci_fault.ft_page; | |
204 | ||
8c7b0e1a | 205 | CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj)); |
d7e09d03 | 206 | |
6e16818b | 207 | if (page) { |
d7e09d03 PT |
208 | lu_ref_del(&page->cp_reference, "fault", io); |
209 | cl_page_put(env, page); | |
210 | io->u.ci_fault.ft_page = NULL; | |
211 | } | |
212 | vvp_io_fini(env, ios); | |
213 | } | |
214 | ||
2d95f10e | 215 | static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma) |
d7e09d03 PT |
216 | { |
217 | /* | |
218 | * we only want to hold PW locks if the mmap() can generate | |
219 | * writes back to the file and that only happens in shared | |
220 | * writable vmas | |
221 | */ | |
222 | if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) | |
223 | return CLM_WRITE; | |
224 | return CLM_READ; | |
225 | } | |
226 | ||
227 | static int vvp_mmap_locks(const struct lu_env *env, | |
228 | struct ccc_io *vio, struct cl_io *io) | |
229 | { | |
230 | struct ccc_thread_info *cti = ccc_env_info(env); | |
231 | struct mm_struct *mm = current->mm; | |
232 | struct vm_area_struct *vma; | |
233 | struct cl_lock_descr *descr = &cti->cti_descr; | |
234 | ldlm_policy_data_t policy; | |
235 | unsigned long addr; | |
d7e09d03 | 236 | ssize_t count; |
06563b56 | 237 | int result = 0; |
b42b15fd AV |
238 | struct iov_iter i; |
239 | struct iovec iov; | |
d7e09d03 PT |
240 | |
241 | LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); | |
242 | ||
243 | if (!cl_is_normalio(env, io)) | |
0a3bdb00 | 244 | return 0; |
d7e09d03 | 245 | |
6e16818b | 246 | if (!vio->cui_iter) /* nfs or loop back device write */ |
0a3bdb00 | 247 | return 0; |
d7e09d03 PT |
248 | |
249 | /* No MM (e.g. NFS)? No vmas too. */ | |
6e16818b | 250 | if (!mm) |
0a3bdb00 | 251 | return 0; |
d7e09d03 | 252 | |
b42b15fd AV |
253 | iov_for_each(iov, i, *(vio->cui_iter)) { |
254 | addr = (unsigned long)iov.iov_base; | |
255 | count = iov.iov_len; | |
d7e09d03 PT |
256 | if (count == 0) |
257 | continue; | |
258 | ||
616387e8 OD |
259 | count += addr & (~PAGE_MASK); |
260 | addr &= PAGE_MASK; | |
d7e09d03 PT |
261 | |
262 | down_read(&mm->mmap_sem); | |
a58a38ac | 263 | while ((vma = our_vma(mm, addr, count)) != NULL) { |
2a8a3597 | 264 | struct inode *inode = file_inode(vma->vm_file); |
d7e09d03 PT |
265 | int flags = CEF_MUST; |
266 | ||
267 | if (ll_file_nolock(vma->vm_file)) { | |
268 | /* | |
06563b56 | 269 | * For no lock case is not allowed for mmap |
d7e09d03 | 270 | */ |
06563b56 JX |
271 | result = -EINVAL; |
272 | break; | |
d7e09d03 PT |
273 | } |
274 | ||
275 | /* | |
276 | * XXX: Required lock mode can be weakened: CIT_WRITE | |
277 | * io only ever reads user level buffer, and CIT_READ | |
278 | * only writes on it. | |
279 | */ | |
280 | policy_from_vma(&policy, vma, addr, count); | |
281 | descr->cld_mode = vvp_mode_from_vma(vma); | |
282 | descr->cld_obj = ll_i2info(inode)->lli_clob; | |
283 | descr->cld_start = cl_index(descr->cld_obj, | |
284 | policy.l_extent.start); | |
285 | descr->cld_end = cl_index(descr->cld_obj, | |
286 | policy.l_extent.end); | |
287 | descr->cld_enq_flags = flags; | |
288 | result = cl_io_lock_alloc_add(env, io, descr); | |
289 | ||
290 | CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n", | |
291 | descr->cld_mode, descr->cld_start, | |
292 | descr->cld_end); | |
293 | ||
06563b56 JX |
294 | if (result < 0) |
295 | break; | |
d7e09d03 PT |
296 | |
297 | if (vma->vm_end - addr >= count) | |
298 | break; | |
299 | ||
300 | count -= vma->vm_end - addr; | |
301 | addr = vma->vm_end; | |
302 | } | |
303 | up_read(&mm->mmap_sem); | |
06563b56 JX |
304 | if (result < 0) |
305 | break; | |
d7e09d03 | 306 | } |
06563b56 | 307 | return result; |
d7e09d03 PT |
308 | } |
309 | ||
310 | static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io, | |
311 | enum cl_lock_mode mode, loff_t start, loff_t end) | |
312 | { | |
313 | struct ccc_io *cio = ccc_env_io(env); | |
314 | int result; | |
315 | int ast_flags = 0; | |
316 | ||
317 | LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE); | |
d7e09d03 PT |
318 | |
319 | ccc_io_update_iov(env, cio, io); | |
320 | ||
321 | if (io->u.ci_rw.crw_nonblock) | |
322 | ast_flags |= CEF_NONBLOCK; | |
323 | result = vvp_mmap_locks(env, cio, io); | |
324 | if (result == 0) | |
325 | result = ccc_io_one_lock(env, io, ast_flags, mode, start, end); | |
0a3bdb00 | 326 | return result; |
d7e09d03 PT |
327 | } |
328 | ||
329 | static int vvp_io_read_lock(const struct lu_env *env, | |
330 | const struct cl_io_slice *ios) | |
331 | { | |
4c309612 JX |
332 | struct cl_io *io = ios->cis_io; |
333 | struct cl_io_rw_common *rd = &io->u.ci_rd.rd; | |
d7e09d03 PT |
334 | int result; |
335 | ||
4c309612 JX |
336 | result = vvp_io_rw_lock(env, io, CLM_READ, rd->crw_pos, |
337 | rd->crw_pos + rd->crw_count - 1); | |
338 | ||
0a3bdb00 | 339 | return result; |
d7e09d03 PT |
340 | } |
341 | ||
342 | static int vvp_io_fault_lock(const struct lu_env *env, | |
343 | const struct cl_io_slice *ios) | |
344 | { | |
345 | struct cl_io *io = ios->cis_io; | |
346 | struct vvp_io *vio = cl2vvp_io(env, ios); | |
347 | /* | |
348 | * XXX LDLM_FL_CBPENDING | |
349 | */ | |
350 | return ccc_io_one_lock_index | |
351 | (env, io, 0, vvp_mode_from_vma(vio->u.fault.ft_vma), | |
352 | io->u.ci_fault.ft_index, io->u.ci_fault.ft_index); | |
353 | } | |
354 | ||
355 | static int vvp_io_write_lock(const struct lu_env *env, | |
356 | const struct cl_io_slice *ios) | |
357 | { | |
358 | struct cl_io *io = ios->cis_io; | |
359 | loff_t start; | |
360 | loff_t end; | |
361 | ||
362 | if (io->u.ci_wr.wr_append) { | |
363 | start = 0; | |
364 | end = OBD_OBJECT_EOF; | |
365 | } else { | |
366 | start = io->u.ci_wr.wr.crw_pos; | |
367 | end = start + io->u.ci_wr.wr.crw_count - 1; | |
368 | } | |
369 | return vvp_io_rw_lock(env, io, CLM_WRITE, start, end); | |
370 | } | |
371 | ||
/**
 * Iteration init for a setattr io. There is nothing to prepare at this
 * layer; always returns 0.
 */
static int vvp_io_setattr_iter_init(const struct lu_env *env,
				    const struct cl_io_slice *ios)
{
	/* no per-iteration state to set up */
	return 0;
}
377 | ||
378 | /** | |
379 | * Implementation of cl_io_operations::cio_lock() method for CIT_SETATTR io. | |
380 | * | |
381 | * Handles "lockless io" mode when extent locking is done by server. | |
382 | */ | |
383 | static int vvp_io_setattr_lock(const struct lu_env *env, | |
384 | const struct cl_io_slice *ios) | |
385 | { | |
386 | struct ccc_io *cio = ccc_env_io(env); | |
387 | struct cl_io *io = ios->cis_io; | |
388 | __u64 new_size; | |
389 | __u32 enqflags = 0; | |
390 | ||
391 | if (cl_io_is_trunc(io)) { | |
392 | new_size = io->u.ci_setattr.sa_attr.lvb_size; | |
393 | if (new_size == 0) | |
394 | enqflags = CEF_DISCARD_DATA; | |
395 | } else { | |
396 | if ((io->u.ci_setattr.sa_attr.lvb_mtime >= | |
397 | io->u.ci_setattr.sa_attr.lvb_ctime) || | |
398 | (io->u.ci_setattr.sa_attr.lvb_atime >= | |
399 | io->u.ci_setattr.sa_attr.lvb_ctime)) | |
400 | return 0; | |
401 | new_size = 0; | |
402 | } | |
403 | cio->u.setattr.cui_local_lock = SETATTR_EXTENT_LOCK; | |
404 | return ccc_io_one_lock(env, io, enqflags, CLM_WRITE, | |
405 | new_size, OBD_OBJECT_EOF); | |
406 | } | |
407 | ||
408 | static int vvp_do_vmtruncate(struct inode *inode, size_t size) | |
409 | { | |
410 | int result; | |
411 | /* | |
412 | * Only ll_inode_size_lock is taken at this level. | |
413 | */ | |
414 | ll_inode_size_lock(inode); | |
415 | result = inode_newsize_ok(inode, size); | |
416 | if (result < 0) { | |
417 | ll_inode_size_unlock(inode); | |
418 | return result; | |
419 | } | |
420 | truncate_setsize(inode, size); | |
421 | ll_inode_size_unlock(inode); | |
422 | return result; | |
423 | } | |
424 | ||
425 | static int vvp_io_setattr_trunc(const struct lu_env *env, | |
426 | const struct cl_io_slice *ios, | |
427 | struct inode *inode, loff_t size) | |
428 | { | |
429 | inode_dio_wait(inode); | |
430 | return 0; | |
431 | } | |
432 | ||
433 | static int vvp_io_setattr_time(const struct lu_env *env, | |
434 | const struct cl_io_slice *ios) | |
435 | { | |
436 | struct cl_io *io = ios->cis_io; | |
437 | struct cl_object *obj = io->ci_obj; | |
438 | struct cl_attr *attr = ccc_env_thread_attr(env); | |
439 | int result; | |
440 | unsigned valid = CAT_CTIME; | |
441 | ||
442 | cl_object_attr_lock(obj); | |
443 | attr->cat_ctime = io->u.ci_setattr.sa_attr.lvb_ctime; | |
444 | if (io->u.ci_setattr.sa_valid & ATTR_ATIME_SET) { | |
445 | attr->cat_atime = io->u.ci_setattr.sa_attr.lvb_atime; | |
446 | valid |= CAT_ATIME; | |
447 | } | |
448 | if (io->u.ci_setattr.sa_valid & ATTR_MTIME_SET) { | |
449 | attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime; | |
450 | valid |= CAT_MTIME; | |
451 | } | |
452 | result = cl_object_attr_set(env, obj, attr, valid); | |
453 | cl_object_attr_unlock(obj); | |
454 | ||
455 | return result; | |
456 | } | |
457 | ||
458 | static int vvp_io_setattr_start(const struct lu_env *env, | |
459 | const struct cl_io_slice *ios) | |
460 | { | |
461 | struct cl_io *io = ios->cis_io; | |
8c7b0e1a | 462 | struct inode *inode = vvp_object_inode(io->ci_obj); |
5dd16419 | 463 | int result = 0; |
d7e09d03 | 464 | |
5955102c | 465 | inode_lock(inode); |
d7e09d03 | 466 | if (cl_io_is_trunc(io)) |
5dd16419 JX |
467 | result = vvp_io_setattr_trunc(env, ios, inode, |
468 | io->u.ci_setattr.sa_attr.lvb_size); | |
469 | if (result == 0) | |
470 | result = vvp_io_setattr_time(env, ios); | |
471 | return result; | |
d7e09d03 PT |
472 | } |
473 | ||
474 | static void vvp_io_setattr_end(const struct lu_env *env, | |
475 | const struct cl_io_slice *ios) | |
476 | { | |
477 | struct cl_io *io = ios->cis_io; | |
8c7b0e1a | 478 | struct inode *inode = vvp_object_inode(io->ci_obj); |
d7e09d03 | 479 | |
81e053c7 | 480 | if (cl_io_is_trunc(io)) |
d7e09d03 | 481 | /* Truncate in memory pages - they must be clean pages |
c0894c6c OD |
482 | * because osc has already notified to destroy osc_extents. |
483 | */ | |
d7e09d03 | 484 | vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size); |
81e053c7 | 485 | |
5955102c | 486 | inode_unlock(inode); |
d7e09d03 PT |
487 | } |
488 | ||
/**
 * Finalize a setattr io; nothing beyond the common vvp_io_fini() is
 * needed.
 */
static void vvp_io_setattr_fini(const struct lu_env *env,
				const struct cl_io_slice *ios)
{
	/* setattr has no private state to release */
	vvp_io_fini(env, ios);
}
494 | ||
d7e09d03 PT |
495 | static int vvp_io_read_start(const struct lu_env *env, |
496 | const struct cl_io_slice *ios) | |
497 | { | |
498 | struct vvp_io *vio = cl2vvp_io(env, ios); | |
499 | struct ccc_io *cio = cl2ccc_io(env, ios); | |
500 | struct cl_io *io = ios->cis_io; | |
501 | struct cl_object *obj = io->ci_obj; | |
8c7b0e1a | 502 | struct inode *inode = vvp_object_inode(obj); |
d7e09d03 PT |
503 | struct ll_ra_read *bead = &vio->cui_bead; |
504 | struct file *file = cio->cui_fd->fd_file; | |
505 | ||
506 | int result; | |
507 | loff_t pos = io->u.ci_rd.rd.crw_pos; | |
508 | long cnt = io->u.ci_rd.rd.crw_count; | |
509 | long tot = cio->cui_tot_count; | |
510 | int exceed = 0; | |
511 | ||
8c7b0e1a | 512 | CLOBINVRNT(env, obj, vvp_object_invariant(obj)); |
d7e09d03 PT |
513 | |
514 | CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt); | |
515 | ||
516 | if (!can_populate_pages(env, io, inode)) | |
517 | return 0; | |
518 | ||
519 | result = ccc_prep_size(env, obj, io, pos, tot, &exceed); | |
520 | if (result != 0) | |
521 | return result; | |
522 | else if (exceed != 0) | |
523 | goto out; | |
524 | ||
525 | LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, | |
e15ba45d OD |
526 | "Read ino %lu, %lu bytes, offset %lld, size %llu\n", |
527 | inode->i_ino, cnt, pos, i_size_read(inode)); | |
d7e09d03 PT |
528 | |
529 | /* turn off the kernel's read-ahead */ | |
530 | cio->cui_fd->fd_file->f_ra.ra_pages = 0; | |
531 | ||
532 | /* initialize read-ahead window once per syscall */ | |
533 | if (!vio->cui_ra_window_set) { | |
534 | vio->cui_ra_window_set = 1; | |
535 | bead->lrr_start = cl_index(obj, pos); | |
536 | /* | |
537 | * XXX: explicit PAGE_CACHE_SIZE | |
538 | */ | |
539 | bead->lrr_count = cl_index(obj, tot + PAGE_CACHE_SIZE - 1); | |
540 | ll_ra_read_in(file, bead); | |
541 | } | |
542 | ||
543 | /* BUG: 5972 */ | |
544 | file_accessed(file); | |
545 | switch (vio->cui_io_subtype) { | |
546 | case IO_NORMAL: | |
74c0da19 | 547 | LASSERT(cio->cui_iocb->ki_pos == pos); |
b42b15fd | 548 | result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter); |
74c0da19 | 549 | break; |
d7e09d03 PT |
550 | case IO_SPLICE: |
551 | result = generic_file_splice_read(file, &pos, | |
e15ba45d OD |
552 | vio->u.splice.cui_pipe, cnt, |
553 | vio->u.splice.cui_flags); | |
d7e09d03 PT |
554 | /* LU-1109: do splice read stripe by stripe otherwise if it |
555 | * may make nfsd stuck if this read occupied all internal pipe | |
c0894c6c OD |
556 | * buffers. |
557 | */ | |
d7e09d03 PT |
558 | io->ci_continue = 0; |
559 | break; | |
560 | default: | |
561 | CERROR("Wrong IO type %u\n", vio->cui_io_subtype); | |
562 | LBUG(); | |
563 | } | |
564 | ||
565 | out: | |
566 | if (result >= 0) { | |
567 | if (result < cnt) | |
568 | io->ci_continue = 0; | |
569 | io->ci_nob += result; | |
570 | ll_rw_stats_tally(ll_i2sbi(inode), current->pid, | |
4f37bc04 | 571 | cio->cui_fd, pos, result, READ); |
d7e09d03 PT |
572 | result = 0; |
573 | } | |
574 | return result; | |
575 | } | |
576 | ||
577 | static void vvp_io_read_fini(const struct lu_env *env, const struct cl_io_slice *ios) | |
578 | { | |
579 | struct vvp_io *vio = cl2vvp_io(env, ios); | |
580 | struct ccc_io *cio = cl2ccc_io(env, ios); | |
581 | ||
582 | if (vio->cui_ra_window_set) | |
583 | ll_ra_read_ex(cio->cui_fd->fd_file, &vio->cui_bead); | |
584 | ||
585 | vvp_io_fini(env, ios); | |
586 | } | |
587 | ||
77605e41 JX |
588 | static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io, |
589 | struct cl_page_list *plist, int from, int to) | |
590 | { | |
591 | struct cl_2queue *queue = &io->ci_queue; | |
592 | struct cl_page *page; | |
593 | unsigned int bytes = 0; | |
594 | int rc = 0; | |
595 | ||
596 | if (plist->pl_nr == 0) | |
597 | return 0; | |
598 | ||
c11599b8 | 599 | if (from > 0 || to != PAGE_SIZE) { |
77605e41 | 600 | page = cl_page_list_first(plist); |
c11599b8 JX |
601 | if (plist->pl_nr == 1) { |
602 | cl_page_clip(env, page, from, to); | |
c11599b8 | 603 | } else { |
902a34ad LD |
604 | if (from > 0) |
605 | cl_page_clip(env, page, from, PAGE_SIZE); | |
606 | if (to != PAGE_SIZE) { | |
607 | page = cl_page_list_last(plist); | |
608 | cl_page_clip(env, page, 0, to); | |
609 | } | |
610 | } | |
c11599b8 | 611 | } |
77605e41 JX |
612 | |
613 | cl_2queue_init(queue); | |
614 | cl_page_list_splice(plist, &queue->c2_qin); | |
615 | rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0); | |
616 | ||
617 | /* plist is not sorted any more */ | |
618 | cl_page_list_splice(&queue->c2_qin, plist); | |
619 | cl_page_list_splice(&queue->c2_qout, plist); | |
620 | cl_2queue_fini(env, queue); | |
621 | ||
622 | if (rc == 0) { | |
623 | /* calculate bytes */ | |
624 | bytes = plist->pl_nr << PAGE_SHIFT; | |
625 | bytes -= from + PAGE_SIZE - to; | |
626 | ||
627 | while (plist->pl_nr > 0) { | |
628 | page = cl_page_list_first(plist); | |
629 | cl_page_list_del(env, plist, page); | |
630 | ||
631 | cl_page_clip(env, page, 0, PAGE_SIZE); | |
632 | ||
7addf402 | 633 | SetPageUptodate(cl_page_vmpage(page)); |
77605e41 JX |
634 | cl_page_disown(env, io, page); |
635 | ||
636 | /* held in ll_cl_init() */ | |
637 | lu_ref_del(&page->cp_reference, "cl_io", io); | |
638 | cl_page_put(env, page); | |
639 | } | |
640 | } | |
641 | ||
642 | return bytes > 0 ? bytes : rc; | |
643 | } | |
644 | ||
645 | static void write_commit_callback(const struct lu_env *env, struct cl_io *io, | |
646 | struct cl_page *page) | |
647 | { | |
3a52f803 | 648 | struct vvp_page *vpg; |
7addf402 JX |
649 | struct page *vmpage = page->cp_vmpage; |
650 | struct cl_object *clob = cl_io_top(io)->ci_obj; | |
77605e41 JX |
651 | |
652 | SetPageUptodate(vmpage); | |
653 | set_page_dirty(vmpage); | |
7addf402 | 654 | |
3a52f803 JH |
655 | vpg = cl2vvp_page(cl_object_page_slice(clob, page)); |
656 | vvp_write_pending(cl2vvp(clob), vpg); | |
77605e41 JX |
657 | |
658 | cl_page_disown(env, io, page); | |
659 | ||
660 | /* held in ll_cl_init() */ | |
661 | lu_ref_del(&page->cp_reference, "cl_io", io); | |
662 | cl_page_put(env, page); | |
663 | } | |
664 | ||
665 | /* make sure the page list is contiguous */ | |
7addf402 JX |
666 | static bool page_list_sanity_check(struct cl_object *obj, |
667 | struct cl_page_list *plist) | |
77605e41 JX |
668 | { |
669 | struct cl_page *page; | |
670 | pgoff_t index = CL_PAGE_EOF; | |
671 | ||
672 | cl_page_list_for_each(page, plist) { | |
3a52f803 | 673 | struct vvp_page *vpg = cl_object_page_slice(obj, page); |
7addf402 | 674 | |
77605e41 | 675 | if (index == CL_PAGE_EOF) { |
3a52f803 | 676 | index = vvp_index(vpg); |
77605e41 JX |
677 | continue; |
678 | } | |
679 | ||
680 | ++index; | |
3a52f803 | 681 | if (index == vvp_index(vpg)) |
77605e41 JX |
682 | continue; |
683 | ||
684 | return false; | |
685 | } | |
686 | return true; | |
687 | } | |
688 | ||
689 | /* Return how many bytes have queued or written */ | |
690 | int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io) | |
691 | { | |
692 | struct cl_object *obj = io->ci_obj; | |
8c7b0e1a | 693 | struct inode *inode = vvp_object_inode(obj); |
77605e41 JX |
694 | struct ccc_io *cio = ccc_env_io(env); |
695 | struct cl_page_list *queue = &cio->u.write.cui_queue; | |
696 | struct cl_page *page; | |
697 | int rc = 0; | |
698 | int bytes = 0; | |
699 | unsigned int npages = cio->u.write.cui_queue.pl_nr; | |
700 | ||
701 | if (npages == 0) | |
702 | return 0; | |
703 | ||
704 | CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n", | |
705 | npages, cio->u.write.cui_from, cio->u.write.cui_to); | |
706 | ||
7addf402 | 707 | LASSERT(page_list_sanity_check(obj, queue)); |
77605e41 JX |
708 | |
709 | /* submit IO with async write */ | |
710 | rc = cl_io_commit_async(env, io, queue, | |
711 | cio->u.write.cui_from, cio->u.write.cui_to, | |
712 | write_commit_callback); | |
713 | npages -= queue->pl_nr; /* already committed pages */ | |
714 | if (npages > 0) { | |
715 | /* calculate how many bytes were written */ | |
716 | bytes = npages << PAGE_SHIFT; | |
717 | ||
718 | /* first page */ | |
719 | bytes -= cio->u.write.cui_from; | |
720 | if (queue->pl_nr == 0) /* last page */ | |
721 | bytes -= PAGE_SIZE - cio->u.write.cui_to; | |
722 | LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages); | |
723 | ||
724 | cio->u.write.cui_written += bytes; | |
725 | ||
726 | CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n", | |
727 | npages, bytes, cio->u.write.cui_written); | |
728 | ||
729 | /* the first page must have been written. */ | |
730 | cio->u.write.cui_from = 0; | |
731 | } | |
7addf402 | 732 | LASSERT(page_list_sanity_check(obj, queue)); |
77605e41 JX |
733 | LASSERT(ergo(rc == 0, queue->pl_nr == 0)); |
734 | ||
735 | /* out of quota, try sync write */ | |
736 | if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) { | |
737 | rc = vvp_io_commit_sync(env, io, queue, | |
738 | cio->u.write.cui_from, | |
739 | cio->u.write.cui_to); | |
740 | if (rc > 0) { | |
741 | cio->u.write.cui_written += rc; | |
742 | rc = 0; | |
743 | } | |
744 | } | |
745 | ||
746 | /* update inode size */ | |
d2995737 | 747 | ll_merge_attr(env, inode); |
77605e41 JX |
748 | |
749 | /* Now the pages in queue were failed to commit, discard them | |
750 | * unless they were dirtied before. | |
751 | */ | |
752 | while (queue->pl_nr > 0) { | |
753 | page = cl_page_list_first(queue); | |
754 | cl_page_list_del(env, queue, page); | |
755 | ||
7addf402 | 756 | if (!PageDirty(cl_page_vmpage(page))) |
77605e41 JX |
757 | cl_page_discard(env, io, page); |
758 | ||
759 | cl_page_disown(env, io, page); | |
760 | ||
761 | /* held in ll_cl_init() */ | |
762 | lu_ref_del(&page->cp_reference, "cl_io", io); | |
763 | cl_page_put(env, page); | |
764 | } | |
765 | cl_page_list_fini(env, queue); | |
766 | ||
767 | return rc; | |
768 | } | |
769 | ||
d7e09d03 PT |
770 | static int vvp_io_write_start(const struct lu_env *env, |
771 | const struct cl_io_slice *ios) | |
772 | { | |
773 | struct ccc_io *cio = cl2ccc_io(env, ios); | |
774 | struct cl_io *io = ios->cis_io; | |
775 | struct cl_object *obj = io->ci_obj; | |
8c7b0e1a | 776 | struct inode *inode = vvp_object_inode(obj); |
d7e09d03 PT |
777 | ssize_t result = 0; |
778 | loff_t pos = io->u.ci_wr.wr.crw_pos; | |
779 | size_t cnt = io->u.ci_wr.wr.crw_count; | |
780 | ||
d7e09d03 PT |
781 | if (!can_populate_pages(env, io, inode)) |
782 | return 0; | |
783 | ||
784 | if (cl_io_is_append(io)) { | |
785 | /* | |
786 | * PARALLEL IO This has to be changed for parallel IO doing | |
787 | * out-of-order writes. | |
788 | */ | |
06563b56 | 789 | ll_merge_attr(env, inode); |
d7e09d03 PT |
790 | pos = io->u.ci_wr.wr.crw_pos = i_size_read(inode); |
791 | cio->cui_iocb->ki_pos = pos; | |
74c0da19 JX |
792 | } else { |
793 | LASSERT(cio->cui_iocb->ki_pos == pos); | |
d7e09d03 PT |
794 | } |
795 | ||
796 | CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt); | |
797 | ||
6e16818b | 798 | if (!cio->cui_iter) /* from a temp io in ll_cl_init(). */ |
d7e09d03 PT |
799 | result = 0; |
800 | else | |
b42b15fd AV |
801 | result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter); |
802 | ||
d7e09d03 | 803 | if (result > 0) { |
77605e41 JX |
804 | result = vvp_io_write_commit(env, io); |
805 | if (cio->u.write.cui_written > 0) { | |
806 | result = cio->u.write.cui_written; | |
807 | io->ci_nob += result; | |
808 | ||
809 | CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n", | |
810 | io->ci_nob, result); | |
811 | } | |
812 | } | |
813 | if (result > 0) { | |
814 | struct ll_inode_info *lli = ll_i2info(inode); | |
815 | ||
816 | spin_lock(&lli->lli_lock); | |
817 | lli->lli_flags |= LLIF_DATA_MODIFIED; | |
818 | spin_unlock(&lli->lli_lock); | |
819 | ||
d7e09d03 PT |
820 | if (result < cnt) |
821 | io->ci_continue = 0; | |
d7e09d03 | 822 | ll_rw_stats_tally(ll_i2sbi(inode), current->pid, |
4f37bc04 | 823 | cio->cui_fd, pos, result, WRITE); |
d7e09d03 PT |
824 | result = 0; |
825 | } | |
0a3bdb00 | 826 | return result; |
d7e09d03 PT |
827 | } |
828 | ||
829 | static int vvp_io_kernel_fault(struct vvp_fault_io *cfio) | |
830 | { | |
831 | struct vm_fault *vmf = cfio->fault.ft_vmf; | |
832 | ||
833 | cfio->fault.ft_flags = filemap_fault(cfio->ft_vma, vmf); | |
6aa51072 | 834 | cfio->fault.ft_flags_valid = 1; |
d7e09d03 PT |
835 | |
836 | if (vmf->page) { | |
aa3bee0d GKH |
837 | CDEBUG(D_PAGE, |
838 | "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n", | |
839 | vmf->page, vmf->page->mapping, vmf->page->index, | |
840 | (long)vmf->page->flags, page_count(vmf->page), | |
841 | page_private(vmf->page), vmf->virtual_address); | |
d7e09d03 PT |
842 | if (unlikely(!(cfio->fault.ft_flags & VM_FAULT_LOCKED))) { |
843 | lock_page(vmf->page); | |
733bd244 | 844 | cfio->fault.ft_flags |= VM_FAULT_LOCKED; |
d7e09d03 PT |
845 | } |
846 | ||
847 | cfio->ft_vmpage = vmf->page; | |
848 | return 0; | |
849 | } | |
850 | ||
33692f27 | 851 | if (cfio->fault.ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) { |
d7e09d03 PT |
852 | CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address); |
853 | return -EFAULT; | |
854 | } | |
855 | ||
856 | if (cfio->fault.ft_flags & VM_FAULT_OOM) { | |
857 | CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address); | |
858 | return -ENOMEM; | |
859 | } | |
860 | ||
861 | if (cfio->fault.ft_flags & VM_FAULT_RETRY) | |
862 | return -EAGAIN; | |
863 | ||
d0a0acc3 | 864 | CERROR("Unknown error in page fault %d!\n", cfio->fault.ft_flags); |
d7e09d03 PT |
865 | return -EINVAL; |
866 | } | |
867 | ||
77605e41 JX |
868 | static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io, |
869 | struct cl_page *page) | |
870 | { | |
3a52f803 | 871 | struct vvp_page *vpg; |
7addf402 | 872 | struct cl_object *clob = cl_io_top(io)->ci_obj; |
77605e41 | 873 | |
7addf402 | 874 | set_page_dirty(page->cp_vmpage); |
77605e41 | 875 | |
3a52f803 JH |
876 | vpg = cl2vvp_page(cl_object_page_slice(clob, page)); |
877 | vvp_write_pending(cl2vvp(clob), vpg); | |
77605e41 JX |
878 | } |
879 | ||
d7e09d03 PT |
880 | static int vvp_io_fault_start(const struct lu_env *env, |
881 | const struct cl_io_slice *ios) | |
882 | { | |
883 | struct vvp_io *vio = cl2vvp_io(env, ios); | |
884 | struct cl_io *io = ios->cis_io; | |
885 | struct cl_object *obj = io->ci_obj; | |
8c7b0e1a | 886 | struct inode *inode = vvp_object_inode(obj); |
d7e09d03 PT |
887 | struct cl_fault_io *fio = &io->u.ci_fault; |
888 | struct vvp_fault_io *cfio = &vio->u.fault; | |
889 | loff_t offset; | |
890 | int result = 0; | |
891 | struct page *vmpage = NULL; | |
892 | struct cl_page *page; | |
893 | loff_t size; | |
77605e41 | 894 | pgoff_t last_index; |
d7e09d03 PT |
895 | |
896 | if (fio->ft_executable && | |
46c360f9 | 897 | inode->i_mtime.tv_sec != vio->u.fault.ft_mtime) |
d7e09d03 PT |
898 | CWARN("binary "DFID |
899 | " changed while waiting for the page fault lock\n", | |
900 | PFID(lu_object_fid(&obj->co_lu))); | |
901 | ||
902 | /* offset of the last byte on the page */ | |
903 | offset = cl_offset(obj, fio->ft_index + 1) - 1; | |
904 | LASSERT(cl_index(obj, offset) == fio->ft_index); | |
905 | result = ccc_prep_size(env, obj, io, 0, offset + 1, NULL); | |
906 | if (result != 0) | |
907 | return result; | |
908 | ||
909 | /* must return locked page */ | |
910 | if (fio->ft_mkwrite) { | |
6e16818b | 911 | LASSERT(cfio->ft_vmpage); |
d7e09d03 PT |
912 | lock_page(cfio->ft_vmpage); |
913 | } else { | |
914 | result = vvp_io_kernel_fault(cfio); | |
915 | if (result != 0) | |
916 | return result; | |
917 | } | |
918 | ||
919 | vmpage = cfio->ft_vmpage; | |
920 | LASSERT(PageLocked(vmpage)); | |
921 | ||
922 | if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE)) | |
923 | ll_invalidate_page(vmpage); | |
924 | ||
925 | size = i_size_read(inode); | |
926 | /* Though we have already held a cl_lock upon this page, but | |
c0894c6c OD |
927 | * it still can be truncated locally. |
928 | */ | |
d7e09d03 PT |
929 | if (unlikely((vmpage->mapping != inode->i_mapping) || |
930 | (page_offset(vmpage) > size))) { | |
931 | CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n"); | |
932 | ||
933 | /* return +1 to stop cl_io_loop() and ll_fault() will catch | |
c0894c6c OD |
934 | * and retry. |
935 | */ | |
b2952d62 | 936 | result = 1; |
34e1f2bb | 937 | goto out; |
d7e09d03 PT |
938 | } |
939 | ||
77605e41 JX |
940 | last_index = cl_index(obj, size - 1); |
941 | ||
557732ad | 942 | if (fio->ft_mkwrite) { |
d7e09d03 PT |
943 | /* |
944 | * Capture the size while holding the lli_trunc_sem from above | |
945 | * we want to make sure that we complete the mkwrite action | |
946 | * while holding this lock. We need to make sure that we are | |
947 | * not past the end of the file. | |
948 | */ | |
d7e09d03 PT |
949 | if (last_index < fio->ft_index) { |
950 | CDEBUG(D_PAGE, | |
2d00bd17 JP |
951 | "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n", |
952 | vmpage->mapping, fio->ft_index, last_index); | |
d7e09d03 PT |
953 | /* |
954 | * We need to return if we are | |
955 | * passed the end of the file. This will propagate | |
956 | * up the call stack to ll_page_mkwrite where | |
957 | * we will return VM_FAULT_NOPAGE. Any non-negative | |
958 | * value returned here will be silently | |
959 | * converted to 0. If the vmpage->mapping is null | |
960 | * the error code would be converted back to ENODATA | |
961 | * in ll_page_mkwrite0. Thus we return -ENODATA | |
962 | * to handle both cases | |
963 | */ | |
34e1f2bb JL |
964 | result = -ENODATA; |
965 | goto out; | |
d7e09d03 PT |
966 | } |
967 | } | |
968 | ||
969 | page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE); | |
34e1f2bb JL |
970 | if (IS_ERR(page)) { |
971 | result = PTR_ERR(page); | |
972 | goto out; | |
973 | } | |
d7e09d03 PT |
974 | |
975 | /* if page is going to be written, we should add this page into cache | |
c0894c6c OD |
976 | * earlier. |
977 | */ | |
d7e09d03 PT |
978 | if (fio->ft_mkwrite) { |
979 | wait_on_page_writeback(vmpage); | |
77605e41 JX |
980 | if (!PageDirty(vmpage)) { |
981 | struct cl_page_list *plist = &io->ci_queue.c2_qin; | |
3a52f803 | 982 | struct vvp_page *vpg = cl_object_page_slice(obj, page); |
77605e41 | 983 | int to = PAGE_SIZE; |
d7e09d03 PT |
984 | |
985 | /* vvp_page_assume() calls wait_on_page_writeback(). */ | |
986 | cl_page_assume(env, io, page); | |
987 | ||
77605e41 JX |
988 | cl_page_list_init(plist); |
989 | cl_page_list_add(plist, page); | |
990 | ||
991 | /* size fixup */ | |
3a52f803 | 992 | if (last_index == vvp_index(vpg)) |
77605e41 | 993 | to = size & ~PAGE_MASK; |
d7e09d03 PT |
994 | |
995 | /* Do not set Dirty bit here so that in case IO is | |
996 | * started before the page is really made dirty, we | |
c0894c6c OD |
997 | * still have chance to detect it. |
998 | */ | |
77605e41 JX |
999 | result = cl_io_commit_async(env, io, plist, 0, to, |
1000 | mkwrite_commit_callback); | |
d7e09d03 | 1001 | LASSERT(cl_page_is_owned(page, io)); |
77605e41 | 1002 | cl_page_list_fini(env, plist); |
d7e09d03 PT |
1003 | |
1004 | vmpage = NULL; | |
1005 | if (result < 0) { | |
d7e09d03 PT |
1006 | cl_page_discard(env, io, page); |
1007 | cl_page_disown(env, io, page); | |
1008 | ||
1009 | cl_page_put(env, page); | |
1010 | ||
1011 | /* we're in big trouble, what can we do now? */ | |
1012 | if (result == -EDQUOT) | |
1013 | result = -ENOSPC; | |
34e1f2bb | 1014 | goto out; |
d7e09d03 PT |
1015 | } else |
1016 | cl_page_disown(env, io, page); | |
1017 | } | |
1018 | } | |
1019 | ||
d7e09d03 PT |
1020 | /* |
1021 | * The ft_index is only used in the case of | |
1022 | * a mkwrite action. We need to check | |
1023 | * our assertions are correct, since | |
1024 | * we should have caught this above | |
1025 | */ | |
77605e41 JX |
1026 | LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index); |
1027 | if (fio->ft_index == last_index) | |
d7e09d03 PT |
1028 | /* |
1029 | * Last page is mapped partially. | |
1030 | */ | |
1031 | fio->ft_nob = size - cl_offset(obj, fio->ft_index); | |
1032 | else | |
1033 | fio->ft_nob = cl_page_size(obj); | |
1034 | ||
1035 | lu_ref_add(&page->cp_reference, "fault", io); | |
1036 | fio->ft_page = page; | |
d7e09d03 PT |
1037 | |
1038 | out: | |
1039 | /* return unlocked vmpage to avoid deadlocking */ | |
6e16818b | 1040 | if (vmpage) |
d7e09d03 PT |
1041 | unlock_page(vmpage); |
1042 | cfio->fault.ft_flags &= ~VM_FAULT_LOCKED; | |
1043 | return result; | |
1044 | } | |
1045 | ||
/*
 * cio_start handler for CIT_FSYNC: intentionally does nothing at this
 * layer and always reports success.
 */
static int vvp_io_fsync_start(const struct lu_env *env,
			      const struct cl_io_slice *ios)
{
	/*
	 * Ideally every dirty page in the radix tree would be tagged
	 * TOWRITE here so that we could later verify that all of them
	 * were written, but doing that is difficult because of races.
	 */
	return 0;
}
1055 | ||
1056 | static int vvp_io_read_page(const struct lu_env *env, | |
1057 | const struct cl_io_slice *ios, | |
1058 | const struct cl_page_slice *slice) | |
1059 | { | |
1060 | struct cl_io *io = ios->cis_io; | |
3a52f803 | 1061 | struct vvp_page *vpg = cl2vvp_page(slice); |
d7e09d03 | 1062 | struct cl_page *page = slice->cpl_page; |
8c7b0e1a | 1063 | struct inode *inode = vvp_object_inode(slice->cpl_obj); |
d7e09d03 PT |
1064 | struct ll_sb_info *sbi = ll_i2sbi(inode); |
1065 | struct ll_file_data *fd = cl2ccc_io(env, ios)->cui_fd; | |
1066 | struct ll_readahead_state *ras = &fd->fd_ras; | |
d7e09d03 | 1067 | struct cl_2queue *queue = &io->ci_queue; |
d7e09d03 | 1068 | |
d7e09d03 PT |
1069 | if (sbi->ll_ra_info.ra_max_pages_per_file && |
1070 | sbi->ll_ra_info.ra_max_pages) | |
3a52f803 JH |
1071 | ras_update(sbi, inode, ras, vvp_index(vpg), |
1072 | vpg->vpg_defer_uptodate); | |
d7e09d03 | 1073 | |
3a52f803 JH |
1074 | if (vpg->vpg_defer_uptodate) { |
1075 | vpg->vpg_ra_used = 1; | |
d7e09d03 PT |
1076 | cl_page_export(env, page, 1); |
1077 | } | |
1078 | /* | |
1079 | * Add page into the queue even when it is marked uptodate above. | |
1080 | * this will unlock it automatically as part of cl_page_list_disown(). | |
1081 | */ | |
fd7444fe | 1082 | |
53f1a127 | 1083 | cl_page_list_add(&queue->c2_qin, page); |
d7e09d03 PT |
1084 | if (sbi->ll_ra_info.ra_max_pages_per_file && |
1085 | sbi->ll_ra_info.ra_max_pages) | |
fd7444fe | 1086 | ll_readahead(env, io, &queue->c2_qin, ras, |
3a52f803 | 1087 | vpg->vpg_defer_uptodate); |
d7e09d03 | 1088 | |
0a3bdb00 | 1089 | return 0; |
d7e09d03 PT |
1090 | } |
1091 | ||
d7e09d03 PT |
1092 | static const struct cl_io_operations vvp_io_ops = { |
1093 | .op = { | |
1094 | [CIT_READ] = { | |
1095 | .cio_fini = vvp_io_read_fini, | |
1096 | .cio_lock = vvp_io_read_lock, | |
1097 | .cio_start = vvp_io_read_start, | |
1098 | .cio_advance = ccc_io_advance | |
1099 | }, | |
1100 | [CIT_WRITE] = { | |
1101 | .cio_fini = vvp_io_fini, | |
77605e41 JX |
1102 | .cio_iter_init = vvp_io_write_iter_init, |
1103 | .cio_iter_fini = vvp_io_write_iter_fini, | |
d7e09d03 PT |
1104 | .cio_lock = vvp_io_write_lock, |
1105 | .cio_start = vvp_io_write_start, | |
1106 | .cio_advance = ccc_io_advance | |
1107 | }, | |
1108 | [CIT_SETATTR] = { | |
1109 | .cio_fini = vvp_io_setattr_fini, | |
1110 | .cio_iter_init = vvp_io_setattr_iter_init, | |
1111 | .cio_lock = vvp_io_setattr_lock, | |
1112 | .cio_start = vvp_io_setattr_start, | |
1113 | .cio_end = vvp_io_setattr_end | |
1114 | }, | |
1115 | [CIT_FAULT] = { | |
1116 | .cio_fini = vvp_io_fault_fini, | |
1117 | .cio_iter_init = vvp_io_fault_iter_init, | |
1118 | .cio_lock = vvp_io_fault_lock, | |
1119 | .cio_start = vvp_io_fault_start, | |
1120 | .cio_end = ccc_io_end | |
1121 | }, | |
1122 | [CIT_FSYNC] = { | |
1123 | .cio_start = vvp_io_fsync_start, | |
1124 | .cio_fini = vvp_io_fini | |
1125 | }, | |
1126 | [CIT_MISC] = { | |
1127 | .cio_fini = vvp_io_fini | |
1128 | } | |
1129 | }, | |
1130 | .cio_read_page = vvp_io_read_page, | |
d7e09d03 PT |
1131 | }; |
1132 | ||
1133 | int vvp_io_init(const struct lu_env *env, struct cl_object *obj, | |
1134 | struct cl_io *io) | |
1135 | { | |
1136 | struct vvp_io *vio = vvp_env_io(env); | |
1137 | struct ccc_io *cio = ccc_env_io(env); | |
8c7b0e1a | 1138 | struct inode *inode = vvp_object_inode(obj); |
d7e09d03 PT |
1139 | int result; |
1140 | ||
8c7b0e1a | 1141 | CLOBINVRNT(env, obj, vvp_object_invariant(obj)); |
d7e09d03 | 1142 | |
5ea17d6c JL |
1143 | CDEBUG(D_VFSTRACE, DFID |
1144 | " ignore/verify layout %d/%d, layout version %d restore needed %d\n", | |
1145 | PFID(lu_object_fid(&obj->co_lu)), | |
1146 | io->ci_ignore_layout, io->ci_verify_layout, | |
1147 | cio->cui_layout_gen, io->ci_restore_needed); | |
1148 | ||
d7e09d03 PT |
1149 | CL_IO_SLICE_CLEAN(cio, cui_cl); |
1150 | cl_io_slice_add(io, &cio->cui_cl, obj, &vvp_io_ops); | |
1151 | vio->cui_ra_window_set = 0; | |
1152 | result = 0; | |
1153 | if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) { | |
1154 | size_t count; | |
1155 | struct ll_inode_info *lli = ll_i2info(inode); | |
1156 | ||
1157 | count = io->u.ci_rw.crw_count; | |
1158 | /* "If nbyte is 0, read() will return 0 and have no other | |
c0894c6c OD |
1159 | * results." -- Single Unix Spec |
1160 | */ | |
d7e09d03 PT |
1161 | if (count == 0) |
1162 | result = 1; | |
b42b15fd | 1163 | else |
d7e09d03 | 1164 | cio->cui_tot_count = count; |
b42b15fd | 1165 | |
d7e09d03 PT |
1166 | /* for read/write, we store the jobid in the inode, and |
1167 | * it'll be fetched by osc when building RPC. | |
1168 | * | |
1169 | * it's not accurate if the file is shared by different | |
1170 | * jobs. | |
1171 | */ | |
1172 | lustre_get_jobid(lli->lli_jobid); | |
1173 | } else if (io->ci_type == CIT_SETATTR) { | |
1174 | if (!cl_io_is_trunc(io)) | |
1175 | io->ci_lockreq = CILR_MANDATORY; | |
1176 | } | |
1177 | ||
1178 | /* ignore layout change for generic CIT_MISC but not for glimpse. | |
1179 | * io context for glimpse must set ci_verify_layout to true, | |
c0894c6c OD |
1180 | * see cl_glimpse_size0() for details. |
1181 | */ | |
d7e09d03 PT |
1182 | if (io->ci_type == CIT_MISC && !io->ci_verify_layout) |
1183 | io->ci_ignore_layout = 1; | |
1184 | ||
1185 | /* Enqueue layout lock and get layout version. We need to do this | |
1186 | * even for operations requiring to open file, such as read and write, | |
c0894c6c OD |
1187 | * because it might not grant layout lock in IT_OPEN. |
1188 | */ | |
65fb55d1 | 1189 | if (result == 0 && !io->ci_ignore_layout) { |
d7e09d03 | 1190 | result = ll_layout_refresh(inode, &cio->cui_layout_gen); |
65fb55d1 NY |
1191 | if (result == -ENOENT) |
1192 | /* If the inode on MDS has been removed, but the objects | |
1193 | * on OSTs haven't been destroyed (async unlink), layout | |
d0a0acc3 | 1194 | * fetch will return -ENOENT, we'd ignore this error |
c0894c6c OD |
1195 | * and continue with dirty flush. LU-3230. |
1196 | */ | |
65fb55d1 NY |
1197 | result = 0; |
1198 | if (result < 0) | |
1199 | CERROR("%s: refresh file layout " DFID " error %d.\n", | |
e15ba45d OD |
1200 | ll_get_fsname(inode->i_sb, NULL, 0), |
1201 | PFID(lu_object_fid(&obj->co_lu)), result); | |
65fb55d1 | 1202 | } |
d7e09d03 | 1203 | |
0a3bdb00 | 1204 | return result; |
d7e09d03 PT |
1205 | } |
1206 | ||
/*
 * Return the per-environment vvp_io for @env.  @slice is only handed to
 * cl2ccc_io(), whose result is discarded; that call is made for the
 * sake of its assertion.
 */
static struct vvp_io *cl2vvp_io(const struct lu_env *env,
				const struct cl_io_slice *slice)
{
	/* Result deliberately unused: called just for the assertion. */
	(void)cl2ccc_io(env, slice);

	return vvp_env_io(env);
}