Commit | Line | Data |
---|---|---|
c93407d0 BH |
1 | /* |
2 | * pNFS Objects layout implementation over open-osd initiator library | |
3 | * | |
4 | * Copyright (C) 2009 Panasas Inc. [year of first publication] | |
5 | * All rights reserved. | |
6 | * | |
7 | * Benny Halevy <bhalevy@panasas.com> | |
aa281ac6 | 8 | * Boaz Harrosh <ooo@electrozaur.com> |
c93407d0 BH |
9 | * |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of the GNU General Public License version 2 | |
12 | * See the file COPYING included with this distribution for more details. | |
13 | * | |
14 | * Redistribution and use in source and binary forms, with or without | |
15 | * modification, are permitted provided that the following conditions | |
16 | * are met: | |
17 | * | |
18 | * 1. Redistributions of source code must retain the above copyright | |
19 | * notice, this list of conditions and the following disclaimer. | |
20 | * 2. Redistributions in binary form must reproduce the above copyright | |
21 | * notice, this list of conditions and the following disclaimer in the | |
22 | * documentation and/or other materials provided with the distribution. | |
23 | * 3. Neither the name of the Panasas company nor the names of its | |
24 | * contributors may be used to endorse or promote products derived | |
25 | * from this software without specific prior written permission. | |
26 | * | |
27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | |
28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | |
34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
38 | */ | |
39 | ||
40 | #include <linux/module.h> | |
af4f5b54 | 41 | #include <scsi/osd_ore.h> |
09f5bf4e BH |
42 | |
43 | #include "objlayout.h" | |
6296556f | 44 | #include "../internal.h" |
09f5bf4e BH |
45 | |
46 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | |
47 | ||
b6c05f16 BH |
48 | struct objio_dev_ent { |
49 | struct nfs4_deviceid_node id_node; | |
af4f5b54 | 50 | struct ore_dev od; |
b6c05f16 BH |
51 | }; |
52 | ||
53 | static void | |
54 | objio_free_deviceid_node(struct nfs4_deviceid_node *d) | |
55 | { | |
56 | struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); | |
57 | ||
af4f5b54 BH |
58 | dprintk("%s: free od=%p\n", __func__, de->od.od); |
59 | osduld_put_device(de->od.od); | |
84a80f62 | 60 | kfree_rcu(d, rcu); |
b6c05f16 BH |
61 | } |
62 | ||
09f5bf4e BH |
63 | struct objio_segment { |
64 | struct pnfs_layout_segment lseg; | |
65 | ||
af4f5b54 BH |
66 | struct ore_layout layout; |
67 | struct ore_components oc; | |
09f5bf4e BH |
68 | }; |
69 | ||
70 | static inline struct objio_segment * | |
71 | OBJIO_LSEG(struct pnfs_layout_segment *lseg) | |
72 | { | |
73 | return container_of(lseg, struct objio_segment, lseg); | |
74 | } | |
75 | ||
04f83450 BH |
76 | struct objio_state { |
77 | /* Generic layer */ | |
e2e04355 | 78 | struct objlayout_io_res oir; |
04f83450 | 79 | |
96218556 | 80 | bool sync; |
eecfc631 BH |
81 | /*FIXME: Support for extra_bytes at ore_get_rw_state() */ |
82 | struct ore_io_state *ios; | |
04f83450 BH |
83 | }; |
84 | ||
b6c05f16 BH |
85 | /* Send and wait for a get_device_info of devices in the layout, |
86 | then look them up with the osd_initiator library */ | |
661373b1 CH |
87 | struct nfs4_deviceid_node * |
88 | objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | |
89 | gfp_t gfp_flags) | |
b6c05f16 BH |
90 | { |
91 | struct pnfs_osd_deviceaddr *deviceaddr; | |
661373b1 | 92 | struct objio_dev_ent *ode = NULL; |
b6c05f16 BH |
93 | struct osd_dev *od; |
94 | struct osd_dev_info odi; | |
18d98f6c | 95 | bool retry_flag = true; |
fd41b474 | 96 | __be32 *p; |
b6c05f16 BH |
97 | int err; |
98 | ||
661373b1 CH |
99 | deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags); |
100 | if (!deviceaddr) | |
101 | return NULL; | |
b6c05f16 | 102 | |
661373b1 CH |
103 | p = page_address(pdev->pages[0]); |
104 | pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p); | |
b6c05f16 BH |
105 | |
106 | odi.systemid_len = deviceaddr->oda_systemid.len; | |
107 | if (odi.systemid_len > sizeof(odi.systemid)) { | |
af4f5b54 BH |
108 | dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n", |
109 | __func__, sizeof(odi.systemid)); | |
b6c05f16 BH |
110 | err = -EINVAL; |
111 | goto out; | |
112 | } else if (odi.systemid_len) | |
113 | memcpy(odi.systemid, deviceaddr->oda_systemid.data, | |
114 | odi.systemid_len); | |
115 | odi.osdname_len = deviceaddr->oda_osdname.len; | |
116 | odi.osdname = (u8 *)deviceaddr->oda_osdname.data; | |
117 | ||
118 | if (!odi.osdname_len && !odi.systemid_len) { | |
119 | dprintk("%s: !odi.osdname_len && !odi.systemid_len\n", | |
120 | __func__); | |
121 | err = -ENODEV; | |
122 | goto out; | |
123 | } | |
124 | ||
18d98f6c | 125 | retry_lookup: |
b6c05f16 | 126 | od = osduld_info_lookup(&odi); |
a1c83681 | 127 | if (IS_ERR(od)) { |
b6c05f16 BH |
128 | err = PTR_ERR(od); |
129 | dprintk("%s: osduld_info_lookup => %d\n", __func__, err); | |
18d98f6c SB |
130 | if (err == -ENODEV && retry_flag) { |
131 | err = objlayout_autologin(deviceaddr); | |
132 | if (likely(!err)) { | |
133 | retry_flag = false; | |
134 | goto retry_lookup; | |
135 | } | |
136 | } | |
b6c05f16 BH |
137 | goto out; |
138 | } | |
139 | ||
af4f5b54 | 140 | dprintk("Adding new dev_id(%llx:%llx)\n", |
661373b1 CH |
141 | _DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id)); |
142 | ||
143 | ode = kzalloc(sizeof(*ode), gfp_flags); | |
144 | if (!ode) { | |
145 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); | |
146 | goto out; | |
147 | } | |
148 | ||
149 | nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id); | |
150 | kfree(deviceaddr); | |
151 | ||
152 | ode->od.od = od; | |
153 | return &ode->id_node; | |
154 | ||
b6c05f16 | 155 | out: |
661373b1 CH |
156 | kfree(deviceaddr); |
157 | return NULL; | |
b6c05f16 BH |
158 | } |
159 | ||
af4f5b54 BH |
160 | static void copy_single_comp(struct ore_components *oc, unsigned c, |
161 | struct pnfs_osd_object_cred *src_comp) | |
09f5bf4e | 162 | { |
af4f5b54 | 163 | struct ore_comp *ocomp = &oc->comps[c]; |
09f5bf4e | 164 | |
af4f5b54 BH |
165 | WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */ |
166 | WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred)); | |
09f5bf4e | 167 | |
af4f5b54 BH |
168 | ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id; |
169 | ocomp->obj.id = src_comp->oc_object_id.oid_object_id; | |
09f5bf4e | 170 | |
af4f5b54 BH |
171 | memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); |
172 | } | |
173 | ||
1385b811 | 174 | static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, |
af4f5b54 BH |
175 | struct objio_segment **pseg) |
176 | { | |
5318a29c BH |
177 | /* This is the in memory structure of the objio_segment |
178 | * | |
179 | * struct __alloc_objio_segment { | |
180 | * struct objio_segment olseg; | |
181 | * struct ore_dev *ods[numdevs]; | |
182 | * struct ore_comp comps[numdevs]; | |
183 | * } *aolseg; | |
184 | * NOTE: The code as above compiles and runs perfectly. It is elegant, | |
185 | * type safe and compact. At some Past time Linus has decided he does not | |
186 | * like variable length arrays, For the sake of this principal we uglify | |
187 | * the code as below. | |
188 | */ | |
189 | struct objio_segment *lseg; | |
190 | size_t lseg_size = sizeof(*lseg) + | |
191 | numdevs * sizeof(lseg->oc.ods[0]) + | |
192 | numdevs * sizeof(*lseg->oc.comps); | |
193 | ||
194 | lseg = kzalloc(lseg_size, gfp_flags); | |
195 | if (unlikely(!lseg)) { | |
a895d57d | 196 | dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__, |
5318a29c | 197 | numdevs, lseg_size); |
af4f5b54 BH |
198 | return -ENOMEM; |
199 | } | |
200 | ||
5318a29c BH |
201 | lseg->oc.numdevs = numdevs; |
202 | lseg->oc.single_comp = EC_MULTPLE_COMPS; | |
203 | lseg->oc.ods = (void *)(lseg + 1); | |
204 | lseg->oc.comps = (void *)(lseg->oc.ods + numdevs); | |
af4f5b54 | 205 | |
5318a29c | 206 | *pseg = lseg; |
af4f5b54 | 207 | return 0; |
09f5bf4e BH |
208 | } |
209 | ||
210 | int objio_alloc_lseg(struct pnfs_layout_segment **outp, | |
211 | struct pnfs_layout_hdr *pnfslay, | |
212 | struct pnfs_layout_range *range, | |
213 | struct xdr_stream *xdr, | |
214 | gfp_t gfp_flags) | |
215 | { | |
661373b1 | 216 | struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode); |
09f5bf4e BH |
217 | struct objio_segment *objio_seg; |
218 | struct pnfs_osd_xdr_decode_layout_iter iter; | |
219 | struct pnfs_osd_layout layout; | |
af4f5b54 BH |
220 | struct pnfs_osd_object_cred src_comp; |
221 | unsigned cur_comp; | |
09f5bf4e BH |
222 | int err; |
223 | ||
224 | err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); | |
225 | if (unlikely(err)) | |
226 | return err; | |
227 | ||
af4f5b54 | 228 | err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg); |
09f5bf4e BH |
229 | if (unlikely(err)) |
230 | return err; | |
231 | ||
af4f5b54 BH |
232 | objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit; |
233 | objio_seg->layout.group_width = layout.olo_map.odm_group_width; | |
234 | objio_seg->layout.group_depth = layout.olo_map.odm_group_depth; | |
235 | objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; | |
236 | objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm; | |
09f5bf4e | 237 | |
af4f5b54 BH |
238 | err = ore_verify_layout(layout.olo_map.odm_num_comps, |
239 | &objio_seg->layout); | |
09f5bf4e BH |
240 | if (unlikely(err)) |
241 | goto err; | |
242 | ||
af4f5b54 BH |
243 | objio_seg->oc.first_dev = layout.olo_comps_index; |
244 | cur_comp = 0; | |
245 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { | |
661373b1 CH |
246 | struct nfs4_deviceid_node *d; |
247 | struct objio_dev_ent *ode; | |
248 | ||
af4f5b54 | 249 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); |
661373b1 CH |
250 | |
251 | d = nfs4_find_get_deviceid(server, | |
252 | &src_comp.oc_object_id.oid_device_id, | |
253 | pnfslay->plh_lc_cred, gfp_flags); | |
254 | if (!d) { | |
255 | err = -ENXIO; | |
af4f5b54 | 256 | goto err; |
661373b1 CH |
257 | } |
258 | ||
259 | ode = container_of(d, struct objio_dev_ent, id_node); | |
260 | objio_seg->oc.ods[cur_comp++] = &ode->od; | |
09f5bf4e | 261 | } |
af4f5b54 BH |
262 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ |
263 | if (unlikely(err)) | |
264 | goto err; | |
93420770 | 265 | |
09f5bf4e BH |
266 | *outp = &objio_seg->lseg; |
267 | return 0; | |
268 | ||
269 | err: | |
270 | kfree(objio_seg); | |
271 | dprintk("%s: Error: return %d\n", __func__, err); | |
272 | *outp = NULL; | |
273 | return err; | |
274 | } | |
275 | ||
276 | void objio_free_lseg(struct pnfs_layout_segment *lseg) | |
277 | { | |
b6c05f16 | 278 | int i; |
09f5bf4e BH |
279 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); |
280 | ||
af4f5b54 BH |
281 | for (i = 0; i < objio_seg->oc.numdevs; i++) { |
282 | struct ore_dev *od = objio_seg->oc.ods[i]; | |
283 | struct objio_dev_ent *ode; | |
284 | ||
285 | if (!od) | |
b6c05f16 | 286 | break; |
af4f5b54 BH |
287 | ode = container_of(od, typeof(*ode), od); |
288 | nfs4_put_deviceid_node(&ode->id_node); | |
b6c05f16 | 289 | } |
09f5bf4e BH |
290 | kfree(objio_seg); |
291 | } | |
292 | ||
96218556 | 293 | static int |
eecfc631 | 294 | objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading, |
96218556 BH |
295 | struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, |
296 | loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, | |
297 | struct objio_state **outp) | |
04f83450 BH |
298 | { |
299 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); | |
eecfc631 BH |
300 | struct ore_io_state *ios; |
301 | int ret; | |
96218556 BH |
302 | struct __alloc_objio_state { |
303 | struct objio_state objios; | |
af4f5b54 | 304 | struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; |
96218556 BH |
305 | } *aos; |
306 | ||
307 | aos = kzalloc(sizeof(*aos), gfp_flags); | |
308 | if (unlikely(!aos)) | |
04f83450 BH |
309 | return -ENOMEM; |
310 | ||
af4f5b54 | 311 | objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, |
96218556 BH |
312 | aos->ioerrs, rpcdata, pnfs_layout_type); |
313 | ||
eecfc631 BH |
314 | ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading, |
315 | offset, count, &ios); | |
316 | if (unlikely(ret)) { | |
317 | kfree(aos); | |
318 | return ret; | |
319 | } | |
320 | ||
96218556 BH |
321 | ios->pages = pages; |
322 | ios->pgbase = pgbase; | |
eecfc631 | 323 | ios->private = aos; |
96218556 BH |
324 | BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); |
325 | ||
eecfc631 BH |
326 | aos->objios.sync = 0; |
327 | aos->objios.ios = ios; | |
328 | *outp = &aos->objios; | |
04f83450 BH |
329 | return 0; |
330 | } | |
331 | ||
e2e04355 | 332 | void objio_free_result(struct objlayout_io_res *oir) |
04f83450 | 333 | { |
eecfc631 | 334 | struct objio_state *objios = container_of(oir, struct objio_state, oir); |
04f83450 | 335 | |
eecfc631 BH |
336 | ore_put_io_state(objios->ios); |
337 | kfree(objios); | |
04f83450 BH |
338 | } |
339 | ||
2e928e48 | 340 | static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) |
adb58535 BH |
341 | { |
342 | switch (oep) { | |
343 | case OSD_ERR_PRI_NO_ERROR: | |
344 | return (enum pnfs_osd_errno)0; | |
345 | ||
346 | case OSD_ERR_PRI_CLEAR_PAGES: | |
347 | BUG_ON(1); | |
348 | return 0; | |
349 | ||
350 | case OSD_ERR_PRI_RESOURCE: | |
351 | return PNFS_OSD_ERR_RESOURCE; | |
352 | case OSD_ERR_PRI_BAD_CRED: | |
353 | return PNFS_OSD_ERR_BAD_CRED; | |
354 | case OSD_ERR_PRI_NO_ACCESS: | |
355 | return PNFS_OSD_ERR_NO_ACCESS; | |
356 | case OSD_ERR_PRI_UNREACHABLE: | |
357 | return PNFS_OSD_ERR_UNREACHABLE; | |
358 | case OSD_ERR_PRI_NOT_FOUND: | |
359 | return PNFS_OSD_ERR_NOT_FOUND; | |
360 | case OSD_ERR_PRI_NO_SPACE: | |
361 | return PNFS_OSD_ERR_NO_SPACE; | |
362 | default: | |
363 | WARN_ON(1); | |
364 | /* fallthrough */ | |
365 | case OSD_ERR_PRI_EIO: | |
366 | return PNFS_OSD_ERR_EIO; | |
367 | } | |
368 | } | |
369 | ||
eecfc631 | 370 | static void __on_dev_error(struct ore_io_state *ios, |
af4f5b54 BH |
371 | struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, |
372 | u64 dev_offset, u64 dev_len) | |
373 | { | |
374 | struct objio_state *objios = ios->private; | |
375 | struct pnfs_osd_objid pooid; | |
376 | struct objio_dev_ent *ode = container_of(od, typeof(*ode), od); | |
377 | /* FIXME: what to do with more-then-one-group layouts. We need to | |
378 | * translate from ore_io_state index to oc->comps index | |
379 | */ | |
380 | unsigned comp = dev_index; | |
381 | ||
382 | pooid.oid_device_id = ode->id_node.deviceid; | |
383 | pooid.oid_partition_id = ios->oc->comps[comp].obj.partition; | |
384 | pooid.oid_object_id = ios->oc->comps[comp].obj.id; | |
385 | ||
386 | objlayout_io_set_result(&objios->oir, comp, | |
387 | &pooid, osd_pri_2_pnfs_err(oep), | |
eecfc631 | 388 | dev_offset, dev_len, !ios->reading); |
af4f5b54 BH |
389 | } |
390 | ||
04f83450 BH |
391 | /* |
392 | * read | |
393 | */ | |
eecfc631 | 394 | static void _read_done(struct ore_io_state *ios, void *private) |
04f83450 | 395 | { |
eecfc631 | 396 | struct objio_state *objios = private; |
04f83450 | 397 | ssize_t status; |
eecfc631 | 398 | int ret = ore_check_io(ios, &__on_dev_error); |
04f83450 | 399 | |
eecfc631 | 400 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
04f83450 BH |
401 | |
402 | if (likely(!ret)) | |
403 | status = ios->length; | |
404 | else | |
405 | status = ret; | |
406 | ||
eecfc631 | 407 | objlayout_read_done(&objios->oir, status, objios->sync); |
04f83450 BH |
408 | } |
409 | ||
d45f60c6 | 410 | int objio_read_pagelist(struct nfs_pgio_header *hdr) |
04f83450 | 411 | { |
eecfc631 | 412 | struct objio_state *objios; |
04f83450 BH |
413 | int ret; |
414 | ||
cd841605 | 415 | ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, |
d45f60c6 WAA |
416 | hdr->lseg, hdr->args.pages, hdr->args.pgbase, |
417 | hdr->args.offset, hdr->args.count, hdr, | |
eecfc631 | 418 | GFP_KERNEL, &objios); |
04f83450 BH |
419 | if (unlikely(ret)) |
420 | return ret; | |
421 | ||
eecfc631 BH |
422 | objios->ios->done = _read_done; |
423 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, | |
d45f60c6 | 424 | hdr->args.offset, hdr->args.count); |
9909d45a BH |
425 | ret = ore_read(objios->ios); |
426 | if (unlikely(ret)) | |
427 | objio_free_result(&objios->oir); | |
428 | return ret; | |
04f83450 BH |
429 | } |
430 | ||
431 | /* | |
432 | * write | |
433 | */ | |
eecfc631 | 434 | static void _write_done(struct ore_io_state *ios, void *private) |
04f83450 | 435 | { |
eecfc631 | 436 | struct objio_state *objios = private; |
04f83450 | 437 | ssize_t status; |
eecfc631 | 438 | int ret = ore_check_io(ios, &__on_dev_error); |
04f83450 | 439 | |
eecfc631 | 440 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
04f83450 BH |
441 | |
442 | if (likely(!ret)) { | |
443 | /* FIXME: should be based on the OSD's persistence model | |
444 | * See OSD2r05 Section 4.13 Data persistence model */ | |
eecfc631 | 445 | objios->oir.committed = NFS_FILE_SYNC; |
04f83450 BH |
446 | status = ios->length; |
447 | } else { | |
448 | status = ret; | |
449 | } | |
450 | ||
eecfc631 | 451 | objlayout_write_done(&objios->oir, status, objios->sync); |
04f83450 BH |
452 | } |
453 | ||
278c023a BH |
454 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) |
455 | { | |
456 | struct objio_state *objios = priv; | |
d45f60c6 WAA |
457 | struct nfs_pgio_header *hdr = objios->oir.rpcdata; |
458 | struct address_space *mapping = hdr->inode->i_mapping; | |
278c023a | 459 | pgoff_t index = offset / PAGE_SIZE; |
c999ff68 | 460 | struct page *page; |
d45f60c6 | 461 | loff_t i_size = i_size_read(hdr->inode); |
278c023a | 462 | |
c999ff68 BH |
463 | if (offset >= i_size) { |
464 | *uptodate = true; | |
465 | dprintk("%s: g_zero_page index=0x%lx\n", __func__, index); | |
466 | return ZERO_PAGE(0); | |
467 | } | |
468 | ||
469 | page = find_get_page(mapping, index); | |
278c023a | 470 | if (!page) { |
cd841605 | 471 | page = find_or_create_page(mapping, index, GFP_NOFS); |
278c023a BH |
472 | if (unlikely(!page)) { |
473 | dprintk("%s: grab_cache_page Failed index=0x%lx\n", | |
474 | __func__, index); | |
475 | return NULL; | |
476 | } | |
477 | unlock_page(page); | |
478 | } | |
3066a967 | 479 | *uptodate = PageUptodate(page); |
278c023a BH |
480 | dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate); |
481 | return page; | |
482 | } | |
483 | ||
484 | static void __r4w_put_page(void *priv, struct page *page) | |
485 | { | |
c999ff68 BH |
486 | dprintk("%s: index=0x%lx\n", __func__, |
487 | (page == ZERO_PAGE(0)) ? -1UL : page->index); | |
488 | if (ZERO_PAGE(0) != page) | |
09cbfeaf | 489 | put_page(page); |
278c023a BH |
490 | return; |
491 | } | |
492 | ||
493 | static const struct _ore_r4w_op _r4w_op = { | |
494 | .get_page = &__r4w_get_page, | |
495 | .put_page = &__r4w_put_page, | |
496 | }; | |
497 | ||
d45f60c6 | 498 | int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) |
04f83450 | 499 | { |
eecfc631 | 500 | struct objio_state *objios; |
04f83450 BH |
501 | int ret; |
502 | ||
cd841605 | 503 | ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, |
d45f60c6 WAA |
504 | hdr->lseg, hdr->args.pages, hdr->args.pgbase, |
505 | hdr->args.offset, hdr->args.count, hdr, GFP_NOFS, | |
eecfc631 | 506 | &objios); |
96218556 BH |
507 | if (unlikely(ret)) |
508 | return ret; | |
509 | ||
eecfc631 | 510 | objios->sync = 0 != (how & FLUSH_SYNC); |
278c023a | 511 | objios->ios->r4w = &_r4w_op; |
96218556 | 512 | |
eecfc631 BH |
513 | if (!objios->sync) |
514 | objios->ios->done = _write_done; | |
515 | ||
516 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, | |
d45f60c6 | 517 | hdr->args.offset, hdr->args.count); |
eecfc631 | 518 | ret = ore_write(objios->ios); |
9909d45a BH |
519 | if (unlikely(ret)) { |
520 | objio_free_result(&objios->oir); | |
04f83450 | 521 | return ret; |
9909d45a | 522 | } |
04f83450 | 523 | |
eecfc631 BH |
524 | if (objios->sync) |
525 | _write_done(objios->ios, objios); | |
526 | ||
527 | return 0; | |
04f83450 BH |
528 | } |
529 | ||
b4fdac1a WAA |
530 | /* |
531 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number | |
532 | * of bytes (maximum @req->wb_bytes) that can be coalesced. | |
533 | */ | |
534 | static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, | |
93420770 BH |
535 | struct nfs_page *prev, struct nfs_page *req) |
536 | { | |
48d635f1 | 537 | struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio); |
0f9c429e WAA |
538 | unsigned int size; |
539 | ||
540 | size = pnfs_generic_pg_test(pgio, prev, req); | |
541 | ||
a7d42ddb | 542 | if (!size || mirror->pg_count + req->wb_bytes > |
b4fdac1a WAA |
543 | (unsigned long)pgio->pg_layout_private) |
544 | return 0; | |
93420770 | 545 | |
0f9c429e | 546 | return min(size, req->wb_bytes); |
7de6e284 BH |
547 | } |
548 | ||
2e928e48 | 549 | static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
7de6e284 BH |
550 | { |
551 | pnfs_generic_pg_init_read(pgio, req); | |
552 | if (unlikely(pgio->pg_lseg == NULL)) | |
553 | return; /* Not pNFS */ | |
554 | ||
555 | pgio->pg_layout_private = (void *) | |
556 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; | |
557 | } | |
558 | ||
559 | static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout, | |
560 | unsigned long *stripe_end) | |
561 | { | |
562 | u32 stripe_off; | |
563 | unsigned stripe_size; | |
564 | ||
565 | if (layout->raid_algorithm == PNFS_OSD_RAID_0) | |
566 | return true; | |
567 | ||
568 | stripe_size = layout->stripe_unit * | |
569 | (layout->group_width - layout->parity); | |
570 | ||
571 | div_u64_rem(offset, stripe_size, &stripe_off); | |
572 | if (!stripe_off) | |
573 | return true; | |
574 | ||
575 | *stripe_end = stripe_size - stripe_off; | |
576 | return false; | |
577 | } | |
578 | ||
2e928e48 | 579 | static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
7de6e284 BH |
580 | { |
581 | unsigned long stripe_end = 0; | |
6296556f | 582 | u64 wb_size; |
7de6e284 | 583 | |
6296556f PT |
584 | if (pgio->pg_dreq == NULL) |
585 | wb_size = i_size_read(pgio->pg_inode) - req_offset(req); | |
586 | else | |
587 | wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); | |
588 | ||
589 | pnfs_generic_pg_init_write(pgio, req, wb_size); | |
7de6e284 BH |
590 | if (unlikely(pgio->pg_lseg == NULL)) |
591 | return; /* Not pNFS */ | |
592 | ||
593 | if (req->wb_offset || | |
594 | !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE, | |
595 | &OBJIO_LSEG(pgio->pg_lseg)->layout, | |
596 | &stripe_end)) { | |
597 | pgio->pg_layout_private = (void *)stripe_end; | |
598 | } else { | |
599 | pgio->pg_layout_private = (void *) | |
600 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; | |
601 | } | |
93420770 BH |
602 | } |
603 | ||
1751c363 | 604 | static const struct nfs_pageio_ops objio_pg_read_ops = { |
7de6e284 | 605 | .pg_init = objio_init_read, |
1751c363 | 606 | .pg_test = objio_pg_test, |
493292dd | 607 | .pg_doio = pnfs_generic_pg_readpages, |
180bb5ec | 608 | .pg_cleanup = pnfs_generic_pg_cleanup, |
1751c363 TM |
609 | }; |
610 | ||
611 | static const struct nfs_pageio_ops objio_pg_write_ops = { | |
7de6e284 | 612 | .pg_init = objio_init_write, |
1751c363 | 613 | .pg_test = objio_pg_test, |
dce81290 | 614 | .pg_doio = pnfs_generic_pg_writepages, |
180bb5ec | 615 | .pg_cleanup = pnfs_generic_pg_cleanup, |
1751c363 TM |
616 | }; |
617 | ||
c93407d0 BH |
618 | static struct pnfs_layoutdriver_type objlayout_type = { |
619 | .id = LAYOUT_OSD2_OBJECTS, | |
620 | .name = "LAYOUT_OSD2_OBJECTS", | |
fe0fe835 BH |
621 | .flags = PNFS_LAYOUTRET_ON_SETATTR | |
622 | PNFS_LAYOUTRET_ON_ERROR, | |
09f5bf4e | 623 | |
661373b1 | 624 | .max_deviceinfo_size = PAGE_SIZE, |
5a12cca6 | 625 | .owner = THIS_MODULE, |
e51b841d BH |
626 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, |
627 | .free_layout_hdr = objlayout_free_layout_hdr, | |
628 | ||
09f5bf4e BH |
629 | .alloc_lseg = objlayout_alloc_lseg, |
630 | .free_lseg = objlayout_free_lseg, | |
b6c05f16 | 631 | |
04f83450 BH |
632 | .read_pagelist = objlayout_read_pagelist, |
633 | .write_pagelist = objlayout_write_pagelist, | |
1751c363 TM |
634 | .pg_read_ops = &objio_pg_read_ops, |
635 | .pg_write_ops = &objio_pg_write_ops, | |
04f83450 | 636 | |
5bb89b47 TM |
637 | .sync = pnfs_generic_sync, |
638 | ||
b6c05f16 | 639 | .free_deviceid_node = objio_free_deviceid_node, |
adb58535 | 640 | |
a0fe8bf4 | 641 | .encode_layoutcommit = objlayout_encode_layoutcommit, |
adb58535 | 642 | .encode_layoutreturn = objlayout_encode_layoutreturn, |
c93407d0 BH |
643 | }; |
644 | ||
645 | MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); | |
646 | MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>"); | |
647 | MODULE_LICENSE("GPL"); | |
648 | ||
649 | static int __init | |
650 | objlayout_init(void) | |
651 | { | |
652 | int ret = pnfs_register_layoutdriver(&objlayout_type); | |
653 | ||
654 | if (ret) | |
655 | printk(KERN_INFO | |
a030889a | 656 | "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n", |
c93407d0 BH |
657 | __func__, ret); |
658 | else | |
a030889a | 659 | printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n", |
c93407d0 BH |
660 | __func__); |
661 | return ret; | |
662 | } | |
663 | ||
664 | static void __exit | |
665 | objlayout_exit(void) | |
666 | { | |
667 | pnfs_unregister_layoutdriver(&objlayout_type); | |
a030889a | 668 | printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n", |
c93407d0 BH |
669 | __func__); |
670 | } | |
671 | ||
f85ef69c BF |
672 | MODULE_ALIAS("nfs-layouttype4-2"); |
673 | ||
c93407d0 BH |
674 | module_init(objlayout_init); |
675 | module_exit(objlayout_exit); |