Commit | Line | Data |
---|---|---|
16b374ca AA |
1 | /* |
2 | * Device operations for the pnfs nfs4 file layout driver. | |
3 | * | |
4 | * Copyright (c) 2002 | |
5 | * The Regents of the University of Michigan | |
6 | * All Rights Reserved | |
7 | * | |
8 | * Dean Hildebrand <dhildebz@umich.edu> | |
9 | * Garth Goodson <Garth.Goodson@netapp.com> | |
10 | * | |
11 | * Permission is granted to use, copy, create derivative works, and | |
12 | * redistribute this software and such derivative works for any purpose, | |
13 | * so long as the name of the University of Michigan is not used in | |
14 | * any advertising or publicity pertaining to the use or distribution | |
15 | * of this software without specific, written prior authorization. If | |
16 | * the above copyright notice or any other identification of the | |
17 | * University of Michigan is included in any copy of any portion of | |
18 | * this software, then the disclaimer below must also be included. | |
19 | * | |
20 | * This software is provided as is, without representation or warranty | |
21 | * of any kind either express or implied, including without limitation | |
22 | * the implied warranties of merchantability, fitness for a particular | |
23 | * purpose, or noninfringement. The Regents of the University of | |
24 | * Michigan shall not be liable for any damages, including special, | |
25 | * indirect, incidental, or consequential damages, with respect to any | |
26 | * claim arising out of or in connection with the use of the software, | |
27 | * even if it has been or is hereafter advised of the possibility of | |
28 | * such damages. | |
29 | */ | |
30 | ||
31 | #include <linux/nfs_fs.h> | |
32 | #include <linux/vmalloc.h> | |
98fc685a | 33 | #include <linux/module.h> |
16b374ca | 34 | |
b5968725 TH |
35 | #include "../internal.h" |
36 | #include "../nfs4session.h" | |
37 | #include "filelayout.h" | |
16b374ca AA |
38 | |
39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | |
40 | ||
98fc685a AA |
41 | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; |
42 | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; | |
43 | ||
1775bc34 | 44 | void |
16b374ca AA |
45 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
46 | { | |
47 | struct nfs4_pnfs_ds *ds; | |
48 | int i; | |
49 | ||
a1eaecbc | 50 | nfs4_print_deviceid(&dsaddr->id_node.deviceid); |
16b374ca AA |
51 | |
52 | for (i = 0; i < dsaddr->ds_num; i++) { | |
53 | ds = dsaddr->ds_list[i]; | |
875ae069 PT |
54 | if (ds != NULL) |
55 | nfs4_pnfs_ds_put(ds); | |
16b374ca AA |
56 | } |
57 | kfree(dsaddr->stripe_indices); | |
58 | kfree(dsaddr); | |
59 | } | |
60 | ||
16b374ca | 61 | /* Decode opaque device data and return the result */ |
661373b1 CH |
62 | struct nfs4_file_layout_dsaddr * |
63 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | |
64 | gfp_t gfp_flags) | |
16b374ca | 65 | { |
35124a09 | 66 | int i; |
16b374ca AA |
67 | u32 cnt, num; |
68 | u8 *indexp; | |
35124a09 WAA |
69 | __be32 *p; |
70 | u8 *stripe_indices; | |
71 | u8 max_stripe_index; | |
72 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | |
73 | struct xdr_stream stream; | |
f7da7a12 | 74 | struct xdr_buf buf; |
35124a09 | 75 | struct page *scratch; |
14f9a607 WAA |
76 | struct list_head dsaddrs; |
77 | struct nfs4_pnfs_ds_addr *da; | |
35124a09 WAA |
78 | |
79 | /* set up xdr stream */ | |
a75b9df9 | 80 | scratch = alloc_page(gfp_flags); |
35124a09 WAA |
81 | if (!scratch) |
82 | goto out_err; | |
83 | ||
f7da7a12 | 84 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); |
35124a09 | 85 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); |
16b374ca AA |
86 | |
87 | /* Get the stripe count (number of stripe index) */ | |
35124a09 WAA |
88 | p = xdr_inline_decode(&stream, 4); |
89 | if (unlikely(!p)) | |
90 | goto out_err_free_scratch; | |
91 | ||
92 | cnt = be32_to_cpup(p); | |
16b374ca AA |
93 | dprintk("%s stripe count %d\n", __func__, cnt); |
94 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | |
a030889a | 95 | printk(KERN_WARNING "NFS: %s: stripe count %d greater than " |
16b374ca AA |
96 | "supported maximum %d\n", __func__, |
97 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | |
35124a09 WAA |
98 | goto out_err_free_scratch; |
99 | } | |
100 | ||
101 | /* read stripe indices */ | |
a75b9df9 | 102 | stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); |
35124a09 WAA |
103 | if (!stripe_indices) |
104 | goto out_err_free_scratch; | |
105 | ||
106 | p = xdr_inline_decode(&stream, cnt << 2); | |
107 | if (unlikely(!p)) | |
108 | goto out_err_free_stripe_indices; | |
109 | ||
110 | indexp = &stripe_indices[0]; | |
111 | max_stripe_index = 0; | |
112 | for (i = 0; i < cnt; i++) { | |
113 | *indexp = be32_to_cpup(p++); | |
114 | max_stripe_index = max(max_stripe_index, *indexp); | |
115 | indexp++; | |
16b374ca AA |
116 | } |
117 | ||
118 | /* Check the multipath list count */ | |
35124a09 WAA |
119 | p = xdr_inline_decode(&stream, 4); |
120 | if (unlikely(!p)) | |
121 | goto out_err_free_stripe_indices; | |
122 | ||
123 | num = be32_to_cpup(p); | |
16b374ca AA |
124 | dprintk("%s ds_num %u\n", __func__, num); |
125 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | |
a030889a | 126 | printk(KERN_WARNING "NFS: %s: multipath count %d greater than " |
16b374ca AA |
127 | "supported maximum %d\n", __func__, |
128 | num, NFS4_PNFS_MAX_MULTI_CNT); | |
35124a09 | 129 | goto out_err_free_stripe_indices; |
16b374ca | 130 | } |
35124a09 WAA |
131 | |
132 | /* validate stripe indices are all < num */ | |
133 | if (max_stripe_index >= num) { | |
a030889a | 134 | printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", |
35124a09 WAA |
135 | __func__, max_stripe_index, num); |
136 | goto out_err_free_stripe_indices; | |
137 | } | |
138 | ||
16b374ca AA |
139 | dsaddr = kzalloc(sizeof(*dsaddr) + |
140 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), | |
a75b9df9 | 141 | gfp_flags); |
16b374ca | 142 | if (!dsaddr) |
35124a09 | 143 | goto out_err_free_stripe_indices; |
16b374ca AA |
144 | |
145 | dsaddr->stripe_count = cnt; | |
35124a09 WAA |
146 | dsaddr->stripe_indices = stripe_indices; |
147 | stripe_indices = NULL; | |
16b374ca | 148 | dsaddr->ds_num = num; |
661373b1 | 149 | nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id); |
16b374ca | 150 | |
14f9a607 WAA |
151 | INIT_LIST_HEAD(&dsaddrs); |
152 | ||
16b374ca AA |
153 | for (i = 0; i < dsaddr->ds_num; i++) { |
154 | int j; | |
35124a09 WAA |
155 | u32 mp_count; |
156 | ||
157 | p = xdr_inline_decode(&stream, 4); | |
158 | if (unlikely(!p)) | |
159 | goto out_err_free_deviceid; | |
16b374ca | 160 | |
35124a09 | 161 | mp_count = be32_to_cpup(p); /* multipath count */ |
35124a09 | 162 | for (j = 0; j < mp_count; j++) { |
6b7f3cf9 PT |
163 | da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, |
164 | &stream, gfp_flags); | |
14f9a607 WAA |
165 | if (da) |
166 | list_add_tail(&da->da_node, &dsaddrs); | |
167 | } | |
168 | if (list_empty(&dsaddrs)) { | |
169 | dprintk("%s: no suitable DS addresses found\n", | |
170 | __func__); | |
171 | goto out_err_free_deviceid; | |
172 | } | |
173 | ||
174 | dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); | |
175 | if (!dsaddr->ds_list[i]) | |
176 | goto out_err_drain_dsaddrs; | |
177 | ||
178 | /* If DS was already in cache, free ds addrs */ | |
179 | while (!list_empty(&dsaddrs)) { | |
180 | da = list_first_entry(&dsaddrs, | |
181 | struct nfs4_pnfs_ds_addr, | |
182 | da_node); | |
183 | list_del_init(&da->da_node); | |
184 | kfree(da->da_remotestr); | |
185 | kfree(da); | |
16b374ca AA |
186 | } |
187 | } | |
35124a09 WAA |
188 | |
189 | __free_page(scratch); | |
16b374ca AA |
190 | return dsaddr; |
191 | ||
14f9a607 WAA |
192 | out_err_drain_dsaddrs: |
193 | while (!list_empty(&dsaddrs)) { | |
194 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | |
195 | da_node); | |
196 | list_del_init(&da->da_node); | |
197 | kfree(da->da_remotestr); | |
198 | kfree(da); | |
199 | } | |
35124a09 | 200 | out_err_free_deviceid: |
16b374ca | 201 | nfs4_fl_free_deviceid(dsaddr); |
35124a09 WAA |
202 | /* stripe_indicies was part of dsaddr */ |
203 | goto out_err_free_scratch; | |
204 | out_err_free_stripe_indices: | |
205 | kfree(stripe_indices); | |
206 | out_err_free_scratch: | |
207 | __free_page(scratch); | |
16b374ca AA |
208 | out_err: |
209 | dprintk("%s ERROR: returning NULL\n", __func__); | |
210 | return NULL; | |
211 | } | |
212 | ||
ea8eecdd CH |
213 | void |
214 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | |
16b374ca | 215 | { |
1775bc34 | 216 | nfs4_put_deviceid_node(&dsaddr->id_node); |
16b374ca | 217 | } |
cfe7f412 FI |
218 | |
219 | /* | |
220 | * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit | |
221 | * Then: ((res + fsi) % dsaddr->stripe_count) | |
222 | */ | |
223 | u32 | |
224 | nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset) | |
225 | { | |
226 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | |
227 | u64 tmp; | |
228 | ||
229 | tmp = offset - flseg->pattern_offset; | |
230 | do_div(tmp, flseg->stripe_unit); | |
231 | tmp += flseg->first_stripe_index; | |
232 | return do_div(tmp, flseg->dsaddr->stripe_count); | |
233 | } | |
234 | ||
235 | u32 | |
236 | nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j) | |
237 | { | |
238 | return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j]; | |
239 | } | |
240 | ||
241 | struct nfs_fh * | |
242 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) | |
243 | { | |
244 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | |
245 | u32 i; | |
246 | ||
247 | if (flseg->stripe_type == STRIPE_SPARSE) { | |
248 | if (flseg->num_fh == 1) | |
249 | i = 0; | |
250 | else if (flseg->num_fh == 0) | |
251 | /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ | |
252 | return NULL; | |
253 | else | |
254 | i = nfs4_fl_calc_ds_index(lseg, j); | |
255 | } else | |
256 | i = j; | |
257 | return flseg->fh_array[i]; | |
258 | } | |
259 | ||
7405f9e1 | 260 | /* Upon return, either ds is connected, or ds is NULL */ |
cfe7f412 FI |
261 | struct nfs4_pnfs_ds * |
262 | nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | |
263 | { | |
264 | struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; | |
265 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; | |
554d458d | 266 | struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); |
52b26a3e | 267 | struct nfs4_pnfs_ds *ret = ds; |
7405f9e1 | 268 | struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); |
cfe7f412 FI |
269 | |
270 | if (ds == NULL) { | |
a030889a | 271 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", |
cfe7f412 | 272 | __func__, ds_idx); |
f54bcf2e | 273 | pnfs_generic_mark_devid_invalid(devid); |
52b26a3e | 274 | goto out; |
cfe7f412 | 275 | } |
acd65e5b | 276 | smp_rmb(); |
c23266d5 | 277 | if (ds->ds_clp) |
52b26a3e | 278 | goto out_test_devid; |
cfe7f412 | 279 | |
7405f9e1 PT |
280 | nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, |
281 | dataserver_retrans); | |
282 | ||
52b26a3e TM |
283 | out_test_devid: |
284 | if (filelayout_test_devid_unavailable(devid)) | |
285 | ret = NULL; | |
286 | out: | |
287 | return ret; | |
cfe7f412 | 288 | } |
98fc685a AA |
289 | |
290 | module_param(dataserver_retrans, uint, 0644); | |
291 | MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " | |
292 | "retries a request before it attempts further " | |
293 | " recovery action."); | |
294 | module_param(dataserver_timeo, uint, 0644); | |
295 | MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " | |
296 | "NFSv4.1 client waits for a response from a " | |
297 | " data server before it retries an NFS request."); |