Commit | Line | Data |
---|---|---|
16b374ca AA |
1 | /* |
2 | * Device operations for the pnfs nfs4 file layout driver. | |
3 | * | |
4 | * Copyright (c) 2002 | |
5 | * The Regents of the University of Michigan | |
6 | * All Rights Reserved | |
7 | * | |
8 | * Dean Hildebrand <dhildebz@umich.edu> | |
9 | * Garth Goodson <Garth.Goodson@netapp.com> | |
10 | * | |
11 | * Permission is granted to use, copy, create derivative works, and | |
12 | * redistribute this software and such derivative works for any purpose, | |
13 | * so long as the name of the University of Michigan is not used in | |
14 | * any advertising or publicity pertaining to the use or distribution | |
15 | * of this software without specific, written prior authorization. If | |
16 | * the above copyright notice or any other identification of the | |
17 | * University of Michigan is included in any copy of any portion of | |
18 | * this software, then the disclaimer below must also be included. | |
19 | * | |
20 | * This software is provided as is, without representation or warranty | |
21 | * of any kind either express or implied, including without limitation | |
22 | * the implied warranties of merchantability, fitness for a particular | |
23 | * purpose, or noninfringement. The Regents of the University of | |
24 | * Michigan shall not be liable for any damages, including special, | |
25 | * indirect, incidental, or consequential damages, with respect to any | |
26 | * claim arising out of or in connection with the use of the software, | |
27 | * even if it has been or is hereafter advised of the possibility of | |
28 | * such damages. | |
29 | */ | |
30 | ||
31 | #include <linux/nfs_fs.h> | |
32 | #include <linux/vmalloc.h> | |
98fc685a | 33 | #include <linux/module.h> |
16b374ca | 34 | |
b5968725 TH |
35 | #include "../internal.h" |
36 | #include "../nfs4session.h" | |
37 | #include "filelayout.h" | |
16b374ca AA |
38 | |
39 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | |
40 | ||
98fc685a AA |
41 | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; |
42 | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; | |
43 | ||
1775bc34 | 44 | void |
16b374ca AA |
45 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
46 | { | |
47 | struct nfs4_pnfs_ds *ds; | |
48 | int i; | |
49 | ||
a1eaecbc | 50 | nfs4_print_deviceid(&dsaddr->id_node.deviceid); |
16b374ca AA |
51 | |
52 | for (i = 0; i < dsaddr->ds_num; i++) { | |
53 | ds = dsaddr->ds_list[i]; | |
875ae069 PT |
54 | if (ds != NULL) |
55 | nfs4_pnfs_ds_put(ds); | |
16b374ca AA |
56 | } |
57 | kfree(dsaddr->stripe_indices); | |
84a80f62 | 58 | kfree_rcu(dsaddr, id_node.rcu); |
16b374ca AA |
59 | } |
60 | ||
16b374ca | 61 | /* Decode opaque device data and return the result */ |
661373b1 CH |
62 | struct nfs4_file_layout_dsaddr * |
63 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | |
64 | gfp_t gfp_flags) | |
16b374ca | 65 | { |
35124a09 | 66 | int i; |
16b374ca AA |
67 | u32 cnt, num; |
68 | u8 *indexp; | |
35124a09 WAA |
69 | __be32 *p; |
70 | u8 *stripe_indices; | |
71 | u8 max_stripe_index; | |
72 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | |
73 | struct xdr_stream stream; | |
f7da7a12 | 74 | struct xdr_buf buf; |
35124a09 | 75 | struct page *scratch; |
14f9a607 WAA |
76 | struct list_head dsaddrs; |
77 | struct nfs4_pnfs_ds_addr *da; | |
35124a09 WAA |
78 | |
79 | /* set up xdr stream */ | |
a75b9df9 | 80 | scratch = alloc_page(gfp_flags); |
35124a09 WAA |
81 | if (!scratch) |
82 | goto out_err; | |
83 | ||
f7da7a12 | 84 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); |
0ae4c3e8 | 85 | xdr_set_scratch_page(&stream, scratch); |
16b374ca AA |
86 | |
87 | /* Get the stripe count (number of stripe index) */ | |
35124a09 WAA |
88 | p = xdr_inline_decode(&stream, 4); |
89 | if (unlikely(!p)) | |
90 | goto out_err_free_scratch; | |
91 | ||
92 | cnt = be32_to_cpup(p); | |
16b374ca AA |
93 | dprintk("%s stripe count %d\n", __func__, cnt); |
94 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | |
a030889a | 95 | printk(KERN_WARNING "NFS: %s: stripe count %d greater than " |
16b374ca AA |
96 | "supported maximum %d\n", __func__, |
97 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | |
35124a09 WAA |
98 | goto out_err_free_scratch; |
99 | } | |
100 | ||
101 | /* read stripe indices */ | |
a75b9df9 | 102 | stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); |
35124a09 WAA |
103 | if (!stripe_indices) |
104 | goto out_err_free_scratch; | |
105 | ||
106 | p = xdr_inline_decode(&stream, cnt << 2); | |
107 | if (unlikely(!p)) | |
108 | goto out_err_free_stripe_indices; | |
109 | ||
110 | indexp = &stripe_indices[0]; | |
111 | max_stripe_index = 0; | |
112 | for (i = 0; i < cnt; i++) { | |
113 | *indexp = be32_to_cpup(p++); | |
114 | max_stripe_index = max(max_stripe_index, *indexp); | |
115 | indexp++; | |
16b374ca AA |
116 | } |
117 | ||
118 | /* Check the multipath list count */ | |
35124a09 WAA |
119 | p = xdr_inline_decode(&stream, 4); |
120 | if (unlikely(!p)) | |
121 | goto out_err_free_stripe_indices; | |
122 | ||
123 | num = be32_to_cpup(p); | |
16b374ca AA |
124 | dprintk("%s ds_num %u\n", __func__, num); |
125 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | |
a030889a | 126 | printk(KERN_WARNING "NFS: %s: multipath count %d greater than " |
16b374ca AA |
127 | "supported maximum %d\n", __func__, |
128 | num, NFS4_PNFS_MAX_MULTI_CNT); | |
35124a09 | 129 | goto out_err_free_stripe_indices; |
16b374ca | 130 | } |
35124a09 WAA |
131 | |
132 | /* validate stripe indices are all < num */ | |
133 | if (max_stripe_index >= num) { | |
a030889a | 134 | printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", |
35124a09 WAA |
135 | __func__, max_stripe_index, num); |
136 | goto out_err_free_stripe_indices; | |
137 | } | |
138 | ||
c72a8268 | 139 | dsaddr = kzalloc(struct_size(dsaddr, ds_list, num), gfp_flags); |
16b374ca | 140 | if (!dsaddr) |
35124a09 | 141 | goto out_err_free_stripe_indices; |
16b374ca AA |
142 | |
143 | dsaddr->stripe_count = cnt; | |
35124a09 WAA |
144 | dsaddr->stripe_indices = stripe_indices; |
145 | stripe_indices = NULL; | |
16b374ca | 146 | dsaddr->ds_num = num; |
661373b1 | 147 | nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id); |
16b374ca | 148 | |
14f9a607 WAA |
149 | INIT_LIST_HEAD(&dsaddrs); |
150 | ||
16b374ca AA |
151 | for (i = 0; i < dsaddr->ds_num; i++) { |
152 | int j; | |
35124a09 WAA |
153 | u32 mp_count; |
154 | ||
155 | p = xdr_inline_decode(&stream, 4); | |
156 | if (unlikely(!p)) | |
157 | goto out_err_free_deviceid; | |
16b374ca | 158 | |
35124a09 | 159 | mp_count = be32_to_cpup(p); /* multipath count */ |
35124a09 | 160 | for (j = 0; j < mp_count; j++) { |
6b7f3cf9 PT |
161 | da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, |
162 | &stream, gfp_flags); | |
14f9a607 WAA |
163 | if (da) |
164 | list_add_tail(&da->da_node, &dsaddrs); | |
165 | } | |
166 | if (list_empty(&dsaddrs)) { | |
167 | dprintk("%s: no suitable DS addresses found\n", | |
168 | __func__); | |
169 | goto out_err_free_deviceid; | |
170 | } | |
171 | ||
172 | dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); | |
173 | if (!dsaddr->ds_list[i]) | |
174 | goto out_err_drain_dsaddrs; | |
175 | ||
176 | /* If DS was already in cache, free ds addrs */ | |
177 | while (!list_empty(&dsaddrs)) { | |
178 | da = list_first_entry(&dsaddrs, | |
179 | struct nfs4_pnfs_ds_addr, | |
180 | da_node); | |
181 | list_del_init(&da->da_node); | |
182 | kfree(da->da_remotestr); | |
183 | kfree(da); | |
16b374ca AA |
184 | } |
185 | } | |
35124a09 WAA |
186 | |
187 | __free_page(scratch); | |
16b374ca AA |
188 | return dsaddr; |
189 | ||
14f9a607 WAA |
190 | out_err_drain_dsaddrs: |
191 | while (!list_empty(&dsaddrs)) { | |
192 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | |
193 | da_node); | |
194 | list_del_init(&da->da_node); | |
195 | kfree(da->da_remotestr); | |
196 | kfree(da); | |
197 | } | |
35124a09 | 198 | out_err_free_deviceid: |
16b374ca | 199 | nfs4_fl_free_deviceid(dsaddr); |
35124a09 WAA |
200 | /* stripe_indicies was part of dsaddr */ |
201 | goto out_err_free_scratch; | |
202 | out_err_free_stripe_indices: | |
203 | kfree(stripe_indices); | |
204 | out_err_free_scratch: | |
205 | __free_page(scratch); | |
16b374ca AA |
206 | out_err: |
207 | dprintk("%s ERROR: returning NULL\n", __func__); | |
208 | return NULL; | |
209 | } | |
210 | ||
ea8eecdd CH |
211 | void |
212 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | |
16b374ca | 213 | { |
1775bc34 | 214 | nfs4_put_deviceid_node(&dsaddr->id_node); |
16b374ca | 215 | } |
cfe7f412 FI |
216 | |
217 | /* | |
218 | * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit | |
219 | * Then: ((res + fsi) % dsaddr->stripe_count) | |
220 | */ | |
221 | u32 | |
222 | nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset) | |
223 | { | |
224 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | |
225 | u64 tmp; | |
226 | ||
227 | tmp = offset - flseg->pattern_offset; | |
228 | do_div(tmp, flseg->stripe_unit); | |
229 | tmp += flseg->first_stripe_index; | |
230 | return do_div(tmp, flseg->dsaddr->stripe_count); | |
231 | } | |
232 | ||
233 | u32 | |
234 | nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j) | |
235 | { | |
236 | return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j]; | |
237 | } | |
238 | ||
239 | struct nfs_fh * | |
240 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) | |
241 | { | |
242 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | |
243 | u32 i; | |
244 | ||
245 | if (flseg->stripe_type == STRIPE_SPARSE) { | |
246 | if (flseg->num_fh == 1) | |
247 | i = 0; | |
248 | else if (flseg->num_fh == 0) | |
249 | /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ | |
250 | return NULL; | |
251 | else | |
252 | i = nfs4_fl_calc_ds_index(lseg, j); | |
253 | } else | |
254 | i = j; | |
255 | return flseg->fh_array[i]; | |
256 | } | |
257 | ||
7405f9e1 | 258 | /* Upon return, either ds is connected, or ds is NULL */ |
cfe7f412 FI |
259 | struct nfs4_pnfs_ds * |
260 | nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) | |
261 | { | |
262 | struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; | |
263 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; | |
554d458d | 264 | struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); |
52b26a3e | 265 | struct nfs4_pnfs_ds *ret = ds; |
7405f9e1 | 266 | struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); |
a33e4b03 | 267 | int status; |
cfe7f412 FI |
268 | |
269 | if (ds == NULL) { | |
a030889a | 270 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", |
cfe7f412 | 271 | __func__, ds_idx); |
f54bcf2e | 272 | pnfs_generic_mark_devid_invalid(devid); |
52b26a3e | 273 | goto out; |
cfe7f412 | 274 | } |
acd65e5b | 275 | smp_rmb(); |
c23266d5 | 276 | if (ds->ds_clp) |
52b26a3e | 277 | goto out_test_devid; |
cfe7f412 | 278 | |
a33e4b03 | 279 | status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, |
30626f9c | 280 | dataserver_retrans, 4, |
7d38de3f | 281 | s->nfs_client->cl_minorversion); |
a33e4b03 | 282 | if (status) { |
da066f3f | 283 | nfs4_mark_deviceid_unavailable(devid); |
a33e4b03 WAA |
284 | ret = NULL; |
285 | goto out; | |
286 | } | |
7405f9e1 | 287 | |
52b26a3e | 288 | out_test_devid: |
cfd278c2 N |
289 | if (ret->ds_clp == NULL || |
290 | filelayout_test_devid_unavailable(devid)) | |
52b26a3e TM |
291 | ret = NULL; |
292 | out: | |
293 | return ret; | |
cfe7f412 | 294 | } |
98fc685a AA |
295 | |
296 | module_param(dataserver_retrans, uint, 0644); | |
297 | MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " | |
298 | "retries a request before it attempts further " | |
299 | " recovery action."); | |
300 | module_param(dataserver_timeo, uint, 0644); | |
301 | MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " | |
302 | "NFSv4.1 client waits for a response from a " | |
303 | " data server before it retries an NFS request."); |