Commit | Line | Data |
---|---|---|
16b374ca AA |
1 | /* |
2 | * Device operations for the pnfs nfs4 file layout driver. | |
3 | * | |
4 | * Copyright (c) 2002 | |
5 | * The Regents of the University of Michigan | |
6 | * All Rights Reserved | |
7 | * | |
8 | * Dean Hildebrand <dhildebz@umich.edu> | |
9 | * Garth Goodson <Garth.Goodson@netapp.com> | |
10 | * | |
11 | * Permission is granted to use, copy, create derivative works, and | |
12 | * redistribute this software and such derivative works for any purpose, | |
13 | * so long as the name of the University of Michigan is not used in | |
14 | * any advertising or publicity pertaining to the use or distribution | |
15 | * of this software without specific, written prior authorization. If | |
16 | * the above copyright notice or any other identification of the | |
17 | * University of Michigan is included in any copy of any portion of | |
18 | * this software, then the disclaimer below must also be included. | |
19 | * | |
20 | * This software is provided as is, without representation or warranty | |
21 | * of any kind either express or implied, including without limitation | |
22 | * the implied warranties of merchantability, fitness for a particular | |
23 | * purpose, or noninfringement. The Regents of the University of | |
24 | * Michigan shall not be liable for any damages, including special, | |
25 | * indirect, incidental, or consequential damages, with respect to any | |
26 | * claim arising out of or in connection with the use of the software, | |
27 | * even if it has been or is hereafter advised of the possibility of | |
28 | * such damages. | |
29 | */ | |
30 | ||
31 | #include <linux/nfs_fs.h> | |
32 | #include <linux/vmalloc.h> | |
33 | ||
34 | #include "internal.h" | |
35 | #include "nfs4filelayout.h" | |
36 | ||
37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | |
38 | ||
39 | /* | |
40 | * Data server cache | |
41 | * | |
42 | * Data servers can be mapped to different device ids. | |
43 | * nfs4_pnfs_ds reference counting | |
44 | * - set to 1 on allocation | |
45 | * - incremented when a device id maps a data server already in the cache. | |
46 | * - decremented when deviceid is removed from the cache. | |
47 | */ | |
48 | DEFINE_SPINLOCK(nfs4_ds_cache_lock); | |
49 | static LIST_HEAD(nfs4_data_server_cache); | |
50 | ||
51 | /* Debug routines */ | |
52 | void | |
53 | print_ds(struct nfs4_pnfs_ds *ds) | |
54 | { | |
55 | if (ds == NULL) { | |
56 | printk("%s NULL device\n", __func__); | |
57 | return; | |
58 | } | |
59 | printk(" ip_addr %x port %hu\n" | |
60 | " ref count %d\n" | |
61 | " client %p\n" | |
62 | " cl_exchange_flags %x\n", | |
63 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), | |
64 | atomic_read(&ds->ds_count), ds->ds_clp, | |
65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | |
66 | } | |
67 | ||
68 | void | |
69 | print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) | |
70 | { | |
71 | int i; | |
72 | ||
73 | ifdebug(FACILITY) { | |
74 | printk("%s dsaddr->ds_num %d\n", __func__, | |
75 | dsaddr->ds_num); | |
76 | for (i = 0; i < dsaddr->ds_num; i++) | |
77 | print_ds(dsaddr->ds_list[i]); | |
78 | } | |
79 | } | |
80 | ||
81 | void print_deviceid(struct nfs4_deviceid *id) | |
82 | { | |
83 | u32 *p = (u32 *)id; | |
84 | ||
85 | dprintk("%s: device id= [%x%x%x%x]\n", __func__, | |
86 | p[0], p[1], p[2], p[3]); | |
87 | } | |
88 | ||
89 | /* nfs4_ds_cache_lock is held */ | |
90 | static struct nfs4_pnfs_ds * | |
91 | _data_server_lookup_locked(u32 ip_addr, u32 port) | |
92 | { | |
93 | struct nfs4_pnfs_ds *ds; | |
94 | ||
95 | dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", | |
96 | ntohl(ip_addr), ntohs(port)); | |
97 | ||
98 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { | |
99 | if (ds->ds_ip_addr == ip_addr && | |
100 | ds->ds_port == port) { | |
101 | return ds; | |
102 | } | |
103 | } | |
104 | return NULL; | |
105 | } | |
106 | ||
107 | static void | |
108 | destroy_ds(struct nfs4_pnfs_ds *ds) | |
109 | { | |
110 | dprintk("--> %s\n", __func__); | |
111 | ifdebug(FACILITY) | |
112 | print_ds(ds); | |
113 | ||
114 | if (ds->ds_clp) | |
115 | nfs_put_client(ds->ds_clp); | |
116 | kfree(ds); | |
117 | } | |
118 | ||
119 | static void | |
120 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | |
121 | { | |
122 | struct nfs4_pnfs_ds *ds; | |
123 | int i; | |
124 | ||
125 | print_deviceid(&dsaddr->deviceid.de_id); | |
126 | ||
127 | for (i = 0; i < dsaddr->ds_num; i++) { | |
128 | ds = dsaddr->ds_list[i]; | |
129 | if (ds != NULL) { | |
130 | if (atomic_dec_and_lock(&ds->ds_count, | |
131 | &nfs4_ds_cache_lock)) { | |
132 | list_del_init(&ds->ds_node); | |
133 | spin_unlock(&nfs4_ds_cache_lock); | |
134 | destroy_ds(ds); | |
135 | } | |
136 | } | |
137 | } | |
138 | kfree(dsaddr->stripe_indices); | |
139 | kfree(dsaddr); | |
140 | } | |
141 | ||
142 | void | |
143 | nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device) | |
144 | { | |
145 | struct nfs4_file_layout_dsaddr *dsaddr = | |
146 | container_of(device, struct nfs4_file_layout_dsaddr, deviceid); | |
147 | ||
148 | nfs4_fl_free_deviceid(dsaddr); | |
149 | } | |
150 | ||
151 | static struct nfs4_pnfs_ds * | |
152 | nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) | |
153 | { | |
154 | struct nfs4_pnfs_ds *tmp_ds, *ds; | |
155 | ||
156 | ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); | |
157 | if (!ds) | |
158 | goto out; | |
159 | ||
160 | spin_lock(&nfs4_ds_cache_lock); | |
161 | tmp_ds = _data_server_lookup_locked(ip_addr, port); | |
162 | if (tmp_ds == NULL) { | |
163 | ds->ds_ip_addr = ip_addr; | |
164 | ds->ds_port = port; | |
165 | atomic_set(&ds->ds_count, 1); | |
166 | INIT_LIST_HEAD(&ds->ds_node); | |
167 | ds->ds_clp = NULL; | |
168 | list_add(&ds->ds_node, &nfs4_data_server_cache); | |
169 | dprintk("%s add new data server ip 0x%x\n", __func__, | |
170 | ds->ds_ip_addr); | |
171 | } else { | |
172 | kfree(ds); | |
173 | atomic_inc(&tmp_ds->ds_count); | |
174 | dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", | |
175 | __func__, tmp_ds->ds_ip_addr, | |
176 | atomic_read(&tmp_ds->ds_count)); | |
177 | ds = tmp_ds; | |
178 | } | |
179 | spin_unlock(&nfs4_ds_cache_lock); | |
180 | out: | |
181 | return ds; | |
182 | } | |
183 | ||
184 | /* | |
185 | * Currently only support ipv4, and one multi-path address. | |
186 | */ | |
187 | static struct nfs4_pnfs_ds * | |
188 | decode_and_add_ds(__be32 **pp, struct inode *inode) | |
189 | { | |
190 | struct nfs4_pnfs_ds *ds = NULL; | |
191 | char *buf; | |
192 | const char *ipend, *pstr; | |
193 | u32 ip_addr, port; | |
194 | int nlen, rlen, i; | |
195 | int tmp[2]; | |
196 | __be32 *r_netid, *r_addr, *p = *pp; | |
197 | ||
198 | /* r_netid */ | |
199 | nlen = be32_to_cpup(p++); | |
200 | r_netid = p; | |
201 | p += XDR_QUADLEN(nlen); | |
202 | ||
203 | /* r_addr */ | |
204 | rlen = be32_to_cpup(p++); | |
205 | r_addr = p; | |
206 | p += XDR_QUADLEN(rlen); | |
207 | *pp = p; | |
208 | ||
209 | /* Check that netid is "tcp" */ | |
210 | if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { | |
211 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); | |
212 | goto out_err; | |
213 | } | |
214 | ||
215 | /* ipv6 length plus port is legal */ | |
216 | if (rlen > INET6_ADDRSTRLEN + 8) { | |
ad3d2eed | 217 | dprintk("%s: Invalid address, length %d\n", __func__, |
16b374ca AA |
218 | rlen); |
219 | goto out_err; | |
220 | } | |
221 | buf = kmalloc(rlen + 1, GFP_KERNEL); | |
222 | buf[rlen] = '\0'; | |
223 | memcpy(buf, r_addr, rlen); | |
224 | ||
225 | /* replace the port dots with dashes for the in4_pton() delimiter*/ | |
226 | for (i = 0; i < 2; i++) { | |
227 | char *res = strrchr(buf, '.'); | |
ad3d2eed JJ |
228 | if (!res) { |
229 | dprintk("%s: Failed finding expected dots in port\n", | |
230 | __func__); | |
231 | goto out_free; | |
232 | } | |
16b374ca AA |
233 | *res = '-'; |
234 | } | |
235 | ||
236 | /* Currently only support ipv4 address */ | |
237 | if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { | |
238 | dprintk("%s: Only ipv4 addresses supported\n", __func__); | |
239 | goto out_free; | |
240 | } | |
241 | ||
242 | /* port */ | |
243 | pstr = ipend; | |
244 | sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); | |
245 | port = htons((tmp[0] << 8) | (tmp[1])); | |
246 | ||
247 | ds = nfs4_pnfs_ds_add(inode, ip_addr, port); | |
ad3d2eed | 248 | dprintk("%s: Decoded address and port %s\n", __func__, buf); |
16b374ca AA |
249 | out_free: |
250 | kfree(buf); | |
251 | out_err: | |
252 | return ds; | |
253 | } | |
254 | ||
255 | /* Decode opaque device data and return the result */ | |
256 | static struct nfs4_file_layout_dsaddr* | |
257 | decode_device(struct inode *ino, struct pnfs_device *pdev) | |
258 | { | |
259 | int i, dummy; | |
260 | u32 cnt, num; | |
261 | u8 *indexp; | |
262 | __be32 *p = (__be32 *)pdev->area, *indicesp; | |
263 | struct nfs4_file_layout_dsaddr *dsaddr; | |
264 | ||
265 | /* Get the stripe count (number of stripe index) */ | |
266 | cnt = be32_to_cpup(p++); | |
267 | dprintk("%s stripe count %d\n", __func__, cnt); | |
268 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | |
269 | printk(KERN_WARNING "%s: stripe count %d greater than " | |
270 | "supported maximum %d\n", __func__, | |
271 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | |
272 | goto out_err; | |
273 | } | |
274 | ||
275 | /* Check the multipath list count */ | |
276 | indicesp = p; | |
277 | p += XDR_QUADLEN(cnt << 2); | |
278 | num = be32_to_cpup(p++); | |
279 | dprintk("%s ds_num %u\n", __func__, num); | |
280 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | |
281 | printk(KERN_WARNING "%s: multipath count %d greater than " | |
282 | "supported maximum %d\n", __func__, | |
283 | num, NFS4_PNFS_MAX_MULTI_CNT); | |
284 | goto out_err; | |
285 | } | |
286 | dsaddr = kzalloc(sizeof(*dsaddr) + | |
287 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), | |
288 | GFP_KERNEL); | |
289 | if (!dsaddr) | |
290 | goto out_err; | |
291 | ||
292 | dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL); | |
293 | if (!dsaddr->stripe_indices) | |
294 | goto out_err_free; | |
295 | ||
296 | dsaddr->stripe_count = cnt; | |
297 | dsaddr->ds_num = num; | |
298 | ||
299 | memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); | |
300 | ||
301 | /* Go back an read stripe indices */ | |
302 | p = indicesp; | |
303 | indexp = &dsaddr->stripe_indices[0]; | |
304 | for (i = 0; i < dsaddr->stripe_count; i++) { | |
305 | *indexp = be32_to_cpup(p++); | |
306 | if (*indexp >= num) | |
307 | goto out_err_free; | |
308 | indexp++; | |
309 | } | |
310 | /* Skip already read multipath list count */ | |
311 | p++; | |
312 | ||
313 | for (i = 0; i < dsaddr->ds_num; i++) { | |
314 | int j; | |
315 | ||
316 | dummy = be32_to_cpup(p++); /* multipath count */ | |
317 | if (dummy > 1) { | |
318 | printk(KERN_WARNING | |
319 | "%s: Multipath count %d not supported, " | |
320 | "skipping all greater than 1\n", __func__, | |
321 | dummy); | |
322 | } | |
323 | for (j = 0; j < dummy; j++) { | |
324 | if (j == 0) { | |
325 | dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); | |
326 | if (dsaddr->ds_list[i] == NULL) | |
327 | goto out_err_free; | |
328 | } else { | |
329 | u32 len; | |
330 | /* skip extra multipath */ | |
331 | len = be32_to_cpup(p++); | |
332 | p += XDR_QUADLEN(len); | |
333 | len = be32_to_cpup(p++); | |
334 | p += XDR_QUADLEN(len); | |
335 | continue; | |
336 | } | |
337 | } | |
338 | } | |
339 | return dsaddr; | |
340 | ||
341 | out_err_free: | |
342 | nfs4_fl_free_deviceid(dsaddr); | |
343 | out_err: | |
344 | dprintk("%s ERROR: returning NULL\n", __func__); | |
345 | return NULL; | |
346 | } | |
347 | ||
348 | /* | |
349 | * Decode the opaque device specified in 'dev' | |
350 | * and add it to the list of available devices. | |
351 | * If the deviceid is already cached, nfs4_add_deviceid will return | |
352 | * a pointer to the cached struct and throw away the new. | |
353 | */ | |
354 | static struct nfs4_file_layout_dsaddr* | |
355 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev) | |
356 | { | |
357 | struct nfs4_file_layout_dsaddr *dsaddr; | |
358 | struct pnfs_deviceid_node *d; | |
359 | ||
360 | dsaddr = decode_device(inode, dev); | |
361 | if (!dsaddr) { | |
362 | printk(KERN_WARNING "%s: Could not decode or add device\n", | |
363 | __func__); | |
364 | return NULL; | |
365 | } | |
366 | ||
367 | d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, | |
368 | &dsaddr->deviceid); | |
369 | ||
370 | return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); | |
371 | } | |
372 | ||
373 | /* | |
374 | * Retrieve the information for dev_id, add it to the list | |
375 | * of available devices, and return it. | |
376 | */ | |
377 | struct nfs4_file_layout_dsaddr * | |
378 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) | |
379 | { | |
380 | struct pnfs_device *pdev = NULL; | |
381 | u32 max_resp_sz; | |
382 | int max_pages; | |
383 | struct page **pages = NULL; | |
384 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | |
385 | int rc, i; | |
386 | struct nfs_server *server = NFS_SERVER(inode); | |
387 | ||
388 | /* | |
389 | * Use the session max response size as the basis for setting | |
390 | * GETDEVICEINFO's maxcount | |
391 | */ | |
392 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | |
393 | max_pages = max_resp_sz >> PAGE_SHIFT; | |
394 | dprintk("%s inode %p max_resp_sz %u max_pages %d\n", | |
395 | __func__, inode, max_resp_sz, max_pages); | |
396 | ||
397 | pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); | |
398 | if (pdev == NULL) | |
399 | return NULL; | |
400 | ||
401 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); | |
402 | if (pages == NULL) { | |
403 | kfree(pdev); | |
404 | return NULL; | |
405 | } | |
406 | for (i = 0; i < max_pages; i++) { | |
407 | pages[i] = alloc_page(GFP_KERNEL); | |
408 | if (!pages[i]) | |
409 | goto out_free; | |
410 | } | |
411 | ||
412 | /* set pdev->area */ | |
413 | pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL); | |
414 | if (!pdev->area) | |
415 | goto out_free; | |
416 | ||
417 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | |
418 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | |
419 | pdev->pages = pages; | |
420 | pdev->pgbase = 0; | |
421 | pdev->pglen = PAGE_SIZE * max_pages; | |
422 | pdev->mincount = 0; | |
423 | ||
424 | rc = nfs4_proc_getdeviceinfo(server, pdev); | |
425 | dprintk("%s getdevice info returns %d\n", __func__, rc); | |
426 | if (rc) | |
427 | goto out_free; | |
428 | ||
429 | /* | |
430 | * Found new device, need to decode it and then add it to the | |
431 | * list of known devices for this mountpoint. | |
432 | */ | |
433 | dsaddr = decode_and_add_device(inode, pdev); | |
434 | out_free: | |
435 | if (pdev->area != NULL) | |
436 | vunmap(pdev->area); | |
437 | for (i = 0; i < max_pages; i++) | |
438 | __free_page(pages[i]); | |
439 | kfree(pages); | |
440 | kfree(pdev); | |
441 | dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); | |
442 | return dsaddr; | |
443 | } | |
444 | ||
445 | struct nfs4_file_layout_dsaddr * | |
446 | nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) | |
447 | { | |
448 | struct pnfs_deviceid_node *d; | |
449 | ||
450 | d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); | |
451 | return (d == NULL) ? NULL : | |
452 | container_of(d, struct nfs4_file_layout_dsaddr, deviceid); | |
453 | } |