Commit | Line | Data |
---|---|---|
0b61f8a4 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2a82b8be DC |
2 | /* |
3 | * Copyright (c) 2006-2007 Silicon Graphics, Inc. | |
2cd2ef6a | 4 | * Copyright (c) 2014 Christoph Hellwig. |
2a82b8be | 5 | * All Rights Reserved. |
2a82b8be DC |
6 | */ |
7 | #include "xfs.h" | |
5467b34b | 8 | #include "xfs_shared.h" |
a4fbe6ab | 9 | #include "xfs_format.h" |
239880ef DC |
10 | #include "xfs_log_format.h" |
11 | #include "xfs_trans_resv.h" | |
239880ef | 12 | #include "xfs_mount.h" |
2a82b8be | 13 | #include "xfs_inode.h" |
2a82b8be | 14 | #include "xfs_bmap.h" |
8f7747ad | 15 | #include "xfs_bmap_util.h" |
2a82b8be | 16 | #include "xfs_alloc.h" |
2a82b8be | 17 | #include "xfs_mru_cache.h" |
0b1b213f | 18 | #include "xfs_trace.h" |
9bbafc71 | 19 | #include "xfs_ag.h" |
3fd129b6 | 20 | #include "xfs_ag_resv.h" |
3e3673e3 | 21 | #include "xfs_trans.h" |
f368b29b | 22 | #include "xfs_filestream.h" |
2a82b8be | 23 | |
2cd2ef6a CH |
24 | struct xfs_fstrm_item { |
25 | struct xfs_mru_cache_elem mru; | |
3054face | 26 | struct xfs_perag *pag; /* AG in use for this directory */ |
2cd2ef6a CH |
27 | }; |
28 | ||
/* Flag bits that steer AG selection in xfs_filestream_pick_ag(). */
enum xfs_fstrm_alloc {
	/* The allocation is for user data. */
	XFS_PICK_USERDATA	= (1 << 0),
	/* Low space selection: do not skip AGs that prefer metadata. */
	XFS_PICK_LOWSPACE	= (1 << 1),
};
2a82b8be | 33 | |
2cd2ef6a CH |
34 | static void |
35 | xfs_fstrm_free_func( | |
7fcd3efa | 36 | void *data, |
2cd2ef6a CH |
37 | struct xfs_mru_cache_elem *mru) |
38 | { | |
39 | struct xfs_fstrm_item *item = | |
40 | container_of(mru, struct xfs_fstrm_item, mru); | |
3054face | 41 | struct xfs_perag *pag = item->pag; |
2cd2ef6a | 42 | |
571e2592 | 43 | trace_xfs_filestream_free(pag, mru->key); |
3054face DC |
44 | atomic_dec(&pag->pagf_fstrms); |
45 | xfs_perag_rele(pag); | |
2cd2ef6a | 46 | |
1919adda | 47 | kmem_free(item); |
2cd2ef6a CH |
48 | } |
49 | ||
2a82b8be | 50 | /* |
bd4f5d09 DC |
51 | * Scan the AGs starting at start_agno looking for an AG that isn't in use and |
52 | * has at least minlen blocks free. If no AG is found to match the allocation | |
53 | * requirements, pick the AG with the most free space in it. | |
2a82b8be DC |
54 | */ |
55 | static int | |
2cd2ef6a | 56 | xfs_filestream_pick_ag( |
f8f1ed1a | 57 | struct xfs_alloc_arg *args, |
bd4f5d09 | 58 | xfs_ino_t pino, |
f8f1ed1a | 59 | xfs_agnumber_t start_agno, |
2cd2ef6a | 60 | int flags, |
ba34de8d | 61 | xfs_extlen_t *longest) |
2a82b8be | 62 | { |
bd4f5d09 | 63 | struct xfs_mount *mp = args->mp; |
2cd2ef6a | 64 | struct xfs_perag *pag; |
3054face | 65 | struct xfs_perag *max_pag = NULL; |
ba34de8d DC |
66 | xfs_extlen_t minlen = *longest; |
67 | xfs_extlen_t free = 0, minfree, maxfree = 0; | |
eb70aa2d | 68 | xfs_agnumber_t agno; |
bd4f5d09 DC |
69 | bool first_pass = true; |
70 | int err; | |
2a82b8be DC |
71 | |
72 | /* 2% of an AG's blocks must be free for it to be chosen. */ | |
73 | minfree = mp->m_sb.sb_agblocks / 50; | |
74 | ||
eb70aa2d DC |
75 | restart: |
76 | for_each_perag_wrap(mp, start_agno, agno, pag) { | |
bd4f5d09 | 77 | trace_xfs_filestream_scan(pag, pino); |
ba34de8d DC |
78 | *longest = 0; |
79 | err = xfs_bmap_longest_free_extent(pag, NULL, longest); | |
05cf492a | 80 | if (err) { |
3054face | 81 | xfs_perag_rele(pag); |
05cf492a | 82 | if (err != -EAGAIN) |
3054face | 83 | break; |
05cf492a | 84 | /* Couldn't lock the AGF, skip this AG. */ |
eb70aa2d | 85 | err = 0; |
f8f1ed1a | 86 | continue; |
2a82b8be DC |
87 | } |
88 | ||
2a82b8be DC |
89 | /* Keep track of the AG with the most free blocks. */ |
90 | if (pag->pagf_freeblks > maxfree) { | |
91 | maxfree = pag->pagf_freeblks; | |
3054face DC |
92 | if (max_pag) |
93 | xfs_perag_rele(max_pag); | |
94 | atomic_inc(&pag->pag_active_ref); | |
95 | max_pag = pag; | |
2a82b8be DC |
96 | } |
97 | ||
98 | /* | |
99 | * The AG reference count does two things: it enforces mutual | |
100 | * exclusion when examining the suitability of an AG in this | |
101 | * loop, and it guards against two filestreams being established | |
102 | * in the same AG as each other. | |
103 | */ | |
eb70aa2d DC |
104 | if (atomic_inc_return(&pag->pagf_fstrms) <= 1) { |
105 | if (((minlen && *longest >= minlen) || | |
106 | (!minlen && pag->pagf_freeblks >= minfree)) && | |
107 | (!xfs_perag_prefers_metadata(pag) || | |
108 | !(flags & XFS_PICK_USERDATA) || | |
109 | (flags & XFS_PICK_LOWSPACE))) { | |
110 | /* Break out, retaining the reference on the AG. */ | |
111 | free = pag->pagf_freeblks; | |
112 | break; | |
113 | } | |
2a82b8be DC |
114 | } |
115 | ||
116 | /* Drop the reference on this AG, it's not usable. */ | |
3054face | 117 | atomic_dec(&pag->pagf_fstrms); |
eb70aa2d | 118 | } |
2a82b8be | 119 | |
eb70aa2d DC |
120 | if (err) { |
121 | xfs_perag_rele(pag); | |
122 | if (max_pag) | |
123 | xfs_perag_rele(max_pag); | |
124 | return err; | |
125 | } | |
2a82b8be | 126 | |
eb70aa2d | 127 | if (!pag) { |
bd4f5d09 DC |
128 | /* |
129 | * Allow a second pass to give xfs_bmap_longest_free_extent() | |
130 | * another attempt at locking AGFs that it might have skipped | |
131 | * over before we fail. | |
132 | */ | |
133 | if (first_pass) { | |
134 | first_pass = false; | |
eb70aa2d | 135 | goto restart; |
2a82b8be DC |
136 | } |
137 | ||
bd4f5d09 DC |
138 | /* |
139 | * We must be low on data space, so run a final lowspace | |
140 | * optimised selection pass if we haven't already. | |
141 | */ | |
2a82b8be DC |
142 | if (!(flags & XFS_PICK_LOWSPACE)) { |
143 | flags |= XFS_PICK_LOWSPACE; | |
eb70aa2d | 144 | goto restart; |
2a82b8be DC |
145 | } |
146 | ||
147 | /* | |
eb70aa2d DC |
148 | * No unassociated AGs are available, so select the AG with the |
149 | * most free space, regardless of whether it's already in use by | |
f8f1ed1a DC |
150 | * another filestream. It none suit, just use whatever AG we can |
151 | * grab. | |
2a82b8be | 152 | */ |
eb70aa2d | 153 | if (!max_pag) { |
bd4f5d09 | 154 | for_each_perag_wrap(args->mp, 0, start_agno, args->pag) |
f8f1ed1a | 155 | break; |
bd4f5d09 | 156 | atomic_inc(&args->pag->pagf_fstrms); |
f8f1ed1a DC |
157 | *longest = 0; |
158 | } else { | |
159 | pag = max_pag; | |
160 | free = maxfree; | |
161 | atomic_inc(&pag->pagf_fstrms); | |
2a82b8be | 162 | } |
eb70aa2d | 163 | } else if (max_pag) { |
3054face | 164 | xfs_perag_rele(max_pag); |
3054face | 165 | } |
2a82b8be | 166 | |
bd4f5d09 | 167 | trace_xfs_filestream_pick(pag, pino, free); |
f8f1ed1a | 168 | args->pag = pag; |
2a82b8be | 169 | return 0; |
2a82b8be | 170 | |
2a82b8be DC |
171 | } |
172 | ||
2cd2ef6a CH |
173 | static struct xfs_inode * |
174 | xfs_filestream_get_parent( | |
175 | struct xfs_inode *ip) | |
2a82b8be | 176 | { |
2cd2ef6a CH |
177 | struct inode *inode = VFS_I(ip), *dir = NULL; |
178 | struct dentry *dentry, *parent; | |
2a82b8be | 179 | |
2cd2ef6a CH |
180 | dentry = d_find_alias(inode); |
181 | if (!dentry) | |
182 | goto out; | |
2a82b8be | 183 | |
2cd2ef6a CH |
184 | parent = dget_parent(dentry); |
185 | if (!parent) | |
186 | goto out_dput; | |
2a82b8be | 187 | |
2b0143b5 | 188 | dir = igrab(d_inode(parent)); |
2cd2ef6a | 189 | dput(parent); |
2a82b8be | 190 | |
2cd2ef6a CH |
191 | out_dput: |
192 | dput(dentry); | |
193 | out: | |
194 | return dir ? XFS_I(dir) : NULL; | |
2a82b8be DC |
195 | } |
196 | ||
f38b46bb DC |
197 | /* |
198 | * Lookup the mru cache for an existing association. If one exists and we can | |
bd4f5d09 DC |
199 | * use it, return with an active perag reference indicating that the allocation |
200 | * will proceed with that association. | |
f38b46bb DC |
201 | * |
202 | * If we have no association, or we cannot use the current one and have to | |
bd4f5d09 DC |
203 | * destroy it, return with longest = 0 to tell the caller to create a new |
204 | * association. | |
f38b46bb | 205 | */ |
bd4f5d09 DC |
206 | static int |
207 | xfs_filestream_lookup_association( | |
f38b46bb DC |
208 | struct xfs_bmalloca *ap, |
209 | struct xfs_alloc_arg *args, | |
bd4f5d09 DC |
210 | xfs_ino_t pino, |
211 | xfs_extlen_t *longest) | |
f38b46bb | 212 | { |
bd4f5d09 | 213 | struct xfs_mount *mp = args->mp; |
f38b46bb DC |
214 | struct xfs_perag *pag; |
215 | struct xfs_mru_cache_elem *mru; | |
bd4f5d09 | 216 | int error = 0; |
f38b46bb | 217 | |
bd4f5d09 DC |
218 | *longest = 0; |
219 | mru = xfs_mru_cache_lookup(mp->m_filestream, pino); | |
f38b46bb | 220 | if (!mru) |
bd4f5d09 | 221 | return 0; |
f8f1ed1a DC |
222 | /* |
223 | * Grab the pag and take an extra active reference for the caller whilst | |
224 | * the mru item cannot go away. This means we'll pin the perag with | |
225 | * the reference we get here even if the filestreams association is torn | |
226 | * down immediately after we mark the lookup as done. | |
227 | */ | |
3054face | 228 | pag = container_of(mru, struct xfs_fstrm_item, mru)->pag; |
f8f1ed1a | 229 | atomic_inc(&pag->pag_active_ref); |
f38b46bb DC |
230 | xfs_mru_cache_done(mp->m_filestream); |
231 | ||
571e2592 | 232 | trace_xfs_filestream_lookup(pag, ap->ip->i_ino); |
f38b46bb | 233 | |
3054face | 234 | ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0); |
f38b46bb DC |
235 | xfs_bmap_adjacent(ap); |
236 | ||
f38b46bb | 237 | /* |
f8f1ed1a | 238 | * If there is very little free space before we start a filestreams |
bd4f5d09 DC |
239 | * allocation, we're almost guaranteed to fail to find a large enough |
240 | * free space available so just use the cached AG. | |
f38b46bb | 241 | */ |
bd4f5d09 DC |
242 | if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { |
243 | *longest = 1; | |
244 | goto out_done; | |
f8f1ed1a | 245 | } |
f38b46bb | 246 | |
bd4f5d09 DC |
247 | error = xfs_bmap_longest_free_extent(pag, args->tp, longest); |
248 | if (error == -EAGAIN) | |
249 | error = 0; | |
250 | if (error || *longest < args->maxlen) { | |
251 | /* We aren't going to use this perag */ | |
252 | *longest = 0; | |
253 | xfs_perag_rele(pag); | |
254 | return error; | |
255 | } | |
256 | ||
257 | out_done: | |
258 | args->pag = pag; | |
259 | return 0; | |
260 | } | |
261 | ||
262 | static int | |
263 | xfs_filestream_create_association( | |
264 | struct xfs_bmalloca *ap, | |
265 | struct xfs_alloc_arg *args, | |
266 | xfs_ino_t pino, | |
267 | xfs_extlen_t *longest) | |
268 | { | |
269 | struct xfs_mount *mp = args->mp; | |
270 | struct xfs_mru_cache_elem *mru; | |
271 | struct xfs_fstrm_item *item; | |
272 | xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, pino); | |
273 | int flags = 0; | |
274 | int error; | |
275 | ||
f38b46bb | 276 | /* Changing parent AG association now, so remove the existing one. */ |
bd4f5d09 | 277 | mru = xfs_mru_cache_remove(mp->m_filestream, pino); |
f38b46bb DC |
278 | if (mru) { |
279 | struct xfs_fstrm_item *item = | |
280 | container_of(mru, struct xfs_fstrm_item, mru); | |
f38b46bb | 281 | |
bd4f5d09 DC |
282 | agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount; |
283 | xfs_fstrm_free_func(mp, mru); | |
284 | } else if (xfs_is_inode32(mp)) { | |
f38b46bb | 285 | xfs_agnumber_t rotorstep = xfs_rotorstep; |
bd4f5d09 DC |
286 | |
287 | agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; | |
f38b46bb DC |
288 | mp->m_agfrotor = (mp->m_agfrotor + 1) % |
289 | (mp->m_sb.sb_agcount * rotorstep); | |
f38b46bb | 290 | } |
bd4f5d09 DC |
291 | |
292 | ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0); | |
293 | xfs_bmap_adjacent(ap); | |
294 | ||
295 | if (ap->datatype & XFS_ALLOC_USERDATA) | |
296 | flags |= XFS_PICK_USERDATA; | |
297 | if (ap->tp->t_flags & XFS_TRANS_LOWMODE) | |
298 | flags |= XFS_PICK_LOWSPACE; | |
299 | ||
300 | *longest = ap->length; | |
301 | error = xfs_filestream_pick_ag(args, pino, agno, flags, longest); | |
302 | if (error) | |
303 | return error; | |
304 | ||
305 | /* | |
306 | * We are going to use this perag now, so create an assoication for it. | |
307 | * xfs_filestream_pick_ag() has already bumped the perag fstrms counter | |
308 | * for us, so all we need to do here is take another active reference to | |
309 | * the perag for the cached association. | |
310 | * | |
311 | * If we fail to store the association, we need to drop the fstrms | |
312 | * counter as well as drop the perag reference we take here for the | |
313 | * item. We do not need to return an error for this failure - as long as | |
314 | * we return a referenced AG, the allocation can still go ahead just | |
315 | * fine. | |
316 | */ | |
317 | item = kmem_alloc(sizeof(*item), KM_MAYFAIL); | |
318 | if (!item) | |
319 | goto out_put_fstrms; | |
320 | ||
321 | atomic_inc(&args->pag->pag_active_ref); | |
322 | item->pag = args->pag; | |
323 | error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru); | |
324 | if (error) | |
325 | goto out_free_item; | |
f38b46bb DC |
326 | return 0; |
327 | ||
bd4f5d09 DC |
328 | out_free_item: |
329 | xfs_perag_rele(item->pag); | |
330 | kmem_free(item); | |
331 | out_put_fstrms: | |
332 | atomic_dec(&args->pag->pagf_fstrms); | |
333 | return 0; | |
f38b46bb DC |
334 | } |
335 | ||
a52dc2ad DC |
336 | /* |
337 | * Search for an allocation group with a single extent large enough for | |
bd4f5d09 DC |
338 | * the request. First we look for an existing association and use that if it |
339 | * is found. Otherwise, we create a new association by selecting an AG that fits | |
340 | * the allocation criteria. | |
341 | * | |
342 | * We return with a referenced perag in args->pag to indicate which AG we are | |
343 | * allocating into or an error with no references held. | |
a52dc2ad DC |
344 | */ |
345 | int | |
346 | xfs_filestream_select_ag( | |
8f7747ad DC |
347 | struct xfs_bmalloca *ap, |
348 | struct xfs_alloc_arg *args, | |
bd4f5d09 | 349 | xfs_extlen_t *longest) |
8f7747ad | 350 | { |
bd4f5d09 DC |
351 | struct xfs_mount *mp = args->mp; |
352 | struct xfs_inode *pip; | |
353 | xfs_ino_t ino = 0; | |
f8f1ed1a | 354 | int error = 0; |
8f7747ad | 355 | |
bd4f5d09 | 356 | *longest = 0; |
8f7747ad | 357 | args->total = ap->total; |
a52dc2ad | 358 | pip = xfs_filestream_get_parent(ap->ip); |
bd4f5d09 DC |
359 | if (pip) { |
360 | ino = pip->i_ino; | |
361 | error = xfs_filestream_lookup_association(ap, args, ino, | |
362 | longest); | |
363 | xfs_irele(pip); | |
364 | if (error) | |
365 | return error; | |
366 | if (*longest >= args->maxlen) | |
367 | goto out_select; | |
368 | if (ap->tp->t_flags & XFS_TRANS_LOWMODE) | |
369 | goto out_select; | |
8f7747ad DC |
370 | } |
371 | ||
bd4f5d09 | 372 | error = xfs_filestream_create_association(ap, args, ino, longest); |
f8f1ed1a | 373 | if (error) |
bd4f5d09 | 374 | return error; |
ba34de8d | 375 | |
a52dc2ad | 376 | out_select: |
f8f1ed1a | 377 | ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0); |
bd4f5d09 | 378 | return 0; |
ba34de8d | 379 | } |
a52dc2ad | 380 | |
2cd2ef6a CH |
381 | void |
382 | xfs_filestream_deassociate( | |
383 | struct xfs_inode *ip) | |
384 | { | |
385 | xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); | |
386 | } | |
2a82b8be | 387 | |
2cd2ef6a CH |
388 | int |
389 | xfs_filestream_mount( | |
390 | xfs_mount_t *mp) | |
391 | { | |
2a82b8be | 392 | /* |
2cd2ef6a CH |
393 | * The filestream timer tunable is currently fixed within the range of |
394 | * one second to four minutes, with five seconds being the default. The | |
395 | * group count is somewhat arbitrary, but it'd be nice to adhere to the | |
396 | * timer tunable to within about 10 percent. This requires at least 10 | |
397 | * groups. | |
2a82b8be | 398 | */ |
7fcd3efa CH |
399 | return xfs_mru_cache_create(&mp->m_filestream, mp, |
400 | xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func); | |
2cd2ef6a | 401 | } |
2a82b8be | 402 | |
2cd2ef6a CH |
403 | void |
404 | xfs_filestream_unmount( | |
405 | xfs_mount_t *mp) | |
406 | { | |
407 | xfs_mru_cache_destroy(mp->m_filestream); | |
408 | } |