/* fs/xfs/xfs_refcount_item.c */
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_refcount_item.h"
#include "xfs_log.h"
#include "xfs_refcount.h"

22
23kmem_zone_t *xfs_cui_zone;
24kmem_zone_t *xfs_cud_zone;
25
26static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip)
27{
28 return container_of(lip, struct xfs_cui_log_item, cui_item);
29}
30
31void
32xfs_cui_item_free(
33 struct xfs_cui_log_item *cuip)
34{
35 if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
36 kmem_free(cuip);
37 else
38 kmem_zone_free(xfs_cui_zone, cuip);
39}
40
0612d116
DC
41/*
42 * Freeing the CUI requires that we remove it from the AIL if it has already
43 * been placed there. However, the CUI may not yet have been placed in the AIL
44 * when called by xfs_cui_release() from CUD processing due to the ordering of
45 * committed vs unpin operations in bulk insert operations. Hence the reference
46 * count to ensure only the last caller frees the CUI.
47 */
48void
49xfs_cui_release(
50 struct xfs_cui_log_item *cuip)
51{
52 ASSERT(atomic_read(&cuip->cui_refcount) > 0);
53 if (atomic_dec_and_test(&cuip->cui_refcount)) {
54 xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
55 xfs_cui_item_free(cuip);
56 }
57}
58
59
baf4bcac
DW
60STATIC void
61xfs_cui_item_size(
62 struct xfs_log_item *lip,
63 int *nvecs,
64 int *nbytes)
65{
66 struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
67
68 *nvecs += 1;
69 *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents);
70}
71
72/*
73 * This is called to fill in the vector of log iovecs for the
74 * given cui log item. We use only 1 iovec, and we point that
75 * at the cui_log_format structure embedded in the cui item.
76 * It is at this point that we assert that all of the extent
77 * slots in the cui item have been filled.
78 */
79STATIC void
80xfs_cui_item_format(
81 struct xfs_log_item *lip,
82 struct xfs_log_vec *lv)
83{
84 struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
85 struct xfs_log_iovec *vecp = NULL;
86
87 ASSERT(atomic_read(&cuip->cui_next_extent) ==
88 cuip->cui_format.cui_nextents);
89
90 cuip->cui_format.cui_type = XFS_LI_CUI;
91 cuip->cui_format.cui_size = 1;
92
93 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format,
94 xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents));
95}
96
97/*
98 * Pinning has no meaning for an cui item, so just return.
99 */
100STATIC void
101xfs_cui_item_pin(
102 struct xfs_log_item *lip)
103{
104}
105
106/*
107 * The unpin operation is the last place an CUI is manipulated in the log. It is
108 * either inserted in the AIL or aborted in the event of a log I/O error. In
109 * either case, the CUI transaction has been successfully committed to make it
110 * this far. Therefore, we expect whoever committed the CUI to either construct
111 * and commit the CUD or drop the CUD's reference in the event of error. Simply
112 * drop the log's CUI reference now that the log is done with it.
113 */
114STATIC void
115xfs_cui_item_unpin(
116 struct xfs_log_item *lip,
117 int remove)
118{
119 struct xfs_cui_log_item *cuip = CUI_ITEM(lip);
120
121 xfs_cui_release(cuip);
122}
123
124/*
125 * CUI items have no locking or pushing. However, since CUIs are pulled from
126 * the AIL when their corresponding CUDs are committed to disk, their situation
127 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
128 * will eventually flush the log. This should help in getting the CUI out of
129 * the AIL.
130 */
131STATIC uint
132xfs_cui_item_push(
133 struct xfs_log_item *lip,
134 struct list_head *buffer_list)
135{
136 return XFS_ITEM_PINNED;
137}
138
139/*
140 * The CUI has been either committed or aborted if the transaction has been
141 * cancelled. If the transaction was cancelled, an CUD isn't going to be
142 * constructed and thus we free the CUI here directly.
143 */
144STATIC void
145xfs_cui_item_unlock(
146 struct xfs_log_item *lip)
147{
22525c17 148 if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
0612d116 149 xfs_cui_release(CUI_ITEM(lip));
baf4bcac
DW
150}
151
152/*
153 * The CUI is logged only once and cannot be moved in the log, so simply return
154 * the lsn at which it's been logged.
155 */
156STATIC xfs_lsn_t
157xfs_cui_item_committed(
158 struct xfs_log_item *lip,
159 xfs_lsn_t lsn)
160{
161 return lsn;
162}
163
164/*
165 * The CUI dependency tracking op doesn't do squat. It can't because
166 * it doesn't know where the free extent is coming from. The dependency
167 * tracking has to be handled by the "enclosing" metadata object. For
168 * example, for inodes, the inode is locked throughout the extent freeing
169 * so the dependency should be recorded there.
170 */
171STATIC void
172xfs_cui_item_committing(
173 struct xfs_log_item *lip,
174 xfs_lsn_t lsn)
175{
176}
177
178/*
179 * This is the ops vector shared by all cui log items.
180 */
181static const struct xfs_item_ops xfs_cui_item_ops = {
182 .iop_size = xfs_cui_item_size,
183 .iop_format = xfs_cui_item_format,
184 .iop_pin = xfs_cui_item_pin,
185 .iop_unpin = xfs_cui_item_unpin,
186 .iop_unlock = xfs_cui_item_unlock,
187 .iop_committed = xfs_cui_item_committed,
188 .iop_push = xfs_cui_item_push,
189 .iop_committing = xfs_cui_item_committing,
190};
191
192/*
193 * Allocate and initialize an cui item with the given number of extents.
194 */
195struct xfs_cui_log_item *
196xfs_cui_init(
197 struct xfs_mount *mp,
198 uint nextents)
199
200{
201 struct xfs_cui_log_item *cuip;
202
203 ASSERT(nextents > 0);
204 if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
205 cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents),
206 KM_SLEEP);
207 else
208 cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP);
209
210 xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
211 cuip->cui_format.cui_nextents = nextents;
212 cuip->cui_format.cui_id = (uintptr_t)(void *)cuip;
213 atomic_set(&cuip->cui_next_extent, 0);
214 atomic_set(&cuip->cui_refcount, 2);
215
216 return cuip;
217}
218
baf4bcac
DW
219static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
220{
221 return container_of(lip, struct xfs_cud_log_item, cud_item);
222}
223
224STATIC void
225xfs_cud_item_size(
226 struct xfs_log_item *lip,
227 int *nvecs,
228 int *nbytes)
229{
230 *nvecs += 1;
231 *nbytes += sizeof(struct xfs_cud_log_format);
232}
233
234/*
235 * This is called to fill in the vector of log iovecs for the
236 * given cud log item. We use only 1 iovec, and we point that
237 * at the cud_log_format structure embedded in the cud item.
238 * It is at this point that we assert that all of the extent
239 * slots in the cud item have been filled.
240 */
241STATIC void
242xfs_cud_item_format(
243 struct xfs_log_item *lip,
244 struct xfs_log_vec *lv)
245{
246 struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
247 struct xfs_log_iovec *vecp = NULL;
248
249 cudp->cud_format.cud_type = XFS_LI_CUD;
250 cudp->cud_format.cud_size = 1;
251
252 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format,
253 sizeof(struct xfs_cud_log_format));
254}
255
256/*
257 * Pinning has no meaning for an cud item, so just return.
258 */
259STATIC void
260xfs_cud_item_pin(
261 struct xfs_log_item *lip)
262{
263}
264
265/*
266 * Since pinning has no meaning for an cud item, unpinning does
267 * not either.
268 */
269STATIC void
270xfs_cud_item_unpin(
271 struct xfs_log_item *lip,
272 int remove)
273{
274}
275
276/*
277 * There isn't much you can do to push on an cud item. It is simply stuck
278 * waiting for the log to be flushed to disk.
279 */
280STATIC uint
281xfs_cud_item_push(
282 struct xfs_log_item *lip,
283 struct list_head *buffer_list)
284{
285 return XFS_ITEM_PINNED;
286}
287
288/*
289 * The CUD is either committed or aborted if the transaction is cancelled. If
290 * the transaction is cancelled, drop our reference to the CUI and free the
291 * CUD.
292 */
293STATIC void
294xfs_cud_item_unlock(
295 struct xfs_log_item *lip)
296{
297 struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
298
22525c17 299 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
baf4bcac
DW
300 xfs_cui_release(cudp->cud_cuip);
301 kmem_zone_free(xfs_cud_zone, cudp);
302 }
303}
304
305/*
306 * When the cud item is committed to disk, all we need to do is delete our
307 * reference to our partner cui item and then free ourselves. Since we're
308 * freeing ourselves we must return -1 to keep the transaction code from
309 * further referencing this item.
310 */
311STATIC xfs_lsn_t
312xfs_cud_item_committed(
313 struct xfs_log_item *lip,
314 xfs_lsn_t lsn)
315{
316 struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
317
318 /*
319 * Drop the CUI reference regardless of whether the CUD has been
320 * aborted. Once the CUD transaction is constructed, it is the sole
321 * responsibility of the CUD to release the CUI (even if the CUI is
322 * aborted due to log I/O error).
323 */
324 xfs_cui_release(cudp->cud_cuip);
325 kmem_zone_free(xfs_cud_zone, cudp);
326
327 return (xfs_lsn_t)-1;
328}
329
330/*
331 * The CUD dependency tracking op doesn't do squat. It can't because
332 * it doesn't know where the free extent is coming from. The dependency
333 * tracking has to be handled by the "enclosing" metadata object. For
334 * example, for inodes, the inode is locked throughout the extent freeing
335 * so the dependency should be recorded there.
336 */
337STATIC void
338xfs_cud_item_committing(
339 struct xfs_log_item *lip,
340 xfs_lsn_t lsn)
341{
342}
343
344/*
345 * This is the ops vector shared by all cud log items.
346 */
347static const struct xfs_item_ops xfs_cud_item_ops = {
348 .iop_size = xfs_cud_item_size,
349 .iop_format = xfs_cud_item_format,
350 .iop_pin = xfs_cud_item_pin,
351 .iop_unpin = xfs_cud_item_unpin,
352 .iop_unlock = xfs_cud_item_unlock,
353 .iop_committed = xfs_cud_item_committed,
354 .iop_push = xfs_cud_item_push,
355 .iop_committing = xfs_cud_item_committing,
356};
357
358/*
359 * Allocate and initialize an cud item with the given number of extents.
360 */
361struct xfs_cud_log_item *
362xfs_cud_init(
363 struct xfs_mount *mp,
364 struct xfs_cui_log_item *cuip)
365
366{
367 struct xfs_cud_log_item *cudp;
368
369 cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
370 xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
371 cudp->cud_cuip = cuip;
372 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
373
374 return cudp;
375}
f997ee21
DW
376
377/*
378 * Process a refcount update intent item that was recovered from the log.
379 * We need to update the refcountbt.
380 */
381int
382xfs_cui_recover(
fbfa977d
BF
383 struct xfs_trans *parent_tp,
384 struct xfs_cui_log_item *cuip)
f997ee21
DW
385{
386 int i;
387 int error = 0;
33ba6129 388 unsigned int refc_type;
f997ee21
DW
389 struct xfs_phys_extent *refc;
390 xfs_fsblock_t startblock_fsb;
391 bool op_ok;
33ba6129
DW
392 struct xfs_cud_log_item *cudp;
393 struct xfs_trans *tp;
394 struct xfs_btree_cur *rcur = NULL;
395 enum xfs_refcount_intent_type type;
33ba6129
DW
396 xfs_fsblock_t new_fsb;
397 xfs_extlen_t new_len;
398 struct xfs_bmbt_irec irec;
33ba6129 399 bool requeue_only = false;
fbfa977d 400 struct xfs_mount *mp = parent_tp->t_mountp;
f997ee21
DW
401
402 ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
403
404 /*
405 * First check the validity of the extents described by the
406 * CUI. If any are bad, then assume that all are bad and
407 * just toss the CUI.
408 */
409 for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
410 refc = &cuip->cui_format.cui_extents[i];
411 startblock_fsb = XFS_BB_TO_FSB(mp,
412 XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
413 switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
414 case XFS_REFCOUNT_INCREASE:
415 case XFS_REFCOUNT_DECREASE:
416 case XFS_REFCOUNT_ALLOC_COW:
417 case XFS_REFCOUNT_FREE_COW:
418 op_ok = true;
419 break;
420 default:
421 op_ok = false;
422 break;
423 }
424 if (!op_ok || startblock_fsb == 0 ||
425 refc->pe_len == 0 ||
426 startblock_fsb >= mp->m_sb.sb_dblocks ||
427 refc->pe_len >= mp->m_sb.sb_agblocks ||
428 (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) {
429 /*
430 * This will pull the CUI from the AIL and
431 * free the memory associated with it.
432 */
433 set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
434 xfs_cui_release(cuip);
435 return -EIO;
436 }
437 }
438
33ba6129
DW
439 /*
440 * Under normal operation, refcount updates are deferred, so we
441 * wouldn't be adding them directly to a transaction. All
442 * refcount updates manage reservation usage internally and
443 * dynamically by deferring work that won't fit in the
444 * transaction. Normally, any work that needs to be deferred
445 * gets attached to the same defer_ops that scheduled the
446 * refcount update. However, we're in log recovery here, so we
b31c2bdc
DW
447 * we use the passed in defer_ops and to finish up any work that
448 * doesn't fit. We need to reserve enough blocks to handle a
449 * full btree split on either end of the refcount range.
33ba6129 450 */
b31c2bdc
DW
451 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
452 mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
33ba6129
DW
453 if (error)
454 return error;
91ef75b6
BF
455 /*
456 * Recovery stashes all deferred ops during intent processing and
457 * finishes them on completion. Transfer current dfops state to this
458 * transaction and transfer the result back before we return.
459 */
ce356d64 460 xfs_defer_move(tp, parent_tp);
33ba6129
DW
461 cudp = xfs_trans_get_cud(tp, cuip);
462
33ba6129
DW
463 for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
464 refc = &cuip->cui_format.cui_extents[i];
465 refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
466 switch (refc_type) {
467 case XFS_REFCOUNT_INCREASE:
468 case XFS_REFCOUNT_DECREASE:
469 case XFS_REFCOUNT_ALLOC_COW:
470 case XFS_REFCOUNT_FREE_COW:
471 type = refc_type;
472 break;
473 default:
474 error = -EFSCORRUPTED;
475 goto abort_error;
476 }
477 if (requeue_only) {
478 new_fsb = refc->pe_startblock;
479 new_len = refc->pe_len;
480 } else
481 error = xfs_trans_log_finish_refcount_update(tp, cudp,
7dbddbac
BF
482 type, refc->pe_startblock, refc->pe_len,
483 &new_fsb, &new_len, &rcur);
33ba6129
DW
484 if (error)
485 goto abort_error;
486
487 /* Requeue what we didn't finish. */
488 if (new_len > 0) {
489 irec.br_startblock = new_fsb;
490 irec.br_blockcount = new_len;
491 switch (type) {
492 case XFS_REFCOUNT_INCREASE:
0f37d178 493 error = xfs_refcount_increase_extent(tp, &irec);
33ba6129
DW
494 break;
495 case XFS_REFCOUNT_DECREASE:
0f37d178 496 error = xfs_refcount_decrease_extent(tp, &irec);
33ba6129 497 break;
174edb0e 498 case XFS_REFCOUNT_ALLOC_COW:
0f37d178 499 error = xfs_refcount_alloc_cow_extent(tp,
174edb0e
DW
500 irec.br_startblock,
501 irec.br_blockcount);
502 break;
503 case XFS_REFCOUNT_FREE_COW:
0f37d178 504 error = xfs_refcount_free_cow_extent(tp,
174edb0e
DW
505 irec.br_startblock,
506 irec.br_blockcount);
507 break;
33ba6129
DW
508 default:
509 ASSERT(0);
510 }
511 if (error)
512 goto abort_error;
513 requeue_only = true;
514 }
515 }
516
517 xfs_refcount_finish_one_cleanup(tp, rcur, error);
f997ee21 518 set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
ce356d64 519 xfs_defer_move(parent_tp, tp);
33ba6129
DW
520 error = xfs_trans_commit(tp);
521 return error;
522
523abort_error:
524 xfs_refcount_finish_one_cleanup(tp, rcur, error);
ce356d64 525 xfs_defer_move(parent_tp, tp);
33ba6129 526 xfs_trans_cancel(tp);
f997ee21
DW
527 return error;
528}