xfs: rename the m_writeio_* fields in struct xfs_mount
fs/xfs/xfs_iomap.c (linux-block.git)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_inode_item.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"


#define XFS_ALLOC_ALIGN(mp, off) \
	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)

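/* Warn about an extent mapping that points at block zero and flag corruption. */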
static int
xfs_alert_fsblock_zero(
	xfs_inode_t	*ip,
	xfs_bmbt_irec_t	*imap)
{
	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
			"Access to block zero in inode %llu "
			"start_block: %llx start_off: %llx "
			"blkcnt: %llx extent-state: %x",
		(unsigned long long)ip->i_ino,
		(unsigned long long)imap->br_startblock,
		(unsigned long long)imap->br_startoff,
		(unsigned long long)imap->br_blockcount,
		imap->br_state);
	return -EFSCORRUPTED;
}

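/*
 * Translate an incore extent mapping (xfs_bmbt_irec) into the struct iomap
 * consumed by the generic iomap code.
 */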
int
xfs_bmbt_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	struct xfs_bmbt_irec	*imap,
	u16			flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		return xfs_alert_fsblock_zero(ip, imap);

	if (imap->br_startblock == HOLESTARTBLOCK) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_HOLE;
	} else if (imap->br_startblock == DELAYSTARTBLOCK ||
		   isnullstartblock(imap->br_startblock)) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_DELALLOC;
	} else {
		iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
		if (imap->br_state == XFS_EXT_UNWRITTEN)
			iomap->type = IOMAP_UNWRITTEN;
		else
			iomap->type = IOMAP_MAPPED;
	}
	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
	iomap->bdev = target->bt_bdev;
	iomap->dax_dev = target->bt_daxdev;
	iomap->flags = flags;

	if (xfs_ipincount(ip) &&
	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
		iomap->flags |= IOMAP_F_DIRTY;
	return 0;
}

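/* Report the given file range as a hole to the iomap layer. */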
static void
xfs_hole_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
	iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
	iomap->bdev = target->bt_bdev;
	iomap->dax_dev = target->bt_daxdev;
}

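/* Clamp the end of a mapping request to the maximum supported file size. */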
static inline xfs_fileoff_t
xfs_iomap_end_fsb(
	struct xfs_mount	*mp,
	loff_t			offset,
	loff_t			count)
{
	ASSERT(offset <= mp->m_super->s_maxbytes);
	return min(XFS_B_TO_FSB(mp, offset + count),
		   XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
}

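/*
 * Work out the stripe and/or extent size alignment to apply to allocations
 * that extend the file, returning 0 if no alignment is required.
 */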
xfs_extlen_t
xfs_eof_alignment(
	struct xfs_inode	*ip,
	xfs_extlen_t		extsize)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_extlen_t		align = 0;

	if (!XFS_IS_REALTIME_INODE(ip)) {
		/*
		 * Round up the allocation request to a stripe unit
		 * (m_dalign) boundary if the file size is >= stripe unit
		 * size, and we are allocating past the allocation eof.
		 *
		 * If mounted with the "-o swalloc" option the alignment is
		 * increased from the stripe unit size to the stripe width.
		 */
		if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
			align = mp->m_swidth;
		else if (mp->m_dalign)
			align = mp->m_dalign;

		if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align))
			align = 0;
	}

	/*
	 * Always round up the allocation request to an extent boundary
	 * (when file on a real-time subvolume or has di_extsize hint).
	 */
	if (extsize) {
		if (align)
			align = roundup_64(align, extsize);
		else
			align = extsize;
	}

	return align;
}

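/*
 * Round the proposed last block of an allocation up to the EOF alignment,
 * but only keep the rounded value if it still sits beyond the last
 * allocated block.
 */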
STATIC int
xfs_iomap_eof_align_last_fsb(
	struct xfs_inode	*ip,
	xfs_extlen_t		extsize,
	xfs_fileoff_t		*last_fsb)
{
	xfs_extlen_t		align = xfs_eof_alignment(ip, extsize);

	if (align) {
		xfs_fileoff_t	new_last_fsb = roundup_64(*last_fsb, align);
		int		eof, error;

		error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
		if (error)
			return error;
		if (eof)
			*last_fsb = new_last_fsb;
	}
	return 0;
}

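/*
 * Allocate real extents for a direct I/O (or DAX) write, replacing holes or
 * delalloc mappings in the given range.  The caller holds the ilock shared;
 * it is dropped, retaken exclusively for the transaction, and released on
 * return.
 */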
int
xfs_iomap_write_direct(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	size_t		count,
	xfs_bmbt_irec_t	*imap,
	int		nmaps)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t	last_fsb = xfs_iomap_end_fsb(mp, offset, count);
	xfs_filblks_t	count_fsb, resaligned;
	xfs_extlen_t	extsz;
	int		nimaps;
	int		quota_flag;
	int		rt;
	xfs_trans_t	*tp;
	uint		qblocks, resblks, resrtextents;
	int		error;
	int		lockmode;
	int		bmapi_flags = XFS_BMAPI_PREALLOC;
	uint		tflags = 0;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);
	lockmode = XFS_ILOCK_SHARED;	/* locked by caller */

	ASSERT(xfs_isilocked(ip, lockmode));

	if ((offset + count) > XFS_ISIZE(ip)) {
		/*
		 * Assert that the in-core extent list is present since this can
		 * call xfs_iread_extents() and we only have the ilock shared.
		 * This should be safe because the lock was held around a bmapi
		 * call in the caller and we only need it to access the in-core
		 * list.
		 */
		ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
				XFS_IFEXTENTS);
		error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb);
		if (error)
			goto out_unlock;
	} else {
		if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
			last_fsb = min(last_fsb, (xfs_fileoff_t)
					imap->br_blockcount +
					imap->br_startoff);
	}
	count_fsb = last_fsb - offset_fsb;
	ASSERT(count_fsb > 0);
	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz);

	if (unlikely(rt)) {
		resrtextents = qblocks = resaligned;
		resrtextents /= mp->m_sb.sb_rextsize;
		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
		quota_flag = XFS_QMOPT_RES_RTBLKS;
	} else {
		resrtextents = 0;
		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
		quota_flag = XFS_QMOPT_RES_REGBLKS;
	}

	/*
	 * Drop the shared lock acquired by the caller, attach the dquot if
	 * necessary and move on to transaction setup.
	 */
	xfs_iunlock(ip, lockmode);
	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	/*
	 * For DAX, we do not allocate unwritten extents, but instead we zero
	 * the block before we commit the transaction.  Ideally we'd like to do
	 * this outside the transaction context, but if we commit and then crash
	 * we may not have zeroed the blocks and this will be exposed on
	 * recovery of the allocation. Hence we must zero before commit.
	 *
	 * Further, if we are mapping unwritten extents here, we need to zero
	 * and convert them to written so that we don't need an unwritten extent
	 * callback for DAX. This also means that we need to be able to dip into
	 * the reserve block pool for bmbt block allocation if there is no space
	 * left but we need to do unwritten extent conversion.
	 */
	if (IS_DAX(VFS_I(ip))) {
		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
		if (imap->br_state == XFS_EXT_UNWRITTEN) {
			tflags |= XFS_TRANS_RESERVE;
			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
		}
	}
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, resrtextents,
			tflags, &tp);
	if (error)
		return error;

	lockmode = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lockmode);

	error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
	if (error)
		goto out_trans_cancel;

	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * From this point onwards we overwrite the imap pointer that the
	 * caller gave to us.
	 */
	nimaps = 1;
	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0,
				imap, &nimaps);
	if (error)
		goto out_res_cancel;

	/*
	 * Complete the transaction
	 */
	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	/*
	 * Copy any maps to caller's array and return any error.
	 */
	if (nimaps == 0) {
		error = -ENOSPC;
		goto out_unlock;
	}

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		error = xfs_alert_fsblock_zero(ip, imap);

out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;

out_res_cancel:
	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

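/*
 * Decide whether speculative preallocation has to be throttled against this
 * quota type: the quota is enforced, has a high watermark, and current usage
 * plus the proposed allocation is at or above the low preallocation
 * watermark.
 */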
STATIC bool
xfs_quota_need_throttle(
	struct xfs_inode *ip,
	int type,
	xfs_fsblock_t alloc_blocks)
{
	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);

	if (!dq || !xfs_this_quota_on(ip->i_mount, type))
		return false;

	/* no hi watermark, no throttle */
	if (!dq->q_prealloc_hi_wmark)
		return false;

	/* under the lo watermark, no throttle */
	if (dq->q_res_bcount + alloc_blocks < dq->q_prealloc_lo_wmark)
		return false;

	return true;
}

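/*
 * Scale back the preallocation for this quota: cap *qblocks and *qfreesp to
 * the space left below the quota's high watermark, and pick a throttle shift
 * that grows as the quota fills up.
 */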
STATIC void
xfs_quota_calc_throttle(
	struct xfs_inode *ip,
	int type,
	xfs_fsblock_t *qblocks,
	int *qshift,
	int64_t	*qfreesp)
{
	int64_t freesp;
	int shift = 0;
	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);

	/* no dq, or over hi wmark, squash the prealloc completely */
	if (!dq || dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
		*qblocks = 0;
		*qfreesp = 0;
		return;
	}

	freesp = dq->q_prealloc_hi_wmark - dq->q_res_bcount;
	if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
			shift += 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
			shift += 2;
	}

	if (freesp < *qfreesp)
		*qfreesp = freesp;

	/* only overwrite the throttle values if we are more aggressive */
	if ((freesp >> shift) < (*qblocks >> *qshift)) {
		*qblocks = freesp;
		*qshift = shift;
	}
}

/*
 * If we are doing a write at the end of the file and there are no allocations
 * past this one, then extend the allocation out to the file system's write
 * iosize.
 *
 * If we don't have a user specified preallocation size, dynamically increase
 * the preallocation size as the size of the file grows.  Cap the maximum size
 * at a single extent or less if the filesystem is near full.  The closer the
 * filesystem is to full, the smaller the maximum preallocation.
 *
 * As an exception we don't do any preallocation at all if the file is smaller
 * than the minimum preallocation and we are using the default dynamic
 * preallocation scheme, as it is likely this is the only write to the file
 * that is going to be done.
 *
 * We clean up any extra space left over when the file is closed in
 * xfs_inactive().
 */
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
	struct xfs_inode	*ip,
	int			whichfork,
	loff_t			offset,
	loff_t			count,
	struct xfs_iext_cursor	*icur)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	struct xfs_bmbt_irec	prev;
	int			shift = 0;
	int64_t			freesp;
	xfs_fsblock_t		qblocks;
	int			qshift = 0;
	xfs_fsblock_t		alloc_blocks = 0;

	if (offset + count <= XFS_ISIZE(ip))
		return 0;

	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) &&
	    (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks)))
		return 0;

	/*
	 * If an explicit allocsize is set, the file is small, or we
	 * are writing behind a hole, then use the minimum prealloc:
	 */
	if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
	    XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
	    !xfs_iext_peek_prev_extent(ifp, icur, &prev) ||
	    prev.br_startoff + prev.br_blockcount < offset_fsb)
		return mp->m_allocsize_blocks;

	/*
	 * Determine the initial size of the preallocation. We are beyond the
	 * current EOF here, but we need to take into account whether this is
	 * a sparse write or an extending write when determining the
	 * preallocation size.  Hence we need to look up the extent that ends
	 * at the current write offset and use the result to determine the
	 * preallocation size.
	 *
	 * If the extent is a hole, then preallocation is essentially disabled.
	 * Otherwise we take the size of the preceding data extent as the basis
	 * for the preallocation size. If the size of the extent is greater than
	 * half the maximum extent length, then use the current offset as the
	 * basis. This ensures that for large files the preallocation size
	 * always extends to MAXEXTLEN rather than falling short due to things
	 * like stripe unit/width alignment of real extents.
	 */
	if (prev.br_blockcount <= (MAXEXTLEN >> 1))
		alloc_blocks = prev.br_blockcount << 1;
	else
		alloc_blocks = XFS_B_TO_FSB(mp, offset);
	if (!alloc_blocks)
		goto check_writeio;
	qblocks = alloc_blocks;

	/*
	 * MAXEXTLEN is not a power of two value but we round the prealloc down
	 * to the nearest power of two value after throttling. To prevent the
	 * round down from unconditionally reducing the maximum supported prealloc
	 * size, we round up first, apply appropriate throttling, round down and
	 * cap the value to MAXEXTLEN.
	 */
	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
				       alloc_blocks);

	freesp = percpu_counter_read_positive(&mp->m_fdblocks);
	if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
			shift++;
	}

	/*
	 * Check each quota to cap the prealloc size, provide a shift value to
	 * throttle with and adjust amount of available space.
	 */
	if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
					&freesp);

	/*
	 * The final prealloc size is set to the minimum of free space available
	 * in each of the quotas and the overall filesystem.
	 *
	 * The shift throttle value is set to the maximum value as determined by
	 * the global low free space values and per-quota low free space values.
	 */
	alloc_blocks = min(alloc_blocks, qblocks);
	shift = max(shift, qshift);

	if (shift)
		alloc_blocks >>= shift;
	/*
	 * rounddown_pow_of_two() returns an undefined result if we pass in
	 * alloc_blocks = 0.
	 */
	if (alloc_blocks)
		alloc_blocks = rounddown_pow_of_two(alloc_blocks);
	if (alloc_blocks > MAXEXTLEN)
		alloc_blocks = MAXEXTLEN;

	/*
	 * If we are still trying to allocate more space than is
	 * available, squash the prealloc hard. This can happen if we
	 * have a large file on a small filesystem and the above
	 * lowspace thresholds are smaller than MAXEXTLEN.
	 */
	while (alloc_blocks && alloc_blocks >= freesp)
		alloc_blocks >>= 4;
check_writeio:
	if (alloc_blocks < mp->m_allocsize_blocks)
		alloc_blocks = mp->m_allocsize_blocks;
	trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
				      mp->m_allocsize_blocks);
	return alloc_blocks;
}

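/*
 * Convert a range of unwritten extents to written once the data has been
 * safely written, looping one allocation transaction at a time and logging
 * any resulting inode size update as we go.
 */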
int
xfs_iomap_write_unwritten(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	xfs_off_t	count,
	bool		update_isize)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb;
	xfs_filblks_t	count_fsb;
	xfs_filblks_t	numblks_fsb;
	int		nimaps;
	xfs_trans_t	*tp;
	xfs_bmbt_irec_t	imap;
	struct inode	*inode = VFS_I(ip);
	xfs_fsize_t	i_size;
	uint		resblks;
	int		error;

	trace_xfs_unwritten_convert(ip, offset, count);

	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);

	/*
	 * Reserve enough blocks in this transaction for two complete extent
	 * btree splits.  We may be converting the middle part of an unwritten
	 * extent and in this case we will insert two new extents in the btree
	 * each of which could cause a full split.
	 *
	 * This reservation amount will be used in the first call to
	 * xfs_bmbt_split() to select an AG with enough space to satisfy the
	 * rest of the operation.
	 */
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;

	do {
		/*
		 * Set up a transaction to convert the range of extents
		 * from unwritten to real. Do allocations in a loop until
		 * we have covered the range passed in.
		 *
		 * Note that we can't risk recursing back into the filesystem
		 * here as we might be asked to write out the same inode that we
		 * complete here and might deadlock on the iolock.
		 */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
				XFS_TRANS_RESERVE, &tp);
		if (error)
			return error;

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Modify the unwritten extent state of the buffer.
		 */
		nimaps = 1;
		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
					XFS_BMAPI_CONVERT, resblks, &imap,
					&nimaps);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Log the updated inode size as we go.  We have to be careful
		 * to only log it up to the actual write offset if it is
		 * halfway into a block.
		 */
		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
		if (i_size > offset + count)
			i_size = offset + count;
		if (update_isize && i_size > i_size_read(inode))
			i_size_write(inode, i_size);
		i_size = xfs_new_eof(ip, i_size);
		if (i_size) {
			ip->i_d.di_size = i_size;
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			return error;

		if (unlikely(!xfs_valid_startblock(ip, imap.br_startblock)))
			return xfs_alert_fsblock_zero(ip, &imap);

		if ((numblks_fsb = imap.br_blockcount) == 0) {
			/*
			 * The numblks_fsb value should always get
			 * smaller, otherwise the loop is stuck.
			 */
			ASSERT(imap.br_blockcount);
			break;
		}
		offset_fsb += numblks_fsb;
		count_fsb -= numblks_fsb;
	} while (count_fsb > 0);

	return 0;

error_on_bmapi_transaction:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

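/* Does this mapping need a block allocation before it can be written? */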
static inline bool
imap_needs_alloc(
	struct inode		*inode,
	unsigned		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	/* don't allocate blocks when just zeroing */
	if (flags & IOMAP_ZERO)
		return false;
	if (!nimaps ||
	    imap->br_startblock == HOLESTARTBLOCK ||
	    imap->br_startblock == DELAYSTARTBLOCK)
		return true;
	/* we convert unwritten extents before copying the data for DAX */
	if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN)
		return true;
	return false;
}

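/*
 * Must this write go through the COW path?  True for any write to a
 * COW-capable inode, except when zeroing holes or unwritten extents.
 */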
static inline bool
imap_needs_cow(
	struct xfs_inode	*ip,
	unsigned int		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	if (!xfs_is_cow_inode(ip))
		return false;

	/* when zeroing we don't have to COW holes or unwritten extents */
	if (flags & IOMAP_ZERO) {
		if (!nimaps ||
		    imap->br_startblock == HOLESTARTBLOCK ||
		    imap->br_state == XFS_EXT_UNWRITTEN)
			return false;
	}

	return true;
}

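/*
 * Take the ilock in the mode an iomap operation needs: shared for plain
 * lookups, exclusive for COW writes or when the extent list still has to be
 * read in.  Honours IOMAP_NOWAIT by returning -EAGAIN instead of blocking.
 */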
static int
xfs_ilock_for_iomap(
	struct xfs_inode	*ip,
	unsigned		flags,
	unsigned		*lockmode)
{
	unsigned		mode = XFS_ILOCK_SHARED;
	bool			is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);

	/*
	 * COW writes may allocate delalloc space or convert unwritten COW
	 * extents, so we need to make sure to take the lock exclusively here.
	 */
	if (xfs_is_cow_inode(ip) && is_write)
		mode = XFS_ILOCK_EXCL;

	/*
	 * Extents not yet cached require exclusive access, don't block.  This
	 * is an opencoded xfs_ilock_data_map_shared() call but with
	 * non-blocking behaviour.
	 */
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
		if (flags & IOMAP_NOWAIT)
			return -EAGAIN;
		mode = XFS_ILOCK_EXCL;
	}

relock:
	if (flags & IOMAP_NOWAIT) {
		if (!xfs_ilock_nowait(ip, mode))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, mode);
	}

	/*
	 * The reflink iflag could have changed since the earlier unlocked
	 * check, so if we got ILOCK_SHARED for a write but we're now a
	 * reflink inode we have to switch to ILOCK_EXCL and relock.
	 */
	if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_cow_inode(ip)) {
		xfs_iunlock(ip, mode);
		mode = XFS_ILOCK_EXCL;
		goto relock;
	}

	*lockmode = mode;
	return 0;
}

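/* iomap_begin method for direct I/O (and DAX) writes and zeroing. */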
static int
xfs_direct_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap, cmap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	u16			iomap_flags = 0;
	unsigned		lockmode;

	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Writes that span EOF might trigger an IO size update on completion,
	 * so consider them to be dirty for the purposes of O_DSYNC even if
	 * there are no other metadata changes pending or made here.
	 */
	if (offset + length > i_size_read(inode))
		iomap_flags |= IOMAP_F_DIRTY;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;

	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (error)
		goto out_unlock;

	if (imap_needs_cow(ip, flags, &imap, nimaps)) {
		error = -EAGAIN;
		if (flags & IOMAP_NOWAIT)
			goto out_unlock;

		/* may drop and re-acquire the ilock */
		error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
				&lockmode, flags & IOMAP_DIRECT);
		if (error)
			goto out_unlock;
		if (shared)
			goto out_found_cow;
		end_fsb = imap.br_startoff + imap.br_blockcount;
		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
	}

	if (imap_needs_alloc(inode, flags, &imap, nimaps))
		goto allocate_blocks;

	xfs_iunlock(ip, lockmode);
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);

allocate_blocks:
	error = -EAGAIN;
	if (flags & IOMAP_NOWAIT)
		goto out_unlock;

	/*
	 * We cap the maximum length we map to a sane size to keep the chunks
	 * of work done somewhat symmetric with the work writeback does.
	 * This is a completely arbitrary number pulled out of thin air as a
	 * best guess for initial testing.
	 *
	 * Note that the value needs to be less than 32 bits wide until the
	 * lower level functions are updated.
	 */
	length = min_t(loff_t, length, 1024 * PAGE_SIZE);

	/*
	 * xfs_iomap_write_direct() expects the shared lock. It is unlocked on
	 * return.
	 */
	if (lockmode == XFS_ILOCK_EXCL)
		xfs_ilock_demote(ip, lockmode);
	error = xfs_iomap_write_direct(ip, offset, length, &imap, nimaps);
	if (error)
		return error;

	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);

out_found_cow:
	xfs_iunlock(ip, lockmode);
	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
	if (imap.br_startblock != HOLESTARTBLOCK) {
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
		if (error)
			return error;
	}
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);

out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_direct_write_iomap_ops = {
	.iomap_begin		= xfs_direct_write_iomap_begin,
};

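/*
 * iomap_begin method for buffered writes: set up a delalloc (or COW fork)
 * reservation for the requested range, including speculative preallocation
 * past EOF.
 */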
static int
xfs_buffered_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			count,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, count);
	struct xfs_bmbt_irec	imap, cmap;
	struct xfs_iext_cursor	icur, ccur;
	xfs_fsblock_t		prealloc_blocks = 0;
	bool			eof = false, cow_eof = false, shared = false;
	int			allocfork = XFS_DATA_FORK;
	int			error = 0;

	/* we can't use delayed allocations when using extent size hints */
	if (xfs_get_extsz_hint(ip))
		return xfs_direct_write_iomap_begin(inode, offset, count,
				flags, iomap, srcmap);

	ASSERT(!XFS_IS_REALTIME_INODE(ip));

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		error = -EFSCORRUPTED;
		goto out_unlock;
	}

	XFS_STATS_INC(mp, xs_blk_mapw);

	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
		if (error)
			goto out_unlock;
	}

	/*
	 * Search the data fork first to look up our source mapping.  We
	 * always need the data fork map, as we have to return it to the
	 * iomap code so that the higher level write code can read data in to
	 * perform read-modify-write cycles for unaligned writes.
	 */
	eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
	if (eof)
		imap.br_startoff = end_fsb; /* fake hole until the end */

	/* We never need to allocate blocks for zeroing a hole. */
	if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
		xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
		goto out_unlock;
	}

	/*
	 * Search the COW fork extent list even if we did not find a data fork
	 * extent.  This serves two purposes: first this implements the
	 * speculative preallocation using cowextsize, so that we also unshare
	 * blocks adjacent to shared blocks instead of just the shared blocks
	 * themselves.  Second the lookup in the extent list is generally faster
	 * than going out to the shared extent tree.
	 */
	if (xfs_is_cow_inode(ip)) {
		if (!ip->i_cowfp) {
			ASSERT(!xfs_is_reflink_inode(ip));
			xfs_ifork_init_cow(ip);
		}
		cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
				&ccur, &cmap);
		if (!cow_eof && cmap.br_startoff <= offset_fsb) {
			trace_xfs_reflink_cow_found(ip, &cmap);
			goto found_cow;
		}
	}

	if (imap.br_startoff <= offset_fsb) {
		/*
		 * For reflink files we may need a delalloc reservation when
		 * overwriting shared extents.  This includes zeroing of
		 * existing extents that contain data.
		 */
		if (!xfs_is_cow_inode(ip) ||
		    ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);

		/* Trim the mapping to the nearest shared extent boundary. */
		error = xfs_inode_need_cow(ip, &imap, &shared);
		if (error)
			goto out_unlock;

		/* Not shared?  Just report the (potentially capped) extent. */
		if (!shared) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		/*
		 * Fork all the shared blocks from our write offset until the
		 * end of the extent.
		 */
		allocfork = XFS_COW_FORK;
		end_fsb = imap.br_startoff + imap.br_blockcount;
	} else {
		/*
		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
		 * pages to keep the chunks of work done somewhat symmetric
		 * with the work writeback does.  This is a completely
		 * arbitrary number pulled out of thin air.
		 *
		 * Note that the value needs to be less than 32 bits wide until
		 * the lower level functions are updated.
		 */
		count = min_t(loff_t, count, 1024 * PAGE_SIZE);
		end_fsb = xfs_iomap_end_fsb(mp, offset, count);

		if (xfs_is_always_cow_inode(ip))
			allocfork = XFS_COW_FORK;
	}

	error = xfs_qm_dqattach_locked(ip, false);
	if (error)
		goto out_unlock;

	if (eof) {
		prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, offset,
				count, &icur);
		if (prealloc_blocks) {
			xfs_extlen_t	align;
			xfs_off_t	end_offset;
			xfs_fileoff_t	p_end_fsb;

			end_offset = XFS_ALLOC_ALIGN(mp, offset + count - 1);
			p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
					prealloc_blocks;

			align = xfs_eof_alignment(ip, 0);
			if (align)
				p_end_fsb = roundup_64(p_end_fsb, align);

			p_end_fsb = min(p_end_fsb,
				XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
			ASSERT(p_end_fsb > offset_fsb);
			prealloc_blocks = p_end_fsb - end_fsb;
		}
	}

retry:
	error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
			end_fsb - offset_fsb, prealloc_blocks,
			allocfork == XFS_DATA_FORK ? &imap : &cmap,
			allocfork == XFS_DATA_FORK ? &icur : &ccur,
			allocfork == XFS_DATA_FORK ? eof : cow_eof);
	switch (error) {
	case 0:
		break;
	case -ENOSPC:
	case -EDQUOT:
		/* retry without any preallocation */
		trace_xfs_delalloc_enospc(ip, offset, count);
		if (prealloc_blocks) {
			prealloc_blocks = 0;
			goto retry;
		}
		/*FALLTHRU*/
	default:
		goto out_unlock;
	}

	if (allocfork == XFS_COW_FORK) {
		trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
		goto found_cow;
	}

	/*
	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
	 * them out if the write happens to fail.
	 */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);

found_imap:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);

found_cow:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (imap.br_startoff <= offset_fsb) {
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
		if (error)
			return error;
	} else {
		xfs_trim_extent(&cmap, offset_fsb,
				imap.br_startoff - offset_fsb);
	}
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

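/*
 * Punch out delalloc blocks that this write allocated (IOMAP_F_NEW) but did
 * not end up covering, e.g. after a short write.
 */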
static int
xfs_buffered_write_iomap_end(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	ssize_t			written,
	unsigned		flags,
	struct iomap		*iomap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		start_fsb;
	xfs_fileoff_t		end_fsb;
	int			error = 0;

	if (iomap->type != IOMAP_DELALLOC)
		return 0;

	/*
	 * Behave as if the write failed if drop writes is enabled. Set the NEW
	 * flag to force delalloc cleanup.
	 */
	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
		iomap->flags |= IOMAP_F_NEW;
		written = 0;
	}

	/*
	 * start_fsb refers to the first unused block after a short write. If
	 * nothing was written, round offset down to point at the first block in
	 * the range.
	 */
	if (unlikely(!written))
		start_fsb = XFS_B_TO_FSBT(mp, offset);
	else
		start_fsb = XFS_B_TO_FSB(mp, offset + written);
	end_fsb = XFS_B_TO_FSB(mp, offset + length);

	/*
	 * Trim delalloc blocks if they were allocated by this write and we
	 * didn't manage to write the whole range.
	 *
	 * We don't need to care about racing delalloc as we hold i_mutex
	 * across the reserve/allocate/unreserve calls. If there are delalloc
	 * blocks in the range, they are ours.
	 */
	if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
		truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
					 XFS_FSB_TO_B(mp, end_fsb) - 1);

		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
					       end_fsb - start_fsb);
		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
			xfs_alert(mp, "%s: unable to clean up ino %lld",
				__func__, ip->i_ino);
			return error;
		}
	}

	return 0;
}

const struct iomap_ops xfs_buffered_write_iomap_ops = {
	.iomap_begin		= xfs_buffered_write_iomap_begin,
	.iomap_end		= xfs_buffered_write_iomap_end,
};

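/* iomap_begin method for reads: report the existing mapping, never allocate. */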
static int
xfs_read_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	unsigned		lockmode;

	ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (!error && (flags & IOMAP_REPORT))
		error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
}

const struct iomap_ops xfs_read_iomap_ops = {
	.iomap_begin		= xfs_read_iomap_begin,
};

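/*
 * iomap_begin method for SEEK_HOLE/SEEK_DATA: report data, COW and hole
 * extents without ever allocating anything.
 */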
static int
xfs_seek_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	xfs_fileoff_t		cow_fsb = NULLFILEOFF, data_fsb = NULLFILEOFF;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	imap, cmap;
	int			error = 0;
	unsigned		lockmode;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	lockmode = xfs_ilock_data_map_shared(ip);
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
		if (error)
			goto out_unlock;
	}

	if (xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) {
		/*
		 * If we found a data extent we are done.
		 */
		if (imap.br_startoff <= offset_fsb)
			goto done;
		data_fsb = imap.br_startoff;
	} else {
		/*
		 * Fake a hole until the end of the file.
		 */
		data_fsb = xfs_iomap_end_fsb(mp, offset, length);
	}

	/*
	 * If a COW fork extent covers the hole, report it - capped to the next
	 * data fork extent:
	 */
	if (xfs_inode_has_cow_data(ip) &&
	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
		cow_fsb = cmap.br_startoff;
	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
		if (data_fsb < cow_fsb + cmap.br_blockcount)
			end_fsb = min(end_fsb, data_fsb);
		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
		/*
		 * This is a COW extent, so we must probe the page cache
		 * because there could be dirty page cache being backed
		 * by this extent.
		 */
		iomap->type = IOMAP_UNWRITTEN;
		goto out_unlock;
	}

	/*
	 * Else report a hole, capped to the next found data or COW extent.
	 */
	if (cow_fsb != NULLFILEOFF && cow_fsb < data_fsb)
		imap.br_blockcount = cow_fsb - offset_fsb;
	else
		imap.br_blockcount = data_fsb - offset_fsb;
	imap.br_startoff = offset_fsb;
	imap.br_startblock = HOLESTARTBLOCK;
	imap.br_state = XFS_EXT_NORM;
done:
	xfs_trim_extent(&imap, offset_fsb, end_fsb);
	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_seek_iomap_ops = {
	.iomap_begin		= xfs_seek_iomap_begin,
};

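/* iomap_begin method for reporting extents of the attribute fork. */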
static int
xfs_xattr_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	struct xfs_bmbt_irec	imap;
	int			nimaps = 1, error = 0;
	unsigned		lockmode;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	lockmode = xfs_ilock_attr_map_shared(ip);

	/* if there is no attribute fork or no extents, return ENOENT */
	if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
		error = -ENOENT;
		goto out_unlock;
	}

	ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	ASSERT(nimaps);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
}

const struct iomap_ops xfs_xattr_iomap_ops = {
	.iomap_begin		= xfs_xattr_iomap_begin,
};