| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Copyright (C) 2008 Oracle. All rights reserved. |
| 4 | * |
| 5 | * Based on jffs2 zlib code: |
| 6 | * Copyright © 2001-2007 Red Hat, Inc. |
| 7 | * Created by David Woodhouse <dwmw2@infradead.org> |
| 8 | */ |
| 9 | |
| 10 | #include <linux/kernel.h> |
| 11 | #include <linux/slab.h> |
| 12 | #include <linux/zlib.h> |
| 13 | #include <linux/zutil.h> |
| 14 | #include <linux/mm.h> |
| 15 | #include <linux/init.h> |
| 16 | #include <linux/err.h> |
| 17 | #include <linux/sched.h> |
| 18 | #include <linux/pagemap.h> |
| 19 | #include <linux/bio.h> |
| 20 | #include <linux/refcount.h> |
| 21 | #include "btrfs_inode.h" |
| 22 | #include "compression.h" |
| 23 | #include "fs.h" |
| 24 | #include "subpage.h" |
| 25 | |
| 26 | /* workspace buffer size for s390 zlib hardware support */ |
| 27 | #define ZLIB_DFLTCC_BUF_SIZE (4 * PAGE_SIZE) |
| 28 | |
/*
 * Per-user zlib state handed around as a 'struct list_head *' (the embedded
 * @list member) and converted back with list_entry().
 */
struct workspace {
	/*
	 * zlib stream state.  strm.workspace is allocated in
	 * zlib_alloc_workspace() and released in zlib_free_workspace().
	 */
	z_stream strm;
	/*
	 * Scratch buffer: inflate output is written here before being copied
	 * out, and compression stages input here when more than PAGE_SIZE
	 * bytes are fed to deflate at once.
	 */
	char *buf;
	/* Size of @buf in bytes: ZLIB_DFLTCC_BUF_SIZE or PAGE_SIZE. */
	unsigned int buf_size;
	/* List node; this is the handle callers hold. */
	struct list_head list;
	/* Compression level passed to zlib_deflateInit(). */
	int level;
};
| 36 | |
/* Workspace manager published through btrfs_zlib_compress.workspace_manager. */
static struct workspace_manager wsm;
| 38 | |
| 39 | struct list_head *zlib_get_workspace(unsigned int level) |
| 40 | { |
| 41 | struct list_head *ws = btrfs_get_workspace(BTRFS_COMPRESS_ZLIB, level); |
| 42 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
| 43 | |
| 44 | workspace->level = level; |
| 45 | |
| 46 | return ws; |
| 47 | } |
| 48 | |
| 49 | void zlib_free_workspace(struct list_head *ws) |
| 50 | { |
| 51 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
| 52 | |
| 53 | kvfree(workspace->strm.workspace); |
| 54 | kfree(workspace->buf); |
| 55 | kfree(workspace); |
| 56 | } |
| 57 | |
| 58 | struct list_head *zlib_alloc_workspace(unsigned int level) |
| 59 | { |
| 60 | struct workspace *workspace; |
| 61 | int workspacesize; |
| 62 | |
| 63 | workspace = kzalloc(sizeof(*workspace), GFP_KERNEL); |
| 64 | if (!workspace) |
| 65 | return ERR_PTR(-ENOMEM); |
| 66 | |
| 67 | workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), |
| 68 | zlib_inflate_workspacesize()); |
| 69 | workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN); |
| 70 | workspace->level = level; |
| 71 | workspace->buf = NULL; |
| 72 | /* |
| 73 | * In case of s390 zlib hardware support, allocate lager workspace |
| 74 | * buffer. If allocator fails, fall back to a single page buffer. |
| 75 | */ |
| 76 | if (zlib_deflate_dfltcc_enabled()) { |
| 77 | workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE, |
| 78 | __GFP_NOMEMALLOC | __GFP_NORETRY | |
| 79 | __GFP_NOWARN | GFP_NOIO); |
| 80 | workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE; |
| 81 | } |
| 82 | if (!workspace->buf) { |
| 83 | workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
| 84 | workspace->buf_size = PAGE_SIZE; |
| 85 | } |
| 86 | if (!workspace->strm.workspace || !workspace->buf) |
| 87 | goto fail; |
| 88 | |
| 89 | INIT_LIST_HEAD(&workspace->list); |
| 90 | |
| 91 | return &workspace->list; |
| 92 | fail: |
| 93 | zlib_free_workspace(&workspace->list); |
| 94 | return ERR_PTR(-ENOMEM); |
| 95 | } |
| 96 | |
| 97 | /* |
| 98 | * Helper for S390x with hardware zlib compression support. |
| 99 | * |
| 100 | * That hardware acceleration requires a buffer size larger than a single page |
| 101 | * to get ideal performance, thus we need to do the memory copy rather than |
| 102 | * use the page cache directly as input buffer. |
| 103 | */ |
| 104 | static int copy_data_into_buffer(struct address_space *mapping, |
| 105 | struct workspace *workspace, u64 filepos, |
| 106 | unsigned long length) |
| 107 | { |
| 108 | u64 cur = filepos; |
| 109 | |
| 110 | /* It's only for hardware accelerated zlib code. */ |
| 111 | ASSERT(zlib_deflate_dfltcc_enabled()); |
| 112 | |
| 113 | while (cur < filepos + length) { |
| 114 | struct folio *folio; |
| 115 | void *data_in; |
| 116 | unsigned int offset; |
| 117 | unsigned long copy_length; |
| 118 | int ret; |
| 119 | |
| 120 | ret = btrfs_compress_filemap_get_folio(mapping, cur, &folio); |
| 121 | if (ret < 0) |
| 122 | return ret; |
| 123 | |
| 124 | offset = offset_in_folio(folio, cur); |
| 125 | copy_length = min(folio_size(folio) - offset, |
| 126 | filepos + length - cur); |
| 127 | |
| 128 | data_in = kmap_local_folio(folio, offset); |
| 129 | memcpy(workspace->buf + cur - filepos, data_in, copy_length); |
| 130 | kunmap_local(data_in); |
| 131 | cur += copy_length; |
| 132 | } |
| 133 | return 0; |
| 134 | } |
| 135 | |
/*
 * Compress a range of file data from @mapping with zlib into freshly
 * allocated folios.
 *
 * @ws:         list node of the workspace to use
 * @mapping:    address space the input folios are looked up in
 * @start:      file offset of the first input byte
 * @folios:     array receiving the allocated output folios
 * @out_folios: in: capacity of @folios; out: number of folios stored in
 *              @folios (written on success and on every failure path)
 * @total_in:   out: input bytes consumed; 0 unless the call succeeds
 * @total_out:  in: number of input bytes to compress; out: compressed size,
 *              0 unless the call succeeds
 *
 * Returns 0 on success.  Returns -E2BIG when the output does not fit in the
 * given number of folios or is not smaller than the input, -ENOMEM when an
 * output folio cannot be allocated, -EIO when zlib reports a failure, or the
 * negative error of a failed input folio lookup.
 *
 * On failure the folios allocated so far stay in @folios and are counted in
 * *@out_folios; this function does not free them (presumably the caller
 * does - confirm against the caller).
 */
int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
			 u64 start, struct folio **folios, unsigned long *out_folios,
			 unsigned long *total_in, unsigned long *total_out)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret;
	char *data_in = NULL;
	char *cfolio_out;
	int nr_folios = 0;
	struct folio *in_folio = NULL;
	struct folio *out_folio = NULL;
	/* *total_out carries the input length on entry. */
	unsigned long len = *total_out;
	unsigned long nr_dest_folios = *out_folios;
	const unsigned long max_out = nr_dest_folios * PAGE_SIZE;
	const u64 orig_end = start + len;

	*out_folios = 0;
	*total_out = 0;
	*total_in = 0;

	ret = zlib_deflateInit(&workspace->strm, workspace->level);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = BTRFS_I(mapping->host);

		btrfs_err(inode->root->fs_info,
	"zlib compression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
		ret = -EIO;
		goto out;
	}

	workspace->strm.total_in = 0;
	workspace->strm.total_out = 0;

	/* The first output folio is always needed. */
	out_folio = btrfs_alloc_compr_folio();
	if (out_folio == NULL) {
		ret = -ENOMEM;
		goto out;
	}
	cfolio_out = folio_address(out_folio);
	folios[0] = out_folio;
	nr_folios = 1;

	/* Start with no input queued; the loop below supplies it. */
	workspace->strm.next_in = workspace->buf;
	workspace->strm.avail_in = 0;
	workspace->strm.next_out = cfolio_out;
	workspace->strm.avail_out = PAGE_SIZE;

	while (workspace->strm.total_in < len) {
		/*
		 * Get next input pages and copy the contents to
		 * the workspace buffer if required.
		 */
		if (workspace->strm.avail_in == 0) {
			unsigned long bytes_left = len - workspace->strm.total_in;
			unsigned int copy_length = min(bytes_left, workspace->buf_size);

			/*
			 * This can only happen when hardware zlib compression is
			 * enabled.
			 */
			if (copy_length > PAGE_SIZE) {
				/* Stage a multi-page chunk in the workspace buffer. */
				ret = copy_data_into_buffer(mapping, workspace,
							    start, copy_length);
				if (ret < 0)
					goto out;
				start += copy_length;
				workspace->strm.next_in = workspace->buf;
				workspace->strm.avail_in = copy_length;
			} else {
				unsigned int cur_len;

				/*
				 * Feed deflate directly from a mapped page
				 * cache folio.  Release the previous one
				 * first; its mapping and reference are held
				 * across loop iterations.
				 */
				if (data_in) {
					kunmap_local(data_in);
					folio_put(in_folio);
					data_in = NULL;
				}
				ret = btrfs_compress_filemap_get_folio(mapping,
						start, &in_folio);
				if (ret < 0)
					goto out;
				cur_len = btrfs_calc_input_length(in_folio, orig_end, start);
				data_in = kmap_local_folio(in_folio,
							   offset_in_folio(in_folio, start));
				start += cur_len;
				workspace->strm.next_in = data_in;
				workspace->strm.avail_in = cur_len;
			}
		}

		ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
		if (unlikely(ret != Z_OK)) {
			struct btrfs_inode *inode = BTRFS_I(mapping->host);

			/* Note: @start has already been advanced past this chunk. */
			btrfs_warn(inode->root->fs_info,
		"zlib compression failed, error %d root %llu inode %llu offset %llu",
				   ret, btrfs_root_id(inode->root), btrfs_ino(inode),
				   start);
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		}

		/* we're making it bigger, give up */
		if (workspace->strm.total_in > 8192 &&
		    workspace->strm.total_in <
		    workspace->strm.total_out) {
			ret = -E2BIG;
			goto out;
		}
		/*
		 * We need another page for writing out.  Test this
		 * before the total_in so we will pull in a new page for
		 * the stream end if required.
		 */
		if (workspace->strm.avail_out == 0) {
			if (nr_folios == nr_dest_folios) {
				ret = -E2BIG;
				goto out;
			}
			out_folio = btrfs_alloc_compr_folio();
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			folios[nr_folios] = out_folio;
			nr_folios++;
			workspace->strm.avail_out = PAGE_SIZE;
			workspace->strm.next_out = cfolio_out;
		}
		/* we're all done */
		if (workspace->strm.total_in >= len)
			break;
		if (workspace->strm.total_out > max_out)
			break;
	}
	/* No more input from here on; only flush what zlib still holds. */
	workspace->strm.avail_in = 0;
	/*
	 * Call deflate with Z_FINISH flush parameter providing more output
	 * space but no more input data, until it returns with Z_STREAM_END.
	 */
	while (ret != Z_STREAM_END) {
		ret = zlib_deflate(&workspace->strm, Z_FINISH);
		if (ret == Z_STREAM_END)
			break;
		if (ret != Z_OK && ret != Z_BUF_ERROR) {
			zlib_deflateEnd(&workspace->strm);
			ret = -EIO;
			goto out;
		} else if (workspace->strm.avail_out == 0) {
			/* Get another folio for the stream end. */
			if (nr_folios == nr_dest_folios) {
				ret = -E2BIG;
				goto out;
			}
			out_folio = btrfs_alloc_compr_folio();
			if (out_folio == NULL) {
				ret = -ENOMEM;
				goto out;
			}
			cfolio_out = folio_address(out_folio);
			folios[nr_folios] = out_folio;
			nr_folios++;
			workspace->strm.avail_out = PAGE_SIZE;
			workspace->strm.next_out = cfolio_out;
		}
	}
	zlib_deflateEnd(&workspace->strm);

	/* Compression that does not shrink the data is reported as failure. */
	if (workspace->strm.total_out >= workspace->strm.total_in) {
		ret = -E2BIG;
		goto out;
	}

	ret = 0;
	*total_out = workspace->strm.total_out;
	*total_in = workspace->strm.total_in;
out:
	/* Report the allocated folio count on every path, success or not. */
	*out_folios = nr_folios;
	/* Release the input folio still mapped from the direct-feed path. */
	if (data_in) {
		kunmap_local(data_in);
		folio_put(in_folio);
	}

	return ret;
}
| 322 | |
/*
 * Inflate the compressed data of @cb and pass the result on through
 * btrfs_decompress_buf2page().
 *
 * @ws: list node of the workspace to use
 * @cb: compressed bio; input is read from cb->compressed_folios, one
 *      PAGE_SIZE piece at a time, for cb->compressed_len bytes in total
 *
 * Output is produced into workspace->buf and handed over after every inflate
 * call, then the buffer is reused.
 *
 * Returns 0 on success and -EIO on failure.
 */
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
	struct workspace *workspace = list_entry(ws, struct workspace, list);
	int ret = 0, ret2;
	int wbits = MAX_WBITS;
	char *data_in;
	size_t total_out = 0;
	unsigned long folio_in_index = 0;
	size_t srclen = cb->compressed_len;
	unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
	unsigned long buf_start;
	struct folio **folios_in = cb->compressed_folios;

	/* Map the first input folio; it stays mapped until it is consumed. */
	data_in = kmap_local_folio(folios_in[folio_in_index], 0);
	workspace->strm.next_in = data_in;
	workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
	workspace->strm.total_in = 0;

	workspace->strm.total_out = 0;
	workspace->strm.next_out = workspace->buf;
	workspace->strm.avail_out = workspace->buf_size;

	/*
	 * If it's deflate, and it's got no preset dictionary, then
	 * we can tell zlib to skip the adler32 check.
	 *
	 * The two header bytes are validated here (method, dictionary flag,
	 * 16-bit value divisible by 31), then skipped, and the window size
	 * taken from the header is passed to zlib as a negative value.
	 */
	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

		wbits = -((data_in[0] >> 4) + 8);
		workspace->strm.next_in += 2;
		workspace->strm.avail_in -= 2;
	}

	ret = zlib_inflateInit2(&workspace->strm, wbits);
	if (unlikely(ret != Z_OK)) {
		struct btrfs_inode *inode = cb->bbio.inode;

		kunmap_local(data_in);
		btrfs_err(inode->root->fs_info,
	"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
		return -EIO;
	}
	while (workspace->strm.total_in < srclen) {
		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
		/* Any other code is an error; it is reported after the loop. */
		if (ret != Z_OK && ret != Z_STREAM_END)
			break;

		/* [buf_start, total_out) is the output range of this call. */
		buf_start = total_out;
		total_out = workspace->strm.total_out;

		/* we didn't make progress in this inflate call, we're done */
		if (buf_start == total_out)
			break;

		ret2 = btrfs_decompress_buf2page(workspace->buf,
				total_out - buf_start, cb, buf_start);
		/*
		 * A zero return ends decompression successfully.
		 * NOTE(review): presumably it means no more output is
		 * wanted - confirm against btrfs_decompress_buf2page().
		 */
		if (ret2 == 0) {
			ret = 0;
			goto done;
		}

		/* Reuse the whole output buffer for the next call. */
		workspace->strm.next_out = workspace->buf;
		workspace->strm.avail_out = workspace->buf_size;

		if (workspace->strm.avail_in == 0) {
			unsigned long tmp;
			/* Current input folio is used up; switch to the next. */
			kunmap_local(data_in);
			folio_in_index++;
			if (folio_in_index >= total_folios_in) {
				/* Nothing mapped any more; skip the unmap at 'done'. */
				data_in = NULL;
				break;
			}
			data_in = kmap_local_folio(folios_in[folio_in_index], 0);
			workspace->strm.next_in = data_in;
			tmp = srclen - workspace->strm.total_in;
			workspace->strm.avail_in = min(tmp, PAGE_SIZE);
		}
	}
	/* Leaving the loop without having seen the stream end is a failure. */
	if (unlikely(ret != Z_STREAM_END)) {
		btrfs_err(cb->bbio.inode->root->fs_info,
		"zlib decompression failed, error %d root %llu inode %llu offset %llu",
			  ret, btrfs_root_id(cb->bbio.inode->root),
			  btrfs_ino(cb->bbio.inode), cb->start);
		ret = -EIO;
	} else {
		ret = 0;
	}
done:
	zlib_inflateEnd(&workspace->strm);
	if (data_in)
		kunmap_local(data_in);
	return ret;
}
| 417 | |
| 418 | int zlib_decompress(struct list_head *ws, const u8 *data_in, |
| 419 | struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, |
| 420 | size_t destlen) |
| 421 | { |
| 422 | struct workspace *workspace = list_entry(ws, struct workspace, list); |
| 423 | int ret = 0; |
| 424 | int wbits = MAX_WBITS; |
| 425 | unsigned long to_copy; |
| 426 | |
| 427 | workspace->strm.next_in = data_in; |
| 428 | workspace->strm.avail_in = srclen; |
| 429 | workspace->strm.total_in = 0; |
| 430 | |
| 431 | workspace->strm.next_out = workspace->buf; |
| 432 | workspace->strm.avail_out = workspace->buf_size; |
| 433 | workspace->strm.total_out = 0; |
| 434 | /* If it's deflate, and it's got no preset dictionary, then |
| 435 | we can tell zlib to skip the adler32 check. */ |
| 436 | if (srclen > 2 && !(data_in[1] & PRESET_DICT) && |
| 437 | ((data_in[0] & 0x0f) == Z_DEFLATED) && |
| 438 | !(((data_in[0]<<8) + data_in[1]) % 31)) { |
| 439 | |
| 440 | wbits = -((data_in[0] >> 4) + 8); |
| 441 | workspace->strm.next_in += 2; |
| 442 | workspace->strm.avail_in -= 2; |
| 443 | } |
| 444 | |
| 445 | ret = zlib_inflateInit2(&workspace->strm, wbits); |
| 446 | if (unlikely(ret != Z_OK)) { |
| 447 | struct btrfs_inode *inode = folio_to_inode(dest_folio); |
| 448 | |
| 449 | btrfs_err(inode->root->fs_info, |
| 450 | "zlib decompression init failed, error %d root %llu inode %llu offset %llu", |
| 451 | ret, btrfs_root_id(inode->root), btrfs_ino(inode), |
| 452 | folio_pos(dest_folio)); |
| 453 | return -EIO; |
| 454 | } |
| 455 | |
| 456 | /* |
| 457 | * Everything (in/out buf) should be at most one sector, there should |
| 458 | * be no need to switch any input/output buffer. |
| 459 | */ |
| 460 | ret = zlib_inflate(&workspace->strm, Z_FINISH); |
| 461 | to_copy = min(workspace->strm.total_out, destlen); |
| 462 | if (ret != Z_STREAM_END) |
| 463 | goto out; |
| 464 | |
| 465 | memcpy_to_folio(dest_folio, dest_pgoff, workspace->buf, to_copy); |
| 466 | |
| 467 | out: |
| 468 | if (unlikely(to_copy != destlen)) { |
| 469 | struct btrfs_inode *inode = folio_to_inode(dest_folio); |
| 470 | |
| 471 | btrfs_err(inode->root->fs_info, |
| 472 | "zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu", |
| 473 | ret, btrfs_root_id(inode->root), btrfs_ino(inode), |
| 474 | folio_pos(dest_folio), to_copy, destlen); |
| 475 | ret = -EIO; |
| 476 | } else { |
| 477 | ret = 0; |
| 478 | } |
| 479 | |
| 480 | zlib_inflateEnd(&workspace->strm); |
| 481 | |
| 482 | if (unlikely(to_copy < destlen)) |
| 483 | folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy); |
| 484 | return ret; |
| 485 | } |
| 486 | |
| 487 | const struct btrfs_compress_op btrfs_zlib_compress = { |
| 488 | .workspace_manager = &wsm, |
| 489 | .min_level = 1, |
| 490 | .max_level = 9, |
| 491 | .default_level = BTRFS_ZLIB_DEFAULT_LEVEL, |
| 492 | }; |