2 * Copyright (C) 2008 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
18 * Based on jffs2 zlib code:
19 * Copyright © 2001-2007 Red Hat, Inc.
20 * Created by David Woodhouse <dwmw2@infradead.org>
23 #include <linux/kernel.h>
24 #include <linux/slab.h>
25 #include <linux/zlib.h>
26 #include <linux/zutil.h>
27 #include <linux/vmalloc.h>
28 #include <linux/init.h>
29 #include <linux/err.h>
30 #include <linux/sched.h>
31 #include <linux/pagemap.h>
32 #include <linux/bio.h>
33 #include "compression.h"
35 /* Plan: call deflate() with avail_in == *sourcelen,
36 avail_out = *dstlen - 12 and flush == Z_FINISH.
37 If it doesn't manage to finish, call it again with
38 avail_in == 0 and avail_out set to the remaining 12
39 bytes for it to clean up.
40 Q: Is 12 bytes sufficient?
/*
 * Number of output bytes the plan comment above holds back so deflate can
 * finish the stream.  No use of this macro is visible in this listing.
 */
42 #define STREAM_END_SPACE 12
/*
 * Link that chains a workspace onto idle_workspace.
 * NOTE(review): the enclosing struct header and the other members used
 * later in this file (def_strm, inf_strm, buf) are not visible here.
 */
48 struct list_head list;
/*
 * Workspaces that are not in use: find_zlib_workspace() takes entries
 * from the head, free_workspace() appends to the tail.
 */
51 static LIST_HEAD(idle_workspace);
/*
 * Held by find_zlib_workspace() and free_workspace() around their
 * accesses to the idle list.
 */
52 static DEFINE_SPINLOCK(workspace_lock);
/*
 * Compared with num_online_cpus() in free_workspace() to choose between
 * keeping and freeing a workspace.  Presumably the number of idle
 * workspaces, but no update of it is visible in this listing.
 */
53 static unsigned long num_workspace;
/*
 * Incremented before a new workspace is allocated and decremented
 * whenever one is freed.
 */
54 static atomic_t alloc_workspace = ATOMIC_INIT(0);
/*
 * find_zlib_workspace() queues itself here (prepare_to_wait) when
 * alloc_workspace exceeds the online CPU count; the free paths wake it.
 */
55 static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
58 * this finds an available zlib workspace or allocates a new one
59 * NULL or an ERR_PTR is returned if things go bad.
/*
 * Hand out a zlib workspace.  An entry from idle_workspace is reused when
 * one exists; otherwise a new workspace is allocated together with its
 * deflate state, inflate state and a PAGE_CACHE_SIZE buffer.
 *
 * NOTE(review): this listing omits lines (braces, return statements, the
 * body of the wait and the allocation checks).  The comments below only
 * describe the statements that are visible.
 */
61 static struct workspace *find_zlib_workspace(void)
63 struct workspace *workspace;
/* snapshot of the online CPU count, used as the allocation limit below */
65 int cpus = num_online_cpus();
/* fast path: unlink the first idle workspace while holding the lock */
68 spin_lock(&workspace_lock);
69 if (!list_empty(&idle_workspace)) {
70 workspace = list_entry(idle_workspace.next, struct workspace,
72 list_del(&workspace->list);
74 spin_unlock(&workspace_lock);
/* nothing idle: drop the lock before queueing on the wait queue or allocating */
78 spin_unlock(&workspace_lock);
/*
 * Throttle: with more than cpus workspaces allocated, queue on
 * workspace_wait in TASK_UNINTERRUPTIBLE state.  The counter is tested
 * again after prepare_to_wait(); what happens when that second test is
 * true is not visible here (presumably the task sleeps -- TODO confirm).
 */
79 if (atomic_read(&alloc_workspace) > cpus) {
81 prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
82 if (atomic_read(&alloc_workspace) > cpus)
84 finish_wait(&workspace_wait, &wait);
/* account for the new workspace before allocating it */
87 atomic_inc(&alloc_workspace);
88 workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
/* zlib state areas come from vmalloc, sized by the zlib helpers */
94 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
95 if (!workspace->def_strm.workspace) {
99 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
100 if (!workspace->inf_strm.workspace) {
/* one page of scratch space, used as the inflate output buffer */
104 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
105 if (!workspace->buf) {
/*
 * Visible tail: free both zlib state areas, undo the accounting and wake
 * any waiter.  Presumably this is the allocation failure path -- the
 * labels and the return statement are not visible.
 */
112 vfree(workspace->inf_strm.workspace);
114 vfree(workspace->def_strm.workspace);
117 atomic_dec(&alloc_workspace);
118 wake_up(&workspace_wait);
123 * put a workspace struct back on the list or free it if we have enough
124 * idle ones sitting around
/*
 * Give a workspace back after use.  While num_workspace is below the
 * online CPU count the workspace is appended to idle_workspace; the other
 * visible path frees its buffers and drops alloc_workspace.  Both paths
 * wake workspace_wait when somebody is queued on it.
 *
 * NOTE(review): the braces separating the two paths, any update of
 * num_workspace, the freeing of the struct itself and the return value
 * are not visible in this listing.
 */
126 static int free_workspace(struct workspace *workspace)
128 spin_lock(&workspace_lock);
/* keep path: park the workspace on the idle list for reuse */
129 if (num_workspace < num_online_cpus()) {
130 list_add_tail(&workspace->list, &idle_workspace);
132 spin_unlock(&workspace_lock);
/* only call wake_up() when a waiter is actually queued */
133 if (waitqueue_active(&workspace_wait))
134 wake_up(&workspace_wait);
/* free path: release the lock first, then free the buffers outside it */
137 spin_unlock(&workspace_lock);
138 vfree(workspace->def_strm.workspace);
139 vfree(workspace->inf_strm.workspace);
140 kfree(workspace->buf);
/* one fewer workspace allocated; a throttled allocator may now proceed */
143 atomic_dec(&alloc_workspace);
144 if (waitqueue_active(&workspace_wait))
145 wake_up(&workspace_wait);
150 * cleanup function for module exit
/*
 * Drain idle_workspace: unlink each entry, free its deflate state, inflate
 * state and buffer, and drop alloc_workspace once per entry.
 *
 * NOTE(review): workspace_lock is not taken in the visible lines, and the
 * freeing of the workspace struct itself is not visible either.
 */
152 static void free_workspaces(void)
154 struct workspace *workspace;
/* always work on the current head until the list is empty */
155 while (!list_empty(&idle_workspace)) {
156 workspace = list_entry(idle_workspace.next, struct workspace,
158 list_del(&workspace->list);
159 vfree(workspace->def_strm.workspace);
160 vfree(workspace->inf_strm.workspace);
161 kfree(workspace->buf);
163 atomic_dec(&alloc_workspace);
168 * given an address space and start/len, compress the bytes.
170 * pages are allocated to hold the compressed result and stored
173 * out_pages is used to return the number of pages allocated. There
174 * may be pages allocated even if we return an error
176 * total_in is used to return the number of bytes actually read. It
177 * may be smaller than len if we had to exit early because we
178 * ran out of room in the pages array or because we cross the
181 * total_out is used to return the total number of compressed bytes
183 * max_out tells us the max number of bytes that we're allowed to
/*
 * Deflate len bytes of page cache data from mapping, beginning at byte
 * offset start, into freshly allocated pages.
 *
 * Input pages are looked up with find_get_page() one page at a time and
 * output pages are allocated on demand as each one fills up.  On the
 * visible success path *total_in, *total_out and *out_pages receive the
 * stream totals and the number of output pages.
 *
 * NOTE(review): this listing omits lines (the pages parameter, several
 * local declarations, braces and the error exits).  The comments below
 * only describe the statements that are visible.
 */
186 int btrfs_zlib_compress_pages(struct address_space *mapping,
187 u64 start, unsigned long len,
189 unsigned long nr_dest_pages,
190 unsigned long *out_pages,
191 unsigned long *total_in,
192 unsigned long *total_out,
193 unsigned long max_out)
196 struct workspace *workspace;
200 struct page *in_page = NULL;
201 struct page *out_page = NULL;
202 unsigned long bytes_left;
208 workspace = find_zlib_workspace();
209 if (IS_ERR(workspace))
/* compression level 3 is hard coded here */
212 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
213 printk(KERN_WARNING "deflateInit failed\n");
218 workspace->def_strm.total_in = 0;
219 workspace->def_strm.total_out = 0;
/*
 * Map the first input page and the first output page.
 * NOTE(review): no NULL check is visible between find_get_page() or
 * alloc_page() and the kmap() call that consumes the result.
 */
221 in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
222 data_in = kmap(in_page);
224 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
225 cpage_out = kmap(out_page);
/* offer at most one page of input and one page of output per round */
229 workspace->def_strm.next_in = data_in;
230 workspace->def_strm.next_out = cpage_out;
231 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
232 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
/* keep deflating until the stream has consumed all len input bytes */
234 while (workspace->def_strm.total_in < len) {
235 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
237 printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
239 zlib_deflateEnd(&workspace->def_strm);
244 /* we're making it bigger, give up */
245 if (workspace->def_strm.total_in > 8192 &&
246 workspace->def_strm.total_in <
247 workspace->def_strm.total_out) {
251 /* we need another page for writing out. Test this
252 * before the total_in so we will pull in a new page for
253 * the stream end if required
255 if (workspace->def_strm.avail_out == 0) {
257 if (nr_pages == nr_dest_pages) {
262 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
263 cpage_out = kmap(out_page);
264 pages[nr_pages] = out_page;
266 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
267 workspace->def_strm.next_out = cpage_out;
270 if (workspace->def_strm.total_in >= len)
273 /* we've read in a full page, get a new one */
274 if (workspace->def_strm.avail_in == 0) {
/* output budget check; the action taken when it trips is not visible */
275 if (workspace->def_strm.total_out > max_out)
278 bytes_left = len - workspace->def_strm.total_in;
/* drop the reference on the consumed page, then look up the next one */
280 page_cache_release(in_page);
282 start += PAGE_CACHE_SIZE;
283 in_page = find_get_page(mapping,
284 start >> PAGE_CACHE_SHIFT);
285 data_in = kmap(in_page);
286 workspace->def_strm.avail_in = min(bytes_left,
288 workspace->def_strm.next_in = data_in;
/* no more input: ask deflate to finish the stream, then tear it down */
291 workspace->def_strm.avail_in = 0;
292 ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
293 zlib_deflateEnd(&workspace->def_strm);
295 if (ret != Z_STREAM_END) {
/*
 * Output at least as large as the input.  The action taken is not
 * visible; presumably the result is rejected -- TODO confirm.
 */
300 if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
/* report the stream totals and the number of output pages to the caller */
306 *total_out = workspace->def_strm.total_out;
307 *total_in = workspace->def_strm.total_in;
309 *out_pages = nr_pages;
/* drop the last input page reference and hand the workspace back */
315 page_cache_release(in_page);
317 free_workspace(workspace);
322 * pages_in is an array of pages with compressed data.
324 * disk_start is the starting logical offset of this array in the file
326 * bvec is a bio_vec of pages from the file that we want to decompress into
328 * vcnt is the count of pages in the biovec
330 * srclen is the number of bytes in pages_in
332 * The basic idea is that we have a bio that was created by readpages.
333 * The pages in the bio are for the uncompressed data, and they may not
334 * be contiguous. They all correspond to the range of bytes covered by
335 * the compressed extent.
/*
 * Inflate the compressed data held in pages_in into the pages of bvec.
 *
 * Data is inflated into workspace->buf one PAGE_CACHE_SIZE chunk at a
 * time.  For every chunk, the bytes that fall inside the current
 * destination page are copied out of that buffer; the destination pages
 * need not be adjacent, so their position is recomputed from
 * page_offset() and disk_start each time a new page is picked.
 *
 * NOTE(review): this listing omits lines (the disk_start, vcnt and srclen
 * parameters, several local declarations, braces and the error exits).
 * The comments here only describe the statements that are visible.
 */
337 int btrfs_zlib_decompress_biovec(struct page **pages_in,
339 struct bio_vec *bvec,
/* default window size; replaced below when the zlib header is skipped */
344 int wbits = MAX_WBITS;
345 struct workspace *workspace;
347 size_t total_out = 0;
348 unsigned long page_bytes_left;
349 unsigned long page_in_index = 0;
350 unsigned long page_out_index = 0;
351 struct page *page_out;
/* number of input pages, srclen rounded up to whole pages */
352 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
354 unsigned long buf_start;
355 unsigned long buf_offset;
357 unsigned long working_bytes;
358 unsigned long pg_offset;
359 unsigned long start_byte;
360 unsigned long current_buf_start;
363 workspace = find_zlib_workspace();
364 if (IS_ERR(workspace))
/* map the first compressed page and offer at most one page of input */
367 data_in = kmap(pages_in[page_in_index]);
368 workspace->inf_strm.next_in = data_in;
369 workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
370 workspace->inf_strm.total_in = 0;
/* inflate into the workspace buffer, one page worth at a time */
372 workspace->inf_strm.total_out = 0;
373 workspace->inf_strm.next_out = workspace->buf;
374 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
/* first destination page of the bio */
375 page_out = bvec[page_out_index].bv_page;
376 page_bytes_left = PAGE_CACHE_SIZE;
379 /* If it's deflate, and it's got no preset dictionary, then
380 we can tell zlib to skip the adler32 check. */
381 if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
382 ((data_in[0] & 0x0f) == Z_DEFLATED) &&
383 !(((data_in[0]<<8) + data_in[1]) % 31)) {
/* negative wbits, and step over the two header bytes just examined */
385 wbits = -((data_in[0] >> 4) + 8);
386 workspace->inf_strm.next_in += 2;
387 workspace->inf_strm.avail_in -= 2;
390 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
391 printk(KERN_WARNING "inflateInit failed\n");
/* keep inflating until all srclen input bytes have been consumed */
395 while (workspace->inf_strm.total_in < srclen) {
396 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
397 if (ret != Z_OK && ret != Z_STREAM_END)
400 * buf start is the byte offset we're of the start of
401 * our workspace buffer
403 buf_start = total_out;
405 /* total_out is the last byte of the workspace buffer */
406 total_out = workspace->inf_strm.total_out;
408 working_bytes = total_out - buf_start;
411 * start byte is the first byte of the page we're currently
412 * copying into relative to the start of the compressed data.
414 start_byte = page_offset(page_out) - disk_start;
416 if (working_bytes == 0) {
417 /* we didn't make progress in this inflate
420 if (ret != Z_STREAM_END)
425 /* we haven't yet hit data corresponding to this page */
426 if (total_out <= start_byte)
430 * the start of the data we care about is offset into
431 * the middle of our working buffer
433 if (total_out > start_byte && buf_start < start_byte) {
434 buf_offset = start_byte - buf_start;
435 working_bytes -= buf_offset;
439 current_buf_start = buf_start;
441 /* copy bytes from the working buffer into the pages */
442 while (working_bytes > 0) {
/* clamp the copy to what is left in the page, the buffer and the chunk */
443 bytes = min(PAGE_CACHE_SIZE - pg_offset,
444 PAGE_CACHE_SIZE - buf_offset);
445 bytes = min(bytes, working_bytes);
/* the destination page is mapped only for the duration of the copy */
446 kaddr = kmap_atomic(page_out, KM_USER0);
447 memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
449 kunmap_atomic(kaddr, KM_USER0);
450 flush_dcache_page(page_out);
453 page_bytes_left -= bytes;
455 working_bytes -= bytes;
456 current_buf_start += bytes;
458 /* check if we need to pick another page */
459 if (page_bytes_left == 0) {
461 if (page_out_index >= vcnt) {
466 page_out = bvec[page_out_index].bv_page;
468 page_bytes_left = PAGE_CACHE_SIZE;
469 start_byte = page_offset(page_out) - disk_start;
472 * make sure our new page is covered by this
475 if (total_out <= start_byte)
478 /* the next page in the biovec might not
479 * be adjacent to the last page, but it
480 * might still be found inside this working
481 * buffer. bump our offset pointer
483 if (total_out > start_byte &&
484 current_buf_start < start_byte) {
485 buf_offset = start_byte - buf_start;
486 working_bytes = total_out - start_byte;
487 current_buf_start = buf_start +
493 workspace->inf_strm.next_out = workspace->buf;
494 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
496 if (workspace->inf_strm.avail_in == 0) {
498 kunmap(pages_in[page_in_index]);
500 if (page_in_index >= total_pages_in) {
504 data_in = kmap(pages_in[page_in_index]);
505 workspace->inf_strm.next_in = data_in;
506 tmp = srclen - workspace->inf_strm.total_in;
507 workspace->inf_strm.avail_in = min(tmp,
511 if (ret != Z_STREAM_END)
516 zlib_inflateEnd(&workspace->inf_strm);
518 kunmap(pages_in[page_in_index]);
520 free_workspace(workspace);
525 * a less complex decompression routine. Our compressed data fits in a
526 * single page, and we want to read a single page out of it.
527 * start_byte tells us the offset into the compressed data we're interested in
/*
 * Inflate srclen bytes from data_in and copy up to destlen bytes of the
 * result into dest_page.
 *
 * start_byte is compared against the inflated byte count (total_out), so
 * it selects a position in the uncompressed stream: chunks that end at or
 * before it are not copied from their start, and buf_offset is set so the
 * copy begins at start_byte inside the chunk that contains it.
 *
 * NOTE(review): this listing omits lines (several local declarations,
 * braces, the updates of pg_offset and bytes_left, and the error exits).
 * The comments below only describe the statements that are visible.
 */
529 int btrfs_zlib_decompress(unsigned char *data_in,
530 struct page *dest_page,
531 unsigned long start_byte,
532 size_t srclen, size_t destlen)
/* default window size; replaced below when the zlib header is skipped */
535 int wbits = MAX_WBITS;
536 struct workspace *workspace;
/* bytes still wanted by the caller */
537 unsigned long bytes_left = destlen;
538 unsigned long total_out = 0;
/* a request larger than one page is special cased; the action is not visible */
541 if (destlen > PAGE_CACHE_SIZE)
544 workspace = find_zlib_workspace();
545 if (IS_ERR(workspace))
/* the whole compressed input is offered to zlib in one go */
548 workspace->inf_strm.next_in = data_in;
549 workspace->inf_strm.avail_in = srclen;
550 workspace->inf_strm.total_in = 0;
/* inflate into the workspace buffer, one page worth at a time */
552 workspace->inf_strm.next_out = workspace->buf;
553 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
554 workspace->inf_strm.total_out = 0;
555 /* If it's deflate, and it's got no preset dictionary, then
556 we can tell zlib to skip the adler32 check. */
557 if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
558 ((data_in[0] & 0x0f) == Z_DEFLATED) &&
559 !(((data_in[0]<<8) + data_in[1]) % 31)) {
/* negative wbits, and step over the two header bytes just examined */
561 wbits = -((data_in[0] >> 4) + 8);
562 workspace->inf_strm.next_in += 2;
563 workspace->inf_strm.avail_in -= 2;
566 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
567 printk(KERN_WARNING "inflateInit failed\n");
/* inflate chunk by chunk until the caller has all the bytes it asked for */
572 while (bytes_left > 0) {
573 unsigned long buf_start;
574 unsigned long buf_offset;
576 unsigned long pg_offset = 0;
578 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
579 if (ret != Z_OK && ret != Z_STREAM_END)
/* buf_start: stream offset of the first byte now in workspace->buf */
582 buf_start = total_out;
583 total_out = workspace->inf_strm.total_out;
/* no new output from this inflate call; the action taken is not visible */
585 if (total_out == buf_start) {
/* this chunk ends at or before the requested offset */
590 if (total_out <= start_byte)
/* start_byte falls inside this chunk: begin the copy part way in */
593 if (total_out > start_byte && buf_start < start_byte)
594 buf_offset = start_byte - buf_start;
/* clamp the copy to the page, the buffer and the bytes still wanted */
598 bytes = min(PAGE_CACHE_SIZE - pg_offset,
599 PAGE_CACHE_SIZE - buf_offset);
600 bytes = min(bytes, bytes_left);
/* the destination page is mapped only for the duration of the copy */
602 kaddr = kmap_atomic(dest_page, KM_USER0);
603 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
604 kunmap_atomic(kaddr, KM_USER0);
/* rewind the output buffer for the next chunk */
609 workspace->inf_strm.next_out = workspace->buf;
610 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
/* stream not finished while bytes are still owed; the action is not visible */
613 if (ret != Z_STREAM_END && bytes_left != 0)
/* tear down the stream and hand the workspace back */
618 zlib_inflateEnd(&workspace->inf_strm);
620 free_workspace(workspace);
624 void btrfs_zlib_exit(void)