erofs: support DEFLATE decompression by using Intel QAT
author Bo Liu <liubo03@inspur.com>
Thu, 22 May 2025 09:49:31 +0000 (05:49 -0400)
committer Gao Xiang <hsiangkao@linux.alibaba.com>
Sun, 25 May 2025 07:27:40 +0000 (15:27 +0800)
This patch introduces the use of the Intel QAT to offload EROFS data
decompression, aiming to improve the decompression performance.

A 285MiB dataset is used with the following command to create EROFS
images with different cluster sizes:
     $ mkfs.erofs -zdeflate,level=9 -C{4096,16384,65536,131072,262144}

Fio is used to test the following read patterns:
     $ fio -filename=testfile -bs=4k -rw=read -name=job1
     $ fio -filename=testfile -bs=4k -rw=randread -name=job1
     $ fio -filename=testfile -bs=4k -rw=randread --io_size=14m -name=job1

Here are some performance numbers for reference:

Processors: Intel(R) Xeon(R) 6766E (144 cores)
Memory:     512 GiB

|-----------------------------------------------------------------------------|
|           | Cluster size | sequential read | randread  | small randread(5%) |
|-----------|--------------|-----------------|-----------|--------------------|
| Intel QAT |    4096      |    538  MiB/s   | 112 MiB/s |     20.76 MiB/s    |
| Intel QAT |    16384     |    699  MiB/s   | 158 MiB/s |     21.02 MiB/s    |
| Intel QAT |    65536     |    917  MiB/s   | 278 MiB/s |     20.90 MiB/s    |
| Intel QAT |    131072    |    1056 MiB/s   | 351 MiB/s |     23.36 MiB/s    |
| Intel QAT |    262144    |    1145 MiB/s   | 431 MiB/s |     26.66 MiB/s    |
| deflate   |    4096      |    499  MiB/s   | 108 MiB/s |     21.50 MiB/s    |
| deflate   |    16384     |    422  MiB/s   | 125 MiB/s |     18.94 MiB/s    |
| deflate   |    65536     |    452  MiB/s   | 159 MiB/s |     13.02 MiB/s    |
| deflate   |    131072    |    452  MiB/s   | 177 MiB/s |     11.44 MiB/s    |
| deflate   |    262144    |    466  MiB/s   | 194 MiB/s |     10.60 MiB/s    |

Signed-off-by: Bo Liu <liubo03@inspur.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20250522094931.28956-1-liubo03@inspur.com
[ Gao Xiang: refine the commit message. ]
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Documentation/ABI/testing/sysfs-fs-erofs
fs/erofs/Kconfig
fs/erofs/Makefile
fs/erofs/compress.h
fs/erofs/decompressor_crypto.c [new file with mode: 0644]
fs/erofs/decompressor_deflate.c
fs/erofs/sysfs.c
fs/erofs/zdata.c

index b134146d735bc59d0b633eae8fd01ca53156bab6..bf3b6299c15e6e86fde36f836dd1c939763285e7 100644 (file)
@@ -27,3 +27,11 @@ Description: Writing to this will drop compression-related caches,
                - 1 : invalidate cached compressed folios
                - 2 : drop in-memory pclusters
                - 3 : drop in-memory pclusters and cached compressed folios
+
+What:          /sys/fs/erofs/accel
+Date:          May 2025
+Contact:       "Bo Liu" <liubo03@inspur.com>
+Description:   Used to set or show the hardware accelerators in effect.
+               Multiple accelerators are separated by '\n'.
+               Supported accelerator(s): qat_deflate.
+               Disable all accelerators with an empty string (echo > accel).
index 8f68ec49ad897b647aa1fb1ea1c305304d70edeb..6beeb7063871ccbde0e415f776770deae3e2804a 100644 (file)
@@ -144,6 +144,20 @@ config EROFS_FS_ZIP_ZSTD
 
          If unsure, say N.
 
+config EROFS_FS_ZIP_ACCEL
+       bool "EROFS hardware decompression support"
+       depends on EROFS_FS_ZIP
+       # decompressor_crypto.c calls the crypto acomp API, so make sure the
+       # crypto core and an acomp-capable provider are always built.
+       select CRYPTO
+       select CRYPTO_DEFLATE
+       help
+         Saying Y here includes hardware accelerator support for reading
+         EROFS file systems containing compressed data.  It gives better
+         decompression speed than the software-implemented decompression, and
+         it costs lower CPU overhead.
+
+         Hardware accelerator support is an experimental feature for now and
+         file systems are still readable without selecting this option.
+
+         If unsure, say N.
+
 config EROFS_FS_ONDEMAND
        bool "EROFS fscache-based on-demand read support (deprecated)"
        depends on EROFS_FS
index 4331d53c7109550a0518f2ed8df456deecdd2f8c..549abc424763257296ecb0e52db38445dce0a98d 100644 (file)
@@ -7,5 +7,6 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
 erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
 erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
 erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
+erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
 erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
 erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
index 2704d7a592a5e669e813bce5dfd97b06a04efa77..510e922c51931a57b0b1352c315dd80bf3069218 100644 (file)
@@ -76,4 +76,14 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
                         unsigned int padbufsize);
 int __init z_erofs_init_decompressor(void);
 void z_erofs_exit_decompressor(void);
+/*
+ * Hardware-accelerated decompression hooks, implemented in
+ * decompressor_crypto.c (built only with CONFIG_EROFS_FS_ZIP_ACCEL).
+ * The two prototypes below have no stub here, so their callers are expected
+ * to be compiled under CONFIG_EROFS_FS_ZIP_ACCEL as well.
+ */
+int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
+                             struct page **pgpl);
+int z_erofs_crypto_enable_engine(const char *name, int len);
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+void z_erofs_crypto_disable_all_engines(void);
+int z_erofs_crypto_show_engines(char *buf, int size, char sep);
+#else
+/* no-op stubs so that unconditional callers still build without ACCEL */
+static inline void z_erofs_crypto_disable_all_engines(void) {}
+static inline int z_erofs_crypto_show_engines(char *buf, int size, char sep) { return 0; }
+#endif
 #endif
diff --git a/fs/erofs/decompressor_crypto.c b/fs/erofs/decompressor_crypto.c
new file mode 100644 (file)
index 0000000..97b77ab
--- /dev/null
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/scatterlist.h>
+#include <crypto/acompress.h>
+#include "compress.h"
+
+/*
+ * Decompress @rq with the acomp transform @tfm and wait for completion.
+ *
+ * The input and output page arrays of @rq are wrapped into scatterlists and
+ * handed to the crypto layer as one request.
+ *
+ * Returns 0 on success, the error reported by z_erofs_fixup_insize() or by
+ * the scatterlist allocation, -ENOMEM if no acomp request can be allocated,
+ * or -EIO if the decompression itself fails (the original error code is
+ * only logged).
+ */
+static int __z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
+                                      struct crypto_acomp *tfm)
+{
+       struct sg_table st_src, st_dst;
+       struct acomp_req *req;
+       struct crypto_wait wait;
+       u8 *headpage;
+       int ret;
+
+       /*
+        * Let z_erofs_fixup_insize() inspect the start of the compressed
+        * data: at most the rest of the first block after pageofs_in.
+        */
+       headpage = kmap_local_page(*rq->in);
+       ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
+                               min_t(unsigned int, rq->inputsize,
+                                     rq->sb->s_blocksize - rq->pageofs_in));
+       kunmap_local(headpage);
+       if (ret)
+               return ret;
+
+       req = acomp_request_alloc(tfm);
+       if (!req)
+               return -ENOMEM;
+
+       /*
+        * NOTE(review): both tables are allocated with GFP_KERNEL rather
+        * than rq->gfp -- confirm this is intended for every caller.
+        */
+       ret = sg_alloc_table_from_pages_segment(&st_src, rq->in, rq->inpages,
+                       rq->pageofs_in, rq->inputsize, UINT_MAX, GFP_KERNEL);
+       if (ret < 0)
+               goto failed_src_alloc;
+
+       ret = sg_alloc_table_from_pages_segment(&st_dst, rq->out, rq->outpages,
+                       rq->pageofs_out, rq->outputsize, UINT_MAX, GFP_KERNEL);
+       if (ret < 0)
+               goto failed_dst_alloc;
+
+       acomp_request_set_params(req, st_src.sgl,
+                                st_dst.sgl, rq->inputsize, rq->outputsize);
+
+       /* completion is signalled through @wait by crypto_req_done() */
+       crypto_init_wait(&wait);
+       acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                  crypto_req_done, &wait);
+
+       ret = crypto_wait_req(crypto_acomp_decompress(req), &wait);
+       if (ret) {
+               erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
+                         ret, rq->inputsize, rq->pageofs_in, rq->outputsize);
+               ret = -EIO;
+       }
+
+       /* release in reverse order of acquisition */
+       sg_free_table(&st_dst);
+failed_dst_alloc:
+       sg_free_table(&st_src);
+failed_src_alloc:
+       acomp_request_free(req);
+       return ret;
+}
+
+/*
+ * One candidate accelerator for a compression algorithm.
+ * @crypto_name: algorithm name handed to crypto_alloc_acomp(); an entry with
+ *               a NULL name terminates a per-algorithm engine list
+ * @tfm:         allocated transform while the engine is enabled, else NULL
+ */
+struct z_erofs_crypto_engine {
+       char *crypto_name;
+       struct crypto_acomp *tfm;
+};
+
+/*
+ * Per-algorithm engine lists, indexed by Z_EROFS_COMPRESSION_*.  Every list
+ * ends with an entry whose crypto_name is NULL.  The table is only used by
+ * the functions in this file (no declaration is added to compress.h), so it
+ * is kept static.
+ */
+static struct z_erofs_crypto_engine *z_erofs_crypto[Z_EROFS_COMPRESSION_MAX] = {
+       [Z_EROFS_COMPRESSION_LZ4] = (struct z_erofs_crypto_engine[]) {
+               {},
+       },
+       [Z_EROFS_COMPRESSION_LZMA] = (struct z_erofs_crypto_engine[]) {
+               {},
+       },
+       [Z_EROFS_COMPRESSION_DEFLATE] = (struct z_erofs_crypto_engine[]) {
+               { .crypto_name = "qat_deflate", },
+               {},
+       },
+       [Z_EROFS_COMPRESSION_ZSTD] = (struct z_erofs_crypto_engine[]) {
+               {},
+       },
+};
+/* read side: engine lookup and use; write side: enabling/disabling engines */
+static DECLARE_RWSEM(z_erofs_crypto_rwsem);
+
+/*
+ * Return the transform of the first enabled engine listed for @alg, or NULL
+ * if none of its engines currently has a transform.
+ */
+static struct crypto_acomp *z_erofs_crypto_get_engine(int alg)
+{
+       struct z_erofs_crypto_engine *engine = z_erofs_crypto[alg];
+
+       while (engine->crypto_name) {
+               if (engine->tfm)
+                       return engine->tfm;
+               engine++;
+       }
+       return NULL;
+}
+
+/*
+ * Try to decompress @rq with an enabled hardware engine.
+ *
+ * Returns -EOPNOTSUPP when no engine is enabled for rq->alg, -ENOMEM when an
+ * output page cannot be allocated, otherwise the result of
+ * __z_erofs_crypto_decompress().  The engine table is held for reading for
+ * the whole operation.
+ */
+int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
+                             struct page **pgpl)
+{
+       struct crypto_acomp *tfm;
+       int ret = -EOPNOTSUPP;
+       int i;
+
+       down_read(&z_erofs_crypto_rwsem);
+       tfm = z_erofs_crypto_get_engine(rq->alg);
+       if (!tfm)
+               goto unlock;
+
+       /*
+        * Every output slot is passed to the scatterlist builder, so back
+        * the empty ones with short-lived pages first.
+        */
+       ret = -ENOMEM;
+       for (i = 0; i < rq->outpages; i++) {
+               struct page *newpage;
+
+               if (rq->out[i])
+                       continue;
+               newpage = __erofs_allocpage(pgpl, rq->gfp, true);
+               if (!newpage)
+                       goto unlock;
+               set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
+               rq->out[i] = newpage;
+       }
+       ret = __z_erofs_crypto_decompress(rq, tfm);
+unlock:
+       up_read(&z_erofs_crypto_rwsem);
+       return ret;
+}
+
+/*
+ * Enable the engine called @name (@len bytes, not necessarily
+ * NUL-terminated) by allocating its acomp transform.
+ *
+ * The name must match an engine name exactly: a bare strncmp() over @len
+ * bytes would also accept any prefix (including an empty token), which would
+ * silently enable an engine the user never asked for.
+ *
+ * Returns 0 if the engine is now enabled, was already enabled, or the name
+ * is unknown; -EOPNOTSUPP if the transform cannot be allocated.
+ */
+int z_erofs_crypto_enable_engine(const char *name, int len)
+{
+       struct z_erofs_crypto_engine *e;
+       struct crypto_acomp *tfm;
+       int alg, err = 0;
+
+       down_write(&z_erofs_crypto_rwsem);
+       for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
+               for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) {
+                       if (strlen(e->crypto_name) != len ||
+                           strncmp(name, e->crypto_name, len))
+                               continue;
+                       if (e->tfm)     /* already enabled */
+                               break;
+                       tfm = crypto_alloc_acomp(e->crypto_name, 0, 0);
+                       if (IS_ERR(tfm)) {
+                               err = -EOPNOTSUPP;
+                               goto out;
+                       }
+                       e->tfm = tfm;
+                       break;
+               }
+       }
+out:
+       up_write(&z_erofs_crypto_rwsem);
+       return err;
+}
+
+/*
+ * Free the transform of every enabled engine and mark it disabled.  Takes
+ * the engine table for writing while doing so.
+ */
+void z_erofs_crypto_disable_all_engines(void)
+{
+       struct z_erofs_crypto_engine *engine;
+       int alg;
+
+       down_write(&z_erofs_crypto_rwsem);
+       for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
+               engine = z_erofs_crypto[alg];
+               for (; engine->crypto_name; ++engine) {
+                       if (engine->tfm) {
+                               crypto_free_acomp(engine->tfm);
+                               engine->tfm = NULL;
+                       }
+               }
+       }
+       up_write(&z_erofs_crypto_rwsem);
+}
+
+/*
+ * Write the names of all enabled engines into @buf (at most @size bytes),
+ * each followed by @sep.  Returns the number of bytes written.
+ *
+ * The engine table is held for reading, like every other accessor of
+ * ->tfm, so the listing cannot interleave with an enable/disable.
+ */
+int z_erofs_crypto_show_engines(char *buf, int size, char sep)
+{
+       struct z_erofs_crypto_engine *e;
+       int alg, len = 0;
+
+       down_read(&z_erofs_crypto_rwsem);
+       for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
+               for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) {
+                       if (!e->tfm)
+                               continue;
+                       len += scnprintf(buf + len, size - len, "%s%c",
+                                        e->crypto_name, sep);
+               }
+       }
+       up_read(&z_erofs_crypto_rwsem);
+       return len;
+}
index c6908a487054860a0fbccd0ac25cc87c8b199936..6909b2d529c7a270c4179521edc4af2bc68718b7 100644 (file)
@@ -97,8 +97,8 @@ failed:
        return -ENOMEM;
 }
 
-static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
-                                     struct page **pgpl)
+static int __z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+                                       struct page **pgpl)
 {
        struct super_block *sb = rq->sb;
        struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 };
@@ -178,6 +178,22 @@ failed_zinit:
        return err;
 }
 
+/*
+ * DEFLATE entry point: with CONFIG_EROFS_FS_ZIP_ACCEL, full (non-partial)
+ * requests are first offered to the hardware path; the software decompressor
+ * is used when that path reports -EOPNOTSUPP, for partial decoding, or when
+ * accelerator support is compiled out.
+ */
+static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+                                     struct page **pgpl)
+{
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+       if (!rq->partial_decoding) {
+               int ret = z_erofs_crypto_decompress(rq, pgpl);
+
+               /* any other result, success included, is final */
+               if (ret != -EOPNOTSUPP)
+                       return ret;
+       }
+#endif
+       return __z_erofs_deflate_decompress(rq, pgpl);
+}
+
 const struct z_erofs_decompressor z_erofs_deflate_decomp = {
        .config = z_erofs_load_deflate_config,
        .decompress = z_erofs_deflate_decompress,
index c6650350c4cd27658f698663e53ef057846f5f2a..eed8797a193f70a65c6b7df86f5251475600c7ec 100644 (file)
@@ -7,12 +7,14 @@
 #include <linux/kobject.h>
 
 #include "internal.h"
+#include "compress.h"
 
 enum {
        attr_feature,
        attr_drop_caches,
        attr_pointer_ui,
        attr_pointer_bool,
+       attr_accel,
 };
 
 enum {
@@ -60,14 +62,25 @@ static struct erofs_attr erofs_attr_##_name = {                     \
 EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts);
 EROFS_ATTR_FUNC(drop_caches, 0200);
 #endif
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+EROFS_ATTR_FUNC(accel, 0644);
+#endif
 
-static struct attribute *erofs_attrs[] = {
+static struct attribute *erofs_sb_attrs[] = {
 #ifdef CONFIG_EROFS_FS_ZIP
        ATTR_LIST(sync_decompress),
        ATTR_LIST(drop_caches),
 #endif
        NULL,
 };
+ATTRIBUTE_GROUPS(erofs_sb);
+
+static struct attribute *erofs_attrs[] = {
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+       ATTR_LIST(accel),
+#endif
+       NULL,
+};
 ATTRIBUTE_GROUPS(erofs);
 
 /* Features this copy of erofs supports */
@@ -128,12 +141,14 @@ static ssize_t erofs_attr_show(struct kobject *kobj,
                if (!ptr)
                        return 0;
                return sysfs_emit(buf, "%d\n", *(bool *)ptr);
+       case attr_accel:
+               return z_erofs_crypto_show_engines(buf, PAGE_SIZE, '\n');
        }
        return 0;
 }
 
 static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr,
-                                               const char *buf, size_t len)
+                               const char *buf, size_t len)
 {
        struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info,
                                                s_kobj);
@@ -181,6 +196,19 @@ static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr,
                if (t & 1)
                        invalidate_mapping_pages(MNGD_MAPPING(sbi), 0, -1);
                return len;
+#endif
+#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
+       case attr_accel:
+               buf = skip_spaces(buf);
+               z_erofs_crypto_disable_all_engines();
+               while (*buf) {
+                       t = strcspn(buf, "\n");
+                       ret = z_erofs_crypto_enable_engine(buf, t);
+                       if (ret < 0)
+                               return ret;
+                       buf += buf[t] != '\0' ? t + 1 : t;
+               }
+               return len;
 #endif
        }
        return 0;
@@ -199,12 +227,13 @@ static const struct sysfs_ops erofs_attr_ops = {
 };
 
 static const struct kobj_type erofs_sb_ktype = {
-       .default_groups = erofs_groups,
+       .default_groups = erofs_sb_groups,
        .sysfs_ops      = &erofs_attr_ops,
        .release        = erofs_sb_release,
 };
 
 static const struct kobj_type erofs_ktype = {
+       .default_groups = erofs_groups,
        .sysfs_ops      = &erofs_attr_ops,
 };
 
index ab61c84d47cd03109f981d323de531ec0e383f3f..fe80718447247d61d2092914a592b13e35706b9f 100644 (file)
@@ -441,6 +441,7 @@ void z_erofs_exit_subsystem(void)
        z_erofs_destroy_pcpu_workers();
        destroy_workqueue(z_erofs_workqueue);
        z_erofs_destroy_pcluster_pool();
+       z_erofs_crypto_disable_all_engines();
        z_erofs_exit_decompressor();
 }