diff options
author | Jens Axboe <axboe@kernel.dk> | 2021-11-08 06:49:26 -0700 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2021-11-08 06:49:26 -0700 |
commit | 2970c1a5c492f364a9b95c5fa1c005cf60b0fe35 (patch) | |
tree | 1b1865f8633b8ea24daf43769f1dddb4fde6148a | |
parent | 6b75d88fa81b122cce37ebf17428a849ccd3d0f1 (diff) | |
parent | a15058eaefffc37c31326b59fa08b267b2def603 (diff) |
Merge branch 'for-next/clang/pgo' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux into pgopgo
* 'for-next/clang/pgo' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
pgo: rectify comment to proper kernel-doc syntax
pgo: Clean up prf_open() error paths
pgo: Fix sleep in atomic section in prf_open()
pgo: Limit allocate_node() to vmlinux sections
pgo: rename the raw profile file to vmlinux.profraw
MAINTAINERS: Expand and relocate PGO entry
pgo: Add Clang's Profile Guided Optimization infrastructure
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | Documentation/dev-tools/index.rst | 1 | ||||
-rw-r--r-- | Documentation/dev-tools/pgo.rst | 127 | ||||
-rw-r--r-- | MAINTAINERS | 13 | ||||
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | arch/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/boot/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/boot/compressed/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/crypto/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/entry/vdso/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/x86/platform/efi/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/purgatory/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/realmode/rm/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/um/vdso/Makefile | 1 | ||||
-rw-r--r-- | drivers/firmware/efi/libstub/Makefile | 1 | ||||
-rw-r--r-- | include/asm-generic/vmlinux.lds.h | 32 | ||||
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/pgo/Kconfig | 37 | ||||
-rw-r--r-- | kernel/pgo/Makefile | 5 | ||||
-rw-r--r-- | kernel/pgo/fs.c | 413 | ||||
-rw-r--r-- | kernel/pgo/instrument.c | 188 | ||||
-rw-r--r-- | kernel/pgo/pgo.h | 211 | ||||
-rw-r--r-- | scripts/Makefile.lib | 10 |
25 files changed, 1059 insertions, 0 deletions
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst index 010a2af1e7d9..f15630bb01e2 100644 --- a/Documentation/dev-tools/index.rst +++ b/Documentation/dev-tools/index.rst @@ -32,6 +32,7 @@ Documentation/dev-tools/testing-overview.rst kgdb kselftest kunit/index + pgo .. only:: subproject and html diff --git a/Documentation/dev-tools/pgo.rst b/Documentation/dev-tools/pgo.rst new file mode 100644 index 000000000000..0200449c4843 --- /dev/null +++ b/Documentation/dev-tools/pgo.rst @@ -0,0 +1,127 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +Using PGO with the Linux kernel +=============================== + +Clang's profiling kernel support (PGO_) enables profiling of the Linux kernel +when building with Clang. The profiling data is exported via the ``pgo`` +debugfs directory. + +.. _PGO: https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization + + +Preparation +=========== + +Configure the kernel with: + +.. code-block:: make + + CONFIG_DEBUG_FS=y + CONFIG_PGO_CLANG=y + +Note that kernels compiled with profiling flags will be significantly larger +and run slower. + +Profiling data will only become accessible once debugfs has been mounted: + +.. code-block:: sh + + mount -t debugfs none /sys/kernel/debug + + +Customization +============= + +You can enable or disable profiling for individual file and directories by +adding a line similar to the following to the respective kernel Makefile: + +- For a single file (e.g. main.o) + + .. code-block:: make + + PGO_PROFILE_main.o := y + +- For all files in one directory + + .. code-block:: make + + PGO_PROFILE := y + +To exclude files from being profiled use + + .. code-block:: make + + PGO_PROFILE_main.o := n + +and + + .. code-block:: make + + PGO_PROFILE := n + +Only files which are linked to the main kernel image or are compiled as kernel +modules are supported by this mechanism. + + +Files +===== + +The PGO kernel support creates the following files in debugfs: + +``/sys/kernel/debug/pgo`` + Parent directory for all PGO-related files. + +``/sys/kernel/debug/pgo/reset`` + Global reset file: resets all coverage data to zero when written to. + +``/sys/kernel/debug/pgo/vmlinux.profraw`` + The raw PGO data that must be processed with ``llvm_profdata``. + + +Workflow +======== + +The PGO kernel can be run on the host or test machines. The data though should +be analyzed with Clang's tools from the same Clang version as the kernel was +compiled. Clang's tolerant of version skew, but it's easier to use the same +Clang version. + +The profiling data is useful for optimizing the kernel, analyzing coverage, +etc. Clang offers tools to perform these tasks. + +Here is an example workflow for profiling an instrumented kernel with PGO and +using the result to optimize the kernel: + +1) Install the kernel on the TEST machine. + +2) Reset the data counters right before running the load tests + + .. code-block:: sh + + $ echo 1 > /sys/kernel/debug/pgo/reset + +3) Run the load tests. + +4) Collect the raw profile data + + .. code-block:: sh + + $ cp -a /sys/kernel/debug/pgo/vmlinux.profraw /tmp/vmlinux.profraw + +5) (Optional) Download the raw profile data to the HOST machine. + +6) Process the raw profile data + + .. code-block:: sh + + $ llvm-profdata merge --output=vmlinux.profdata vmlinux.profraw + + Note that multiple raw profile data files can be merged during this step. + +7) Rebuild the kernel using the processed profile data (PGO disabled) + + .. code-block:: sh + + $ make LLVM=1 KCFLAGS=-fprofile-use=vmlinux.profdata ... diff --git a/MAINTAINERS b/MAINTAINERS index 9096c64d8d09..b042a7e6fae8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4625,6 +4625,19 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/cla F: include/linux/cfi.h F: kernel/cfi.c +CLANG PROFILE GUIDED OPTIMIZATION SUPPORT +M: Sami Tolvanen <samitolvanen@google.com> +M: Bill Wendling <wcw@google.com> +M: Kees Cook <keescook@chromium.org> +R: Nathan Chancellor <nathan@kernel.org> +R: Nick Desaulniers <ndesaulniers@google.com> +L: clang-built-linux@googlegroups.com +S: Supported +B: https://github.com/ClangBuiltLinux/linux/issues +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/clang/features +F: Documentation/dev-tools/pgo.rst +F: kernel/pgo/ + CLEANCACHE API M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> L: linux-kernel@vger.kernel.org @@ -677,6 +677,9 @@ endif # KBUILD_EXTMOD # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux +CFLAGS_PGO_CLANG := -fprofile-generate +export CFLAGS_PGO_CLANG + CFLAGS_GCOV := -fprofile-arcs -ftest-coverage ifdef CONFIG_CC_IS_GCC CFLAGS_GCOV += -fno-tree-loop-im diff --git a/arch/Kconfig b/arch/Kconfig index 26b8ed11639d..fa69deeb1474 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1303,6 +1303,7 @@ config DYNAMIC_SIGFRAME bool source "kernel/gcov/Kconfig" +source "kernel/pgo/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 95dd1ee01546..2553380322ea 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -108,6 +108,7 @@ config X86 select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN + select ARCH_SUPPORTS_PGO_CLANG if X86_64 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index b5aecb524a8a..5e0816f5c367 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -71,6 +71,7 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables GCOV_PROFILE := n +PGO_PROFILE := n UBSAN_SANITIZE := n $(obj)/bzImage: asflags-y := $(SVGA_MODE) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 431bf7f846c3..218626b8acc5 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -56,6 +56,7 @@ CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n +PGO_PROFILE := n UBSAN_SANITIZE :=n KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index f307c93fc90a..a177ff8ad782 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -86,6 +86,9 @@ nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o +# Disable PGO for curve25519-x86_64. With PGO enabled, clang runs out of +# registers for some of the functions. +PGO_PROFILE_curve25519-x86_64.o := n obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index a2dddcc189f6..73e207ad3257 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -180,6 +180,7 @@ quiet_cmd_vdso = VDSO $@ VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \ $(call ld-option, --eh-frame-hdr) -Bsymbolic GCOV_PROFILE := n +PGO_PROFILE := n quiet_cmd_vdso_and_check = VDSO $@ cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 2ff3e600f426..5cd1e99519fd 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -32,6 +32,9 @@ KASAN_SANITIZE_paravirt.o := n KASAN_SANITIZE_sev.o := n KASAN_SANITIZE_cc_platform.o := n +# Cannot write to profiling regions before the page tables are set up. +PGO_PROFILE_head$(BITS).o := n + # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. KCSAN_SANITIZE := n diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3d6dc12d198f..ea720b5c050a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -184,6 +184,8 @@ SECTIONS BUG_TABLE + PGO_CLANG_DATA + ORC_UNWIND_TABLE . = ALIGN(PAGE_SIZE); diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 84b09c230cbd..5f22b31446ad 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -2,6 +2,7 @@ OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y KASAN_SANITIZE := n GCOV_PROFILE := n +PGO_PROFILE := n obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 95ea17a9d20c..36f20e99da0b 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -23,6 +23,7 @@ targets += purgatory.ro purgatory.chk # Sanitizer, etc. runtimes are unavailable and cannot be linked here. GCOV_PROFILE := n +PGO_PROFILE := n KASAN_SANITIZE := n UBSAN_SANITIZE := n KCSAN_SANITIZE := n diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 83f1b6a56449..21797192f958 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -76,4 +76,5 @@ KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables GCOV_PROFILE := n +PGO_PROFILE := n UBSAN_SANITIZE := n diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile index 5943387e3f35..54f5768f5853 100644 --- a/arch/x86/um/vdso/Makefile +++ b/arch/x86/um/vdso/Makefile @@ -64,6 +64,7 @@ quiet_cmd_vdso = VDSO $@ VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv GCOV_PROFILE := n +PGO_PROFILE := n # # Install the unstripped copy of vdso*.so listed in $(vdso-install-y). diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index d0537573501e..2c20e503cccb 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -43,6 +43,7 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) GCOV_PROFILE := n +PGO_PROFILE := n # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n KCSAN_SANITIZE := n diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 42f3866bca69..2189839fe895 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -331,6 +331,37 @@ #define DTPM_TABLE() #endif +#ifdef CONFIG_PGO_CLANG +#define PGO_CLANG_DATA \ + __llvm_prf_data : AT(ADDR(__llvm_prf_data) - LOAD_OFFSET) { \ + __llvm_prf_data_start = .; \ + *(__llvm_prf_data) \ + __llvm_prf_data_end = .; \ + } \ + __llvm_prf_cnts : AT(ADDR(__llvm_prf_cnts) - LOAD_OFFSET) { \ + __llvm_prf_cnts_start = .; \ + *(__llvm_prf_cnts) \ + __llvm_prf_cnts_end = .; \ + } \ + __llvm_prf_names : AT(ADDR(__llvm_prf_names) - LOAD_OFFSET) { \ + __llvm_prf_names_start = .; \ + *(__llvm_prf_names) \ + __llvm_prf_names_end = .; \ + } \ + __llvm_prf_vals : AT(ADDR(__llvm_prf_vals) - LOAD_OFFSET) { \ + __llvm_prf_vals_start = .; \ + *(__llvm_prf_vals) \ + __llvm_prf_vals_end = .; \ + } \ + __llvm_prf_vnds : AT(ADDR(__llvm_prf_vnds) - LOAD_OFFSET) { \ + __llvm_prf_vnds_start = .; \ + *(__llvm_prf_vnds) \ + __llvm_prf_vnds_end = .; \ + } +#else +#define PGO_CLANG_DATA +#endif + #define KERNEL_DTB() \ STRUCT_ALIGN(); \ __dtb_start = .; \ @@ -1148,6 +1179,7 @@ CONSTRUCTORS \ } \ BUG_TABLE \ + PGO_CLANG_DATA #define INIT_TEXT_SECTION(inittext_align) \ . = ALIGN(inittext_align); \ diff --git a/kernel/Makefile b/kernel/Makefile index 186c49582f45..2c8143232df6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -114,6 +114,7 @@ obj-$(CONFIG_KCSAN) += kcsan/ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call.o obj-$(CONFIG_CFI_CLANG) += cfi.o +obj-$(CONFIG_PGO_CLANG) += pgo/ obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/pgo/Kconfig b/kernel/pgo/Kconfig new file mode 100644 index 000000000000..ce7fe04f303d --- /dev/null +++ b/kernel/pgo/Kconfig @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: GPL-2.0-only +menu "Profile Guided Optimization (PGO) (EXPERIMENTAL)" + +config ARCH_SUPPORTS_PGO_CLANG + bool + +config PGO_CLANG + bool "Enable clang's PGO-based kernel profiling" + depends on DEBUG_FS + depends on ARCH_SUPPORTS_PGO_CLANG + depends on CC_IS_CLANG + depends on !ARCH_WANTS_NO_INSTR || CC_HAS_NO_PROFILE_FN_ATTR + help + This option enables clang's PGO (Profile Guided Optimization) based + code profiling to better optimize the kernel. + + If unsure, say N. + + Run a representative workload for your application on a kernel + compiled with this option and download the raw profile file from + /sys/kernel/debug/pgo/vmlinux.profraw. This file needs to be + processed with llvm-profdata. It may be merged with other collected + raw profiles. + + Copy the processed profile file into vmlinux.profdata, and enable + KCFLAGS=-fprofile-use=vmlinux.profdata to produce an optimized + kernel. + + Note that a kernel compiled with profiling flags will be + significantly larger and run slower. Also be sure to exclude files + from profiling which are not linked to the kernel image to prevent + linker errors. + + Note that the debugfs filesystem has to be mounted to access + profiling data. + +endmenu diff --git a/kernel/pgo/Makefile b/kernel/pgo/Makefile new file mode 100644 index 000000000000..41e27cefd9a4 --- /dev/null +++ b/kernel/pgo/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +GCOV_PROFILE := n +PGO_PROFILE := n + +obj-y += fs.o instrument.o diff --git a/kernel/pgo/fs.c b/kernel/pgo/fs.c new file mode 100644 index 000000000000..3c5aa7c2a4ce --- /dev/null +++ b/kernel/pgo/fs.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Google, Inc. + * + * Author: + * Sami Tolvanen <samitolvanen@google.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) "pgo: " fmt + +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include "pgo.h" + +static struct dentry *directory; + +struct prf_private_data { + void *buffer; + size_t size; +}; + +/* + * Raw profile data format: + * + * - llvm_prf_header + * - __llvm_prf_data + * - __llvm_prf_cnts + * - __llvm_prf_names + * - zero padding to 8 bytes + * - for each llvm_prf_data in __llvm_prf_data: + * - llvm_prf_value_data + * - llvm_prf_value_record + site count array + * - llvm_prf_value_node_data + * ... + * ... + * ... + */ + +static void prf_fill_header(void **buffer) +{ + struct llvm_prf_header *header = *(struct llvm_prf_header **)buffer; + +#ifdef CONFIG_64BIT + header->magic = LLVM_INSTR_PROF_RAW_MAGIC_64; +#else + header->magic = LLVM_INSTR_PROF_RAW_MAGIC_32; +#endif + header->version = LLVM_VARIANT_MASK_IR_PROF | LLVM_INSTR_PROF_RAW_VERSION; + header->data_size = prf_data_count(); + header->padding_bytes_before_counters = 0; + header->counters_size = prf_cnts_count(); + header->padding_bytes_after_counters = 0; + header->names_size = prf_names_count(); + header->counters_delta = (u64)__llvm_prf_cnts_start; + header->names_delta = (u64)__llvm_prf_names_start; + header->value_kind_last = LLVM_INSTR_PROF_IPVK_LAST; + + *buffer += sizeof(*header); +} + +/* + * Copy the source into the buffer, incrementing the pointer into buffer in the + * process. + */ +static void prf_copy_to_buffer(void **buffer, void *src, unsigned long size) +{ + memcpy(*buffer, src, size); + *buffer += size; +} + +static u32 __prf_get_value_size(struct llvm_prf_data *p, u32 *value_kinds) +{ + struct llvm_prf_value_node **nodes = + (struct llvm_prf_value_node **)p->values; + u32 kinds = 0; + u32 size = 0; + unsigned int kind; + unsigned int n; + unsigned int s = 0; + + for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) { + unsigned int sites = p->num_value_sites[kind]; + + if (!sites) + continue; + + /* Record + site count array */ + size += prf_get_value_record_size(sites); + kinds++; + + if (!nodes) + continue; + + for (n = 0; n < sites; n++) { + u32 count = 0; + struct llvm_prf_value_node *site = nodes[s + n]; + + while (site && ++count <= U8_MAX) + site = site->next; + + size += count * + sizeof(struct llvm_prf_value_node_data); + } + + s += sites; + } + + if (size) + size += sizeof(struct llvm_prf_value_data); + + if (value_kinds) + *value_kinds = kinds; + + return size; +} + +static u32 prf_get_value_size(void) +{ + u32 size = 0; + struct llvm_prf_data *p; + + for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++) + size += __prf_get_value_size(p, NULL); + + return size; +} + +/* Serialize the profiling's value. */ +static void prf_serialize_value(struct llvm_prf_data *p, void **buffer) +{ + struct llvm_prf_value_data header; + struct llvm_prf_value_node **nodes = + (struct llvm_prf_value_node **)p->values; + unsigned int kind; + unsigned int n; + unsigned int s = 0; + + header.total_size = __prf_get_value_size(p, &header.num_value_kinds); + + if (!header.num_value_kinds) + /* Nothing to write. */ + return; + + prf_copy_to_buffer(buffer, &header, sizeof(header)); + + for (kind = 0; kind < ARRAY_SIZE(p->num_value_sites); kind++) { + struct llvm_prf_value_record *record; + u8 *counts; + unsigned int sites = p->num_value_sites[kind]; + + if (!sites) + continue; + + /* Profiling value record. */ + record = *(struct llvm_prf_value_record **)buffer; + *buffer += prf_get_value_record_header_size(); + + record->kind = kind; + record->num_value_sites = sites; + + /* Site count array. */ + counts = *(u8 **)buffer; + *buffer += prf_get_value_record_site_count_size(sites); + + /* + * If we don't have nodes, we can skip updating the site count + * array, because the buffer is zero filled. + */ + if (!nodes) + continue; + + for (n = 0; n < sites; n++) { + u32 count = 0; + struct llvm_prf_value_node *site = nodes[s + n]; + + while (site && ++count <= U8_MAX) { + prf_copy_to_buffer(buffer, site, + sizeof(struct llvm_prf_value_node_data)); + site = site->next; + } + + counts[n] = (u8)count; + } + + s += sites; + } +} + +static void prf_serialize_values(void **buffer) +{ + struct llvm_prf_data *p; + + for (p = __llvm_prf_data_start; p < __llvm_prf_data_end; p++) + prf_serialize_value(p, buffer); +} + +static inline unsigned long prf_get_padding(unsigned long size) +{ + return 7 & (sizeof(u64) - size % sizeof(u64)); +} + +/* Note: caller *must* hold pgo_lock */ +static unsigned long prf_buffer_size(void) +{ + return sizeof(struct llvm_prf_header) + + prf_data_size() + + prf_cnts_size() + + prf_names_size() + + prf_get_padding(prf_names_size()) + + prf_get_value_size(); +} + +/* + * Serialize the profiling data into a format LLVM's tools can understand. + * Returns actual buffer size in p->size. + * Note: p->buffer must point into vzalloc()'d + * area of at least prf_buffer_size() in size. + * Note: caller *must* hold pgo_lock. + */ +static int prf_serialize(struct prf_private_data *p, size_t buf_size) +{ + void *buffer; + + /* get buffer size, again. */ + p->size = prf_buffer_size(); + + /* check for unlikely overflow. */ + if (p->size > buf_size) + return -EAGAIN; + + buffer = p->buffer; + + prf_fill_header(&buffer); + prf_copy_to_buffer(&buffer, __llvm_prf_data_start, prf_data_size()); + prf_copy_to_buffer(&buffer, __llvm_prf_cnts_start, prf_cnts_size()); + prf_copy_to_buffer(&buffer, __llvm_prf_names_start, prf_names_size()); + buffer += prf_get_padding(prf_names_size()); + + prf_serialize_values(&buffer); + + return 0; +} + +/* open() implementation for PGO. Creates a copy of the profiling data set. */ +static int prf_open(struct inode *inode, struct file *file) +{ + struct prf_private_data *data; + unsigned long flags; + size_t buf_size; + int err = -EINVAL; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* Get initial buffer size. */ + flags = prf_lock(); + data->size = prf_buffer_size(); + prf_unlock(flags); + + do { + vfree(data->buffer); + + /* Allocate, round up to page size. */ + buf_size = PAGE_ALIGN(data->size); + data->buffer = vzalloc(buf_size); + + if (!data->buffer) { + err = -ENOMEM; + break; + } + + /* + * Try serialize and get actual + * data length in data->size. + */ + flags = prf_lock(); + err = prf_serialize(data, buf_size); + prf_unlock(flags); + /* In unlikely case, try again. */ + } while (err == -EAGAIN); + + if (err < 0) { + if (data) + vfree(data->buffer); + kfree(data); + } else { + file->private_data = data; + } + + return err; +} + +/* read() implementation for PGO. */ +static ssize_t prf_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct prf_private_data *data = file->private_data; + + if (WARN_ON_ONCE(!data)) + return -ENOMEM; + + return simple_read_from_buffer(buf, count, ppos, data->buffer, + data->size); +} + +/* release() implementation for PGO. Release resources allocated by open(). */ +static int prf_release(struct inode *inode, struct file *file) +{ + struct prf_private_data *data = file->private_data; + + if (data) { + vfree(data->buffer); + kfree(data); + } + + return 0; +} + +static const struct file_operations prf_fops = { + .owner = THIS_MODULE, + .open = prf_open, + .read = prf_read, + .llseek = default_llseek, + .release = prf_release +}; + +/* write() implementation for resetting PGO's profile data. */ +static ssize_t reset_write(struct file *file, const char __user *addr, + size_t len, loff_t *pos) +{ + struct llvm_prf_data *data; + + memset(__llvm_prf_cnts_start, 0, prf_cnts_size()); + + for (data = __llvm_prf_data_start; data < __llvm_prf_data_end; data++) { + struct llvm_prf_value_node **vnodes; + u64 current_vsite_count; + u32 i; + + if (!data->values) + continue; + + current_vsite_count = 0; + vnodes = (struct llvm_prf_value_node **)data->values; + + for (i = LLVM_INSTR_PROF_IPVK_FIRST; i <= LLVM_INSTR_PROF_IPVK_LAST; i++) + current_vsite_count += data->num_value_sites[i]; + + for (i = 0; i < current_vsite_count; i++) { + struct llvm_prf_value_node *current_vnode = vnodes[i]; + + while (current_vnode) { + current_vnode->count = 0; + current_vnode = current_vnode->next; + } + } + } + + return len; +} + +static const struct file_operations prf_reset_fops = { + .owner = THIS_MODULE, + .write = reset_write, + .llseek = noop_llseek, +}; + +/* Create debugfs entries. */ +static int __init pgo_init(void) +{ + directory = debugfs_create_dir("pgo", NULL); + if (!directory) + goto err_remove; + + if (!debugfs_create_file("vmlinux.profraw", 0600, directory, NULL, + &prf_fops)) + goto err_remove; + + if (!debugfs_create_file("reset", 0200, directory, NULL, + &prf_reset_fops)) + goto err_remove; + + return 0; + +err_remove: + pr_err("initialization failed\n"); + return -EIO; +} + +/* Remove debugfs entries. */ +static void __exit pgo_exit(void) +{ + debugfs_remove_recursive(directory); +} + +module_init(pgo_init); +module_exit(pgo_exit); diff --git a/kernel/pgo/instrument.c b/kernel/pgo/instrument.c new file mode 100644 index 000000000000..8b54fb6be336 --- /dev/null +++ b/kernel/pgo/instrument.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Google, Inc. + * + * Author: + * Sami Tolvanen <samitolvanen@google.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#define pr_fmt(fmt) "pgo: " fmt + +#include <asm/sections.h> +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/spinlock.h> +#include <linux/types.h> +#include "pgo.h" + +/* + * This lock guards both profile count updating and serialization of the + * profiling data. Keeping both of these activities separate via locking + * ensures that we don't try to serialize data that's only partially updated. + */ +static DEFINE_SPINLOCK(pgo_lock); +static int current_node; + +unsigned long prf_lock(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pgo_lock, flags); + + return flags; +} + +void prf_unlock(unsigned long flags) +{ + spin_unlock_irqrestore(&pgo_lock, flags); +} + +/* + * Return a newly allocated profiling value node which contains the tracked + * value by the value profiler. + * Note: caller *must* hold pgo_lock. + */ +static struct llvm_prf_value_node *allocate_node(struct llvm_prf_data *p, + u32 index, u64 value) +{ + const int max_vnds = prf_vnds_count(); + + /* + * Check that p is within vmlinux __llvm_prf_data section. + * If not, don't allocate since we can't handle modules yet. + */ + if (!memory_contains(__llvm_prf_data_start, + __llvm_prf_data_end, p, sizeof(*p))) + return NULL; + + if (WARN_ON_ONCE(current_node >= max_vnds)) + return NULL; /* Out of nodes */ + + /* reserve vnode for vmlinux */ + return &__llvm_prf_vnds_start[current_node++]; +} + +/* + * Counts the number of times a target value is seen. + * + * Records the target value for the index if not seen before. Otherwise, + * increments the counter associated w/ the target value. + */ +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index) +{ + struct llvm_prf_data *p = (struct llvm_prf_data *)data; + struct llvm_prf_value_node **counters; + struct llvm_prf_value_node *curr; + struct llvm_prf_value_node *min = NULL; + struct llvm_prf_value_node *prev = NULL; + u64 min_count = U64_MAX; + u8 values = 0; + unsigned long flags; + + if (!p || !p->values) + return; + + counters = (struct llvm_prf_value_node **)p->values; + curr = counters[index]; + + while (curr) { + if (target_value == curr->value) { + curr->count++; + return; + } + + if (curr->count < min_count) { + min_count = curr->count; + min = curr; + } + + prev = curr; + curr = curr->next; + values++; + } + + if (values >= LLVM_INSTR_PROF_MAX_NUM_VAL_PER_SITE) { + if (!min->count || !(--min->count)) { + curr = min; + curr->value = target_value; + curr->count++; + } + return; + } + + /* Lock when updating the value node structure. */ + flags = prf_lock(); + + curr = allocate_node(p, index, target_value); + if (!curr) + goto out; + + curr->value = target_value; + curr->count++; + + if (!counters[index]) + counters[index] = curr; + else if (prev && !prev->next) + prev->next = curr; + +out: + prf_unlock(flags); +} +EXPORT_SYMBOL(__llvm_profile_instrument_target); + +/* Counts the number of times a range of targets values are seen. */ +void __llvm_profile_instrument_range(u64 target_value, void *data, + u32 index, s64 precise_start, + s64 precise_last, s64 large_value) +{ + if (large_value != S64_MIN && (s64)target_value >= large_value) + target_value = large_value; + else if ((s64)target_value < precise_start || + (s64)target_value > precise_last) + target_value = precise_last + 1; + + __llvm_profile_instrument_target(target_value, data, index); +} +EXPORT_SYMBOL(__llvm_profile_instrument_range); + +static u64 inst_prof_get_range_rep_value(u64 value) +{ + if (value <= 8) + /* The first ranges are individually tracked, use it as is. */ + return value; + else if (value >= 513) + /* The last range is mapped to its lowest value. */ + return 513; + else if (hweight64(value) == 1) + /* If it's a power of two, use it as is. */ + return value; + + /* Otherwise, take to the previous power of two + 1. */ + return ((u64)1 << (64 - __builtin_clzll(value) - 1)) + 1; +} + +/* + * The target values are partitioned into multiple ranges. The range spec is + * defined in compiler-rt/include/profile/InstrProfData.inc. + */ +void __llvm_profile_instrument_memop(u64 target_value, void *data, + u32 counter_index) +{ + u64 rep_value; + + /* Map the target value to the representative value of its range. */ + rep_value = inst_prof_get_range_rep_value(target_value); + __llvm_profile_instrument_target(rep_value, data, counter_index); +} +EXPORT_SYMBOL(__llvm_profile_instrument_memop); diff --git a/kernel/pgo/pgo.h b/kernel/pgo/pgo.h new file mode 100644 index 000000000000..04fbf3bcde1e --- /dev/null +++ b/kernel/pgo/pgo.h @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Google, Inc. + * + * Author: + * Sami Tolvanen <samitolvanen@google.com> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _PGO_H +#define _PGO_H + +/* + * Note: These internal LLVM definitions must match the compiler version. + * See llvm/include/llvm/ProfileData/InstrProfData.inc in LLVM's source code. + */ + +#define LLVM_INSTR_PROF_RAW_MAGIC_64 \ + ((u64)255 << 56 | \ + (u64)'l' << 48 | \ + (u64)'p' << 40 | \ + (u64)'r' << 32 | \ + (u64)'o' << 24 | \ + (u64)'f' << 16 | \ + (u64)'r' << 8 | \ + (u64)129) +#define LLVM_INSTR_PROF_RAW_MAGIC_32 \ + ((u64)255 << 56 | \ + (u64)'l' << 48 | \ + (u64)'p' << 40 | \ + (u64)'r' << 32 | \ + (u64)'o' << 24 | \ + (u64)'f' << 16 | \ + (u64)'R' << 8 | \ + (u64)129) + +#define LLVM_INSTR_PROF_RAW_VERSION 5 +#define LLVM_INSTR_PROF_DATA_ALIGNMENT 8 +#define LLVM_INSTR_PROF_IPVK_FIRST 0 +#define LLVM_INSTR_PROF_IPVK_LAST 1 +#define LLVM_INSTR_PROF_MAX_NUM_VAL_PER_SITE 255 + +#define LLVM_VARIANT_MASK_IR_PROF (0x1ULL << 56) +#define LLVM_VARIANT_MASK_CSIR_PROF (0x1ULL << 57) + +/** + * struct llvm_prf_header - represents the raw profile header data structure. + * @magic: the magic token for the file format. + * @version: the version of the file format. + * @data_size: the number of entries in the profile data section. + * @padding_bytes_before_counters: the number of padding bytes before the + * counters. + * @counters_size: the size in bytes of the LLVM profile section containing the + * counters. + * @padding_bytes_after_counters: the number of padding bytes after the + * counters. + * @names_size: the size in bytes of the LLVM profile section containing the + * counters' names. + * @counters_delta: the beginning of the LLMV profile counters section. + * @names_delta: the beginning of the LLMV profile names section. + * @value_kind_last: the last profile value kind. + */ +struct llvm_prf_header { + u64 magic; + u64 version; + u64 data_size; + u64 padding_bytes_before_counters; + u64 counters_size; + u64 padding_bytes_after_counters; + u64 names_size; + u64 counters_delta; + u64 names_delta; + u64 value_kind_last; +}; + +/** + * struct llvm_prf_data - represents the per-function control structure. + * @name_ref: the reference to the function's name. + * @func_hash: the hash value of the function. + * @counter_ptr: a pointer to the profile counter. + * @function_ptr: a pointer to the function. + * @values: the profiling values associated with this function. + * @num_counters: the number of counters in the function. + * @num_value_sites: the number of value profile sites. + */ +struct llvm_prf_data { + const u64 name_ref; + const u64 func_hash; + const void *counter_ptr; + const void *function_ptr; + void *values; + const u32 num_counters; + const u16 num_value_sites[LLVM_INSTR_PROF_IPVK_LAST + 1]; +} __aligned(LLVM_INSTR_PROF_DATA_ALIGNMENT); + +/** + * struct llvm_prf_value_node_data - represents the data part of the struct + * llvm_prf_value_node data structure. + * @value: the value counters. + * @count: the counters' count. + */ +struct llvm_prf_value_node_data { + u64 value; + u64 count; +}; + +/** + * struct llvm_prf_value_node - represents an internal data structure used by + * the value profiler. + * @value: the value counters. + * @count: the counters' count. + * @next: the next value node. + */ +struct llvm_prf_value_node { + u64 value; + u64 count; + struct llvm_prf_value_node *next; +}; + +/** + * struct llvm_prf_value_data - represents the value profiling data in indexed + * format. + * @total_size: the total size in bytes including this field. + * @num_value_kinds: the number of value profile kinds that has value profile + * data. + */ +struct llvm_prf_value_data { + u32 total_size; + u32 num_value_kinds; +}; + +/** + * struct llvm_prf_value_record - represents the on-disk layout of the value + * profile data of a particular kind for one function. + * @kind: the kind of the value profile record. + * @num_value_sites: the number of value profile sites. + * @site_count_array: the first element of the array that stores the number + * of profiled values for each value site. + */ +struct llvm_prf_value_record { + u32 kind; + u32 num_value_sites; + u8 site_count_array[]; +}; + +#define prf_get_value_record_header_size() \ + offsetof(struct llvm_prf_value_record, site_count_array) +#define prf_get_value_record_site_count_size(sites) \ + roundup((sites), 8) +#define prf_get_value_record_size(sites) \ + (prf_get_value_record_header_size() + \ + prf_get_value_record_site_count_size((sites))) + +/* Data sections */ +extern struct llvm_prf_data __llvm_prf_data_start[]; +extern struct llvm_prf_data __llvm_prf_data_end[]; + +extern u64 __llvm_prf_cnts_start[]; +extern u64 __llvm_prf_cnts_end[]; + +extern char __llvm_prf_names_start[]; +extern char __llvm_prf_names_end[]; + +extern struct llvm_prf_value_node __llvm_prf_vnds_start[]; +extern struct llvm_prf_value_node __llvm_prf_vnds_end[]; + +/* Locking for vnodes */ +extern unsigned long prf_lock(void); +extern void prf_unlock(unsigned long flags); + +/* Declarations for LLVM instrumentation. */ +void __llvm_profile_instrument_target(u64 target_value, void *data, u32 index); +void __llvm_profile_instrument_range(u64 target_value, void *data, + u32 index, s64 precise_start, + s64 precise_last, s64 large_value); +void __llvm_profile_instrument_memop(u64 target_value, void *data, + u32 counter_index); + +#define __DEFINE_PRF_SIZE(s) \ + static inline unsigned long prf_ ## s ## _size(void) \ + { \ + unsigned long start = \ + (unsigned long)__llvm_prf_ ## s ## _start; \ + unsigned long end = \ + (unsigned long)__llvm_prf_ ## s ## _end; \ + return roundup(end - start, \ + sizeof(__llvm_prf_ ## s ## _start[0])); \ + } \ + static inline unsigned long prf_ ## s ## _count(void) \ + { \ + return prf_ ## s ## _size() / \ + sizeof(__llvm_prf_ ## s ## _start[0]); \ + } + +__DEFINE_PRF_SIZE(data); +__DEFINE_PRF_SIZE(cnts); +__DEFINE_PRF_SIZE(names); +__DEFINE_PRF_SIZE(vnds); + +#undef __DEFINE_PRF_SIZE + +#endif /* _PGO_H */ diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 56d50eb0cd80..8b24ede279c3 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -151,6 +151,16 @@ _c_flags += $(if $(patsubst n%,, \ endif # +# Enable clang's PGO profiling flags for a file or directory depending on +# variables PGO_PROFILE_obj.o and PGO_PROFILE. +# +ifeq ($(CONFIG_PGO_CLANG),y) +_c_flags += $(if $(patsubst n%,, \ + $(PGO_PROFILE_$(basetarget).o)$(PGO_PROFILE)y), \ + $(CFLAGS_PGO_CLANG)) +endif + +# # Enable address sanitizer flags for kernel except some files or directories # we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE) # |