Merge tag 'riscv-for-linus-6.7-mw2' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 10 Nov 2023 17:23:17 +0000 (09:23 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 10 Nov 2023 17:23:17 +0000 (09:23 -0800)
Pull more RISC-V updates from Palmer Dabbelt:

 - Support for handling misaligned accesses in S-mode

 - Probing for misaligned access support is now properly cached and
   handled in parallel

 - PTDUMP now reflects the SW reserved bits, as well as the PBMT and
   NAPOT extensions

 - Performance improvements for TLB flushing

 - Support for many new relocations in the module loader

 - Various bug fixes and cleanups

* tag 'riscv-for-linus-6.7-mw2' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (51 commits)
  riscv: Optimize bitops with Zbb extension
  riscv: Rearrange hwcap.h and cpufeature.h
  drivers: perf: Do not broadcast to other cpus when starting a counter
  drivers: perf: Check find_first_bit() return value
  of: property: Add fw_devlink support for msi-parent
  RISC-V: Don't fail in riscv_of_parent_hartid() for disabled HARTs
  riscv: Fix set_memory_XX() and set_direct_map_XX() by splitting huge linear mappings
  riscv: Don't use PGD entries for the linear mapping
  RISC-V: Probe misaligned access speed in parallel
  RISC-V: Remove __init on unaligned_emulation_finish()
  RISC-V: Show accurate per-hart isa in /proc/cpuinfo
  RISC-V: Don't rely on positional structure initialization
  riscv: Add tests for riscv module loading
  riscv: Add remaining module relocations
  riscv: Avoid unaligned access when relocating modules
  riscv: split cache ops out of dma-noncoherent.c
  riscv: Improve flush_tlb_kernel_range()
  riscv: Make __flush_tlb_range() loop over pte instead of flushing the whole tlb
  riscv: Improve flush_tlb_range() for hugetlb pages
  riscv: Improve tlb_flush()
  ...

86 files changed:
Documentation/arch/riscv/uabi.rst
arch/riscv/Kconfig
arch/riscv/Kconfig.debug
arch/riscv/boot/Makefile
arch/riscv/configs/defconfig
arch/riscv/include/asm/bitops.h
arch/riscv/include/asm/cpufeature.h
arch/riscv/include/asm/elf.h
arch/riscv/include/asm/entry-common.h
arch/riscv/include/asm/errata_list.h
arch/riscv/include/asm/hwcap.h
arch/riscv/include/asm/insn-def.h
arch/riscv/include/asm/pgtable-64.h
arch/riscv/include/asm/pgtable-bits.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/processor.h
arch/riscv/include/asm/sbi.h
arch/riscv/include/asm/switch_to.h
arch/riscv/include/asm/tlb.h
arch/riscv/include/asm/tlbflush.h
arch/riscv/include/asm/vector.h
arch/riscv/include/uapi/asm/elf.h
arch/riscv/kernel/Makefile
arch/riscv/kernel/copy-unaligned.S
arch/riscv/kernel/cpu.c
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/entry.S
arch/riscv/kernel/fpu.S
arch/riscv/kernel/head.S
arch/riscv/kernel/hibernate-asm.S
arch/riscv/kernel/mcount-dyn.S
arch/riscv/kernel/mcount.S
arch/riscv/kernel/module.c
arch/riscv/kernel/probes/rethook_trampoline.S
arch/riscv/kernel/probes/simulate-insn.c
arch/riscv/kernel/probes/uprobes.c
arch/riscv/kernel/process.c
arch/riscv/kernel/sbi.c
arch/riscv/kernel/signal.c
arch/riscv/kernel/smpboot.c
arch/riscv/kernel/suspend_entry.S
arch/riscv/kernel/tests/Kconfig.debug [new file with mode: 0644]
arch/riscv/kernel/tests/Makefile [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/Makefile [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_module_linking_main.c [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_set16.S [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_set32.S [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_set6.S [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_set8.S [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_sub16.S [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_sub32.S [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_sub6.S [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_sub64.S [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_sub8.S [new file with mode: 0644]
arch/riscv/kernel/tests/module_test/test_uleb128.S [new file with mode: 0644]
arch/riscv/kernel/traps.c
arch/riscv/kernel/traps_misaligned.c
arch/riscv/kernel/vdso/flush_icache.S
arch/riscv/kernel/vdso/getcpu.S
arch/riscv/kernel/vdso/rt_sigreturn.S
arch/riscv/kernel/vdso/sys_hwprobe.S
arch/riscv/kernel/vdso/vdso.lds.S
arch/riscv/kvm/aia.c
arch/riscv/kvm/main.c
arch/riscv/kvm/tlb.c
arch/riscv/kvm/vcpu_fp.c
arch/riscv/kvm/vcpu_onereg.c
arch/riscv/kvm/vcpu_vector.c
arch/riscv/lib/clear_page.S
arch/riscv/lib/memcpy.S
arch/riscv/lib/memmove.S
arch/riscv/lib/memset.S
arch/riscv/lib/uaccess.S
arch/riscv/mm/Makefile
arch/riscv/mm/cache-ops.c [new file with mode: 0644]
arch/riscv/mm/dma-noncoherent.c
arch/riscv/mm/init.c
arch/riscv/mm/pageattr.c
arch/riscv/mm/pmem.c
arch/riscv/mm/ptdump.c
arch/riscv/mm/tlbflush.c
arch/riscv/purgatory/entry.S
drivers/clocksource/timer-riscv.c
drivers/firmware/efi/libstub/Makefile
drivers/of/property.c
drivers/perf/riscv_pmu_sbi.c

index 8960fac42c40f3c7fd288f86f5f8d2233f422f09..54d199dce78bf50525b0430dcfc5a6a71429bf68 100644 (file)
@@ -42,6 +42,26 @@ An example string following the order is::
 
    rv64imadc_zifoo_zigoo_zafoo_sbar_scar_zxmbaz_xqux_xrux
 
+"isa" and "hart isa" lines in /proc/cpuinfo
+-------------------------------------------
+
+The "isa" line in /proc/cpuinfo describes the lowest common denominator of
+RISC-V ISA extensions recognized by the kernel and implemented on all harts. The
+"hart isa" line, in contrast, describes the set of extensions recognized by the
+kernel on the particular hart being described, even if those extensions may not
+be present on all harts in the system.
+
+In both lines, the presence of an extension guarantees only that the hardware
+has the described capability. Additional kernel support or policy changes may be
+required before an extension's capability is fully usable by userspace programs.
+Similarly, for S-mode extensions, presence in one of these lines does not
+guarantee that the kernel is taking advantage of the extension, or that the
+feature will be visible in guest VMs managed by this kernel.
+
+Conversely, the absence of an extension in these lines does not necessarily mean
+the hardware does not support that feature. The running kernel may not recognize
+the extension, or may have deliberately removed it from the listing.
+
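As an illustration of the "isa" and "hart isa" lines described above, a
hypothetical /proc/cpuinfo excerpt (made-up hart and extension values, not
taken from a real system) might read::

   processor    : 1
   hart         : 1
   isa          : rv64imafdc_zicsr_zifencei
   hart isa     : rv64imafdc_zicboz_zicsr_zifencei

Here Zicboz appears only in "hart isa" because, in this made-up example, it is
implemented on hart 1 but not on every hart in the system.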
 Misaligned accesses
 -------------------
 
index 18b03ad0e6b973b17e27cb336d02eae6a88d83c0..95a2a06acc6a62412894e491c3bfd5d4a161d15b 100644 (file)
@@ -642,6 +642,15 @@ config THREAD_SIZE_ORDER
          Specify the Pages of thread stack size (from 4KB to 64KB), which also
          affects irq stack size, which is equal to thread stack size.
 
+config RISCV_MISALIGNED
+       bool "Support misaligned load/store traps for kernel and userspace"
+       select SYSCTL_ARCH_UNALIGN_ALLOW
+       default y
+       help
+         Say Y here if you want the kernel to embed support for misaligned
+         load/store for both kernel and userspace. When disabled, misaligned
+         accesses will generate a SIGBUS in userspace and panic in the kernel.
+
 endmenu # "Platform type"
 
 menu "Kernel features"
@@ -909,6 +918,9 @@ config PORTABLE
        select MMU
        select OF
 
+config ARCH_PROC_KCORE_TEXT
+       def_bool y
+
 menu "Power management options"
 
 source "kernel/power/Kconfig"
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..eafe17ebf7102c93925eea57be96fd6ee1e191a1 100644 (file)
@@ -0,0 +1 @@
+source "arch/riscv/kernel/tests/Kconfig.debug"
index 22b13947bd131e842ec36ac56be71cec69dd503e..8e7fc0edf21d3ecef979f217446bf815c6cd4917 100644 (file)
@@ -17,6 +17,7 @@
 KCOV_INSTRUMENT := n
 
 OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
+OBJCOPYFLAGS_loader.bin :=-O binary
 OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 
 targets := Image Image.* loader loader.o loader.lds loader.bin
index 938fa07ddf77341707ca3eea1bfdc3b03352eeff..905881282a7cd115fa222a68faab57545e868e10 100644 (file)
@@ -215,6 +215,8 @@ CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_CADENCE=y
 CONFIG_MMC_SPI=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_STARFIVE=y
 CONFIG_MMC_SDHI=y
 CONFIG_MMC_SUNXI=y
 CONFIG_RTC_CLASS=y
index 65f6eee4ab8d7751d412c04b9a3f2d6ec078858e..224b4dc02b50bc6761cbef064445e472ef053ce2 100644 (file)
 #include <asm/barrier.h>
 #include <asm/bitsperlong.h>
 
+#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE)
 #include <asm-generic/bitops/__ffs.h>
-#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/fls.h>
+
+#else
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+#if (BITS_PER_LONG == 64)
+#define CTZW   "ctzw "
+#define CLZW   "clzw "
+#elif (BITS_PER_LONG == 32)
+#define CTZW   "ctz "
+#define CLZW   "clz "
+#else
+#error "Unexpected BITS_PER_LONG"
+#endif
+
+static __always_inline unsigned long variable__ffs(unsigned long word)
+{
+       int num;
+
+       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+                                     RISCV_ISA_EXT_ZBB, 1)
+                         : : : : legacy);
+
+       asm volatile (".option push\n"
+                     ".option arch,+zbb\n"
+                     "ctz %0, %1\n"
+                     ".option pop\n"
+                     : "=r" (word) : "r" (word) :);
+
+       return word;
+
+legacy:
+       num = 0;
+#if BITS_PER_LONG == 64
+       if ((word & 0xffffffff) == 0) {
+               num += 32;
+               word >>= 32;
+       }
+#endif
+       if ((word & 0xffff) == 0) {
+               num += 16;
+               word >>= 16;
+       }
+       if ((word & 0xff) == 0) {
+               num += 8;
+               word >>= 8;
+       }
+       if ((word & 0xf) == 0) {
+               num += 4;
+               word >>= 4;
+       }
+       if ((word & 0x3) == 0) {
+               num += 2;
+               word >>= 2;
+       }
+       if ((word & 0x1) == 0)
+               num += 1;
+       return num;
+}
+
+/**
+ * __ffs - find first set bit in a long word
+ * @word: The word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+#define __ffs(word)                            \
+       (__builtin_constant_p(word) ?           \
+        (unsigned long)__builtin_ctzl(word) :  \
+        variable__ffs(word))
+
+static __always_inline unsigned long variable__fls(unsigned long word)
+{
+       int num;
+
+       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+                                     RISCV_ISA_EXT_ZBB, 1)
+                         : : : : legacy);
+
+       asm volatile (".option push\n"
+                     ".option arch,+zbb\n"
+                     "clz %0, %1\n"
+                     ".option pop\n"
+                     : "=r" (word) : "r" (word) :);
+
+       return BITS_PER_LONG - 1 - word;
+
+legacy:
+       num = BITS_PER_LONG - 1;
+#if BITS_PER_LONG == 64
+       if (!(word & (~0ul << 32))) {
+               num -= 32;
+               word <<= 32;
+       }
+#endif
+       if (!(word & (~0ul << (BITS_PER_LONG - 16)))) {
+               num -= 16;
+               word <<= 16;
+       }
+       if (!(word & (~0ul << (BITS_PER_LONG - 8)))) {
+               num -= 8;
+               word <<= 8;
+       }
+       if (!(word & (~0ul << (BITS_PER_LONG - 4)))) {
+               num -= 4;
+               word <<= 4;
+       }
+       if (!(word & (~0ul << (BITS_PER_LONG - 2)))) {
+               num -= 2;
+               word <<= 2;
+       }
+       if (!(word & (~0ul << (BITS_PER_LONG - 1))))
+               num -= 1;
+       return num;
+}
+
+/**
+ * __fls - find last set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+#define __fls(word)                                                    \
+       (__builtin_constant_p(word) ?                                   \
+        (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) :    \
+        variable__fls(word))
+
+static __always_inline int variable_ffs(int x)
+{
+       int r;
+
+       if (!x)
+               return 0;
+
+       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+                                     RISCV_ISA_EXT_ZBB, 1)
+                         : : : : legacy);
+
+       asm volatile (".option push\n"
+                     ".option arch,+zbb\n"
+                     CTZW "%0, %1\n"
+                     ".option pop\n"
+                     : "=r" (r) : "r" (x) :);
+
+       return r + 1;
+
+legacy:
+       r = 1;
+       if (!(x & 0xffff)) {
+               x >>= 16;
+               r += 16;
+       }
+       if (!(x & 0xff)) {
+               x >>= 8;
+               r += 8;
+       }
+       if (!(x & 0xf)) {
+               x >>= 4;
+               r += 4;
+       }
+       if (!(x & 3)) {
+               x >>= 2;
+               r += 2;
+       }
+       if (!(x & 1)) {
+               x >>= 1;
+               r += 1;
+       }
+       return r;
+}
+
+/**
+ * ffs - find first set bit in a word
+ * @x: the word to search
+ *
+ * This is defined the same way as the libc and compiler builtin ffs routines.
+ *
+ * ffs(value) returns 0 if value is 0 or the position of the first set bit if
+ * value is nonzero. The first (least significant) bit is at position 1.
+ */
+#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
+
+static __always_inline int variable_fls(unsigned int x)
+{
+       int r;
+
+       if (!x)
+               return 0;
+
+       asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+                                     RISCV_ISA_EXT_ZBB, 1)
+                         : : : : legacy);
+
+       asm volatile (".option push\n"
+                     ".option arch,+zbb\n"
+                     CLZW "%0, %1\n"
+                     ".option pop\n"
+                     : "=r" (r) : "r" (x) :);
+
+       return 32 - r;
+
+legacy:
+       r = 32;
+       if (!(x & 0xffff0000u)) {
+               x <<= 16;
+               r -= 16;
+       }
+       if (!(x & 0xff000000u)) {
+               x <<= 8;
+               r -= 8;
+       }
+       if (!(x & 0xf0000000u)) {
+               x <<= 4;
+               r -= 4;
+       }
+       if (!(x & 0xc0000000u)) {
+               x <<= 2;
+               r -= 2;
+       }
+       if (!(x & 0x80000000u)) {
+               x <<= 1;
+               r -= 1;
+       }
+       return r;
+}
+
+/**
+ * fls - find last set bit in a word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as ffs, but returns the position of the most
+ * significant set bit.
+ *
+ * fls(value) returns 0 if value is 0 or the position of the last set bit if
+ * value is nonzero. The last (most significant) bit is at position 32.
+ */
+#define fls(x)                                                 \
+({                                                             \
+       typeof(x) x_ = (x);                                     \
+       __builtin_constant_p(x_) ?                              \
+        (int)((x_ != 0) ? (32 - __builtin_clz(x_)) : 0)        \
+        :                                                      \
+        variable_fls(x_);                                      \
+})
+
+#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) */
+
+#include <asm-generic/bitops/ffz.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/ffs.h>
 
 #include <asm-generic/bitops/hweight.h>
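The kernel-doc above fixes two conventions: ffs()/fls() are 1-based and return
0 for an input of 0, while __ffs()/__fls() are 0-based and undefined for 0. A
minimal userspace sketch of just those semantics, built on the same GCC
builtins the constant-folding macros use (an illustration, not the kernel
implementation itself):

    #include <assert.h>

    /* 0-based index of least/most significant set bit; input must be nonzero */
    static unsigned long sk__ffs(unsigned long w) { return __builtin_ctzl(w); }
    static unsigned long sk__fls(unsigned long w)
    {
            return 8 * sizeof(long) - 1 - __builtin_clzl(w);
    }

    /* 1-based positions, returning 0 for 0, matching the libc convention */
    static int sk_ffs(int x) { return __builtin_ffs(x); }
    static int sk_fls(unsigned int x) { return x ? 32 - __builtin_clz(x) : 0; }

    int main(void)
    {
            assert(sk_ffs(0) == 0 && sk_fls(0) == 0);
            assert(sk_ffs(0x8) == 4);   /* bit 3 set -> position 4 */
            assert(sk__ffs(0x8) == 3);  /* 0-based */
            assert(sk_fls(0x8) == 4 && sk__fls(0x8) == 3);
            return 0;
    }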
 
index 13b7d35648a9c53d25c724badf65f42345a9ff72..a418c3112cd60cf2207b455d78c4520539364fc3 100644 (file)
@@ -7,7 +7,10 @@
 #define _ASM_CPUFEATURE_H
 
 #include <linux/bitmap.h>
+#include <linux/jump_label.h>
 #include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+#include <asm/errno.h>
 
 /*
  * These are probed via a device_initcall(), via either the SBI or directly
@@ -30,7 +33,104 @@ DECLARE_PER_CPU(long, misaligned_access_speed);
 /* Per-cpu ISA extensions. */
 extern struct riscv_isainfo hart_isa[NR_CPUS];
 
-void check_unaligned_access(int cpu);
 void riscv_user_isa_enable(void);
 
+#ifdef CONFIG_RISCV_MISALIGNED
+bool unaligned_ctl_available(void);
+bool check_unaligned_access_emulated(int cpu);
+void unaligned_emulation_finish(void);
+#else
+static inline bool unaligned_ctl_available(void)
+{
+       return false;
+}
+
+static inline bool check_unaligned_access_emulated(int cpu)
+{
+       return false;
+}
+
+static inline void unaligned_emulation_finish(void) {}
+#endif
+
+unsigned long riscv_get_elf_hwcap(void);
+
+struct riscv_isa_ext_data {
+       const unsigned int id;
+       const char *name;
+       const char *property;
+};
+
+extern const struct riscv_isa_ext_data riscv_isa_ext[];
+extern const size_t riscv_isa_ext_count;
+extern bool riscv_isa_fallback;
+
+unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
+
+bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
+#define riscv_isa_extension_available(isa_bitmap, ext) \
+       __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
+
+static __always_inline bool
+riscv_has_extension_likely(const unsigned long ext)
+{
+       compiletime_assert(ext < RISCV_ISA_EXT_MAX,
+                          "ext must be < RISCV_ISA_EXT_MAX");
+
+       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+               asm_volatile_goto(
+               ALTERNATIVE("j  %l[l_no]", "nop", 0, %[ext], 1)
+               :
+               : [ext] "i" (ext)
+               :
+               : l_no);
+       } else {
+               if (!__riscv_isa_extension_available(NULL, ext))
+                       goto l_no;
+       }
+
+       return true;
+l_no:
+       return false;
+}
+
+static __always_inline bool
+riscv_has_extension_unlikely(const unsigned long ext)
+{
+       compiletime_assert(ext < RISCV_ISA_EXT_MAX,
+                          "ext must be < RISCV_ISA_EXT_MAX");
+
+       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
+               asm_volatile_goto(
+               ALTERNATIVE("nop", "j   %l[l_yes]", 0, %[ext], 1)
+               :
+               : [ext] "i" (ext)
+               :
+               : l_yes);
+       } else {
+               if (__riscv_isa_extension_available(NULL, ext))
+                       goto l_yes;
+       }
+
+       return false;
+l_yes:
+       return true;
+}
+
+static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
+{
+       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
+               return true;
+
+       return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
+}
+
+static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
+{
+       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
+               return true;
+
+       return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
+}
+
 #endif
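A hedged usage sketch for the helpers above: a hypothetical caller selecting a
Zbb path at runtime (the fallback name is invented for illustration). With
CONFIG_RISCV_ALTERNATIVE the asm goto is patched to a nop once boot-time
probing confirms the extension, so the fast path pays no runtime check:

    #include <asm/cpufeature.h>
    #include <asm/hwcap.h>

    static unsigned long count_trailing_zeros(unsigned long w)
    {
            if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB))
                    return __builtin_ctzl(w);   /* compiler can emit ctz here */

            return software_ctz(w);             /* hypothetical fallback */
    }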
index b3b2dfbdf945efa2adfbc1f022979986ace2b983..06c236bfab53b323491ce6ae3bbdbbbcd6206318 100644 (file)
@@ -14,7 +14,7 @@
 #include <asm/auxvec.h>
 #include <asm/byteorder.h>
 #include <asm/cacheinfo.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 
 /*
  * These are used to set parameters in the core dumps.
index 6e4dee49d84b985a5ab1bb59b35ecc39a94ed0de..7ab5e34318c85fe05df525a5f80a49d25051bcd7 100644 (file)
@@ -8,4 +8,18 @@
 void handle_page_fault(struct pt_regs *regs);
 void handle_break(struct pt_regs *regs);
 
+#ifdef CONFIG_RISCV_MISALIGNED
+int handle_misaligned_load(struct pt_regs *regs);
+int handle_misaligned_store(struct pt_regs *regs);
+#else
+static inline int handle_misaligned_load(struct pt_regs *regs)
+{
+       return -1;
+}
+static inline int handle_misaligned_store(struct pt_regs *regs)
+{
+       return -1;
+}
+#endif
+
 #endif /* _ASM_RISCV_ENTRY_COMMON_H */
index 0ac18a4135be7da6f57f1ee7d29e782c4679e608..83ed25e4355343c25101882b7c0b31cf462af542 100644 (file)
@@ -117,9 +117,9 @@ asm volatile(ALTERNATIVE(                                           \
  * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
  *   0000000    11001     00000      000      00000  0001011
  */
-#define THEAD_inval_A0 ".long 0x0265000b"
-#define THEAD_clean_A0 ".long 0x0255000b"
-#define THEAD_flush_A0 ".long 0x0275000b"
+#define THEAD_INVAL_A0 ".long 0x0265000b"
+#define THEAD_CLEAN_A0 ".long 0x0255000b"
+#define THEAD_FLUSH_A0 ".long 0x0275000b"
 #define THEAD_SYNC_S   ".long 0x0190000b"
 
 #define ALT_CMO_OP(_op, _start, _size, _cachesize)                     \
index fe6656af967af932900b379a1aa8b2642090e66e..06d30526ef3b837d4e6c7fe8d14cb39f11e676f1 100644 (file)
@@ -8,9 +8,6 @@
 #ifndef _ASM_RISCV_HWCAP_H
 #define _ASM_RISCV_HWCAP_H
 
-#include <asm/alternative-macros.h>
-#include <asm/errno.h>
-#include <linux/bits.h>
 #include <uapi/asm/hwcap.h>
 
 #define RISCV_ISA_EXT_a                ('a' - 'a')
 #define RISCV_ISA_EXT_SxAIA            RISCV_ISA_EXT_SSAIA
 #endif
 
-#ifndef __ASSEMBLY__
-
-#include <linux/jump_label.h>
-#include <asm/cpufeature.h>
-
-unsigned long riscv_get_elf_hwcap(void);
-
-struct riscv_isa_ext_data {
-       const unsigned int id;
-       const char *name;
-       const char *property;
-};
-
-extern const struct riscv_isa_ext_data riscv_isa_ext[];
-extern const size_t riscv_isa_ext_count;
-extern bool riscv_isa_fallback;
-
-unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
-
-#define riscv_isa_extension_mask(ext) BIT_MASK(RISCV_ISA_EXT_##ext)
-
-bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit);
-#define riscv_isa_extension_available(isa_bitmap, ext) \
-       __riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_##ext)
-
-static __always_inline bool
-riscv_has_extension_likely(const unsigned long ext)
-{
-       compiletime_assert(ext < RISCV_ISA_EXT_MAX,
-                          "ext must be < RISCV_ISA_EXT_MAX");
-
-       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
-               asm_volatile_goto(
-               ALTERNATIVE("j  %l[l_no]", "nop", 0, %[ext], 1)
-               :
-               : [ext] "i" (ext)
-               :
-               : l_no);
-       } else {
-               if (!__riscv_isa_extension_available(NULL, ext))
-                       goto l_no;
-       }
-
-       return true;
-l_no:
-       return false;
-}
-
-static __always_inline bool
-riscv_has_extension_unlikely(const unsigned long ext)
-{
-       compiletime_assert(ext < RISCV_ISA_EXT_MAX,
-                          "ext must be < RISCV_ISA_EXT_MAX");
-
-       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
-               asm_volatile_goto(
-               ALTERNATIVE("nop", "j   %l[l_yes]", 0, %[ext], 1)
-               :
-               : [ext] "i" (ext)
-               :
-               : l_yes);
-       } else {
-               if (__riscv_isa_extension_available(NULL, ext))
-                       goto l_yes;
-       }
-
-       return false;
-l_yes:
-       return true;
-}
-
-static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
-{
-       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
-               return true;
-
-       return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
-}
-
-static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
-{
-       if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
-               return true;
-
-       return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
-}
-#endif
-
 #endif /* _ASM_RISCV_HWCAP_H */
index 6960beb75f32942422de90583e6cbdaadcc78006..e27179b26086b376f7ff1babd8b026bd1f1167cc 100644 (file)
        INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(51),              \
               __RD(0), RS1(gaddr), RS2(vmid))
 
-#define CBO_inval(base)                                                \
+#define CBO_INVAL(base)                                                \
        INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0),              \
               RS1(base), SIMM12(0))
 
-#define CBO_clean(base)                                                \
+#define CBO_CLEAN(base)                                                \
        INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0),              \
               RS1(base), SIMM12(1))
 
-#define CBO_flush(base)                                                \
+#define CBO_FLUSH(base)                                                \
        INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0),              \
               RS1(base), SIMM12(2))
 
-#define CBO_zero(base)                                         \
+#define CBO_ZERO(base)                                         \
        INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0),              \
               RS1(base), SIMM12(4))
 
index 7a5097202e15709dab5927d6be984570d40c5f41..9a2c780a11e9530bcad95a677553dabeba67bb5f 100644 (file)
@@ -126,14 +126,18 @@ enum napot_cont_order {
 
 /*
  * [63:59] T-Head Memory Type definitions:
- *
- * 00000 - NC   Weakly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable
+ * bit[63] SO - Strong Order
+ * bit[62] C - Cacheable
+ * bit[61] B - Bufferable
+ * bit[60] SH - Shareable
+ * bit[59] Sec - Trustable
+ * 00110 - NC   Weakly-ordered, Non-cacheable, Bufferable, Shareable, Non-trustable
  * 01110 - PMA  Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable
- * 10000 - IO   Strongly-ordered, Non-cacheable, Non-bufferable, Non-shareable, Non-trustable
+ * 10010 - IO   Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable
  */
 #define _PAGE_PMA_THEAD                ((1UL << 62) | (1UL << 61) | (1UL << 60))
-#define _PAGE_NOCACHE_THEAD    0UL
-#define _PAGE_IO_THEAD         (1UL << 63)
+#define _PAGE_NOCACHE_THEAD    ((1UL << 61) | (1UL << 60))
+#define _PAGE_IO_THEAD         ((1UL << 63) | (1UL << 60))
 #define _PAGE_MTMASK_THEAD     (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59))
 
 static inline u64 riscv_page_mtmask(void)
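Reading the new #defines against the bit table in the comment (a worked
decoding, not additional kernel code), each encoding is bits [63:59] taken as
SO, C, B, SH, Sec:

    /* PMA:     01110 = C|B|SH -> (1UL << 62) | (1UL << 61) | (1UL << 60) */
    /* NOCACHE: 00110 = B|SH   -> (1UL << 61) | (1UL << 60)               */
    /* IO:      10010 = SO|SH  -> (1UL << 63) | (1UL << 60)               */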
index f896708e833127d12e4cb9851b0890a8d7c5d922..179bd4afece46a6b96c33088a14480cb6c18c429 100644 (file)
@@ -16,9 +16,9 @@
 #define _PAGE_GLOBAL    (1 << 5)    /* Global */
 #define _PAGE_ACCESSED  (1 << 6)    /* Set by hardware on any access */
 #define _PAGE_DIRTY     (1 << 7)    /* Set by hardware on any write */
-#define _PAGE_SOFT      (1 << 8)    /* Reserved for software */
+#define _PAGE_SOFT      (3 << 8)    /* Reserved for software */
 
-#define _PAGE_SPECIAL   _PAGE_SOFT
+#define _PAGE_SPECIAL   (1 << 8)    /* RSW: 0x1 */
 #define _PAGE_TABLE     _PAGE_PRESENT
 
 /*
index c8e8867c42f6a9127b5a67fb837dd221f214ed88..294044429e8e15d9230f3b96c7c5579be68857f2 100644 (file)
@@ -291,6 +291,7 @@ static inline pte_t pud_pte(pud_t pud)
 }
 
 #ifdef CONFIG_RISCV_ISA_SVNAPOT
+#include <asm/cpufeature.h>
 
 static __always_inline bool has_svnapot(void)
 {
index 441da1839c947803cdcbe177e4292e2b81a9db49..f19f861cda549014eee042efb651709f5da00475 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/const.h>
 #include <linux/cache.h>
+#include <linux/prctl.h>
 
 #include <vdso/processor.h>
 
@@ -82,6 +83,7 @@ struct thread_struct {
        unsigned long bad_cause;
        unsigned long vstate_ctrl;
        struct __riscv_v_ext_state vstate;
+       unsigned long align_ctl;
 };
 
 /* Whitelist the fstate from the task_struct for hardened usercopy */
@@ -94,6 +96,7 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 
 #define INIT_THREAD {                                  \
        .sp = sizeof(init_stack) + (long)&init_stack,   \
+       .align_ctl = PR_UNALIGN_NOPRINT,                \
 }
 
 #define task_pt_regs(tsk)                                              \
@@ -136,6 +139,12 @@ extern long riscv_v_vstate_ctrl_set_current(unsigned long arg);
 extern long riscv_v_vstate_ctrl_get_current(void);
 #endif /* CONFIG_RISCV_ISA_V */
 
+extern int get_unalign_ctl(struct task_struct *tsk, unsigned long addr);
+extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
+
+#define GET_UNALIGN_CTL(tsk, addr)     get_unalign_ctl((tsk), (addr))
+#define SET_UNALIGN_CTL(tsk, val)      set_unalign_ctl((tsk), (val))
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_PROCESSOR_H */
index 12dfda6bb9242f402c729e1c08184e04a2f96eec..0892f4421bc4a5d0046750930b5637b355c15c26 100644 (file)
@@ -280,9 +280,6 @@ void sbi_set_timer(uint64_t stime_value);
 void sbi_shutdown(void);
 void sbi_send_ipi(unsigned int cpu);
 int sbi_remote_fence_i(const struct cpumask *cpu_mask);
-int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
-                          unsigned long start,
-                          unsigned long size);
 
 int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
                                unsigned long start,
index a727be723c5610f9ce2bc42de6f4dd2987c7536c..f90d8e42f3c7911908ec1f5f19929ab5ba67ff3a 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/jump_label.h>
 #include <linux/sched/task_stack.h>
 #include <asm/vector.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/csr.h>
index 120bcf2ed8a878554000f0d0ac73e14ec4aa6fa2..1eb5682b2af6065c9019e398df729f5b97a573c6 100644 (file)
@@ -15,7 +15,13 @@ static void tlb_flush(struct mmu_gather *tlb);
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-       flush_tlb_mm(tlb->mm);
+#ifdef CONFIG_MMU
+       if (tlb->fullmm || tlb->need_flush_all)
+               flush_tlb_mm(tlb->mm);
+       else
+               flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end,
+                                  tlb_get_unmap_size(tlb));
+#endif
 }
 
 #endif /* _ASM_RISCV_TLB_H */
index a09196f8de688ea90123bb74fc21e080cde19f22..8f3418c5f1724ba45e412ca52e0ef59ba0140638 100644 (file)
@@ -11,6 +11,9 @@
 #include <asm/smp.h>
 #include <asm/errata_list.h>
 
+#define FLUSH_TLB_MAX_SIZE      ((unsigned long)-1)
+#define FLUSH_TLB_NO_ASID       ((unsigned long)-1)
+
 #ifdef CONFIG_MMU
 extern unsigned long asid_mask;
 
@@ -32,9 +35,12 @@ static inline void local_flush_tlb_page(unsigned long addr)
 #if defined(CONFIG_SMP) && defined(CONFIG_MMU)
 void flush_tlb_all(void);
 void flush_tlb_mm(struct mm_struct *mm);
+void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+                       unsigned long end, unsigned int page_size);
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                     unsigned long end);
+void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -51,14 +57,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
        local_flush_tlb_all();
 }
 
-#define flush_tlb_mm(mm) flush_tlb_all()
-#endif /* !CONFIG_SMP || !CONFIG_MMU */
-
 /* Flush a range of kernel pages */
 static inline void flush_tlb_kernel_range(unsigned long start,
        unsigned long end)
 {
-       flush_tlb_all();
+       local_flush_tlb_all();
 }
 
+#define flush_tlb_mm(mm) flush_tlb_all()
+#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
+#endif /* !CONFIG_SMP || !CONFIG_MMU */
+
 #endif /* _ASM_RISCV_TLBFLUSH_H */
index c5ee07b3df071d16ad956fd62e6981403d9bf133..87aaef656257cbde40331aadaf1cb0b1ea374455 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 #include <asm/ptrace.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/csr.h>
 #include <asm/asm.h>
 
index d696d6610231dd6cacacdeba77e7c96b6184d993..11a71b8533d5759ec724a8359d0ffa2a4f2e976d 100644 (file)
@@ -49,6 +49,7 @@ typedef union __riscv_fp_state elf_fpregset_t;
 #define R_RISCV_TLS_DTPREL64   9
 #define R_RISCV_TLS_TPREL32    10
 #define R_RISCV_TLS_TPREL64    11
+#define R_RISCV_IRELATIVE      58
 
 /* Relocation types not used by the dynamic linker */
 #define R_RISCV_BRANCH         16
@@ -81,7 +82,6 @@ typedef union __riscv_fp_state elf_fpregset_t;
 #define R_RISCV_ALIGN          43
 #define R_RISCV_RVC_BRANCH     44
 #define R_RISCV_RVC_JUMP       45
-#define R_RISCV_LUI            46
 #define R_RISCV_GPREL_I                47
 #define R_RISCV_GPREL_S                48
 #define R_RISCV_TPREL_I                49
@@ -93,6 +93,9 @@ typedef union __riscv_fp_state elf_fpregset_t;
 #define R_RISCV_SET16          55
 #define R_RISCV_SET32          56
 #define R_RISCV_32_PCREL       57
+#define R_RISCV_PLT32          59
+#define R_RISCV_SET_ULEB128    60
+#define R_RISCV_SUB_ULEB128    61
 
 
 #endif /* _UAPI_ASM_RISCV_ELF_H */
index 95cf25d484052e88b39ca3a49b74cc2bb1453992..fee22a3d1b53462a33bad285d345465a0100e6a9 100644 (file)
@@ -57,9 +57,10 @@ obj-y        += stacktrace.o
 obj-y  += cacheinfo.o
 obj-y  += patch.o
 obj-y  += probes/
+obj-y  += tests/
 obj-$(CONFIG_MMU) += vdso.o vdso/
 
-obj-$(CONFIG_RISCV_M_MODE)     += traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o
 obj-$(CONFIG_FPU)              += fpu.o
 obj-$(CONFIG_RISCV_ISA_V)      += vector.o
 obj-$(CONFIG_SMP)              += smpboot.o
index cfdecfbaad627153ead8cde6fe9ca7a298c1aa40..2b3d9398c113fbaea3f775b1439058e5a5178c7d 100644 (file)
@@ -9,7 +9,7 @@
 /* void __riscv_copy_words_unaligned(void *, const void *, size_t) */
 /* Performs a memcpy without aligning buffers, using word loads and stores. */
 /* Note: The size is truncated to a multiple of 8 * SZREG */
-ENTRY(__riscv_copy_words_unaligned)
+SYM_FUNC_START(__riscv_copy_words_unaligned)
        andi  a4, a2, ~((8*SZREG)-1)
        beqz  a4, 2f
        add   a3, a1, a4
@@ -36,12 +36,12 @@ ENTRY(__riscv_copy_words_unaligned)
 
 2:
        ret
-END(__riscv_copy_words_unaligned)
+SYM_FUNC_END(__riscv_copy_words_unaligned)
 
 /* void __riscv_copy_bytes_unaligned(void *, const void *, size_t) */
 /* Performs a memcpy without aligning buffers, using only byte accesses. */
 /* Note: The size is truncated to a multiple of 8 */
-ENTRY(__riscv_copy_bytes_unaligned)
+SYM_FUNC_START(__riscv_copy_bytes_unaligned)
        andi a4, a2, ~(8-1)
        beqz a4, 2f
        add  a3, a1, a4
@@ -68,4 +68,4 @@ ENTRY(__riscv_copy_bytes_unaligned)
 
 2:
        ret
-END(__riscv_copy_bytes_unaligned)
+SYM_FUNC_END(__riscv_copy_bytes_unaligned)
index c17dacb1141cb3ca9c077ec8e95c5f39dbc5fabd..d11d6320fb0d2db489f221ab2527297a247fd304 100644 (file)
@@ -125,13 +125,14 @@ old_interface:
  */
 int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid)
 {
-       int rc;
-
        for (; node; node = node->parent) {
                if (of_device_is_compatible(node, "riscv")) {
-                       rc = riscv_of_processor_hartid(node, hartid);
-                       if (!rc)
-                               return 0;
+                       *hartid = (unsigned long)of_get_cpu_hwid(node, 0);
+                       if (*hartid == ~0UL) {
+                               pr_warn("Found CPU without hart ID\n");
+                               return -ENODEV;
+                       }
+                       return 0;
                }
        }
 
@@ -202,9 +203,8 @@ arch_initcall(riscv_cpuinfo_init);
 
 #ifdef CONFIG_PROC_FS
 
-static void print_isa(struct seq_file *f)
+static void print_isa(struct seq_file *f, const unsigned long *isa_bitmap)
 {
-       seq_puts(f, "isa\t\t: ");
 
        if (IS_ENABLED(CONFIG_32BIT))
                seq_write(f, "rv32", 4);
@@ -212,7 +212,7 @@ static void print_isa(struct seq_file *f)
                seq_write(f, "rv64", 4);
 
        for (int i = 0; i < riscv_isa_ext_count; i++) {
-               if (!__riscv_isa_extension_available(NULL, riscv_isa_ext[i].id))
+               if (!__riscv_isa_extension_available(isa_bitmap, riscv_isa_ext[i].id))
                        continue;
 
                /* Only multi-letter extensions are split by underscores */
@@ -276,7 +276,15 @@ static int c_show(struct seq_file *m, void *v)
 
        seq_printf(m, "processor\t: %lu\n", cpu_id);
        seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
-       print_isa(m);
+
+       /*
+        * For historical raisins, the isa: line is limited to the lowest common
+        * denominator of extensions supported across all harts. A true list of
+        * extensions supported on this hart is printed later in the hart isa:
+        * line.
+        */
+       seq_puts(m, "isa\t\t: ");
+       print_isa(m, NULL);
        print_mmu(m);
 
        if (acpi_disabled) {
@@ -292,6 +300,13 @@ static int c_show(struct seq_file *m, void *v)
        seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid);
        seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid);
        seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid);
+
+       /*
+        * Print the ISA extensions specific to this hart, which may show
+        * additional extensions not present across all harts.
+        */
+       seq_puts(m, "hart isa\t: ");
+       print_isa(m, hart_isa[cpu_id].isa);
        seq_puts(m, "\n");
 
        return 0;
index 7aeba01dcfd408beb0cafc69a5084e38e95ed03a..b3785ffc15703cdf55efc2c523179dd2b64695c1 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
+#include <linux/cpuhotplug.h>
 #include <linux/ctype.h>
 #include <linux/log2.h>
 #include <linux/memory.h>
@@ -29,6 +30,7 @@
 
 #define MISALIGNED_ACCESS_JIFFIES_LG2 1
 #define MISALIGNED_BUFFER_SIZE 0x4000
+#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
 #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
 
 unsigned long elf_hwcap __read_mostly;
@@ -559,23 +561,21 @@ unsigned long riscv_get_elf_hwcap(void)
        return hwcap;
 }
 
-void check_unaligned_access(int cpu)
+static int check_unaligned_access(void *param)
 {
+       int cpu = smp_processor_id();
        u64 start_cycles, end_cycles;
        u64 word_cycles;
        u64 byte_cycles;
        int ratio;
        unsigned long start_jiffies, now;
-       struct page *page;
+       struct page *page = param;
        void *dst;
        void *src;
        long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
 
-       page = alloc_pages(GFP_NOWAIT, get_order(MISALIGNED_BUFFER_SIZE));
-       if (!page) {
-               pr_warn("Can't alloc pages to measure memcpy performance");
-               return;
-       }
+       if (check_unaligned_access_emulated(cpu))
+               return 0;
 
        /* Make an unaligned destination buffer. */
        dst = (void *)((unsigned long)page_address(page) | 0x1);
@@ -629,7 +629,7 @@ void check_unaligned_access(int cpu)
                pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
                        cpu);
 
-               goto out;
+               return 0;
        }
 
        if (word_cycles < byte_cycles)
@@ -643,18 +643,84 @@ void check_unaligned_access(int cpu)
                (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
 
        per_cpu(misaligned_access_speed, cpu) = speed;
+       return 0;
+}
 
-out:
-       __free_pages(page, get_order(MISALIGNED_BUFFER_SIZE));
+static void check_unaligned_access_nonboot_cpu(void *param)
+{
+       unsigned int cpu = smp_processor_id();
+       struct page **pages = param;
+
+       if (smp_processor_id() != 0)
+               check_unaligned_access(pages[cpu]);
+}
+
+static int riscv_online_cpu(unsigned int cpu)
+{
+       static struct page *buf;
+
+       /* We are already set since the last check */
+       if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
+               return 0;
+
+       buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+       if (!buf) {
+               pr_warn("Allocation failure, not measuring misaligned performance\n");
+               return -ENOMEM;
+       }
+
+       check_unaligned_access(buf);
+       __free_pages(buf, MISALIGNED_BUFFER_ORDER);
+       return 0;
 }
 
-static int check_unaligned_access_boot_cpu(void)
+/* Measure unaligned access on all CPUs present at boot in parallel. */
+static int check_unaligned_access_all_cpus(void)
 {
-       check_unaligned_access(0);
+       unsigned int cpu;
+       unsigned int cpu_count = num_possible_cpus();
+       struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
+                                    GFP_KERNEL);
+
+       if (!bufs) {
+               pr_warn("Allocation failure, not measuring misaligned performance\n");
+               return 0;
+       }
+
+       /*
+        * Allocate separate buffers for each CPU so there's no fighting over
+        * cache lines.
+        */
+       for_each_cpu(cpu, cpu_online_mask) {
+               bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+               if (!bufs[cpu]) {
+                       pr_warn("Allocation failure, not measuring misaligned performance\n");
+                       goto out;
+               }
+       }
+
+       /* Check everybody except 0, who stays behind to tend jiffies. */
+       on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
+
+       /* Check core 0. */
+       smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
+
+       /* Setup hotplug callback for any new CPUs that come online. */
+       cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+                                 riscv_online_cpu, NULL);
+
+out:
+       unaligned_emulation_finish();
+       for_each_cpu(cpu, cpu_online_mask) {
+               if (bufs[cpu])
+                       __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
+       }
+
+       kfree(bufs);
        return 0;
 }
 
-arch_initcall(check_unaligned_access_boot_cpu);
+arch_initcall(check_unaligned_access_all_cpus);
 
 void riscv_user_isa_enable(void)
 {
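The probing flow above follows a reusable shape: an IPI broadcast probes every
online CPU except the boot CPU, the boot CPU is probed via smp_call_on_cpu() so
it can keep jiffies ticking for the timed loops in the meantime, and a hotplug
callback covers CPUs that come up later. A stripped-down sketch of that shape,
with hypothetical probe helpers standing in for the timed memcpy measurement:

    #include <linux/cpuhotplug.h>
    #include <linux/init.h>
    #include <linux/smp.h>

    /* Hypothetical probe body; stands in for the real measurement. */
    static void do_probe(void *param) { /* ... measure ... */ }
    static int do_probe_int(void *param) { do_probe(param); return 0; }
    static int probe_online_cpu(unsigned int cpu) { do_probe(NULL); return 0; }

    static void probe_nonboot_cpu(void *param)
    {
            /* CPU 0 is skipped so it can keep updating jiffies */
            if (smp_processor_id() != 0)
                    do_probe(param);
    }

    static int __init probe_all_cpus(void)
    {
            on_each_cpu(probe_nonboot_cpu, NULL, 1);      /* wait for the IPIs */
            smp_call_on_cpu(0, do_probe_int, NULL, true); /* then the boot CPU */
            cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "example:online",
                                      probe_online_cpu, NULL);
            return 0;
    }
    arch_initcall(probe_all_cpus);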
index 9f92c067f7e1e8a53fefe844add95cb9fef17325..54ca4564a92631388783a7978e8f49f40e556364 100644 (file)
@@ -26,9 +26,9 @@ SYM_CODE_START(handle_exception)
         * register will contain 0, and we should continue on the current TP.
         */
        csrrw tp, CSR_SCRATCH, tp
-       bnez tp, _save_context
+       bnez tp, .Lsave_context
 
-_restore_kernel_tpsp:
+.Lrestore_kernel_tpsp:
        csrr tp, CSR_SCRATCH
        REG_S sp, TASK_TI_KERNEL_SP(tp)
 
@@ -40,7 +40,7 @@ _restore_kernel_tpsp:
        REG_L sp, TASK_TI_KERNEL_SP(tp)
 #endif
 
-_save_context:
+.Lsave_context:
        REG_S sp, TASK_TI_USER_SP(tp)
        REG_L sp, TASK_TI_KERNEL_SP(tp)
        addi sp, sp, -(PT_SIZE_ON_STACK)
@@ -322,7 +322,7 @@ SYM_FUNC_END(__switch_to)
        .section ".rodata"
        .align LGREG
        /* Exception vector table */
-SYM_CODE_START(excp_vect_table)
+SYM_DATA_START_LOCAL(excp_vect_table)
        RISCV_PTR do_trap_insn_misaligned
        ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault)
        RISCV_PTR do_trap_insn_illegal
@@ -340,12 +340,11 @@ SYM_CODE_START(excp_vect_table)
        RISCV_PTR do_page_fault   /* load page fault */
        RISCV_PTR do_trap_unknown
        RISCV_PTR do_page_fault   /* store page fault */
-excp_vect_table_end:
-SYM_CODE_END(excp_vect_table)
+SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end)
 
 #ifndef CONFIG_MMU
-SYM_CODE_START(__user_rt_sigreturn)
+SYM_DATA_START(__user_rt_sigreturn)
        li a7, __NR_rt_sigreturn
        ecall
-SYM_CODE_END(__user_rt_sigreturn)
+SYM_DATA_END(__user_rt_sigreturn)
 #endif
index dd2205473de78571a5a4a4b68bd4b33302a77b31..2c543f130f9389aa4f1d9fee05f5027d99adaa78 100644 (file)
@@ -19,7 +19,7 @@
 #include <asm/csr.h>
 #include <asm/asm-offsets.h>
 
-ENTRY(__fstate_save)
+SYM_FUNC_START(__fstate_save)
        li  a2,  TASK_THREAD_F0
        add a0, a0, a2
        li t1, SR_FS
@@ -60,9 +60,9 @@ ENTRY(__fstate_save)
        sw t0, TASK_THREAD_FCSR_F0(a0)
        csrc CSR_STATUS, t1
        ret
-ENDPROC(__fstate_save)
+SYM_FUNC_END(__fstate_save)
 
-ENTRY(__fstate_restore)
+SYM_FUNC_START(__fstate_restore)
        li  a2,  TASK_THREAD_F0
        add a0, a0, a2
        li t1, SR_FS
@@ -103,4 +103,125 @@ ENTRY(__fstate_restore)
        fscsr t0
        csrc CSR_STATUS, t1
        ret
-ENDPROC(__fstate_restore)
+SYM_FUNC_END(__fstate_restore)
+
+#define get_f32(which) fmv.x.s a0, which; j 2f
+#define put_f32(which) fmv.s.x which, a1; j 2f
+#if __riscv_xlen == 64
+# define get_f64(which) fmv.x.d a0, which; j 2f
+# define put_f64(which) fmv.d.x which, a1; j 2f
+#else
+# define get_f64(which) fsd which, 0(a1); j 2f
+# define put_f64(which) fld which, 0(a1); j 2f
+#endif
+
+.macro fp_access_prologue
+       /*
+        * Compute jump offset to store the correct FP register since we don't
+        * have indirect FP register access
+        */
+       sll t0, a0, 3
+       la t2, 1f
+       add t0, t0, t2
+       li t1, SR_FS
+       csrs CSR_STATUS, t1
+       jr t0
+1:
+.endm
+
+.macro fp_access_epilogue
+2:
+       csrc CSR_STATUS, t1
+       ret
+.endm
+
+#define fp_access_body(__access_func) \
+       __access_func(f0); \
+       __access_func(f1); \
+       __access_func(f2); \
+       __access_func(f3); \
+       __access_func(f4); \
+       __access_func(f5); \
+       __access_func(f6); \
+       __access_func(f7); \
+       __access_func(f8); \
+       __access_func(f9); \
+       __access_func(f10); \
+       __access_func(f11); \
+       __access_func(f12); \
+       __access_func(f13); \
+       __access_func(f14); \
+       __access_func(f15); \
+       __access_func(f16); \
+       __access_func(f17); \
+       __access_func(f18); \
+       __access_func(f19); \
+       __access_func(f20); \
+       __access_func(f21); \
+       __access_func(f22); \
+       __access_func(f23); \
+       __access_func(f24); \
+       __access_func(f25); \
+       __access_func(f26); \
+       __access_func(f27); \
+       __access_func(f28); \
+       __access_func(f29); \
+       __access_func(f30); \
+       __access_func(f31)
+
+
+#ifdef CONFIG_RISCV_MISALIGNED
+
+/*
+ * Disable the compressed instruction set to keep a constant offset between FP
+ * load/store/move instructions
+ */
+.option norvc
+/*
+ * put_f32_reg - Set a FP register from a register containing the value
+ * a0 = FP register index to be set
+ * a1 = value to be loaded in the FP register
+ */
+SYM_FUNC_START(put_f32_reg)
+       fp_access_prologue
+       fp_access_body(put_f32)
+       fp_access_epilogue
+SYM_FUNC_END(put_f32_reg)
+
+/*
+ * get_f32_reg - Get a FP register value and return it
+ * a0 = FP register index to be retrieved
+ */
+SYM_FUNC_START(get_f32_reg)
+       fp_access_prologue
+       fp_access_body(get_f32)
+       fp_access_epilogue
+SYM_FUNC_END(get_f32_reg)
+
+/*
+ * put_f64_reg - Set a 64-bit FP register from a value or a pointer.
+ * a0 = FP register index to be set
+ * a1 = value/pointer to be loaded into the FP register (when xlen == 32, a1 is
+ * a pointer to the value).
+ */
+SYM_FUNC_START(put_f64_reg)
+       fp_access_prologue
+       fp_access_body(put_f64)
+       fp_access_epilogue
+SYM_FUNC_END(put_f64_reg)
+
+/*
+ * get_f64_reg - Get a 64-bit FP register value and return it or store it to
+ *              a pointer.
+ * a0 = FP register index to be retrieved
+ * a1 = if xlen == 32, pointer to be loaded with the FP register value;
+ *     unused if xlen == 64, in which case the FP register value is returned
+ *     through a0
+ */
+SYM_FUNC_START(get_f64_reg)
+       fp_access_prologue
+       fp_access_body(get_f64)
+       fp_access_epilogue
+SYM_FUNC_END(get_f64_reg)
+
+#endif /* CONFIG_RISCV_MISALIGNED */
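Why the "sll t0, a0, 3" in fp_access_prologue lands on the right table entry:
with the compressed instruction set disabled, every entry expanded from
get_f32()/put_f64() and friends is exactly two 4-byte instructions (the FP
access plus "j 2f"), i.e. 8 bytes. A worked check of that offset computation,
expressed in C purely for illustration:

    /* Entry i begins 8*i bytes past label "1:"; index << 3 == index * 8. */
    static unsigned long fp_entry_offset(unsigned int fp_reg_index)
    {
            return (unsigned long)fp_reg_index << 3;    /* e.g. f5 -> 40 */
    }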
index 18f97ec0f7ed5e46049cdbe295239ad8b7c24b2c..b77397432403d9ef028fea6855cdc97aea143d00 100644 (file)
@@ -19,7 +19,7 @@
 #include "efi-header.S"
 
 __HEAD
-ENTRY(_start)
+SYM_CODE_START(_start)
        /*
         * Image header expected by Linux boot-loaders. The image header data
         * structure is described in asm/image.h.
@@ -164,12 +164,12 @@ secondary_start_sbi:
        XIP_FIXUP_OFFSET a0
        call relocate_enable_mmu
 #endif
-       call setup_trap_vector
+       call .Lsetup_trap_vector
        tail smp_callin
 #endif /* CONFIG_SMP */
 
 .align 2
-setup_trap_vector:
+.Lsetup_trap_vector:
        /* Set trap vector to exception handler */
        la a0, handle_exception
        csrw CSR_TVEC, a0
@@ -187,9 +187,9 @@ setup_trap_vector:
        wfi
        j .Lsecondary_park
 
-END(_start)
+SYM_CODE_END(_start)
 
-ENTRY(_start_kernel)
+SYM_CODE_START(_start_kernel)
        /* Mask all interrupts */
        csrw CSR_IE, zero
        csrw CSR_IP, zero
@@ -206,7 +206,7 @@ ENTRY(_start_kernel)
         * not implement PMPs, so we set up a quick trap handler to just skip
         * touching the PMPs on any trap.
         */
-       la a0, pmp_done
+       la a0, .Lpmp_done
        csrw CSR_TVEC, a0
 
        li a0, -1
@@ -214,7 +214,7 @@ ENTRY(_start_kernel)
        li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X)
        csrw CSR_PMPCFG0, a0
 .align 2
-pmp_done:
+.Lpmp_done:
 
        /*
         * The hartid in a0 is expected later on, and we have no firmware
@@ -275,12 +275,12 @@ pmp_done:
        /* Clear BSS for flat non-ELF images */
        la a3, __bss_start
        la a4, __bss_stop
-       ble a4, a3, clear_bss_done
-clear_bss:
+       ble a4, a3, .Lclear_bss_done
+.Lclear_bss:
        REG_S zero, (a3)
        add a3, a3, RISCV_SZPTR
-       blt a3, a4, clear_bss
-clear_bss_done:
+       blt a3, a4, .Lclear_bss
+.Lclear_bss_done:
 #endif
        la a2, boot_cpu_hartid
        XIP_FIXUP_OFFSET a2
@@ -305,7 +305,7 @@ clear_bss_done:
        call relocate_enable_mmu
 #endif /* CONFIG_MMU */
 
-       call setup_trap_vector
+       call .Lsetup_trap_vector
        /* Restore C environment */
        la tp, init_task
        la sp, init_thread_union + THREAD_SIZE
@@ -348,10 +348,10 @@ clear_bss_done:
        tail .Lsecondary_start_common
 #endif /* CONFIG_RISCV_BOOT_SPINWAIT */
 
-END(_start_kernel)
+SYM_CODE_END(_start_kernel)
 
 #ifdef CONFIG_RISCV_M_MODE
-ENTRY(reset_regs)
+SYM_CODE_START_LOCAL(reset_regs)
        li      sp, 0
        li      gp, 0
        li      tp, 0
@@ -449,5 +449,5 @@ ENTRY(reset_regs)
 .Lreset_regs_done_vector:
 #endif /* CONFIG_RISCV_ISA_V */
        ret
-END(reset_regs)
+SYM_CODE_END(reset_regs)
 #endif /* CONFIG_RISCV_M_MODE */
index d698dd7df637ba8ad9263e2312f896c268a58b2d..d040dcf4add453dadaa328b85cd462ae43d6d31a 100644 (file)
@@ -21,7 +21,7 @@
  *
  * Always returns 0
  */
-ENTRY(__hibernate_cpu_resume)
+SYM_FUNC_START(__hibernate_cpu_resume)
        /* switch to hibernated image's page table. */
        csrw CSR_SATP, s0
        sfence.vma
@@ -34,7 +34,7 @@ ENTRY(__hibernate_cpu_resume)
        mv      a0, zero
 
        ret
-END(__hibernate_cpu_resume)
+SYM_FUNC_END(__hibernate_cpu_resume)
 
 /*
  * Prepare to restore the image.
@@ -42,7 +42,7 @@ END(__hibernate_cpu_resume)
  * a1: satp of temporary page tables.
  * a2: cpu_resume.
  */
-ENTRY(hibernate_restore_image)
+SYM_FUNC_START(hibernate_restore_image)
        mv      s0, a0
        mv      s1, a1
        mv      s2, a2
@@ -50,7 +50,7 @@ ENTRY(hibernate_restore_image)
        REG_L   a1, relocated_restore_code
 
        jr      a1
-END(hibernate_restore_image)
+SYM_FUNC_END(hibernate_restore_image)
 
 /*
  * The below code will be executed from a 'safe' page.
@@ -58,7 +58,7 @@ END(hibernate_restore_image)
  * back to the original memory location. Finally, it jumps to __hibernate_cpu_resume()
  * to restore the CPU context.
  */
-ENTRY(hibernate_core_restore_code)
+SYM_FUNC_START(hibernate_core_restore_code)
        /* switch to temp page table. */
        csrw satp, s1
        sfence.vma
@@ -73,4 +73,4 @@ ENTRY(hibernate_core_restore_code)
        bnez    s4, .Lcopy
 
        jr      s2
-END(hibernate_core_restore_code)
+SYM_FUNC_END(hibernate_core_restore_code)
index 669b8697aa38a5ed792ead53d73ce3057c62ab51..58dd96a2a15340ee83c473436a1b2cf25d407c1f 100644 (file)
@@ -82,7 +82,7 @@
        .endm
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 
-ENTRY(ftrace_caller)
+SYM_FUNC_START(ftrace_caller)
        SAVE_ABI
 
        addi    a0, t0, -FENTRY_RA_OFFSET
@@ -91,8 +91,7 @@ ENTRY(ftrace_caller)
        mv      a1, ra
        mv      a3, sp
 
-ftrace_call:
-       .global ftrace_call
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
        call    ftrace_stub
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -102,16 +101,15 @@ ftrace_call:
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
        mv      a2, s0
 #endif
-ftrace_graph_call:
-       .global ftrace_graph_call
+SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
        call    ftrace_stub
 #endif
        RESTORE_ABI
        jr t0
-ENDPROC(ftrace_caller)
+SYM_FUNC_END(ftrace_caller)
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-ENTRY(ftrace_regs_caller)
+SYM_FUNC_START(ftrace_regs_caller)
        SAVE_ALL
 
        addi    a0, t0, -FENTRY_RA_OFFSET
@@ -120,8 +118,7 @@ ENTRY(ftrace_regs_caller)
        mv      a1, ra
        mv      a3, sp
 
-ftrace_regs_call:
-       .global ftrace_regs_call
+SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
        call    ftrace_stub
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -131,12 +128,11 @@ ftrace_regs_call:
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
        mv      a2, s0
 #endif
-ftrace_graph_regs_call:
-       .global ftrace_graph_regs_call
+SYM_INNER_LABEL(ftrace_graph_regs_call, SYM_L_GLOBAL)
        call    ftrace_stub
 #endif
 
        RESTORE_ALL
        jr t0
-ENDPROC(ftrace_regs_caller)
+SYM_FUNC_END(ftrace_regs_caller)
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
index 8818a8fa9ff3af7b3427e2456f1650c5c6cb7168..b4dd9ed6849e30f13922a5ab4e398f87de984e9b 100644 (file)
@@ -61,7 +61,7 @@ SYM_TYPED_FUNC_START(ftrace_stub_graph)
        ret
 SYM_FUNC_END(ftrace_stub_graph)
 
-ENTRY(return_to_handler)
+SYM_FUNC_START(return_to_handler)
 /*
  * On implementing the frame point test, the ideal way is to compare the
  * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return.
@@ -76,25 +76,25 @@ ENTRY(return_to_handler)
        mv      a2, a0
        RESTORE_RET_ABI_STATE
        jalr    a2
-ENDPROC(return_to_handler)
+SYM_FUNC_END(return_to_handler)
 #endif
 
 #ifndef CONFIG_DYNAMIC_FTRACE
-ENTRY(MCOUNT_NAME)
+SYM_FUNC_START(MCOUNT_NAME)
        la      t4, ftrace_stub
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        la      t0, ftrace_graph_return
        REG_L   t1, 0(t0)
-       bne     t1, t4, do_ftrace_graph_caller
+       bne     t1, t4, .Ldo_ftrace_graph_caller
 
        la      t3, ftrace_graph_entry
        REG_L   t2, 0(t3)
        la      t6, ftrace_graph_entry_stub
-       bne     t2, t6, do_ftrace_graph_caller
+       bne     t2, t6, .Ldo_ftrace_graph_caller
 #endif
        la      t3, ftrace_trace_function
        REG_L   t5, 0(t3)
-       bne     t5, t4, do_trace
+       bne     t5, t4, .Ldo_trace
        ret
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -102,7 +102,7 @@ ENTRY(MCOUNT_NAME)
  * A pseudo representation for the function graph tracer:
  * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller)
  */
-do_ftrace_graph_caller:
+.Ldo_ftrace_graph_caller:
        addi    a0, s0, -SZREG
        mv      a1, ra
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
@@ -118,7 +118,7 @@ do_ftrace_graph_caller:
  * A pseudo representation for the function tracer:
  * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller)
  */
-do_trace:
+.Ldo_trace:
        REG_L   a1, -SZREG(s0)
        mv      a0, ra
 
@@ -126,6 +126,6 @@ do_trace:
        jalr    t5
        RESTORE_ABI_STATE
        ret
-ENDPROC(MCOUNT_NAME)
+SYM_FUNC_END(MCOUNT_NAME)
 #endif
 EXPORT_SYMBOL(MCOUNT_NAME)
index 7c651d55fcbd2ff402f18d3b7c8ebc470c492bfd..56a8c78e9e215eab146fac7ae9b645723f75a063 100644 (file)
@@ -7,6 +7,9 @@
 #include <linux/elf.h>
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/hashtable.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
 #include <linux/moduleloader.h>
 #include <linux/vmalloc.h>
 #include <linux/sizes.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 
+struct used_bucket {
+       struct list_head head;
+       struct hlist_head *bucket;
+};
+
+struct relocation_head {
+       struct hlist_node node;
+       struct list_head *rel_entry;
+       void *location;
+};
+
+struct relocation_entry {
+       struct list_head head;
+       Elf_Addr value;
+       unsigned int type;
+};
+
+struct relocation_handlers {
+       int (*reloc_handler)(struct module *me, void *location, Elf_Addr v);
+       int (*accumulate_handler)(struct module *me, void *location,
+                                 long buffer);
+};
+
+unsigned int initialize_relocation_hashtable(unsigned int num_relocations);
+void process_accumulated_relocations(struct module *me);
+int add_relocation_to_accumulate(struct module *me, int type, void *location,
+                                unsigned int hashtable_bits, Elf_Addr v);
+
+struct hlist_head *relocation_hashtable;
+
+struct list_head used_buckets_list;
+
 /*
  * The auipc+jalr instruction pair can reach any PC-relative offset
  * in the range [-2^31 - 2^11, 2^31 - 2^11)
@@ -27,68 +62,90 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
 #endif
 }
 
-static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
+static int riscv_insn_rmw(void *location, u32 keep, u32 set)
+{
+       u16 *parcel = location;
+       u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16;
+
+       insn &= keep;
+       insn |= set;
+
+       parcel[0] = cpu_to_le16(insn);
+       parcel[1] = cpu_to_le16(insn >> 16);
+       return 0;
+}
+
+static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set)
+{
+       u16 *parcel = location;
+       u16 insn = le16_to_cpu(*parcel);
+
+       insn &= keep;
+       insn |= set;
+
+       *parcel = cpu_to_le16(insn);
+       return 0;
+}
+
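
The two helpers above exist because the C extension reduces instruction alignment to 2 bytes, so a 32-bit instruction being relocated may not be 4-byte aligned; reassembling it from two little-endian 16-bit parcels avoids a misaligned u32 access. A minimal host-side sketch of the same read-modify-write (ignoring the kernel's le16 conversions), applied to a JAL with the same immediate scatter as apply_r_riscv_jal_rela():

#include <assert.h>
#include <stdint.h>

/* Host-side model of riscv_insn_rmw(): rebuild the instruction from
 * two 16-bit parcels, keep the bits in `keep`, OR in `set`, and write
 * it back halfword by halfword (safe at any 2-byte-aligned address). */
static void rmw32(uint16_t *parcel, uint32_t keep, uint32_t set)
{
        uint32_t insn = (uint32_t)parcel[0] | (uint32_t)parcel[1] << 16;

        insn = (insn & keep) | set;
        parcel[0] = (uint16_t)insn;
        parcel[1] = (uint16_t)(insn >> 16);
}

int main(void)
{
        uint16_t jal[2] = { 0x006f, 0x0000 };   /* jal x0, 0 */
        int32_t offset = 0x1234;                /* example branch distance */

        /* same J-type immediate scatter as apply_r_riscv_jal_rela() */
        rmw32(jal, 0xfff,
              ((offset & 0x100000) << (31 - 20)) | (offset & 0xff000) |
              ((offset & 0x800) << (20 - 11)) | ((offset & 0x7fe) << (30 - 10)));

        assert((jal[0] & 0xfff) == 0x06f);      /* opcode and rd preserved */
        return 0;
}
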
+static int apply_r_riscv_32_rela(struct module *me, void *location, Elf_Addr v)
 {
        if (v != (u32)v) {
                pr_err("%s: value %016llx out of range for 32-bit field\n",
                       me->name, (long long)v);
                return -EINVAL;
        }
-       *location = v;
+       *(u32 *)location = v;
        return 0;
 }
 
-static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v)
+static int apply_r_riscv_64_rela(struct module *me, void *location, Elf_Addr v)
 {
        *(u64 *)location = v;
        return 0;
 }
 
-static int apply_r_riscv_branch_rela(struct module *me, u32 *location,
+static int apply_r_riscv_branch_rela(struct module *me, void *location,
                                     Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u32 imm12 = (offset & 0x1000) << (31 - 12);
        u32 imm11 = (offset & 0x800) >> (11 - 7);
        u32 imm10_5 = (offset & 0x7e0) << (30 - 10);
        u32 imm4_1 = (offset & 0x1e) << (11 - 4);
 
-       *location = (*location & 0x1fff07f) | imm12 | imm11 | imm10_5 | imm4_1;
-       return 0;
+       return riscv_insn_rmw(location, 0x1fff07f, imm12 | imm11 | imm10_5 | imm4_1);
 }
 
-static int apply_r_riscv_jal_rela(struct module *me, u32 *location,
+static int apply_r_riscv_jal_rela(struct module *me, void *location,
                                  Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u32 imm20 = (offset & 0x100000) << (31 - 20);
        u32 imm19_12 = (offset & 0xff000);
        u32 imm11 = (offset & 0x800) << (20 - 11);
        u32 imm10_1 = (offset & 0x7fe) << (30 - 10);
 
-       *location = (*location & 0xfff) | imm20 | imm19_12 | imm11 | imm10_1;
-       return 0;
+       return riscv_insn_rmw(location, 0xfff, imm20 | imm19_12 | imm11 | imm10_1);
 }
 
-static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location,
+static int apply_r_riscv_rvc_branch_rela(struct module *me, void *location,
                                         Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u16 imm8 = (offset & 0x100) << (12 - 8);
        u16 imm7_6 = (offset & 0xc0) >> (6 - 5);
        u16 imm5 = (offset & 0x20) >> (5 - 2);
        u16 imm4_3 = (offset & 0x18) << (12 - 5);
        u16 imm2_1 = (offset & 0x6) << (12 - 10);
 
-       *(u16 *)location = (*(u16 *)location & 0xe383) |
-                   imm8 | imm7_6 | imm5 | imm4_3 | imm2_1;
-       return 0;
+       return riscv_insn_rvc_rmw(location, 0xe383,
+                       imm8 | imm7_6 | imm5 | imm4_3 | imm2_1);
 }
 
-static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location,
+static int apply_r_riscv_rvc_jump_rela(struct module *me, void *location,
                                       Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u16 imm11 = (offset & 0x800) << (12 - 11);
        u16 imm10 = (offset & 0x400) >> (10 - 8);
        u16 imm9_8 = (offset & 0x300) << (12 - 11);
@@ -98,16 +155,14 @@ static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location,
        u16 imm4 = (offset & 0x10) << (12 - 5);
        u16 imm3_1 = (offset & 0xe) << (12 - 10);
 
-       *(u16 *)location = (*(u16 *)location & 0xe003) |
-                   imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1;
-       return 0;
+       return riscv_insn_rvc_rmw(location, 0xe003,
+                       imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1);
 }
 
-static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
+static int apply_r_riscv_pcrel_hi20_rela(struct module *me, void *location,
                                         Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
-       s32 hi20;
+       ptrdiff_t offset = (void *)v - location;
 
        if (!riscv_insn_valid_32bit_offset(offset)) {
                pr_err(
@@ -116,23 +171,20 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
                return -EINVAL;
        }
 
-       hi20 = (offset + 0x800) & 0xfffff000;
-       *location = (*location & 0xfff) | hi20;
-       return 0;
+       return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000);
 }
 
-static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, u32 *location,
+static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, void *location,
                                           Elf_Addr v)
 {
        /*
         * v is the lo12 value to fill. It is calculated before calling this
         * handler.
         */
-       *location = (*location & 0xfffff) | ((v & 0xfff) << 20);
-       return 0;
+       return riscv_insn_rmw(location, 0xfffff, (v & 0xfff) << 20);
 }
 
-static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location,
+static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, void *location,
                                           Elf_Addr v)
 {
        /*
@@ -142,15 +194,12 @@ static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location,
        u32 imm11_5 = (v & 0xfe0) << (31 - 11);
        u32 imm4_0 = (v & 0x1f) << (11 - 4);
 
-       *location = (*location & 0x1fff07f) | imm11_5 | imm4_0;
-       return 0;
+       return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0);
 }
 
-static int apply_r_riscv_hi20_rela(struct module *me, u32 *location,
+static int apply_r_riscv_hi20_rela(struct module *me, void *location,
                                   Elf_Addr v)
 {
-       s32 hi20;
-
        if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) {
                pr_err(
                  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
@@ -158,22 +207,20 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location,
                return -EINVAL;
        }
 
-       hi20 = ((s32)v + 0x800) & 0xfffff000;
-       *location = (*location & 0xfff) | hi20;
-       return 0;
+       return riscv_insn_rmw(location, 0xfff, ((s32)v + 0x800) & 0xfffff000);
 }
 
-static int apply_r_riscv_lo12_i_rela(struct module *me, u32 *location,
+static int apply_r_riscv_lo12_i_rela(struct module *me, void *location,
                                     Elf_Addr v)
 {
        /* Skip medlow checking because of filtering by HI20 already */
        s32 hi20 = ((s32)v + 0x800) & 0xfffff000;
        s32 lo12 = ((s32)v - hi20);
-       *location = (*location & 0xfffff) | ((lo12 & 0xfff) << 20);
-       return 0;
+
+       return riscv_insn_rmw(location, 0xfffff, (lo12 & 0xfff) << 20);
 }
 
-static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location,
+static int apply_r_riscv_lo12_s_rela(struct module *me, void *location,
                                     Elf_Addr v)
 {
        /* Skip medlow checking because of filtering by HI20 already */
@@ -181,20 +228,18 @@ static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location,
        s32 lo12 = ((s32)v - hi20);
        u32 imm11_5 = (lo12 & 0xfe0) << (31 - 11);
        u32 imm4_0 = (lo12 & 0x1f) << (11 - 4);
-       *location = (*location & 0x1fff07f) | imm11_5 | imm4_0;
-       return 0;
+
+       return riscv_insn_rmw(location, 0x1fff07f, imm11_5 | imm4_0);
 }
 
-static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location,
+static int apply_r_riscv_got_hi20_rela(struct module *me, void *location,
                                       Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
-       s32 hi20;
+       ptrdiff_t offset = (void *)v - location;
 
        /* Always emit the got entry */
        if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
-               offset = module_emit_got_entry(me, v);
-               offset = (void *)offset - (void *)location;
+               offset = (void *)module_emit_got_entry(me, v) - location;
        } else {
                pr_err(
                  "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n",
@@ -202,22 +247,19 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location,
                return -EINVAL;
        }
 
-       hi20 = (offset + 0x800) & 0xfffff000;
-       *location = (*location & 0xfff) | hi20;
-       return 0;
+       return riscv_insn_rmw(location, 0xfff, (offset + 0x800) & 0xfffff000);
 }
 
-static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
+static int apply_r_riscv_call_plt_rela(struct module *me, void *location,
                                       Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u32 hi20, lo12;
 
        if (!riscv_insn_valid_32bit_offset(offset)) {
                /* Only emit the plt entry if offset over 32-bit range */
                if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
-                       offset = module_emit_plt_entry(me, v);
-                       offset = (void *)offset - (void *)location;
+                       offset = (void *)module_emit_plt_entry(me, v) - location;
                } else {
                        pr_err(
                          "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
@@ -228,15 +270,14 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
 
        hi20 = (offset + 0x800) & 0xfffff000;
        lo12 = (offset - hi20) & 0xfff;
-       *location = (*location & 0xfff) | hi20;
-       *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20);
-       return 0;
+       riscv_insn_rmw(location, 0xfff, hi20);
+       return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20);
 }
 
-static int apply_r_riscv_call_rela(struct module *me, u32 *location,
+static int apply_r_riscv_call_rela(struct module *me, void *location,
                                   Elf_Addr v)
 {
-       ptrdiff_t offset = (void *)v - (void *)location;
+       ptrdiff_t offset = (void *)v - location;
        u32 hi20, lo12;
 
        if (!riscv_insn_valid_32bit_offset(offset)) {
@@ -248,18 +289,17 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location,
 
        hi20 = (offset + 0x800) & 0xfffff000;
        lo12 = (offset - hi20) & 0xfff;
-       *location = (*location & 0xfff) | hi20;
-       *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20);
-       return 0;
+       riscv_insn_rmw(location, 0xfff, hi20);
+       return riscv_insn_rmw(location + 4, 0xfffff, lo12 << 20);
 }
 
-static int apply_r_riscv_relax_rela(struct module *me, u32 *location,
+static int apply_r_riscv_relax_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        return 0;
 }
 
-static int apply_r_riscv_align_rela(struct module *me, u32 *location,
+static int apply_r_riscv_align_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        pr_err(
@@ -268,91 +308,446 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location,
        return -EINVAL;
 }
 
-static int apply_r_riscv_add16_rela(struct module *me, u32 *location,
+static int apply_r_riscv_add8_rela(struct module *me, void *location, Elf_Addr v)
+{
+       *(u8 *)location += (u8)v;
+       return 0;
+}
+
+static int apply_r_riscv_add16_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u16 *)location += (u16)v;
        return 0;
 }
 
-static int apply_r_riscv_add32_rela(struct module *me, u32 *location,
+static int apply_r_riscv_add32_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u32 *)location += (u32)v;
        return 0;
 }
 
-static int apply_r_riscv_add64_rela(struct module *me, u32 *location,
+static int apply_r_riscv_add64_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u64 *)location += (u64)v;
        return 0;
 }
 
-static int apply_r_riscv_sub16_rela(struct module *me, u32 *location,
+static int apply_r_riscv_sub8_rela(struct module *me, void *location, Elf_Addr v)
+{
+       *(u8 *)location -= (u8)v;
+       return 0;
+}
+
+static int apply_r_riscv_sub16_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u16 *)location -= (u16)v;
        return 0;
 }
 
-static int apply_r_riscv_sub32_rela(struct module *me, u32 *location,
+static int apply_r_riscv_sub32_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u32 *)location -= (u32)v;
        return 0;
 }
 
-static int apply_r_riscv_sub64_rela(struct module *me, u32 *location,
+static int apply_r_riscv_sub64_rela(struct module *me, void *location,
                                    Elf_Addr v)
 {
        *(u64 *)location -= (u64)v;
        return 0;
 }
 
-static int (*reloc_handlers_rela[]) (struct module *me, u32 *location,
-                               Elf_Addr v) = {
-       [R_RISCV_32]                    = apply_r_riscv_32_rela,
-       [R_RISCV_64]                    = apply_r_riscv_64_rela,
-       [R_RISCV_BRANCH]                = apply_r_riscv_branch_rela,
-       [R_RISCV_JAL]                   = apply_r_riscv_jal_rela,
-       [R_RISCV_RVC_BRANCH]            = apply_r_riscv_rvc_branch_rela,
-       [R_RISCV_RVC_JUMP]              = apply_r_riscv_rvc_jump_rela,
-       [R_RISCV_PCREL_HI20]            = apply_r_riscv_pcrel_hi20_rela,
-       [R_RISCV_PCREL_LO12_I]          = apply_r_riscv_pcrel_lo12_i_rela,
-       [R_RISCV_PCREL_LO12_S]          = apply_r_riscv_pcrel_lo12_s_rela,
-       [R_RISCV_HI20]                  = apply_r_riscv_hi20_rela,
-       [R_RISCV_LO12_I]                = apply_r_riscv_lo12_i_rela,
-       [R_RISCV_LO12_S]                = apply_r_riscv_lo12_s_rela,
-       [R_RISCV_GOT_HI20]              = apply_r_riscv_got_hi20_rela,
-       [R_RISCV_CALL_PLT]              = apply_r_riscv_call_plt_rela,
-       [R_RISCV_CALL]                  = apply_r_riscv_call_rela,
-       [R_RISCV_RELAX]                 = apply_r_riscv_relax_rela,
-       [R_RISCV_ALIGN]                 = apply_r_riscv_align_rela,
-       [R_RISCV_ADD16]                 = apply_r_riscv_add16_rela,
-       [R_RISCV_ADD32]                 = apply_r_riscv_add32_rela,
-       [R_RISCV_ADD64]                 = apply_r_riscv_add64_rela,
-       [R_RISCV_SUB16]                 = apply_r_riscv_sub16_rela,
-       [R_RISCV_SUB32]                 = apply_r_riscv_sub32_rela,
-       [R_RISCV_SUB64]                 = apply_r_riscv_sub64_rela,
+static int dynamic_linking_not_supported(struct module *me, void *location,
+                                        Elf_Addr v)
+{
+       pr_err("%s: Dynamic linking not supported in kernel modules, PC = %p\n",
+              me->name, location);
+       return -EINVAL;
+}
+
+static int tls_not_supported(struct module *me, void *location, Elf_Addr v)
+{
+       pr_err("%s: Thread local storage not supported in kernel modules, PC = %p\n",
+              me->name, location);
+       return -EINVAL;
+}
+
+static int apply_r_riscv_sub6_rela(struct module *me, void *location, Elf_Addr v)
+{
+       u8 *byte = location;
+       u8 value = v;
+
+       *byte = (*byte - (value & 0x3f)) & 0x3f;
+       return 0;
+}
+
+static int apply_r_riscv_set6_rela(struct module *me, void *location, Elf_Addr v)
+{
+       u8 *byte = location;
+       u8 value = v;
+
+       *byte = (*byte & 0xc0) | (value & 0x3f);
+       return 0;
+}
+
+static int apply_r_riscv_set8_rela(struct module *me, void *location, Elf_Addr v)
+{
+       *(u8 *)location = (u8)v;
+       return 0;
+}
+
+static int apply_r_riscv_set16_rela(struct module *me, void *location,
+                                   Elf_Addr v)
+{
+       *(u16 *)location = (u16)v;
+       return 0;
+}
+
+static int apply_r_riscv_set32_rela(struct module *me, void *location,
+                                   Elf_Addr v)
+{
+       *(u32 *)location = (u32)v;
+       return 0;
+}
+
+static int apply_r_riscv_32_pcrel_rela(struct module *me, void *location,
+                                      Elf_Addr v)
+{
+       *(u32 *)location = v - (uintptr_t)location;
+       return 0;
+}
+
+static int apply_r_riscv_plt32_rela(struct module *me, void *location,
+                                   Elf_Addr v)
+{
+       ptrdiff_t offset = (void *)v - location;
+
+       if (!riscv_insn_valid_32bit_offset(offset)) {
+               /* Only emit the plt entry if offset over 32-bit range */
+               if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
+                       offset = (void *)module_emit_plt_entry(me, v) - location;
+               } else {
+                       pr_err("%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
+                              me->name, (long long)v, location);
+                       return -EINVAL;
+               }
+       }
+
+       *(u32 *)location = (u32)offset;
+       return 0;
+}
+
+static int apply_r_riscv_set_uleb128(struct module *me, void *location, Elf_Addr v)
+{
+       *(long *)location = v;
+       return 0;
+}
+
+static int apply_r_riscv_sub_uleb128(struct module *me, void *location, Elf_Addr v)
+{
+       *(long *)location -= v;
+       return 0;
+}
+
+static int apply_6_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       u8 *byte = location;
+       u8 value = buffer;
+
+       if (buffer > 0x3f) {
+               pr_err("%s: value %ld out of range for 6-bit relocation.\n",
+                      me->name, buffer);
+               return -EINVAL;
+       }
+
+       *byte = (*byte & 0xc0) | (value & 0x3f);
+       return 0;
+}
+
+static int apply_8_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       if (buffer > U8_MAX) {
+               pr_err("%s: value %ld out of range for 8-bit relocation.\n",
+                      me->name, buffer);
+               return -EINVAL;
+       }
+       *(u8 *)location = (u8)buffer;
+       return 0;
+}
+
+static int apply_16_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       if (buffer > U16_MAX) {
+               pr_err("%s: value %ld out of range for 16-bit relocation.\n",
+                      me->name, buffer);
+               return -EINVAL;
+       }
+       *(u16 *)location = (u16)buffer;
+       return 0;
+}
+
+static int apply_32_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       if (buffer > U32_MAX) {
+               pr_err("%s: value %ld out of range for 32-bit relocation.\n",
+                      me->name, buffer);
+               return -EINVAL;
+       }
+       *(u32 *)location = (u32)buffer;
+       return 0;
+}
+
+static int apply_64_bit_accumulation(struct module *me, void *location, long buffer)
+{
+       *(u64 *)location = (u64)buffer;
+       return 0;
+}
+
+static int apply_uleb128_accumulation(struct module *me, void *location, long buffer)
+{
+       /*
+        * ULEB128 is a variable length encoding. Encode the buffer into
+        * the ULEB128 data format.
+        */
+       u8 *p = location;
+
+       while (buffer != 0) {
+               u8 value = buffer & 0x7f;
+
+               buffer >>= 7;
+               value |= (!!buffer) << 7;
+
+               *p++ = value;
+       }
+       return 0;
+}
+
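
For reference, the format emitted above is standard ULEB128: seven payload bits per byte, most-significant bit set on every byte except the last (note the while loop stores no bytes at all for a zero value, leaving the location's preexisting zeros in place). A small self-contained sketch of the encoding with a round-trip check:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* ULEB128: 7 payload bits per byte, MSB is the continuation flag. */
static size_t uleb128_encode(uint64_t value, uint8_t *out)
{
        size_t n = 0;

        do {
                uint8_t byte = value & 0x7f;

                value >>= 7;
                if (value)
                        byte |= 0x80;   /* more bytes follow */
                out[n++] = byte;
        } while (value);
        return n;
}

static uint64_t uleb128_decode(const uint8_t *in)
{
        uint64_t value = 0;
        unsigned int shift = 0;

        do {
                value |= (uint64_t)(*in & 0x7f) << shift;
                shift += 7;
        } while (*in++ & 0x80);
        return value;
}

int main(void)
{
        uint8_t buf[10];

        uleb128_encode(127, buf);               /* one byte: 0x7f */
        assert(buf[0] == 0x7f && uleb128_decode(buf) == 127);

        uleb128_encode(1000, buf);              /* two bytes: 0xe8 0x07 */
        assert(buf[0] == 0xe8 && buf[1] == 0x07);
        assert(uleb128_decode(buf) == 1000);
        return 0;
}
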
+/*
+ * Relocations defined in the riscv-elf-psabi-doc.
+ * This handles static linking only.
+ */
+static const struct relocation_handlers reloc_handlers[] = {
+       [R_RISCV_32]            = { .reloc_handler = apply_r_riscv_32_rela },
+       [R_RISCV_64]            = { .reloc_handler = apply_r_riscv_64_rela },
+       [R_RISCV_RELATIVE]      = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_COPY]          = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_JUMP_SLOT]     = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_DTPMOD32]  = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_DTPMOD64]  = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_DTPREL32]  = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_DTPREL64]  = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_TPREL32]   = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_TLS_TPREL64]   = { .reloc_handler = dynamic_linking_not_supported },
+       /* 12-15 undefined */
+       [R_RISCV_BRANCH]        = { .reloc_handler = apply_r_riscv_branch_rela },
+       [R_RISCV_JAL]           = { .reloc_handler = apply_r_riscv_jal_rela },
+       [R_RISCV_CALL]          = { .reloc_handler = apply_r_riscv_call_rela },
+       [R_RISCV_CALL_PLT]      = { .reloc_handler = apply_r_riscv_call_plt_rela },
+       [R_RISCV_GOT_HI20]      = { .reloc_handler = apply_r_riscv_got_hi20_rela },
+       [R_RISCV_TLS_GOT_HI20]  = { .reloc_handler = tls_not_supported },
+       [R_RISCV_TLS_GD_HI20]   = { .reloc_handler = tls_not_supported },
+       [R_RISCV_PCREL_HI20]    = { .reloc_handler = apply_r_riscv_pcrel_hi20_rela },
+       [R_RISCV_PCREL_LO12_I]  = { .reloc_handler = apply_r_riscv_pcrel_lo12_i_rela },
+       [R_RISCV_PCREL_LO12_S]  = { .reloc_handler = apply_r_riscv_pcrel_lo12_s_rela },
+       [R_RISCV_HI20]          = { .reloc_handler = apply_r_riscv_hi20_rela },
+       [R_RISCV_LO12_I]        = { .reloc_handler = apply_r_riscv_lo12_i_rela },
+       [R_RISCV_LO12_S]        = { .reloc_handler = apply_r_riscv_lo12_s_rela },
+       [R_RISCV_TPREL_HI20]    = { .reloc_handler = tls_not_supported },
+       [R_RISCV_TPREL_LO12_I]  = { .reloc_handler = tls_not_supported },
+       [R_RISCV_TPREL_LO12_S]  = { .reloc_handler = tls_not_supported },
+       [R_RISCV_TPREL_ADD]     = { .reloc_handler = tls_not_supported },
+       [R_RISCV_ADD8]          = { .reloc_handler = apply_r_riscv_add8_rela,
+                                   .accumulate_handler = apply_8_bit_accumulation },
+       [R_RISCV_ADD16]         = { .reloc_handler = apply_r_riscv_add16_rela,
+                                   .accumulate_handler = apply_16_bit_accumulation },
+       [R_RISCV_ADD32]         = { .reloc_handler = apply_r_riscv_add32_rela,
+                                   .accumulate_handler = apply_32_bit_accumulation },
+       [R_RISCV_ADD64]         = { .reloc_handler = apply_r_riscv_add64_rela,
+                                   .accumulate_handler = apply_64_bit_accumulation },
+       [R_RISCV_SUB8]          = { .reloc_handler = apply_r_riscv_sub8_rela,
+                                   .accumulate_handler = apply_8_bit_accumulation },
+       [R_RISCV_SUB16]         = { .reloc_handler = apply_r_riscv_sub16_rela,
+                                   .accumulate_handler = apply_16_bit_accumulation },
+       [R_RISCV_SUB32]         = { .reloc_handler = apply_r_riscv_sub32_rela,
+                                   .accumulate_handler = apply_32_bit_accumulation },
+       [R_RISCV_SUB64]         = { .reloc_handler = apply_r_riscv_sub64_rela,
+                                   .accumulate_handler = apply_64_bit_accumulation },
+       /* 41-42 reserved for future standard use */
+       [R_RISCV_ALIGN]         = { .reloc_handler = apply_r_riscv_align_rela },
+       [R_RISCV_RVC_BRANCH]    = { .reloc_handler = apply_r_riscv_rvc_branch_rela },
+       [R_RISCV_RVC_JUMP]      = { .reloc_handler = apply_r_riscv_rvc_jump_rela },
+       /* 46-50 reserved for future standard use */
+       [R_RISCV_RELAX]         = { .reloc_handler = apply_r_riscv_relax_rela },
+       [R_RISCV_SUB6]          = { .reloc_handler = apply_r_riscv_sub6_rela,
+                                   .accumulate_handler = apply_6_bit_accumulation },
+       [R_RISCV_SET6]          = { .reloc_handler = apply_r_riscv_set6_rela,
+                                   .accumulate_handler = apply_6_bit_accumulation },
+       [R_RISCV_SET8]          = { .reloc_handler = apply_r_riscv_set8_rela,
+                                   .accumulate_handler = apply_8_bit_accumulation },
+       [R_RISCV_SET16]         = { .reloc_handler = apply_r_riscv_set16_rela,
+                                   .accumulate_handler = apply_16_bit_accumulation },
+       [R_RISCV_SET32]         = { .reloc_handler = apply_r_riscv_set32_rela,
+                                   .accumulate_handler = apply_32_bit_accumulation },
+       [R_RISCV_32_PCREL]      = { .reloc_handler = apply_r_riscv_32_pcrel_rela },
+       [R_RISCV_IRELATIVE]     = { .reloc_handler = dynamic_linking_not_supported },
+       [R_RISCV_PLT32]         = { .reloc_handler = apply_r_riscv_plt32_rela },
+       [R_RISCV_SET_ULEB128]   = { .reloc_handler = apply_r_riscv_set_uleb128,
+                                   .accumulate_handler = apply_uleb128_accumulation },
+       [R_RISCV_SUB_ULEB128]   = { .reloc_handler = apply_r_riscv_sub_uleb128,
+                                   .accumulate_handler = apply_uleb128_accumulation },
+       /* 62-191 reserved for future standard use */
+       /* 192-255 nonstandard ABI extensions  */
 };
 
+void process_accumulated_relocations(struct module *me)
+{
+       /*
+        * Only ADD/SUB/SET/ULEB128 should end up here.
+        *
+        * Each bucket may have more than one relocation location. All
+        * relocations for a location are stored in a list in a bucket.
+        *
+        * Relocations are applied to a temp variable before being stored to the
+        * provided location to check for overflow. This also allows ULEB128 to
+        * properly decide how many entries are needed before storing to
+        * location. The final value is stored into location using the handler
+        * for the last relocation to an address.
+        *
+        * Three layers of indexing:
+        *      - Each of the buckets in use
+        *      - Groups of relocations in each bucket by location address
+        *      - Each relocation entry for a location address
+        */
+       struct used_bucket *bucket_iter;
+       struct relocation_head *rel_head_iter;
+       struct relocation_entry *rel_entry_iter;
+       int curr_type;
+       void *location;
+       long buffer;
+
+       list_for_each_entry(bucket_iter, &used_buckets_list, head) {
+               hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) {
+                       buffer = 0;
+                       location = rel_head_iter->location;
+                       list_for_each_entry(rel_entry_iter,
+                                           rel_head_iter->rel_entry, head) {
+                               curr_type = rel_entry_iter->type;
+                               reloc_handlers[curr_type].reloc_handler(
+                                       me, &buffer, rel_entry_iter->value);
+                               kfree(rel_entry_iter);
+                       }
+                       reloc_handlers[curr_type].accumulate_handler(
+                               me, location, buffer);
+                       kfree(rel_head_iter);
+               }
+               kfree(bucket_iter);
+       }
+
+       kfree(relocation_hashtable);
+}
+
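
To make the accumulation pass above concrete: paired relocations such as ADD16/SUB16 at one address compute a label difference, and they must be summed in the wide temporary before the width check, because an intermediate value (a raw address) can overflow the field even when the final difference fits. A toy model of one bucket entry; the addresses are made up, and the layout assumption is little-endian, as on RISC-V Linux:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        long buffer = 0;                        /* the kernel's wide temporary */
        uint16_t location;                      /* the .half in the module image */
        unsigned long second = 0x80001040;      /* hypothetical label address */
        unsigned long first  = 0x80001020;      /* hypothetical label address */

        /* reloc_handler steps, applied to &buffer instead of location */
        *(uint16_t *)&buffer += (uint16_t)second;       /* R_RISCV_ADD16 */
        *(uint16_t *)&buffer -= (uint16_t)first;        /* R_RISCV_SUB16 */

        /* accumulate_handler step: range-check, then store exactly once */
        assert(buffer >= 0 && buffer <= UINT16_MAX);
        location = (uint16_t)buffer;
        assert(location == 0x20);               /* second - first */
        return 0;
}
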
+int add_relocation_to_accumulate(struct module *me, int type, void *location,
+                                unsigned int hashtable_bits, Elf_Addr v)
+{
+       struct relocation_entry *entry;
+       struct relocation_head *rel_head;
+       struct hlist_head *current_head;
+       struct used_bucket *bucket;
+       unsigned long hash;
+
+       entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+       INIT_LIST_HEAD(&entry->head);
+       entry->type = type;
+       entry->value = v;
+
+       hash = hash_min((uintptr_t)location, hashtable_bits);
+
+       current_head = &relocation_hashtable[hash];
+
+       /* Find matching location (if any) */
+       bool found = false;
+       struct relocation_head *rel_head_iter;
+
+       hlist_for_each_entry(rel_head_iter, current_head, node) {
+               if (rel_head_iter->location == location) {
+                       found = true;
+                       rel_head = rel_head_iter;
+                       break;
+               }
+       }
+
+       if (!found) {
+               rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL);
+               rel_head->rel_entry =
+                       kmalloc(sizeof(struct list_head), GFP_KERNEL);
+               INIT_LIST_HEAD(rel_head->rel_entry);
+               rel_head->location = location;
+               INIT_HLIST_NODE(&rel_head->node);
+               if (!current_head->first) {
+                       bucket =
+                               kmalloc(sizeof(struct used_bucket), GFP_KERNEL);
+                       INIT_LIST_HEAD(&bucket->head);
+                       bucket->bucket = current_head;
+                       list_add(&bucket->head, &used_buckets_list);
+               }
+               hlist_add_head(&rel_head->node, current_head);
+       }
+
+       /* Add the relocation to the tail of the list tracked by rel_head */
+       list_add_tail(&entry->head, rel_head->rel_entry);
+
+       return 0;
+}
+
+unsigned int initialize_relocation_hashtable(unsigned int num_relocations)
+{
+       /* Can safely assume that bits is not greater than sizeof(long) */
+       unsigned long hashtable_size = roundup_pow_of_two(num_relocations);
+       unsigned int hashtable_bits = ilog2(hashtable_size);
+
+       /*
+        * Double size of hashtable if num_relocations * 1.25 is greater than
+        * hashtable_size.
+        */
+       int should_double_size = ((num_relocations + (num_relocations >> 2)) > (hashtable_size));
+
+       hashtable_bits += should_double_size;
+
+       hashtable_size <<= should_double_size;
+
+       relocation_hashtable = kmalloc_array(hashtable_size,
+                                            sizeof(*relocation_hashtable),
+                                            GFP_KERNEL);
+       __hash_init(relocation_hashtable, hashtable_size);
+
+       INIT_LIST_HEAD(&used_buckets_list);
+
+       return hashtable_bits;
+}
+
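
The sizing rule above works out as follows: round the relocation count up to a power of two, then double once more if count * 1.25 would still exceed it, which bounds the table's load factor at 0.8. A host-side sketch of the same arithmetic (standing in for roundup_pow_of_two()/ilog2()):

#include <assert.h>

static unsigned int table_bits(unsigned int num_relocations)
{
        unsigned int bits = 0;

        while ((1u << bits) < num_relocations)
                bits++;
        if (num_relocations + (num_relocations >> 2) > (1u << bits))
                bits++;
        return bits;
}

int main(void)
{
        assert(table_bits(100) == 7);   /* 128 slots; 125 <= 128, keep */
        assert(table_bits(120) == 8);   /* 150 > 128, double to 256 */
        return 0;
}
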
 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
                       unsigned int symindex, unsigned int relsec,
                       struct module *me)
 {
        Elf_Rela *rel = (void *) sechdrs[relsec].sh_addr;
-       int (*handler)(struct module *me, u32 *location, Elf_Addr v);
+       int (*handler)(struct module *me, void *location, Elf_Addr v);
        Elf_Sym *sym;
-       u32 *location;
+       void *location;
        unsigned int i, type;
        Elf_Addr v;
        int res;
+       unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel);
+       unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations);
 
        pr_debug("Applying relocate section %u to %u\n", relsec,
               sechdrs[relsec].sh_info);
 
-       for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+       for (i = 0; i < num_relocations; i++) {
                /* This is where to make the change */
                location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
                        + rel[i].r_offset;
@@ -370,8 +765,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 
                type = ELF_RISCV_R_TYPE(rel[i].r_info);
 
-               if (type < ARRAY_SIZE(reloc_handlers_rela))
-                       handler = reloc_handlers_rela[type];
+               if (type < ARRAY_SIZE(reloc_handlers))
+                       handler = reloc_handlers[type].reloc_handler;
                else
                        handler = NULL;
 
@@ -427,11 +822,16 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
                        }
                }
 
-               res = handler(me, location, v);
+               if (reloc_handlers[type].accumulate_handler)
+                       res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v);
+               else
+                       res = handler(me, location, v);
                if (res)
                        return res;
        }
 
+       process_accumulated_relocations(me);
+
        return 0;
 }
 
index 21bac92a170a9bd43be973f213cb8be3329175ef..f2cd83d9b0f004e0fe6ae64db9425d343a394bf9 100644 (file)
@@ -75,7 +75,7 @@
        REG_L x31, PT_T6(sp)
        .endm
 
-ENTRY(arch_rethook_trampoline)
+SYM_CODE_START(arch_rethook_trampoline)
        addi sp, sp, -(PT_SIZE_ON_STACK)
        save_all_base_regs
 
@@ -90,4 +90,4 @@ ENTRY(arch_rethook_trampoline)
        addi sp, sp, PT_SIZE_ON_STACK
 
        ret
-ENDPROC(arch_rethook_trampoline)
+SYM_CODE_END(arch_rethook_trampoline)
index d3099d67816d054b4abcc938a7e775fbe2a79212..6c166029079c42bf71f0d0a5b0c28308027680c6 100644 (file)
@@ -24,7 +24,7 @@ static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index,
                                       unsigned long val)
 {
        if (index == 0)
-               return false;
+               return true;
        else if (index <= 31)
                *((unsigned long *)regs + index) = val;
        else
index 194f166b2cc40e77b3bcef5dabe6ca9c01d86bd9..4b3dc8beaf77d31d0c828a7a04cc81e2d78ef2e0 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
 #include <linux/uprobes.h>
+#include <asm/insn.h>
 
 #include "decode-insn.h"
 
@@ -17,6 +18,11 @@ bool is_swbp_insn(uprobe_opcode_t *insn)
 #endif
 }
 
+bool is_trap_insn(uprobe_opcode_t *insn)
+{
+       return riscv_insn_is_ebreak(*insn) || riscv_insn_is_c_ebreak(*insn);
+}
+
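
is_trap_insn() lets uprobes tell a breakpoint it planted apart from an ebreak/c.ebreak already present in the probed binary. The two encodings come straight from the ISA; a self-contained sketch of the same check:

#include <assert.h>
#include <stdint.h>

/* ebreak is 0x00100073; c.ebreak is the 16-bit parcel 0x9002 */
static int insn_is_trap(uint32_t insn)
{
        return insn == 0x00100073 || (uint16_t)insn == 0x9002;
}

int main(void)
{
        assert(insn_is_trap(0x00100073));       /* ebreak */
        assert(insn_is_trap(0x9002));           /* c.ebreak */
        assert(!insn_is_trap(0x00000013));      /* addi x0, x0, 0 (nop) */
        return 0;
}
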
 unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
 {
        return instruction_pointer(regs);
index e32d737e039fd477c33861da55705b86dbe5f53f..4f21d970a1292b06be357b8b33ed541751bbb091 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/thread_info.h>
 #include <asm/cpuidle.h>
 #include <asm/vector.h>
+#include <asm/cpufeature.h>
 
 register unsigned long gp_in_global __asm__("gp");
 
@@ -41,6 +42,23 @@ void arch_cpu_idle(void)
        cpu_do_idle();
 }
 
+int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
+{
+       if (!unaligned_ctl_available())
+               return -EINVAL;
+
+       tsk->thread.align_ctl = val;
+       return 0;
+}
+
+int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
+{
+       if (!unaligned_ctl_available())
+               return -EINVAL;
+
+       return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr);
+}
+
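
These two hooks back the generic PR_SET_UNALIGN/PR_GET_UNALIGN prctl interface, gated on unaligned_ctl_available(). A hedged sketch of the userspace side:

#include <sys/prctl.h>

int main(void)
{
        unsigned long ctl;

        /* fix up misaligned accesses silently for this task */
        prctl(PR_SET_UNALIGN, PR_UNALIGN_NOPRINT);

        /* or deliver SIGBUS instead of emulating */
        prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS);

        /* read the current setting back through get_unalign_ctl() */
        prctl(PR_GET_UNALIGN, &ctl);
        return 0;
}
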
 void __show_regs(struct pt_regs *regs)
 {
        show_regs_print_info(KERN_DEFAULT);
index c672c8ba9a2a6b45450993b56c41f48e3514249b..5a62ed1da45332c85820fdfdd7e90046b1ae3380 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/reboot.h>
 #include <asm/sbi.h>
 #include <asm/smp.h>
+#include <asm/tlbflush.h>
 
 /* default SBI version is 0.1 */
 unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
@@ -376,32 +377,15 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask)
 }
 EXPORT_SYMBOL(sbi_remote_fence_i);
 
-/**
- * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
- *                          harts for the specified virtual address range.
- * @cpu_mask: A cpu mask containing all the target harts.
- * @start: Start of the virtual address
- * @size: Total size of the virtual address range.
- *
- * Return: 0 on success, appropriate linux error code otherwise.
- */
-int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
-                          unsigned long start,
-                          unsigned long size)
-{
-       return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
-                           cpu_mask, start, size, 0, 0);
-}
-EXPORT_SYMBOL(sbi_remote_sfence_vma);
-
 /**
  * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
- * remote harts for a virtual address range belonging to a specific ASID.
+ * remote harts for a virtual address range belonging to a specific ASID,
+ * or to all address spaces.
  *
  * @cpu_mask: A cpu mask containing all the target harts.
  * @start: Start of the virtual address
  * @size: Total size of the virtual address range.
- * @asid: The value of address space identifier (ASID).
+ * @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID
+ * for flushing all address spaces.
  *
  * Return: 0 on success, appropriate linux error code otherwise.
  */
@@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
                                unsigned long size,
                                unsigned long asid)
 {
-       return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
-                           cpu_mask, start, size, asid, 0);
+       if (asid == FLUSH_TLB_NO_ASID)
+               return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
+                                   cpu_mask, start, size, 0, 0);
+       else
+               return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
+                                   cpu_mask, start, size, asid, 0);
 }
 EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
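
With sbi_remote_sfence_vma() folded in, sbi_remote_sfence_vma_asid() becomes the single remote-fence entry point; passing FLUSH_TLB_NO_ASID selects the plain SFENCE.VMA call. A hedged sketch of the two call shapes (the mask/range/asid arguments are placeholders):

        /* one address space: SFENCE.VMA tagged with the given ASID */
        sbi_remote_sfence_vma_asid(mask, start, size, asid);

        /* every address space: falls back to the plain SFENCE.VMA call */
        sbi_remote_sfence_vma_asid(mask, start, size, FLUSH_TLB_NO_ASID);
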
 
index 21a4d0e111bc5f151f9ef4a6205e7da17a87ec10..88b6220b260879ee75ac6a6824def025b004041b 100644 (file)
@@ -384,30 +384,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
        sigset_t *oldset = sigmask_to_save();
        int ret;
 
-       /* Are we from a system call? */
-       if (regs->cause == EXC_SYSCALL) {
-               /* Avoid additional syscall restarting via ret_from_exception */
-               regs->cause = -1UL;
-               /* If so, check system call restarting.. */
-               switch (regs->a0) {
-               case -ERESTART_RESTARTBLOCK:
-               case -ERESTARTNOHAND:
-                       regs->a0 = -EINTR;
-                       break;
-
-               case -ERESTARTSYS:
-                       if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
-                               regs->a0 = -EINTR;
-                               break;
-                       }
-                       fallthrough;
-               case -ERESTARTNOINTR:
-                        regs->a0 = regs->orig_a0;
-                       regs->epc -= 0x4;
-                       break;
-               }
-       }
-
        rseq_signal_deliver(ksig, regs);
 
        /* Set up the stack frame */
@@ -421,35 +397,66 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 
 void arch_do_signal_or_restart(struct pt_regs *regs)
 {
+       unsigned long continue_addr = 0, restart_addr = 0;
+       int retval = 0;
        struct ksignal ksig;
+       bool syscall = (regs->cause == EXC_SYSCALL);
 
-       if (get_signal(&ksig)) {
-               /* Actually deliver the signal */
-               handle_signal(&ksig, regs);
-               return;
-       }
+       /* If we were from a system call, check for system call restarting */
+       if (syscall) {
+               continue_addr = regs->epc;
+               restart_addr = continue_addr - 4;
+               retval = regs->a0;
 
-       /* Did we come from a system call? */
-       if (regs->cause == EXC_SYSCALL) {
                /* Avoid additional syscall restarting via ret_from_exception */
                regs->cause = -1UL;
 
-               /* Restart the system call - no handlers present */
-               switch (regs->a0) {
+               /*
+                * Prepare for system call restart. We do this here so that a
+                * debugger will see the already changed PC.
+                */
+               switch (retval) {
                case -ERESTARTNOHAND:
                case -ERESTARTSYS:
                case -ERESTARTNOINTR:
-                        regs->a0 = regs->orig_a0;
-                       regs->epc -= 0x4;
-                       break;
                case -ERESTART_RESTARTBLOCK:
-                        regs->a0 = regs->orig_a0;
-                       regs->a7 = __NR_restart_syscall;
-                       regs->epc -= 0x4;
+                       regs->a0 = regs->orig_a0;
+                       regs->epc = restart_addr;
                        break;
                }
        }
 
+       /*
+        * Get the signal to deliver. When running under ptrace, at this point
+        * the debugger may change all of our registers.
+        */
+       if (get_signal(&ksig)) {
+               /*
+                * Depending on the signal settings, we may need to revert the
+                * decision to restart the system call, but skip this if a
+                * debugger has chosen to restart at a different PC.
+                */
+               if (regs->epc == restart_addr &&
+                   (retval == -ERESTARTNOHAND ||
+                    retval == -ERESTART_RESTARTBLOCK ||
+                    (retval == -ERESTARTSYS &&
+                     !(ksig.ka.sa.sa_flags & SA_RESTART)))) {
+                       regs->a0 = -EINTR;
+                       regs->epc = continue_addr;
+               }
+
+               /* Actually deliver the signal */
+               handle_signal(&ksig, regs);
+               return;
+       }
+
+       /*
+        * Handle restarting a different system call. As above, if a debugger
+        * has chosen to restart at a different PC, ignore the restart.
+        */
+       if (syscall && regs->epc == restart_addr && retval == -ERESTART_RESTARTBLOCK)
+               regs->a7 = __NR_restart_syscall;
+
        /*
         * If there is no signal to deliver, we just put the saved
         * sigmask back.
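
The rework above follows the arm64 pattern: the PC is speculatively rewound to restart_addr before get_signal() runs, so a ptracer observes the post-restart state, and the rewind is undone afterwards if the chosen handler's flags forbid a restart. The decision reduces to the following (an illustrative distillation, not kernel source; bool and the -ERESTART* codes come from linux/types.h and the kernel-internal linux/errno.h):

#include <linux/errno.h>
#include <linux/types.h>

/* should the rewound PC be kept once a handler is about to run? */
static bool keep_restart_with_handler(long retval, bool sa_restart)
{
        switch (retval) {
        case -ERESTARTNOINTR:
                return true;            /* restart regardless of the handler */
        case -ERESTARTSYS:
                return sa_restart;      /* only if the handler set SA_RESTART */
        case -ERESTARTNOHAND:
        case -ERESTART_RESTARTBLOCK:
                return false;           /* never restarted across a handler */
        default:
                return true;            /* not a restart code; PC untouched */
        }
}
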
index d1b0a6fc3adfc740f89af3b85883bf2ea82af4ec..d162bf339beb16e3e4ffcb2b7d755ded9712e895 100644 (file)
@@ -248,7 +248,6 @@ asmlinkage __visible void smp_callin(void)
 
        numa_add_cpu(curr_cpuid);
        set_cpu_online(curr_cpuid, 1);
-       check_unaligned_access(curr_cpuid);
 
        if (has_vector()) {
                if (riscv_v_setup_vsize())
index d5cf8b57577739f5ae615b1e3cbd1b117dbeff19..2d54f309c14059ad901f80448df2ff257f047388 100644 (file)
@@ -16,7 +16,7 @@
        .altmacro
        .option norelax
 
-ENTRY(__cpu_suspend_enter)
+SYM_FUNC_START(__cpu_suspend_enter)
        /* Save registers (except A0 and T0-T6) */
        REG_S   ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0)
        REG_S   sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0)
@@ -57,7 +57,7 @@ ENTRY(__cpu_suspend_enter)
 
        /* Return to C code */
        ret
-END(__cpu_suspend_enter)
+SYM_FUNC_END(__cpu_suspend_enter)
 
 SYM_TYPED_FUNC_START(__cpu_resume_enter)
        /* Load the global pointer */
diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug
new file mode 100644 (file)
index 0000000..5dba64e
--- /dev/null
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menu "arch/riscv/kernel Testing and Coverage"
+
+config AS_HAS_ULEB128
+       def_bool $(as-instr,.reloc label$(comma) R_RISCV_SET_ULEB128$(comma) 127\n.reloc label$(comma) R_RISCV_SUB_ULEB128$(comma) 127\nlabel:\n.word 0)
+
+menuconfig RUNTIME_KERNEL_TESTING_MENU
+       bool "arch/riscv/kernel runtime Testing"
+       def_bool y
+       help
+         Enable riscv kernel runtime testing.
+
+if RUNTIME_KERNEL_TESTING_MENU
+
+config RISCV_MODULE_LINKING_KUNIT
+       bool "KUnit test riscv module linking at runtime" if !KUNIT_ALL_TESTS
+       depends on KUNIT
+       default KUNIT_ALL_TESTS
+       help
+         Enable this option to test riscv module linking at boot. This will
+         enable a module called "test_module_linking".
+
+         KUnit tests run during boot and output the results to the debug log
+         in TAP format (http://testanything.org/). Only useful for kernel devs
+         running the KUnit test harness, and not intended for inclusion into a
+         production build.
+
+         For more information on KUnit and unit tests in general please refer
+         to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+         If unsure, say N.
+
+endif # RUNTIME_KERNEL_TESTING_MENU
+
+endmenu # "arch/riscv/kernel runtime Testing"
diff --git a/arch/riscv/kernel/tests/Makefile b/arch/riscv/kernel/tests/Makefile
new file mode 100644 (file)
index 0000000..7d6c76c
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_RISCV_MODULE_LINKING_KUNIT)       += module_test/
diff --git a/arch/riscv/kernel/tests/module_test/Makefile b/arch/riscv/kernel/tests/module_test/Makefile
new file mode 100644 (file)
index 0000000..d7a6fd8
--- /dev/null
@@ -0,0 +1,15 @@
+obj-m += test_module_linking.o
+
+test_sub := test_sub6.o test_sub8.o test_sub16.o test_sub32.o test_sub64.o
+
+test_set := test_set6.o test_set8.o test_set16.o test_set32.o
+
+test_module_linking-objs += $(test_sub)
+
+test_module_linking-objs += $(test_set)
+
+ifeq ($(CONFIG_AS_HAS_ULEB128),y)
+test_module_linking-objs += test_uleb128.o
+endif
+
+test_module_linking-objs += test_module_linking_main.o
diff --git a/arch/riscv/kernel/tests/module_test/test_module_linking_main.c b/arch/riscv/kernel/tests/module_test/test_module_linking_main.c
new file mode 100644 (file)
index 0000000..8df5fa5
--- /dev/null
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <kunit/test.h>
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Test module linking");
+
+extern int test_set32(void);
+extern int test_set16(void);
+extern int test_set8(void);
+extern int test_set6(void);
+extern long test_sub64(void);
+extern int test_sub32(void);
+extern int test_sub16(void);
+extern int test_sub8(void);
+extern int test_sub6(void);
+
+#ifdef CONFIG_AS_HAS_ULEB128
+extern int test_uleb_basic(void);
+extern int test_uleb_large(void);
+#endif
+
+#define CHECK_EQ(lhs, rhs) KUNIT_ASSERT_EQ(test, lhs, rhs)
+
+void run_test_set(struct kunit *test);
+void run_test_sub(struct kunit *test);
+void run_test_uleb(struct kunit *test);
+
+void run_test_set(struct kunit *test)
+{
+       int val32 = test_set32();
+       int val16 = test_set16();
+       int val8 = test_set8();
+       int val6 = test_set6();
+
+       CHECK_EQ(val32, 0);
+       CHECK_EQ(val16, 0);
+       CHECK_EQ(val8, 0);
+       CHECK_EQ(val6, 0);
+}
+
+void run_test_sub(struct kunit *test)
+{
+       int val64 = test_sub64();
+       int val32 = test_sub32();
+       int val16 = test_sub16();
+       int val8 = test_sub8();
+       int val6 = test_sub6();
+
+       CHECK_EQ(val64, 0);
+       CHECK_EQ(val32, 0);
+       CHECK_EQ(val16, 0);
+       CHECK_EQ(val8, 0);
+       CHECK_EQ(val6, 0);
+}
+
+#ifdef CONFIG_AS_HAS_ULEB128
+void run_test_uleb(struct kunit *test)
+{
+       int val_uleb = test_uleb_basic();
+       int val_uleb2 = test_uleb_large();
+
+       CHECK_EQ(val_uleb, 0);
+       CHECK_EQ(val_uleb2, 0);
+}
+#endif
+
+static struct kunit_case __refdata riscv_module_linking_test_cases[] = {
+       KUNIT_CASE(run_test_set),
+       KUNIT_CASE(run_test_sub),
+#ifdef CONFIG_AS_HAS_ULEB128
+       KUNIT_CASE(run_test_uleb),
+#endif
+       {}
+};
+
+static struct kunit_suite riscv_module_linking_test_suite = {
+       .name = "riscv_module_linking",
+       .test_cases = riscv_module_linking_test_cases,
+};
+
+kunit_test_suites(&riscv_module_linking_test_suite);
diff --git a/arch/riscv/kernel/tests/module_test/test_set16.S b/arch/riscv/kernel/tests/module_test/test_set16.S
new file mode 100644 (file)
index 0000000..2be0e44
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_set16
+test_set16:
+       lw      a0, set16
+       la      t0, set16
+#ifdef CONFIG_32BIT
+       slli    t0, t0, 16
+       srli    t0, t0, 16
+#else
+       slli    t0, t0, 48
+       srli    t0, t0, 48
+#endif
+       sub     a0, a0, t0
+       ret
+.data
+set16:
+       .reloc set16, R_RISCV_SET16, set16
+       .word 0
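
The set-style tests here and below all use the same trick: the .reloc makes the module loader store the low N bits of the data word's own address, and the code recomputes that mask with a slli/srli pair, so the subtraction yields zero exactly when the relocation was applied correctly. A host-side model of the arithmetic:

#include <assert.h>
#include <stdint.h>

static int set16_word;  /* stands in for the .word at the set16 label */

int main(void)
{
        /* what R_RISCV_SET16 stores: the address's low 16 bits */
        uint16_t stored = (uint16_t)(uintptr_t)&set16_word;

        /* what the slli/srli pair recomputes at run time */
        uint16_t masked = (uintptr_t)&set16_word & 0xffff;

        assert(stored == masked);       /* hence a0 - t0 == 0 */
        return 0;
}
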
diff --git a/arch/riscv/kernel/tests/module_test/test_set32.S b/arch/riscv/kernel/tests/module_test/test_set32.S
new file mode 100644 (file)
index 0000000..de04445
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_set32
+test_set32:
+       lw      a0, set32
+       la      t0, set32
+#ifndef CONFIG_32BIT
+       slli    t0, t0, 32
+       srli    t0, t0, 32
+#endif
+       sub     a0, a0, t0
+       ret
+.data
+set32:
+       .reloc set32, R_RISCV_SET32, set32
+       .word 0
diff --git a/arch/riscv/kernel/tests/module_test/test_set6.S b/arch/riscv/kernel/tests/module_test/test_set6.S
new file mode 100644 (file)
index 0000000..c39ce4c
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_set6
+test_set6:
+       lw      a0, set6
+       la      t0, set6
+#ifdef CONFIG_32BIT
+       slli    t0, t0, 26
+       srli    t0, t0, 26
+#else
+       slli    t0, t0, 58
+       srli    t0, t0, 58
+#endif
+       sub     a0, a0, t0
+       ret
+.data
+set6:
+       .reloc set6, R_RISCV_SET6, set6
+       .word 0
diff --git a/arch/riscv/kernel/tests/module_test/test_set8.S b/arch/riscv/kernel/tests/module_test/test_set8.S
new file mode 100644 (file)
index 0000000..a656173
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_set8
+test_set8:
+       lw      a0, set8
+       la      t0, set8
+#ifdef CONFIG_32BIT
+       slli    t0, t0, 24
+       srli    t0, t0, 24
+#else
+       slli    t0, t0, 56
+       srli    t0, t0, 56
+#endif
+       sub     a0, a0, t0
+       ret
+.data
+set8:
+       .reloc set8, R_RISCV_SET8, set8
+       .word 0
diff --git a/arch/riscv/kernel/tests/module_test/test_sub16.S b/arch/riscv/kernel/tests/module_test/test_sub16.S
new file mode 100644 (file)
index 0000000..80f731d
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_sub16
+test_sub16:
+       lh      a0, sub16
+       addi    a0, a0, -32
+       ret
+first:
+       .space 32
+second:
+
+.data
+sub16:
+       .reloc          sub16, R_RISCV_ADD16, second
+       .reloc          sub16, R_RISCV_SUB16, first
+       .half           0
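
The sub-style tests (this one and the sub32/sub6/sub64/sub8 variants below) share one pattern: 32 bytes of .space separate the labels first and second, and the ADD/SUB relocation pair makes the module loader store second - first = 32 into the data word, which the test then loads and cancels with addi -32. A host-side model:

#include <assert.h>
#include <stdint.h>

static const char first[32];    /* stands in for the .space 32 */
#define second (first + 32)

int main(void)
{
        uint16_t sub16 = 0;

        sub16 += (uint16_t)(uintptr_t)second;   /* R_RISCV_ADD16 */
        sub16 -= (uint16_t)(uintptr_t)first;    /* R_RISCV_SUB16 */

        assert(sub16 == 32);    /* cancelled by the addi a0, a0, -32 */
        return 0;
}
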
diff --git a/arch/riscv/kernel/tests/module_test/test_sub32.S b/arch/riscv/kernel/tests/module_test/test_sub32.S
new file mode 100644 (file)
index 0000000..a341686
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_sub32
+test_sub32:
+       lw      a0, sub32
+       addi    a0, a0, -32
+       ret
+first:
+       .space 32
+second:
+
+.data
+sub32:
+       .reloc          sub32, R_RISCV_ADD32, second
+       .reloc          sub32, R_RISCV_SUB32, first
+       .word           0
diff --git a/arch/riscv/kernel/tests/module_test/test_sub6.S b/arch/riscv/kernel/tests/module_test/test_sub6.S
new file mode 100644 (file)
index 0000000..e8b61c1
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_sub6
+test_sub6:
+       lb      a0, sub6
+       addi    a0, a0, -32
+       ret
+first:
+       .space 32
+second:
+
+.data
+sub6:
+       .reloc          sub6, R_RISCV_SET6, second
+       .reloc          sub6, R_RISCV_SUB6, first
+       .byte           0
diff --git a/arch/riscv/kernel/tests/module_test/test_sub64.S b/arch/riscv/kernel/tests/module_test/test_sub64.S
new file mode 100644 (file)
index 0000000..a59e8af
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_sub64
+test_sub64:
+#ifdef CONFIG_32BIT
+       lw      a0, sub64
+#else
+       ld      a0, sub64
+#endif
+       addi    a0, a0, -32
+       ret
+first:
+       .space 32
+second:
+
+.data
+sub64:
+       .reloc          sub64, R_RISCV_ADD64, second
+       .reloc          sub64, R_RISCV_SUB64, first
+       .word           0
+       .word           0
diff --git a/arch/riscv/kernel/tests/module_test/test_sub8.S b/arch/riscv/kernel/tests/module_test/test_sub8.S
new file mode 100644 (file)
index 0000000..ac5d0ec
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_sub8
+test_sub8:
+       lb      a0, sub8
+       addi    a0, a0, -32
+       ret
+first:
+       .space 32
+second:
+
+.data
+sub8:
+       .reloc          sub8, R_RISCV_ADD8, second
+       .reloc          sub8, R_RISCV_SUB8, first
+       .byte           0
diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S
new file mode 100644 (file)
index 0000000..90f2204
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Rivos Inc.
+ */
+
+.text
+.global test_uleb_basic
+test_uleb_basic:
+       ld      a0, second
+       addi    a0, a0, -127
+       ret
+
+.global test_uleb_large
+test_uleb_large:
+       ld      a0, fourth
+       addi    a0, a0, -0x07e8
+       ret
+
+.data
+first:
+       .space 127
+second:
+       .reloc second, R_RISCV_SET_ULEB128, second
+       .reloc second, R_RISCV_SUB_ULEB128, first
+       .dword 0
+third:
+       .space 1000
+fourth:
+       .reloc fourth, R_RISCV_SET_ULEB128, fourth
+       .reloc fourth, R_RISCV_SUB_ULEB128, third
+       .dword 0
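
Worked check of the expected constants above: in the basic test, second - first = 127, which ULEB128-encodes as the single byte 0x7f, so the ld yields 127. In the large test, fourth - third = 1000, which encodes as the byte pair 0xe8 0x07; read back as a little-endian doubleword that is 0x07e8, hence the addi of -0x07e8. In snippet form:

        assert(((1000 & 0x7f) | 0x80) == 0xe8); /* first byte, continuation set */
        assert((1000 >> 7) == 0x07);            /* last byte, top bit clear */
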
index c9d698518bae19f529fd388d758a505ac5a0d7d1..a1b9be3c4332d97f08b50beebfcadba5adaa02be 100644 (file)
@@ -36,7 +36,21 @@ int show_unhandled_signals = 1;
 
 static DEFINE_SPINLOCK(die_lock);
 
-static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs)
+static int copy_code(struct pt_regs *regs, u16 *val, const u16 *insns)
+{
+       const void __user *uaddr = (__force const void __user *)insns;
+
+       if (!user_mode(regs))
+               return get_kernel_nofault(*val, insns);
+
+       /* The user space code from other tasks cannot be accessed. */
+       if (regs != task_pt_regs(current))
+               return -EPERM;
+
+       return copy_from_user_nofault(val, uaddr, sizeof(*val));
+}
+
+static void dump_instr(const char *loglvl, struct pt_regs *regs)
 {
        char str[sizeof("0000 ") * 12 + 2 + 1], *p = str;
        const u16 *insns = (u16 *)instruction_pointer(regs);
@@ -45,7 +59,7 @@ static void dump_kernel_instr(const char *loglvl, struct pt_regs *regs)
        int i;
 
        for (i = -10; i < 2; i++) {
-               bad = get_kernel_nofault(val, &insns[i]);
+               bad = copy_code(regs, &val, &insns[i]);
                if (!bad) {
                        p += sprintf(p, i == 0 ? "(%04hx) " : "%04hx ", val);
                } else {
@@ -74,7 +88,7 @@ void die(struct pt_regs *regs, const char *str)
        print_modules();
        if (regs) {
                show_regs(regs);
-               dump_kernel_instr(KERN_EMERG, regs);
+               dump_instr(KERN_EMERG, regs);
        }
 
        cause = regs ? regs->cause : -1;
@@ -107,6 +121,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
                print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
                pr_cont("\n");
                __show_regs(regs);
+               dump_instr(KERN_EMERG, regs);
        }
 
        force_sig_fault(signo, code, (void __user *)addr);
@@ -181,14 +196,6 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re
 
 DO_ERROR_INFO(do_trap_load_fault,
        SIGSEGV, SEGV_ACCERR, "load access fault");
-#ifndef CONFIG_RISCV_M_MODE
-DO_ERROR_INFO(do_trap_load_misaligned,
-       SIGBUS, BUS_ADRALN, "Oops - load address misaligned");
-DO_ERROR_INFO(do_trap_store_misaligned,
-       SIGBUS, BUS_ADRALN, "Oops - store (or AMO) address misaligned");
-#else
-int handle_misaligned_load(struct pt_regs *regs);
-int handle_misaligned_store(struct pt_regs *regs);
 
 asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs)
 {
@@ -231,7 +238,6 @@ asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs
                irqentry_nmi_exit(regs, state);
        }
 }
-#endif
 DO_ERROR_INFO(do_trap_store_fault,
        SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault");
 DO_ERROR_INFO(do_trap_ecall_s,
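
dump_instr() now works for user-mode faults as well: copy_code() goes through get_kernel_nofault() for kernel traps and copy_from_user_nofault() for the current task's user mode, and refuses to read another task's user memory. A userspace model of the 12-halfword window it prints (the halfword at epc is parenthesized), with the fault-safe accessor abstracted out; this is a sketch, not the kernel's exact printing code:

    #include <stdint.h>
    #include <stdio.h>

    /* read16() stands in for copy_code(); nonzero means the fetch failed. */
    static void dump_window(const uint16_t *epc,
                            int (*read16)(const uint16_t *addr, uint16_t *val))
    {
            int i;

            for (i = -10; i < 2; i++) {
                    uint16_t val;

                    if (read16(&epc[i], &val))
                            printf(i == 0 ? "(%s) " : "%s ", "????");
                    else
                            printf(i == 0 ? "(%04hx) " : "%04hx ", val);
            }
            printf("\n");
    }
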
index 378f5b151443564020e775edfc13e6e90e557152..5eba37147caa96c077eb9ffb89233e1f679fed6d 100644 (file)
@@ -6,12 +6,16 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/perf_event.h>
 #include <linux/irq.h>
 #include <linux/stringify.h>
 
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/csr.h>
+#include <asm/entry-common.h>
+#include <asm/hwprobe.h>
+#include <asm/cpufeature.h>
 
 #define INSN_MATCH_LB                  0x3
 #define INSN_MASK_LB                   0x707f
 #define PRECISION_S 0
 #define PRECISION_D 1
 
-#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn)                 \
-static inline type load_##type(const type *addr)                       \
-{                                                                      \
-       type val;                                                       \
-       asm (#insn " %0, %1"                                            \
-       : "=&r" (val) : "m" (*addr));                                   \
-       return val;                                                     \
+#ifdef CONFIG_FPU
+
+#define FP_GET_RD(insn)                (insn >> 7 & 0x1F)
+
+extern void put_f32_reg(unsigned long fp_reg, unsigned long value);
+
+static int set_f32_rd(unsigned long insn, struct pt_regs *regs,
+                     unsigned long val)
+{
+       unsigned long fp_reg = FP_GET_RD(insn);
+
+       put_f32_reg(fp_reg, val);
+       regs->status |= SR_FS_DIRTY;
+
+       return 0;
 }
 
-#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn)                        \
-static inline void store_##type(type *addr, type val)                  \
-{                                                                      \
-       asm volatile (#insn " %0, %1\n"                                 \
-       : : "r" (val), "m" (*addr));                                    \
+extern void put_f64_reg(unsigned long fp_reg, unsigned long value);
+
+static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val)
+{
+       unsigned long fp_reg = FP_GET_RD(insn);
+       unsigned long value;
+
+#if __riscv_xlen == 32
+       value = (unsigned long) &val;
+#else
+       value = val;
+#endif
+       put_f64_reg(fp_reg, value);
+       regs->status |= SR_FS_DIRTY;
+
+       return 0;
 }
 
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u8, lbu)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u16, lhu)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s8, lb)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s16, lh)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s32, lw)
-DECLARE_UNPRIVILEGED_STORE_FUNCTION(u8, sb)
-DECLARE_UNPRIVILEGED_STORE_FUNCTION(u16, sh)
-DECLARE_UNPRIVILEGED_STORE_FUNCTION(u32, sw)
-#if defined(CONFIG_64BIT)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lwu)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64, ld)
-DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64, sd)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, ld)
+#if __riscv_xlen == 32
+extern void get_f64_reg(unsigned long fp_reg, u64 *value);
+
+static u64 get_f64_rs(unsigned long insn, u8 fp_reg_offset,
+                     struct pt_regs *regs)
+{
+       unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
+       u64 val;
+
+       get_f64_reg(fp_reg, &val);
+       regs->status |= SR_FS_DIRTY;
+
+       return val;
+}
 #else
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lw)
-DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, lw)
 
-static inline u64 load_u64(const u64 *addr)
+extern unsigned long get_f64_reg(unsigned long fp_reg);
+
+static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
+                               struct pt_regs *regs)
 {
-       return load_u32((u32 *)addr)
-               + ((u64)load_u32((u32 *)addr + 1) << 32);
+       unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
+       unsigned long val;
+
+       val = get_f64_reg(fp_reg);
+       regs->status |= SR_FS_DIRTY;
+
+       return val;
 }
 
-static inline void store_u64(u64 *addr, u64 val)
+#endif
+
+extern unsigned long get_f32_reg(unsigned long fp_reg);
+
+static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
+                               struct pt_regs *regs)
 {
-       store_u32((u32 *)addr, val);
-       store_u32((u32 *)addr + 1, val >> 32);
+       unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
+       unsigned long val;
+
+       val = get_f32_reg(fp_reg);
+       regs->status |= SR_FS_DIRTY;
+
+       return val;
 }
+
+#else /* CONFIG_FPU */
+static void set_f32_rd(unsigned long insn, struct pt_regs *regs,
+                      unsigned long val) {}
+
+static void set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val) {}
+
+static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
+                               struct pt_regs *regs)
+{
+       return 0;
+}
+
+static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
+                               struct pt_regs *regs)
+{
+       return 0;
+}
+
 #endif
 
-static inline ulong get_insn(ulong mepc)
+#define GET_F64_RS2(insn, regs) (get_f64_rs(insn, 20, regs))
+#define GET_F64_RS2C(insn, regs) (get_f64_rs(insn, 2, regs))
+#define GET_F64_RS2S(insn, regs) (get_f64_rs(RVC_RS2S(insn), 0, regs))
+
+#define GET_F32_RS2(insn, regs) (get_f32_rs(insn, 20, regs))
+#define GET_F32_RS2C(insn, regs) (get_f32_rs(insn, 2, regs))
+#define GET_F32_RS2S(insn, regs) (get_f32_rs(RVC_RS2S(insn), 0, regs))
+
+#ifdef CONFIG_RISCV_M_MODE
+static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
+{
+       u8 val;
+
+       asm volatile("lbu %0, %1" : "=&r" (val) : "m" (*addr));
+       *r_val = val;
+
+       return 0;
+}
+
+static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val)
+{
+       asm volatile ("sb %0, %1\n" : : "r" (val), "m" (*addr));
+
+       return 0;
+}
+
+static inline int get_insn(struct pt_regs *regs, ulong mepc, ulong *r_insn)
 {
        register ulong __mepc asm ("a2") = mepc;
        ulong val, rvc_mask = 3, tmp;
@@ -226,23 +311,119 @@ static inline ulong get_insn(ulong mepc)
        : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask),
          [xlen_minus_16] "i" (XLEN_MINUS_16));
 
-       return val;
+       *r_insn = val;
+
+       return 0;
+}
+#else
+static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
+{
+       if (user_mode(regs)) {
+               return __get_user(*r_val, addr);
+       } else {
+               *r_val = *addr;
+               return 0;
+       }
+}
+
+static inline int store_u8(struct pt_regs *regs, u8 *addr, u8 val)
+{
+       if (user_mode(regs)) {
+               return __put_user(val, addr);
+       } else {
+               *addr = val;
+               return 0;
+       }
 }
 
+#define __read_insn(regs, insn, insn_addr)             \
+({                                                     \
+       int __ret;                                      \
+                                                       \
+       if (user_mode(regs)) {                          \
+               __ret = __get_user(insn, insn_addr);    \
+       } else {                                        \
+               insn = *insn_addr;                      \
+               __ret = 0;                              \
+       }                                               \
+                                                       \
+       __ret;                                          \
+})
+
+static inline int get_insn(struct pt_regs *regs, ulong epc, ulong *r_insn)
+{
+       ulong insn = 0;
+
+       if (epc & 0x2) {
+               ulong tmp = 0;
+               u16 __user *insn_addr = (u16 __user *)epc;
+
+               if (__read_insn(regs, insn, insn_addr))
+                       return -EFAULT;
+               /* __get_user() uses a regular "lw", which sign-extends the
+                * loaded value; make sure to clear the higher-order bits in
+                * case we "or" it below with the upper 16-bit half.
+                */
+               insn &= GENMASK(15, 0);
+               if ((insn & __INSN_LENGTH_MASK) != __INSN_LENGTH_32) {
+                       *r_insn = insn;
+                       return 0;
+               }
+               insn_addr++;
+               if (__read_insn(regs, tmp, insn_addr))
+                       return -EFAULT;
+               *r_insn = (tmp << 16) | insn;
+
+               return 0;
+       } else {
+               u32 __user *insn_addr = (u32 __user *)epc;
+
+               if (__read_insn(regs, insn, insn_addr))
+                       return -EFAULT;
+               if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) {
+                       *r_insn = insn;
+                       return 0;
+               }
+               insn &= GENMASK(15, 0);
+               *r_insn = insn;
+
+               return 0;
+       }
+}
+#endif
+
 union reg_data {
        u8 data_bytes[8];
        ulong data_ulong;
        u64 data_u64;
 };
 
+static bool unaligned_ctl __read_mostly;
+
+/* sysctl hooks */
+int unaligned_enabled __read_mostly = 1;       /* Enabled by default */
+
 int handle_misaligned_load(struct pt_regs *regs)
 {
        union reg_data val;
        unsigned long epc = regs->epc;
-       unsigned long insn = get_insn(epc);
-       unsigned long addr = csr_read(mtval);
+       unsigned long insn;
+       unsigned long addr = regs->badaddr;
        int i, fp = 0, shift = 0, len = 0;
 
+       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
+
+       *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
+
+       if (!unaligned_enabled)
+               return -1;
+
+       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
+               return -1;
+
+       if (get_insn(regs, epc, &insn))
+               return -1;
+
        regs->epc = 0;
 
        if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
@@ -305,13 +486,21 @@ int handle_misaligned_load(struct pt_regs *regs)
                return -1;
        }
 
+       if (!IS_ENABLED(CONFIG_FPU) && fp)
+               return -EOPNOTSUPP;
+
        val.data_u64 = 0;
-       for (i = 0; i < len; i++)
-               val.data_bytes[i] = load_u8((void *)(addr + i));
+       for (i = 0; i < len; i++) {
+               if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i]))
+                       return -1;
+       }
 
-       if (fp)
-               return -1;
-       SET_RD(insn, regs, val.data_ulong << shift >> shift);
+       if (!fp)
+               SET_RD(insn, regs, val.data_ulong << shift >> shift);
+       else if (len == 8)
+               set_f64_rd(insn, regs, val.data_u64);
+       else
+               set_f32_rd(insn, regs, val.data_ulong);
 
        regs->epc = epc + INSN_LEN(insn);
 
@@ -322,9 +511,20 @@ int handle_misaligned_store(struct pt_regs *regs)
 {
        union reg_data val;
        unsigned long epc = regs->epc;
-       unsigned long insn = get_insn(epc);
-       unsigned long addr = csr_read(mtval);
-       int i, len = 0;
+       unsigned long insn;
+       unsigned long addr = regs->badaddr;
+       int i, len = 0, fp = 0;
+
+       perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
+
+       if (!unaligned_enabled)
+               return -1;
+
+       if (user_mode(regs) && (current->thread.align_ctl & PR_UNALIGN_SIGBUS))
+               return -1;
+
+       if (get_insn(regs, epc, &insn))
+               return -1;
 
        regs->epc = 0;
 
@@ -336,6 +536,14 @@ int handle_misaligned_store(struct pt_regs *regs)
        } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
                len = 8;
 #endif
+       } else if ((insn & INSN_MASK_FSD) == INSN_MATCH_FSD) {
+               fp = 1;
+               len = 8;
+               val.data_u64 = GET_F64_RS2(insn, regs);
+       } else if ((insn & INSN_MASK_FSW) == INSN_MATCH_FSW) {
+               fp = 1;
+               len = 4;
+               val.data_ulong = GET_F32_RS2(insn, regs);
        } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
                len = 2;
 #if defined(CONFIG_64BIT)
@@ -354,15 +562,88 @@ int handle_misaligned_store(struct pt_regs *regs)
                   ((insn >> SH_RD) & 0x1f)) {
                len = 4;
                val.data_ulong = GET_RS2C(insn, regs);
+       } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) {
+               fp = 1;
+               len = 8;
+               val.data_u64 = GET_F64_RS2S(insn, regs);
+       } else if ((insn & INSN_MASK_C_FSDSP) == INSN_MATCH_C_FSDSP) {
+               fp = 1;
+               len = 8;
+               val.data_u64 = GET_F64_RS2C(insn, regs);
+#if !defined(CONFIG_64BIT)
+       } else if ((insn & INSN_MASK_C_FSW) == INSN_MATCH_C_FSW) {
+               fp = 1;
+               len = 4;
+               val.data_ulong = GET_F32_RS2S(insn, regs);
+       } else if ((insn & INSN_MASK_C_FSWSP) == INSN_MATCH_C_FSWSP) {
+               fp = 1;
+               len = 4;
+               val.data_ulong = GET_F32_RS2C(insn, regs);
+#endif
        } else {
                regs->epc = epc;
                return -1;
        }
 
-       for (i = 0; i < len; i++)
-               store_u8((void *)(addr + i), val.data_bytes[i]);
+       if (!IS_ENABLED(CONFIG_FPU) && fp)
+               return -EOPNOTSUPP;
+
+       for (i = 0; i < len; i++) {
+               if (store_u8(regs, (void *)(addr + i), val.data_bytes[i]))
+                       return -1;
+       }
 
        regs->epc = epc + INSN_LEN(insn);
 
        return 0;
 }
+
+bool check_unaligned_access_emulated(int cpu)
+{
+       long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
+       unsigned long tmp_var, tmp_val;
+       bool misaligned_emu_detected;
+
+       *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+
+       __asm__ __volatile__ (
+               "       "REG_L" %[tmp], 1(%[ptr])\n"
+               : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory");
+
+       misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED);
+       /*
+        * If unaligned_ctl is already set, it means we detected at boot time
+        * that all CPUs use emulated misaligned accesses. If that changed
+        * when hotplugging a new CPU, this is something we don't handle.
+        */
+       if (unlikely(unaligned_ctl && !misaligned_emu_detected)) {
+               pr_crit("CPU misaligned accesses non homogeneous (expected all emulated)\n");
+               while (true)
+                       cpu_relax();
+       }
+
+       return misaligned_emu_detected;
+}
+
+void unaligned_emulation_finish(void)
+{
+       int cpu;
+
+       /*
+        * We can only support PR_UNALIGN controls if all CPUs have misaligned
+        * accesses emulated, since tasks requesting such control can run on any
+        * CPU.
+        */
+       for_each_present_cpu(cpu) {
+               if (per_cpu(misaligned_access_speed, cpu) !=
+                                       RISCV_HWPROBE_MISALIGNED_EMULATED) {
+                       return;
+               }
+       }
+       unaligned_ctl = true;
+}
+
+bool unaligned_ctl_available(void)
+{
+       return unaligned_ctl;
+}
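
For the integer loads above, the emulation assembles the value one byte at a time (little-endian, as in the byte loop) and sign-extends it with a shift pair, where shift = 8 * (sizeof(long) - len) for the sign-extending variants. A userspace model of a misaligned "lw", done on a signed type so the right shift is arithmetic:

    #include <stdint.h>

    /* Emulate a misaligned 32-bit signed load (lw) using byte accesses. */
    static long emulate_lw(const uint8_t *addr)
    {
            unsigned long val = 0;
            int shift = 8 * (sizeof(long) - 4);
            int i;

            for (i = 0; i < 4; i++)
                    val |= (unsigned long)addr[i] << (8 * i);

            /* Move the 32-bit payload to the top of the register, then
             * arithmetic-shift it back down to sign-extend. */
            return (long)(val << shift) >> shift;
    }
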
index 82f97d67c23e9bdde94b0d2f655f52d32c8fd6d1..8f884227e8bca7fd3634217e71d4ee4ed122559a 100644 (file)
@@ -8,7 +8,7 @@
 
        .text
 /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
-ENTRY(__vdso_flush_icache)
+SYM_FUNC_START(__vdso_flush_icache)
        .cfi_startproc
 #ifdef CONFIG_SMP
        li a7, __NR_riscv_flush_icache
@@ -19,4 +19,4 @@ ENTRY(__vdso_flush_icache)
 #endif
        ret
        .cfi_endproc
-ENDPROC(__vdso_flush_icache)
+SYM_FUNC_END(__vdso_flush_icache)
index bb0c05e2ffbae3d6aa3609fc5b5de84630aaa37d..9c1bd531907f2fefda1d0778191073ca6b70df1a 100644 (file)
@@ -8,11 +8,11 @@
 
        .text
 /* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */
-ENTRY(__vdso_getcpu)
+SYM_FUNC_START(__vdso_getcpu)
        .cfi_startproc
        /* For now, just do the syscall. */
        li a7, __NR_getcpu
        ecall
        ret
        .cfi_endproc
-ENDPROC(__vdso_getcpu)
+SYM_FUNC_END(__vdso_getcpu)
index 10438c7c626acc8034fa22d6765422fbc7b67f0b..3dc022aa8931ad3b3798f4cc492ce795dd7b7bf5 100644 (file)
@@ -7,10 +7,10 @@
 #include <asm/unistd.h>
 
        .text
-ENTRY(__vdso_rt_sigreturn)
+SYM_FUNC_START(__vdso_rt_sigreturn)
        .cfi_startproc
        .cfi_signal_frame
        li a7, __NR_rt_sigreturn
        ecall
        .cfi_endproc
-ENDPROC(__vdso_rt_sigreturn)
+SYM_FUNC_END(__vdso_rt_sigreturn)
index 4e704146c77a092e481b8b532c19b11e3efa82e4..77e57f8305216c466f51979c91899754b4a7b382 100644 (file)
@@ -5,11 +5,11 @@
 #include <asm/unistd.h>
 
 .text
-ENTRY(riscv_hwprobe)
+SYM_FUNC_START(riscv_hwprobe)
        .cfi_startproc
        li a7, __NR_riscv_hwprobe
        ecall
        ret
 
        .cfi_endproc
-ENDPROC(riscv_hwprobe)
+SYM_FUNC_END(riscv_hwprobe)
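
The vDSO entry above simply traps into __NR_riscv_hwprobe. A hedged userspace sketch of querying the misaligned-access performance key that the emulation path reports; the key and mask names are assumptions taken from the uapi <asm/hwprobe.h>:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <asm/hwprobe.h>

    int main(void)
    {
            struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };

            /* One key/value pair, no CPU set: query across all online CPUs. */
            if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0))
                    return 1;

            if ((pair.value & RISCV_HWPROBE_MISALIGNED_MASK) ==
                RISCV_HWPROBE_MISALIGNED_EMULATED)
                    printf("misaligned accesses are emulated\n");

            return 0;
    }
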
index 82ce64900f3d7e7af48a211b8e24aeec0952d504..cbe2a179331d2511a8b4a26c06383e46131661b1 100644 (file)
@@ -23,35 +23,31 @@ SECTIONS
        .gnu.version_d  : { *(.gnu.version_d) }
        .gnu.version_r  : { *(.gnu.version_r) }
 
-       .note           : { *(.note.*) }                :text   :note
        .dynamic        : { *(.dynamic) }               :text   :dynamic
 
+       .rodata         : {
+               *(.rodata .rodata.* .gnu.linkonce.r.*)
+               *(.got.plt) *(.got)
+               *(.data .data.* .gnu.linkonce.d.*)
+               *(.dynbss)
+               *(.bss .bss.* .gnu.linkonce.b.*)
+       }
+
+       .note           : { *(.note.*) }                :text   :note
+
        .eh_frame_hdr   : { *(.eh_frame_hdr) }          :text   :eh_frame_hdr
        .eh_frame       : { KEEP (*(.eh_frame)) }       :text
 
-       .rodata         : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
-
        /*
-        * This linker script is used both with -r and with -shared.
-        * For the layouts to match, we need to skip more than enough
-        * space for the dynamic symbol table, etc. If this amount is
-        * insufficient, ld -shared will error; simply increase it here.
+        * Text is well-separated from actual data: there's plenty of
+        * stuff that isn't used at runtime in between.
         */
-       . = 0x800;
+       . = ALIGN(16);
        .text           : { *(.text .text.*) }          :text
 
        . = ALIGN(4);
        .alternative : {
-               __alt_start = .;
                *(.alternative)
-               __alt_end = .;
-       }
-
-       .data           : {
-               *(.got.plt) *(.got)
-               *(.data .data.* .gnu.linkonce.d.*)
-               *(.dynbss)
-               *(.bss .bss.* .gnu.linkonce.b.*)
        }
 }
 
index 74bb27440527b3bafa1418fc75337162e9bc3589..a944294f6f23a70335070dc877588321429da0de 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/kvm_host.h>
 #include <linux/percpu.h>
 #include <linux/spinlock.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm_aia_imsic.h>
 
 struct aia_hgei_control {
index 48ae0d4b3932457f642760b01c831bf84f5fb3bc..225a435d9c9a9c25b8cf24f4501a2e9e3bb94d1b 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/module.h>
 #include <linux/kvm_host.h>
 #include <asm/csr.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/sbi.h>
 
 long kvm_arch_dev_ioctl(struct file *filp,
index 44bc324aeeb08d824804fd1138e4ab2b0d5e2d8e..23c0e82b5103cdd950b2da266258260292c0cea5 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/kvm_host.h>
 #include <asm/cacheflush.h>
 #include <asm/csr.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/insn-def.h>
 
 #define has_svinval()  riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
index 08ba48a395aa2a232bd00755f02355e6770d0ee7..030904d82b583e1ce3f4e44cdabe4e61e708e616 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/err.h>
 #include <linux/kvm_host.h>
 #include <linux/uaccess.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 
 #ifdef CONFIG_FPU
 void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
index c6ebce6126b55006a2714a43d5cd3f123636d654..f8c9fa0c03c5abbd8a8035f255455e7d5d1c9288 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/uaccess.h>
 #include <linux/kvm_host.h>
 #include <asm/cacheflush.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm_vcpu_vector.h>
 #include <asm/vector.h>
 
index b430cbb695214da2a500d46c58c7113faa04237c..b339a2682f252bb8c0ac6d3803a8eab46e1e1443 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/err.h>
 #include <linux/kvm_host.h>
 #include <linux/uaccess.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/kvm_vcpu_vector.h>
 #include <asm/vector.h>
 
index d7a256eb53f404feaf8ebc750453968bc5352188..b22de1231144c29758d3fae335a5d727f44b51bb 100644 (file)
@@ -29,41 +29,41 @@ SYM_FUNC_START(clear_page)
        lw      a1, riscv_cboz_block_size
        add     a2, a0, a2
 .Lzero_loop:
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
        CBOZ_ALT(11, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
        CBOZ_ALT(10, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
        CBOZ_ALT(9, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
        CBOZ_ALT(8, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
-       CBO_zero(a0)
+       CBO_ZERO(a0)
        add     a0, a0, a1
        bltu    a0, a2, .Lzero_loop
        ret
index 1a40d01a95439e1592b673ad76e5f5b964bbbed3..44e009ec5fef683a3290d57f31239661a5352f9e 100644 (file)
@@ -7,8 +7,7 @@
 #include <asm/asm.h>
 
 /* void *memcpy(void *, const void *, size_t) */
-ENTRY(__memcpy)
-WEAK(memcpy)
+SYM_FUNC_START(__memcpy)
        move t6, a0  /* Preserve return value */
 
        /* Defer to byte-oriented copy for small sizes */
@@ -105,6 +104,7 @@ WEAK(memcpy)
        bltu a1, a3, 5b
 6:
        ret
-END(__memcpy)
+SYM_FUNC_END(__memcpy)
+SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy)
 SYM_FUNC_ALIAS(__pi_memcpy, __memcpy)
 SYM_FUNC_ALIAS(__pi___memcpy, __memcpy)
index 838ff2022fe32d8e16769ef0e5962d9c294b0ed3..cb3e2e7ef0baa248d906717523a6f848f591eaf9 100644 (file)
@@ -7,7 +7,6 @@
 #include <asm/asm.h>
 
 SYM_FUNC_START(__memmove)
-SYM_FUNC_START_WEAK(memmove)
        /*
         * Returns
         *   a0 - dest
@@ -26,8 +25,8 @@ SYM_FUNC_START_WEAK(memmove)
         */
 
        /* Return if nothing to do */
-       beq a0, a1, return_from_memmove
-       beqz a2, return_from_memmove
+       beq a0, a1, .Lreturn_from_memmove
+       beqz a2, .Lreturn_from_memmove
 
        /*
         * Register Uses
@@ -60,7 +59,7 @@ SYM_FUNC_START_WEAK(memmove)
         * small enough not to bother.
         */
        andi t0, a2, -(2 * SZREG)
-       beqz t0, byte_copy
+       beqz t0, .Lbyte_copy
 
        /*
         * Now solve for t5 and t6.
@@ -87,14 +86,14 @@ SYM_FUNC_START_WEAK(memmove)
         */
        xor  t0, a0, a1
        andi t1, t0, (SZREG - 1)
-       beqz t1, coaligned_copy
+       beqz t1, .Lcoaligned_copy
        /* Fall through to misaligned fixup copy */
 
-misaligned_fixup_copy:
-       bltu a1, a0, misaligned_fixup_copy_reverse
+.Lmisaligned_fixup_copy:
+       bltu a1, a0, .Lmisaligned_fixup_copy_reverse
 
-misaligned_fixup_copy_forward:
-       jal  t0, byte_copy_until_aligned_forward
+.Lmisaligned_fixup_copy_forward:
+       jal  t0, .Lbyte_copy_until_aligned_forward
 
        andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
        slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
@@ -153,10 +152,10 @@ misaligned_fixup_copy_forward:
        mv    t3, t6 /* Fix the dest pointer in case the loop was broken */
 
        add  a1, t3, a5 /* Restore the src pointer */
-       j byte_copy_forward /* Copy any remaining bytes */
+       j .Lbyte_copy_forward /* Copy any remaining bytes */
 
-misaligned_fixup_copy_reverse:
-       jal  t0, byte_copy_until_aligned_reverse
+.Lmisaligned_fixup_copy_reverse:
+       jal  t0, .Lbyte_copy_until_aligned_reverse
 
        andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
        slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
@@ -215,18 +214,18 @@ misaligned_fixup_copy_reverse:
        mv    t4, t5 /* Fix the dest pointer in case the loop was broken */
 
        add  a4, t4, a5 /* Restore the src pointer */
-       j byte_copy_reverse /* Copy any remaining bytes */
+       j .Lbyte_copy_reverse /* Copy any remaining bytes */
 
 /*
  * Simple copy loops for SZREG co-aligned memory locations.
  * These also make calls to do byte copies for any unaligned
  * data at their terminations.
  */
-coaligned_copy:
-       bltu a1, a0, coaligned_copy_reverse
+.Lcoaligned_copy:
+       bltu a1, a0, .Lcoaligned_copy_reverse
 
-coaligned_copy_forward:
-       jal t0, byte_copy_until_aligned_forward
+.Lcoaligned_copy_forward:
+       jal t0, .Lbyte_copy_until_aligned_forward
 
        1:
        REG_L t1, ( 0 * SZREG)(a1)
@@ -235,10 +234,10 @@ coaligned_copy_forward:
        REG_S t1, (-1 * SZREG)(t3)
        bne   t3, t6, 1b
 
-       j byte_copy_forward /* Copy any remaining bytes */
+       j .Lbyte_copy_forward /* Copy any remaining bytes */
 
-coaligned_copy_reverse:
-       jal t0, byte_copy_until_aligned_reverse
+.Lcoaligned_copy_reverse:
+       jal t0, .Lbyte_copy_until_aligned_reverse
 
        1:
        REG_L t1, (-1 * SZREG)(a4)
@@ -247,7 +246,7 @@ coaligned_copy_reverse:
        REG_S t1, ( 0 * SZREG)(t4)
        bne   t4, t5, 1b
 
-       j byte_copy_reverse /* Copy any remaining bytes */
+       j .Lbyte_copy_reverse /* Copy any remaining bytes */
 
 /*
  * These are basically sub-functions within the function.  They
@@ -258,7 +257,7 @@ coaligned_copy_reverse:
  * up from where they were left and we avoid code duplication
  * without any overhead except the call in and return jumps.
  */
-byte_copy_until_aligned_forward:
+.Lbyte_copy_until_aligned_forward:
        beq  t3, t5, 2f
        1:
        lb   t1,  0(a1)
@@ -269,7 +268,7 @@ byte_copy_until_aligned_forward:
        2:
        jalr zero, 0x0(t0) /* Return to multibyte copy loop */
 
-byte_copy_until_aligned_reverse:
+.Lbyte_copy_until_aligned_reverse:
        beq  t4, t6, 2f
        1:
        lb   t1, -1(a4)
@@ -285,10 +284,10 @@ byte_copy_until_aligned_reverse:
  * These will byte copy until they reach the end of data to copy.
  * At that point, they will call to return from memmove.
  */
-byte_copy:
-       bltu a1, a0, byte_copy_reverse
+.Lbyte_copy:
+       bltu a1, a0, .Lbyte_copy_reverse
 
-byte_copy_forward:
+.Lbyte_copy_forward:
        beq  t3, t4, 2f
        1:
        lb   t1,  0(a1)
@@ -299,7 +298,7 @@ byte_copy_forward:
        2:
        ret
 
-byte_copy_reverse:
+.Lbyte_copy_reverse:
        beq  t4, t3, 2f
        1:
        lb   t1, -1(a4)
@@ -309,10 +308,10 @@ byte_copy_reverse:
        bne  t4, t3, 1b
        2:
 
-return_from_memmove:
+.Lreturn_from_memmove:
        ret
 
-SYM_FUNC_END(memmove)
 SYM_FUNC_END(__memmove)
+SYM_FUNC_ALIAS_WEAK(memmove, __memmove)
 SYM_FUNC_ALIAS(__pi_memmove, __memmove)
 SYM_FUNC_ALIAS(__pi___memmove, __memmove)
index 34c5360c6705c56466cf9890f1a7261e6383a5e3..35f358e70bdb6bf79bda0f366e34c98c6ef5bbc3 100644 (file)
@@ -8,8 +8,7 @@
 #include <asm/asm.h>
 
 /* void *memset(void *, int, size_t) */
-ENTRY(__memset)
-WEAK(memset)
+SYM_FUNC_START(__memset)
        move t0, a0  /* Preserve return value */
 
        /* Defer to byte-oriented fill for small sizes */
@@ -110,4 +109,5 @@ WEAK(memset)
        bltu t0, a3, 5b
 6:
        ret
-END(__memset)
+SYM_FUNC_END(__memset)
+SYM_FUNC_ALIAS_WEAK(memset, __memset)
index 09b47ebacf2e8743bc1cf77be0693b4499748afb..3ab438f30d1328707862134f819e8a74598c6dce 100644 (file)
@@ -10,8 +10,7 @@
        _asm_extable    100b, \lbl
        .endm
 
-ENTRY(__asm_copy_to_user)
-ENTRY(__asm_copy_from_user)
+SYM_FUNC_START(__asm_copy_to_user)
 
        /* Enable access to user memory */
        li t6, SR_SUM
@@ -181,13 +180,13 @@ ENTRY(__asm_copy_from_user)
        csrc CSR_STATUS, t6
        sub a0, t5, a0
        ret
-ENDPROC(__asm_copy_to_user)
-ENDPROC(__asm_copy_from_user)
+SYM_FUNC_END(__asm_copy_to_user)
 EXPORT_SYMBOL(__asm_copy_to_user)
+SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user)
 EXPORT_SYMBOL(__asm_copy_from_user)
 
 
-ENTRY(__clear_user)
+SYM_FUNC_START(__clear_user)
 
        /* Enable access to user memory */
        li t6, SR_SUM
@@ -233,5 +232,5 @@ ENTRY(__clear_user)
        csrc CSR_STATUS, t6
        sub a0, a3, a0
        ret
-ENDPROC(__clear_user)
+SYM_FUNC_END(__clear_user)
 EXPORT_SYMBOL(__clear_user)
index 9c454f90fd3da21200d180da8a463d45a15da178..3a4dfc8babcf8c3ef4cf2d4c39731b0e9067eb14 100644 (file)
@@ -36,3 +36,4 @@ endif
 
 obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
 obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o
+obj-$(CONFIG_RISCV_NONSTANDARD_CACHE_OPS) += cache-ops.o
diff --git a/arch/riscv/mm/cache-ops.c b/arch/riscv/mm/cache-ops.c
new file mode 100644 (file)
index 0000000..a993ad1
--- /dev/null
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <asm/dma-noncoherent.h>
+
+struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init;
+
+void
+riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops)
+{
+       if (!ops)
+               return;
+       noncoherent_cache_ops = *ops;
+}
+EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops);
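
Splitting the cache ops out of dma-noncoherent.c puts them behind CONFIG_RISCV_NONSTANDARD_CACHE_OPS (see the Makefile hunk above), so platforms with nonstandard cache-maintenance instructions can hook the three DMA callbacks. A hedged sketch of a platform driver using it; the callback names are hypothetical, and the signatures are assumed to match the wback/inv/wback_inv fields seen in the old initializer removed below:

    #include <linux/init.h>
    #include <asm/dma-noncoherent.h>

    static void myplat_wback(phys_addr_t paddr, size_t size) { /* clean */ }
    static void myplat_inv(phys_addr_t paddr, size_t size) { /* invalidate */ }
    static void myplat_flush(phys_addr_t paddr, size_t size) { /* clean + inv */ }

    static const struct riscv_nonstd_cache_ops myplat_cache_ops = {
            .wback          = myplat_wback,
            .inv            = myplat_inv,
            .wback_inv      = myplat_flush,
    };

    static int __init myplat_cache_init(void)
    {
            riscv_noncoherent_register_cache_ops(&myplat_cache_ops);
            return 0;
    }
    early_initcall(myplat_cache_init);
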
index b76e7e192eb183460c3caf897b36f5a0ec59b30b..4e4e469b8dd66cfdf3e24346a514db2d3dd55773 100644 (file)
@@ -15,12 +15,6 @@ static bool noncoherent_supported __ro_after_init;
 int dma_cache_alignment __ro_after_init = ARCH_DMA_MINALIGN;
 EXPORT_SYMBOL_GPL(dma_cache_alignment);
 
-struct riscv_nonstd_cache_ops noncoherent_cache_ops __ro_after_init = {
-       .wback = NULL,
-       .inv = NULL,
-       .wback_inv = NULL,
-};
-
 static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
 {
        void *vaddr = phys_to_virt(paddr);
@@ -31,7 +25,7 @@ static inline void arch_dma_cache_wback(phys_addr_t paddr, size_t size)
                return;
        }
 #endif
-       ALT_CMO_OP(clean, vaddr, size, riscv_cbom_block_size);
+       ALT_CMO_OP(CLEAN, vaddr, size, riscv_cbom_block_size);
 }
 
 static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
@@ -45,7 +39,7 @@ static inline void arch_dma_cache_inv(phys_addr_t paddr, size_t size)
        }
 #endif
 
-       ALT_CMO_OP(inval, vaddr, size, riscv_cbom_block_size);
+       ALT_CMO_OP(INVAL, vaddr, size, riscv_cbom_block_size);
 }
 
 static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
@@ -59,7 +53,7 @@ static inline void arch_dma_cache_wback_inv(phys_addr_t paddr, size_t size)
        }
 #endif
 
-       ALT_CMO_OP(flush, vaddr, size, riscv_cbom_block_size);
+       ALT_CMO_OP(FLUSH, vaddr, size, riscv_cbom_block_size);
 }
 
 static inline bool arch_sync_dma_clean_before_fromdevice(void)
@@ -131,7 +125,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
        }
 #endif
 
-       ALT_CMO_OP(flush, flush_addr, size, riscv_cbom_block_size);
+       ALT_CMO_OP(FLUSH, flush_addr, size, riscv_cbom_block_size);
 }
 
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
@@ -162,12 +156,3 @@ void __init riscv_set_dma_cache_alignment(void)
        if (!noncoherent_supported)
                dma_cache_alignment = 1;
 }
-
-void riscv_noncoherent_register_cache_ops(const struct riscv_nonstd_cache_ops *ops)
-{
-       if (!ops)
-               return;
-
-       noncoherent_cache_ops = *ops;
-}
-EXPORT_SYMBOL_GPL(riscv_noncoherent_register_cache_ops);
index b128ed3c5a372ae4ab60106c6a9685d97a48e5ef..2e011cbddf3af373ea68d703e7e5736c5b04155a 100644 (file)
@@ -666,16 +666,16 @@ void __init create_pgd_mapping(pgd_t *pgdp,
 static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va,
                                      phys_addr_t size)
 {
-       if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
-               return PGDIR_SIZE;
-
-       if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
+       if (pgtable_l5_enabled &&
+           !(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
                return P4D_SIZE;
 
-       if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
+       if (pgtable_l4_enabled &&
+           !(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
                return PUD_SIZE;
 
-       if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
+       if (IS_ENABLED(CONFIG_64BIT) &&
+           !(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
                return PMD_SIZE;
 
        return PAGE_SIZE;
index 161d0b34c2cb28dbc9962d2ec7c4db64fbe08a34..fc5fc4f785c481c20acec4b68ba1a75d278ee150 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <linux/pagewalk.h>
 #include <linux/pgtable.h>
+#include <linux/vmalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/bitops.h>
 #include <asm/set_memory.h>
@@ -25,19 +26,6 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
        return new_val;
 }
 
-static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr,
-                             unsigned long next, struct mm_walk *walk)
-{
-       pgd_t val = READ_ONCE(*pgd);
-
-       if (pgd_leaf(val)) {
-               val = __pgd(set_pageattr_masks(pgd_val(val), walk));
-               set_pgd(pgd, val);
-       }
-
-       return 0;
-}
-
 static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
                              unsigned long next, struct mm_walk *walk)
 {
@@ -96,7 +84,6 @@ static int pageattr_pte_hole(unsigned long addr, unsigned long next,
 }
 
 static const struct mm_walk_ops pageattr_ops = {
-       .pgd_entry = pageattr_pgd_entry,
        .p4d_entry = pageattr_p4d_entry,
        .pud_entry = pageattr_pud_entry,
        .pmd_entry = pageattr_pmd_entry,
@@ -105,12 +92,181 @@ static const struct mm_walk_ops pageattr_ops = {
        .walk_lock = PGWALK_RDLOCK,
 };
 
+#ifdef CONFIG_64BIT
+static int __split_linear_mapping_pmd(pud_t *pudp,
+                                     unsigned long vaddr, unsigned long end)
+{
+       pmd_t *pmdp;
+       unsigned long next;
+
+       pmdp = pmd_offset(pudp, vaddr);
+
+       do {
+               next = pmd_addr_end(vaddr, end);
+
+               if (next - vaddr >= PMD_SIZE &&
+                   vaddr <= (vaddr & PMD_MASK) && end >= next)
+                       continue;
+
+               if (pmd_leaf(*pmdp)) {
+                       struct page *pte_page;
+                       unsigned long pfn = _pmd_pfn(*pmdp);
+                       pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK);
+                       pte_t *ptep_new;
+                       int i;
+
+                       pte_page = alloc_page(GFP_KERNEL);
+                       if (!pte_page)
+                               return -ENOMEM;
+
+                       ptep_new = (pte_t *)page_address(pte_page);
+                       for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep_new)
+                               set_pte(ptep_new, pfn_pte(pfn + i, prot));
+
+                       smp_wmb();
+
+                       set_pmd(pmdp, pfn_pmd(page_to_pfn(pte_page), PAGE_TABLE));
+               }
+       } while (pmdp++, vaddr = next, vaddr != end);
+
+       return 0;
+}
+
+static int __split_linear_mapping_pud(p4d_t *p4dp,
+                                     unsigned long vaddr, unsigned long end)
+{
+       pud_t *pudp;
+       unsigned long next;
+       int ret;
+
+       pudp = pud_offset(p4dp, vaddr);
+
+       do {
+               next = pud_addr_end(vaddr, end);
+
+               if (next - vaddr >= PUD_SIZE &&
+                   vaddr <= (vaddr & PUD_MASK) && end >= next)
+                       continue;
+
+               if (pud_leaf(*pudp)) {
+                       struct page *pmd_page;
+                       unsigned long pfn = _pud_pfn(*pudp);
+                       pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK);
+                       pmd_t *pmdp_new;
+                       int i;
+
+                       pmd_page = alloc_page(GFP_KERNEL);
+                       if (!pmd_page)
+                               return -ENOMEM;
+
+                       pmdp_new = (pmd_t *)page_address(pmd_page);
+                       for (i = 0; i < PTRS_PER_PMD; ++i, ++pmdp_new)
+                               set_pmd(pmdp_new,
+                                       pfn_pmd(pfn + ((i * PMD_SIZE) >> PAGE_SHIFT), prot));
+
+                       smp_wmb();
+
+                       set_pud(pudp, pfn_pud(page_to_pfn(pmd_page), PAGE_TABLE));
+               }
+
+               ret = __split_linear_mapping_pmd(pudp, vaddr, next);
+               if (ret)
+                       return ret;
+       } while (pudp++, vaddr = next, vaddr != end);
+
+       return 0;
+}
+
+static int __split_linear_mapping_p4d(pgd_t *pgdp,
+                                     unsigned long vaddr, unsigned long end)
+{
+       p4d_t *p4dp;
+       unsigned long next;
+       int ret;
+
+       p4dp = p4d_offset(pgdp, vaddr);
+
+       do {
+               next = p4d_addr_end(vaddr, end);
+
+               /*
+                * If [vaddr; end] contains [vaddr & P4D_MASK; next], we don't
+                * need to split; we'll change the protections on the whole P4D.
+                */
+               if (next - vaddr >= P4D_SIZE &&
+                   vaddr <= (vaddr & P4D_MASK) && end >= next)
+                       continue;
+
+               if (p4d_leaf(*p4dp)) {
+                       struct page *pud_page;
+                       unsigned long pfn = _p4d_pfn(*p4dp);
+                       pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK);
+                       pud_t *pudp_new;
+                       int i;
+
+                       pud_page = alloc_page(GFP_KERNEL);
+                       if (!pud_page)
+                               return -ENOMEM;
+
+                       /*
+                        * Fill the pud level with leaf puds that have the same
+                        * protections as the leaf p4d.
+                        */
+                       pudp_new = (pud_t *)page_address(pud_page);
+                       for (i = 0; i < PTRS_PER_PUD; ++i, ++pudp_new)
+                               set_pud(pudp_new,
+                                       pfn_pud(pfn + ((i * PUD_SIZE) >> PAGE_SHIFT), prot));
+
+                       /*
+                        * Make sure the pud filling is not reordered with the
+                        * p4d store which could result in seeing a partially
+                        * filled pud level.
+                        */
+                       smp_wmb();
+
+                       set_p4d(p4dp, pfn_p4d(page_to_pfn(pud_page), PAGE_TABLE));
+               }
+
+               ret = __split_linear_mapping_pud(p4dp, vaddr, next);
+               if (ret)
+                       return ret;
+       } while (p4dp++, vaddr = next, vaddr != end);
+
+       return 0;
+}
+
+static int __split_linear_mapping_pgd(pgd_t *pgdp,
+                                     unsigned long vaddr,
+                                     unsigned long end)
+{
+       unsigned long next;
+       int ret;
+
+       do {
+               next = pgd_addr_end(vaddr, end);
+               /* We never use PGD mappings for the linear mapping */
+               ret = __split_linear_mapping_p4d(pgdp, vaddr, next);
+               if (ret)
+                       return ret;
+       } while (pgdp++, vaddr = next, vaddr != end);
+
+       return 0;
+}
+
+static int split_linear_mapping(unsigned long start, unsigned long end)
+{
+       return __split_linear_mapping_pgd(pgd_offset_k(start), start, end);
+}
+#endif /* CONFIG_64BIT */
+
 static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
                        pgprot_t clear_mask)
 {
        int ret;
        unsigned long start = addr;
        unsigned long end = start + PAGE_SIZE * numpages;
+       unsigned long __maybe_unused lm_start;
+       unsigned long __maybe_unused lm_end;
        struct pageattr_masks masks = {
                .set_mask = set_mask,
                .clear_mask = clear_mask
@@ -120,11 +276,67 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
                return 0;
 
        mmap_write_lock(&init_mm);
+
+#ifdef CONFIG_64BIT
+       /*
+        * We are about to change the permissions of a kernel mapping, we must
+        * apply the same changes to its linear mapping alias, which may imply
+        * splitting a huge mapping.
+        */
+
+       if (is_vmalloc_or_module_addr((void *)start)) {
+               struct vm_struct *area = NULL;
+               int i, page_start;
+
+               area = find_vm_area((void *)start);
+               page_start = (start - (unsigned long)area->addr) >> PAGE_SHIFT;
+
+               for (i = page_start; i < page_start + numpages; ++i) {
+                       lm_start = (unsigned long)page_address(area->pages[i]);
+                       lm_end = lm_start + PAGE_SIZE;
+
+                       ret = split_linear_mapping(lm_start, lm_end);
+                       if (ret)
+                               goto unlock;
+
+                       ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
+                                                   &pageattr_ops, NULL, &masks);
+                       if (ret)
+                               goto unlock;
+               }
+       } else if (is_kernel_mapping(start) || is_linear_mapping(start)) {
+               lm_start = (unsigned long)lm_alias(start);
+               lm_end = (unsigned long)lm_alias(end);
+
+               ret = split_linear_mapping(lm_start, lm_end);
+               if (ret)
+                       goto unlock;
+
+               ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
+                                           &pageattr_ops, NULL, &masks);
+               if (ret)
+                       goto unlock;
+       }
+
        ret =  walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
                                     &masks);
+
+unlock:
+       mmap_write_unlock(&init_mm);
+
+       /*
+        * We can't use flush_tlb_kernel_range() here, as we may have split a
+        * hugepage larger than the range being flushed, so let's flush everything.
+        */
+       flush_tlb_all();
+#else
+       ret =  walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
+                                    &masks);
+
        mmap_write_unlock(&init_mm);
 
        flush_tlb_kernel_range(start, end);
+#endif
 
        return ret;
 }
@@ -159,36 +371,14 @@ int set_memory_nx(unsigned long addr, int numpages)
 
 int set_direct_map_invalid_noflush(struct page *page)
 {
-       int ret;
-       unsigned long start = (unsigned long)page_address(page);
-       unsigned long end = start + PAGE_SIZE;
-       struct pageattr_masks masks = {
-               .set_mask = __pgprot(0),
-               .clear_mask = __pgprot(_PAGE_PRESENT)
-       };
-
-       mmap_read_lock(&init_mm);
-       ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
-       mmap_read_unlock(&init_mm);
-
-       return ret;
+       return __set_memory((unsigned long)page_address(page), 1,
+                           __pgprot(0), __pgprot(_PAGE_PRESENT));
 }
 
 int set_direct_map_default_noflush(struct page *page)
 {
-       int ret;
-       unsigned long start = (unsigned long)page_address(page);
-       unsigned long end = start + PAGE_SIZE;
-       struct pageattr_masks masks = {
-               .set_mask = PAGE_KERNEL,
-               .clear_mask = __pgprot(0)
-       };
-
-       mmap_read_lock(&init_mm);
-       ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
-       mmap_read_unlock(&init_mm);
-
-       return ret;
+       return __set_memory((unsigned long)page_address(page), 1,
+                           PAGE_KERNEL, __pgprot(0));
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
index c5fc5ec96f6d4b1fc71b2949ed970c561b7fcb14..370a422ede1101b768f4849e946d6af4fdfc8bd5 100644 (file)
@@ -17,7 +17,7 @@ void arch_wb_cache_pmem(void *addr, size_t size)
                return;
        }
 #endif
-       ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size);
+       ALT_CMO_OP(CLEAN, addr, size, riscv_cbom_block_size);
 }
 EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
@@ -29,6 +29,6 @@ void arch_invalidate_pmem(void *addr, size_t size)
                return;
        }
 #endif
-       ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size);
+       ALT_CMO_OP(INVAL, addr, size, riscv_cbom_block_size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
index e9090b38f8117c992984560c68fa28668fe722e6..657c27bc07a7694edbb70795c4b2bd102b8780d0 100644 (file)
@@ -129,55 +129,55 @@ static struct ptd_mm_info efi_ptd_info = {
 /* Page Table Entry */
 struct prot_bits {
        u64 mask;
-       u64 val;
        const char *set;
        const char *clear;
 };
 
 static const struct prot_bits pte_bits[] = {
        {
+#ifdef CONFIG_64BIT
+               .mask = _PAGE_NAPOT,
+               .set = "N",
+               .clear = ".",
+       }, {
+               .mask = _PAGE_MTMASK_SVPBMT,
+               .set = "MT(%s)",
+               .clear = "  ..  ",
+       }, {
+#endif
                .mask = _PAGE_SOFT,
-               .val = _PAGE_SOFT,
-               .set = "RSW",
-               .clear = "   ",
+               .set = "RSW(%d)",
+               .clear = "  ..  ",
        }, {
                .mask = _PAGE_DIRTY,
-               .val = _PAGE_DIRTY,
                .set = "D",
                .clear = ".",
        }, {
                .mask = _PAGE_ACCESSED,
-               .val = _PAGE_ACCESSED,
                .set = "A",
                .clear = ".",
        }, {
                .mask = _PAGE_GLOBAL,
-               .val = _PAGE_GLOBAL,
                .set = "G",
                .clear = ".",
        }, {
                .mask = _PAGE_USER,
-               .val = _PAGE_USER,
                .set = "U",
                .clear = ".",
        }, {
                .mask = _PAGE_EXEC,
-               .val = _PAGE_EXEC,
                .set = "X",
                .clear = ".",
        }, {
                .mask = _PAGE_WRITE,
-               .val = _PAGE_WRITE,
                .set = "W",
                .clear = ".",
        }, {
                .mask = _PAGE_READ,
-               .val = _PAGE_READ,
                .set = "R",
                .clear = ".",
        }, {
                .mask = _PAGE_PRESENT,
-               .val = _PAGE_PRESENT,
                .set = "V",
                .clear = ".",
        }
@@ -208,15 +208,30 @@ static void dump_prot(struct pg_state *st)
        unsigned int i;
 
        for (i = 0; i < ARRAY_SIZE(pte_bits); i++) {
-               const char *s;
+               char s[7];
+               unsigned long val;
 
-               if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val)
-                       s = pte_bits[i].set;
-               else
-                       s = pte_bits[i].clear;
+               val = st->current_prot & pte_bits[i].mask;
+               if (val) {
+                       if (pte_bits[i].mask == _PAGE_SOFT)
+                               sprintf(s, pte_bits[i].set, val >> 8);
+#ifdef CONFIG_64BIT
+                       else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) {
+                               if (val == _PAGE_NOCACHE_SVPBMT)
+                                       sprintf(s, pte_bits[i].set, "NC");
+                               else if (val == _PAGE_IO_SVPBMT)
+                                       sprintf(s, pte_bits[i].set, "IO");
+                               else
+                                       sprintf(s, pte_bits[i].set, "??");
+                       }
+#endif
+                       else
+                               sprintf(s, "%s", pte_bits[i].set);
+               } else {
+                       sprintf(s, "%s", pte_bits[i].clear);
+               }
 
-               if (s)
-                       pt_dump_seq_printf(st->seq, " %s", s);
+               pt_dump_seq_printf(st->seq, " %s", s);
        }
 }
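
With the printf-style entries above, the software bits and the Svpbmt memory type now print their values rather than a flat flag: RSW(%d) receives the RSW field, which sits at PTE bits [9:8] per the privileged spec, and MT(%s) prints NC or IO for the two non-default Svpbmt encodings. A small model of the RSW decode under that bit-layout assumption:

    #include <stdio.h>

    #define PAGE_SOFT 0x300UL   /* PTE bits [9:8], mirrors _PAGE_SOFT */

    static void show_rsw(unsigned long prot, char *s, size_t len)
    {
            unsigned long val = prot & PAGE_SOFT;

            if (val)
                    snprintf(s, len, "RSW(%lu)", val >> 8);
            else
                    snprintf(s, len, "  ..  ");
    }
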
 
index 77be59aadc735ea9979473fd2d4dd8ffa04394e2..e6659d7368b35403d1b91739080496bfc45442af 100644 (file)
@@ -3,33 +3,56 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/sched.h>
+#include <linux/hugetlb.h>
 #include <asm/sbi.h>
 #include <asm/mmu_context.h>
 
 static inline void local_flush_tlb_all_asid(unsigned long asid)
 {
-       __asm__ __volatile__ ("sfence.vma x0, %0"
-                       :
-                       : "r" (asid)
-                       : "memory");
+       if (asid != FLUSH_TLB_NO_ASID)
+               __asm__ __volatile__ ("sfence.vma x0, %0"
+                               :
+                               : "r" (asid)
+                               : "memory");
+       else
+               local_flush_tlb_all();
 }
 
 static inline void local_flush_tlb_page_asid(unsigned long addr,
                unsigned long asid)
 {
-       __asm__ __volatile__ ("sfence.vma %0, %1"
-                       :
-                       : "r" (addr), "r" (asid)
-                       : "memory");
+       if (asid != FLUSH_TLB_NO_ASID)
+               __asm__ __volatile__ ("sfence.vma %0, %1"
+                               :
+                               : "r" (addr), "r" (asid)
+                               : "memory");
+       else
+               local_flush_tlb_page(addr);
 }
 
-static inline void local_flush_tlb_range(unsigned long start,
-               unsigned long size, unsigned long stride)
+/*
+ * Flush the entire TLB if the number of entries to be flushed is
+ * greater than the threshold below.
+ */
+static unsigned long tlb_flush_all_threshold __read_mostly = 64;
+
+static void local_flush_tlb_range_threshold_asid(unsigned long start,
+                                                unsigned long size,
+                                                unsigned long stride,
+                                                unsigned long asid)
 {
-       if (size <= stride)
-               local_flush_tlb_page(start);
-       else
-               local_flush_tlb_all();
+       unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride);
+       int i;
+
+       if (nr_ptes_in_range > tlb_flush_all_threshold) {
+               local_flush_tlb_all_asid(asid);
+               return;
+       }
+
+       for (i = 0; i < nr_ptes_in_range; ++i) {
+               local_flush_tlb_page_asid(start, asid);
+               start += stride;
+       }
 }
 
 static inline void local_flush_tlb_range_asid(unsigned long start,
@@ -37,8 +60,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
 {
        if (size <= stride)
                local_flush_tlb_page_asid(start, asid);
-       else
+       else if (size == FLUSH_TLB_MAX_SIZE)
                local_flush_tlb_all_asid(asid);
+       else
+               local_flush_tlb_range_threshold_asid(start, size, stride, asid);
 }
 
 static void __ipi_flush_tlb_all(void *info)
@@ -51,7 +76,7 @@ void flush_tlb_all(void)
        if (riscv_use_ipi_for_rfence())
                on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
        else
-               sbi_remote_sfence_vma(NULL, 0, -1);
+               sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
 }
 
 struct flush_tlb_range_data {
@@ -68,68 +93,62 @@ static void __ipi_flush_tlb_range_asid(void *info)
        local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
 }
 
-static void __ipi_flush_tlb_range(void *info)
-{
-       struct flush_tlb_range_data *d = info;
-
-       local_flush_tlb_range(d->start, d->size, d->stride);
-}
-
 static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
                              unsigned long size, unsigned long stride)
 {
        struct flush_tlb_range_data ftd;
-       struct cpumask *cmask = mm_cpumask(mm);
-       unsigned int cpuid;
+       const struct cpumask *cmask;
+       unsigned long asid = FLUSH_TLB_NO_ASID;
        bool broadcast;
 
-       if (cpumask_empty(cmask))
-               return;
+       if (mm) {
+               unsigned int cpuid;
 
-       cpuid = get_cpu();
-       /* check if the tlbflush needs to be sent to other CPUs */
-       broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
-       if (static_branch_unlikely(&use_asid_allocator)) {
-               unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
-
-               if (broadcast) {
-                       if (riscv_use_ipi_for_rfence()) {
-                               ftd.asid = asid;
-                               ftd.start = start;
-                               ftd.size = size;
-                               ftd.stride = stride;
-                               on_each_cpu_mask(cmask,
-                                                __ipi_flush_tlb_range_asid,
-                                                &ftd, 1);
-                       } else
-                               sbi_remote_sfence_vma_asid(cmask,
-                                                          start, size, asid);
-               } else {
-                       local_flush_tlb_range_asid(start, size, stride, asid);
-               }
+               cmask = mm_cpumask(mm);
+               if (cpumask_empty(cmask))
+                       return;
+
+               cpuid = get_cpu();
+               /* check if the tlbflush needs to be sent to other CPUs */
+               broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
+
+               if (static_branch_unlikely(&use_asid_allocator))
+                       asid = atomic_long_read(&mm->context.id) & asid_mask;
        } else {
-               if (broadcast) {
-                       if (riscv_use_ipi_for_rfence()) {
-                               ftd.asid = 0;
-                               ftd.start = start;
-                               ftd.size = size;
-                               ftd.stride = stride;
-                               on_each_cpu_mask(cmask,
-                                                __ipi_flush_tlb_range,
-                                                &ftd, 1);
-                       } else
-                               sbi_remote_sfence_vma(cmask, start, size);
-               } else {
-                       local_flush_tlb_range(start, size, stride);
-               }
+               cmask = cpu_online_mask;
+               broadcast = true;
        }
 
-       put_cpu();
+       if (broadcast) {
+               if (riscv_use_ipi_for_rfence()) {
+                       ftd.asid = asid;
+                       ftd.start = start;
+                       ftd.size = size;
+                       ftd.stride = stride;
+                       on_each_cpu_mask(cmask,
+                                        __ipi_flush_tlb_range_asid,
+                                        &ftd, 1);
+               } else
+                       sbi_remote_sfence_vma_asid(cmask,
+                                                  start, size, asid);
+       } else {
+               local_flush_tlb_range_asid(start, size, stride, asid);
+       }
+
+       if (mm)
+               put_cpu();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-       __flush_tlb_range(mm, 0, -1, PAGE_SIZE);
+       __flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+}
+
+void flush_tlb_mm_range(struct mm_struct *mm,
+                       unsigned long start, unsigned long end,
+                       unsigned int page_size)
+{
+       __flush_tlb_range(mm, start, end - start, page_size);
 }
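
flush_tlb_mm_range() exposes the flush stride to callers that know the unmap granularity. A plausible consumer in asm/tlb.h (hedged sketch, assuming the generic mmu_gather fields and tlb_get_unmap_size() helper; the exact condition in the series may differ):

    static inline void tlb_flush(struct mmu_gather *tlb)
    {
            if (tlb->fullmm || tlb->need_flush_all)
                    flush_tlb_mm(tlb->mm);
            else
                    flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end,
                                       tlb_get_unmap_size(tlb));
    }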
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
@@ -140,8 +159,40 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                     unsigned long end)
 {
-       __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE);
+       unsigned long stride_size;
+
+       if (!is_vm_hugetlb_page(vma)) {
+               stride_size = PAGE_SIZE;
+       } else {
+               stride_size = huge_page_size(hstate_vma(vma));
+
+               /*
+                * As stated in the privileged specification, every PTE in a
+                * NAPOT region must be invalidated, so reset the stride in that
+                * case.
+                */
+               if (has_svnapot()) {
+                       if (stride_size >= PGDIR_SIZE)
+                               stride_size = PGDIR_SIZE;
+                       else if (stride_size >= P4D_SIZE)
+                               stride_size = P4D_SIZE;
+                       else if (stride_size >= PUD_SIZE)
+                               stride_size = PUD_SIZE;
+                       else if (stride_size >= PMD_SIZE)
+                               stride_size = PMD_SIZE;
+                       else
+                               stride_size = PAGE_SIZE;
+               }
+       }
+
+       __flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
+}
+
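A worked example of the stride clamping (assumptions: RV64 with 4 KiB base pages and Sv39, so PMD_SIZE is 2 MiB; values illustrative only):

    /* A 64 KiB Svnapot hugepage is backed by 16 contiguous PTEs, each of
     * which must be invalidated, so the stride drops to PAGE_SIZE: */
    unsigned long stride_64k = SZ_64K >= PMD_SIZE ? PMD_SIZE : PAGE_SIZE;
            /* -> PAGE_SIZE: 16 per-PTE flushes */
    /* A 2 MiB hugepage is a single PMD leaf, so one flush suffices: */
    unsigned long stride_2m = SZ_2M >= PMD_SIZE ? PMD_SIZE : PAGE_SIZE;
            /* -> PMD_SIZE: 1 flush covers the whole mapping */
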
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+       __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
 }
+
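flush_tlb_kernel_range() is the mm == NULL consumer of the helper above: kernel mappings have no owning mm or ASID, so the flush is broadcast to every online hart. Hypothetical usage (addr and nr_pages are illustrative names, not from this series):

    /* e.g. after changing permissions on a span of kernel pages */
    flush_tlb_kernel_range((unsigned long)addr,
                           (unsigned long)addr + nr_pages * PAGE_SIZE);
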
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
                        unsigned long end)
index 0194f4554130ae6b89cf5db97a069a65bc2c6fc1..5bcf3af903daa2f9fb2aaf1e57d79121bfcda988 100644 (file)
@@ -7,15 +7,11 @@
  * Author: Li Zhengyu (lizhengyu3@huawei.com)
  *
  */
-
-.macro size, sym:req
-       .size \sym, . - \sym
-.endm
+#include <linux/linkage.h>
 
 .text
 
-.globl purgatory_start
-purgatory_start:
+SYM_CODE_START(purgatory_start)
 
        lla     sp, .Lstack
        mv      s0, a0  /* The hartid of the current hart */
@@ -28,8 +24,7 @@ purgatory_start:
        mv      a1, s1
        ld      a2, riscv_kernel_entry
        jr      a2
-
-size purgatory_start
+SYM_CODE_END(purgatory_start)
 
 .align 4
        .rept   256
@@ -39,9 +34,6 @@ size purgatory_start
 
 .data
 
-.globl riscv_kernel_entry
-riscv_kernel_entry:
-       .quad   0
-size riscv_kernel_entry
+SYM_DATA(riscv_kernel_entry, .quad 0)
 
 .end
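
The conversion replaces the file-local size macro with the generic annotations from <linux/linkage.h>. Roughly what those helpers emit (simplified sketch; the real macros in include/linux/linkage.h also handle alignment, statement separators, and symbol typing):

    #define SYM_CODE_START(name)  .globl name; name:
    #define SYM_CODE_END(name)    .size name, . - name
    #define SYM_DATA(name, data)  .globl name; name: data; .size name, . - name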
index 06f5bad3c3e06c5668a0a3c8b853b87c7a25be1d..57857c0dfba97e0bfdcd5190e8aee31e11028667 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/limits.h>
 #include <clocksource/timer-riscv.h>
 #include <asm/smp.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 #include <asm/sbi.h>
 #include <asm/timex.h>
 
index ef4c12f0877ba428f55db3510771da69b0c7cad9..06964a3c130f6addeed20eca1ed26153a2260854 100644 (file)
@@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM)          += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \
                                   -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \
                                   -DEFI_HAVE_STRCMP -fno-builtin -fpic \
                                   $(call cc-option,-mno-single-pic-base)
-cflags-$(CONFIG_RISCV)         += -fpic
+cflags-$(CONFIG_RISCV)         += -fpic -DNO_ALTERNATIVE
 cflags-$(CONFIG_LOONGARCH)     += -fpie
 
 cflags-$(CONFIG_EFI_PARAMS_FROM_FDT)   += -I$(srctree)/scripts/dtc/libfdt
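
-DNO_ALTERNATIVE lets RISC-V headers shared with the EFI stub compile without the runtime code-patching machinery, which the stub cannot use because it runs before alternatives are applied. A hypothetical guard pattern showing the idea (all names here are illustrative, not the actual header contents):

    #ifdef NO_ALTERNATIVE
    #define MY_INSN()   default_insn()          /* always the plain encoding */
    #else
    #define MY_INSN()   ALTERNATIVE("default_insn", "patched_insn", \
                                    VENDOR_ID, ERRATA_ID, CONFIG_ERRATA_FOO)
    #endif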
index cf8dacf3e3b84d8a56aae6aa1bd79da7cf8e7fea..afdaefbd03f61563c2b2972ff0c64266d3acaa9e 100644 (file)
@@ -1267,6 +1267,7 @@ DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells")
 DEFINE_SIMPLE_PROP(leds, "leds", NULL)
 DEFINE_SIMPLE_PROP(backlight, "backlight", NULL)
 DEFINE_SIMPLE_PROP(panel, "panel", NULL)
+DEFINE_SIMPLE_PROP(msi_parent, "msi-parent", "#msi-cells")
 DEFINE_SUFFIX_PROP(regulators, "-supply", NULL)
 DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells")
 
@@ -1356,6 +1357,7 @@ static const struct supplier_bindings of_supplier_bindings[] = {
        { .parse_prop = parse_leds, },
        { .parse_prop = parse_backlight, },
        { .parse_prop = parse_panel, },
+       { .parse_prop = parse_msi_parent, },
        { .parse_prop = parse_gpio_compat, },
        { .parse_prop = parse_interrupts, },
        { .parse_prop = parse_regulators, },
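
The new msi_parent entry teaches fw_devlink to treat an "msi-parent" phandle (optionally sized by "#msi-cells") as a supplier link, so a device is only probed once its MSI controller is ready. DEFINE_SIMPLE_PROP expands to a small parser along these lines (simplified sketch of the pattern used in drivers/of/property.c):

    static struct device_node *parse_msi_parent(struct device_node *np,
                                                const char *prop_name, int index)
    {
            /* Resolve the index'th "msi-parent" phandle to its supplier node. */
            return parse_prop_cells(np, prop_name, index, "msi-parent",
                                    "#msi-cells");
    }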
index 96c7f670c8f0d1821277a303abbdffdc517c9e25..16acd4dcdb96c75e07b45a3745a71842f2d7d2b8 100644 (file)
@@ -22,7 +22,7 @@
 
 #include <asm/errata_list.h>
 #include <asm/sbi.h>
-#include <asm/hwcap.h>
+#include <asm/cpufeature.h>
 
 #define SYSCTL_NO_USER_ACCESS  0
 #define SYSCTL_USER_ACCESS     1
@@ -543,8 +543,7 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
 
        if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
            (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
-               on_each_cpu_mask(mm_cpumask(event->owner->mm),
-                                pmu_sbi_set_scounteren, (void *)event, 1);
+               pmu_sbi_set_scounteren((void *)event);
 }
 
 static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
@@ -554,8 +553,7 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
 
        if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
            (hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
-               on_each_cpu_mask(mm_cpumask(event->owner->mm),
-                                pmu_sbi_reset_scounteren, (void *)event, 1);
+               pmu_sbi_reset_scounteren((void *)event);
 
        ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
        if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
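
Both hunks above drop the cross-CPU broadcast: counter start/stop already runs on the CPU the event is bound to, so only the local hart's scounteren needs toggling, and calling the helper directly also avoids issuing on_each_cpu_mask() from this context. For reference, a simplified sketch of the helper being called (the real one lives in the same file and may differ in detail):

    static void pmu_sbi_set_scounteren(void *arg)
    {
            struct perf_event *event = (struct perf_event *)arg;

            /* Permit user-space reads of this counter on the local hart only. */
            csr_write(CSR_SCOUNTEREN,
                      csr_read(CSR_SCOUNTEREN) | BIT(pmu_sbi_csr_index(event)));
    }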
@@ -689,6 +687,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
 
        /* Firmware counters don't support overflow yet */
        fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
+       if (fidx == RISCV_MAX_COUNTERS) {
+               csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
+               return IRQ_NONE;
+       }
+
        event = cpu_hw_evt->events[fidx];
        if (!event) {
                csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
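
The guard added above exists because find_first_bit() returns its size argument when the bitmap is empty, so an unchecked fidx would index one slot past events[]. Illustration (assumes RISCV_MAX_COUNTERS bounds the events[] array, as in this driver):

    DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS) = { 0 };
    unsigned long fidx = find_first_bit(used_hw_ctrs, RISCV_MAX_COUNTERS);
    /* fidx == RISCV_MAX_COUNTERS here: "no bit set", not a valid index. */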