When the BPF routine doesn't call any function, the non volatile
registers can be reallocated to volatile registers in order to
avoid having to save them/restore on the stack.
Before this patch, the test #359 ADD default X is:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 92 e1 00 2c stw r23,44(r1)
14: 93 01 00 30 stw r24,48(r1)
18: 93 21 00 34 stw r25,52(r1)
1c: 93 41 00 38 stw r26,56(r1)
20: 39 80 00 00 li r12,0
24: 39 60 00 00 li r11,0
28: 3b 40 00 00 li r26,0
2c: 3b 20 00 00 li r25,0
30: 7c 98 23 78 mr r24,r4
34: 7c 77 1b 78 mr r23,r3
38: 39 80 00 42 li r12,66
3c: 39 60 00 00 li r11,0
40: 7d 8c d2 14 add r12,r12,r26
44: 39 60 00 00 li r11,0
48: 7d 83 63 78 mr r3,r12
4c: 82 e1 00 2c lwz r23,44(r1)
50: 83 01 00 30 lwz r24,48(r1)
54: 83 21 00 34 lwz r25,52(r1)
58: 83 41 00 38 lwz r26,56(r1)
5c: 38 21 00 50 addi r1,r1,80
60: 4e 80 00 20 blr
After this patch, the same test has become:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 39 80 00 00 li r12,0
14: 39 60 00 00 li r11,0
18: 39 00 00 00 li r8,0
1c: 38 e0 00 00 li r7,0
20: 7c 86 23 78 mr r6,r4
24: 7c 65 1b 78 mr r5,r3
28: 39 80 00 42 li r12,66
2c: 39 60 00 00 li r11,0
30: 7d 8c 42 14 add r12,r12,r8
34: 39 60 00 00 li r11,0
38: 7d 83 63 78 mr r3,r12
3c: 38 21 00 50 addi r1,r1,80
40: 4e 80 00 20 blr
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu
#define SEEN_STACK 0x40000000 /* uses BPF stack */
#define SEEN_TAILCALL 0x80000000 /* uses tail calls */
+#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
+#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */
+
+#ifdef CONFIG_PPC64
+extern const int b2p[MAX_BPF_JIT_REG + 2];
+#else
+extern const int b2p[MAX_BPF_JIT_REG + 1];
+#endif
+
struct codegen_context {
/*
* This is used to track register usage as well
unsigned int seen;
unsigned int idx;
unsigned int stack_size;
+ int b2p[ARRAY_SIZE(b2p)];
};
static inline void bpf_flush_icache(void *start, void *end)
ctx->seen |= 1 << (31 - i);
}
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+ ctx->seen &= ~(1 << (31 - i));
+}
+
void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
u32 *addrs, bool extra_pass);
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
#endif
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
/* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 2] = {
/* function return value */
[BPF_REG_0] = 8,
/* function arguments */
}
memset(&cgctx, 0, sizeof(struct codegen_context));
+ memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
/* Make sure that the stack is quadword aligned. */
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
}
}
+ bpf_jit_realloc_regs(&cgctx);
/*
* Pretend to build prologue, given the features we've seen. This will
* update ctgtx.idx as it pretends to output instructions, then we can
#define TMP_REG (MAX_BPF_JIT_REG + 0)
/* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 1] = {
/* function return value */
[BPF_REG_0] = 12,
/* function arguments */
static int bpf_to_ppc(struct codegen_context *ctx, int reg)
{
- return b2p[reg];
+ return ctx->b2p[reg];
}
/* PPC NVR range -- update this if we ever use NVRs below r17 */
return BPF_PPC_STACKFRAME(ctx) - 4;
}
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+ if (ctx->seen & SEEN_FUNC)
+ return;
+
+ while (ctx->seen & SEEN_NVREG_MASK &&
+ (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+ int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab));
+ int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
+ int i;
+
+ for (i = BPF_REG_0; i <= TMP_REG; i++) {
+ if (ctx->b2p[i] != old)
+ continue;
+ ctx->b2p[i] = new;
+ bpf_set_seen_register(ctx, new);
+ bpf_clear_seen_register(ctx, old);
+ if (i != TMP_REG) {
+ bpf_set_seen_register(ctx, new - 1);
+ bpf_clear_seen_register(ctx, old - 1);
+ }
+ break;
+ }
+ }
+}
+
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
BUG();
}
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+}
+
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;