From eb4b33e17c980e1285dcc46e33931c8505d92e06 Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi
Date: Fri, 26 Apr 2024 13:14:55 +0200
Subject: [PATCH] Save kernel FPU registers when performing context switch

Signed-off-by: Maurizio Lombardi
---
 arch/s390/Kconfig                 |   1 -
 arch/s390/include/asm/fpu/api.h   |   3 +
 arch/s390/include/asm/processor.h |   6 +-
 arch/s390/include/asm/switch_to.h |   2 +
 arch/s390/kernel/fpu.c            | 208 ++++++++++++++++++++++++++++++
 arch/s390/kernel/process.c        |   3 +-
 arch/s390/kernel/setup.c          |  17 ---
 7 files changed, 217 insertions(+), 23 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 81edac4b35a2..e5c2634b0a1e 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -117,7 +117,6 @@ config S390
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
-	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index b714ed0ef688..4d84dde0c2d7 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -47,6 +47,9 @@
 #include
 #include
 
+
+void save_kernel_fpu_regs(struct thread_struct *thread);
+void restore_kernel_fpu_regs(struct thread_struct *thread);
 void save_fpu_regs(void);
 void load_fpu_regs(void);
 void __load_fpu_regs(void);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 1924d2dec100..67475bdf6982 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -119,6 +119,7 @@ struct thread_struct {
 	unsigned int gmap_write_flag;		/* gmap fault write indication */
 	unsigned int gmap_int_code;		/* int code of last gmap fault */
 	unsigned int gmap_pfault;		/* signal of a pending guest pfault */
+	int kfpu_flags;				/* kernel fpu flags */
 
 	/* Per-thread information related to debugging */
 	struct per_regs per_user;		/* User specified PER registers */
@@ -134,10 +135,7 @@ struct thread_struct {
 	struct gs_cb *gs_cb;			/* Current guarded storage cb */
 	struct gs_cb *gs_bc_cb;			/* Broadcast guarded storage cb */
 	struct pgm_tdb trap_tdb;		/* Transaction abort diagnose block */
-	/*
-	 * Warning: 'fpu' is dynamically-sized. It *MUST* be at
-	 * the end.
-	 */
+	struct fpu kfpu;			/* Kernel FP and VX register save area */
 	struct fpu fpu;				/* FP and VX register save area */
 };
 
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index c61b2cc1a8a8..da3557921b70 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -36,10 +36,12 @@ static inline void restore_access_regs(unsigned int *acrs)
 	 * soon as the next task returns to user space			\
 	 */								\
 	save_fpu_regs();						\
+	save_kernel_fpu_regs(&prev->thread);				\
 	save_access_regs(&prev->thread.acrs[0]);			\
 	save_ri_cb(prev->thread.ri_cb);					\
 	save_gs_cb(prev->thread.gs_cb);					\
 	update_cr_regs(next);						\
+	restore_kernel_fpu_regs(&next->thread);				\
 	restore_access_regs(&next->thread.acrs[0]);			\
 	restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);		\
 	restore_gs_cb(next->thread.gs_cb);				\
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index d864c9a325e2..baf874020946 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 
 asm(".include \"asm/vx-insn.h\"\n");
 
@@ -263,3 +264,210 @@ void save_fpu_regs(void)
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(save_fpu_regs);
+
+/* Store the floating-point control register into *fpc. */
+static __always_inline void fpu_stfpc(unsigned int *fpc)
+{
+	instrument_write(fpc, sizeof(*fpc));
+	asm volatile("stfpc %[fpc]"
+		     : [fpc] "=Q" (*fpc)
+		     :
+		     : "memory");
+}
+
+/* Load the floating-point control register from *fpc. */
+static __always_inline void fpu_lfpc(unsigned int *fpc)
+{
+	instrument_read(fpc, sizeof(*fpc));
+	asm volatile("lfpc %[fpc]"
+		     :
+		     : [fpc] "Q" (*fpc)
+		     : "memory");
+}
+
+/* Store floating-point register number fpr into *reg. */
+static __always_inline void fpu_std(unsigned short fpr, freg_t *reg)
+{
+	instrument_write(reg, sizeof(*reg));
+	asm volatile("std %[fpr],%[reg]\n"
+		     : [reg] "=Q" (reg->ui)
+		     : [fpr] "I" (fpr)
+		     : "memory");
+}
+
+/*
+ * Store floating-point registers 0-15 to fprs[0], fprs[offset], ...,
+ * fprs[15 * offset]; offset is the stride in freg_t elements.
+ */
+static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset)
+{
+	fpu_std(0, &fprs[0 * offset]);
+	fpu_std(1, &fprs[1 * offset]);
+	fpu_std(2, &fprs[2 * offset]);
+	fpu_std(3, &fprs[3 * offset]);
+	fpu_std(4, &fprs[4 * offset]);
+	fpu_std(5, &fprs[5 * offset]);
+	fpu_std(6, &fprs[6 * offset]);
+	fpu_std(7, &fprs[7 * offset]);
+	fpu_std(8, &fprs[8 * offset]);
+	fpu_std(9, &fprs[9 * offset]);
+	fpu_std(10, &fprs[10 * offset]);
+	fpu_std(11, &fprs[11 * offset]);
+	fpu_std(12, &fprs[12 * offset]);
+	fpu_std(13, &fprs[13 * offset]);
+	fpu_std(14, &fprs[14 * offset]);
+	fpu_std(15, &fprs[15 * offset]);
+}
+
+/* Store floating-point registers 0-15 into fprs[0..15]. */
+static __always_inline void save_fp_regs(freg_t *fprs)
+{
+	__save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
+}
+
+/*
+ * fpu_vlm(v1, v3, vxrs) - load vector registers v1..v3 from the array at
+ * vxrs with a single VLM; evaluates to the number of registers loaded.
+ * The clang variant places the address in general register 1 with LA
+ * (and lists it as clobbered); the other variant formats the memory
+ * operand with the %O/%R operand modifiers.
+ */
+#ifdef CONFIG_CC_IS_CLANG
+
+#define fpu_vlm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_read(_v, size);					\
+	asm volatile("\n"						\
+		" la 1,%[vxrs]\n"					\
+		" VLM %[v1],%[v3],0,1\n"				\
+		:							\
+		: [vxrs] "R" (*_v),					\
+		  [v1] "I" (_v1), [v3] "I" (_v3)			\
+		: "memory", "1");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#else /* CONFIG_CC_IS_CLANG */
+
+#define fpu_vlm(_v1, _v3, _vxrs)					\
+({									\
+	unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);	\
+	struct {							\
+		__vector128 _v[(_v3) - (_v1) + 1];			\
+	} *_v = (void *)(_vxrs);					\
+									\
+	instrument_read(_v, size);					\
+	asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n"		\
+		     :							\
+		     : [vxrs] "Q" (*_v),				\
+		       [v1] "I" (_v1), [v3] "I" (_v3)			\
+		     : "memory");					\
+	(_v3) - (_v1) + 1;						\
+})
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+/* Load vector registers 0-31 from vxrs[0..31], 16 registers per VLM. */
+static __always_inline void load_vx_regs(__vector128 *vxrs)
+{
+	fpu_vlm(0, 15, &vxrs[0]);
+	fpu_vlm(16, 31, &vxrs[16]);
+}
+
+/* Load floating-point register number fpr from *reg. */
+static __always_inline void fpu_ld(unsigned short fpr, freg_t *reg)
+{
+	instrument_read(reg, sizeof(*reg));
+	asm volatile("ld %[fpr],%[reg]\n"
+		     :
+		     : [fpr] "I" (fpr), [reg] "Q" (reg->ui)
+		     : "memory");
+}
+
+/*
+ * Load floating-point registers 0-15 from fprs[0], fprs[offset], ...,
+ * fprs[15 * offset]; offset is the stride in freg_t elements.
+ */
+static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset)
+{
+	fpu_ld(0, &fprs[0 * offset]);
+	fpu_ld(1, &fprs[1 * offset]);
+	fpu_ld(2, &fprs[2 * offset]);
+	fpu_ld(3, &fprs[3 * offset]);
+	fpu_ld(4, &fprs[4 * offset]);
+	fpu_ld(5, &fprs[5 * offset]);
+	fpu_ld(6, &fprs[6 * offset]);
+	fpu_ld(7, &fprs[7 * offset]);
+	fpu_ld(8, &fprs[8 * offset]);
+	fpu_ld(9, &fprs[9 * offset]);
+	fpu_ld(10, &fprs[10 * offset]);
+	fpu_ld(11, &fprs[11 * offset]);
+	fpu_ld(12, &fprs[12 * offset]);
+	fpu_ld(13, &fprs[13 * offset]);
+	fpu_ld(14, &fprs[14 * offset]);
+	fpu_ld(15, &fprs[15 * offset]);
+}
+
+/* Load floating-point registers 0-15 from fprs[0..15]. */
+static __always_inline void load_fp_regs(freg_t *fprs)
+{
+	__load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
+}
+
+/**
+ * save_kernel_fpu_regs - store FPU state into a thread's kfpu area
+ * @thread: thread whose kfpu save area is written
+ *
+ * No-op when @thread->kfpu_flags is zero. Otherwise stores the
+ * floating-point control register plus either the vector registers
+ * (when the machine has the vector facility) or floating-point
+ * registers 0-15.
+ */
+void save_kernel_fpu_regs(struct thread_struct *thread)
+{
+	struct fpu *state = &thread->kfpu;
+
+	if (!thread->kfpu_flags)
+		return;
+	fpu_stfpc(&state->fpc);
+	/*
+	 * Use MACHINE_HAS_VX, the vector-availability predicate the rest
+	 * of the s390 code relies on, rather than the raw facility bit.
+	 * NOTE(review): presumably the two differ when vector support is
+	 * disabled at boot - confirm against the target tree.
+	 */
+	if (likely(MACHINE_HAS_VX))
+		save_vx_regs(state->vxrs);
+	else
+		save_fp_regs(state->fprs);
+}
+EXPORT_SYMBOL(save_kernel_fpu_regs);
+
+/**
+ * restore_kernel_fpu_regs - load FPU state from a thread's kfpu area
+ * @thread: thread whose kfpu save area is read
+ *
+ * No-op when @thread->kfpu_flags is zero. Otherwise loads the
+ * floating-point control register plus either vector registers 0-31
+ * (when the machine has the vector facility) or floating-point
+ * registers 0-15.
+ */
+void restore_kernel_fpu_regs(struct thread_struct *thread)
+{
+	struct fpu *state = &thread->kfpu;
+
+	if (!thread->kfpu_flags)
+		return;
+	fpu_lfpc(&state->fpc);
+	/* Same predicate as save_kernel_fpu_regs() so both sides agree */
+	if (likely(MACHINE_HAS_VX))
+		load_vx_regs(state->vxrs);
+	else
+		load_fp_regs(state->fprs);
+}
+EXPORT_SYMBOL(restore_kernel_fpu_regs);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index d5119e039d85..faa08be493a2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -89,8 +89,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	 */
 	save_fpu_regs();
 
-	memcpy(dst, src, arch_task_struct_size);
+	*dst = *src;
 	dst->thread.fpu.regs = dst->thread.fpu.fprs;
+	dst->thread.kfpu_flags = 0;
 
 	/*
 	 * Don't transfer over the runtime instrumentation or the guarded
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 40cbcda40926..9b235a39681d 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1041,22 +1041,6 @@ static void __init setup_randomness(void)
 		static_branch_enable(&s390_arch_random_available);
 }
 
-/*
- * Find the correct size for the task_struct. This depends on
- * the size of the struct fpu at the end of the thread_struct
- * which is embedded in the task_struct.
- */
-static void __init setup_task_size(void)
-{
-	int task_size = sizeof(struct task_struct);
-
-	if (!MACHINE_HAS_VX) {
-		task_size -= sizeof(__vector128) * __NUM_VXRS;
-		task_size += sizeof(freg_t) * __NUM_FPRS;
-	}
-	arch_task_struct_size = task_size;
-}
-
 /*
  * Issue diagnose 318 to set the control program name and
  * version codes.
@@ -1152,7 +1136,6 @@ void __init setup_arch(char **cmdline_p)
 
 	os_info_init();
 	setup_ipl();
-	setup_task_size();
 	setup_control_program_code();
 
 	/* Do some memory reservations *before* memory is added to memblock */
-- 
2.39.3