aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-02 07:56:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-02 07:56:47 -0700
commitcc0356d6a02e064387c16a83cb96fe43ef33181e (patch)
tree6d989747153869c549030e8fdbee907e738124ec
parentfc02cb2b37fe2cbf1d3334b9f0f0eab9431766c4 (diff)
parenta72fdfd21e01c626273ddcf5ab740d4caef4be54 (diff)
downloadlinux-cc0356d6a02e064387c16a83cb96fe43ef33181e.tar.gz
Merge tag 'x86_core_for_v5.16_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 core updates from Borislav Petkov: - Do not #GP on userspace use of CLI/STI but pretend it was a NOP to keep old userspace from breaking. Adjust the corresponding iopl selftest to that. - Improve stack overflow warnings to say which stack got overflowed and raise the exception stack sizes to 2 pages since overflowing the single page of exception stack is very easy to do nowadays with all the tracing machinery enabled. With that, rip out the custom mapping of AMD SEV's too. - A bunch of changes in preparation for FGKASLR like supporting more than 64K section headers in the relocs tool, correct ORC lookup table size to cover the whole kernel .text and other adjustments. * tag 'x86_core_for_v5.16_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: selftests/x86/iopl: Adjust to the faked iopl CLI/STI usage vmlinux.lds.h: Have ORC lookup cover entire _etext - _stext x86/boot/compressed: Avoid duplicate malloc() implementations x86/boot: Allow a "silent" kaslr random byte fetch x86/tools/relocs: Support >64K section headers x86/sev: Make the #VC exception stacks part of the default stacks storage x86: Increase exception stack sizes x86/mm/64: Improve stack overflow warnings x86/iopl: Fake iopl(3) CLI/STI usage
-rw-r--r--arch/x86/boot/compressed/kaslr.c4
-rw-r--r--arch/x86/boot/compressed/misc.c3
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h8
-rw-r--r--arch/x86/include/asm/insn-eval.h1
-rw-r--r--arch/x86/include/asm/irq_stack.h37
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/processor.h1
-rw-r--r--arch/x86/include/asm/stacktrace.h10
-rw-r--r--arch/x86/include/asm/traps.h6
-rw-r--r--arch/x86/kernel/dumpstack_64.c6
-rw-r--r--arch/x86/kernel/process.c1
-rw-r--r--arch/x86/kernel/sev.c32
-rw-r--r--arch/x86/kernel/traps.c58
-rw-r--r--arch/x86/lib/insn-eval.c2
-rw-r--r--arch/x86/lib/kaslr.c18
-rw-r--r--arch/x86/mm/cpu_entry_area.c7
-rw-r--r--arch/x86/mm/fault.c20
-rw-r--r--arch/x86/tools/relocs.c103
-rw-r--r--include/asm-generic/vmlinux.lds.h3
-rw-r--r--include/linux/decompress/mm.h12
-rw-r--r--tools/testing/selftests/x86/iopl.c78
22 files changed, 284 insertions, 130 deletions
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 67c3208b668aba..411b268bc0a24d 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -32,10 +32,6 @@
#include <generated/utsrelease.h>
#include <asm/efi.h>
-/* Macros used by the included decompressor code below. */
-#define STATIC
-#include <linux/decompress/mm.h>
-
#define _SETUP
#include <asm/setup.h> /* For COMMAND_LINE_SIZE */
#undef _SETUP
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 743f13ea25c128..a4339cb2d247fd 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -28,6 +28,9 @@
/* Macros used by the included decompressor code below. */
#define STATIC static
+/* Define an externally visible malloc()/free(). */
+#define MALLOC_VISIBLE
+#include <linux/decompress/mm.h>
/*
* Provide definitions of memzero and memmove as some of the decompressors will
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 49bde196da9b93..16ed360b6692db 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -46,6 +46,8 @@ extern char _head[], _end[];
/* misc.c */
extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
+void *malloc(int size);
+void free(void *where);
extern struct boot_params *boot_params;
void __putstr(const char *s);
void __puthex(unsigned long value);
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 3d52b094850a90..dd5ea1bdf04c5f 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -10,6 +10,12 @@
#ifdef CONFIG_X86_64
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+#define VC_EXCEPTION_STKSZ EXCEPTION_STKSZ
+#else
+#define VC_EXCEPTION_STKSZ 0
+#endif
+
/* Macro to enforce the same ordering and stack sizes */
#define ESTACKS_MEMBERS(guardsize, optional_stack_size) \
char DF_stack_guard[guardsize]; \
@@ -28,7 +34,7 @@
/* The exception stacks' physical storage. No guard pages required */
struct exception_stacks {
- ESTACKS_MEMBERS(0, 0)
+ ESTACKS_MEMBERS(0, VC_EXCEPTION_STKSZ)
};
/* The effective cpu entry area mapping with guard pages. */
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 91d7182ad2d6e7..4ec3613551e3b5 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -21,6 +21,7 @@ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
int insn_get_code_seg_params(struct pt_regs *regs);
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip);
int insn_fetch_from_user(struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE]);
int insn_fetch_from_user_inatomic(struct pt_regs *regs,
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 7dcc2b0a4f09f4..ae9d40f6c7066e 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -77,11 +77,11 @@
* Function calls can clobber anything except the callee-saved
* registers. Tell the compiler.
*/
-#define call_on_irqstack(func, asm_call, argconstr...) \
+#define call_on_stack(stack, func, asm_call, argconstr...) \
{ \
register void *tos asm("r11"); \
\
- tos = ((void *)__this_cpu_read(hardirq_stack_ptr)); \
+ tos = ((void *)(stack)); \
\
asm_inline volatile( \
"movq %%rsp, (%[tos]) \n" \
@@ -98,6 +98,25 @@
); \
}
+#define ASM_CALL_ARG0 \
+ "call %P[__func] \n"
+
+#define ASM_CALL_ARG1 \
+ "movq %[arg1], %%rdi \n" \
+ ASM_CALL_ARG0
+
+#define ASM_CALL_ARG2 \
+ "movq %[arg2], %%rsi \n" \
+ ASM_CALL_ARG1
+
+#define ASM_CALL_ARG3 \
+ "movq %[arg3], %%rdx \n" \
+ ASM_CALL_ARG2
+
+#define call_on_irqstack(func, asm_call, argconstr...) \
+ call_on_stack(__this_cpu_read(hardirq_stack_ptr), \
+ func, asm_call, argconstr)
+
/* Macros to assert type correctness for run_*_on_irqstack macros */
#define assert_function_type(func, proto) \
static_assert(__builtin_types_compatible_p(typeof(&func), proto))
@@ -147,8 +166,7 @@
*/
#define ASM_CALL_SYSVEC \
"call irq_enter_rcu \n" \
- "movq %[arg1], %%rdi \n" \
- "call %P[__func] \n" \
+ ASM_CALL_ARG1 \
"call irq_exit_rcu \n"
#define SYSVEC_CONSTRAINTS , [arg1] "r" (regs)
@@ -168,12 +186,10 @@
*/
#define ASM_CALL_IRQ \
"call irq_enter_rcu \n" \
- "movq %[arg1], %%rdi \n" \
- "movl %[arg2], %%esi \n" \
- "call %P[__func] \n" \
+ ASM_CALL_ARG2 \
"call irq_exit_rcu \n"
-#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" (vector)
+#define IRQ_CONSTRAINTS , [arg1] "r" (regs), [arg2] "r" ((unsigned long)vector)
#define run_irq_on_irqstack_cond(func, regs, vector) \
{ \
@@ -186,9 +202,6 @@
}
#ifndef CONFIG_PREEMPT_RT
-#define ASM_CALL_SOFTIRQ \
- "call %P[__func] \n"
-
/*
* Macro to invoke __do_softirq on the irq stack. This is only called from
* task context when bottom halves are about to be reenabled and soft
@@ -198,7 +211,7 @@
#define do_softirq_own_stack() \
{ \
__this_cpu_write(hardirq_stack_inuse, true); \
- call_on_irqstack(__do_softirq, ASM_CALL_SOFTIRQ); \
+ call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \
__this_cpu_write(hardirq_stack_inuse, false); \
}
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index a8d4ad8565681b..e9e2c3ba59239a 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -15,7 +15,7 @@
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
+#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER)
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 887380fc9dfe1c..191878a65c61a9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -516,6 +516,7 @@ struct thread_struct {
*/
unsigned long iopl_emul;
+ unsigned int iopl_warn:1;
unsigned int sig_on_uaccess_err:1;
/*
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f248eb2ac2d4ad..3881b5333eb815 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -38,6 +38,16 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
bool get_stack_info_noinstr(unsigned long *stack, struct task_struct *task,
struct stack_info *info);
+static __always_inline
+bool get_stack_guard_info(unsigned long *stack, struct stack_info *info)
+{
+ /* make sure it's not in the stack proper */
+ if (get_stack_info_noinstr(stack, current, info))
+ return false;
+ /* but if it is in the page below it, we hit a guard */
+ return get_stack_info_noinstr((void *)stack + PAGE_SIZE, current, info);
+}
+
const char *stack_type_name(enum stack_type type);
static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 7f7200021bd132..6221be7cafc3b2 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -40,9 +40,9 @@ void math_emulate(struct math_emu_info *);
bool fault_in_kernel_space(unsigned long address);
#ifdef CONFIG_VMAP_STACK
-void __noreturn handle_stack_overflow(const char *message,
- struct pt_regs *regs,
- unsigned long fault_address);
+void __noreturn handle_stack_overflow(struct pt_regs *regs,
+ unsigned long fault_address,
+ struct stack_info *info);
#endif
#endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 5601b95944faed..6c5defd6569a3e 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -32,9 +32,15 @@ const char *stack_type_name(enum stack_type type)
{
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
+ if (type == STACK_TYPE_TASK)
+ return "TASK";
+
if (type == STACK_TYPE_IRQ)
return "IRQ";
+ if (type == STACK_TYPE_SOFTIRQ)
+ return "SOFTIRQ";
+
if (type == STACK_TYPE_ENTRY) {
/*
* On 64-bit, we have a generic entry stack that we
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index eb470be0e5aeb0..e9ee8b5263198e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -146,6 +146,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
p->thread.io_bitmap = NULL;
+ p->thread.iopl_warn = 0;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index c195ffe5804950..74f0ec95538486 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -46,16 +46,6 @@ static struct ghcb __initdata *boot_ghcb;
struct sev_es_runtime_data {
struct ghcb ghcb_page;
- /* Physical storage for the per-CPU IST stack of the #VC handler */
- char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
-
- /*
- * Physical storage for the per-CPU fall-back stack of the #VC handler.
- * The fall-back stack is used when it is not safe to switch back to the
- * interrupted stack in the #VC entry code.
- */
- char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
-
/*
* Reserve one page per CPU as backup storage for the unencrypted GHCB.
* It is needed when an NMI happens while the #VC handler uses the real
@@ -99,27 +89,6 @@ DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
/* Needed in vc_early_forward_exception */
void do_early_exception(struct pt_regs *regs, int trapnr);
-static void __init setup_vc_stacks(int cpu)
-{
- struct sev_es_runtime_data *data;
- struct cpu_entry_area *cea;
- unsigned long vaddr;
- phys_addr_t pa;
-
- data = per_cpu(runtime_data, cpu);
- cea = get_cpu_entry_area(cpu);
-
- /* Map #VC IST stack */
- vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
- pa = __pa(data->ist_stack);
- cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
-
- /* Map VC fall-back stack */
- vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
- pa = __pa(data->fallback_stack);
- cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
-}
-
static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
unsigned long sp = regs->sp;
@@ -788,7 +757,6 @@ void __init sev_es_init_vc_handling(void)
for_each_possible_cpu(cpu) {
alloc_runtime_data(cpu);
init_ghcb(cpu);
- setup_vc_stacks(cpu);
}
sev_es_setup_play_dead();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 43f70bc0176290..c9d566dcf89a04 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -313,17 +313,19 @@ out:
}
#ifdef CONFIG_VMAP_STACK
-__visible void __noreturn handle_stack_overflow(const char *message,
- struct pt_regs *regs,
- unsigned long fault_address)
+__visible void __noreturn handle_stack_overflow(struct pt_regs *regs,
+ unsigned long fault_address,
+ struct stack_info *info)
{
- printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
- (void *)fault_address, current->stack,
- (char *)current->stack + THREAD_SIZE - 1);
- die(message, regs, 0);
+ const char *name = stack_type_name(info->type);
+
+ printk(KERN_EMERG "BUG: %s stack guard page was hit at %p (stack is %p..%p)\n",
+ name, (void *)fault_address, info->begin, info->end);
+
+ die("stack guard page", regs, 0);
/* Be absolutely certain we don't return. */
- panic("%s", message);
+ panic("%s stack guard hit", name);
}
#endif
@@ -353,6 +355,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
#ifdef CONFIG_VMAP_STACK
unsigned long address = read_cr2();
+ struct stack_info info;
#endif
#ifdef CONFIG_X86_ESPFIX64
@@ -455,10 +458,8 @@ DEFINE_IDTENTRY_DF(exc_double_fault)
* stack even if the actual trigger for the double fault was
* something else.
*/
- if ((unsigned long)task_stack_page(tsk) - 1 - address < PAGE_SIZE) {
- handle_stack_overflow("kernel stack overflow (double-fault)",
- regs, address);
- }
+ if (get_stack_guard_info((void *)address, &info))
+ handle_stack_overflow(regs, address, &info);
#endif
pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
@@ -528,6 +529,36 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
#define GPFSTR "general protection fault"
+static bool fixup_iopl_exception(struct pt_regs *regs)
+{
+ struct thread_struct *t = &current->thread;
+ unsigned char byte;
+ unsigned long ip;
+
+ if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3)
+ return false;
+
+ if (insn_get_effective_ip(regs, &ip))
+ return false;
+
+ if (get_user(byte, (const char __user *)ip))
+ return false;
+
+ if (byte != 0xfa && byte != 0xfb)
+ return false;
+
+ if (!t->iopl_warn && printk_ratelimit()) {
+ pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx",
+ current->comm, task_pid_nr(current), ip);
+ print_vma_addr(KERN_CONT " in ", ip);
+ pr_cont("\n");
+ t->iopl_warn = 1;
+ }
+
+ regs->ip += 1;
+ return true;
+}
+
DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
{
char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
@@ -553,6 +584,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
tsk = current;
if (user_mode(regs)) {
+ if (fixup_iopl_exception(regs))
+ goto exit;
+
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_GP;
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index a1d24fdc07cf02..eb3ccffb9b9dcc 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1417,7 +1417,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
}
}
-static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
{
unsigned long seg_base = 0;
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index a536651164584c..2b3eb8c948a3d0 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -56,11 +56,14 @@ unsigned long kaslr_get_random_long(const char *purpose)
unsigned long raw, random = get_boot_seed();
bool use_i8254 = true;
- debug_putstr(purpose);
- debug_putstr(" KASLR using");
+ if (purpose) {
+ debug_putstr(purpose);
+ debug_putstr(" KASLR using");
+ }
if (has_cpuflag(X86_FEATURE_RDRAND)) {
- debug_putstr(" RDRAND");
+ if (purpose)
+ debug_putstr(" RDRAND");
if (rdrand_long(&raw)) {
random ^= raw;
use_i8254 = false;
@@ -68,7 +71,8 @@ unsigned long kaslr_get_random_long(const char *purpose)
}
if (has_cpuflag(X86_FEATURE_TSC)) {
- debug_putstr(" RDTSC");
+ if (purpose)
+ debug_putstr(" RDTSC");
raw = rdtsc();
random ^= raw;
@@ -76,7 +80,8 @@ unsigned long kaslr_get_random_long(const char *purpose)
}
if (use_i8254) {
- debug_putstr(" i8254");
+ if (purpose)
+ debug_putstr(" i8254");
random ^= i8254();
}
@@ -86,7 +91,8 @@ unsigned long kaslr_get_random_long(const char *purpose)
: "a" (random), "rm" (mix_const));
random += raw;
- debug_putstr("...\n");
+ if (purpose)
+ debug_putstr("...\n");
return random;
}
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index f5e1e60c9095ff..6c2f1b76a0b610 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -110,6 +110,13 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
cea_map_stack(NMI);
cea_map_stack(DB);
cea_map_stack(MCE);
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+ if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
+ cea_map_stack(VC);
+ cea_map_stack(VC2);
+ }
+ }
}
#else
static inline void percpu_setup_exception_stacks(unsigned int cpu)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 84a2c8c4af7358..4bfed53e210ec2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -32,6 +32,7 @@
#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
#include <asm/kvm_para.h> /* kvm_handle_async_pf */
#include <asm/vdso.h> /* fixup_vdso_exception() */
+#include <asm/irq_stack.h>
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -631,6 +632,9 @@ static noinline void
page_fault_oops(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
+#ifdef CONFIG_VMAP_STACK
+ struct stack_info info;
+#endif
unsigned long flags;
int sig;
@@ -649,9 +653,7 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
* that we're in vmalloc space to avoid this.
*/
if (is_vmalloc_addr((void *)address) &&
- (((unsigned long)current->stack - 1 - address < PAGE_SIZE) ||
- address - ((unsigned long)current->stack + THREAD_SIZE) < PAGE_SIZE)) {
- unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
+ get_stack_guard_info((void *)address, &info)) {
/*
* We're likely to be running with very little stack space
* left. It's plausible that we'd hit this condition but
@@ -662,13 +664,11 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
* and then double-fault, though, because we're likely to
* break the console driver and lose most of the stack dump.
*/
- asm volatile ("movq %[stack], %%rsp\n\t"
- "call handle_stack_overflow\n\t"
- "1: jmp 1b"
- : ASM_CALL_CONSTRAINT
- : "D" ("kernel stack overflow (page fault)"),
- "S" (regs), "d" (address),
- [stack] "rm" (stack));
+ call_on_stack(__this_cpu_ist_top_va(DF) - sizeof(void*),
+ handle_stack_overflow,
+ ASM_CALL_ARG3,
+ , [arg1] "r" (regs), [arg2] "r" (address), [arg3] "r" (&info));
+
unreachable();
}
#endif
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 27c82207d38781..3f5d39768287cb 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -14,6 +14,10 @@
static Elf_Ehdr ehdr;
static unsigned long shnum;
static unsigned int shstrndx;
+static unsigned int shsymtabndx;
+static unsigned int shxsymtabndx;
+
+static int sym_index(Elf_Sym *sym);
struct relocs {
uint32_t *offset;
@@ -35,6 +39,7 @@ struct section {
Elf_Shdr shdr;
struct section *link;
Elf_Sym *symtab;
+ Elf32_Word *xsymtab;
Elf_Rel *reltab;
char *strtab;
};
@@ -268,7 +273,7 @@ static const char *sym_name(const char *sym_strtab, Elf_Sym *sym)
name = sym_strtab + sym->st_name;
}
else {
- name = sec_name(sym->st_shndx);
+ name = sec_name(sym_index(sym));
}
return name;
}
@@ -338,6 +343,23 @@ static uint64_t elf64_to_cpu(uint64_t val)
#define elf_xword_to_cpu(x) elf32_to_cpu(x)
#endif
+static int sym_index(Elf_Sym *sym)
+{
+ Elf_Sym *symtab = secs[shsymtabndx].symtab;
+ Elf32_Word *xsymtab = secs[shxsymtabndx].xsymtab;
+ unsigned long offset;
+ int index;
+
+ if (sym->st_shndx != SHN_XINDEX)
+ return sym->st_shndx;
+
+ /* calculate offset of sym from head of table. */
+ offset = (unsigned long)sym - (unsigned long)symtab;
+ index = offset / sizeof(*sym);
+
+ return elf32_to_cpu(xsymtab[index]);
+}
+
static void read_ehdr(FILE *fp)
{
if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) {
@@ -471,31 +493,60 @@ static void read_strtabs(FILE *fp)
static void read_symtabs(FILE *fp)
{
int i,j;
+
for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
- if (sec->shdr.sh_type != SHT_SYMTAB) {
+ int num_syms;
+
+ switch (sec->shdr.sh_type) {
+ case SHT_SYMTAB_SHNDX:
+ sec->xsymtab = malloc(sec->shdr.sh_size);
+ if (!sec->xsymtab) {
+ die("malloc of %" FMT " bytes for xsymtab failed\n",
+ sec->shdr.sh_size);
+ }
+ if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
+ die("Seek to %" FMT " failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
+ }
+ if (fread(sec->xsymtab, 1, sec->shdr.sh_size, fp)
+ != sec->shdr.sh_size) {
+ die("Cannot read extended symbol table: %s\n",
+ strerror(errno));
+ }
+ shxsymtabndx = i;
+ continue;
+
+ case SHT_SYMTAB:
+ num_syms = sec->shdr.sh_size / sizeof(Elf_Sym);
+
+ sec->symtab = malloc(sec->shdr.sh_size);
+ if (!sec->symtab) {
+ die("malloc of %" FMT " bytes for symtab failed\n",
+ sec->shdr.sh_size);
+ }
+ if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
+ die("Seek to %" FMT " failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
+ }
+ if (fread(sec->symtab, 1, sec->shdr.sh_size, fp)
+ != sec->shdr.sh_size) {
+ die("Cannot read symbol table: %s\n",
+ strerror(errno));
+ }
+ for (j = 0; j < num_syms; j++) {
+ Elf_Sym *sym = &sec->symtab[j];
+
+ sym->st_name = elf_word_to_cpu(sym->st_name);
+ sym->st_value = elf_addr_to_cpu(sym->st_value);
+ sym->st_size = elf_xword_to_cpu(sym->st_size);
+ sym->st_shndx = elf_half_to_cpu(sym->st_shndx);
+ }
+ shsymtabndx = i;
+ continue;
+
+ default:
continue;
- }
- sec->symtab = malloc(sec->shdr.sh_size);
- if (!sec->symtab) {
- die("malloc of %" FMT " bytes for symtab failed\n",
- sec->shdr.sh_size);
- }
- if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
- die("Seek to %" FMT " failed: %s\n",
- sec->shdr.sh_offset, strerror(errno));
- }
- if (fread(sec->symtab, 1, sec->shdr.sh_size, fp)
- != sec->shdr.sh_size) {
- die("Cannot read symbol table: %s\n",
- strerror(errno));
- }
- for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Sym); j++) {
- Elf_Sym *sym = &sec->symtab[j];
- sym->st_name = elf_word_to_cpu(sym->st_name);
- sym->st_value = elf_addr_to_cpu(sym->st_value);
- sym->st_size = elf_xword_to_cpu(sym->st_size);
- sym->st_shndx = elf_half_to_cpu(sym->st_shndx);
}
}
}
@@ -762,7 +813,9 @@ static void percpu_init(void)
*/
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
{
- return (sym->st_shndx == per_cpu_shndx) &&
+ int shndx = sym_index(sym);
+
+ return (shndx == per_cpu_shndx) &&
strcmp(symname, "__init_begin") &&
strcmp(symname, "__per_cpu_load") &&
strncmp(symname, "init_per_cpu_", 13);
@@ -1095,7 +1148,7 @@ static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
sec_name(sec->shdr.sh_info),
rel_type(ELF_R_TYPE(rel->r_info)),
symname,
- sec_name(sym->st_shndx));
+ sec_name(sym_index(sym)));
return 0;
}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8771c435f34bb0..0c0d3608ea1183 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -875,10 +875,11 @@
KEEP(*(.orc_unwind)) \
__stop_orc_unwind = .; \
} \
+ text_size = _etext - _stext; \
. = ALIGN(4); \
.orc_lookup : AT(ADDR(.orc_lookup) - LOAD_OFFSET) { \
orc_lookup = .; \
- . += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) / \
+ . += (((text_size + LOOKUP_BLOCK_SIZE - 1) / \
LOOKUP_BLOCK_SIZE) + 1) * 4; \
orc_lookup_end = .; \
}
diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
index 868e9eacd69e5e..9192986b1a7313 100644
--- a/include/linux/decompress/mm.h
+++ b/include/linux/decompress/mm.h
@@ -25,13 +25,21 @@
#define STATIC_RW_DATA static
#endif
+/*
+ * When an architecture needs to share the malloc()/free() implementation
+ * between compilation units, it needs to have non-local visibility.
+ */
+#ifndef MALLOC_VISIBLE
+#define MALLOC_VISIBLE static
+#endif
+
/* A trivial malloc implementation, adapted from
* malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
*/
STATIC_RW_DATA unsigned long malloc_ptr;
STATIC_RW_DATA int malloc_count;
-static void *malloc(int size)
+MALLOC_VISIBLE void *malloc(int size)
{
void *p;
@@ -52,7 +60,7 @@ static void *malloc(int size)
return p;
}
-static void free(void *where)
+MALLOC_VISIBLE void free(void *where)
{
malloc_count--;
if (!malloc_count)
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
index bab2f6e06b63d0..7e3e09c1abac67 100644
--- a/tools/testing/selftests/x86/iopl.c
+++ b/tools/testing/selftests/x86/iopl.c
@@ -85,48 +85,88 @@ static void expect_gp_outb(unsigned short port)
printf("[OK]\toutb to 0x%02hx failed\n", port);
}
-static bool try_cli(void)
+#define RET_FAULTED 0
+#define RET_FAIL 1
+#define RET_EMUL 2
+
+static int try_cli(void)
{
+ unsigned long flags;
+
sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
if (sigsetjmp(jmpbuf, 1) != 0) {
- return false;
+ return RET_FAULTED;
} else {
- asm volatile ("cli");
- return true;
+ asm volatile("cli; pushf; pop %[flags]"
+ : [flags] "=rm" (flags));
+
+ /* X86_FLAGS_IF */
+ if (!(flags & (1 << 9)))
+ return RET_FAIL;
+ else
+ return RET_EMUL;
}
clearhandler(SIGSEGV);
}
-static bool try_sti(void)
+static int try_sti(bool irqs_off)
{
+ unsigned long flags;
+
sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
if (sigsetjmp(jmpbuf, 1) != 0) {
- return false;
+ return RET_FAULTED;
} else {
- asm volatile ("sti");
- return true;
+ asm volatile("sti; pushf; pop %[flags]"
+ : [flags] "=rm" (flags));
+
+ /* X86_FLAGS_IF */
+ if (irqs_off && (flags & (1 << 9)))
+ return RET_FAIL;
+ else
+ return RET_EMUL;
}
clearhandler(SIGSEGV);
}
-static void expect_gp_sti(void)
+static void expect_gp_sti(bool irqs_off)
{
- if (try_sti()) {
+ int ret = try_sti(irqs_off);
+
+ switch (ret) {
+ case RET_FAULTED:
+ printf("[OK]\tSTI faulted\n");
+ break;
+ case RET_EMUL:
+ printf("[OK]\tSTI NOPped\n");
+ break;
+ default:
printf("[FAIL]\tSTI worked\n");
nerrs++;
- } else {
- printf("[OK]\tSTI faulted\n");
}
}
-static void expect_gp_cli(void)
+/*
+ * Returns whether it managed to disable interrupts.
+ */
+static bool test_cli(void)
{
- if (try_cli()) {
+ int ret = try_cli();
+
+ switch (ret) {
+ case RET_FAULTED:
+ printf("[OK]\tCLI faulted\n");
+ break;
+ case RET_EMUL:
+ printf("[OK]\tCLI NOPped\n");
+ break;
+ default:
printf("[FAIL]\tCLI worked\n");
nerrs++;
- } else {
- printf("[OK]\tCLI faulted\n");
+ return true;
}
+
+ return false;
}
int main(void)
@@ -152,8 +192,7 @@ int main(void)
}
/* Make sure that CLI/STI are blocked even with IOPL level 3 */
- expect_gp_cli();
- expect_gp_sti();
+ expect_gp_sti(test_cli());
expect_ok_outb(0x80);
/* Establish an I/O bitmap to test the restore */
@@ -204,8 +243,7 @@ int main(void)
printf("[RUN]\tparent: write to 0x80 (should fail)\n");
expect_gp_outb(0x80);
- expect_gp_cli();
- expect_gp_sti();
+ expect_gp_sti(test_cli());
/* Test the capability checks. */
printf("\tiopl(3)\n");