mm: optimize thread stack usage on ARMv7 [1/1]

PD#SWPL-2681

Problem:
Kernel stack usage is large when running many tasks.

Solution:
Map kernel stacks into the module address space and fault
stack pages in on demand. This can save about 50% of the
memory used for kernel stacks.

Verify:
p212

Change-Id: Ie894bc8f00cb525ddf8ac63c6d99d9c6e937fdc0
Signed-off-by: tao zeng <tao.zeng@amlogic.com>
Author:    tao zeng
Date:      2018-11-30 18:01:31 +08:00
Committer: Luke Go
Parent:    4b9ca77d52
Commit:    003b4e1f9b

19 changed files with 656 additions and 48 deletions

View File

@@ -30,7 +30,6 @@ CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_JUMP_LABEL=y
CONFIG_CC_STACKPROTECTOR_STRONG=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y

View File

@@ -208,6 +208,10 @@
THUMB( mov \rd, sp )
THUMB( lsr \rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT )
mov \rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT
#ifdef CONFIG_AMLOGIC_VMAP
add \rd, \rd, #TI_THREAD_SIZE
sub \rd, \rd, #TI_THREAD_INFO_SIZE
#endif
.endm
/*

View File

@@ -33,18 +33,34 @@
#ifdef CONFIG_MMU
#ifdef CONFIG_AMLOGIC_VMAP
/*
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
*/
#define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(SZ_64M))
#define TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M)
#else /* CONFIG_AMLOGIC_VMAP */
/*
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
*/
#define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M))
#define TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M)
#endif /* CONFIG_AMLOGIC_VMAP */
/*
* The maximum size of a 26-bit user space task.
*/
#define TASK_SIZE_26 (UL(1) << 26)
#ifdef CONFIG_AMLOGIC_VMAP
#ifndef CONFIG_THUMB2_KERNEL
#define MODULES_VADDR (PAGE_OFFSET - SZ_64M)
#else
#define MODULES_VADDR (PAGE_OFFSET - SZ_8M)
#endif
#else /* CONFIG_AMLOGIC_VMAP */
/*
* The module space lives between the addresses given by TASK_SIZE
* and PAGE_OFFSET - it must be within 32MB of the kernel text.
@@ -55,6 +71,7 @@
/* smaller range for Thumb-2 symbols relocation (2^24)*/
#define MODULES_VADDR (PAGE_OFFSET - SZ_8M)
#endif
#endif /* CONFIG_AMLOGIC_VMAP */
#if TASK_SIZE > MODULES_VADDR
#error Top of user space clashes with start of module space

View File

@@ -164,9 +164,16 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
return regs->ARM_sp;
}
#ifdef CONFIG_AMLOGIC_VMAP
#define current_pt_regs(void) ({ (struct pt_regs *) \
((current_stack_pointer | (THREAD_SIZE - 1)) - 7 - \
THREAD_INFO_SIZE) - 1; \
})
#else
#define current_pt_regs(void) ({ (struct pt_regs *) \
((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \
})
#endif
#endif /* __ASSEMBLY__ */
#endif

View File

@@ -18,7 +18,15 @@
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#ifdef CONFIG_AMLOGIC_VMAP
#define THREAD_INFO_SIZE (sizeof(struct thread_info))
#define THREAD_INFO_OFFSET (THREAD_SIZE - THREAD_INFO_SIZE)
#define THREAD_START_SP (THREAD_SIZE - 8 - THREAD_INFO_SIZE)
#define VMAP_RESERVE_SIZE (8 + 4 * 4)
#define VMAP_BACK_SP 12
#else
#define THREAD_START_SP (THREAD_SIZE - 8)
#endif
#ifndef __ASSEMBLY__
@@ -88,11 +96,20 @@ register unsigned long current_stack_pointer asm ("sp");
*/
static inline struct thread_info *current_thread_info(void) __attribute_const__;
#ifdef CONFIG_AMLOGIC_VMAP
static inline struct thread_info *current_thread_info(void)
{
return (struct thread_info *)
(((current_stack_pointer & ~(THREAD_SIZE - 1)) +
THREAD_INFO_OFFSET));
}
#else
static inline struct thread_info *current_thread_info(void)
{
return (struct thread_info *)
(current_stack_pointer & ~(THREAD_SIZE - 1));
}
#endif
#define thread_saved_pc(tsk) \
((unsigned long)(task_thread_info(tsk)->cpu_context.pc))

View File

@@ -183,5 +183,14 @@ int main(void)
#ifdef CONFIG_VDSO
DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store));
#endif
#ifdef CONFIG_AMLOGIC_VMAP
DEFINE(TI_THREAD_START_SP, THREAD_START_SP);
DEFINE(TI_VMAP_BACK_SP, VMAP_BACK_SP);
DEFINE(TI_VMAP_RESERVE_LEN, VMAP_RESERVE_SIZE);
DEFINE(TI_THREAD_SIZE, THREAD_SIZE);
DEFINE(TI_THREAD_INFO_SIZE, sizeof(struct thread_info));
#endif
return 0;
}

View File

@@ -39,6 +39,12 @@
* Interrupt handling.
*/
.macro irq_handler
#ifdef CONFIG_AMLOGIC_VMAP
mov r8, sp /* back up sp */
mov r0, sp
bl irq_stack_entry /* switch IRQ stack */
mov sp, r0
#endif
#ifdef CONFIG_MULTI_IRQ_HANDLER
ldr r1, =handle_arch_irq
mov r0, sp
@@ -48,6 +54,9 @@
arch_irq_handler_default
#endif
9997:
#ifdef CONFIG_AMLOGIC_VMAP
mov sp, r8 /* switch stack back to task stack */
#endif
.endm
.macro pabt_helper
@@ -149,10 +158,24 @@ ENDPROC(__und_invalid)
#define SPFIX(code...)
#endif
#ifdef CONFIG_AMLOGIC_VMAP
.macro svc_entry, stack_hole=0, trace=1, uaccess=1, vmap=0
#else
.macro svc_entry, stack_hole=0, trace=1, uaccess=1
#endif
UNWIND(.fnstart )
UNWIND(.save {r0 - pc} )
#ifdef CONFIG_AMLOGIC_VMAP
.if \vmap
/* keep using the abort-mode stack */
str sp, [r0, #TI_VMAP_BACK_SP]
sub sp, r0, #(SVC_REGS_SIZE + \stack_hole - 4)
.else
sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4)
.endif
#else /* !CONFIG_AMLOGIC_VMAP */
sub sp, sp, #(SVC_REGS_SIZE + \stack_hole - 4)
#endif /* CONFIG_AMLOGIC_VMAP */
#ifdef CONFIG_THUMB2_KERNEL
SPFIX( str r0, [sp] ) @ temporarily saved
SPFIX( mov r0, sp )
@@ -167,7 +190,15 @@ ENDPROC(__und_invalid)
ldmia r0, {r3 - r5}
add r7, sp, #S_SP - 4 @ here for interlock avoidance
mov r6, #-1 @ "" "" "" ""
#ifdef CONFIG_AMLOGIC_VMAP
.if \vmap
ldr r2, [sp, #(TI_VMAP_BACK_SP + SVC_REGS_SIZE - 4)]
.else
add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4)
.endif
#else
add r2, sp, #(SVC_REGS_SIZE + \stack_hole - 4)
#endif
SPFIX( addeq r2, r2, #4 )
str r3, [sp, #-4]! @ save the "real" r0 copied
@ from the exception stack
@@ -185,7 +216,44 @@ ENDPROC(__und_invalid)
@
stmia r7, {r2 - r6}
#ifdef CONFIG_AMLOGIC_VMAP
.if \vmap
/*
* get the faulting task's thread_info
*/
ldr r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_BACK_SP)]
mrc p15, 0, r1, c6, c0, 0 @ get FAR
bl pmd_check
mov tsk, r0
mov tsk, tsk, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT
mov tsk, tsk, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT
add tsk, tsk, #TI_THREAD_SIZE
sub tsk, tsk, #TI_THREAD_INFO_SIZE
/*
* copy the important thread_info members of the current
* task onto the vmap stack
*/
ldr r0, [tsk, #TI_FLAGS]
ldr r1, [tsk, #TI_PREEMPT]
str r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_FLAGS)]
str r1, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_PREEMPT)]
ldr r0, [tsk, #TI_ADDR_LIMIT]
ldr r1, [tsk, #TI_TASK]
str r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_ADDR_LIMIT)]
str r1, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_TASK)]
ldr r0, [tsk, #TI_CPU]
ldr r1, [tsk, #TI_CPU_DOMAIN]
str r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_CPU)]
str r1, [sp, #(SVC_REGS_SIZE + TI_VMAP_RESERVE_LEN + TI_CPU_DOMAIN)]
.else
get_thread_info tsk
.endif
#else
get_thread_info tsk
#endif
ldr r0, [tsk, #TI_ADDR_LIMIT]
mov r1, #TASK_SIZE
str r1, [tsk, #TI_ADDR_LIMIT]
@@ -205,7 +273,28 @@ ENDPROC(__und_invalid)
.align 5
__dabt_svc:
#ifdef CONFIG_AMLOGIC_VMAP
svc_entry uaccess=0, vmap=1
mrc p15, 0, r1, c5, c0, 0 @ get FSR
mrc p15, 0, r0, c6, c0, 0 @ get FAR
mov r2, sp
uaccess_disable ip @ disable userspace access
bl handle_vmap_fault
cmp r0, #0
bne .L__dabt_svc_next
/* handled by vmap fault handler */
svc_exit r5, vmap=1 @ return from exception
.L__dabt_svc_next:
/* rebuild context for the normal abort handler */
ldr r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_BACK_SP)]
sub r0, #SVC_REGS_SIZE
mov r1, sp
mov r2, #SVC_REGS_SIZE
bl memcpy /* copy saved regs back to the task stack */
mov sp, r0
#else
svc_entry uaccess=0
#endif
mov r2, sp
dabt_helper
THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR

View File

@@ -197,7 +197,11 @@
.endm
#ifdef CONFIG_AMLOGIC_VMAP
.macro svc_exit, rpsr, irq = 0, vmap = 0
#else
.macro svc_exit, rpsr, irq = 0
#endif /* CONFIG_AMLOGIC_VMAP */
.if \irq != 0
@ IRQs already off
#ifdef CONFIG_TRACE_IRQFLAGS
@@ -224,7 +228,16 @@
msr spsr_cxsf, \rpsr
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
@ We must avoid clrex due to Cortex-A15 erratum #830321
#ifdef CONFIG_AMLOGIC_VMAP
.if \vmap
ldr r0, [sp, #(SVC_REGS_SIZE + TI_VMAP_BACK_SP)]
sub r0, r0, #4 @ uninhabited address
.else
sub r0, sp, #4 @ uninhabited address
.endif
#else
sub r0, sp, #4 @ uninhabited address
#endif /* CONFIG_AMLOGIC_VMAP */
strex r1, r2, [r0] @ clear the exclusive monitor
#endif
ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr

View File

@@ -119,7 +119,11 @@ __mmap_switched_data:
#else
.long 0 @ r7
#endif
#ifdef CONFIG_AMLOGIC_VMAP
.long init_thread_union + TI_THREAD_START_SP @ sp
#else
.long init_thread_union + THREAD_START_SP @ sp
#endif
.size __mmap_switched_data, . - __mmap_switched_data
/*

View File

@@ -62,6 +62,9 @@
#include <asm/unwind.h>
#include <asm/memblock.h>
#include <asm/virt.h>
#ifdef CONFIG_AMLOGIC_VMAP
#include <linux/amlogic/vmap_stack.h>
#endif
#ifdef CONFIG_AMLOGIC_CPU_INFO
#include <linux/amlogic/cpu_version.h>
#endif
@@ -515,6 +518,17 @@ static void __init elf_hwcap_fixup(void)
elf_hwcap &= ~HWCAP_SWP;
}
#ifdef CONFIG_AMLOGIC_VMAP
static void __init fixup_init_thread_union(void)
{
void *p;
p = (void *)((unsigned long)&init_thread_union + THREAD_INFO_OFFSET);
memcpy(p, &init_thread_union, THREAD_INFO_SIZE);
memset(&init_thread_union, 0, THREAD_INFO_SIZE);
}
#endif
/*
* cpu_init - initialise one CPU.
*
@@ -578,6 +592,9 @@ void notrace cpu_init(void)
"I" (offsetof(struct stack, fiq[0])),
PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
: "r14");
#ifdef CONFIG_AMLOGIC_VMAP
__setup_vmap_stack(cpu);
#endif
#endif
}
@@ -600,6 +617,9 @@ void __init smp_setup_processor_id(void)
*/
set_my_cpu_offset(0);
#ifdef CONFIG_AMLOGIC_VMAP
fixup_init_thread_union();
#endif
pr_info("Booting Linux on physical CPU 0x%x\n", mpidr);
}

View File

@@ -326,11 +326,20 @@ void arch_cpu_idle_dead(void)
* cpu initialisation. There's some initialisation which needs
* to be repeated to undo the effects of taking the CPU offline.
*/
#ifdef CONFIG_AMLOGIC_VMAP
__asm__("mov sp, %0\n"
" mov fp, #0\n"
" b secondary_start_kernel"
:
: "r" (task_stack_page(current) + THREAD_SIZE - 8 -
THREAD_INFO_SIZE));
#else
__asm__("mov sp, %0\n"
" mov fp, #0\n"
" b secondary_start_kernel"
:
: "r" (task_stack_page(current) + THREAD_SIZE - 8));
#endif
}
#endif /* CONFIG_HOTPLUG_CPU */

View File

@@ -10,6 +10,10 @@
#include <asm/suspend.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_AMLOGIC_VMAP
#include <linux/amlogic/vmap_stack.h>
#endif
extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
extern void cpu_resume_mmu(void);
@@ -47,6 +51,27 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
#define idmap_pgd NULL
#endif
#ifdef CONFIG_AMLOGIC_VMAP
void copy_pgd(void)
{
unsigned long index;
pgd_t *pgd_c = NULL, *pgd_k, *pgd_i;
unsigned long size;
/*
* sync pgd of current task and idmap_pgd from init mm
*/
index = pgd_index(TASK_SIZE);
pgd_c = cpu_get_pgd() + index;
pgd_i = idmap_pgd + index;
pgd_k = init_mm.pgd + index;
size = (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t);
pr_debug("pgd:%p, pgd_k:%p, pdg_i:%p\n",
pgd_c, pgd_k, pgd_i);
memcpy(pgd_c, pgd_k, size);
memcpy(pgd_i, pgd_k, size);
}
#endif
/*
* This is called by __cpu_suspend() to save the state, and do whatever
* flushing is required to ensure that when the CPU goes to sleep we have
@@ -56,7 +81,21 @@ void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr)
{
u32 *ctx = ptr;
#ifdef CONFIG_AMLOGIC_VMAP
if (likely(is_vmap_addr((unsigned long)ptr))) {
struct page *page = vmalloc_to_page(ptr);
unsigned long offset;
offset = (unsigned long)ptr & (PAGE_SIZE - 1);
*save_ptr = (page_to_phys(page) + offset);
pr_debug("%s, ptr:%p, page:%lx, save_ptr:%x\n",
__func__, ptr, page_to_pfn(page), *save_ptr);
copy_pgd();
} else
*save_ptr = virt_to_phys(ptr);
#else
*save_ptr = virt_to_phys(ptr);
#endif
/* This must correspond to the LDM in cpu_resume() assembly */
*ptr++ = virt_to_phys(idmap_pgd);

View File

@@ -44,6 +44,9 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#ifdef CONFIG_AMLOGIC_VMAP
#include <linux/amlogic/vmap_stack.h>
#endif
#include <asm/stacktrace.h>
#include <asm/traps.h>
@@ -468,6 +471,20 @@ int unwind_frame(struct stackframe *frame)
return URC_OK;
}
#ifdef CONFIG_AMLOGIC_VMAP
static void dump_backtrace_entry_fp(unsigned long where, unsigned long fp,
unsigned long sp)
{
signed long fp_size = 0;
fp_size = fp - sp + 4;
if (fp_size < 0 || !fp)
fp_size = 0;
pr_info("[%08lx+%4ld][<%08lx>] %pS\n",
fp, fp_size, where, (void *)where);
}
#endif
void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
@@ -504,9 +521,33 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
unsigned long where = frame.pc;
urc = unwind_frame(&frame);
#ifdef CONFIG_AMLOGIC_VMAP
if (urc < 0) {
int keep = 0;
/* try to continue unwinding across the IRQ stack */
if (on_irq_stack(frame.sp, raw_smp_processor_id())) {
unsigned long *prev_fp;
prev_fp = (unsigned long *)(frame.fp - 12);
if (frame.fp >= TASK_SIZE) {
keep = 1;
frame.fp = prev_fp[0];
frame.sp = prev_fp[1];
frame.lr = prev_fp[2];
frame.pc = prev_fp[3];
}
}
if (!keep)
break;
}
where = frame.lr;
dump_backtrace_entry_fp(where, frame.fp, frame.sp);
#else
if (urc < 0)
break;
dump_backtrace_entry(where, frame.pc, frame.sp - 4);
#endif
}
}

View File

@@ -43,7 +43,6 @@ config AMLOGIC_VMAP
bool "Amlogic kernel stack"
depends on AMLOGIC_MEMORY_EXTEND
depends on !KASAN
depends on 64BIT
default y
help
This config is used to enable amlogic kernel stack

View File

@@ -50,8 +50,21 @@ static unsigned char vmap_shrink_enable;
static atomic_t vmap_stack_size;
static struct aml_vmap *avmap;
#ifdef CONFIG_ARM64
DEFINE_PER_CPU(unsigned long [THREAD_SIZE/sizeof(long)], vmap_stack)
__aligned(16);
#else
static unsigned long irq_stack1[(THREAD_SIZE/sizeof(long))]
__aligned(THREAD_SIZE);
static void *irq_stack[NR_CPUS] = {
irq_stack1, /* only the 1st IRQ stack is static; others are allocated */
};
static unsigned long vmap_stack1[(THREAD_SIZE/sizeof(long))]
__aligned(THREAD_SIZE);
static void *vmap_stack[NR_CPUS] = {
vmap_stack1, /* only the 1st vmap stack is static; others are allocated */
};
#endif
void update_vmap_stack(int diff)
{
@@ -65,7 +78,146 @@ int get_vmap_stack_size(void)
}
EXPORT_SYMBOL(get_vmap_stack_size);
static int is_vmap_addr(unsigned long addr)
#ifdef CONFIG_ARM64
bool on_vmap_stack(unsigned long sp, int cpu)
{
/* variable names the same as kernel/stacktrace.c */
unsigned long low = (unsigned long)per_cpu(vmap_stack, cpu);
unsigned long high = low + THREAD_START_SP;
return (low <= sp && sp <= high);
}
#endif
#ifdef CONFIG_ARM
void notrace __setup_vmap_stack(unsigned long cpu)
{
void *stack;
#ifdef CONFIG_THUMB2_KERNEL
#define TAG "r"
#else
#define TAG "I"
#endif
stack = vmap_stack[cpu];
if (!stack) {
stack = kmalloc(THREAD_SIZE, GFP_ATOMIC | __GFP_ZERO);
WARN_ON(!stack);
vmap_stack[cpu] = stack;
irq_stack[cpu] = kmalloc(THREAD_SIZE, GFP_ATOMIC | __GFP_ZERO);
WARN_ON(!irq_stack[cpu]);
}
pr_info("cpu %ld, vmap stack:[%lx-%lx]\n",
cpu, (unsigned long)stack,
(unsigned long)stack + THREAD_START_SP);
pr_info("cpu %ld, irq stack:[%lx-%lx]\n",
cpu, (unsigned long)irq_stack[cpu],
(unsigned long)irq_stack[cpu] + THREAD_START_SP);
stack += THREAD_SIZE;
stack -= sizeof(struct thread_info);
/*
* reserve 24 bytes for r0, lr, spsr, sp_svc plus an 8-byte gap
*/
stack -= (24);
asm volatile (
"msr cpsr_c, %1 \n"
"mov sp, %0 \n"
"msr cpsr_c, %2 \n"
:
: "r" (stack),
TAG(PSR_F_BIT | PSR_I_BIT | ABT_MODE),
TAG(PSR_F_BIT | PSR_I_BIT | SVC_MODE)
: "memory", "cc"
);
}
int on_irq_stack(unsigned long sp, int cpu)
{
unsigned long sp_irq;
sp_irq = (unsigned long)irq_stack[cpu];
if ((sp & ~(THREAD_SIZE - 1)) == (sp_irq & ~(THREAD_SIZE - 1)))
return 1;
return 0;
}
unsigned long notrace irq_stack_entry(unsigned long sp_irq)
{
int cpu = raw_smp_processor_id();
if (!on_irq_stack(sp_irq, cpu)) {
unsigned long sp = (unsigned long)irq_stack[cpu];
void *src, *dst;
/*
* copy the current task's thread_info data to the IRQ stack
*/
src = current_thread_info();
dst = (void *)(sp + THREAD_INFO_OFFSET);
memcpy(dst, src, offsetof(struct thread_info, cpu_context));
sp_irq = (unsigned long)dst - 8;
}
return sp_irq;
}
unsigned long notrace pmd_check(unsigned long addr, unsigned long far)
{
unsigned int index;
pgd_t *pgd, *pgd_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
if (addr < TASK_SIZE)
return addr;
index = pgd_index(addr);
pgd = cpu_get_pgd() + index;
pgd_k = init_mm.pgd + index;
if (pgd_none(*pgd_k))
goto bad_area;
if (!pgd_present(*pgd))
set_pgd(pgd, *pgd_k);
pud = pud_offset(pgd, addr);
pud_k = pud_offset(pgd_k, addr);
if (pud_none(*pud_k))
goto bad_area;
if (!pud_present(*pud))
set_pud(pud, *pud_k);
pmd = pmd_offset(pud, addr);
pmd_k = pmd_offset(pud_k, addr);
#ifdef CONFIG_ARM_LPAE
/*
* Only one hardware entry per PMD with LPAE.
*/
index = 0;
#else
/*
* On ARM one Linux PGD entry contains two hardware entries (see page
* tables layout in pgtable.h). We normally guarantee that we always
* fill both L1 entries. But create_mapping() doesn't follow the rule.
* It can create individual L1 entries, so here we have to call the
* pmd_none() check for the entry that really corresponds to the
* address, not for the first of the pair.
*/
index = (addr >> SECTION_SHIFT) & 1;
#endif
if (pmd_none(pmd_k[index]))
goto bad_area;
copy_pmd(pmd, pmd_k);
bad_area:
return addr;
}
#endif
int is_vmap_addr(unsigned long addr)
{
unsigned long start, end;
@@ -96,6 +248,50 @@ static struct page *get_vmap_cached_page(int *remain)
return page;
}
static struct page *check_pte_exist(unsigned long addr)
{
struct mm_struct *mm;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
mm = &init_mm;
pgd = pgd_offset(mm, addr);
if (pgd_none(*pgd))
return NULL;
if (pgd_bad(*pgd))
return NULL;
pud = pud_offset(pgd, addr);
if (pud_none(*pud))
return NULL;
if (pud_bad(*pud))
return NULL;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return NULL;
if (pmd_bad(*pmd))
return NULL;
pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte))
return NULL;
#ifdef CONFIG_ARM64
return pte_page(*pte);
#elif defined(CONFIG_ARM)
return pte_page(pte_val(*pte));
#else
return NULL; /* not supported */
#endif
}
static int vmap_mmu_set(struct page *page, unsigned long addr, int set)
{
pgd_t *pgd = NULL;
@@ -121,16 +317,22 @@ static int vmap_mmu_set(struct page *page, unsigned long addr, int set)
goto nomem;
}
pte = pte_offset_map(pmd, addr);
pte = pte_offset_kernel(pmd, addr);
if (set)
set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL));
else
pte_clear(&init_mm, addr, pte);
pte_unmap(pte);
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
#ifdef CONFIG_ARM64
D("add:%lx, pgd:%p %llx, pmd:%p %llx, pte:%p %llx\n",
addr, pgd, pgd_val(*pgd), pmd, pmd_val(*pmd),
pte, pte_val(*pte));
#elif defined(CONFIG_ARM)
D("add:%lx, pgd:%p %x, pmd:%p %x, pte:%p %x\n",
addr, pgd, (unsigned int)pgd_val(*pgd),
pmd, (unsigned int)pmd_val(*pmd),
pte, pte_val(*pte));
#endif
return 0;
nomem:
E("allocation page talbe failed, G:%p, U:%p, M:%p, T:%p",
@@ -140,10 +342,16 @@ nomem:
static int stack_floor_page(unsigned long addr)
{
unsigned long pos;
pos = addr & (THREAD_SIZE - 1);
/*
* the stack address must be aligned to THREAD_SIZE
*/
return ((addr & (THREAD_SIZE - 1)) < PAGE_SIZE);
if (THREAD_SIZE_ORDER > 1)
return pos < PAGE_SIZE;
else
return pos < (PAGE_SIZE / 4);
}
static int check_addr_up_flow(unsigned long addr)
@@ -153,17 +361,17 @@ static int check_addr_up_flow(unsigned long addr)
* range (aligned to THREAD_SIZE) but the next page of
* this addr is not mapped
*/
if (stack_floor_page(addr) &&
!vmalloc_to_page((const void *)(addr + PAGE_SIZE)))
if (stack_floor_page(addr) && !check_pte_exist(addr + PAGE_SIZE))
return 1;
return 0;
}
#if DEBUG
static void dump_backtrace_entry(unsigned long ip, unsigned long fp)
static void dump_backtrace_entry(unsigned long ip, unsigned long fp,
unsigned long sp)
{
unsigned long fp_size = 0;
#ifdef CONFIG_ARM64
if (fp >= VMALLOC_START) {
fp_size = *((unsigned long *)fp) - fp;
/* fp cross IRQ or vmap stack */
@@ -172,29 +380,94 @@ static void dump_backtrace_entry(unsigned long ip, unsigned long fp)
}
pr_info("[%016lx+%4ld][<%p>] %pS\n",
fp, fp_size, (void *) ip, (void *) ip);
#elif defined(CONFIG_ARM)
if (fp >= TASK_SIZE) {
fp_size = fp - sp + 4;
/* fp cross IRQ or vmap stack */
if (fp_size >= THREAD_SIZE)
fp_size = 0;
}
pr_info("[%08lx+%4ld][<%p>] %pS\n",
fp, fp_size, (void *) ip, (void *) ip);
#endif
}
static void show_fault_stack(unsigned long addr, struct pt_regs *regs)
static noinline void show_fault_stack(unsigned long addr, struct pt_regs *regs)
{
struct stackframe frame;
#ifdef CONFIG_ARM64
frame.fp = regs->regs[29];
frame.sp = addr;
frame.pc = (unsigned long)regs->regs[30];
#elif defined(CONFIG_ARM)
frame.fp = regs->ARM_fp;
frame.sp = regs->ARM_sp;
frame.pc = (unsigned long)regs->uregs[15];
#endif
pr_info("Call trace:\n");
pr_info("Addr:%lx, Call trace:\n", addr);
#ifdef CONFIG_ARM64
pr_info("[%016lx+%4ld][<%p>] %pS\n",
addr, frame.fp - addr, (void *)regs->pc, (void *) regs->pc);
#elif defined(CONFIG_ARM)
pr_info("[%08lx+%4ld][<%p>] %pS\n",
addr, frame.fp - addr, (void *)regs->uregs[15],
(void *) regs->uregs[15]);
#endif
while (1) {
int ret;
dump_backtrace_entry(frame.pc, frame.fp);
dump_backtrace_entry(frame.pc, frame.fp, frame.sp);
#ifdef CONFIG_ARM64
ret = unwind_frame(current, &frame);
#elif defined(CONFIG_ARM)
ret = unwind_frame(&frame);
#endif
if (ret < 0)
break;
}
}
static void check_sp_fault_again(struct pt_regs *regs)
{
unsigned long sp = 0, addr;
struct page *page;
int cache;
#ifdef CONFIG_ARM
sp = regs->ARM_sp;
#elif defined(CONFIG_ARM64)
sp = regs->sp;
#endif
addr = sp - sizeof(*regs);
if (sp && ((addr & PAGE_MASK) != (sp & PAGE_MASK))) {
/*
* copying the context back will fault, so handle
* it first
*/
E("fault again, sp:%lx, addr:%lx\n", sp, addr);
page = get_vmap_cached_page(&cache);
WARN_ON(!page);
vmap_mmu_set(page, addr, 1);
update_vmap_stack(1);
if ((THREAD_SIZE_ORDER > 1) && stack_floor_page(addr)) {
E("task:%d %s, stack near overflow, addr:%lx\n",
current->pid, current->comm, addr);
show_fault_stack(addr, regs);
}
/* cache is not enough */
if (cache <= (VMAP_CACHE_PAGE / 2))
mod_delayed_work(system_highpri_wq, &avmap->mwork, 0);
D("map page:%5lx for addr:%lx\n", page_to_pfn(page), addr);
#if DEBUG
show_fault_stack(addr, regs);
#endif
}
}
/*
* IRQs must *NEVER* be enabled in this handler
@@ -205,28 +478,54 @@ int handle_vmap_fault(unsigned long addr, unsigned int esr,
struct page *page;
int cache = 0;
if (!is_vmap_addr(addr))
if (!is_vmap_addr(addr)) {
check_sp_fault_again(regs);
return -EINVAL;
}
D("addr:%lx, esr:%x, task:%5d %s\n",
addr, esr, current->pid, current->comm);
#ifdef CONFIG_ARM64
D("pc:%pf, %llx, lr:%pf, %llx, sp:%llx, %lx\n",
(void *)regs->pc, regs->pc,
(void *)regs->regs[30], regs->regs[30], regs->sp,
current_stack_pointer);
#elif defined(CONFIG_ARM)
D("pc:%pf, %lx, lr:%pf, %lx, sp:%lx, %lx\n",
(void *)regs->uregs[15], regs->uregs[15],
(void *)regs->uregs[14], regs->uregs[14], regs->uregs[13],
current_stack_pointer);
#endif
if (check_addr_up_flow(addr)) {
E("address %lx out of range\n", addr);
#ifdef CONFIG_ARM64
E("PC is:%llx, %pf, LR is:%llx %pf\n",
regs->pc, (void *)regs->pc,
regs->regs[30], (void *)regs->regs[30]);
#elif defined(CONFIG_ARM)
E("PC is:%lx, %pf, LR is:%lx %pf\n",
regs->uregs[15], (void *)regs->uregs[15],
regs->uregs[14], (void *)regs->uregs[14]);
#endif
E("task:%d %s, stack:%p, %lx\n",
current->pid, current->comm, current->stack,
current_stack_pointer);
dump_stack();
show_fault_stack(addr, regs);
check_sp_fault_again(regs);
return -ERANGE;
}
#ifdef CONFIG_ARM
page = check_pte_exist(addr);
if (page) {
D("task:%d %s, page:%lx mapped for addr:%lx\n",
current->pid, current->comm, page_to_pfn(page), addr);
check_sp_fault_again(regs);
return -EINVAL;
}
#endif
/*
* allocate a new page for vmap
*/
@@ -234,10 +533,10 @@ int handle_vmap_fault(unsigned long addr, unsigned int esr,
WARN_ON(!page);
vmap_mmu_set(page, addr, 1);
update_vmap_stack(1);
if ((THREAD_SIZE_ORDER > 1) && stack_floor_page(addr)) {
E("task:%d %s, stack near overflow, addr:%lx\n",
current->pid, current->comm, addr);
dump_stack();
show_fault_stack(addr, regs);
}
/* cache is not enough */
@@ -248,7 +547,6 @@ int handle_vmap_fault(unsigned long addr, unsigned int esr,
#if DEBUG
show_fault_stack(addr, regs);
#endif
return 0;
}
EXPORT_SYMBOL(handle_vmap_fault);
@@ -344,6 +642,17 @@ void aml_account_task_stack(struct task_struct *tsk, int account)
unsigned long stack = (unsigned long)task_stack_page(tsk);
struct page *first_page;
if (unlikely(!is_vmap_addr(stack))) {
/* stack was allocated with kmalloc */
first_page = virt_to_page((void *)stack);
mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
THREAD_SIZE / 1024 * account);
memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
account * (THREAD_SIZE / 1024));
update_vmap_stack(account * (THREAD_SIZE / PAGE_SIZE));
return;
}
stack += STACK_TOP_PAGE_OFF;
first_page = vmalloc_to_page((void *)stack);
mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
@@ -379,13 +688,18 @@ void *aml_stack_alloc(int node, struct task_struct *tsk)
avmap->start_bit = bitmap_no + 1; /* next idle address space */
if (bitmap_no >= MAX_TASKS) {
spin_unlock_irqrestore(&avmap->vmap_lock, flags);
E("BITMAP FULL!!!\n");
return NULL;
/*
* if the vmap address space is full, fall back to
* kmalloc for the stack
*/
addr = (unsigned long)kmalloc(THREAD_SIZE, GFP_KERNEL);
E("BITMAP FULL, kmalloc task stack:%lx\n", addr);
return (void *)addr;
}
bitmap_set(avmap->bitmap, bitmap_no, 1);
spin_unlock_irqrestore(&avmap->vmap_lock, flags);
page = alloc_page(THREADINFO_GFP | __GFP_ZERO);
page = alloc_page(THREADINFO_GFP | __GFP_ZERO | __GFP_HIGHMEM);
if (!page) {
spin_lock_irqsave(&avmap->vmap_lock, flags);
bitmap_clear(avmap->bitmap, bitmap_no, 1);
@@ -400,6 +714,7 @@ void *aml_stack_alloc(int node, struct task_struct *tsk)
map_addr = addr + STACK_TOP_PAGE_OFF;
vmap_mmu_set(page, map_addr, 1);
update_vmap_stack(1);
D("bit idx:%5ld, start:%5ld, addr:%lx, page:%lx\n",
bitmap_no, raw_start, addr, page_to_pfn(page));
@@ -413,6 +728,12 @@ void aml_stack_free(struct task_struct *tsk)
struct page *page;
unsigned long flags;
if (unlikely(!is_vmap_addr(stack))) {
/* stack was allocated with kmalloc */
kfree((void *)stack);
return;
}
addr = stack + STACK_TOP_PAGE_OFF;
for (; addr >= stack; addr -= PAGE_SIZE) {
page = vmalloc_to_page((const void *)addr);
@@ -458,7 +779,7 @@ static void page_cache_maintain_work(struct work_struct *work)
INIT_LIST_HEAD(&head);
for (i = 0; i < VMAP_CACHE_PAGE - cnt; i++) {
page = alloc_page(GFP_KERNEL | __GFP_HIGH);
page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
if (!page) {
E("get page failed, allocated:%d, cnt:%d\n", i, cnt);
break;
@@ -483,10 +804,9 @@ arch_initcall(start_thread_work);
void __init thread_stack_cache_init(void)
{
int i;
unsigned long addr;
struct page *page;
page = alloc_pages(GFP_KERNEL, VMAP_CACHE_PAGE_ORDER);
page = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, VMAP_CACHE_PAGE_ORDER);
if (!page)
return;
@@ -505,8 +825,8 @@ void __init thread_stack_cache_init(void)
pr_info("%s, vmap:%p, bitmap:%p, cache page:%lx\n",
__func__, avmap, avmap->bitmap, page_to_pfn(page));
avmap->root_vm = __get_vm_area_node(VM_STACK_AREA_SIZE,
VM_STACK_AREA_SIZE,
0, VMALLOC_START, VMALLOC_END,
VMAP_ALIGN,
0, VMAP_ADDR_START, VMAP_ADDR_END,
NUMA_NO_NODE, GFP_KERNEL,
__builtin_return_address(0));
if (!avmap->root_vm) {
@@ -530,7 +850,9 @@ void __init thread_stack_cache_init(void)
avmap->cached_pages = VMAP_CACHE_PAGE;
INIT_DELAYED_WORK(&avmap->mwork, page_cache_maintain_work);
#ifdef CONFIG_ARM64
for_each_possible_cpu(i) {
unsigned long addr;
addr = (unsigned long)per_cpu_ptr(vmap_stack, i);
pr_info("cpu %d, vmap_stack:[%lx-%lx]\n",
i, addr, addr + THREAD_START_SP);
@@ -538,5 +860,6 @@ void __init thread_stack_cache_init(void)
pr_info("cpu %d, irq_stack: [%lx-%lx]\n",
i, addr, addr + THREAD_START_SP);
}
#endif
register_shrinker(&vmap_shrinker);
}

View File

@@ -2698,6 +2698,17 @@ static long exact_copy_from_user(void *to, const void __user * from,
if (!access_ok(VERIFY_READ, from, n))
return n;
#ifdef CONFIG_AMLOGIC_VMAP
/* addr is from kernel vmalloc space; shrink the copy to avoid reading past it */
if (is_vmalloc_or_module_addr((void *)from)) {
unsigned long old = n;
n = strlen(from) + 1;
pr_info("addr:%p is in kernel, size fix %ld->%ld, data:%s\n",
from, old, n, (char *)from);
}
#endif
while (n) {
if (__get_user(c, f)) {
memset(t, 0, n);

View File

@@ -20,7 +20,18 @@
#define STACK_SHRINK_THRESHOLD (PAGE_SIZE + 1024)
#define STACK_SHRINK_SLEEP (HZ)
#ifdef CONFIG_64BIT
#define VM_STACK_AREA_SIZE SZ_512M
#define VMAP_ADDR_START VMALLOC_START
#define VMAP_ADDR_END VMALLOC_END
#define VMAP_ALIGN VM_STACK_AREA_SIZE
#else
/* currently supports at most 6144 tasks on 32-bit: 48M area / 8K stack */
#define VM_STACK_AREA_SIZE (SZ_64M - SZ_16M)
#define VMAP_ADDR_START MODULES_VADDR
#define VMAP_ADDR_END MODULES_END
#define VMAP_ALIGN SZ_64M
#endif
#define STACK_TOP_PAGE_OFF (THREAD_SIZE - PAGE_SIZE)
@@ -34,33 +45,28 @@
#define CACHE_MAINTAIN_DELAY (HZ)
struct aml_vmap {
spinlock_t vmap_lock;
unsigned int start_bit;
int cached_pages;
struct vm_struct *root_vm;
unsigned long *bitmap;
struct list_head list;
spinlock_t vmap_lock;
spinlock_t page_lock;
struct delayed_work mwork;
spinlock_t page_lock;
};
extern int handle_vmap_fault(unsigned long addr,
unsigned int esr, struct pt_regs *regs);
extern DEFINE_PER_CPU(unsigned long [THREAD_SIZE/sizeof(long)], vmap_stack);
static inline bool on_vmap_stack(unsigned long sp, int cpu)
{
/* variable names the same as kernel/stacktrace.c */
unsigned long low = (unsigned long)per_cpu(vmap_stack, cpu);
unsigned long high = low + THREAD_START_SP;
return (low <= sp && sp <= high);
}
extern bool on_vmap_stack(unsigned long sp, int cpu);
extern void __setup_vmap_stack(unsigned long off);
extern void update_vmap_stack(int diff);
extern int get_vmap_stack_size(void);
extern int is_vmap_addr(unsigned long addr);
extern void aml_stack_free(struct task_struct *tsk);
extern void *aml_stack_alloc(int node, struct task_struct *tsk);
extern void aml_account_task_stack(struct task_struct *tsk, int account);
#ifdef CONFIG_ARM
extern int on_irq_stack(unsigned long sp, int cpu);
#endif
#endif /* __VMAP_STACK_H__ */

View File

@@ -3331,7 +3331,13 @@ static inline unsigned long *end_of_stack(const struct task_struct *task)
#elif !defined(__HAVE_THREAD_FUNCTIONS)
#ifdef CONFIG_AMLOGIC_VMAP
#define task_thread_info(task) \
((struct thread_info *)(((unsigned long)(task)->stack) + \
THREAD_INFO_OFFSET))
#else
#define task_thread_info(task) ((struct thread_info *)(task)->stack)
#endif
#define task_stack_page(task) ((void *)(task)->stack)
static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
@@ -3351,11 +3357,15 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
*/
static inline unsigned long *end_of_stack(struct task_struct *p)
{
#ifdef CONFIG_AMLOGIC_VMAP
return p->stack;
#else /* CONFIG_AMLOGIC_VMAP */
#ifdef CONFIG_STACK_GROWSUP
return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1;
#else
return (unsigned long *)(task_thread_info(p) + 1);
#endif
#endif /* CONFIG_AMLOGIC_VMAP */
}
#endif

View File

@@ -481,21 +481,12 @@ int __weak arch_dup_task_struct(struct task_struct *dst,
return 0;
}
#ifdef CONFIG_AMLOGIC_VMAP
static bool first_magic __read_mostly;
#endif
void set_task_stack_end_magic(struct task_struct *tsk)
{
unsigned long *stackend;
stackend = end_of_stack(tsk);
#ifdef CONFIG_AMLOGIC_VMAP
if (unlikely(!first_magic)) {
*stackend = STACK_END_MAGIC; /* for overflow detection */
first_magic = 1;
}
#else
#ifndef CONFIG_AMLOGIC_VMAP
*stackend = STACK_END_MAGIC; /* for overflow detection */
#endif
}