mirror of
https://github.com/hardkernel/kernel_common_drivers.git
synced 2026-06-25 12:03:48 +09:00
ae8f7a4020
PD#SWPL-201692 Problem: spinlock is sleeping lock, cause BUG_ON in atomic context Solution: change spinlock to raw_spinlock Verify: on A5 Change-Id: Iea05aa36c58143d0efe880631d8906d9125fe55e Signed-off-by: Lei Zhang <lei.zhang@amlogic.com>
1028 lines
25 KiB
C
1028 lines
25 KiB
C
// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
|
|
/*
|
|
* Copyright (c) 2019 Amlogic, Inc. All rights reserved.
|
|
*/
|
|
|
|
#include <linux/version.h>
|
|
#include <linux/types.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/init.h>
|
|
#include <linux/device.h>
|
|
#include <linux/platform_device.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/io.h>
|
|
#include <linux/module.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/reboot.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/amlogic/arm-smccc.h>
|
|
#include <linux/memcontrol.h>
|
|
#include <linux/kthread.h>
|
|
#include <linux/amlogic/vmap_stack.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/sched/task_stack.h>
|
|
#ifdef CONFIG_KASAN
|
|
#include <linux/kasan.h>
|
|
#endif
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/stacktrace.h>
|
|
#include <internal.h>
|
|
|
|
#define DEBUG_VMAP	0
#define MODULE_NAME	"amlogic-vmap"

/*
 * D() - debug trace, prefixed with the calling function's name.
 *
 * The call is compiled in but never executed unless DEBUG_VMAP is non-zero.
 * The body is wrapped in do { } while (0) so that "D(...);" is exactly one
 * statement and can be used as the unbraced body of an if/else.
 */
#define D(format, args...)						\
	do {								\
		if (DEBUG_VMAP)						\
			pr_info("%s " format, __func__, ##args);	\
	} while (0)

/* E() - error message, prefixed with the calling function's name. */
#define E(format, args...)	pr_err("%s " format, __func__, ##args)
|
|
|
|
/*
 * Running page count adjusted through update_vmap_stack() and reported as
 * "VmapStack" by vmap_report_meminfo().
 */
static atomic_t vmap_stack_size;
|
|
/* Incremented once per page mapped by __handle_vmap_fault(). */
static atomic_t vmap_fault_count;
|
|
/* Incremented once per page pre-mapped by check_sp_fault_again(). */
static atomic_t vmap_pre_handle_count;
|
|
/* Driver state; allocated and filled in by thread_stack_cache_init(). */
static struct aml_vmap *avmap;
|
|
/*
 * Raised when the number of cached pages drops to VMAP_CACHE_PAGE / 2 or
 * less; polled and reset by vmap_task(), which refills the cache.
 */
static atomic_t vmap_cache_flag;
|
|
|
|
#ifdef CONFIG_ARM64
|
|
/*
 * Per-CPU, page-aligned buffer of THREAD_SIZE bytes. on_vmap_stack() tests
 * whether a stack pointer lies inside the current CPU's copy.
 */
DEFINE_PER_CPU(unsigned long [THREAD_SIZE / sizeof(long)], vmap_stack)
|
|
__aligned(PAGE_SIZE);
|
|
|
|
bool on_vmap_stack(unsigned long sp, struct stack_info *info)
|
|
{
|
|
unsigned long low = (unsigned long)raw_cpu_ptr(vmap_stack);
|
|
unsigned long high = low + THREAD_SIZE;
|
|
|
|
if (sp < low || sp >= high)
|
|
return false;
|
|
|
|
if (info) {
|
|
info->low = low;
|
|
info->high = high;
|
|
//info->type = STACK_TYPE_VMAP;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void dump_backtrace_entry_vmap(unsigned long ip, unsigned long fp,
|
|
unsigned long low, const char *loglvl)
|
|
{
|
|
unsigned long fp_size = 0;
|
|
unsigned long high;
|
|
|
|
high = low + THREAD_SIZE;
|
|
|
|
/*
|
|
* Since the target process may be rescheduled again,
|
|
* we have to add necessary validation checking for fp.
|
|
* The checking condition is borrowed from unwind_frame
|
|
*/
|
|
if (on_irq_stack(fp, sizeof(long)) ||
|
|
(fp >= low && fp <= high)) {
|
|
fp_size = *((unsigned long *)fp) - fp;
|
|
/* fp cross IRQ or vmap stack */
|
|
if (fp_size >= THREAD_SIZE)
|
|
fp_size = 0;
|
|
}
|
|
pr_info("[%016lx+%4ld][<%016lx>] %s %pSb\n",
|
|
fp, fp_size, (unsigned long)ip, loglvl, (void *)ip);
|
|
}
|
|
#else
|
|
/*
 * Statically reserved, THREAD_SIZE-aligned storage used as the first entry
 * of irq_stack[] below.
 */
static unsigned long irq_stack1[(THREAD_SIZE / sizeof(long))]
|
|
__aligned(THREAD_SIZE);
|
|
/*
 * Per-CPU IRQ stack base pointers, read by on_vmap_irq_stack() and
 * irq_stack_entry(). Only slot 0 is populated here; no code in this chunk
 * fills the remaining slots.
 */
void *irq_stack[NR_CPUS] = {
|
|
irq_stack1, /* only assign 1st irq stack ,other need alloc */
|
|
};
|
|
|
|
/*
 * Statically reserved, THREAD_SIZE-aligned storage used as the first entry
 * of vmap_stack[] below.
 */
static unsigned long vmap_stack1[(THREAD_SIZE / sizeof(long))]
|
|
__aligned(THREAD_SIZE);
|
|
|
|
/*
 * Per-CPU vmap stack base pointers. Only slot 0 is populated here;
 * __setup_vmap_stack() allocates a slot on demand when it finds it empty.
 */
static void *vmap_stack[NR_CPUS] = {
|
|
vmap_stack1, /* only assign 1st vmap stack ,other need alloc */
|
|
};
|
|
#endif
|
|
|
|
/*
 * update_vmap_stack() - adjust the vmap stack page counter.
 * @diff: signed amount added atomically to vmap_stack_size
 */
void update_vmap_stack(int diff)
|
|
{
|
|
atomic_add(diff, &vmap_stack_size);
|
|
}
|
|
EXPORT_SYMBOL(update_vmap_stack);
|
|
|
|
/*
 * get_vmap_stack_size() - read the vmap stack page counter.
 *
 * Return: current value of vmap_stack_size.
 */
int get_vmap_stack_size(void)
|
|
{
|
|
return atomic_read(&vmap_stack_size);
|
|
}
|
|
EXPORT_SYMBOL(get_vmap_stack_size);
|
|
|
|
#ifdef CONFIG_ARM
|
|
/*
 * __setup_vmap_stack() - install the abort-mode stack pointer for @cpu.
 * @cpu: index into vmap_stack[]
 *
 * Uses the buffer recorded in vmap_stack[@cpu], allocating one of
 * THREAD_SIZE_ORDER pages first if the slot is still empty, then switches
 * to ABT mode, loads sp with the top of that buffer minus 24 bytes and
 * switches back to SVC mode. FIQ and IRQ are masked in both mode writes.
 *
 * NOTE(review): a failed allocation only triggers WARN_ON(); the (NULL
 * based) value is still loaded into the abort-mode sp - confirm this is
 * acceptable.
 */
void notrace __setup_vmap_stack(unsigned long cpu)
{
	void *base;
	void *abt_sp;
#define VMAP_MASK (GFP_ATOMIC | __GFP_ZERO)
#ifdef CONFIG_THUMB2_KERNEL
#define TAG "r"
#else
#define TAG "I"
#endif
	base = vmap_stack[cpu];
	if (!base) {
		base = (void *)__get_free_pages(VMAP_MASK, THREAD_SIZE_ORDER);
		WARN_ON(!base);
		vmap_stack[cpu] = base;
	}

	pr_debug("cpu %ld, vmap stack:[%lx-%lx]\n",
		 cpu, (unsigned long)base,
		 (unsigned long)base + THREAD_START_SP);

	/*
	 * Start from the top of the buffer and reserve 24 bytes for
	 * r0, lr, spsr, sp_svc and an 8 byte gap.
	 */
	abt_sp = base + THREAD_SIZE - 24;

	asm volatile (
		"msr	cpsr_c, %1	\n"
		"mov	sp, %0		\n"
		"msr	cpsr_c, %2	\n"
		:
		: "r" (abt_sp),
		  TAG(PSR_F_BIT | PSR_I_BIT | ABT_MODE),
		  TAG(PSR_F_BIT | PSR_I_BIT | SVC_MODE)
		: "memory", "cc"
	);
}
|
|
|
|
int on_vmap_irq_stack(unsigned long sp, int cpu)
|
|
{
|
|
unsigned long sp_irq;
|
|
|
|
sp_irq = (unsigned long)irq_stack[cpu];
|
|
if ((sp & ~(THREAD_SIZE - 1)) == (sp_irq & ~(THREAD_SIZE - 1)))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * irq_stack_entry() - compute the stack pointer to use on IRQ entry.
 * @sp: stack pointer at the time of the interrupt
 *
 * When @sp is already on this CPU's IRQ stack it is returned unchanged.
 * Otherwise the leading part of the current thread_info (everything before
 * cpu_context) is copied to THREAD_INFO_OFFSET inside the IRQ stack, @sp is
 * stored 8 bytes below that copy, and the address of that slot is returned.
 *
 * Return: the stack pointer value the caller should continue with.
 */
unsigned long notrace irq_stack_entry(unsigned long sp)
{
	int cpu = raw_smp_processor_id();
	unsigned long irq_sp;
	void *ti_copy;

	if (on_vmap_irq_stack(sp, cpu))
		return sp;

	/* copy some data to irq stack */
	ti_copy = (void *)((unsigned long)irq_stack[cpu] + THREAD_INFO_OFFSET);
	memcpy(ti_copy, current_thread_info(),
	       offsetof(struct thread_info, cpu_context));

	/*
	 * Save the start address of the interrupted task's context; it is
	 * used to back trace the call stack from irq.
	 * Note (original author): sp_irq must be aligned to 16 bytes.
	 */
	irq_sp = (unsigned long)ti_copy - 8;
	*(unsigned long *)irq_sp = sp;

	return irq_sp;
}
|
|
|
|
/*
 * pmd_check() - sync the current page table with init_mm for a kernel fault.
 * @addr: value handed back to the caller unchanged
 * @far:  faulting address
 *
 * For a fault address at or above TASK_SIZE, the entries of the page table
 * returned by cpu_get_pgd() that cover @far are filled in from init_mm
 * (p4d, pud, then pmd). Nothing is copied when init_mm has no entry either.
 *
 * Every level of the walk is indexed with @far; the p4d step previously
 * used @addr, which is not the faulting address.
 *
 * Return: @addr in every case.
 */
unsigned long notrace pmd_check(unsigned long addr, unsigned long far)
{
	unsigned int index;
	pgd_t *pgd, *pgd_k;
	p4d_t *p4d, *p4d_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	if (far < TASK_SIZE)
		return addr;

	index = pgd_index(far);

	pgd = cpu_get_pgd() + index;
	pgd_k = init_mm.pgd + index;

	p4d = p4d_offset(pgd, far);
	p4d_k = p4d_offset(pgd_k, far);

	if (p4d_none(*p4d_k))
		goto bad_area;
	if (!p4d_present(*p4d))
		set_p4d(p4d, *p4d_k);

	pud = pud_offset(p4d, far);
	pud_k = pud_offset(p4d_k, far);

	if (pud_none(*pud_k))
		goto bad_area;
	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, far);
	pmd_k = pmd_offset(pud_k, far);

#ifdef CONFIG_ARM_LPAE
	/*
	 * Only one hardware entry per PMD with LPAE.
	 */
	index = 0;
#else
	/*
	 * On ARM one Linux PGD entry contains two hardware entries (see page
	 * tables layout in pgtable.h). We normally guarantee that we always
	 * fill both L1 entries. But create_mapping() doesn't follow the rule.
	 * It can create individual L1 entries, so here we have to call
	 * pmd_none() check for the entry really corresponded to address, not
	 * for the first of pair.
	 */
	index = (far >> SECTION_SHIFT) & 1;
#endif
	if (pmd_none(pmd_k[index]))
		goto bad_area;

	copy_pmd(pmd, pmd_k);
bad_area:
	return addr;
}
|
|
|
|
/*
 * fixup_init_thread_union() - relocate the leading bytes of init_thread_union.
 *
 * Copies THREAD_INFO_SIZE bytes from the start of init_thread_union to
 * THREAD_INFO_OFFSET inside it, then zeroes the original location.
 */
void __init fixup_init_thread_union(void)
{
	void *src = &init_thread_union;
	void *dst = (void *)((unsigned long)&init_thread_union +
			     THREAD_INFO_OFFSET);

	memcpy(dst, src, THREAD_INFO_SIZE);
	memset(src, 0, THREAD_INFO_SIZE);
}
|
|
|
|
static void copy_pgd(void)
|
|
{
|
|
unsigned long index;
|
|
pgd_t *pgd_c = NULL, *pgd_k, *pgd_i;
|
|
unsigned long size;
|
|
|
|
/*
|
|
* sync pgd of current task and idmap_pgd from init mm
|
|
*/
|
|
index = pgd_index(TASK_SIZE);
|
|
pgd_c = cpu_get_pgd() + index;
|
|
pgd_i = idmap_pgd + index;
|
|
pgd_k = init_mm.pgd + index;
|
|
size = (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t);
|
|
pr_debug("pgd:%p, pgd_k:%p, pdg_i:%p\n",
|
|
pgd_c, pgd_k, pgd_i);
|
|
memcpy(pgd_c, pgd_k, size);
|
|
memcpy(pgd_i, pgd_k, size);
|
|
}
|
|
|
|
unsigned long save_suspend_context(unsigned int *ptr)
|
|
{
|
|
unsigned long ret;
|
|
|
|
if (likely(is_vmap_addr((unsigned long)ptr))) {
|
|
struct page *page = vmalloc_to_page(ptr);
|
|
unsigned long offset;
|
|
|
|
offset = (unsigned long)ptr & (PAGE_SIZE - 1);
|
|
ret = (page_to_phys(page) + offset);
|
|
pr_debug("%s, ptr:%p, page:%lx, save_ptr:%lx\n",
|
|
__func__, ptr, page_to_pfn(page), ret);
|
|
copy_pgd();
|
|
} else {
|
|
ret = virt_to_phys(ptr);
|
|
}
|
|
return ret;
|
|
}
|
|
#endif /* CONFIG_ARM */
|
|
|
|
int is_vmap_addr(unsigned long addr)
|
|
{
|
|
unsigned long start, end;
|
|
|
|
start = (unsigned long)avmap->root_vm->addr;
|
|
end = (unsigned long)avmap->root_vm->addr + avmap->root_vm->size;
|
|
if (addr >= start && addr < end)
|
|
return 1;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
static struct page *get_vmap_cached_page(int *remain)
|
|
{
|
|
unsigned long flags;
|
|
struct page *page;
|
|
|
|
raw_spin_lock_irqsave(&avmap->page_lock, flags);
|
|
if (unlikely(!avmap->cached_pages)) {
|
|
raw_spin_unlock_irqrestore(&avmap->page_lock, flags);
|
|
return NULL;
|
|
}
|
|
page = list_first_entry(&avmap->list, struct page, lru);
|
|
list_del(&page->lru);
|
|
avmap->cached_pages--;
|
|
*remain = avmap->cached_pages;
|
|
raw_spin_unlock_irqrestore(&avmap->page_lock, flags);
|
|
|
|
return page;
|
|
}
|
|
|
|
struct page *check_pte_exist(unsigned long addr)
|
|
{
|
|
struct mm_struct *mm;
|
|
pgd_t *pgd;
|
|
p4d_t *p4d;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
|
|
mm = &init_mm;
|
|
|
|
pgd = pgd_offset(mm, addr);
|
|
|
|
if (pgd_none(*pgd) || pgd_bad(*pgd))
|
|
return NULL;
|
|
|
|
p4d = p4d_offset(pgd, addr);
|
|
if (p4d_none(*p4d) || p4d_bad(*p4d))
|
|
return NULL;
|
|
|
|
pud = pud_offset(p4d, addr);
|
|
if (pud_none(*pud) || pud_bad(*pud))
|
|
return NULL;
|
|
|
|
pmd = pmd_offset(pud, addr);
|
|
if (pmd_none(*pmd) || pmd_bad(*pmd))
|
|
return NULL;
|
|
|
|
pte = pte_offset_kernel(pmd, addr);
|
|
if (pte_none(*pte))
|
|
return NULL;
|
|
#ifdef CONFIG_ARM64
|
|
return pte_page(*pte);
|
|
#elif defined(CONFIG_ARM)
|
|
return pte_page(pte_val(*pte));
|
|
#else
|
|
return NULL; /* not supported */
|
|
#endif
|
|
}
|
|
|
|
static int vmap_mmu_set(struct page *page, unsigned long addr, int set)
|
|
{
|
|
pgd_t *pgd = NULL;
|
|
p4d_t *p4d = NULL;
|
|
pud_t *pud = NULL;
|
|
pmd_t *pmd = NULL;
|
|
pte_t *pte = NULL;
|
|
|
|
pgd = pgd_offset_k(addr);
|
|
p4d = p4d_alloc(&init_mm, pgd, addr);
|
|
if (!p4d)
|
|
goto nomem;
|
|
|
|
pud = pud_alloc(&init_mm, p4d, addr);
|
|
if (!pud)
|
|
goto nomem;
|
|
|
|
if (pud_none(*pud)) {
|
|
pmd = pmd_alloc(&init_mm, pud, addr);
|
|
if (!pmd)
|
|
goto nomem;
|
|
}
|
|
|
|
pmd = pmd_offset(pud, addr);
|
|
if (pmd_none(*pmd)) {
|
|
pte = pte_alloc_kernel(pmd, addr);
|
|
if (!pte)
|
|
goto nomem;
|
|
}
|
|
|
|
pte = pte_offset_kernel(pmd, addr);
|
|
if (set)
|
|
set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL));
|
|
else
|
|
pte_clear(&init_mm, addr, pte);
|
|
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
|
|
#ifdef CONFIG_ARM64
|
|
D("add:%lx, pgd:%px %llx, pmd:%px %llx, pte:%px %llx\n",
|
|
addr, pgd, pgd_val(*pgd), pmd, pmd_val(*pmd),
|
|
pte, pte_val(*pte));
|
|
#elif defined(CONFIG_ARM)
|
|
D("add:%lx, pgd:%px %x, pmd:%px %x, pte:%px %x\n",
|
|
addr, pgd, (unsigned int)pgd_val(*pgd),
|
|
pmd, (unsigned int)pmd_val(*pmd),
|
|
pte, pte_val(*pte));
|
|
#endif
|
|
return 0;
|
|
nomem:
|
|
E("allocation page table failed, G:%px, U:%px, M:%px, T:%px",
|
|
pgd, pud, pmd, pte);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
static int stack_floor_page(unsigned long addr)
|
|
{
|
|
unsigned long pos;
|
|
|
|
pos = addr & (THREAD_SIZE - 1);
|
|
/*
|
|
* stack address must align to THREAD_SIZE
|
|
*/
|
|
if (THREAD_SIZE_ORDER > 1)
|
|
return pos < PAGE_SIZE;
|
|
else
|
|
return pos < (PAGE_SIZE / 4);
|
|
}
|
|
|
|
static int check_addr_up_flow(unsigned long addr)
|
|
{
|
|
/*
|
|
* It's the first page of 4 contiguous virtual address
|
|
* rage(aligned to THREAD_SIZE) but next page of this
|
|
* addr is not mapped
|
|
*/
|
|
if (stack_floor_page(addr) && !check_pte_exist(addr + PAGE_SIZE))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
#if (CONFIG_AMLOGIC_KERNEL_VERSION >= 14515) && defined(CONFIG_ARM64)
/*
 * aml_dump_backtrace_entry() - arch_stack_walk() callback printing one frame.
 * @arg:   cookie given to arch_stack_walk(); printed as a string
 * @where: return address of the frame
 *
 * Return: always true, so the walk continues.
 */
static bool aml_dump_backtrace_entry(void *arg, unsigned long where)
{
	char *prefix = arg;

	pr_info("%s %pSb\n", prefix, (void *)where);
	return true;
}
#else
/*
 * aml_dump_backtrace_entry() - print one backtrace line with frame size.
 * @ip: return address of the frame
 * @fp: frame pointer of the frame
 * @sp: stack pointer of the frame (only used on ARM)
 *
 * A computed frame size of THREAD_SIZE or more is printed as 0.
 */
static void aml_dump_backtrace_entry(unsigned long ip, unsigned long fp,
				     unsigned long sp)
{
	unsigned long span = 0;

#ifdef CONFIG_ARM64
	if (fp >= VMALLOC_START) {
		/* distance from this frame record to the one saved in it */
		span = *(unsigned long *)fp - fp;
		/* fp cross IRQ or vmap stack */
		if (span >= THREAD_SIZE)
			span = 0;
	}
	pr_info("[%016lx+%4ld][<%px>] %pS\n",
		fp, span, (void *)ip, (void *)ip);
#elif defined(CONFIG_ARM)
	if (fp >= TASK_SIZE) {
		span = fp - sp + 4;
		/* fp cross IRQ or vmap stack */
		if (span >= THREAD_SIZE)
			span = 0;
	}
	pr_info("[%08lx+%4ld][<%px>] %pS\n",
		fp, span, (void *)ip, (void *)ip);
#endif
}
#endif
|
|
|
|
static noinline void show_fault_stack(unsigned long addr, struct pt_regs *regs)
|
|
{
|
|
#if (CONFIG_AMLOGIC_KERNEL_VERSION >= 14515) && defined(CONFIG_ARM64)
|
|
//struct unwind_state frame;
|
|
|
|
arch_stack_walk(aml_dump_backtrace_entry, KERN_DEFAULT, current, NULL);
|
|
#else
|
|
struct stackframe frame;
|
|
unsigned long sp;
|
|
|
|
#ifdef CONFIG_ARM64
|
|
frame.fp = regs->regs[29];
|
|
frame.prev_fp = addr;
|
|
frame.pc = (unsigned long)regs->regs[30];
|
|
sp = addr;
|
|
frame.prev_type = STACK_TYPE_UNKNOWN;
|
|
#elif defined(CONFIG_ARM)
|
|
frame.fp = regs->ARM_fp;
|
|
frame.sp = regs->ARM_sp;
|
|
frame.lr = regs->ARM_lr;
|
|
frame.pc = (unsigned long)regs->uregs[15];
|
|
sp = regs->ARM_sp;
|
|
#endif
|
|
|
|
pr_info("Addr:%lx, Call trace:\n", addr);
|
|
#ifdef CONFIG_ARM64
|
|
pr_info("[%016lx+%4ld][<%px>] %pS\n",
|
|
addr, frame.fp - addr, (void *)regs->pc, (void *)regs->pc);
|
|
#elif defined(CONFIG_ARM)
|
|
pr_info("[%08lx+%4ld][<%px>] %pS\n",
|
|
addr, frame.fp - addr, (void *)regs->uregs[15],
|
|
(void *)regs->uregs[15]);
|
|
#endif
|
|
while (1) {
|
|
int ret;
|
|
|
|
#ifdef CONFIG_ARM64
|
|
dump_backtrace_entry(frame.pc, frame.fp, sp);
|
|
#if CONFIG_AMLOGIC_KERNEL_VERSION >= 14515
|
|
ret = unwind_next(&frame);
|
|
#else
|
|
ret = unwind_frame(current, &frame);
|
|
#endif
|
|
#elif defined(CONFIG_ARM)
|
|
aml_dump_backtrace_entry(frame.pc, frame.fp, frame.sp);
|
|
ret = unwind_frame(&frame);
|
|
#endif
|
|
if (ret < 0)
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void check_sp_fault_again(struct pt_regs *regs)
|
|
{
|
|
unsigned long sp = 0, addr;
|
|
struct page *page;
|
|
int cache = 0;
|
|
|
|
#ifdef CONFIG_ARM
|
|
sp = regs->ARM_sp;
|
|
#elif defined(CONFIG_ARM64)
|
|
sp = regs->sp;
|
|
#endif
|
|
addr = sp - sizeof(*regs);
|
|
|
|
/*
|
|
* When we handle vmap stack fault, we are in pre-allcated
|
|
* per-cpu vmap stack. But if sp is near bottom of a page and we
|
|
* return to normal handler, sp may down grow to another page
|
|
* to cause a vmap fault again. So we need map next page for
|
|
* stack before page-fault happen.
|
|
*
|
|
* But we need check sp is really in vmap stack range.
|
|
*/
|
|
if (!is_vmap_addr(addr)) /* addr may in linear mapping */
|
|
return;
|
|
|
|
if (sp && ((addr & PAGE_MASK) != (sp & PAGE_MASK))) {
|
|
/*
|
|
* will fault when we copy back context, so handle
|
|
* it first
|
|
*/
|
|
D("fault again, sp:%lx, addr:%lx\n", sp, addr);
|
|
page = get_vmap_cached_page(&cache);
|
|
WARN_ON(!page);
|
|
vmap_mmu_set(page, addr, 1);
|
|
update_vmap_stack(1);
|
|
#ifndef CONFIG_KASAN
|
|
if (THREAD_SIZE_ORDER > 1 && stack_floor_page(addr)) {
|
|
E("task:%d %s, stack near overflow, addr:%lx\n",
|
|
current->pid, current->comm, addr);
|
|
show_fault_stack(addr, regs);
|
|
}
|
|
#endif
|
|
|
|
/* cache is not enough */
|
|
if (cache <= (VMAP_CACHE_PAGE / 2))
|
|
atomic_inc(&vmap_cache_flag);
|
|
|
|
D("map page:%5lx for addr:%lx\n", page_to_pfn(page), addr);
|
|
atomic_inc(&vmap_pre_handle_count);
|
|
#if DEBUG_VMAP
|
|
show_fault_stack(addr, regs);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/*
|
|
* IRQ should *NEVER* been opened in this handler
|
|
*/
|
|
int __handle_vmap_fault(unsigned long addr, unsigned int esr,
|
|
struct pt_regs *regs)
|
|
{
|
|
struct page *page;
|
|
int cache = 0;
|
|
|
|
if (!is_vmap_addr(addr)) {
|
|
check_sp_fault_again(regs);
|
|
return -EINVAL;
|
|
}
|
|
|
|
D("addr:%lx, esr:%x, task:%5d %s\n",
|
|
addr, esr, current->pid, current->comm);
|
|
#ifdef CONFIG_ARM64
|
|
D("pc:%ps, %llx, lr:%ps, %llx, sp:%llx, %lx\n",
|
|
(void *)regs->pc, regs->pc,
|
|
(void *)regs->regs[30], regs->regs[30], regs->sp,
|
|
current_stack_pointer);
|
|
#elif defined(CONFIG_ARM)
|
|
D("pc:%ps, %lx, lr:%ps, %lx, sp:%lx, %lx\n",
|
|
(void *)regs->uregs[15], regs->uregs[15],
|
|
(void *)regs->uregs[14], regs->uregs[14], regs->uregs[13],
|
|
current_stack_pointer);
|
|
#endif
|
|
|
|
if (check_addr_up_flow(addr)) {
|
|
E("address %lx out of range\n", addr);
|
|
#ifdef CONFIG_ARM64
|
|
E("PC is:%llx, %ps, LR is:%llx %ps\n",
|
|
regs->pc, (void *)regs->pc,
|
|
regs->regs[30], (void *)regs->regs[30]);
|
|
#elif defined(CONFIG_ARM)
|
|
E("PC is:%lx, %ps, LR is:%lx %ps\n",
|
|
regs->uregs[15], (void *)regs->uregs[15],
|
|
regs->uregs[14], (void *)regs->uregs[14]);
|
|
#endif
|
|
E("task:%d %s, stack:%px, %lx\n",
|
|
current->pid, current->comm, current->stack,
|
|
current_stack_pointer);
|
|
show_fault_stack(addr, regs);
|
|
check_sp_fault_again(regs);
|
|
return -ERANGE;
|
|
}
|
|
|
|
#ifdef CONFIG_ARM
|
|
page = check_pte_exist(addr);
|
|
if (page) {
|
|
D("task:%d %s, page:%lx mapped for addr:%lx\n",
|
|
current->pid, current->comm, page_to_pfn(page), addr);
|
|
check_sp_fault_again(regs);
|
|
return -EINVAL;
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* allocate a new page for vmap
|
|
*/
|
|
page = get_vmap_cached_page(&cache);
|
|
WARN_ON(!page);
|
|
vmap_mmu_set(page, addr, 1);
|
|
update_vmap_stack(1);
|
|
if (THREAD_SIZE_ORDER > 1 && stack_floor_page(addr)) {
|
|
E("task:%d %s, stack near overflow, addr:%lx\n",
|
|
current->pid, current->comm, addr);
|
|
show_fault_stack(addr, regs);
|
|
}
|
|
|
|
/* cache is not enough */
|
|
if (cache <= (VMAP_CACHE_PAGE / 2))
|
|
atomic_inc(&vmap_cache_flag);
|
|
|
|
atomic_inc(&vmap_fault_count);
|
|
D("map page:%5lx for addr:%lx\n", page_to_pfn(page), addr);
|
|
#if DEBUG_VMAP
|
|
show_fault_stack(addr, regs);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
struct pt_regs *handle_vmap_fault(unsigned long addr, unsigned int esr,
|
|
struct pt_regs *regs)
|
|
{
|
|
int ret;
|
|
|
|
ret = __handle_vmap_fault(addr, esr, regs);
|
|
if (!ret) {
|
|
#ifdef CONFIG_ARM64
|
|
__vmap_exit(regs);
|
|
#endif
|
|
return NULL;
|
|
}
|
|
#ifdef CONFIG_ARM64
|
|
return switch_vmap_context(regs);
|
|
#else
|
|
return (struct pt_regs *)-1UL;
|
|
#endif
|
|
}
|
|
EXPORT_SYMBOL(handle_vmap_fault);
|
|
|
|
/* APIs for external usage */
|
|
void vmap_report_meminfo(struct seq_file *m)
|
|
{
|
|
unsigned long kb = 1 << (PAGE_SHIFT - 10);
|
|
unsigned long tmp1, tmp2, tmp3;
|
|
|
|
tmp1 = kb * atomic_read(&vmap_stack_size);
|
|
tmp2 = kb * atomic_read(&vmap_fault_count);
|
|
tmp3 = kb * atomic_read(&vmap_pre_handle_count);
|
|
|
|
seq_printf(m, "Stack virtual: %8ld kB\n", global_node_page_state(NR_KERNEL_STACK_KB));
|
|
seq_printf(m, "VmapStack: %8ld kB\n", tmp1);
|
|
seq_printf(m, "VmapFault: %8ld kB\n", tmp2);
|
|
seq_printf(m, "VmapPfault: %8ld kB\n", tmp3);
|
|
}
|
|
|
|
/* FOR debug */
|
|
static unsigned long vmap_debug_jiff;
|
|
|
|
void aml_account_task_stack(struct task_struct *tsk, int account)
|
|
{
|
|
unsigned long stack = (unsigned long)task_stack_page(tsk);
|
|
struct page *first_page;
|
|
|
|
if (unlikely(!is_vmap_addr(stack))) {
|
|
/* stack get from kmalloc */
|
|
first_page = virt_to_page((void *)stack);
|
|
mod_lruvec_page_state(first_page, NR_KERNEL_STACK_KB,
|
|
THREAD_SIZE / 1024 * account);
|
|
|
|
update_vmap_stack(account * (THREAD_SIZE / PAGE_SIZE));
|
|
return;
|
|
}
|
|
stack += STACK_TOP_PAGE_OFF;
|
|
first_page = vmalloc_to_page((void *)stack);
|
|
mod_lruvec_page_state(first_page, NR_KERNEL_STACK_KB,
|
|
THREAD_SIZE / 1024 * account);
|
|
if (time_after(jiffies, vmap_debug_jiff + HZ * 5)) {
|
|
int ratio, rem;
|
|
|
|
vmap_debug_jiff = jiffies;
|
|
ratio = ((get_vmap_stack_size() << (PAGE_SHIFT - 10)) * 10000) /
|
|
global_node_page_state(NR_KERNEL_STACK_KB);
|
|
rem = ratio % 100;
|
|
D("STACK:%ld KB, vmap:%d KB, cached:%d KB, rate:%2d.%02d%%\n",
|
|
global_node_page_state(NR_KERNEL_STACK_KB),
|
|
get_vmap_stack_size() << (PAGE_SHIFT - 10),
|
|
avmap->cached_pages << (PAGE_SHIFT - 10),
|
|
ratio / 100, rem);
|
|
}
|
|
}
|
|
|
|
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
DEFINE_MUTEX(stack_shadow_lock); /* For kasan */

/*
 * check_and_map_stack_shadow() - make sure the KASAN shadow of @addr is mapped.
 * @addr: stack address whose shadow page is needed
 *
 * Nothing is mapped when the shadow address already has a pte; a warning is
 * raised if that pte refers to kasan_early_shadow_page. Otherwise a zeroed
 * page is allocated and mapped at the page-aligned shadow address, and
 * released again if the mapping fails.
 */
static void check_and_map_stack_shadow(unsigned long addr)
{
	struct page *pages[2] = {};
	struct page *page;
	unsigned long shadow;
	int ret;

	shadow = (unsigned long)kasan_mem_to_shadow((void *)addr);
	page = check_pte_exist(shadow);
	if (page) {
		WARN(page_address(page) == (void *)kasan_early_shadow_page,
		     "bad pte, page:%px, %lx, addr:%lx\n",
		     page_address(page), page_to_pfn(page), addr);
		return;
	}

	shadow &= PAGE_MASK;
	page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM |
			  __GFP_ZERO | __GFP_HIGH);
	if (WARN(!page,
		 "alloc page for addr:%lx, shadow:%lx fail\n",
		 addr, shadow))
		return;

	pages[0] = page;
	ret = vmap_pages_range_noflush(shadow, shadow + PAGE_SIZE, PAGE_KERNEL, pages, PAGE_SHIFT);
	if (ret >= 0)
		return;

	pr_err("%s, map shadow:%lx failed:%d\n", __func__, shadow, ret);
	__free_page(page);
}
#endif
|
|
|
|
void *aml_stack_alloc(int node, struct task_struct *tsk)
|
|
{
|
|
unsigned long bitmap_no, raw_start;
|
|
struct page *page;
|
|
unsigned long addr, map_addr, flags;
|
|
|
|
raw_spin_lock_irqsave(&avmap->vmap_lock, flags);
|
|
raw_start = avmap->start_bit;
|
|
bitmap_no = find_next_zero_bit(avmap->bitmap, MAX_TASKS,
|
|
avmap->start_bit);
|
|
avmap->start_bit = bitmap_no + 1; /* next idle address space */
|
|
if (bitmap_no >= MAX_TASKS) {
|
|
raw_spin_unlock_irqrestore(&avmap->vmap_lock, flags);
|
|
/*
|
|
* if vmap address space is full, we still need to try
|
|
* to get stack from kmalloc
|
|
*/
|
|
addr = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
|
|
E("BITMAP FULL, kmalloc task stack:%lx\n", addr);
|
|
return (void *)addr;
|
|
}
|
|
bitmap_set(avmap->bitmap, bitmap_no, 1);
|
|
raw_spin_unlock_irqrestore(&avmap->vmap_lock, flags);
|
|
|
|
page = alloc_page(THREADINFO_GFP | __GFP_ZERO | __GFP_HIGHMEM);
|
|
if (!page) {
|
|
raw_spin_lock_irqsave(&avmap->vmap_lock, flags);
|
|
bitmap_clear(avmap->bitmap, bitmap_no, 1);
|
|
raw_spin_unlock_irqrestore(&avmap->vmap_lock, flags);
|
|
E("allocation page failed\n");
|
|
return NULL;
|
|
}
|
|
/*
|
|
* map first page only
|
|
*/
|
|
addr = (unsigned long)avmap->root_vm->addr + THREAD_SIZE * bitmap_no;
|
|
map_addr = addr + STACK_TOP_PAGE_OFF;
|
|
vmap_mmu_set(page, map_addr, 1);
|
|
update_vmap_stack(1);
|
|
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
|
/* 2 thread stack can be a single shadow page, we need use lock */
|
|
mutex_lock(&stack_shadow_lock);
|
|
check_and_map_stack_shadow(addr);
|
|
mutex_unlock(&stack_shadow_lock);
|
|
#endif
|
|
|
|
D("bit idx:%5ld, start:%5ld, addr:%lx, page:%lx\n",
|
|
bitmap_no, raw_start, addr, page_to_pfn(page));
|
|
|
|
return (void *)addr;
|
|
}
|
|
|
|
void aml_stack_free(struct task_struct *tsk)
|
|
{
|
|
unsigned long stack = (unsigned long)tsk->stack;
|
|
unsigned long addr, bitmap_no;
|
|
struct page *page;
|
|
unsigned long flags;
|
|
|
|
if (unlikely(!is_vmap_addr(stack))) {
|
|
/* stack get from kmalloc */
|
|
free_pages(stack, THREAD_SIZE_ORDER);
|
|
return;
|
|
}
|
|
|
|
addr = stack + STACK_TOP_PAGE_OFF;
|
|
for (; addr >= stack; addr -= PAGE_SIZE) {
|
|
page = vmalloc_to_page((const void *)addr);
|
|
if (!page)
|
|
break;
|
|
#ifdef CONFIG_KASAN
|
|
kasan_unpoison_range((void *)addr, PAGE_SIZE);
|
|
#endif
|
|
vmap_mmu_set(page, addr, 0);
|
|
/* supplement for stack page cache first */
|
|
raw_spin_lock_irqsave(&avmap->page_lock, flags);
|
|
if (avmap->cached_pages < VMAP_CACHE_PAGE) {
|
|
list_add_tail(&page->lru, &avmap->list);
|
|
avmap->cached_pages++;
|
|
raw_spin_unlock_irqrestore(&avmap->page_lock, flags);
|
|
clear_highpage(page); /* clear for next use */
|
|
} else {
|
|
raw_spin_unlock_irqrestore(&avmap->page_lock, flags);
|
|
__free_page(page);
|
|
}
|
|
update_vmap_stack(-1);
|
|
}
|
|
bitmap_no = (stack - (unsigned long)avmap->root_vm->addr) / THREAD_SIZE;
|
|
raw_spin_lock_irqsave(&avmap->vmap_lock, flags);
|
|
bitmap_clear(avmap->bitmap, bitmap_no, 1);
|
|
if (bitmap_no < avmap->start_bit)
|
|
avmap->start_bit = bitmap_no;
|
|
raw_spin_unlock_irqrestore(&avmap->vmap_lock, flags);
|
|
}
|
|
|
|
#if DEBUG_VMAP
/*
 * check_stack_depth() - recurse until the stack is almost used up.
 * @p:   buffer in the caller's frame; 32 bytes are copied from it
 * @cnt: recursion depth, printed for reference
 *
 * Each level places a 32 byte buffer on the stack and recurses. The
 * recursion ends once that buffer is less than 1024 bytes above the
 * THREAD_SIZE boundary below it.
 */
static void check_stack_depth(void *p, int cnt)
{
	unsigned char stack_buf[32];
	unsigned long stack_end, cur_stack;

	stack_end = (unsigned long)p & ~(THREAD_SIZE - 1);
	cur_stack = (unsigned long)stack_buf & (THREAD_SIZE - 1);
	if (cur_stack >= 1024) {
		pr_info("%s, %3d, p:%px, stack_buf:%px, cur_stack:%lx, end:%lx\n",
			__func__, cnt, p, stack_buf, cur_stack, stack_end);
		memcpy(stack_buf, p, sizeof(stack_buf));
		check_stack_depth(stack_buf, cnt + 1);
		return;
	}

	pr_info("%s, %3d, p:%px, stack_buf:%px, cur_stack:%lx, end:%lx, quit!!!\n",
		__func__, cnt, p, stack_buf, cur_stack, stack_end);
}

/* stack_test() - start the recursion of check_stack_depth() at depth 0. */
static void stack_test(void)
{
	unsigned char seed[32] = {};

	check_stack_depth(seed, 0);
}
#endif
|
|
|
|
/*
|
|
* page cache maintain task for vmap
|
|
*/
|
|
static int vmap_task(void *data)
|
|
{
|
|
struct page *page;
|
|
struct list_head head;
|
|
int i, cnt;
|
|
unsigned long flags;
|
|
struct aml_vmap *v = (struct aml_vmap *)data;
|
|
|
|
#if DEBUG_VMAP
|
|
stack_test();
|
|
#endif
|
|
set_user_nice(current, -19);
|
|
while (1) {
|
|
if (kthread_should_stop())
|
|
break;
|
|
|
|
if (!atomic_read(&vmap_cache_flag)) {
|
|
msleep_interruptible(20);
|
|
continue;
|
|
}
|
|
raw_spin_lock_irqsave(&v->page_lock, flags);
|
|
cnt = v->cached_pages;
|
|
raw_spin_unlock_irqrestore(&v->page_lock, flags);
|
|
if (cnt >= VMAP_CACHE_PAGE) {
|
|
D("cache full cnt:%d\n", cnt);
|
|
continue;
|
|
}
|
|
|
|
INIT_LIST_HEAD(&head);
|
|
for (i = 0; i < VMAP_CACHE_PAGE - cnt; i++) {
|
|
page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM |
|
|
__GFP_ZERO | __GFP_HIGH);
|
|
if (!page) {
|
|
E("get page failed, allocated:%d, cnt:%d\n",
|
|
i, cnt);
|
|
break;
|
|
}
|
|
list_add(&page->lru, &head);
|
|
}
|
|
raw_spin_lock_irqsave(&v->page_lock, flags);
|
|
list_splice(&head, &v->list);
|
|
v->cached_pages += i;
|
|
raw_spin_unlock_irqrestore(&v->page_lock, flags);
|
|
atomic_set(&vmap_cache_flag, 0);
|
|
E("add %d pages, cnt:%d\n", i, cnt);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * start_thread_work() - start the page cache maintenance thread.
 *
 * Return: 0 on success, -ENODEV when avmap is NULL, or the error reported
 * by kthread_run().
 */
int __init start_thread_work(void)
{
	struct task_struct *task;

	/* vmap_task() dereferences its argument: never start it on NULL */
	if (!avmap)
		return -ENODEV;

	task = kthread_run(vmap_task, avmap, "vmap_thread");
	if (IS_ERR(task)) {
		E("create vmap_thread failed:%ld\n", PTR_ERR(task));
		return PTR_ERR(task);
	}
	return 0;
}
arch_initcall(start_thread_work);
|
|
|
|
/*
 * thread_stack_cache_init() - set up the vmap stack area and its page cache.
 *
 * Allocates the driver state, the slot bitmap, the VM_STACK_AREA_SIZE
 * virtual range and an initial cache of VMAP_CACHE_PAGE pages.
 *
 * Changes against the previous version:
 *  - avmap is assigned only once everything succeeded, so a failed init
 *    leaves it NULL instead of pointing at freed memory;
 *  - the bitmap is sized in whole longs, which is what the bitmap helpers
 *    operate on;
 *  - the high-order allocation is split with split_page() before its pages
 *    are cached, because they are later freed one by one with
 *    __free_page();
 *  - the initial pages are zeroed like every other page put in the cache.
 */
void __init thread_stack_cache_init(void)
{
	int i;
	struct page *page;
	struct aml_vmap *v;

	page = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
			   VMAP_CACHE_PAGE_ORDER);
	if (!page)
		return;

	v = kzalloc(sizeof(*v), GFP_KERNEL);
	if (!v)
		goto free_cache_pages;

	v->bitmap = kcalloc(BITS_TO_LONGS(MAX_TASKS), sizeof(long),
			    GFP_KERNEL);
	if (!v->bitmap)
		goto free_vmap;

	pr_info("%s, vmap:%px, bitmap:%px, cache page:%lx\n",
		__func__, v, v->bitmap, page_to_pfn(page));
	v->root_vm = __get_vm_area_node(VM_STACK_AREA_SIZE,
					MAX_ORDER_NR_PAGES * PAGE_SIZE,
					PAGE_SHIFT,
					0, VMAP_ADDR_START, VMAP_ADDR_END,
					NUMA_NO_NODE, GFP_KERNEL,
					__builtin_return_address(0));
	WARN(!v->root_vm, "alloc vmap area %x failed\n", VM_STACK_AREA_SIZE);
	if (!v->root_vm)
		goto free_bitmap;

	pr_info("%s, allocation vm area:%px, addr:%px, size:%lx\n", __func__,
		v->root_vm, v->root_vm->addr,
		v->root_vm->size);

	INIT_LIST_HEAD(&v->list);
	raw_spin_lock_init(&v->page_lock);
	raw_spin_lock_init(&v->vmap_lock);

	/* give every sub-page its own reference before handing them out */
	split_page(page, VMAP_CACHE_PAGE_ORDER);
	for (i = 0; i < VMAP_CACHE_PAGE; i++) {
		list_add(&page->lru, &v->list);
		page++;
	}
	v->cached_pages = VMAP_CACHE_PAGE;

	/* publish the fully initialised state */
	avmap = v;

#ifdef CONFIG_ARM64
	for_each_possible_cpu(i) {
		unsigned long addr;

		addr = (unsigned long)per_cpu_ptr(vmap_stack, i);
		pr_info("cpu %d, vmap_stack:[%lx-%lx]\n",
			i, addr, addr + THREAD_START_SP);
		addr = (unsigned long)per_cpu_ptr(irq_stack, i);
		pr_info("cpu %d, irq_stack: [%lx-%lx]\n",
			i, addr, addr + THREAD_START_SP);
	}
#endif
	return;

free_bitmap:
	kfree(v->bitmap);
free_vmap:
	kfree(v);
free_cache_pages:
	__free_pages(page, VMAP_CACHE_PAGE_ORDER);
}
|