// SPDX-License-Identifier: (GPL-2.0+ OR MIT) /* * Copyright (c) 2019 Amlogic, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CONFIG_KASAN #include #endif #include #include #include #include #define DEBUG_VMAP 0 #define MODULE_NAME "amlogic-vmap" #define D(format, args...) \ { if (DEBUG_VMAP) \ pr_info("%s " format, __func__, ##args); \ } #define E(format, args...) pr_err("%s " format, __func__, ##args) static atomic_t vmap_stack_size; static atomic_t vmap_fault_count; static atomic_t vmap_pre_handle_count; static struct aml_vmap *avmap; static atomic_t vmap_cache_flag; #ifdef CONFIG_ARM64 DEFINE_PER_CPU(unsigned long [THREAD_SIZE / sizeof(long)], vmap_stack) __aligned(PAGE_SIZE); bool on_vmap_stack(unsigned long sp, struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_ptr(vmap_stack); unsigned long high = low + THREAD_SIZE; if (sp < low || sp >= high) return false; if (info) { info->low = low; info->high = high; //info->type = STACK_TYPE_VMAP; } return true; } void dump_backtrace_entry_vmap(unsigned long ip, unsigned long fp, unsigned long low, const char *loglvl) { unsigned long fp_size = 0; unsigned long high; high = low + THREAD_SIZE; /* * Since the target process may be rescheduled again, * we have to add necessary validation checking for fp. * The checking condition is borrowed from unwind_frame */ if (on_irq_stack(fp, sizeof(long)) || (fp >= low && fp <= high)) { fp_size = *((unsigned long *)fp) - fp; /* fp cross IRQ or vmap stack */ if (fp_size >= THREAD_SIZE) fp_size = 0; } pr_info("[%016lx+%4ld][<%016lx>] %s %pSb\n", fp, fp_size, (unsigned long)ip, loglvl, (void *)ip); } #else static unsigned long irq_stack1[(THREAD_SIZE / sizeof(long))] __aligned(THREAD_SIZE); void *irq_stack[NR_CPUS] = { irq_stack1, /* only assign 1st irq stack ,other need alloc */ }; static unsigned long vmap_stack1[(THREAD_SIZE / sizeof(long))] __aligned(THREAD_SIZE); static void *vmap_stack[NR_CPUS] = { vmap_stack1, /* only assign 1st vmap stack ,other need alloc */ }; #endif void update_vmap_stack(int diff) { atomic_add(diff, &vmap_stack_size); } EXPORT_SYMBOL(update_vmap_stack); int get_vmap_stack_size(void) { return atomic_read(&vmap_stack_size); } EXPORT_SYMBOL(get_vmap_stack_size); #ifdef CONFIG_ARM void notrace __setup_vmap_stack(unsigned long cpu) { void *stack; #define VMAP_MASK (GFP_ATOMIC | __GFP_ZERO) #ifdef CONFIG_THUMB2_KERNEL #define TAG "r" #else #define TAG "I" #endif stack = vmap_stack[cpu]; if (!stack) { stack = (void *)__get_free_pages(VMAP_MASK, THREAD_SIZE_ORDER); WARN_ON(!stack); vmap_stack[cpu] = stack; } pr_debug("cpu %ld, vmap stack:[%lx-%lx]\n", cpu, (unsigned long)stack, (unsigned long)stack + THREAD_START_SP); stack += THREAD_SIZE; /* * reserve 24 byte for r0, lr, spsr, sp_svc and 8 bytes gap */ stack -= (24); asm volatile ( "msr cpsr_c, %1 \n" "mov sp, %0 \n" "msr cpsr_c, %2 \n" : : "r" (stack), TAG(PSR_F_BIT | PSR_I_BIT | ABT_MODE), TAG(PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "memory", "cc" ); } int on_vmap_irq_stack(unsigned long sp, int cpu) { unsigned long sp_irq; sp_irq = (unsigned long)irq_stack[cpu]; if ((sp & ~(THREAD_SIZE - 1)) == (sp_irq & ~(THREAD_SIZE - 1))) return 1; return 0; } unsigned long notrace irq_stack_entry(unsigned long sp) { int cpu = raw_smp_processor_id(); if (!on_vmap_irq_stack(sp, cpu)) { unsigned long sp_irq = (unsigned long)irq_stack[cpu]; void *src, *dst; /* * copy some data to irq stack */ src = current_thread_info(); dst = (void *)(sp_irq + THREAD_INFO_OFFSET); memcpy(dst, src, offsetof(struct thread_info, cpu_context)); sp_irq = (unsigned long)dst - 8; /* * save start addr of the interrupted task's context * used to back trace stack call from irq * Note: sp_irq must be aligned to 16 bytes */ *((unsigned long *)sp_irq) = sp; return sp_irq; } return sp; } unsigned long notrace pmd_check(unsigned long addr, unsigned long far) { unsigned int index; pgd_t *pgd, *pgd_k; p4d_t *p4d, *p4d_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; if (far < TASK_SIZE) return addr; index = pgd_index(far); pgd = cpu_get_pgd() + index; pgd_k = init_mm.pgd + index; p4d = p4d_offset(pgd, addr); p4d_k = p4d_offset(pgd_k, addr); if (p4d_none(*p4d_k)) goto bad_area; if (!p4d_present(*p4d)) set_p4d(p4d, *p4d_k); pud = pud_offset(p4d, far); pud_k = pud_offset(p4d_k, far); if (pud_none(*pud_k)) goto bad_area; if (!pud_present(*pud)) set_pud(pud, *pud_k); pmd = pmd_offset(pud, far); pmd_k = pmd_offset(pud_k, far); #ifdef CONFIG_ARM_LPAE /* * Only one hardware entry per PMD with LPAE. */ index = 0; #else /* * On ARM one Linux PGD entry contains two hardware entries (see page * tables layout in pgtable.h). We normally guarantee that we always * fill both L1 entries. But create_mapping() doesn't follow the rule. * It can create individual L1 entries, so here we have to call * pmd_none() check for the entry really corresponded to address, not * for the first of pair. */ index = (far >> SECTION_SHIFT) & 1; #endif if (pmd_none(pmd_k[index])) goto bad_area; copy_pmd(pmd, pmd_k); bad_area: return addr; } void __init fixup_init_thread_union(void) { void *p; p = (void *)((unsigned long)&init_thread_union + THREAD_INFO_OFFSET); memcpy(p, &init_thread_union, THREAD_INFO_SIZE); memset(&init_thread_union, 0, THREAD_INFO_SIZE); } static void copy_pgd(void) { unsigned long index; pgd_t *pgd_c = NULL, *pgd_k, *pgd_i; unsigned long size; /* * sync pgd of current task and idmap_pgd from init mm */ index = pgd_index(TASK_SIZE); pgd_c = cpu_get_pgd() + index; pgd_i = idmap_pgd + index; pgd_k = init_mm.pgd + index; size = (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t); pr_debug("pgd:%p, pgd_k:%p, pdg_i:%p\n", pgd_c, pgd_k, pgd_i); if (pgd_c != pgd_k) memcpy(pgd_c, pgd_k, size); memcpy(pgd_i, pgd_k, size); } unsigned long save_suspend_context(unsigned int *ptr) { unsigned long ret; if (likely(is_vmap_addr((unsigned long)ptr))) { struct page *page = vmalloc_to_page(ptr); unsigned long offset; offset = (unsigned long)ptr & (PAGE_SIZE - 1); ret = (page_to_phys(page) + offset); pr_debug("%s, ptr:%p, page:%lx, save_ptr:%lx\n", __func__, ptr, page_to_pfn(page), ret); copy_pgd(); } else { ret = virt_to_phys(ptr); } return ret; } #endif /* CONFIG_ARM */ int is_vmap_addr(unsigned long addr) { unsigned long start, end; start = (unsigned long)avmap->root_vm->addr; end = (unsigned long)avmap->root_vm->addr + avmap->root_vm->size; if (addr >= start && addr < end) return 1; else return 0; } static struct page *get_vmap_cached_page(int *remain) { unsigned long flags; struct page *page; raw_spin_lock_irqsave(&avmap->page_lock, flags); if (unlikely(!avmap->cached_pages)) { raw_spin_unlock_irqrestore(&avmap->page_lock, flags); return NULL; } page = list_first_entry(&avmap->list, struct page, lru); list_del(&page->lru); avmap->cached_pages--; *remain = avmap->cached_pages; raw_spin_unlock_irqrestore(&avmap->page_lock, flags); return page; } struct page *check_pte_exist(unsigned long addr) { struct mm_struct *mm; pgd_t *pgd; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; mm = &init_mm; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd) || pgd_bad(*pgd)) return NULL; p4d = p4d_offset(pgd, addr); if (p4d_none(*p4d) || p4d_bad(*p4d)) return NULL; pud = pud_offset(p4d, addr); if (pud_none(*pud) || pud_bad(*pud)) return NULL; pmd = pmd_offset(pud, addr); if (pmd_none(*pmd) || pmd_bad(*pmd)) return NULL; pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) return NULL; #ifdef CONFIG_ARM64 return pte_page(*pte); #elif defined(CONFIG_ARM) return pte_page(pte_val(*pte)); #else return NULL; /* not supported */ #endif } static int vmap_mmu_set(struct page *page, unsigned long addr, int set) { pgd_t *pgd = NULL; p4d_t *p4d = NULL; pud_t *pud = NULL; pmd_t *pmd = NULL; pte_t *pte = NULL; pgd = pgd_offset_k(addr); p4d = p4d_alloc(&init_mm, pgd, addr); if (!p4d) goto nomem; pud = pud_alloc(&init_mm, p4d, addr); if (!pud) goto nomem; if (pud_none(*pud)) { pmd = pmd_alloc(&init_mm, pud, addr); if (!pmd) goto nomem; } pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { pte = pte_alloc_kernel(pmd, addr); if (!pte) goto nomem; } pte = pte_offset_kernel(pmd, addr); if (set) set_pte_at(&init_mm, addr, pte, mk_pte(page, PAGE_KERNEL)); else pte_clear(&init_mm, addr, pte); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); #ifdef CONFIG_ARM64 D("add:%lx, pgd:%px %llx, pmd:%px %llx, pte:%px %llx\n", addr, pgd, pgd_val(*pgd), pmd, pmd_val(*pmd), pte, pte_val(*pte)); #elif defined(CONFIG_ARM) D("add:%lx, pgd:%px %x, pmd:%px %x, pte:%px %x\n", addr, pgd, (unsigned int)pgd_val(*pgd), pmd, (unsigned int)pmd_val(*pmd), pte, pte_val(*pte)); #endif return 0; nomem: E("allocation page table failed, G:%px, U:%px, M:%px, T:%px", pgd, pud, pmd, pte); return -ENOMEM; } static int stack_floor_page(unsigned long addr) { unsigned long pos; pos = addr & (THREAD_SIZE - 1); /* * stack address must align to THREAD_SIZE */ if (THREAD_SIZE_ORDER > 1) return pos < PAGE_SIZE; else return pos < (PAGE_SIZE / 4); } static int check_addr_up_flow(unsigned long addr) { /* * It's the first page of 4 contiguous virtual address * rage(aligned to THREAD_SIZE) but next page of this * addr is not mapped */ if (stack_floor_page(addr) && !check_pte_exist(addr + PAGE_SIZE)) return 1; return 0; } #if (CONFIG_AMLOGIC_KERNEL_VERSION >= 14515) && defined(CONFIG_ARM64) static bool aml_dump_backtrace_entry(void *arg, unsigned long where) { char *loglvl = arg; pr_info("%s %pSb\n", loglvl, (void *)where); return true; } #else static void aml_dump_backtrace_entry(unsigned long ip, unsigned long fp, unsigned long sp) { unsigned long fp_size = 0; #ifdef CONFIG_ARM64 if (fp >= VMALLOC_START) { fp_size = *((unsigned long *)fp) - fp; /* fp cross IRQ or vmap stack */ if (fp_size >= THREAD_SIZE) fp_size = 0; } pr_info("[%016lx+%4ld][<%px>] %pS\n", fp, fp_size, (void *)ip, (void *)ip); #elif defined(CONFIG_ARM) if (fp >= TASK_SIZE) { fp_size = fp - sp + 4; /* fp cross IRQ or vmap stack */ if (fp_size >= THREAD_SIZE) fp_size = 0; } pr_info("[%08lx+%4ld][<%px>] %pS\n", fp, fp_size, (void *)ip, (void *)ip); #endif } #endif static noinline void show_fault_stack(unsigned long addr, struct pt_regs *regs) { #if (CONFIG_AMLOGIC_KERNEL_VERSION >= 14515) && defined(CONFIG_ARM64) //struct unwind_state frame; arch_stack_walk(aml_dump_backtrace_entry, KERN_DEFAULT, current, NULL); #else struct stackframe frame; unsigned long sp; #ifdef CONFIG_ARM64 frame.fp = regs->regs[29]; frame.prev_fp = addr; frame.pc = (unsigned long)regs->regs[30]; sp = addr; frame.prev_type = STACK_TYPE_UNKNOWN; #elif defined(CONFIG_ARM) frame.fp = regs->ARM_fp; frame.sp = regs->ARM_sp; frame.lr = regs->ARM_lr; frame.pc = (unsigned long)regs->uregs[15]; sp = regs->ARM_sp; #endif pr_info("Addr:%lx, Call trace:\n", addr); #ifdef CONFIG_ARM64 pr_info("[%016lx+%4ld][<%px>] %pS\n", addr, frame.fp - addr, (void *)regs->pc, (void *)regs->pc); #elif defined(CONFIG_ARM) pr_info("[%08lx+%4ld][<%px>] %pS\n", addr, frame.fp - addr, (void *)regs->uregs[15], (void *)regs->uregs[15]); #endif while (1) { int ret; #ifdef CONFIG_ARM64 dump_backtrace_entry(frame.pc, frame.fp, sp); #if CONFIG_AMLOGIC_KERNEL_VERSION >= 14515 ret = unwind_next(&frame); #else ret = unwind_frame(current, &frame); #endif #elif defined(CONFIG_ARM) aml_dump_backtrace_entry(frame.pc, frame.fp, frame.sp); ret = unwind_frame(&frame); #endif if (ret < 0) break; } #endif } static void check_sp_fault_again(struct pt_regs *regs) { unsigned long sp = 0, addr = 0; struct page *page; int cache = 0; int retries = 5000; #ifdef CONFIG_ARM sp = regs->ARM_sp; /* the offset needs to be the same as the one in __dabt_svc handle */ addr = sp - ALIGN(sizeof(struct svc_pt_regs), 8); #elif defined(CONFIG_ARM64) sp = regs->sp; addr = sp - sizeof(*regs); #endif /* * When we handle vmap stack fault, we are in pre-allcated * per-cpu vmap stack. But if sp is near bottom of a page and we * return to normal handler, sp may down grow to another page * to cause a vmap fault again. So we need map next page for * stack before page-fault happen. * * But we need check sp is really in vmap stack range. */ if (!is_vmap_addr(addr)) /* addr may in linear mapping */ return; if (sp && ((addr & PAGE_MASK) != (sp & PAGE_MASK))) { /* * will fault when we copy back context, so handle * it first */ D("fault again, sp:%lx, addr:%lx\n", sp, addr); if (check_pte_exist(addr)) return; do { page = get_vmap_cached_page(&cache); if (page) break; udelay(10); } while (retries--); if (!page) { pr_emerg("vmap_stack: get cached page failed\n"); panic("vmap stack"); } vmap_mmu_set(page, addr, 1); update_vmap_stack(1); #ifndef CONFIG_KASAN if (THREAD_SIZE_ORDER > 1 && stack_floor_page(addr)) { E("task:%d %s, stack near overflow, addr:%lx\n", current->pid, current->comm, addr); show_fault_stack(addr, regs); } #endif /* cache is not enough */ if (cache <= (VMAP_CACHE_PAGE / 2)) atomic_inc(&vmap_cache_flag); D("map page:%5lx for addr:%lx\n", page_to_pfn(page), addr); atomic_inc(&vmap_pre_handle_count); #if DEBUG_VMAP show_fault_stack(addr, regs); #endif } } /* * IRQ should *NEVER* been opened in this handler */ int __handle_vmap_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct page *page; int cache = 0; int retries = 5000; if (!is_vmap_addr(addr)) { check_sp_fault_again(regs); return -EINVAL; } D("addr:%lx, esr:%x, task:%5d %s\n", addr, esr, current->pid, current->comm); #ifdef CONFIG_ARM64 D("pc:%ps, %llx, lr:%ps, %llx, sp:%llx, %lx\n", (void *)regs->pc, regs->pc, (void *)regs->regs[30], regs->regs[30], regs->sp, current_stack_pointer); #elif defined(CONFIG_ARM) D("pc:%ps, %lx, lr:%ps, %lx, sp:%lx, %lx\n", (void *)regs->uregs[15], regs->uregs[15], (void *)regs->uregs[14], regs->uregs[14], regs->uregs[13], current_stack_pointer); #endif if (check_addr_up_flow(addr)) { E("address %lx out of range\n", addr); #ifdef CONFIG_ARM64 E("PC is:%llx, %ps, LR is:%llx %ps\n", regs->pc, (void *)regs->pc, regs->regs[30], (void *)regs->regs[30]); #elif defined(CONFIG_ARM) E("PC is:%lx, %ps, LR is:%lx %ps\n", regs->uregs[15], (void *)regs->uregs[15], regs->uregs[14], (void *)regs->uregs[14]); #endif E("task:%d %s, stack:%px, %lx\n", current->pid, current->comm, current->stack, current_stack_pointer); show_fault_stack(addr, regs); check_sp_fault_again(regs); return -ERANGE; } #ifdef CONFIG_ARM page = check_pte_exist(addr); if (page) { D("task:%d %s, page:%lx mapped for addr:%lx, armsp:%lx\n", current->pid, current->comm, page_to_pfn(page), addr, regs->ARM_sp); check_sp_fault_again(regs); return -EINVAL; } #endif /* * allocate a new page for vmap */ do { page = get_vmap_cached_page(&cache); if (page) break; udelay(10); } while (retries--); if (!page) { pr_emerg("vmap_stack: get cached page failed\n"); panic("vmap stack"); } vmap_mmu_set(page, addr, 1); update_vmap_stack(1); if (THREAD_SIZE_ORDER > 1 && stack_floor_page(addr)) { E("task:%d %s, stack near overflow, addr:%lx\n", current->pid, current->comm, addr); show_fault_stack(addr, regs); } /* cache is not enough */ if (cache <= (VMAP_CACHE_PAGE / 2)) atomic_inc(&vmap_cache_flag); atomic_inc(&vmap_fault_count); D("map page:%5lx for addr:%lx\n", page_to_pfn(page), addr); #if DEBUG_VMAP show_fault_stack(addr, regs); #endif return 0; } struct pt_regs *handle_vmap_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { int ret; ret = __handle_vmap_fault(addr, esr, regs); if (!ret) { #ifdef CONFIG_ARM64 __vmap_exit(regs); #endif return NULL; } #ifdef CONFIG_ARM64 return switch_vmap_context(regs); #else return (struct pt_regs *)-1UL; #endif } EXPORT_SYMBOL(handle_vmap_fault); /* APIs for external usage */ void vmap_report_meminfo(struct seq_file *m) { unsigned long kb = 1 << (PAGE_SHIFT - 10); unsigned long tmp1, tmp2, tmp3; tmp1 = kb * atomic_read(&vmap_stack_size); tmp2 = kb * atomic_read(&vmap_fault_count); tmp3 = kb * atomic_read(&vmap_pre_handle_count); seq_printf(m, "Stack virtual: %8ld kB\n", global_node_page_state(NR_KERNEL_STACK_KB)); seq_printf(m, "VmapStack: %8ld kB\n", tmp1); seq_printf(m, "VmapFault: %8ld kB\n", tmp2); seq_printf(m, "VmapPfault: %8ld kB\n", tmp3); } /* FOR debug */ static unsigned long vmap_debug_jiff; void aml_account_task_stack(struct task_struct *tsk, int account) { unsigned long stack = (unsigned long)task_stack_page(tsk); struct page *first_page; if (unlikely(!is_vmap_addr(stack))) { /* stack get from kmalloc */ first_page = virt_to_page((void *)stack); mod_lruvec_page_state(first_page, NR_KERNEL_STACK_KB, THREAD_SIZE / 1024 * account); update_vmap_stack(account * (THREAD_SIZE / PAGE_SIZE)); return; } stack += STACK_TOP_PAGE_OFF; first_page = vmalloc_to_page((void *)stack); mod_lruvec_page_state(first_page, NR_KERNEL_STACK_KB, THREAD_SIZE / 1024 * account); if (time_after(jiffies, vmap_debug_jiff + HZ * 5)) { int ratio, rem; vmap_debug_jiff = jiffies; ratio = ((get_vmap_stack_size() << (PAGE_SHIFT - 10)) * 10000) / global_node_page_state(NR_KERNEL_STACK_KB); rem = ratio % 100; D("STACK:%ld KB, vmap:%d KB, cached:%d KB, rate:%2d.%02d%%\n", global_node_page_state(NR_KERNEL_STACK_KB), get_vmap_stack_size() << (PAGE_SHIFT - 10), avmap->cached_pages << (PAGE_SHIFT - 10), ratio / 100, rem); } } #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) DEFINE_MUTEX(stack_shadow_lock); /* For kasan */ static void check_and_map_stack_shadow(unsigned long addr) { unsigned long shadow; struct page *page, *pages[2] = {}; int ret; shadow = (unsigned long)kasan_mem_to_shadow((void *)addr); page = check_pte_exist(shadow); if (page) { WARN(page_address(page) == (void *)kasan_early_shadow_page, "bad pte, page:%px, %lx, addr:%lx\n", page_address(page), page_to_pfn(page), addr); return; } shadow = shadow & PAGE_MASK; page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | __GFP_HIGH); if (!page) { WARN(!page, "alloc page for addr:%lx, shadow:%lx fail\n", addr, shadow); return; } pages[0] = page; ret = vmap_pages_range_noflush(shadow, shadow + PAGE_SIZE, PAGE_KERNEL, pages, PAGE_SHIFT); if (ret < 0) { pr_err("%s, map shadow:%lx failed:%d\n", __func__, shadow, ret); __free_page(page); } } #endif void *aml_stack_alloc(int node, struct task_struct *tsk) { unsigned long bitmap_no, raw_start; struct page *page; unsigned long addr, map_addr, flags; raw_spin_lock_irqsave(&avmap->vmap_lock, flags); raw_start = avmap->start_bit; bitmap_no = find_next_zero_bit(avmap->bitmap, MAX_TASKS, avmap->start_bit); if (bitmap_no >= MAX_TASKS) { raw_spin_unlock_irqrestore(&avmap->vmap_lock, flags); /* * if vmap address space is full, we still need to try * to get stack from kmalloc */ addr = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); E("BITMAP FULL, kmalloc task stack:%lx\n", addr); return (void *)addr; } avmap->start_bit = bitmap_no + 1; /* next idle address space */ bitmap_set(avmap->bitmap, bitmap_no, 1); raw_spin_unlock_irqrestore(&avmap->vmap_lock, flags); page = alloc_page(THREADINFO_GFP | __GFP_ZERO | __GFP_HIGHMEM); if (!page) { raw_spin_lock_irqsave(&avmap->vmap_lock, flags); bitmap_clear(avmap->bitmap, bitmap_no, 1); raw_spin_unlock_irqrestore(&avmap->vmap_lock, flags); E("allocation page failed\n"); return NULL; } /* * map first page only */ addr = (unsigned long)avmap->root_vm->addr + THREAD_SIZE * bitmap_no; map_addr = addr + STACK_TOP_PAGE_OFF; vmap_mmu_set(page, map_addr, 1); update_vmap_stack(1); #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) /* 2 thread stack can be a single shadow page, we need use lock */ mutex_lock(&stack_shadow_lock); check_and_map_stack_shadow(addr); mutex_unlock(&stack_shadow_lock); #endif D("bit idx:%5ld, start:%5ld, addr:%lx, page:%lx\n", bitmap_no, raw_start, addr, page_to_pfn(page)); return (void *)addr; } void aml_stack_free(struct task_struct *tsk) { unsigned long stack = (unsigned long)tsk->stack; unsigned long addr, bitmap_no; struct page *page; unsigned long flags; if (unlikely(!is_vmap_addr(stack))) { /* stack get from kmalloc */ free_pages(stack, THREAD_SIZE_ORDER); return; } addr = stack + STACK_TOP_PAGE_OFF; for (; addr >= stack; addr -= PAGE_SIZE) { page = vmalloc_to_page((const void *)addr); if (!page) break; #ifdef CONFIG_KASAN kasan_unpoison_range((void *)addr, PAGE_SIZE); #endif vmap_mmu_set(page, addr, 0); clear_highpage(page); /* clear for next use */ /* supplement for stack page cache first */ raw_spin_lock_irqsave(&avmap->page_lock, flags); if (avmap->cached_pages < VMAP_CACHE_PAGE) { list_add_tail(&page->lru, &avmap->list); avmap->cached_pages++; raw_spin_unlock_irqrestore(&avmap->page_lock, flags); } else { raw_spin_unlock_irqrestore(&avmap->page_lock, flags); __free_page(page); } update_vmap_stack(-1); } bitmap_no = (stack - (unsigned long)avmap->root_vm->addr) / THREAD_SIZE; raw_spin_lock_irqsave(&avmap->vmap_lock, flags); bitmap_clear(avmap->bitmap, bitmap_no, 1); if (bitmap_no < avmap->start_bit) avmap->start_bit = bitmap_no; raw_spin_unlock_irqrestore(&avmap->vmap_lock, flags); } #if DEBUG_VMAP static void check_stack_depth(void *p, int cnt) { unsigned char stack_buf[32]; unsigned long stack_end, cur_stack; stack_end = (unsigned long)p & ~(THREAD_SIZE - 1); cur_stack = (unsigned long)stack_buf & (THREAD_SIZE - 1); if (cur_stack < 1024) { pr_info("%s, %3d, p:%px, stack_buf:%px, cur_stack:%lx, end:%lx, quit!!!\n", __func__, cnt, p, stack_buf, cur_stack, stack_end); return; } pr_info("%s, %3d, p:%px, stack_buf:%px, cur_stack:%lx, end:%lx\n", __func__, cnt, p, stack_buf, cur_stack, stack_end); memcpy(stack_buf, p, sizeof(stack_buf)); check_stack_depth(stack_buf, cnt + 1); } static void stack_test(void) { unsigned char buf[32] = {}; check_stack_depth(buf, 0); } #endif /* * page cache maintain task for vmap */ static int vmap_task(void *data) { struct page *page; struct list_head head; int i, cnt; unsigned long flags; struct aml_vmap *v = (struct aml_vmap *)data; #if DEBUG_VMAP stack_test(); #endif set_user_nice(current, -19); while (1) { if (kthread_should_stop()) break; if (!atomic_read(&vmap_cache_flag)) { msleep_interruptible(10); continue; } raw_spin_lock_irqsave(&v->page_lock, flags); cnt = v->cached_pages; raw_spin_unlock_irqrestore(&v->page_lock, flags); if (cnt >= VMAP_CACHE_PAGE) { D("cache full cnt:%d\n", cnt); continue; } INIT_LIST_HEAD(&head); for (i = 0; i < VMAP_CACHE_PAGE - cnt; i++) { page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | __GFP_HIGH); if (!page) { E("get page failed, allocated:%d, cnt:%d\n", i, cnt); break; } list_add(&page->lru, &head); } raw_spin_lock_irqsave(&v->page_lock, flags); list_splice(&head, &v->list); v->cached_pages += i; raw_spin_unlock_irqrestore(&v->page_lock, flags); atomic_set(&vmap_cache_flag, 0); E("add %d pages, cnt:%d\n", i, cnt); } return 0; } int __init start_thread_work(void) { kthread_run(vmap_task, avmap, "vmap_thread"); return 0; } arch_initcall(start_thread_work); void __init thread_stack_cache_init(void) { int i; struct page *page; page = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, VMAP_CACHE_PAGE_ORDER); if (!page) return; avmap = kzalloc(sizeof(*avmap), GFP_KERNEL); if (!avmap) { __free_pages(page, VMAP_CACHE_PAGE_ORDER); return; } avmap->bitmap = kzalloc(MAX_TASKS / 8, GFP_KERNEL); if (!avmap->bitmap) { __free_pages(page, VMAP_CACHE_PAGE_ORDER); kfree(avmap); return; } pr_info("%s, vmap:%px, bitmap:%px, cache page:%lx\n", __func__, avmap, avmap->bitmap, page_to_pfn(page)); avmap->root_vm = __get_vm_area_node(VM_STACK_AREA_SIZE, MAX_ORDER_NR_PAGES * PAGE_SIZE, PAGE_SHIFT, 0, VMAP_ADDR_START, VMAP_ADDR_END, NUMA_NO_NODE, GFP_KERNEL, __builtin_return_address(0)); WARN(!avmap->root_vm, "alloc vmap area %x failed\n", VM_STACK_AREA_SIZE); if (!avmap->root_vm) { __free_pages(page, VMAP_CACHE_PAGE_ORDER); kfree(avmap->bitmap); kfree(avmap); return; } pr_info("%s, allocation vm area:%px, addr:%px, size:%lx\n", __func__, avmap->root_vm, avmap->root_vm->addr, avmap->root_vm->size); INIT_LIST_HEAD(&avmap->list); raw_spin_lock_init(&avmap->page_lock); raw_spin_lock_init(&avmap->vmap_lock); for (i = 0; i < VMAP_CACHE_PAGE; i++) { list_add(&page->lru, &avmap->list); page++; } avmap->cached_pages = VMAP_CACHE_PAGE; #ifdef CONFIG_ARM64 for_each_possible_cpu(i) { unsigned long addr; addr = (unsigned long)per_cpu_ptr(vmap_stack, i); pr_info("cpu %d, vmap_stack:[%lx-%lx]\n", i, addr, addr + THREAD_START_SP); addr = (unsigned long)per_cpu_ptr(irq_stack, i); pr_info("cpu %d, irq_stack: [%lx-%lx]\n", i, addr, addr + THREAD_START_SP); } #endif }