From bcc37aab3547afa2b1fda11ae525930e0de76b90 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Jul 2013 15:37:12 +0100 Subject: [PATCH 01/93] arm64: mm: don't treat user cache maintenance faults as writes commit db6f41063cbdb58b14846e600e6bc3f4e4c2e888 upstream. On arm64, cache maintenance faults appear as data aborts with the CM bit set in the ESR. The WnR bit, usually used to distinguish between faulting loads and stores, always reads as 1 and (slightly confusingly) the instructions are treated as reads by the architecture. This patch fixes our fault handling code to treat cache maintenance faults in the same way as loads. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 88c0a794e5d9bcc29926e636cd1d6eb5c9dcb235) Signed-off-by: Mark Brown --- arch/arm64/mm/fault.c | 46 +++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1426468b77f3..f51d669c8ebd 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -152,25 +152,8 @@ void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) #define ESR_CM (1 << 8) #define ESR_LNX_EXEC (1 << 24) -/* - * Check that the permissions on the VMA allow for the fault which occurred. - * If we encountered a write fault, we must have write permission, otherwise - * we allow any permission. - */ -static inline bool access_error(unsigned int esr, struct vm_area_struct *vma) -{ - unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; - - if (esr & ESR_WRITE) - mask = VM_WRITE; - if (esr & ESR_LNX_EXEC) - mask = VM_EXEC; - - return vma->vm_flags & mask ? false : true; -} - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int esr, unsigned int flags, + unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) { struct vm_area_struct *vma; @@ -188,12 +171,17 @@ static int __do_page_fault(struct mm_struct *mm, unsigned long addr, * it. */ good_area: - if (access_error(esr, vma)) { + /* + * Check that the permissions on the VMA allow for the fault which + * occurred. If we encountered a write or exec fault, we must have + * appropriate permissions, otherwise we allow any permission. + */ + if (!(vma->vm_flags & vm_flags)) { fault = VM_FAULT_BADACCESS; goto out; } - return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags); check_stack: if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) @@ -208,9 +196,15 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code; - bool write = (esr & ESR_WRITE) && !(esr & ESR_CM); - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; + unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + if (esr & ESR_LNX_EXEC) { + vm_flags = VM_EXEC; + } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) { + vm_flags = VM_WRITE; + mm_flags |= FAULT_FLAG_WRITE; + } tsk = current; mm = tsk->mm; @@ -248,7 +242,7 @@ retry: #endif } - fault = __do_page_fault(mm, addr, esr, flags, tsk); + fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); /* * If we need to retry but a fatal signal is pending, handle the @@ -265,7 +259,7 @@ retry: */ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, @@ -280,7 +274,7 @@ retry: * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of * starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; goto retry; } } From 741dff38f169e49d8731e7362569fe4b1f9482de Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:39 +0100 Subject: [PATCH 02/93] arm64: perf: fix array out of bounds access in armpmu_map_hw_event() commit 868f6fea8fa63f09acbfa93256d0d2abdcabff79 upstream. This is a port of d9f966357b14 ("ARM: 7810/1: perf: Fix array out of bounds access in armpmu_map_hw_event()") to arm64, which fixes an oops in the arm64 perf backend found as a result of Vince's fuzzing tool. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 975a3cd4cc0dff06a635cb16b8fb25fd3ae88234) Signed-off-by: Mark Brown --- arch/arm64/kernel/perf_event.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9ba33c40cdf8..2012646fb46f 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -107,7 +107,12 @@ armpmu_map_cache_event(const unsigned (*cache_map) static int armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) { - int mapping = (*event_map)[config]; + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = (*event_map)[config]; return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; } From 4281a11a7e44655baa41241c470b09d4fae4be35 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:40 +0100 Subject: [PATCH 03/93] arm64: perf: fix event validation for software group leaders commit ee7538a008a45050c8f706d38b600f55953169f9 upstream. This is a port of c95eb3184ea1 ("ARM: 7809/1: perf: fix event validation for software group leaders") to arm64, which fixes a panic in the arm64 perf backend found as a result of Vince's fuzzing tool. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6c6f6c7166e9ee45e545eab850a2862a92c135b2) Signed-off-by: Mark Brown --- arch/arm64/kernel/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2012646fb46f..12e6ccb88691 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -322,6 +322,9 @@ validate_event(struct pmu_hw_events *hw_events, struct hw_perf_event fake_event = event->hw; struct pmu *leader_pmu = event->group_leader->pmu; + if (is_software_event(event)) + return 1; + if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) return 1; From 0d7707cb914db6188e9041e4a4440687a3f89ddf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:41 +0100 Subject: [PATCH 04/93] arm64: perf: fix group validation when using enable_on_exec commit 8455e6ec70f33b0e8c3ffd47067e00481f09f454 upstream. This is a port of cb2d8b342aa0 ("ARM: 7698/1: perf: fix group validation when using enable_on_exec") to arm64, which fixes the event validation checking so that events in the OFF state are still considered when enable_on_exec is true. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1f96d83b38937294584ede9473c2b2961528f287) Signed-off-by: Mark Brown --- arch/arm64/kernel/perf_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 12e6ccb88691..2a1e9163d67a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -325,7 +325,10 @@ validate_event(struct pmu_hw_events *hw_events, if (is_software_event(event)) return 1; - if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) + if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; return armpmu->get_event_idx(hw_events, &fake_event) >= 0; From a73eb27dbbbbf1b3f40a4cec380f8d9f238c26aa Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 20 Aug 2013 11:47:42 +0100 Subject: [PATCH 05/93] arm64: perf: fix ARMv8 EVTYPE_MASK to include NSH bit commit 178cd9ce377232518ec17ff2ecab2e80fa60784c upstream. This is a port of f2fe09b055e2 ("ARM: 7663/1: perf: fix ARMv7 EVTYPE_MASK to include NSH bit") to arm64, which fixes the broken evtype mask to include the NSH bit, allowing profiling at EL2. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0fe9a0dc92a64c088e76fcd3d35b2ba36b4d7f3c) Signed-off-by: Mark Brown --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2a1e9163d67a..cea1594ff933 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -784,7 +784,7 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_EVTYPE_MASK 0xc00000ff /* Mask for writable bits */ +#define ARMV8_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ #define ARMV8_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ /* From ff5ec2d401f7ab457cece4b1c00587e8af26d74c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Dec 2013 17:09:08 +0000 Subject: [PATCH 06/93] arm64: ptrace: avoid using HW_BREAKPOINT_EMPTY for disabled events commit cdc27c27843248ae7eb0df5fc261dd004eaa5670 upstream. Commit 8f34a1da35ae ("arm64: ptrace: use HW_BREAKPOINT_EMPTY type for disabled breakpoints") fixed an issue with GDB trying to zero breakpoint control registers. The problem there is that the arch hw_breakpoint code will attempt to create a (disabled), execute breakpoint of length 0. This will fail validation and report unexpected failure to GDB. To avoid this, we treated disabled breakpoints as HW_BREAKPOINT_EMPTY, but that seems to have broken with recent kernels, causing watchpoints to be treated as TYPE_INST in the core code and returning ENOSPC for any further breakpoints. This patch fixes the problem by prioritising the `enable' field of the breakpoint: if it is cleared, we simply update the perf_event_attr to indicate that the thing is disabled and don't bother changing either the type or the length. This reinforces the behaviour that the breakpoint control register is essentially read-only apart from the enable bit when disabling a breakpoint. Reported-by: Aaron Liu Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 11b81802921618b02122855db021a63df7d9fd49) Signed-off-by: Mark Brown --- arch/arm64/kernel/ptrace.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6e1e77f1831c..5341534b6d04 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -236,31 +236,29 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, { int err, len, type, disabled = !ctrl.enabled; - if (disabled) { - len = 0; - type = HW_BREAKPOINT_EMPTY; - } else { - err = arch_bp_generic_fields(ctrl, &len, &type); - if (err) - return err; + attr->disabled = disabled; + if (disabled) + return 0; - switch (note_type) { - case NT_ARM_HW_BREAK: - if ((type & HW_BREAKPOINT_X) != type) - return -EINVAL; - break; - case NT_ARM_HW_WATCH: - if ((type & HW_BREAKPOINT_RW) != type) - return -EINVAL; - break; - default: + err = arch_bp_generic_fields(ctrl, &len, &type); + if (err) + return err; + + switch (note_type) { + case NT_ARM_HW_BREAK: + if ((type & HW_BREAKPOINT_X) != type) return -EINVAL; - } + break; + case NT_ARM_HW_WATCH: + if ((type & HW_BREAKPOINT_RW) != type) + return -EINVAL; + break; + default: + return -EINVAL; } attr->bp_len = len; attr->bp_type = type; - attr->disabled = disabled; return 0; } From ec2a2a7f53125cf8e1c9ec40fa433909521647c2 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 31 May 2013 16:30:58 +0100 Subject: [PATCH 07/93] arm64: spinlock: retry trylock operation if strex fails on free lock commit 4ecf7ccb1973fd826456b6ab1e6dfafe9023c753 upstream. An exclusive store instruction may fail for reasons other than lock contention (e.g. a cache eviction during the critical section) so, in line with other architectures using similar exclusive instructions (alpha, mips, powerpc), retry the trylock operation if the lock appears to be free but the strex reported failure. Signed-off-by: Catalin Marinas Reported-by: Tony Thompson Acked-by: Will Deacon Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 42b5bb47a62b7b2d8cd0b9af58f914c5e83ef76e) Signed-off-by: Mark Brown --- arch/arm64/include/asm/spinlock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 7065e920149d..0defa0728a9b 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -59,9 +59,10 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) unsigned int tmp; asm volatile( - " ldaxr %w0, %1\n" + "2: ldaxr %w0, %1\n" " cbnz %w0, 1f\n" " stxr %w0, %w2, %1\n" + " cbnz %w0, 2b\n" "1:\n" : "=&r" (tmp), "+Q" (lock->lock) : "r" (1) From 31cc1d7d724ea7ec31c1d65d3a10e842c0e74b86 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 19 Jul 2013 15:08:15 +0100 Subject: [PATCH 08/93] arm64: Only enable local interrupts after the CPU is marked online commit 53ae3acd4390ffeecb3a11dbd5be347b5a3d98f2 upstream. There is a slight chance that (timer) interrupts are triggered before a secondary CPU has been marked online with implications on softirq thread affinity. Signed-off-by: Catalin Marinas Reported-by: Kirill Tkhai Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 81ca15a3e04961b54d4a0b395e681bffb6cfbc68) Signed-off-by: Mark Brown --- arch/arm64/kernel/smp.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5d54e3717bf8..9c93e126328c 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -199,13 +199,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) raw_spin_lock(&boot_lock); raw_spin_unlock(&boot_lock); - /* - * Enable local interrupts. - */ - notify_cpu_starting(cpu); - local_irq_enable(); - local_fiq_enable(); - /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online @@ -214,6 +207,14 @@ asmlinkage void __cpuinit secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); + /* + * Enable GIC and timers. + */ + notify_cpu_starting(cpu); + + local_irq_enable(); + local_fiq_enable(); + /* * OK, it's off to the idle thread for us */ From 6a23efcb3808db22be5d2625ac928a5ae998a77c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 9 Jul 2013 15:16:06 +0100 Subject: [PATCH 09/93] arm64: virt: ensure visibility of __boot_cpu_mode commit 82b2f495fba338d1e3098dde1df54944a9c19751 upstream. Secondary CPUs write to __boot_cpu_mode with caches disabled, and thus a cached value of __boot_cpu_mode may be incoherent with that in memory. This could lead to a failure to detect mismatched boot modes. This patch adds flushing to ensure that writes by secondaries to __boot_cpu_mode are made visible before we test against it. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Christoffer Dall Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 5fb08df3dd1f7b8e83936808b042725a8b067562) Signed-off-by: Mark Brown --- arch/arm64/include/asm/virt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 439827271e3d..26e310c54344 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -21,6 +21,7 @@ #define BOOT_CPU_MODE_EL2 (0x0e12b007) #ifndef __ASSEMBLY__ +#include /* * __boot_cpu_mode records what mode CPUs were booted in. @@ -36,9 +37,20 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); phys_addr_t __hyp_get_vectors(void); +static inline void sync_boot_mode(void) +{ + /* + * As secondaries write to __boot_cpu_mode with caches disabled, we + * must flush the corresponding cache entries to ensure the visibility + * of their writes. + */ + __flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode)); +} + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { + sync_boot_mode(); return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2); } @@ -46,6 +58,7 @@ static inline bool is_hyp_mode_available(void) /* Check if the bootloader has booted CPUs in different modes */ static inline bool is_hyp_mode_mismatched(void) { + sync_boot_mode(); return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } From 6c0ab8b35469a97968c1482b14106afa8db3d743 Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Tue, 23 Jul 2013 18:52:31 +0100 Subject: [PATCH 10/93] arm64: Change kernel stack size to 16K commit 845ad05ec31e0f3872a321e10dbeaf872022632c upstream. Written by Catalin Marinas, tested by APM on storm platform. This is needed because of the failures encountered when running SpecWeb benchmark test. Signed-off-by: Feng Kan Acked-by: Kumar Sankaran Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 79f783f05539479676406d3d42c3d86bd203f083) Signed-off-by: Mark Brown --- arch/arm64/include/asm/thread_info.h | 4 ++-- arch/arm64/kernel/entry.S | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 3659e460071d..23a3c4791d86 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -24,10 +24,10 @@ #include #ifndef CONFIG_ARM64_64K_PAGES -#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE_ORDER 2 #endif -#define THREAD_SIZE 8192 +#define THREAD_SIZE 16384 #define THREAD_START_SP (THREAD_SIZE - 16) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 1d1314280a03..6ad781b21c08 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -121,7 +121,7 @@ .macro get_thread_info, rd mov \rd, sp - and \rd, \rd, #~((1 << 13) - 1) // top of 8K stack + and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack .endm /* From 47b1e20b206944ea097b775e0e4a569f8bdeeb7c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 27 Sep 2013 09:04:41 +0100 Subject: [PATCH 11/93] arm64: fix possible invalid FPSIMD initialization state commit 6db83cea1c975b9a102e17def7d2795814e1ae2b upstream. If context switching happens during executing fpsimd_flush_thread(), stale value in FPSIMD registers will be saved into current thread's fpsimd_state by fpsimd_thread_switch(). That may cause invalid initialization state for the new process, so disable preemption when executing fpsimd_flush_thread(). Signed-off-by: Jiang Liu Cc: Jiang Liu Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2bf5861acf602fe6201ac1f82955ac7283e56b6f) Signed-off-by: Mark Brown --- arch/arm64/kernel/fpsimd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e8b8357aedb4..2fa308e4a1fa 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -79,8 +79,10 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { + preempt_disable(); memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_load_state(¤t->thread.fpsimd_state); + preempt_enable(); } /* From af68cb6b43a9f32f0143e69e80d17802247818ee Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Thu, 3 Oct 2013 06:47:44 +0100 Subject: [PATCH 12/93] arm64: check for number of arguments in syscall_get/set_arguments() commit 7b22c03536a539142f931815528d55df455ffe2d upstream. In ftrace_syscall_enter(), syscall_get_arguments(..., 0, n, ...) if (i == 0) { ...; n--;} memcpy(..., n * sizeof(args[0])); If 'number of arguments(n)' is zero and 'argument index(i)' is also zero in syscall_get_arguments(), none of arguments should be copied by memcpy(). Otherwise 'n--' can be a big positive number and unexpected amount of data will be copied. Tracing system calls which take no argument, say sync(void), may hit this case and eventually make the system corrupted. This patch fixes the issue both in syscall_get_arguments() and syscall_set_arguments(). Signed-off-by: AKASHI Takahiro Acked-by: Will Deacon Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit e2956ef5b5ddb3ede70962afe9297d8340787fa6) Signed-off-by: Mark Brown --- arch/arm64/include/asm/syscall.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 89c047f9a971..70ba9d4ee978 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -59,6 +59,9 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned int i, unsigned int n, unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i; unsigned int n_bad = n + i - SYSCALL_MAX_ARGS; @@ -82,6 +85,9 @@ static inline void syscall_set_arguments(struct task_struct *task, unsigned int i, unsigned int n, const unsigned long *args) { + if (n == 0) + return; + if (i + n > SYSCALL_MAX_ARGS) { pr_warning("%s called with max args %d, handling only %d\n", __func__, i + n, SYSCALL_MAX_ARGS); From 62416c1afeaa92d112ea2b0693a027f6b68f2272 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 14 Nov 2013 15:15:37 +0000 Subject: [PATCH 13/93] arm64: dts: Reserve the memory used for secondary CPU release address commit df503ba7f653c590b475ab80bde788edf5af70d5 upstream. With the spin-table SMP booting method, secondary CPUs poll a location passed in the DT. The foundation-v8.dts file doesn't have this memory reserved and there is a risk of Linux using it before secondary CPUs are started. Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 29f37087bb51dfe4ff61daad60bd93f7bcfa3d72) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/foundation-v8.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/foundation-v8.dts b/arch/arm64/boot/dts/foundation-v8.dts index 84fcc5018284..519c4b2c0687 100644 --- a/arch/arm64/boot/dts/foundation-v8.dts +++ b/arch/arm64/boot/dts/foundation-v8.dts @@ -6,6 +6,8 @@ /dts-v1/; +/memreserve/ 0x80000000 0x00010000; + / { model = "Foundation-v8A"; compatible = "arm,foundation-aarch64", "arm,vexpress"; From 833b6f7f66c2343e1b23dd8d35c12324d70a6731 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 2 Sep 2013 16:33:54 +0100 Subject: [PATCH 14/93] arm64: Remove unused cpu_name ascii in arch/arm64/mm/proc.S commit f3a1d7d53dccf51959aec16b574617cc6bfeca09 upstream. This string has been moved to arch/arm64/kernel/cputable.c. Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ae5092fadd50cfd0d29e762771d30cbf7f2c2fec) Signed-off-by: Mark Brown --- arch/arm64/mm/proc.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a82ae8868077..f84fcf71f129 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -95,10 +95,6 @@ ENTRY(cpu_do_switch_mm) ret ENDPROC(cpu_do_switch_mm) -cpu_name: - .ascii "AArch64 Processor" - .align - .section ".text.init", #alloc, #execinstr /* From 66effa189f8ded4d968dd9c310137c50244477ad Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 30 Jan 2013 17:51:26 +0000 Subject: [PATCH 15/93] clocksource: arch_timer: use virtual counters commit 0d651e4e65e96989f72236bf83bd4c6e55eb6ce4 upstream. Switching between reading the virtual or physical counters is problematic, as some core code wants a view of time before we're fully set up. Using a function pointer and switching the source after the first read can make time appear to go backwards, and having a check in the read function is an unfortunate block on what we want to be a fast path. Instead, this patch makes us always use the virtual counters. If we're a guest, or don't have hyp mode, we'll use the virtual timers, and as such don't care about CNTVOFF as long as it doesn't change in such a way as to make time appear to travel backwards. As the guest will use the virtual timers, a (potential) KVM host must use the physical timers (which can wake up the host even if they fire while a guest is executing), and hence a host must have CNTVOFF set to zero so as to have a consistent view of time between the physical timers and virtual counters. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Acked-by: Marc Zyngier Acked-by: Santosh Shilimkar Cc: Rob Herring Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 714c21cb90951905b269870087a99c37f3a7af0c) Signed-off-by: Mark Brown --- arch/arm/include/asm/arch_timer.h | 9 --------- arch/arm64/include/asm/arch_timer.h | 10 ---------- drivers/clocksource/arm_arch_timer.c | 23 +++++------------------ include/clocksource/arm_arch_timer.h | 2 +- 4 files changed, 6 insertions(+), 38 deletions(-) diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 7c1bfc0aea0c..accefe099182 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -80,15 +80,6 @@ static inline u32 arch_timer_get_cntfrq(void) return val; } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrrc p15, 0, %Q0, %R0, c14" : "=r" (cval)); - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index bf6ab242f047..d56ed11ba9a3 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -110,16 +110,6 @@ static inline void __cpuinit arch_counter_set_user_access(void) asm volatile("msr cntkctl_el1, %0" : : "r" (cntkctl)); } -static inline u64 arch_counter_get_cntpct(void) -{ - u64 cval; - - isb(); - asm volatile("mrs %0, cntpct_el0" : "=r" (cval)); - - return cval; -} - static inline u64 arch_counter_get_cntvct(void) { u64 cval; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a2b254189782..053d846ab5b1 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -186,27 +186,19 @@ u32 arch_timer_get_rate(void) return arch_timer_rate; } -/* - * Some external users of arch_timer_read_counter (e.g. sched_clock) may try to - * call it before it has been initialised. Rather than incur a performance - * penalty checking for initialisation, provide a default implementation that - * won't lead to time appearing to jump backwards. - */ -static u64 arch_timer_read_zero(void) +u64 arch_timer_read_counter(void) { - return 0; + return arch_counter_get_cntvct(); } -u64 (*arch_timer_read_counter)(void) = arch_timer_read_zero; - static cycle_t arch_counter_read(struct clocksource *cs) { - return arch_timer_read_counter(); + return arch_counter_get_cntvct(); } static cycle_t arch_counter_read_cc(const struct cyclecounter *cc) { - return arch_timer_read_counter(); + return arch_counter_get_cntvct(); } static struct clocksource clocksource_counter = { @@ -287,7 +279,7 @@ static int __init arch_timer_register(void) cyclecounter.mult = clocksource_counter.mult; cyclecounter.shift = clocksource_counter.shift; timecounter_init(&timecounter, &cyclecounter, - arch_counter_get_cntpct()); + arch_counter_get_cntvct()); if (arch_timer_use_virtual) { ppi = arch_timer_ppi[VIRT_PPI]; @@ -376,11 +368,6 @@ static void __init arch_timer_init(struct device_node *np) } } - if (arch_timer_use_virtual) - arch_timer_read_counter = arch_counter_get_cntvct; - else - arch_timer_read_counter = arch_counter_get_cntpct; - arch_timer_register(); arch_timer_arch_init(); } diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index e6c9c4cc9b23..c463ce990c48 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -32,7 +32,7 @@ #ifdef CONFIG_ARM_ARCH_TIMER extern u32 arch_timer_get_rate(void); -extern u64 (*arch_timer_read_counter)(void); +extern u64 arch_timer_read_counter(void); extern struct timecounter *arch_timer_get_timecounter(void); #else From 84ddb8b066d3750c9971b9eed826e8d1dd4f9c53 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 May 2013 12:23:05 +0100 Subject: [PATCH 16/93] arm64: Avoid cache flushing in flush_dcache_page() commit b5b6c9e9149d8a7c3f1d7b9d0c046c6184e1dd17 upstream. The flush_dcache_page() function is called when the kernel modified a page cache page. Since the D-cache on AArch64 does not have aliases this function can simply mark the page as dirty for later flushing via set_pte_at()/__sync_icache_dcache() if the page is executable (to ensure the I-D cache coherency). Signed-off-by: Catalin Marinas Reported-by: Will Deacon Acked-by: Will Deacon Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1e616427f20943c9966296dfff9e7a2b825846aa) Signed-off-by: Mark Brown --- arch/arm64/mm/flush.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 88611c3a421a..b9cd7a4deeca 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -94,28 +94,14 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) } /* - * Ensure cache coherency between kernel mapping and userspace mapping of this - * page. + * This function is called when a page has been modified by the kernel. Mark + * it as dirty for later flushing when mapped in user space (if executable, + * see __sync_icache_dcache). */ void flush_dcache_page(struct page *page) { - struct address_space *mapping; - - /* - * The zero page is never written to, so never has any dirty cache - * lines, and therefore never needs to be flushed. - */ - if (page == ZERO_PAGE(0)) - return; - - mapping = page_mapping(page); - if (mapping && mapping_mapped(mapping)) { - __flush_dcache_page(page); - __flush_icache_all(); - set_bit(PG_dcache_clean, &page->flags); - } else { + if (test_bit(PG_dcache_clean, &page->flags)) clear_bit(PG_dcache_clean, &page->flags); - } } EXPORT_SYMBOL(flush_dcache_page); From b67a42ee58fa4e5a2d5a20a9009b160e77e132a7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 May 2013 16:34:22 +0100 Subject: [PATCH 17/93] arm64: Do not flush the D-cache for anonymous pages commit 7249b79f6b4cc3c2aa9138dca52e535a4c789107 upstream. The D-cache on AArch64 is VIPT non-aliasing, so there is no need to flush it for anonymous pages. Signed-off-by: Catalin Marinas Reported-by: Will Deacon Acked-by: Will Deacon Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit fc54900e08d840fcfec9e5d2fba2c6f233aa49b9) Signed-off-by: Mark Brown --- arch/arm64/mm/flush.c | 8 +++----- arch/arm64/mm/mmu.c | 1 - 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index b9cd7a4deeca..7c716634a671 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -77,14 +77,12 @@ void __flush_dcache_page(struct page *page) void __sync_icache_dcache(pte_t pte, unsigned long addr) { - unsigned long pfn; - struct page *page; + struct page *page = pte_page(pte); - pfn = pte_pfn(pte); - if (!pfn_valid(pfn)) + /* no flushing needed for anonymous pages */ + if (!page_mapping(page)) return; - page = pfn_to_page(pfn); if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { __flush_dcache_page(page); __flush_icache_all(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index eeecc9c8ed68..80a369eab637 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -339,7 +339,6 @@ void __init paging_init(void) bootmem_init(); empty_zero_page = virt_to_page(zero_page); - __flush_dcache_page(empty_zero_page); /* * TTBR0 is only used for the identity mapping at this stage. Make it From e850da006c7ca131c8aa1ef614bd1bcee4a3e929 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 29 Nov 2013 10:56:14 +0000 Subject: [PATCH 18/93] arm64: Use Normal NonCacheable memory for writecombine commit 4f00130b70e5eee813cc7bc298e0f3fdf79673cc upstream. This provides better performance compared to Device GRE and also allows unaligned accesses. Such memory is intended to be used with standard RAM (e.g. framebuffers) and not I/O. Signed-off-by: Catalin Marinas Cc: Mark Brown Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3655a197b1ea3ce989d34868768c5f4b6205061c) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e333a243bfcc..e9a1a1d81892 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -184,7 +184,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define pgprot_noncached(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define pgprot_writecombine(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_GRE)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) #define pgprot_dmacoherent(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) #define __HAVE_PHYS_MEM_ACCESS_PROT From 058fe8b82589846921b62de504f65bac8e2f3ecc Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 3 Feb 2014 19:48:52 +0000 Subject: [PATCH 19/93] arm64: vdso: update wtm fields for CLOCK_MONOTONIC_COARSE commit d4022a335271a48cce49df35d825897914fbffe3 upstream. Update wall-to-monotonic fields in the VDSO data page unconditionally. These are used to service CLOCK_MONOTONIC_COARSE, which is not guarded by use_syscall. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b666f382900c74f23c78556777041c2ceb7c2b24) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 6a389dc1bd49..0ea7a22bcdf2 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -235,6 +235,8 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->use_syscall = use_syscall; vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; + vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { vdso_data->cs_cycle_last = tk->clock->cycle_last; @@ -242,8 +244,6 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->cs_mult = tk->mult; vdso_data->cs_shift = tk->shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; } smp_wmb(); From 36d43783007fa8ee9a782802ab28d02032de7740 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 14:41:26 +0000 Subject: [PATCH 20/93] arm64: vdso: prevent ld from aligning PT_LOAD segments to 64k commit 40507403485fcb56b83d6ddfc954e9b08305054c upstream. Whilst the text segment for our VDSO is marked as PT_LOAD in the ELF headers, it is mapped by the kernel and not actually subject to demand-paging. ld doesn't realise this, and emits a p_align field of 64k (the maximum supported page size), which conflicts with the load address picked by the kernel on 4k systems, which will be 4k aligned. This causes GDB to fail with "Failed to read a valid object file image from memory" when attempting to load the VDSO. This patch passes the -n option to ld, which prevents it from aligning PT_LOAD segments to the maximum page size. Reported-by: Kyle McMartin Acked-by: Kyle McMartin Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6737eaebff6297e5b6aeb458a4b4ad4b61df8f57) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d8064af42e62..6d20b7d162d8 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -48,7 +48,7 @@ $(obj-vdso): %.o: %.S # Actual build commands quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< From 3a44eb49a2734233b5fc9d98afee53be701f4c4d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 4 Feb 2014 16:01:31 +0000 Subject: [PATCH 21/93] arm64: Invalidate the TLB when replacing pmd entries during boot commit a55f9929a9b257f84b6cc7b2397379cabd744a22 upstream. With the 64K page size configuration, __create_page_tables in head.S maps enough memory to get started but using 64K pages rather than 512M sections with a single pgd/pud/pmd entry pointing to a pte table. create_mapping() may override the pgd/pud/pmd table entry with a block (section) one if the RAM size is more than 512MB and aligned correctly. For the end of this block to be accessible, the old TLB entry must be invalidated. Reported-by: Mark Salter Tested-by: Mark Salter Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f35f27e775d6f8aa265fb698975c9c95f2757ef4) Signed-off-by: Mark Brown --- arch/arm64/mm/mmu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 80a369eab637..ba7477efad5c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -203,10 +203,18 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); /* try section mapping first */ - if (((addr | next | phys) & ~SECTION_MASK) == 0) + if (((addr | next | phys) & ~SECTION_MASK) == 0) { + pmd_t old_pmd =*pmd; set_pmd(pmd, __pmd(phys | prot_sect_kernel)); - else + /* + * Check for previous table entries created during + * boot (__create_page_tables) and flush them. + */ + if (!pmd_none(old_pmd)) + flush_tlb_all(); + } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys)); + } phys += next - addr; } while (pmd++, addr = next, addr != end); } From 1a94c46eed86275ee899eae5210dd14d4129a03a Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 5 Feb 2014 05:53:04 +0000 Subject: [PATCH 22/93] arm64: vdso: fix coarse clock handling commit 069b918623e1510e58dacf178905a72c3baa3ae4 upstream. When __kernel_clock_gettime is called with a CLOCK_MONOTONIC_COARSE or CLOCK_REALTIME_COARSE clock id, it returns incorrectly to whatever the caller has placed in x2 ("ret x2" to return from the fast path). Fix this by saving x30/LR to x2 only in code that will call __do_get_tspec, restoring x30 afterward, and using a plain "ret" to return from the routine. Also: while the resulting tv_nsec value for CLOCK_REALTIME and CLOCK_MONOTONIC must be computed using intermediate values that are left-shifted by cs_shift (x12, set by __do_get_tspec), the results for coarse clocks should be calculated using unshifted values (xtime_coarse_nsec is in units of actual nanoseconds). The current code shifts intermediate values by x12 unconditionally, but x12 is uninitialized when servicing a coarse clock. Fix this by setting x12 to 0 once we know we are dealing with a coarse clock id. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit fb569d15d867a06e89b1be8278404b6fbf6b5bde) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso/gettimeofday.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index f0a6d10b5211..fe652ffd34c2 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -103,6 +103,8 @@ ENTRY(__kernel_clock_gettime) bl __do_get_tspec seqcnt_check w9, 1b + mov x30, x2 + cmp w0, #CLOCK_MONOTONIC b.ne 6f @@ -118,6 +120,9 @@ ENTRY(__kernel_clock_gettime) ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne b.ne 8f + /* xtime_coarse_nsec is already right-shifted */ + mov x12, #0 + /* Get coarse timespec. */ adr vdso_data, _vdso_data 3: seqcnt_acquire @@ -156,7 +161,7 @@ ENTRY(__kernel_clock_gettime) lsr x11, x11, x12 stp x10, x11, [x1, #TSPEC_TV_SEC] mov x0, xzr - ret x2 + ret 7: mov x30, x2 8: /* Syscall fallback. */ From 891ed08e70ce022069af9b421b689db5b0cb66ae Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 5 Feb 2014 09:34:36 +0000 Subject: [PATCH 23/93] arm64: add DSB after icache flush in __flush_icache_all() commit 5044bad43ee573d0b6d90e3ccb7a40c2c7d25eb4 upstream. Add DSB after icache flush to complete the cache maintenance operation. The function __flush_icache_all() is used only for user space mappings and an ISB is not required because of an exception return before executing user instructions. An exception return would behave like an ISB. Signed-off-by: Vinayak Kale Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 02599bad3774a5147eb8d98df7c9362fdc1a50c6) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cacheflush.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 3300cbd18a89..0c13554965b8 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -116,6 +116,7 @@ extern void flush_dcache_page(struct page *); static inline void __flush_icache_all(void) { asm("ic ialluis"); + dsb(); } #define flush_dcache_mmap_lock(mapping) \ From 226b19b9d3d9d3f231ac889fb96b6ff98febf6a6 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 14 Feb 2014 19:35:15 +0000 Subject: [PATCH 24/93] ARM64: unwind: Fix PC calculation commit e306dfd06fcb44d21c80acb8e5a88d55f3d1cf63 upstream. The frame PC value in the unwind code used to just take the saved LR value and use that. That's incorrect as a stack trace, since it shows the return path stack, not the call path stack. In particular, it shows faulty information in case the bl is done as the very last instruction of one label, since the return point will be in the next label. That can easily be seen with tail calls to panic(), which is marked __noreturn and thus doesn't have anything useful after it. Easiest here is to just correct the unwind code and do a -4, to get the actual call site for the backtrace instead of the return site. Signed-off-by: Olof Johansson Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c56e0dc1b70f1537659acffc6ac8d8d615be2dae) Signed-off-by: Mark Brown --- arch/arm64/kernel/stacktrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d25459ff57fc..048334bb2651 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -48,7 +48,11 @@ int unwind_frame(struct stackframe *frame) frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 8); + /* + * -4 here because we care about the PC at time of bl, + * not where the return will go. + */ + frame->pc = *(unsigned long *)(fp + 8) - 4; return 0; } From 5aab0869c941fc278fa72ff1d05bbbbb70f7d7c0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 12 Mar 2014 16:28:09 +0000 Subject: [PATCH 25/93] arm64: Do not synchronise I and D caches for special ptes commit 71fdb6bf61bf0692f004f9daf5650392c0cfe300 upstream. Special pte mappings are not intended to be executable and do not even have an associated struct page. This patch ensures that we do not call __sync_icache_dcache() on such ptes. Signed-off-by: Catalin Marinas Reported-by: Steve Capper Tested-by: Laura Abbott Tested-by: Bharat Bhushan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b1f2bfc9d8bb42b4e5a93470e8a0974cd4c04697) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e9a1a1d81892..1569b2bd91bb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -161,7 +161,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { if (pte_valid_user(pte)) { - if (pte_exec(pte)) + if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); if (!pte_dirty(pte)) pte = pte_wrprotect(pte); From e4cb973f4aa39836b5dc01f35f4c100607229e06 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 12 Mar 2014 16:07:06 +0000 Subject: [PATCH 26/93] arm64: Make DMA coherent and strongly ordered mappings not executable commit de2db7432917a82b62d55bb59635586eeca6d1bd upstream. pgprot_{dmacoherent,writecombine,noncached} don't need to generate executable mappings with side-effects like __sync_icache_dcache() being called when the mapping is in user space. Signed-off-by: Catalin Marinas Reported-by: Bharat Bhushan Tested-by: Laura Abbott Tested-by: Bharat Bhushan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 73e8697b71fa956642403304b96442fd4b57ce97) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1569b2bd91bb..3a710d7b14ce 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -182,11 +182,11 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, * Mark the prot value as uncacheable and unbufferable. */ #define pgprot_noncached(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN) #define pgprot_writecombine(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_dmacoherent(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, From f651b7f202cb2f70971db9050be120e437696ccb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 May 2013 16:38:23 +0100 Subject: [PATCH 27/93] arm64: Remove __flush_dcache_page() This function is only used in __sync_icache_dcache(), so remove it and call __flush_dcache_area() directly. The flush_icache_user_range() function is not used in the arm64 kernel. Signed-off-by: Catalin Marinas Reported-by: Will Deacon Acked-by: Will Deacon (cherry picked from commit ebd88367de80f9509bd30a09342d0a19c925b23e) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cacheflush.h | 3 --- arch/arm64/mm/flush.c | 7 +------ arch/arm64/mm/mm.h | 1 - 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 3300cbd18a89..fea9ee327206 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -123,9 +123,6 @@ static inline void __flush_icache_all(void) #define flush_dcache_mmap_unlock(mapping) \ spin_unlock_irq(&(mapping)->tree_lock) -#define flush_icache_user_range(vma,page,addr,len) \ - flush_dcache_page(page) - /* * We don't appear to need to do anything here. In fact, if we did, we'd * duplicate cache flushing elsewhere performed by flush_dcache_page(). diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 88611c3a421a..d3751b5f7f07 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -70,11 +70,6 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, #endif } -void __flush_dcache_page(struct page *page) -{ - __flush_dcache_area(page_address(page), PAGE_SIZE); -} - void __sync_icache_dcache(pte_t pte, unsigned long addr) { unsigned long pfn; @@ -86,7 +81,7 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) page = pfn_to_page(pfn); if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_page(page); + __flush_dcache_area(page_address(page), PAGE_SIZE); __flush_icache_all(); } else if (icache_is_aivivt()) { __flush_icache_all(); diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h index 916701e6d040..d519f4f50c8c 100644 --- a/arch/arm64/mm/mm.h +++ b/arch/arm64/mm/mm.h @@ -1,3 +1,2 @@ -extern void __flush_dcache_page(struct page *page); extern void __init bootmem_init(void); extern void __init arm64_swiotlb_init(void); From aee2c0d2d061df8d978c73022b48049de78e6a46 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 23 Apr 2013 12:35:02 +0100 Subject: [PATCH 28/93] mm: hugetlb: Copy huge_pmd_share from x86 to mm. Under x86, multiple puds can be made to reference the same bank of huge pmds provided that they represent a full PUD_SIZE of shared huge memory that is aligned to a PUD_SIZE boundary. The code to share pmds does not require any architecture specific knowledge other than the fact that pmds can be indexed, thus can be beneficial to some other architectures. This patch copies the huge pmd sharing (and unsharing) logic from x86/ to mm/ and introduces a new config option to activate it: CONFIG_ARCH_WANTS_HUGE_PMD_SHARE Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++ mm/hugetlb.c | 122 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6b4890fa57e7..981546ad231c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -69,6 +69,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); int dequeue_hwpoisoned_huge_page(struct page *page); void copy_huge_page(struct page *dst, struct page *src); +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); +#endif + extern unsigned long hugepages_treat_as_movable; extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e2bfbf73a551..15d84e55ceac 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3169,6 +3169,128 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) hugetlb_acct_memory(h, -(chg - freed)); } +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +static unsigned long page_table_shareable(struct vm_area_struct *svma, + struct vm_area_struct *vma, + unsigned long addr, pgoff_t idx) +{ + unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + + svma->vm_start; + unsigned long sbase = saddr & PUD_MASK; + unsigned long s_end = sbase + PUD_SIZE; + + /* Allow segments to share if only one is marked locked */ + unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; + unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; + + /* + * match the virtual addresses, permission and the alignment of the + * page table page. + */ + if (pmd_index(addr) != pmd_index(saddr) || + vm_flags != svm_flags || + sbase < svma->vm_start || svma->vm_end < s_end) + return 0; + + return saddr; +} + +static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long base = addr & PUD_MASK; + unsigned long end = base + PUD_SIZE; + + /* + * check on proper vm_flags and page table alignment + */ + if (vma->vm_flags & VM_MAYSHARE && + vma->vm_start <= base && end <= vma->vm_end) + return 1; + return 0; +} + +/* + * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() + * and returns the corresponding pte. While this is not necessary for the + * !shared pmd case because we can allocate the pmd later as well, it makes the + * code much cleaner. pmd allocation is essential for the shared case because + * pud has to be populated inside the same i_mmap_mutex section - otherwise + * racing tasks could either miss the sharing (see huge_pte_offset) or select a + * bad pmd for sharing. + */ +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + struct vm_area_struct *vma = find_vma(mm, addr); + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff; + struct vm_area_struct *svma; + unsigned long saddr; + pte_t *spte = NULL; + pte_t *pte; + + if (!vma_shareable(vma, addr)) + return (pte_t *)pmd_alloc(mm, pud, addr); + + mutex_lock(&mapping->i_mmap_mutex); + vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { + if (svma == vma) + continue; + + saddr = page_table_shareable(svma, vma, addr, idx); + if (saddr) { + spte = huge_pte_offset(svma->vm_mm, saddr); + if (spte) { + get_page(virt_to_page(spte)); + break; + } + } + } + + if (!spte) + goto out; + + spin_lock(&mm->page_table_lock); + if (pud_none(*pud)) + pud_populate(mm, pud, + (pmd_t *)((unsigned long)spte & PAGE_MASK)); + else + put_page(virt_to_page(spte)); + spin_unlock(&mm->page_table_lock); +out: + pte = (pte_t *)pmd_alloc(mm, pud, addr); + mutex_unlock(&mapping->i_mmap_mutex); + return pte; +} + +/* + * unmap huge page backed by shared pte. + * + * Hugetlb pte page is ref counted at the time of mapping. If pte is shared + * indicated by page_count > 1, unmap is achieved by clearing pud and + * decrementing the ref count. If count == 1, the pte page is not shared. + * + * called with vma->vm_mm->page_table_lock held. + * + * returns: 1 successfully unmapped a shared pte page + * 0 the underlying pte page is not shared, or it is the last user + */ +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + pgd_t *pgd = pgd_offset(mm, *addr); + pud_t *pud = pud_offset(pgd, *addr); + + BUG_ON(page_count(virt_to_page(ptep)) == 0); + if (page_count(virt_to_page(ptep)) == 1) + return 0; + + pud_clear(pud); + put_page(virt_to_page(ptep)); + *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; + return 1; +} +#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ + #ifdef CONFIG_MEMORY_FAILURE /* Should be called in hugetlb_lock */ From 2b78f19ada9fc885b16d148163831ae7eb67b739 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Mon, 29 Apr 2013 14:29:48 +0100 Subject: [PATCH 29/93] x86: mm: Remove x86 version of huge_pmd_share. The huge_pmd_share code has been copied over to mm/hugetlb.c to make it accessible to other architectures. Remove the x86 copy of the huge_pmd_share code and enable the ARCH_WANT_HUGE_PMD_SHARE config flag. That way we reference the general one. Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- arch/x86/Kconfig | 3 + arch/x86/mm/hugetlbpage.c | 120 -------------------------------------- 2 files changed, 3 insertions(+), 120 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe120da25625..56d606b2497c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -207,6 +207,9 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y + config ZONE_DMA32 bool default X86_64 diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index ae1aa71d0115..7e522a359972 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,126 +16,6 @@ #include #include -static unsigned long page_table_shareable(struct vm_area_struct *svma, - struct vm_area_struct *vma, - unsigned long addr, pgoff_t idx) -{ - unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + - svma->vm_start; - unsigned long sbase = saddr & PUD_MASK; - unsigned long s_end = sbase + PUD_SIZE; - - /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; - unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; - - /* - * match the virtual addresses, permission and the alignment of the - * page table page. - */ - if (pmd_index(addr) != pmd_index(saddr) || - vm_flags != svm_flags || - sbase < svma->vm_start || svma->vm_end < s_end) - return 0; - - return saddr; -} - -static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) -{ - unsigned long base = addr & PUD_MASK; - unsigned long end = base + PUD_SIZE; - - /* - * check on proper vm_flags and page table alignment - */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) - return 1; - return 0; -} - -/* - * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() - * and returns the corresponding pte. While this is not necessary for the - * !shared pmd case because we can allocate the pmd later as well, it makes the - * code much cleaner. pmd allocation is essential for the shared case because - * pud has to be populated inside the same i_mmap_mutex section - otherwise - * racing tasks could either miss the sharing (see huge_pte_offset) or select a - * bad pmd for sharing. - */ -static pte_t * -huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) -{ - struct vm_area_struct *vma = find_vma(mm, addr); - struct address_space *mapping = vma->vm_file->f_mapping; - pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - struct vm_area_struct *svma; - unsigned long saddr; - pte_t *spte = NULL; - pte_t *pte; - - if (!vma_shareable(vma, addr)) - return (pte_t *)pmd_alloc(mm, pud, addr); - - mutex_lock(&mapping->i_mmap_mutex); - vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { - if (svma == vma) - continue; - - saddr = page_table_shareable(svma, vma, addr, idx); - if (saddr) { - spte = huge_pte_offset(svma->vm_mm, saddr); - if (spte) { - get_page(virt_to_page(spte)); - break; - } - } - } - - if (!spte) - goto out; - - spin_lock(&mm->page_table_lock); - if (pud_none(*pud)) - pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); - else - put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); -out: - pte = (pte_t *)pmd_alloc(mm, pud, addr); - mutex_unlock(&mapping->i_mmap_mutex); - return pte; -} - -/* - * unmap huge page backed by shared pte. - * - * Hugetlb pte page is ref counted at the time of mapping. If pte is shared - * indicated by page_count > 1, unmap is achieved by clearing pud and - * decrementing the ref count. If count == 1, the pte page is not shared. - * - * called with vma->vm_mm->page_table_lock held. - * - * returns: 1 successfully unmapped a shared pte page - * 0 the underlying pte page is not shared, or it is the last user - */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); - - BUG_ON(page_count(virt_to_page(ptep)) == 0); - if (page_count(virt_to_page(ptep)) == 1) - return 0; - - pud_clear(pud); - put_page(virt_to_page(ptep)); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; - return 1; -} - pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { From ba310aa51d8b732c9f6d66d66b9458bc917c19a7 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 08:02:03 +0100 Subject: [PATCH 30/93] mm: hugetlb: Copy general hugetlb code from x86 to mm. The huge_pte_alloc, huge_pte_offset and follow_huge_p[mu]d functions in x86/mm/hugetlbpage.c do not rely on any architecture specific knowledge other than the fact that pmds and puds can be treated as huge ptes. To allow other architectures to use this code (and reduce the need for code duplication), this patch copies these functions into mm, replaces the use of pud_large with pud_huge and provides a config flag to activate them: CONFIG_ARCH_WANT_GENERAL_HUGETLB If CONFIG_ARCH_WANT_HUGE_PMD_SHARE is also active then the huge_pmd_share code will be called by huge_pte_alloc (othewise we call pmd_alloc and skip the sharing code). Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- mm/hugetlb.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 15d84e55ceac..dbd3d42ae031 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2931,15 +2931,6 @@ out_mutex: return ret; } -/* Can be overriden by architectures */ -__attribute__((weak)) struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - BUG(); - return NULL; -} - long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, unsigned long *nr_pages, @@ -3289,8 +3280,96 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; return 1; } +#define want_pmd_share() (1) +#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + return NULL; +} +#define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ +#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) { + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + } else { + BUG_ON(sz != PMD_SIZE); + if (want_pmd_share() && pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } + } + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) { + if (pud_huge(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, addr); + } + } + return (pte_t *) pmd; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + return page; +} + +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pud); + if (page) + page += ((address & ~PUD_MASK) >> PAGE_SHIFT); + return page; +} + +#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + +/* Can be overriden by architectures */ +__attribute__((weak)) struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + BUG(); + return NULL; +} + +#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + #ifdef CONFIG_MEMORY_FAILURE /* Should be called in hugetlb_lock */ From 60061a496643b97b9e36042c5a3af7f286d7f0fa Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 08:03:42 +0100 Subject: [PATCH 31/93] x86: mm: Remove general hugetlb code from x86. huge_pte_alloc, huge_pte_offset and follow_huge_p[mu]d have already been copied over to mm. This patch removes the x86 copies of these functions and activates the general ones by enabling: CONFIG_ARCH_WANT_GENERAL_HUGETLB Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- arch/x86/Kconfig | 3 ++ arch/x86/mm/hugetlbpage.c | 67 --------------------------------------- 2 files changed, 3 insertions(+), 67 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 56d606b2497c..787072769a80 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -210,6 +210,9 @@ config ARCH_SUSPEND_POSSIBLE config ARCH_WANT_HUGE_PMD_SHARE def_bool y +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + config ZONE_DMA32 bool default X86_64 diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 7e522a359972..7e73e8c69096 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,49 +16,6 @@ #include #include -pte_t *huge_pte_alloc(struct mm_struct *mm, - unsigned long addr, unsigned long sz) -{ - pgd_t *pgd; - pud_t *pud; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (pud) { - if (sz == PUD_SIZE) { - pte = (pte_t *)pud; - } else { - BUG_ON(sz != PMD_SIZE); - if (pud_none(*pud)) - pte = huge_pmd_share(mm, addr, pud); - else - pte = (pte_t *)pmd_alloc(mm, pud, addr); - } - } - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - - return pte; -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_large(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } - return (pte_t *) pmd; -} - #if 0 /* This is just for testing */ struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) @@ -120,30 +77,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_PSE); } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; -} - -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; -} - #endif /* x86_64 also uses this file */ From 6991fe93fc746ffe73d88e0fc47b674bf4dad4f9 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 7 May 2013 14:46:03 +0100 Subject: [PATCH 32/93] mm: thp: Correct the HPAGE_PMD_ORDER check. All Transparent Huge Pages are allocated by the buddy allocator. A compile time check is in place that fails when the order of a transparent huge page is too large to be allocated by the buddy allocator. Unfortunately that compile time check passes when: HPAGE_PMD_ORDER == MAX_ORDER ( which is incorrect as the buddy allocator can only allocate memory of order strictly less than MAX_ORDER. ) This patch updates the compile time check to fail in the above case. Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/huge_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 528454c2caa9..26ee56c80dc7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma, } while (0) extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, pmd_t *pmd); -#if HPAGE_PMD_ORDER > MAX_ORDER +#if HPAGE_PMD_ORDER >= MAX_ORDER #error "hugepages can't be allocated by the buddy allocator" #endif extern int hugepage_madvise(struct vm_area_struct *vma, From bf966f2702f43577b7f7b93db5c4ef2a0bc875a1 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 11:00:33 +0100 Subject: [PATCH 33/93] ARM64: mm: Restore memblock limit when map_mem finished. In paging_init the memblock limit is set to restrict any addresses returned by early_alloc to fit within the initial direct kernel mapping in swapper_pg_dir. This allows map_mem to allocate puds, pmds and ptes from the initial direct kernel mapping. The limit stays low after paging_init() though, meaning any bootmem allocations will be from a restricted subset of memory. Gigabyte huge pages, for instance, are normally allocated from bootmem as their order (18) is too large for the default buddy allocator (MAX_ORDER = 11). This patch restores the memblock limit when map_mem has finished, allowing gigabyte huge pages (and other objects) to be allocated from all of bootmem. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ba7477efad5c..49961d1fa033 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -305,6 +305,16 @@ static void __init map_mem(void) { struct memblock_region *reg; + /* + * Temporarily limit the memblock range. We need to do this as + * create_mapping requires puds, pmds and ptes to be allocated from + * memory addressable from the initial direct kernel mapping. + * + * The initial direct kernel mapping, located at swapper_pg_dir, + * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned). + */ + memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); + /* map all the memory banks */ for_each_memblock(memory, reg) { phys_addr_t start = reg->base; @@ -315,6 +325,9 @@ static void __init map_mem(void) create_mapping(start, __phys_to_virt(start), end - start); } + + /* Limit no longer required. */ + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } /* @@ -325,12 +338,6 @@ void __init paging_init(void) { void *zero_page; - /* - * Maximum PGDIR_SIZE addressable via the initial direct kernel - * mapping in swapper_pg_dir. - */ - memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); - init_mem_pgprot(); map_mem(); From 9bdb6ff441dabb89d736317de07c7b943e31052d Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 2 May 2013 16:25:42 +0100 Subject: [PATCH 34/93] ARM64: mm: Make PAGE_NONE pages read only and no-execute. If we consider the following code sequence: my_pte = pte_modify(entry, myprot); x = pte_write(my_pte); y = pte_exec(my_pte); If myprot comes from a PROT_NONE page, then x and y will both be true which is undesireable behaviour. This patch sets the no-execute and read-only bits for PAGE_NONE such that the code above will return false for both x and y. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3a710d7b14ce..31d4b1e5d44b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -66,7 +66,7 @@ extern pgprot_t pgprot_default; #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE) +#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) #define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) @@ -76,7 +76,7 @@ extern pgprot_t pgprot_default; #define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) #define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE) +#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) From d2ddf5c8634f83ee20f26b7b911a48e77e77912c Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 28 May 2013 13:35:51 +0100 Subject: [PATCH 35/93] ARM64: mm: Move PTE_PROT_NONE bit. Under ARM64, PTEs can be broadly categorised as follows: - Present and valid: Bit #0 is set. The PTE is valid and memory access to the region may fault. - Present and invalid: Bit #0 is clear and bit #1 is set. Represents present memory with PROT_NONE protection. The PTE is an invalid entry, and the user fault handler will raise a SIGSEGV. - Not present (file or swap): Bits #0 and #1 are clear. Memory represented has been paged out. The PTE is an invalid entry, and the fault handler will try and re-populate the memory where necessary. Huge PTEs are block descriptors that have bit #1 clear. If we wish to represent PROT_NONE huge PTEs we then run into a problem as there is no way to distinguish between regular and huge PTEs if we set bit #1. To resolve this ambiguity this patch moves PTE_PROT_NONE from bit #1 to bit #2 and moves PTE_FILE from bit #2 to bit #3. The number of swap/file bits is reduced by 1 as a consequence, leaving 60 bits for file and swap entries. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 31d4b1e5d44b..f1a357894c3c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,8 +25,8 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */ -#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ +#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) @@ -281,12 +281,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-8: swap type + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-8: swap type * bits 9-63: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 4 #define __SWP_TYPE_BITS 6 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) @@ -306,15 +306,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a file entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-63: file offset / PAGE_SIZE + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-63: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 3) -#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 4) +#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) -#define PTE_FILE_MAX_BITS 61 +#define PTE_FILE_MAX_BITS 60 extern int kern_addr_valid(unsigned long addr); From 3b5a3a6a3716d107d0c5854eab70a4eac40534a9 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 10 Apr 2013 13:48:00 +0100 Subject: [PATCH 36/93] ARM64: mm: HugeTLB support. Add huge page support to ARM64, different huge page sizes are supported depending on the size of normal pages: PAGE_SIZE is 4KB: 2MB - (pmds) these can be allocated at any time. 1024MB - (puds) usually allocated on bootup with the command line with something like: hugepagesz=1G hugepages=6 PAGE_SIZE is 64KB: 512MB - (pmds) usually allocated on bootup via command line. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 9 ++ arch/arm64/include/asm/hugetlb.h | 117 +++++++++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 8 ++ arch/arm64/include/asm/pgtable.h | 13 ++- arch/arm64/mm/Makefile | 1 + arch/arm64/mm/fault.c | 19 +--- arch/arm64/mm/hugetlbpage.c | 70 +++++++++++++++ 7 files changed, 220 insertions(+), 17 deletions(-) create mode 100644 arch/arm64/include/asm/hugetlb.h create mode 100644 arch/arm64/mm/hugetlbpage.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..cd6eca84a21c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -180,6 +180,15 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y if !ARM64_64K_PAGES + source "mm/Kconfig" endmenu diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h new file mode 100644 index 000000000000..5b7ca8ace95f --- /dev/null +++ b/arch/arm64/include/asm/hugetlb.h @@ -0,0 +1,117 @@ +/* + * arch/arm64/include/asm/hugetlb.h + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_HUGETLB_H +#define __ASM_HUGETLB_H + +#include +#include + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} + +#endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 75fd13d289b9..e6e0a0d4cf9a 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -25,12 +25,19 @@ /* * Hardware page table definitions. * + * Level 1 descriptor (PUD). + */ + +#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) + +/* * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) +#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) /* * Section @@ -53,6 +60,7 @@ #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f1a357894c3c..b7ef981f912d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -173,8 +173,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Huge pte definitions. */ -#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE) -#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE)) +#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) + +/* + * Hugetlb definitions. + */ +#define HUGE_MAX_HSTATE 2 +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define __HAVE_ARCH_PTE_SPECIAL diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 3140a2abcdc2..b51d36401d83 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o tlb.o proc.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f51d669c8ebd..6c8ba25bf6bb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -358,17 +358,6 @@ static int __kprobes do_translation_fault(unsigned long addr, return 0; } -/* - * Some section permission faults need to be handled gracefully. They can - * happen due to a __{get,put}_user during an oops. - */ -static int do_sect_fault(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - do_bad_area(addr, esr, regs); - return 0; -} - /* * This abort handler always returns "fault". */ @@ -392,12 +381,12 @@ static struct fault_info { { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c new file mode 100644 index 000000000000..2fc8258bab2d --- /dev/null +++ b/arch/arm64/mm/hugetlbpage.c @@ -0,0 +1,70 @@ +/* + * arch/arm64/mm/hugetlbpage.c + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/mm/hugetlbpage.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} +#endif + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return !(pmd_val(pmd) & PMD_TABLE_BIT); +} + +int pud_huge(pud_t pud) +{ + return !(pud_val(pud) & PUD_TABLE_BIT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (ps == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); From 00d9acb13773b654361ea92da5e915e2804e544f Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 25 Apr 2013 15:19:21 +0100 Subject: [PATCH 37/93] ARM64: mm: Raise MAX_ORDER for 64KB pages and THP. The buddy allocator has a default MAX_ORDER of 11, which is too low to allocate enough memory for 512MB Transparent HugePages if our base page size is 64KB. This patch introduces MAX_ZONE_ORDER and sets it to 14 when 64KB pages are used in conjuction with THP, otherwise the default value of 11 is used. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cd6eca84a21c..10607d63b945 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -191,6 +191,11 @@ config ARCH_WANT_HUGE_PMD_SHARE source "mm/Kconfig" +config FORCE_MAX_ZONEORDER + int + default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) + default "11" + endmenu menu "Boot options" From d9ccf6b1647eae73f79d45c12474ae73e29dc784 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 19 Apr 2013 16:23:57 +0100 Subject: [PATCH 38/93] ARM64: mm: THP support. Bring Transparent HugePage support to ARM. The size of a transparent huge page depends on the normal page size. A transparent huge page is always represented as a pmd. If PAGE_SIZE is 4KB, THPs are 2MB. If PAGE_SIZE is 64KB, THPs are 512MB. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 3 ++ arch/arm64/include/asm/pgtable-hwdef.h | 4 ++ arch/arm64/include/asm/pgtable.h | 55 ++++++++++++++++++++++++++ arch/arm64/include/asm/tlb.h | 6 +++ arch/arm64/include/asm/tlbflush.h | 2 + 5 files changed, 70 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 10607d63b945..308a55636f76 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -189,6 +189,9 @@ config ARCH_WANT_GENERAL_HUGETLB config ARCH_WANT_HUGE_PMD_SHARE def_bool y if !ARM64_64K_PAGES +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + source "mm/Kconfig" config FORCE_MAX_ZONEORDER diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index e6e0a0d4cf9a..63c9d0de05bb 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -42,6 +42,10 @@ /* * Section */ +#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) +#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ +#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b7ef981f912d..c9a5f264cab8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -187,6 +187,61 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_PTE_SPECIAL +/* + * Software PMD bits for THP + */ + +#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57) + +/* + * THP definitions. + */ +#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#endif + +#define PMD_BIT_FUNC(fn,op) \ +static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } + +PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); +PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); +PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); + +#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) + +#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN | + PMD_SECT_RDONLY | PMD_SECT_PROT_NONE | + PMD_SECT_VALID; + pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); + return pmd; +} + +#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd) + +static inline int has_transparent_hugepage(void) +{ + return 1; +} + /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 654f0968030b..46b3beb4b773 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -187,4 +187,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, #define tlb_migrate_finish(mm) do { } while (0) +static inline void +tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 122d6320f745..8b482035cfc2 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, dsb(); } +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif #endif From 6d9c4ff8c2fc63a1d79453c1f68323cca4386e7d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 27 Nov 2013 16:59:27 +0000 Subject: [PATCH 39/93] arm64: Move PTE_PROT_NONE higher up PTE_PROT_NONE means that a pte is present but does not have any read/write attributes. However, setting the memory type like pgprot_writecombine() is allowed and such bits overlap with PTE_PROT_NONE. This causes mmap/munmap issues in drivers that change the vma->vm_pg_prot on PROT_NONE mappings. This patch reverts the PTE_FILE/PTE_PROT_NONE shift in commit 59911ca4325d (ARM64: mm: Move PTE_PROT_NONE bit) and moves PTE_PROT_NONE together with the other software bits. Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas Tested-by: Steve Capper Cc: # 3.11+ --- arch/arm64/include/asm/pgtable.h | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c9a5f264cab8..6c85e51324a7 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,10 +25,11 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ -#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ +#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) + /* bit 57 for PMD_SECT_SPLITTING */ +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. @@ -345,18 +346,20 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: - * bits 0, 2: present (must both be zero) - * bit 3: PTE_FILE - * bits 4-8: swap type - * bits 9-63: swap offset + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-8: swap type + * bits 9-57: swap offset */ -#define __SWP_TYPE_SHIFT 4 +#define __SWP_TYPE_SHIFT 3 #define __SWP_TYPE_BITS 6 +#define __SWP_OFFSET_BITS 49 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) #define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) -#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT) +#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) @@ -370,15 +373,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a file entry: - * bits 0, 2: present (must both be zero) - * bit 3: PTE_FILE - * bits 4-63: file offset / PAGE_SIZE + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-57: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 4) -#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 3) +#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) -#define PTE_FILE_MAX_BITS 60 +#define PTE_FILE_MAX_BITS 55 extern int kern_addr_valid(unsigned long addr); From ce30e7d5f7d20ab0df99c6f3d7f45600f0deeecc Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 5 Dec 2013 12:04:51 +0000 Subject: [PATCH 40/93] arm64: mm: Fix PMD_SECT_PROT_NONE definition Modify the value of PMD_SECT_PROT_NONE to match that of PTE_NONE. This should have been in commit 3676f9ef5481 (Move PTE_PROT_NONE higher up). Signed-off-by: Steve Capper Cc: # 3.11+: 3676f9ef5481: arm64: Move PTE_PROT_NONE higher up Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable-hwdef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 63c9d0de05bb..2294f2330960 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -43,7 +43,7 @@ * Section */ #define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) -#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) From d5edd6d254097253b9feb3707c7405777a91630b Mon Sep 17 00:00:00 2001 From: Andreas Sandberg Date: Tue, 21 Jan 2014 15:49:09 -0800 Subject: [PATCH 41/93] mm/hugetlb.c: call MMU notifiers when copying a hugetlb page range When copy_hugetlb_page_range() is called to copy a range of hugetlb mappings, the secondary MMUs are not notified if there is a protection downgrade, which breaks COW semantics in KVM. This patch adds the necessary MMU notifier calls. Signed-off-by: Andreas Sandberg Acked-by: Steve Capper Acked-by: Marc Zyngier Cc: Mel Gorman Cc: Rik van Riel Cc: Hugh Dickins Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dbd3d42ae031..a7482d1d2131 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2295,16 +2295,26 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, int cow; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); + unsigned long mmun_start; /* For mmu_notifiers */ + unsigned long mmun_end; /* For mmu_notifiers */ + int ret = 0; cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + mmun_start = vma->vm_start; + mmun_end = vma->vm_end; + if (cow) + mmu_notifier_invalidate_range_start(src, mmun_start, mmun_end); + for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { src_pte = huge_pte_offset(src, addr); if (!src_pte) continue; dst_pte = huge_pte_alloc(dst, addr, sz); - if (!dst_pte) - goto nomem; + if (!dst_pte) { + ret = -ENOMEM; + break; + } /* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) @@ -2324,10 +2334,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, spin_unlock(&src->page_table_lock); spin_unlock(&dst->page_table_lock); } - return 0; -nomem: - return -ENOMEM; + if (cow) + mmu_notifier_invalidate_range_end(src, mmun_start, mmun_end); + + return ret; } static int is_hugetlb_entry_migration(pte_t pte) From 9370103e565eebf905ae0ab6b437b4131b2c2e73 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 15 Jan 2014 14:07:12 +0000 Subject: [PATCH 42/93] arm64: mm: Remove PTE_BIT_FUNC macro Expand out the pte manipulation functions. This makes our life easier when using things like tags and cscope. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 49 ++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6c85e51324a7..ac14abf80afd 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -140,16 +140,47 @@ extern struct page *empty_zero_page; #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) -#define PTE_BIT_FUNC(fn,op) \ -static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) |= PTE_RDONLY; + return pte; +} -PTE_BIT_FUNC(wrprotect, |= PTE_RDONLY); -PTE_BIT_FUNC(mkwrite, &= ~PTE_RDONLY); -PTE_BIT_FUNC(mkclean, &= ~PTE_DIRTY); -PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); -PTE_BIT_FUNC(mkold, &= ~PTE_AF); -PTE_BIT_FUNC(mkyoung, |= PTE_AF); -PTE_BIT_FUNC(mkspecial, |= PTE_SPECIAL); +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) &= ~PTE_RDONLY; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~PTE_AF; + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= PTE_AF; + return pte; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= PTE_SPECIAL; + return pte; +} static inline void set_pte(pte_t *ptep, pte_t pte) { From 2f4620f7ab52b501c56125278225dce92e21bf1f Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 15 Jan 2014 14:07:13 +0000 Subject: [PATCH 43/93] arm64: mm: Introduce PTE_WRITE We have the following means for encoding writable or dirty ptes: PTE_DIRTY PTE_RDONLY !pte_dirty && !pte_write 0 1 !pte_dirty && pte_write 0 1 pte_dirty && !pte_write 1 1 pte_dirty && pte_write 1 0 So we can't distinguish between writable clean ptes and read only ptes. This can cause problems with ptes being incorrectly flagged as read only when they are writable but not dirty. This patch introduces a new software bit PTE_WRITE which allows us to correctly identify writable ptes. PTE_RDONLY is now only clear for valid ptes where a page is both writable and dirty. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas Conflicts: arch/arm64/include/asm/pgtable.h --- arch/arm64/include/asm/pgtable.h | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index ac14abf80afd..7deb0105921d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -28,7 +28,7 @@ #define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) - /* bit 57 for PMD_SECT_SPLITTING */ +#define PTE_WRITE (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* @@ -67,23 +67,23 @@ extern pgprot_t pgprot_default; #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) -#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) -#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) +#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) +define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #endif /* __ASSEMBLY__ */ @@ -134,7 +134,7 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & PTE_AF) #define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_write(pte) (pte_val(pte) & PTE_WRITE) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ @@ -142,13 +142,13 @@ extern struct page *empty_zero_page; static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) |= PTE_RDONLY; + pte_val(pte) &= ~PTE_WRITE; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) &= ~PTE_RDONLY; + pte_val(pte) |= PTE_WRITE; return pte; } @@ -195,8 +195,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_valid_user(pte)) { if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); - if (!pte_dirty(pte)) - pte = pte_wrprotect(pte); + if (pte_dirty(pte) && pte_write(pte)) + pte_val(pte) &= ~PTE_RDONLY; + else + pte_val(pte) |= PTE_RDONLY; } set_pte(ptep, pte); @@ -364,7 +366,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } From 2457c2704c8b2bbd1caa254cb91ba787c8ec2a4d Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 25 Feb 2014 11:38:53 +0000 Subject: [PATCH 44/93] arm64: mm: Add double logical invert to pte accessors Page table entries on ARM64 are 64 bits, and some pte functions such as pte_dirty return a bitwise-and of a flag with the pte value. If the flag to be tested resides in the upper 32 bits of the pte, then we run into the danger of the result being dropped if downcast. For example: gather_stats(page, md, pte_dirty(*pte), 1); where pte_dirty(*pte) is downcast to an int. This patch adds a double logical invert to all the pte_ accessors to ensure predictable downcasting. Signed-off-by: Steve Capper Cc: Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7deb0105921d..154590ef6646 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -130,11 +130,11 @@ extern struct page *empty_zero_page; /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) -#define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & PTE_AF) -#define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (pte_val(pte) & PTE_WRITE) +#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) +#define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) +#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) +#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ From ae41944bea69a143f6e464f02784a9e1f5161e07 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 14 Apr 2014 18:25:05 +0100 Subject: [PATCH 45/93] arm64: Fix build for __PAGE_NONE define Simple typo. Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 154590ef6646..b8940733ee30 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -77,7 +77,7 @@ extern pgprot_t pgprot_default; #define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE) -define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) From f81685d08038409c7f46741372a7cbe7299d5f87 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Jun 2013 12:40:34 +0100 Subject: [PATCH 46/93] arm64: mm: don't bother invalidating the icache in switch_mm We don't support software broadcast of cache maintenance operations, so this flush is not required (__sync_icache_dcache will always affect all CPUs). Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 737c16dffc458e58ee7840556d43b874cb8e16a0) Signed-off-by: Mark Brown --- arch/arm64/include/asm/mmu_context.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index e2bc385adb6b..a9eee33dfa62 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -151,12 +151,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); -#ifdef CONFIG_SMP - /* check for possible thread migration */ - if (!cpumask_empty(mm_cpumask(next)) && - !cpumask_test_cpu(cpu, mm_cpumask(next))) - __flush_icache_all(); -#endif if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) check_and_switch_context(next, tsk); } From b1793a187423385f424f1d71264ca768dae79e6a Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Tue, 21 May 2013 10:46:05 +0100 Subject: [PATCH 47/93] arm64: kernel: compiling issue, need delete read_current_timer() Under arm64, we will calibrate the delay loop statically using a known timer frequency, so delete read_current_timer(), or it will cause compiling issue with allmodconfig. The related error: ERROR: "read_current_timer" [lib/rbtree_test.ko] undefined! ERROR: "read_current_timer" [lib/interval_tree_test.ko] undefined! ERROR: "read_current_timer" [fs/ext4/ext4.ko] undefined! ERROR: "read_current_timer" [crypto/tcrypt.ko] undefined! Signed-off-by: Chen Gang Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 6916b14ea140ff5c915895eefe9431888a39a84d) Signed-off-by: Mark Brown --- arch/arm64/include/asm/timex.h | 6 +++--- arch/arm64/kernel/time.c | 6 ------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h index b24a31a7e2c9..81a076eb37fa 100644 --- a/arch/arm64/include/asm/timex.h +++ b/arch/arm64/include/asm/timex.h @@ -16,14 +16,14 @@ #ifndef __ASM_TIMEX_H #define __ASM_TIMEX_H +#include + /* * Use the current timer as a cycle counter since this is what we use for * the delay loop. */ -#define get_cycles() ({ cycles_t c; read_current_timer(&c); c; }) +#define get_cycles() arch_counter_get_cntvct() #include -#define ARCH_HAS_READ_CURRENT_TIMER - #endif diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a551f88ae2c1..03dc3718eb13 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -68,12 +68,6 @@ unsigned long long notrace sched_clock(void) return arch_timer_read_counter() * sched_clock_mult; } -int read_current_timer(unsigned long *timer_value) -{ - *timer_value = arch_timer_read_counter(); - return 0; -} - void __init time_init(void) { u32 arch_timer_rate; From 712bda65b9c4206dd4f0aed78474c7dfdde3697f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Jun 2013 19:34:41 +0100 Subject: [PATCH 48/93] arm64: pgtable: use pte_index instead of __pte_index pte_index is a useful helper outside of arch/arm64, for things like the ARM SMMU driver, so rename __pte_index to pte_index to be consistent with both arch/arm/ and also the definitions of pmd_index and pgd_index. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 9ab6d02fddc6831b166812956ff387d7112ff626) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b8940733ee30..508ee304fde0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -120,7 +120,7 @@ extern struct page *empty_zero_page; #define pte_none(pte) (!pte_val(pte)) #define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0)) #define pte_page(pte) (pfn_to_page(pte_pfn(pte))) -#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + __pte_index(addr)) +#define pte_offset_kernel(dir,addr) (pmd_page_vaddr(*(dir)) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) @@ -361,7 +361,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #endif /* Find an entry in the third-level page table.. */ -#define __pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { From 179e79828c8daac4e902fb8716fbedc275befc34 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Jun 2013 19:34:42 +0100 Subject: [PATCH 49/93] arm64: device: add iommu pointer to device archdata When using an IOMMU for device mappings, it is necessary to keep a pointer between the device and the IOMMU to which it is attached in order to obtain the correct IOMMU when attaching the device to a domain. This patch adds an iommu pointer to the dev_archdata structure, in a similar manner to other architectures (ARM, PowerPC, x86, ...). Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 73150c983ac1f9b7653cfd3823b1ad4a44aad3bf) Signed-off-by: Mark Brown --- arch/arm64/include/asm/device.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 0d8453c755a8..cf98b362094b 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -18,6 +18,9 @@ struct dev_archdata { struct dma_map_ops *dma_ops; +#ifdef CONFIG_IOMMU_API + void *iommu; /* private IOMMU data */ +#endif }; struct pdev_archdata { From dc1307d2947cef80a10f53f1aa136b72accec79f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 May 2013 17:29:24 +0100 Subject: [PATCH 50/93] arm64: extable: sort the exception table at build time As is done for other architectures, sort the exception table at build-time rather than during boot. Since sortextable appears to be a standalone C program relying on the host elf.h to provide EM_AARCH64, I've had to add a conditional check in order to allow cross-compilation on machines that aren't running a bleeding-edge libc-dev. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit adace89562c7a9645b8dc84f6e1ac7ba8756094e) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/vmlinux.lds.S | 10 +--------- scripts/sortextable.c | 5 +++++ 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 308a55636f76..7f553338d109 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -7,6 +7,7 @@ config ARM64 select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3fae2be8b016..5e06a1786e26 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -56,7 +56,7 @@ SECTIONS } RO_DATA(PAGE_SIZE) - + EXCEPTION_TABLE(8) _etext = .; /* End of text and rodata section */ . = ALIGN(PAGE_SIZE); @@ -98,14 +98,6 @@ SECTIONS CACHELINE_ALIGNED_DATA(64) READ_MOSTLY_DATA(64) - /* - * The exception fixup table (might need resorting at runtime) - */ - . = ALIGN(32); - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - /* * and the usual data section */ diff --git a/scripts/sortextable.c b/scripts/sortextable.c index 1f10e89d15b4..f9ce1160419b 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -31,6 +31,10 @@ #include #include +#ifndef EM_AARCH64 +#define EM_AARCH64 183 +#endif + static int fd_map; /* File descriptor for file being modified. */ static int mmap_failed; /* Boolean flag. */ static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */ @@ -249,6 +253,7 @@ do_file(char const *const fname) custom_sort = sort_relative_table; break; case EM_ARM: + case EM_AARCH64: case EM_MIPS: break; } /* end switch */ From 9377422c5c84f693385907fd61d8e7fb19596530 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 16 Mar 2013 08:48:13 +0000 Subject: [PATCH 51/93] arm64: debug: consolidate software breakpoint handlers The software breakpoint handlers are hooked in directly from ptrace, which makes it difficult to add additional handlers for things like kprobes and kgdb. This patch moves the handling code into debug-monitors.c, where we can dispatch to different debug subsystems more easily. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 1442b6ed249d2b3d2cfcf45b65ac64393495c96c) Signed-off-by: Mark Brown --- arch/arm64/include/asm/debug-monitors.h | 9 ++++ arch/arm64/include/asm/ptrace.h | 2 - arch/arm64/kernel/debug-monitors.c | 66 ++++++++++++++++++++++++- arch/arm64/kernel/ptrace.c | 59 ---------------------- arch/arm64/kernel/traps.c | 5 +- 5 files changed, 75 insertions(+), 66 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7eaa0b302493..ef8235c68c09 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -83,6 +83,15 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) } #endif +#ifdef CONFIG_COMPAT +int aarch32_break_handler(struct pt_regs *regs); +#else +static int aarch32_break_handler(struct pt_regs *regs) +{ + return -EFAULT; +} +#endif + #endif /* __ASSEMBLY */ #endif /* __KERNEL__ */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41a71ee4c3df..0dacbbf9458b 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -171,7 +171,5 @@ extern unsigned long profile_pc(struct pt_regs *regs); #define profile_pc(regs) instruction_pointer(regs) #endif -extern int aarch32_break_trap(struct pt_regs *regs); - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f4726dc054b3..08018e3df580 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -226,13 +227,74 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } -static int __init single_step_init(void) +static int brk_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + if (!user_mode(regs)) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ + siginfo_t info; + unsigned int instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!compat_user_mode(regs)) + return -EFAULT; + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(instr, (u16 __user *)pc); + if (instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(instr, (u16 __user *)(pc + 2)); + bp = instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(instr, (u32 __user *)pc); + bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (!bp) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = pc, + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler"); + hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); return 0; } -arch_initcall(single_step_init); +arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 5341534b6d04..c484d5625ffb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -53,28 +53,6 @@ void ptrace_disable(struct task_struct *child) { } -/* - * Handle hitting a breakpoint. - */ -static int ptrace_break(struct pt_regs *regs) -{ - siginfo_t info = { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); - return 0; -} - -static int arm64_break_trap(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - return ptrace_break(regs); -} - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Handle hitting a HW-breakpoint. @@ -815,33 +793,6 @@ static const struct user_regset_view user_aarch32_view = { .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) }; -int aarch32_break_trap(struct pt_regs *regs) -{ - unsigned int instr; - bool bp = false; - void __user *pc = (void __user *)instruction_pointer(regs); - - if (compat_thumb_mode(regs)) { - /* get 16-bit Thumb instruction */ - get_user(instr, (u16 __user *)pc); - if (instr == AARCH32_BREAK_THUMB2_LO) { - /* get second half of 32-bit Thumb-2 instruction */ - get_user(instr, (u16 __user *)(pc + 2)); - bp = instr == AARCH32_BREAK_THUMB2_HI; - } else { - bp = instr == AARCH32_BREAK_THUMB; - } - } else { - /* 32-bit ARM instruction */ - get_user(instr, (u32 __user *)pc); - bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; - } - - if (bp) - return ptrace_break(regs); - return 1; -} - static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t __user *ret) { @@ -1109,16 +1060,6 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } - -static int __init ptrace_break_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); - return 0; -} -core_initcall(ptrace_break_init); - - asmlinkage int syscall_trace(int dir, struct pt_regs *regs) { unsigned long saved_reg; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f30852d28590..7ffadddb645d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -261,11 +262,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); -#ifdef CONFIG_COMPAT /* check for AArch32 breakpoint instructions */ - if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0) + if (!aarch32_break_handler(regs)) return; -#endif if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { From a8ed08a5daf8292f9feacf9aaea2ec49def9be5f Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sun, 16 Jun 2013 20:32:44 +0100 Subject: [PATCH 52/93] arm64/Makefile: provide vdso_install target Provide a vdso_install target in the arm64 Makefile, as other architectures with a vdso do. Signed-off-by: Kyle McMartin Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 3c01742a8ac93a3abf9b099758db970410427afd) Signed-off-by: Mark Brown --- arch/arm64/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c95c5cb212fd..b6ccf8a36e2d 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -60,6 +60,10 @@ zinstall install: vmlinux dtbs: scripts $(Q)$(MAKE) $(build)=$(boot)/dts dtbs +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) From a9439270f923281fae2213f13963ee64e767bafe Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 24 Apr 2013 10:06:57 +0100 Subject: [PATCH 53/93] arm64: Add Kconfig option for APM X-Gene SOC family This patch adds arm64/Kconfig option for APM X-Gene SOC family. Signed-off-by: Kumar Sankaran Signed-off-by: Loc Ho Signed-off-by: Feng Kan Signed-off-by: Catalin Marinas (cherry picked from commit 159428538323188158a6058956c16c199909d844) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7f553338d109..52077610cb7e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -112,6 +112,11 @@ config ARCH_VEXPRESS This enables support for the ARMv8 software model (Versatile Express). +config ARCH_XGENE + bool "AppliedMicro X-Gene SOC Family" + help + This enables support for AppliedMicro X-Gene SOC Family + endmenu menu "Bus support" @@ -149,6 +154,8 @@ config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 depends on SMP + # These have to remain sorted largest to smallest + default "8" if ARCH_XGENE default "4" source kernel/Kconfig.preempt From 4e7b7c9c2a3a9779156890cc69d32731948c1106 Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 24 Apr 2013 10:06:58 +0100 Subject: [PATCH 54/93] arm64: Enable APM X-Gene SOC family in the defconfig This patch enables APM X-Gene SOC family in the defconfig. It also enables 8250 serial driver needed by X-Gene SOC family. Signed-off-by: Kumar Sankaran Signed-off-by: Loc Ho Signed-off-by: Feng Kan Signed-off-by: Catalin Marinas (cherry picked from commit c1db16dc9e74addba4ef8c954702f13e457f0c7e) Signed-off-by: Mark Brown --- arch/arm64/configs/defconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 8d9696adb440..5b3e83217b03 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -24,6 +24,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_XGENE=y CONFIG_SMP=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_CMDLINE="console=ttyAMA0" @@ -54,6 +55,9 @@ CONFIG_INPUT_EVDEV=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set CONFIG_LEGACY_PTY_COUNT=16 +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y # CONFIG_HW_RANDOM is not set From 63442c002f0090d38ba036c63b9e4f4595d46ccc Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 24 Apr 2013 10:06:59 +0100 Subject: [PATCH 55/93] arm64: Add defines for APM ARMv8 implementation This patch adds defines for APM CPU implementer ID and APM CPU part numbers in asm/cputype.h Signed-off-by: Kumar Sankaran Signed-off-by: Loc Ho Signed-off-by: Feng Kan Signed-off-by: Catalin Marinas (cherry picked from commit 4ad637a452d5683ca7ff9e9eb994ac4b7a517073) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cputype.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index cf2749488cd4..5fe138e0b828 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -37,11 +37,14 @@ }) #define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_PART_AEM_V8 0xD0F0 #define ARM_CPU_PART_FOUNDATION 0xD000 #define ARM_CPU_PART_CORTEX_A57 0xD070 +#define APM_CPU_PART_POTENZA 0x0000 + #ifndef __ASSEMBLY__ /* From eadf099dfab2335c8f975a8242b53fd5be4f890f Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 24 Apr 2013 10:07:00 +0100 Subject: [PATCH 56/93] arm64: Add initial DTS for APM X-Gene Storm SOC and APM Mustang board This patch adds initial DTS files required for APM Mustang board. Signed-off-by: Kumar Sankaran Signed-off-by: Loc Ho Signed-off-by: Feng Kan Signed-off-by: Catalin Marinas (cherry picked from commit ee877b5321c4dfee9dc9f2a12b19ddcd33149f6a) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/Makefile | 1 + arch/arm64/boot/dts/apm-mustang.dts | 26 +++++++ arch/arm64/boot/dts/apm-storm.dtsi | 116 ++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 arch/arm64/boot/dts/apm-mustang.dts create mode 100644 arch/arm64/boot/dts/apm-storm.dtsi diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index 68457e9e0975..c52bdb051f66 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -1,4 +1,5 @@ dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb +dtb-$(CONFIG_ARCH_XGENE) += apm-mustang.dtb targets += dtbs targets += $(dtb-y) diff --git a/arch/arm64/boot/dts/apm-mustang.dts b/arch/arm64/boot/dts/apm-mustang.dts new file mode 100644 index 000000000000..1247ca1200b1 --- /dev/null +++ b/arch/arm64/boot/dts/apm-mustang.dts @@ -0,0 +1,26 @@ +/* + * dts file for AppliedMicro (APM) Mustang Board + * + * Copyright (C) 2013, Applied Micro Circuits Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +/dts-v1/; + +/include/ "apm-storm.dtsi" + +/ { + model = "APM X-Gene Mustang board"; + compatible = "apm,mustang", "apm,xgene-storm"; + + chosen { }; + + memory { + device_type = "memory"; + reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */ + }; +}; diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi new file mode 100644 index 000000000000..bfdc57834929 --- /dev/null +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -0,0 +1,116 @@ +/* + * dts file for AppliedMicro (APM) X-Gene Storm SOC + * + * Copyright (C) 2013, Applied Micro Circuits Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +/ { + compatible = "apm,xgene-storm"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@000 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x000>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@001 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x001>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@100 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x100>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@101 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x101>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@200 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x200>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@201 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x201>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@300 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x300>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + cpu@301 { + device_type = "cpu"; + compatible = "apm,potenza", "arm,armv8"; + reg = <0x0 0x301>; + enable-method = "spin-table"; + cpu-release-addr = <0x1 0x0000fff8>; + }; + }; + + gic: interrupt-controller@78010000 { + compatible = "arm,cortex-a15-gic"; + #interrupt-cells = <3>; + interrupt-controller; + reg = <0x0 0x78010000 0x0 0x1000>, /* GIC Dist */ + <0x0 0x78020000 0x0 0x1000>, /* GIC CPU */ + <0x0 0x78040000 0x0 0x2000>, /* GIC VCPU Control */ + <0x0 0x78060000 0x0 0x2000>; /* GIC VCPU */ + interrupts = <1 9 0xf04>; /* GIC Maintenence IRQ */ + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = <1 0 0xff01>, /* Secure Phys IRQ */ + <1 13 0xff01>, /* Non-secure Phys IRQ */ + <1 14 0xff01>, /* Virt IRQ */ + <1 15 0xff01>; /* Hyp IRQ */ + clock-frequency = <50000000>; + }; + + soc { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + serial0: serial@1c020000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0 0x1c020000 0x0 0x1000>; + reg-shift = <2>; + clock-frequency = <10000000>; /* Updated by bootloader */ + interrupt-parent = <&gic>; + interrupts = <0x0 0x4c 0x4>; + }; + }; +}; From c0d0b7ddd13cad209c8211a4da862c9e56096493 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Mon, 24 Jun 2013 10:27:49 +0100 Subject: [PATCH 57/93] arm64: add '#ifdef CONFIG_COMPAT' for aarch32_break_handler() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If 'COMPAT' not defined, aarch32_break_handler() cannot pass compiling, and it can work independent with 'COMPAT', so remove dummy definition. The related error: arch/arm64/kernel/debug-monitors.c:249:5: error: redefinition of ‘aarch32_break_handler’ In file included from arch/arm64/kernel/debug-monitors.c:29:0: /root/linux-next/arch/arm64/include/asm/debug-monitors.h:89:12: note: previous definition of ‘aarch32_break_handler’ was here Signed-off-by: Chen Gang Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit c783c2815e13bbb0c0b99997cc240bd7e91b6bb8) Signed-off-by: Mark Brown --- arch/arm64/include/asm/debug-monitors.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index ef8235c68c09..a2232d07be9d 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -83,14 +83,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) } #endif -#ifdef CONFIG_COMPAT int aarch32_break_handler(struct pt_regs *regs); -#else -static int aarch32_break_handler(struct pt_regs *regs) -{ - return -EFAULT; -} -#endif #endif /* __ASSEMBLY */ #endif /* __KERNEL__ */ From 9a87b8d3cd27f298d18b2f1812031dae7520fd74 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 1 Jul 2013 14:20:35 -0400 Subject: [PATCH 58/93] of: Specify initrd location using 64-bit On some PAE architectures, the entire range of physical memory could reside outside the 32-bit limit. These systems need the ability to specify the initrd location using 64-bit numbers. This patch globally modifies the early_init_dt_setup_initrd_arch() function to use 64-bit numbers instead of the current unsigned long. There has been quite a bit of debate about whether to use u64 or phys_addr_t. It was concluded to stick to u64 to be consistent with rest of the device tree code. As summarized by Geert, "The address to load the initrd is decided by the bootloader/user and set at that point later in time. The dtb should not be tied to the kernel you are booting" More details on the discussion can be found here: https://lkml.org/lkml/2013/6/20/690 https://lkml.org/lkml/2012/9/13/544 Signed-off-by: Santosh Shilimkar Acked-by: Rob Herring Acked-by: Vineet Gupta Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Grant Likely (cherry picked from commit 374d5c9964c10373ba39bbe934f4262eb87d7114) Signed-off-by: Mark Brown --- arch/arc/mm/init.c | 5 ++--- arch/arm/mm/init.c | 2 +- arch/arm64/mm/init.c | 3 +-- arch/c6x/kernel/devicetree.c | 3 +-- arch/metag/mm/init.c | 5 ++--- arch/microblaze/kernel/prom.c | 3 +-- arch/mips/kernel/prom.c | 3 +-- arch/openrisc/kernel/prom.c | 3 +-- arch/powerpc/kernel/prom.c | 3 +-- arch/x86/kernel/devicetree.c | 3 +-- arch/xtensa/kernel/setup.c | 3 +-- drivers/of/fdt.c | 10 ++++++---- include/linux/of_fdt.h | 3 +-- 13 files changed, 20 insertions(+), 29 deletions(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 4a177365b2c4..7991e08d606b 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -157,9 +157,8 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { - pr_err("%s(%lx, %lx)\n", __func__, start, end); + pr_err("%s(%llx, %llx)\n", __func__, start, end); } #endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9a5cdc01fcdf..afeaef7a8ffc 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -76,7 +76,7 @@ static int __init parse_tag_initrd2(const struct tag *tag) __tagtable(ATAG_INITRD2, parse_tag_initrd2); #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { phys_initrd_start = start; phys_initrd_size = end - start; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f497ca77925a..704770891d06 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -44,8 +44,7 @@ static unsigned long phys_initrd_size __initdata = 0; phys_addr_t memstart_addr __read_mostly = 0; -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { phys_initrd_start = start; phys_initrd_size = end - start; diff --git a/arch/c6x/kernel/devicetree.c b/arch/c6x/kernel/devicetree.c index bdb56f09d0ac..287d0e64dfba 100644 --- a/arch/c6x/kernel/devicetree.c +++ b/arch/c6x/kernel/devicetree.c @@ -33,8 +33,7 @@ void __init early_init_devtree(void *params) #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index d05b8455c44c..bdc48111f0df 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -419,10 +419,9 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { - pr_err("%s(%lx, %lx)\n", + pr_err("%s(%llx, %llx)\n", __func__, start, end); } #endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 0a2c68f9f9b0..62e2e8f2c5d6 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -136,8 +136,7 @@ void __init early_init_devtree(void *params) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 5712bb532245..32b87882ac87 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -58,8 +58,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c index 5869e3fa5dd3..150215a91711 100644 --- a/arch/openrisc/kernel/prom.c +++ b/arch/openrisc/kernel/prom.c @@ -96,8 +96,7 @@ void __init early_init_devtree(void *params) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 8b6f7a99cce2..2f3e25225158 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -550,8 +550,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index b1581527a236..2fbad6b9f23c 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -52,8 +52,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_BLK_DEV_INITRD -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (unsigned long)__va(start); initrd_end = (unsigned long)__va(end); diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 6dd25ecde3f5..d45e602f4328 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -170,8 +170,7 @@ static int __init parse_tag_fdt(const bp_tag_t *tag) __tagtable(BP_TAG_FDT, parse_tag_fdt); -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) +void __init early_init_dt_setup_initrd_arch(u64 start, u64 end) { initrd_start = (void *)__va(start); initrd_end = (void *)__va(end); diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 808be06bb67e..21123b8ee4d4 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -550,7 +550,8 @@ int __init of_flat_dt_match(unsigned long node, const char *const *compat) */ void __init early_init_dt_check_for_initrd(unsigned long node) { - unsigned long start, end, len; + u64 start, end; + unsigned long len; __be32 *prop; pr_debug("Looking for initrd properties... "); @@ -558,15 +559,16 @@ void __init early_init_dt_check_for_initrd(unsigned long node) prop = of_get_flat_dt_prop(node, "linux,initrd-start", &len); if (!prop) return; - start = of_read_ulong(prop, len/4); + start = of_read_number(prop, len/4); prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len); if (!prop) return; - end = of_read_ulong(prop, len/4); + end = of_read_number(prop, len/4); early_init_dt_setup_initrd_arch(start, end); - pr_debug("initrd_start=0x%lx initrd_end=0x%lx\n", start, end); + pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", + (unsigned long long)start, (unsigned long long)end); } #else inline void early_init_dt_check_for_initrd(unsigned long node) diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index ed136ad698ce..4a17939b95cc 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -106,8 +106,7 @@ extern u64 dt_mem_next_cell(int s, __be32 **cellp); * physical addresses. */ #ifdef CONFIG_BLK_DEV_INITRD -extern void early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end); +extern void early_init_dt_setup_initrd_arch(u64 start, u64 end); #endif /* Early flat tree scan hooks */ From 52e83d9043b1699156b3a5f50356c23026ef17ad Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 21 Aug 2013 10:50:17 +0100 Subject: [PATCH 59/93] ARM64: include: asm: include "asm/types.h" in "pgtable-2level-types.h" and "pgtable-3level-types.h" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need include "asm/types.h", just like arm has done, or can not pass compiling, the related error: In file included from arch/arm64/include/asm/page.h:37:0, from drivers/staging/lustre/include/linux/lnet/linux/lib-lnet.h:42, from drivers/staging/lustre/include/linux/lnet/lib-lnet.h:44, from drivers/staging/lustre/lnet/lnet/api-ni.c:38: arch/arm64/include/asm/pgtable-2level-types.h:19:1: error: unknown type name ‘u64 arch/arm64/include/asm/pgtable-2level-types.h:20:1: error: unknown type name ‘u64’ Signed-off-by: Chen Gang Signed-off-by: Catalin Marinas (cherry picked from commit 360b35a874f130305715b1b854b67dc40826fc91) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable-2level-types.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/pgtable-2level-types.h b/arch/arm64/include/asm/pgtable-2level-types.h index 3c3ca7d361e4..5f101e63dfc1 100644 --- a/arch/arm64/include/asm/pgtable-2level-types.h +++ b/arch/arm64/include/asm/pgtable-2level-types.h @@ -16,6 +16,8 @@ #ifndef __ASM_PGTABLE_2LEVEL_TYPES_H #define __ASM_PGTABLE_2LEVEL_TYPES_H +#include + typedef u64 pteval_t; typedef u64 pgdval_t; typedef pgdval_t pmdval_t; From 3033aae67ae55a86e2a0b73199984ff060effa7b Mon Sep 17 00:00:00 2001 From: Roy Franz Date: Thu, 15 Aug 2013 00:10:00 +0100 Subject: [PATCH 60/93] arm64: Expand arm64 image header Expand the arm64 image header to allow for co-existance with PE/COFF header required by the EFI stub. The PE/COFF format requires the "MZ" header to be at offset 0, and the offset to the PE/COFF header to be at offset 0x3c. The image header is expanded to allow 2 instructions at the beginning to accommodate a benign intruction at offset 0 that includes the "MZ" header, a magic number, and the offset to the PE/COFF header. Signed-off-by: Roy Franz Signed-off-by: Catalin Marinas (cherry picked from commit 4370eec05a887b0cd4392cd5dc5b2713174745c0) Signed-off-by: Mark Brown --- Documentation/arm64/booting.txt | 16 +++++++++++++--- arch/arm64/kernel/head.S | 8 ++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 9c4d388daddc..5273c4d60e65 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -68,13 +68,23 @@ Image target is available instead. Requirement: MANDATORY -The decompressed kernel image contains a 32-byte header as follows: +The decompressed kernel image contains a 64-byte header as follows: - u32 magic = 0x14000008; /* branch to stext, little-endian */ - u32 res0 = 0; /* reserved */ + u32 code0; /* Executable code */ + u32 code1; /* Executable code */ u64 text_offset; /* Image load offset */ + u64 res0 = 0; /* reserved */ u64 res1 = 0; /* reserved */ u64 res2 = 0; /* reserved */ + u64 res3 = 0; /* reserved */ + u64 res4 = 0; /* reserved */ + u32 magic = 0x644d5241; /* Magic number, little endian, "ARM\x64" */ + u32 res5 = 0; /* reserved */ + + +Header notes: + +- code0/code1 are responsible for branching to stext. The image must be placed at the specified offset (currently 0x80000) from the start of the system RAM and called there. The start of the diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 53dcae49e729..7090c126797c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -112,6 +112,14 @@ .quad TEXT_OFFSET // Image load offset from start of RAM .quad 0 // reserved .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .byte 0x41 // Magic number, "ARM\x64" + .byte 0x52 + .byte 0x4d + .byte 0x64 + .word 0 // reserved ENTRY(stext) mov x21, x0 // x21=FDT From 0195e1c67cd4b47ed4c2dbd0a94e69543121e0f0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Aug 2013 11:47:37 +0100 Subject: [PATCH 61/93] arm64: Enable interrupts in the EL0 undef handler do_undefinstr() has to be called with interrupts disabled since it may read the instruction from the user address space which could lead to a data abort and subsequent might_sleep() warning in do_page_fault(). Signed-off-by: Catalin Marinas (cherry picked from commit 2600e130b3c90c8d6c13229d3d3a14dcb898a87b) Signed-off-by: Mark Brown --- arch/arm64/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6ad781b21c08..57640df9d70c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -476,6 +476,8 @@ el0_undef: * Undefined instruction */ mov x0, sp + // enable interrupts before calling the main handler + enable_irq b do_undefinstr el0_dbg: /* From 60d914ababfa53c35bf92b6be1921f265e535ab6 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Fri, 23 Aug 2013 16:16:42 +0100 Subject: [PATCH 62/93] arm64: move elf notes into readonly segment The current vmlinux.lds.S places the notes sections between the end of rw data and start of bss. This means that _edata doesn't really point to the end of data. Since notes are read-only, this patch moves them to the read-only segment so that _edata does point to the end of initialized rw data. Signed-off-by: Mark Salter Signed-off-by: Catalin Marinas (cherry picked from commit c80b7ee8520606f77fbc8ced870c96659053269e) Signed-off-by: Mark Brown --- arch/arm64/kernel/vmlinux.lds.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 5e06a1786e26..1e8e6be0241c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -57,6 +57,7 @@ SECTIONS RO_DATA(PAGE_SIZE) EXCEPTION_TABLE(8) + NOTES _etext = .; /* End of text and rodata section */ . = ALIGN(PAGE_SIZE); @@ -108,8 +109,6 @@ SECTIONS } _edata_loc = __data_loc + SIZEOF(.data); - NOTES - BSS_SECTION(0, 0, 0) _end = .; From 6d5002154f8dad3866240e5368c4341a1af6aafc Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 23 Aug 2013 18:04:44 +0100 Subject: [PATCH 63/93] arm64: Fix mapping of memory banks not ending on a PMD_SIZE boundary The map_mem() function limits the current memblock limit to PGDIR_SIZE (the initial swapper_pg_dir mapping) to avoid create_mapping() allocating memory from unmapped areas. However, if the first block is within PGDIR_SIZE and not ending on a PMD_SIZE boundary, when 4K page configuration is enabled, create_mapping() will try to allocate a pte page. Such page may be returned by memblock_alloc() from the end of such bank (or any subsequent bank within PGDIR_SIZE) which is not mapped yet. The patch limits the current memblock limit to the aligned end of the first bank and gradually increases it as more memory is mapped. It also ensures that the start of the first bank is aligned to PMD_SIZE to avoid pte page allocation for this mapping. Signed-off-by: Catalin Marinas Reported-by: "Leizhen (ThunderTown, Euler)" Tested-by: "Leizhen (ThunderTown, Euler)" (cherry picked from commit e25208f77c2dad5a9f2ab3d3df61252a90b71afa) Signed-off-by: Mark Brown --- arch/arm64/mm/mmu.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 49961d1fa033..f8dc7e8fce6f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -304,6 +304,7 @@ void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt) static void __init map_mem(void) { struct memblock_region *reg; + phys_addr_t limit; /* * Temporarily limit the memblock range. We need to do this as @@ -311,9 +312,11 @@ static void __init map_mem(void) * memory addressable from the initial direct kernel mapping. * * The initial direct kernel mapping, located at swapper_pg_dir, - * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned). + * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be + * aligned to 2MB as per Documentation/arm64/booting.txt). */ - memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); + limit = PHYS_OFFSET + PGDIR_SIZE; + memblock_set_current_limit(limit); /* map all the memory banks */ for_each_memblock(memory, reg) { @@ -323,6 +326,22 @@ static void __init map_mem(void) if (start >= end) break; +#ifndef CONFIG_ARM64_64K_PAGES + /* + * For the first memory bank align the start address and + * current memblock limit to prevent create_mapping() from + * allocating pte page tables from unmapped memory. + * When 64K pages are enabled, the pte page table for the + * first PGDIR_SIZE is already present in swapper_pg_dir. + */ + if (start < limit) + start = ALIGN(start, PMD_SIZE); + if (end < limit) { + limit = end & PMD_MASK; + memblock_set_current_limit(limit); + } +#endif + create_mapping(start, __phys_to_virt(start), end - start); } From 0fa5264c3f0ce5625640fee0b7e82fe4ab9f7f17 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Aug 2013 18:06:48 +0100 Subject: [PATCH 64/93] arm64: delay: don't bother reporting bogomips in /proc/cpuinfo We always use a timer-backed delay loop for arm64, so don't bother reporting a bogomips value which appears to confuse some people. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 326b16db9f69fd0d279be873c6c00f88c0a4aad5) Signed-off-by: Mark Brown --- arch/arm64/kernel/setup.c | 3 --- arch/arm64/kernel/smp.c | 6 +----- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index add6ea616843..bca4c1c2052a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -328,9 +328,6 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", - loops_per_jiffy / (500000UL/HZ), - loops_per_jiffy / (5000UL/HZ) % 100); } /* dump out the processor features */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 9c93e126328c..edbfe241eacb 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -223,11 +223,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void) void __init smp_cpus_done(unsigned int max_cpus) { - unsigned long bogosum = loops_per_jiffy * num_online_cpus(); - - pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - num_online_cpus(), bogosum / (500000/HZ), - (bogosum / (5000/HZ)) % 100); + pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); } void __init smp_prepare_boot_cpu(void) From 760b97f0952ff32d3856241ac6847c82d1477ffd Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Wed, 28 Aug 2013 14:24:53 +0100 Subject: [PATCH 65/93] Move the EM_ARM and EM_AARCH64 definitions to uapi/linux/elf-em.h Signed-off-by: Dan Aloni Signed-off-by: Catalin Marinas (cherry picked from commit 909e3ee4119f87b85c6e1b8534b2287ed1ea3ca2) Signed-off-by: Mark Brown --- arch/arm/include/asm/elf.h | 2 -- arch/arm64/include/asm/elf.h | 3 --- include/uapi/linux/elf-em.h | 2 ++ 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index 38050b1c4800..e110a672e160 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -19,8 +19,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_fp elf_fpregset_t; -#define EM_ARM 40 - #define EF_ARM_EABI_MASK 0xff000000 #define EF_ARM_EABI_UNKNOWN 0x00000000 #define EF_ARM_EABI_VER1 0x01000000 diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index fe32c0e4ac01..e7fa87f9201b 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -33,8 +33,6 @@ typedef unsigned long elf_greg_t; typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct user_fpsimd_state elf_fpregset_t; -#define EM_AARCH64 183 - /* * AArch64 static relocation types. */ @@ -151,7 +149,6 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk #ifdef CONFIG_COMPAT -#define EM_ARM 40 #define COMPAT_ELF_PLATFORM ("v8l") #define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3)) diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index 8e2b7bac4378..59c17a2d38ad 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -22,6 +22,7 @@ #define EM_PPC 20 /* PowerPC */ #define EM_PPC64 21 /* PowerPC64 */ #define EM_SPU 23 /* Cell BE SPU */ +#define EM_ARM 40 /* ARM 32 bit */ #define EM_SH 42 /* SuperH */ #define EM_SPARCV9 43 /* SPARC v9 64-bit */ #define EM_IA_64 50 /* HP/Intel IA-64 */ @@ -34,6 +35,7 @@ #define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */ #define EM_BLACKFIN 106 /* ADI Blackfin Processor */ #define EM_TI_C6000 140 /* TI C6X DSPs */ +#define EM_AARCH64 183 /* ARM 64 bit */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ #define EM_AVR32 0x18ad /* Atmel AVR32 */ From f164a6c211ef728f8a0f2fa8fa9f06549b138034 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 12 Jun 2013 16:28:04 +0100 Subject: [PATCH 66/93] arm64: mm: permit use of tagged pointers at EL0 TCR.TBI0 can be used to cause hardware address translation to ignore the top byte of userspace virtual addresses. Whilst not especially useful in standard C programs, this can be used by JITs to `tag' pointers with various pieces of metadata. This patch enables this bit for AArch64 Linux, and adds a new file to Documentation/arm64/ which describes some potential caveats when using tagged virtual addresses. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit d50240a5f6ceaf690a77b0fccb17be51cfa151c2) Signed-off-by: Mark Brown --- Documentation/arm64/tagged-pointers.txt | 34 +++++++++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 1 + arch/arm64/kernel/entry.S | 1 + arch/arm64/mm/proc.S | 2 +- 4 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 Documentation/arm64/tagged-pointers.txt diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt new file mode 100644 index 000000000000..264e9841563a --- /dev/null +++ b/Documentation/arm64/tagged-pointers.txt @@ -0,0 +1,34 @@ + Tagged virtual addresses in AArch64 Linux + ========================================= + +Author: Will Deacon +Date : 12 June 2013 + +This document briefly describes the provision of tagged virtual +addresses in the AArch64 translation system and their potential uses +in AArch64 Linux. + +The kernel configures the translation tables so that translations made +via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of +the virtual address ignored by the translation hardware. This frees up +this byte for application use, with the following caveats: + + (1) The kernel requires that all user addresses passed to EL1 + are tagged with tag 0x00. This means that any syscall + parameters containing user virtual addresses *must* have + their top byte cleared before trapping to the kernel. + + (2) Tags are not guaranteed to be preserved when delivering + signals. This means that signal handlers in applications + making use of tags cannot rely on the tag information for + user virtual addresses being maintained for fields inside + siginfo_t. One exception to this rule is for signals raised + in response to debug exceptions, where the tag information + will be preserved. + + (3) Special care should be taken when using tagged pointers, + since it is likely that C compilers will not hazard two + addresses differing only in the upper bits. + +The architecture prevents the use of a tagged PC, so the upper byte will +be set to a sign-extension of bit 55 on exception return. diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 2294f2330960..d25991747650 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -104,5 +104,6 @@ #define TCR_TG1_64K (UL(1) << 30) #define TCR_IPS_40BIT (UL(2) << 32) #define TCR_ASID16 (UL(1) << 36) +#define TCR_TBI0 (UL(1) << 37) #endif diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 57640df9d70c..3881fd115ebb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -423,6 +423,7 @@ el0_da: * Data abort handling */ mrs x0, far_el1 + bic x0, x0, #(0xff << 56) disable_step x1 isb enable_dbg diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index f84fcf71f129..b1b31bbc967b 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -147,7 +147,7 @@ ENTRY(__cpu_setup) * both user and kernel. */ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \ - TCR_ASID16 | (1 << 31) + TCR_ASID16 | TCR_TBI0 | (1 << 31) #ifdef CONFIG_ARM64_64K_PAGES orr x10, x10, TCR_TG0_64K orr x10, x10, TCR_TG1_64K From 08868ba57e4ec7729db37199e65480dadd9f5d30 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 16 Sep 2013 15:18:28 +0100 Subject: [PATCH 67/93] arm64: Make do_bad_area() function static This function is only called from arch/arm64/mm/fault.c. Signed-off-by: Catalin Marinas (cherry picked from commit 59f67e16e6b79697241c3fd030e3da300377893e) Signed-off-by: Mark Brown --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 6c8ba25bf6bb..df4f2fd187c3 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -130,7 +130,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, force_sig_info(sig, &si, tsk); } -void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) +static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->active_mm; From 37f03baee485e2031b2226d77e4ae4c6f28ff390 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 17 Sep 2013 18:49:46 +0100 Subject: [PATCH 68/93] arm64: Correctly report LR and SP for compat tasks When a task crashes and we print debugging information, ensure that compat tasks show the actual AArch32 LR and SP registers rather than the AArch64 ones. Signed-off-by: Catalin Marinas (cherry picked from commit 6ca68e802612c87c31aa83d50c37ed0d88774a46) Signed-off-by: Mark Brown --- arch/arm64/kernel/process.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 46f02c3b5015..2090389b95b0 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -143,15 +143,26 @@ void machine_restart(char *cmd) void __show_regs(struct pt_regs *regs) { - int i; + int i, top_reg; + u64 lr, sp; + + if (compat_user_mode(regs)) { + lr = regs->compat_lr; + sp = regs->compat_sp; + top_reg = 12; + } else { + lr = regs->regs[30]; + sp = regs->sp; + top_reg = 29; + } show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", regs->regs[30]); + print_symbol("LR is at %s\n", lr); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", - regs->pc, regs->regs[30], regs->pstate); - printk("sp : %016llx\n", regs->sp); - for (i = 29; i >= 0; i--) { + regs->pc, lr, regs->pstate); + printk("sp : %016llx\n", sp); + for (i = top_reg; i >= 0; i--) { printk("x%-2d: %016llx ", i, regs->regs[i]); if (i % 2 == 0) printk("\n"); From 9d6eaf2f9176f131889d41801da4148f9b6718a3 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 18 Sep 2013 16:14:28 +0100 Subject: [PATCH 69/93] arm64: Widen hwcap to be 64 bit Under arm64 elf_hwcap is a 32 bit quantity, but it is stored in a 64 bit auxiliary ELF field and glibc reads hwcap as 64 bit. This patch widens elf_hwcap to be 64 bit. Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas (cherry picked from commit 25804e6a96681d5d2142058948e218999e4f547c) Signed-off-by: Mark Brown --- arch/arm64/include/asm/hwcap.h | 2 +- arch/arm64/kernel/setup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6d4482fa35bc..e2950b098e76 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -43,6 +43,6 @@ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) -extern unsigned int elf_hwcap; +extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index bca4c1c2052a..40ae3ced37a1 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -57,7 +57,7 @@ unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int elf_hwcap __read_mostly; +unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); static const char *cpu_name; From e9b67be1af51a366ae73c8893987cdafb081b935 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Wed, 26 Jun 2013 11:56:10 -0600 Subject: [PATCH 70/93] clk: arm64: Add DTS clock entry for APM X-Gene Storm SoC clk: arm64: Add DTS clock entry for APM X-Gene Storm SoC with reference to reference, PCP PLL, SoC PLL, and Ethernet clocks. Signed-off-by: Loc Ho Signed-off-by: Kumar Sankaran Signed-off-by: Vinayak Kale Signed-off-by: Feng Kan Signed-off-by: Mike Turquette (cherry picked from commit 3eb15d84e355f4ea1acf0eaba11ca173de342107) Signed-off-by: Mark Brown --- arch/arm64/boot/dts/apm-storm.dtsi | 75 ++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index bfdc57834929..d37d7369e260 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -103,6 +103,81 @@ #size-cells = <2>; ranges; + clocks { + #address-cells = <2>; + #size-cells = <2>; + ranges; + refclk: refclk { + compatible = "fixed-clock"; + #clock-cells = <1>; + clock-frequency = <100000000>; + clock-output-names = "refclk"; + }; + + pcppll: pcppll@17000100 { + compatible = "apm,xgene-pcppll-clock"; + #clock-cells = <1>; + clocks = <&refclk 0>; + clock-names = "pcppll"; + reg = <0x0 0x17000100 0x0 0x1000>; + clock-output-names = "pcppll"; + type = <0>; + }; + + socpll: socpll@17000120 { + compatible = "apm,xgene-socpll-clock"; + #clock-cells = <1>; + clocks = <&refclk 0>; + clock-names = "socpll"; + reg = <0x0 0x17000120 0x0 0x1000>; + clock-output-names = "socpll"; + type = <1>; + }; + + socplldiv2: socplldiv2 { + compatible = "fixed-factor-clock"; + #clock-cells = <1>; + clocks = <&socpll 0>; + clock-names = "socplldiv2"; + clock-mult = <1>; + clock-div = <2>; + clock-output-names = "socplldiv2"; + }; + + qmlclk: qmlclk { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + clock-names = "qmlclk"; + reg = <0x0 0x1703C000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "qmlclk"; + }; + + ethclk: ethclk { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + clock-names = "ethclk"; + reg = <0x0 0x17000000 0x0 0x1000>; + reg-names = "div-reg"; + divider-offset = <0x238>; + divider-width = <0x9>; + divider-shift = <0x0>; + clock-output-names = "ethclk"; + }; + + eth8clk: eth8clk { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <ðclk 0>; + clock-names = "eth8clk"; + reg = <0x0 0x1702C000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "eth8clk"; + }; + }; + serial0: serial@1c020000 { device_type = "serial"; compatible = "ns16550"; From 5aa580e940c4ba9027b348729a53ee3cd4032cce Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 15:54:26 +0100 Subject: [PATCH 71/93] arm64: locks: introduce ticket-based spinlock implementation This patch introduces a ticket lock implementation for arm64, along the same lines as the implementation for arch/arm/. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 52ea2a560a9dba57fe5fd6b4726b1089751accf2) Signed-off-by: Mark Brown --- arch/arm64/include/asm/spinlock.h | 79 +++++++++++++++++-------- arch/arm64/include/asm/spinlock_types.h | 10 ++-- 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 0defa0728a9b..525dd535443e 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -22,17 +22,10 @@ /* * Spinlock implementation. * - * The old value is read exclusively and the new one, if unlocked, is written - * exclusively. In case of failure, the loop is restarted. - * * The memory barriers are implicit with the load-acquire and store-release * instructions. - * - * Unlocked value: 0 - * Locked value: 1 */ -#define arch_spin_is_locked(x) ((x)->lock != 0) #define arch_spin_unlock_wait(lock) \ do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) @@ -41,32 +34,51 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp; + arch_spinlock_t lockval, newval; asm volatile( - " sevl\n" - "1: wfe\n" - "2: ldaxr %w0, %1\n" - " cbnz %w0, 1b\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 2b\n" - : "=&r" (tmp), "+Q" (lock->lock) - : "r" (1) - : "cc", "memory"); + /* Atomically increment the next ticket. */ +" prfm pstl1strm, %3\n" +"1: ldaxr %w0, %3\n" +" add %w1, %w0, %w5\n" +" stxr %w2, %w1, %3\n" +" cbnz %w2, 1b\n" + /* Did we get the lock? */ +" eor %w1, %w0, %w0, ror #16\n" +" cbz %w1, 3f\n" + /* + * No: spin on the owner. Send a local event to avoid missing an + * unlock before the exclusive load. + */ +" sevl\n" +"2: wfe\n" +" ldaxrh %w2, %4\n" +" eor %w1, %w2, %w0, lsr #16\n" +" cbnz %w1, 2b\n" + /* We got the lock. Critical section starts here. */ +"3:" + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) + : "Q" (lock->owner), "I" (1 << TICKET_SHIFT) + : "memory"); } static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int tmp; + arch_spinlock_t lockval; asm volatile( - "2: ldaxr %w0, %1\n" - " cbnz %w0, 1f\n" - " stxr %w0, %w2, %1\n" - " cbnz %w0, 2b\n" - "1:\n" - : "=&r" (tmp), "+Q" (lock->lock) - : "r" (1) - : "cc", "memory"); +" prfm pstl1strm, %2\n" +"1: ldaxr %w0, %2\n" +" eor %w1, %w0, %w0, ror #16\n" +" cbnz %w1, 2f\n" +" add %w0, %w0, %3\n" +" stxr %w1, %w0, %2\n" +" cbnz %w1, 1b\n" +"2:" + : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) + : "I" (1 << TICKET_SHIFT) + : "memory"); return !tmp; } @@ -74,10 +86,25 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { asm volatile( - " stlr %w1, %0\n" - : "=Q" (lock->lock) : "r" (0) : "memory"); +" stlrh %w1, %0\n" + : "=Q" (lock->owner) + : "r" (lock->owner + 1) + : "memory"); } +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + arch_spinlock_t lockval = ACCESS_ONCE(*lock); + return lockval.owner != lockval.next; +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + arch_spinlock_t lockval = ACCESS_ONCE(*lock); + return (lockval.next - lockval.owner) > 1; +} +#define arch_spin_is_contended arch_spin_is_contended + /* * Write lock implementation. * diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index 9a494346efed..87692750ed94 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -20,14 +20,14 @@ # error "please don't include this file directly" #endif -/* We only require natural alignment for exclusive accesses. */ -#define __lock_aligned +#define TICKET_SHIFT 16 typedef struct { - volatile unsigned int lock; -} arch_spinlock_t; + u16 owner; + u16 next; +} __aligned(4) arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 , 0 } typedef struct { volatile unsigned int lock; From 9f89c68bf6d720eae561dc5a796f727a57e8daea Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 15:54:27 +0100 Subject: [PATCH 72/93] arm64: lockref: add support for lockless lockrefs using cmpxchg Our spinlocks are only 32-bit (2x16-bit tickets) and our cmpxchg can deal with 8-bytes (as one would hope!). This patch wires up the cmpxchg-based lockless lockref implementation for arm64. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 5686b06cea34e31ec0a549d9b5ac00776e8e8d6d) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/spinlock.h | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 52077610cb7e..9115252e5425 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 525dd535443e..3d5cf064d7a1 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -92,10 +92,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) : "memory"); } +static inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return lock.owner == lock.next; +} + static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - arch_spinlock_t lockval = ACCESS_ONCE(*lock); - return lockval.owner != lockval.next; + return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) From 9e7ced5a249da2bec86d53d0e11e90b6a8e1afe2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Oct 2013 15:54:28 +0100 Subject: [PATCH 73/93] arm64: cmpxchg: implement cmpxchg64_relaxed This patch introduces cmpxchg64_relaxed for arm64 using the existing cmpxchg_local macro, which performs a cmpxchg operation (up to 64 bits) without barrier semantics. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit cf10b79a7d88edc689479af989b3a88e9adf07ff) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cmpxchg.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 8a8ce0e73a38..3914c0dcd09c 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -173,4 +173,6 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) +#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n)) + #endif /* __ASM_CMPXCHG_H */ From 9109f9215e9e238f0ba4d34b048f0d729a954460 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 7 Oct 2013 16:42:05 +0100 Subject: [PATCH 74/93] arm64: Export __copy_in_user() to modules This function may be called from loadable modules, so it needs exporting. Signed-off-by: Catalin Marinas Reported-by: Loc Ho (cherry picked from commit 2a3f912c782f2364f5e5813ab66ca6c92fb43acb) Signed-off-by: Mark Brown --- arch/arm64/kernel/arm64ksyms.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 41b4f626d554..e7ee770c0697 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -39,6 +39,7 @@ EXPORT_SYMBOL(clear_page); EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__copy_in_user); /* physical memory */ EXPORT_SYMBOL(memstart_addr); From b357e64d1fb045568adeacc7e504788401d9db9b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 7 Oct 2013 18:30:34 +0100 Subject: [PATCH 75/93] arm64: update 32-bit kuser helpers to ARMv8 This patch updates the barrier semantics in the kuser helper functions to take advantage of the ARMv8 additions to AArch32, which are guaranteed to be available in situations where these functions will be called. Note that this slightly changes the cmpxchg functions in that they are no longer necessarily full barriers if they return 1. However, the documentation only states they include their own barriers "as needed", not that they are obligated to act as a full barrier for the caller. Signed-off-by: Robin Murphy Acked-by: Will Deacon CC: Matthew Leach CC: Dave Martin CC: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit d0f38f9130b7683e39611c5a661349e301ee43c8) Signed-off-by: Mark Brown --- arch/arm64/kernel/kuser32.S | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 8b69ecb1d8bc..f2754710f5e9 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -35,33 +35,30 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xf57ff05f // dmb sy - .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] + .inst 0xe1b20e9f // 1: ldaexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 - .inst 0x01a23f96 // strexdeq r3, r6, [r2] + .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b - .inst 0xf57ff05f // dmb sy .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr .align 5 __kuser_memory_barrier: // 0xffff0fa0 - .inst 0xf57ff05f // dmb sy + .inst 0xf57ff05b // dmb ish .inst 0xe12fff1e // bx lr .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xf57ff05f // dmb sy - .inst 0xe1923f9f // 1: ldrex r3, [r2] + .inst 0xe1923e9f // 1: ldaex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 - .inst 0x01823f91 // strexeq r3, r1, [r2] + .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xeaffffef // b <__kuser_memory_barrier> + .inst 0xe12fff1e // bx lr .align 5 __kuser_get_tls: // 0xffff0fe0 From b35f594dc7939d9be5fcae354738a419cc0c081e Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 4 Nov 2013 16:38:47 +0000 Subject: [PATCH 76/93] arm64: use generic RW_DATA_SECTION macro in linker script The .data section in the arm64 linker script currently lacks a definition for page-aligned data. This leads to a .page_aligned section being placed between the end of data and start of bss. This patch corrects that by using the generic RW_DATA_SECTION macro which includes support for page-aligned data. Signed-off-by: Mark Salter Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 3c620626c0cd4cfca856d70a846398275b48a768) Signed-off-by: Mark Brown --- arch/arm64/kernel/vmlinux.lds.S | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 1e8e6be0241c..047feef69828 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -83,30 +83,13 @@ SECTIONS PERCPU_SECTION(64) __init_end = .; - . = ALIGN(THREAD_SIZE); - __data_loc = .; - .data : AT(__data_loc) { - _data = .; /* address in memory */ - _sdata = .; - - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(64) - READ_MOSTLY_DATA(64) - - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; - } + . = ALIGN(PAGE_SIZE); + _data = .; + __data_loc = _data - LOAD_OFFSET; + _sdata = .; + RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + _edata = .; _edata_loc = __data_loc + SIZEOF(.data); BSS_SECTION(0, 0, 0) From d5c5e6a15e4786dc21a08937821b6365466d6e34 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Nov 2013 20:14:58 +0000 Subject: [PATCH 77/93] arm64: fix access to preempt_count from assembly code preempt_count is defined as an int. Oddly enough, we access it as a 64bit value. Things become interesting when running a BE kernel, and looking at the current CPU number, which is stored as an int next to preempt_count. Like in a per-cpu interrupt handler, for example... Using a 32bit access fixes the issue for good. Cc: Matthew Leach Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas (cherry picked from commit 717321fcb58ed95169bf344ae47ac6098ba5dfbe) Signed-off-by: Mark Brown --- arch/arm64/kernel/entry.S | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3881fd115ebb..e1166145ca29 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -311,14 +311,14 @@ el1_irq: #endif #ifdef CONFIG_PREEMPT get_thread_info tsk - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x0, x24, #1 // increment it - str x0, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w0, w24, #1 // increment it + str w0, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - str x24, [tsk, #TI_PREEMPT] // restore preempt count - cbnz x24, 1f // preempt count != 0 + str w24, [tsk, #TI_PREEMPT] // restore preempt count + cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt @@ -509,15 +509,15 @@ el0_irq_naked: #endif get_thread_info tsk #ifdef CONFIG_PREEMPT - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x23, x24, #1 // increment it - str x23, [tsk, #TI_PREEMPT] + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + add w23, w24, #1 // increment it + str w23, [tsk, #TI_PREEMPT] #endif irq_handler #ifdef CONFIG_PREEMPT - ldr x0, [tsk, #TI_PREEMPT] - str x24, [tsk, #TI_PREEMPT] - cmp x0, x23 + ldr w0, [tsk, #TI_PREEMPT] + str w24, [tsk, #TI_PREEMPT] + cmp w0, w23 b.eq 1f mov x1, #0 str x1, [x1] // BUG From 4b905a8fdae8797cebe274d0150b4a1799b98a3e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 23 Oct 2013 16:50:07 +0100 Subject: [PATCH 78/93] arm64: Use 42-bit address space with 64K pages This patch expands the VA_BITS to 42 when the 64K page configuration is enabled allowing 2TB kernel linear mapping. Linux still uses 2 levels of page tables in this configuration with pgd now being a full page. Signed-off-by: Catalin Marinas Acked-by: Will Deacon Acked-by: Marc Zyngier (cherry picked from commit 847264fb7e73ade5b5e4b6eea3daa243a1f5217e) Signed-off-by: Mark Brown --- Documentation/arm64/memory.txt | 27 ++++++++++++++++++- arch/arm64/include/asm/memory.h | 11 +++++--- arch/arm64/include/asm/pgtable-2level-hwdef.h | 4 +-- arch/arm64/include/asm/pgtable.h | 2 +- 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 5f583af0a6e1..a776d7a7c3cc 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -21,7 +21,7 @@ The swapper_pgd_dir address is written to TTBR1 and never written to TTBR0. -AArch64 Linux memory layout: +AArch64 Linux memory layout with 4KB pages: Start End Size Use ----------------------------------------------------------------------- @@ -46,6 +46,31 @@ ffffffbffc000000 ffffffbfffffffff 64MB modules ffffffc000000000 ffffffffffffffff 256GB kernel logical memory map +AArch64 Linux memory layout with 64KB pages: + +Start End Size Use +----------------------------------------------------------------------- +0000000000000000 000003ffffffffff 4TB user + +fffffc0000000000 fffffdfbfffeffff ~2TB vmalloc + +fffffdfbffff0000 fffffdfbffffffff 64KB [guard page] + +fffffdfc00000000 fffffdfdffffffff 8GB vmemmap + +fffffdfe00000000 fffffdfffbbfffff ~8GB [guard, future vmmemap] + +fffffdfffbc00000 fffffdfffbdfffff 2MB earlyprintk device + +fffffdfffbe00000 fffffdfffbe0ffff 64KB PCI I/O space + +fffffdfffbe10000 fffffdfffbffffff ~2MB [guard] + +fffffdfffc000000 fffffdffffffffff 64MB modules + +fffffe0000000000 ffffffffffffffff 2TB kernel logical memory map + + Translation table lookup with 4KB pages: +--------+--------+--------+--------+--------+--------+--------+--------+ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 381f556b664e..9abb57d743b9 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -33,18 +33,23 @@ #define UL(x) _AC(x, UL) /* - * PAGE_OFFSET - the virtual address of the start of the kernel image. + * PAGE_OFFSET - the virtual address of the start of the kernel image (top + * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ -#define PAGE_OFFSET UL(0xffffffc000000000) +#ifdef CONFIG_ARM64_64K_PAGES +#define VA_BITS (42) +#else +#define VA_BITS (39) +#endif +#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) #define EARLYCON_IOBASE (MODULES_VADDR - SZ_4M) -#define VA_BITS (39) #define TASK_SIZE_64 (UL(1) << VA_BITS) #ifdef CONFIG_COMPAT diff --git a/arch/arm64/include/asm/pgtable-2level-hwdef.h b/arch/arm64/include/asm/pgtable-2level-hwdef.h index 0a8ed3f94e93..2593b490c56a 100644 --- a/arch/arm64/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm64/include/asm/pgtable-2level-hwdef.h @@ -21,10 +21,10 @@ * 8192 entries of 8 bytes each, occupying a 64KB page. Levels 0 and 1 are not * used. The 2nd level table (PGD for Linux) can cover a range of 4TB, each * entry representing 512MB. The user and kernel address spaces are limited to - * 512GB and therefore we only use 1024 entries in the PGD. + * 4TB in the 64KB page configuration. */ #define PTRS_PER_PTE 8192 -#define PTRS_PER_PGD 1024 +#define PTRS_PER_PGD 8192 /* * PGDIR_SHIFT determines the size a top-level page table entry can map. diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 508ee304fde0..608ec24f2f52 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -34,7 +34,7 @@ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. */ -#define VMALLOC_START UL(0xffffff8000000000) +#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) From 3cf0e16070078af9afdd9a067af9ab64f322ebb8 Mon Sep 17 00:00:00 2001 From: "T.J. Purtell" Date: Tue, 5 Nov 2013 17:07:18 +0000 Subject: [PATCH 79/93] arm64: compat: Clear the IT state independent of the 32-bit ARM or Thumb-2 mode The ARM architecture reference specifies that the IT state bits in the PSR must be all zeros in ARM mode or behavior is unspecified. If an ARM function is registered as a signal handler, and that signal is delivered inside a block of instructions following an IT instruction, some of the instructions at the beginning of the signal handler may be skipped if the IT state bits of the Program Status Register are not cleared by the kernel. Signed-off-by: T.J. Purtell [catalin.marinas@arm.com: code comment and commit log updated] Signed-off-by: Catalin Marinas (cherry picked from commit aa62c2091129af81a172350b718eb35d5448cebc) Signed-off-by: Mark Brown --- arch/arm64/kernel/signal32.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e393174fe859..3edf7f48c54b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -474,12 +474,13 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Check if the handler is written for ARM or Thumb */ thumb = handler & 1; - if (thumb) { + if (thumb) spsr |= COMPAT_PSR_T_BIT; - spsr &= ~COMPAT_PSR_IT_MASK; - } else { + else spsr &= ~COMPAT_PSR_T_BIT; - } + + /* The IT state must be cleared for both ARM and Thumb-2 */ + spsr &= ~COMPAT_PSR_IT_MASK; if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); From ff3986a150bbea5ef6c34b0cecdc23e230ef19ab Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 6 Nov 2013 11:42:41 +0000 Subject: [PATCH 80/93] arm64: locks: Remove CONFIG_GENERIC_LOCKBREAK Commit 52ea2a560a9d (arm64: locks: introduce ticket-based spinlock implementation) introduces the arch_spin_is_contended() function making CONFIG_GENERIC_LOCKBREAK unnecessary. Signed-off-by: Catalin Marinas Acked-by: Will Deacon (cherry picked from commit 61c77e0802719efce8966619cdc4234de7f252c1) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9115252e5425..0f24a8515841 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -63,10 +63,6 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config GENERIC_LOCKBREAK - def_bool y - depends on SMP && PREEMPT - config RWSEM_GENERIC_SPINLOCK def_bool y From 3b1bf3873e56409bbb5b2ec29d6ba418fd69b9fe Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Thu, 7 Nov 2013 15:25:44 +0000 Subject: [PATCH 81/93] ARM64: /proc/interrupts: display IPIs of online CPUs only The non-IPI interrupts are displayed only for the online cpus from show_interrupts in kernel/irq/proc.c before calling arch_show_interrupts(). As a result, the column headers and the IPI count don't match if any CPU is offline. This patch fixes show_ipi_list to display IPIs for online CPUs only. Signed-off-by: Sudeep KarkadaNagesha Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 67317c2689567c24d18e0dd43ab6d409fd42dc6e) Signed-off-by: Mark Brown --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index edbfe241eacb..97eaf79319d5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -451,7 +451,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE, prec >= 4 ? " " : ""); - for_each_present_cpu(cpu) + for_each_online_cpu(cpu) seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs[i])); seq_printf(p, " %s\n", ipi_types[i]); From 555fdaf51b5022858eb79151af4c780b3f4e6b59 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Dec 2013 16:11:00 +0000 Subject: [PATCH 82/93] arm64: ensure completion of TLB invalidatation Currently there is no dsb between the tlbi in __cpu_setup and the write to SCTLR_EL1 which enables the MMU in __turn_mmu_on. This means that the TLB invalidation is not guaranteed to have completed at the point address translation is enabled, leading to a number of possible issues including incorrect translations and TLB conflict faults. This patch moves the tlbi in __cpu_setup above an existing dsb used to synchronise I-cache invalidation, ensuring that the TLBs have been invalidated at the point the MMU is enabled. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 3cea71bc6b470372ae407881b87128aadf0afec0) Signed-off-by: Mark Brown --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b1b31bbc967b..2fbd93e9be15 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -111,12 +111,12 @@ ENTRY(__cpu_setup) bl __flush_dcache_all mov lr, x28 ic iallu // I+BTB cache invalidate + tlbi vmalle1is // invalidate I + D TLBs dsb sy mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD msr mdscr_el1, xzr // Reset mdscr_el1 - tlbi vmalle1is // invalidate I + D TLBs /* * Memory region attributes for LPAE: * From 8751cf9a6c5f5bf02a2de8a4c025e09b38556df4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 22 Nov 2013 21:07:31 +0000 Subject: [PATCH 83/93] arm64: make default NR_CPUS 8 Rather than continue to add per platform defaults, make the default a likely common core count. 8 is also the default for x86. Signed-off-by: Rob Herring Cc: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 62aceb8ff4b3f442575eb7e23629da36020dca77) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f24a8515841..dba7308041ad 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -152,8 +152,7 @@ config NR_CPUS range 2 32 depends on SMP # These have to remain sorted largest to smallest - default "8" if ARCH_XGENE - default "4" + default "8" source kernel/Kconfig.preempt From edf0cf19ba3b6c750871b391c5edcb679c02daad Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 Nov 2013 18:10:47 +0000 Subject: [PATCH 84/93] arm64: percpu: implement optimised pcpu access using tpidr_el1 This patch implements optimised percpu variable accesses using the el1 r/w thread register (tpidr_el1) along the same lines as arch/arm/. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 7158627686f02319c50c8d9d78f75d4c8d126ff2) Signed-off-by: Mark Brown --- arch/arm64/include/asm/Kbuild | 1 - arch/arm64/include/asm/percpu.h | 41 +++++++++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 10 ++++++++ arch/arm64/kernel/smp.c | 6 +++-- 4 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/include/asm/percpu.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 79a642d199f2..ae0612c4fa45 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -26,7 +26,6 @@ generic-y += mman.h generic-y += msgbuf.h generic-y += mutex.h generic-y += pci.h -generic-y += percpu.h generic-y += poll.h generic-y += posix_types.h generic-y += resource.h diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h new file mode 100644 index 000000000000..13fb0b3efc5f --- /dev/null +++ b/arch/arm64/include/asm/percpu.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_PERCPU_H +#define __ASM_PERCPU_H + +static inline void set_my_cpu_offset(unsigned long off) +{ + asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); +} + +static inline unsigned long __my_cpu_offset(void) +{ + unsigned long off; + register unsigned long *sp asm ("sp"); + + /* + * We want to allow caching the value, so avoid using volatile and + * instead use a fake stack read to hazard against barrier(). + */ + asm("mrs %0, tpidr_el1" : "=r" (off) : "Q" (*sp)); + + return off; +} +#define __my_cpu_offset __my_cpu_offset() + +#include + +#endif /* __ASM_PERCPU_H */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3bab6766eed5..43c89cbb30fc 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -98,6 +98,16 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +void __init smp_setup_processor_id(void) +{ + /* + * clear __my_cpu_offset on boot CPU to avoid hang caused by + * using percpu variable early, for example, lockdep will + * access percpu variable inside lock_release + */ + set_my_cpu_offset(0); +} + bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return phys_id == cpu_logical_map(cpu); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 53ed158ec31c..08030da7f3c2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -122,8 +122,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); - printk("CPU%u: Booted secondary processor\n", cpu); - /* * All kernel threads share the same mm context; grab a * reference and switch to it. @@ -132,6 +130,9 @@ asmlinkage void __cpuinit secondary_start_kernel(void) current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + printk("CPU%u: Booted secondary processor\n", cpu); + /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. @@ -274,6 +275,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } static void (*smp_cross_call)(const struct cpumask *, unsigned int); From 18ce3c2a82fb1a7461b8d63cd54a40a47067be47 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Nov 2013 17:20:22 +0000 Subject: [PATCH 85/93] arm64: use generic strnlen_user and strncpy_from_user functions This patch implements the word-at-a-time interface for arm64 using the same algorithm as ARM. We use the fls64 macro, which expands to a clz instruction via a compiler builtin. Big-endian configurations make use of the implementation from asm-generic. With this implemented, we can replace our byte-at-a-time strnlen_user and strncpy_from_user functions with the optimised generic versions. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 12a0ef7b0ac38677bd2d85f33df5ca0a57868819) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 2 + arch/arm64/include/asm/uaccess.h | 25 ++---------- arch/arm64/include/asm/word-at-a-time.h | 54 +++++++++++++++++++++++++ arch/arm64/kernel/arm64ksyms.c | 5 +-- arch/arm64/lib/Makefile | 8 ++-- arch/arm64/lib/strncpy_from_user.S | 50 ----------------------- arch/arm64/lib/strnlen_user.S | 47 --------------------- 7 files changed, 64 insertions(+), 127 deletions(-) create mode 100644 arch/arm64/include/asm/word-at-a-time.h delete mode 100644 arch/arm64/lib/strncpy_from_user.S delete mode 100644 arch/arm64/lib/strnlen_user.S diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 40bbfe560551..8d555ca7cdfa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -16,6 +16,8 @@ config ARM64 select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND select HAVE_ARCH_TRACEHOOK diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 008f8481da65..8b181415e843 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -100,6 +100,7 @@ static inline void set_fs(mm_segment_t fs) }) #define access_ok(type, addr, size) __range_ok(addr, size) +#define user_addr_max get_fs /* * The "__xxx" versions of the user access functions do not verify the address @@ -238,9 +239,6 @@ extern unsigned long __must_check __copy_to_user(void __user *to, const void *fr extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); -extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count); -extern unsigned long __must_check __strnlen_user(const char __user *s, long n); - static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) @@ -274,24 +272,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return n; } -static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - res = __strncpy_from_user(dst, src, count); - return res; -} +extern long strncpy_from_user(char *dest, const char __user *src, long count); -#define strlen_user(s) strnlen_user(s, ~0UL >> 1) - -static inline long __must_check strnlen_user(const char __user *s, long n) -{ - unsigned long res = 0; - - if (__addr_ok(s)) - res = __strnlen_user(s, n); - - return res; -} +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); #endif /* __ASM_UACCESS_H */ diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..27a167d044d9 --- /dev/null +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_WORD_AT_A_TIME_H +#define __ASM_WORD_AT_A_TIME_H + +#ifndef __AARCH64EB__ + +#include + +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, + const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +#define prep_zero_mask(a, bits, c) (bits) + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +static inline unsigned long find_zero(unsigned long mask) +{ + return fls64(mask) >> 3; +} + +#else /* __AARCH64EB__ */ +#include +#endif + +#endif /* __ASM_WORD_AT_A_TIME_H */ diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index e7ee770c0697..338b568cd8ae 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -29,13 +29,10 @@ #include - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); + /* user mem (segment) */ EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 59acc0ef0462..328ce1a99daa 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,6 +1,4 @@ -lib-y := bitops.o delay.o \ - strncpy_from_user.o strnlen_user.o clear_user.o \ - copy_from_user.o copy_to_user.o copy_in_user.o \ - copy_page.o clear_page.o \ - memchr.o memcpy.o memmove.o memset.o \ +lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ + copy_to_user.o copy_in_user.o copy_page.o \ + clear_page.o memchr.o memcpy.o memmove.o memset.o \ strchr.o strrchr.o diff --git a/arch/arm64/lib/strncpy_from_user.S b/arch/arm64/lib/strncpy_from_user.S deleted file mode 100644 index 56e448a831a0..000000000000 --- a/arch/arm64/lib/strncpy_from_user.S +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Based on arch/arm/lib/strncpy_from_user.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - - .text - .align 5 - -/* - * Copy a string from user space to kernel space. - * x0 = dst, x1 = src, x2 = byte length - * returns the number of characters copied (strlen of copied string), - * -EFAULT on exception, or "len" if we fill the whole buffer - */ -ENTRY(__strncpy_from_user) - mov x4, x1 -1: subs x2, x2, #1 - bmi 2f -USER(9f, ldrb w3, [x1], #1 ) - strb w3, [x0], #1 - cbnz w3, 1b - sub x1, x1, #1 // take NUL character out of count -2: sub x0, x1, x4 - ret -ENDPROC(__strncpy_from_user) - - .section .fixup,"ax" - .align 0 -9: strb wzr, [x0] // null terminate - mov x0, #-EFAULT - ret - .previous diff --git a/arch/arm64/lib/strnlen_user.S b/arch/arm64/lib/strnlen_user.S deleted file mode 100644 index 7f7b176a5646..000000000000 --- a/arch/arm64/lib/strnlen_user.S +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Based on arch/arm/lib/strnlen_user.S - * - * Copyright (C) 1995-2000 Russell King - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - - .text - .align 5 - -/* Prototype: unsigned long __strnlen_user(const char *str, long n) - * Purpose : get length of a string in user memory - * Params : str - address of string in user memory - * Returns : length of string *including terminator* - * or zero on exception, or n if too long - */ -ENTRY(__strnlen_user) - mov x2, x0 -1: subs x1, x1, #1 - b.mi 2f -USER(9f, ldrb w3, [x0], #1 ) - cbnz w3, 1b -2: sub x0, x0, x2 - ret -ENDPROC(__strnlen_user) - - .section .fixup,"ax" - .align 0 -9: mov x0, #0 - ret - .previous From a9e7bd3bdf50acc92e9e04c56d06026c70f515ef Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Nov 2013 19:31:24 +0000 Subject: [PATCH 86/93] arm64: futex: ensure .fixup entries are sufficiently aligned AArch64 instructions must be 4-byte aligned, so make sure this is true for the futex .fixup section. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 4da7a56c59f28e27e8dcff61b5d7b05f6e203606) Signed-off-by: Mark Brown --- arch/arm64/include/asm/futex.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index c582fa316366..78cc3aba5d69 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,6 +30,7 @@ " cbnz %w3, 1b\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ +" .align 2\n" \ "4: mov %w0, %w5\n" \ " b 3b\n" \ " .popsection\n" \ From 43296ae8c72a377a4af761de90de4de7cde90ce9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 6 Nov 2013 19:32:13 +0000 Subject: [PATCH 87/93] arm64: dcache: select DCACHE_WORD_ACCESS for little-endian CPUs DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string comparisons in the vfs layer. This patch implements support for load_unaligned_zeropad in much the same way as has been done for ARM, although big-endian systems are also supported. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 7bc13fd33adb9536bd73965cd46bbf7377df097c) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/word-at-a-time.h | 40 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8d555ca7cdfa..075075f9d247 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,7 @@ config ARM64 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select DCACHE_WORD_ACCESS select GENERIC_CLOCKEVENTS select GENERIC_IOMAP select GENERIC_IRQ_PROBE diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 27a167d044d9..aab5bf09e9d9 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -47,8 +47,48 @@ static inline unsigned long find_zero(unsigned long mask) return fls64(mask) >> 3; } +#define zero_bytemask(mask) (mask) + #else /* __AARCH64EB__ */ #include #endif +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. + */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, offset; + + /* Load word from unaligned pointer addr */ + asm( + "1: ldr %0, %3\n" + "2:\n" + " .pushsection .fixup,\"ax\"\n" + " .align 2\n" + "3: and %1, %2, #0x7\n" + " bic %2, %2, #0x7\n" + " ldr %0, [%2]\n" + " lsl %1, %1, #0x3\n" +#ifndef __AARCH64EB__ + " lsr %0, %0, %1\n" +#else + " lsl %0, %0, %1\n" +#endif + " b 2b\n" + " .popsection\n" + " .pushsection __ex_table,\"a\"\n" + " .align 3\n" + " .quad 1b, 3b\n" + " .popsection" + : "=&r" (ret), "=&r" (offset) + : "r" (addr), "Q" (*(unsigned long *)addr)); + + return ret; +} + #endif /* __ASM_WORD_AT_A_TIME_H */ From e342d91c37452299405e70e80e4615890dec1c10 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 16 Dec 2013 17:50:08 +0000 Subject: [PATCH 88/93] arm64: kconfig: select HAVE_EFFICIENT_UNALIGNED_ACCESS ARMv8 CPUs can perform efficient unaligned memory accesses in hardware and this feature is relied up on by code such as the dcache word-at-a-time name hashing. This patch selects HAVE_EFFICIENT_UNALIGNED_ACCESS for arm64. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 50afc33a90e710c02d9bbf2f3673936365f0e690) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 075075f9d247..61ef3f0de6ee 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,6 +26,7 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS + select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_GENERIC_DMA_COHERENT select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if PERF_EVENTS From a62c1e2a0a220b20d942e0cc0ca204347b7b7aa6 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 5 Dec 2013 13:30:10 +0000 Subject: [PATCH 89/93] ARM64: check stack pointer in get_wchan get_wchan() is lockless. Task may wakeup at any time and change its own stack, thus each next stack frame may be overwritten and filled with random stuff. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Catalin Marinas (cherry picked from commit 408c3658b0d49315974ce8b5aed385c8e1527595) Signed-off-by: Mark Brown --- arch/arm64/kernel/process.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index adc68bf21d8b..02a41bba165d 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -308,6 +308,7 @@ struct task_struct *__switch_to(struct task_struct *prev, unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; + unsigned long stack_page; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; @@ -315,9 +316,11 @@ unsigned long get_wchan(struct task_struct *p) frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); + stack_page = (unsigned long)task_stack_page(p); do { - int ret = unwind_frame(&frame); - if (ret < 0) + if (frame.sp < stack_page || + frame.sp >= stack_page + THREAD_SIZE || + unwind_frame(&frame)) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; From 75448bec59b915885b3e84bca0d38acf34b5db61 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 5 Dec 2013 13:30:16 +0000 Subject: [PATCH 90/93] ARM64: fix framepointer check in unwind_frame We need at least 24 bytes above frame pointer. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Catalin Marinas (cherry picked from commit 26920dd2da79a3207803da9453c0e6c82ac968ca) Signed-off-by: Mark Brown --- arch/arm64/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 048334bb2651..38f0558f0c0a 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -43,7 +43,7 @@ int unwind_frame(struct stackframe *frame) low = frame->sp; high = ALIGN(low, THREAD_SIZE); - if (fp < low || fp > high || fp & 0xf) + if (fp < low || fp > high - 0x18 || fp & 0xf) return -EINVAL; frame->sp = fp + 0x10; From cc096d68fd9848cc0bd95719329184cc8bd14b2d Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 4 Dec 2013 10:09:50 +0000 Subject: [PATCH 91/93] genirq: Add an accessor for IRQ_PER_CPU flag This patch adds an accessor function for IRQ_PER_CPU flag. The accessor function is useful to determine whether an IRQ is percpu or not. This patch is based on an older patch posted by Chris Smith here [1]. There is a minor change w.r.t. Chris's original patch: The accessor function is renamed as 'irq_is_percpu' instead of 'irq_is_per_cpu'. [1]: http://lkml.indiana.edu/hypermail/linux/kernel/1207.3/02955.html Signed-off-by: Chris Smith Signed-off-by: Vinayak Kale Acked-by: Will Deacon Reviewed-by: Thomas Gleixner Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 7f4a8e7b1943c1fc7e4b08509e308197babdcd5b) Signed-off-by: Mark Brown --- include/linux/irqdesc.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 623325e2ff97..f4101f01286b 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -154,6 +154,14 @@ static inline int irq_balancing_disabled(unsigned int irq) return desc->status_use_accessors & IRQ_NO_BALANCING_MASK; } +static inline int irq_is_percpu(unsigned int irq) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + return desc->status_use_accessors & IRQ_PER_CPU; +} + static inline void irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) { From 429e821545bd45eb3362d13bc2268ea720d9ee55 Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 4 Dec 2013 10:09:51 +0000 Subject: [PATCH 92/93] arm64: perf: add support for percpu pmu interrupt Add support for irq registration when pmu interrupt is percpu. Signed-off-by: Vinayak Kale Signed-off-by: Tuan Phan [will: tidied up cross-calling to pass &irq] Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 66aa8d6a145b6a66566b4fce219cc56c3d0e01c3) Signed-off-by: Mark Brown --- arch/arm64/kernel/perf_event.c | 108 ++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 30 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9ba33c40cdf8..60a41b9292a5 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -351,27 +352,54 @@ validate_group(struct perf_event *event) return 0; } +static void +armpmu_disable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + disable_percpu_irq(irq); +} + static void armpmu_release_hardware(struct arm_pmu *armpmu) { - int i, irq, irqs; + int irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (!irqs) + return; - for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, armpmu); + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) + return; + + if (irq_is_percpu(irq)) { + on_each_cpu(armpmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &cpu_hw_events); + } else { + for (i = 0; i < irqs; ++i) { + if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq > 0) + free_irq(irq, armpmu); + } } } +static void +armpmu_enable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int i, err, irq, irqs; + int err, irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; if (!pmu_device) { @@ -380,39 +408,59 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) } irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { + if (!irqs) { pr_err("no irqs for PMUs defined\n"); return -ENODEV; } - for (i = 0; i < irqs; ++i) { - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) { + pr_err("failed to get valid irq for PMU device\n"); + return -ENODEV; + } - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); - continue; - } + if (irq_is_percpu(irq)) { + err = request_percpu_irq(irq, armpmu->handle_irq, + "arm-pmu", &cpu_hw_events); - err = request_irq(irq, armpmu->handle_irq, - IRQF_NOBALANCING, - "arm-pmu", armpmu); if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); + pr_err("unable to request percpu IRQ%d for ARM PMU counters\n", + irq); armpmu_release_hardware(armpmu); return err; } - cpumask_set_cpu(i, &armpmu->active_irqs); + on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq <= 0) + continue; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); + continue; + } + + err = request_irq(irq, armpmu->handle_irq, + IRQF_NOBALANCING, + "arm-pmu", armpmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + armpmu_release_hardware(armpmu); + return err; + } + + cpumask_set_cpu(i, &armpmu->active_irqs); + } } return 0; From f15a6a129d6bee7e6a756eaaf6eddf7ed672eaba Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 18 May 2014 18:36:28 +0100 Subject: [PATCH 93/93] configs: Make cpufreq not set for RT Not sure this is going to do the right thing but it fixes errors that the CI sees. Why isn't the RT testing just setting the performance governor anyway? Signed-off-by: Mark Brown --- linaro/configs/preempt-rt.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linaro/configs/preempt-rt.conf b/linaro/configs/preempt-rt.conf index f8873a6dfb32..98e036d75442 100644 --- a/linaro/configs/preempt-rt.conf +++ b/linaro/configs/preempt-rt.conf @@ -1,4 +1,4 @@ CONFIG_PREEMPT=y CONFIG_PREEMPT_RT_FULL=y CONFIG_SLUB=y -CONFIG_CPU_FREQ=n +# CONFIG_CPU_FREQ is not set