From ab7ccd91004b55c29910c999f51bc40880646810 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 2 Jun 2014 11:47:23 +0100 Subject: [PATCH 001/214] arm64: ptrace: change fs when passing kernel pointer to regset code Our compat PTRACE_POKEUSR implementation simply passes the user data to regset_copy_from_user after some simple range checking. Unfortunately, the data in question has already been copied to the kernel stack by this point, so the subsequent access_ok check fails and the ptrace request returns -EFAULT. This causes problems tracing fork() with older versions of strace. This patch briefly changes the fs to KERNEL_DS, so that the access_ok check passes even with a kernel address. Signed-off-by: Will Deacon Cc: Signed-off-by: Catalin Marinas (cherry picked from commit c168870704bcde6bb63d05f7882b620dd3985a46) Signed-off-by: Mark Brown --- arch/arm64/kernel/ptrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 5341534b6d04..85536688f753 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -872,6 +872,7 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t val) { int ret; + mm_segment_t old_fs = get_fs(); if (off & 3 || off >= COMPAT_USER_SZ) return -EIO; @@ -879,10 +880,13 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, if (off >= sizeof(compat_elf_gregset_t)) return 0; + set_fs(KERNEL_DS); ret = copy_regset_from_user(tsk, &user_aarch32_view, REGSET_COMPAT_GPR, off, sizeof(compat_ulong_t), &val); + set_fs(old_fs); + return ret; } From dd7d4626b70d246242c57c4a9ffaecb961570406 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 3 Jun 2014 19:21:30 +0100 Subject: [PATCH 002/214] arm64: ptrace: fix empty registers set in prstatus of aarch32 process core Currently core file of aarch32 process prstatus note has empty registers set. As result aarch32 core files create by V8 kernel are not very useful. It happens because compat_gpr_get and compat_gpr_set functions can copy registers values to/from either kbuf or ubuf. ELF core file collection function fill_thread_core_info calls compat_gpr_get with kbuf set and ubuf set to 0. But current compat_gpr_get and compat_gpr_set function handle copy to/from only ubuf case. Fix is to handle kbuf and ubuf as two separate cases in similar way as other functions like user_regset_copyout, user_regset_copyin do. Signed-off-by: Victor Kamensky Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas (cherry picked from commit 2227901a0230d8fde81ba9c602d649839390f56b) Signed-off-by: Mark Brown Conflicts: arch/arm64/kernel/ptrace.c --- arch/arm64/kernel/ptrace.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 85536688f753..9212dc4bcd09 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -672,12 +672,16 @@ static int compat_gpr_get(struct task_struct *target, reg = (void *)&task_pt_regs(target)->regs[idx]; } - ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); + if (kbuf) { + memcpy(kbuf, ®, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_to_user(ubuf, ®, sizeof(reg)); + if (ret) + break; - if (ret) - break; - else - ubuf += sizeof(compat_ulong_t); + ubuf += sizeof(reg); + } } return ret; @@ -705,7 +709,18 @@ static int compat_gpr_set(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; + + if (kbuf) { + memcpy(®, kbuf, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_from_user(®, ubuf, sizeof(reg)); + if (ret) + return ret; + + ubuf += sizeof(reg); + } switch (idx) { case 15: From 1c35c017ac61c43e75d49218f7a64d9455905b97 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 2 May 2014 16:24:13 +0100 Subject: [PATCH 003/214] arm64: head: fix cache flushing and barriers in set_cpu_boot_mode_flag set_cpu_boot_mode_flag is used to identify which exception levels are encountered across the system by CPUs trying to enter the kernel. The basic algorithm is: if a CPU is booting at EL2, it will set a flag at an offset of #4 from __boot_cpu_mode, a cacheline-aligned variable. Otherwise, a flag is set at an offset of zero into the same cacheline. This enables us to check that all CPUs booted at the same exception level. This cacheline is written with the stage-1 MMU off (that is, via a strongly-ordered mapping) and will bypass any clean lines in the cache, leading to potential coherence problems when the variable is later checked via the normal, cacheable mapping of the kernel image. This patch reworks the broken flushing code so that we: (1) Use a DMB to order the strongly-ordered write of the cacheline against the subsequent cache-maintenance operation (by-VA operations only hazard against normal, cacheable accesses). (2) Use a single dc ivac instruction to invalidate any clean lines containing a stale copy of the line after it has been updated. Acked-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit d0488597a1b7105957b6d7d1bb0b6ee88aa51b37) Signed-off-by: Mark Brown --- arch/arm64/kernel/head.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 39a8a83f1883..f49bb0f5a7f8 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -207,11 +207,9 @@ ENTRY(set_cpu_boot_mode_flag) cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 -1: dc cvac, x1 // Clean potentially dirty cache line - dsb sy - str w20, [x1] // This CPU has booted in EL1 - dc civac, x1 // Clean&invalidate potentially stale cache line - dsb sy +1: str w20, [x1] // This CPU has booted in EL1 + dmb sy + dc ivac, x1 // Invalidate potentially stale cache line ret ENDPROC(set_cpu_boot_mode_flag) From 738497b67fdd04067ce70c47b6f9b4488274823c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 2 May 2014 16:24:15 +0100 Subject: [PATCH 004/214] arm64: mm: use inner-shareable barriers for inner-shareable maintenance In order to ensure ordering and completion of inner-shareable maintenance instructions (cache and TLB) on AArch64, we can use the -ish suffix to the dmb and dsb instructions respectively. This patch updates our low-level cache and tlb maintenance routines to use the inner-shareable barrier variants where appropriate. Acked-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit dc60b777fcdddbadab111028e266fd69d4702b34) Signed-off-by: Mark Brown --- arch/arm64/mm/cache.S | 6 +++--- arch/arm64/mm/proc.S | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index fda756875fa6..23663837acff 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -31,7 +31,7 @@ * Corrupted registers: x0-x7, x9-x11 */ __flush_dcache_all: - dsb sy // ensure ordering with previous memory accesses + dmb sy // ensure ordering with previous memory accesses mrs x0, clidr_el1 // read clidr and x3, x0, #0x7000000 // extract loc from clidr lsr x3, x3, #23 // left align loc bit field @@ -128,7 +128,7 @@ USER(9f, dc cvau, x4 ) // clean D line to PoU add x4, x4, x2 cmp x4, x1 b.lo 1b - dsb sy + dsb ish icache_line_size x2, x3 sub x3, x2, #1 @@ -139,7 +139,7 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU cmp x4, x1 b.lo 1b 9: // ignore any faulting cache operation - dsb sy + dsb ish isb ret ENDPROC(flush_icache_range) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 55a40f6dbf78..e85bf0667bd7 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -106,7 +106,7 @@ ENDPROC(cpu_do_switch_mm) ENTRY(__cpu_setup) ic iallu // I+BTB cache invalidate tlbi vmalle1is // invalidate I + D TLBs - dsb sy + dsb ish mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD From 65da4b09ed98b49bfa85069fa4fc0b24d01c2e15 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 21 May 2013 14:24:11 +0000 Subject: [PATCH 005/214] arm: introduce psci_smp_ops Rename virt_smp_ops to psci_smp_ops and move them to arch/arm/kernel/psci_smp.c. Remove mach-virt/platsmp.c, now unused. Compile psci_smp if CONFIG_ARM_PSCI and CONFIG_SMP. Add a cpu_die smp_op based on psci_ops.cpu_off. Initialize PSCI before setting smp_ops in setup_arch. If PSCI is available on the platform, prefer psci_smp_ops over the platform smp_ops. Signed-off-by: Stefano Stabellini Acked-by: Will Deacon CC: arnd@arndb.de CC: marc.zyngier@arm.com CC: linux@arm.linux.org.uk CC: nico@linaro.org CC: rob.herring@calxeda.com (cherry picked from commit 05774088391c7430f6a4c1d5d18196ef17bb3ba9) Signed-off-by: Mark Brown --- arch/arm/include/asm/psci.h | 9 ++++ arch/arm/kernel/Makefile | 5 ++- arch/arm/kernel/psci.c | 7 ++- arch/arm/kernel/psci_smp.c | 84 ++++++++++++++++++++++++++++++++++++ arch/arm/kernel/setup.c | 7 ++- arch/arm/mach-virt/Makefile | 1 - arch/arm/mach-virt/platsmp.c | 50 --------------------- arch/arm/mach-virt/virt.c | 3 -- 8 files changed, 106 insertions(+), 60 deletions(-) create mode 100644 arch/arm/kernel/psci_smp.c delete mode 100644 arch/arm/mach-virt/platsmp.c diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index ce0dbe7c1625..c4ae171850f8 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -32,5 +32,14 @@ struct psci_operations { }; extern struct psci_operations psci_ops; +extern struct smp_operations psci_smp_ops; + +#ifdef CONFIG_ARM_PSCI +void psci_init(void); +bool psci_smp_available(void); +#else +static inline void psci_init(void) { } +static inline bool psci_smp_available(void) { return false; } +#endif #endif /* __ASM_ARM_PSCI_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad2..dd9d90ab65d0 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -82,6 +82,9 @@ obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o -obj-$(CONFIG_ARM_PSCI) += psci.o +ifeq ($(CONFIG_ARM_PSCI),y) +obj-y += psci.o +obj-$(CONFIG_SMP) += psci_smp.o +endif extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index 36531643cc2c..46931880093d 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -158,7 +158,7 @@ static const struct of_device_id psci_of_match[] __initconst = { {}, }; -static int __init psci_init(void) +void __init psci_init(void) { struct device_node *np; const char *method; @@ -166,7 +166,7 @@ static int __init psci_init(void) np = of_find_matching_node(NULL, psci_of_match); if (!np) - return 0; + return; pr_info("probing function IDs from device-tree\n"); @@ -206,6 +206,5 @@ static int __init psci_init(void) out_put_node: of_node_put(np); - return 0; + return; } -early_initcall(psci_init); diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c new file mode 100644 index 000000000000..23a11424c568 --- /dev/null +++ b/arch/arm/kernel/psci_smp.c @@ -0,0 +1,84 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon + */ + +#include +#include +#include +#include + +#include +#include + +/* + * psci_smp assumes that the following is true about PSCI: + * + * cpu_suspend Suspend the execution on a CPU + * @state we don't currently describe affinity levels, so just pass 0. + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * cpu_off Power down a CPU + * @state we don't currently describe affinity levels, so just pass 0. + * no return on successful call + * + * cpu_on Power up a CPU + * @cpuid cpuid of target CPU, as from MPIDR + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * migrate Migrate the context to a different CPU + * @cpuid cpuid of target CPU, as from MPIDR + * returns 0 success, < 0 on failure + * + */ + +extern void secondary_startup(void); + +static int __cpuinit psci_boot_secondary(unsigned int cpu, + struct task_struct *idle) +{ + if (psci_ops.cpu_on) + return psci_ops.cpu_on(cpu_logical_map(cpu), + __pa(secondary_startup)); + return -ENODEV; +} + +#ifdef CONFIG_HOTPLUG_CPU +void __ref psci_cpu_die(unsigned int cpu) +{ + const struct psci_power_state ps = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + if (psci_ops.cpu_off) + psci_ops.cpu_off(ps); + + /* We should never return */ + panic("psci: cpu %d failed to shutdown\n", cpu); +} +#else +#define psci_cpu_die NULL +#endif + +bool __init psci_smp_available(void) +{ + /* is cpu_on available at least? */ + return (psci_ops.cpu_on != NULL); +} + +struct smp_operations __initdata psci_smp_ops = { + .smp_boot_secondary = psci_boot_secondary, + .cpu_die = psci_cpu_die, +}; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index b4b1d397592b..860952438bd3 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -796,9 +797,13 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); arm_dt_init_cpu_maps(); + psci_init(); #ifdef CONFIG_SMP if (is_smp()) { - smp_set_ops(mdesc->smp); + if (psci_smp_available()) + smp_set_ops(&psci_smp_ops); + else if (mdesc->smp) + smp_set_ops(mdesc->smp); smp_init_cpus(); } #endif diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile index 042afc1f8c44..7ddbfa60227f 100644 --- a/arch/arm/mach-virt/Makefile +++ b/arch/arm/mach-virt/Makefile @@ -3,4 +3,3 @@ # obj-y := virt.o -obj-$(CONFIG_SMP) += platsmp.o diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c deleted file mode 100644 index f4143f5bfa5b..000000000000 --- a/arch/arm/mach-virt/platsmp.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Dummy Virtual Machine - does what it says on the tin. - * - * Copyright (C) 2012 ARM Ltd - * Author: Will Deacon - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - -#include -#include - -extern void secondary_startup(void); - -static void __init virt_smp_init_cpus(void) -{ -} - -static void __init virt_smp_prepare_cpus(unsigned int max_cpus) -{ -} - -static int __cpuinit virt_boot_secondary(unsigned int cpu, - struct task_struct *idle) -{ - if (psci_ops.cpu_on) - return psci_ops.cpu_on(cpu_logical_map(cpu), - __pa(secondary_startup)); - return -ENODEV; -} - -struct smp_operations __initdata virt_smp_ops = { - .smp_init_cpus = virt_smp_init_cpus, - .smp_prepare_cpus = virt_smp_prepare_cpus, - .smp_boot_secondary = virt_boot_secondary, -}; diff --git a/arch/arm/mach-virt/virt.c b/arch/arm/mach-virt/virt.c index 061f283f579e..a67d2dd5bb60 100644 --- a/arch/arm/mach-virt/virt.c +++ b/arch/arm/mach-virt/virt.c @@ -36,11 +36,8 @@ static const char *virt_dt_match[] = { NULL }; -extern struct smp_operations virt_smp_ops; - DT_MACHINE_START(VIRT, "Dummy Virtual Machine") .init_irq = irqchip_init, .init_machine = virt_init, - .smp = smp_ops(virt_smp_ops), .dt_compat = virt_dt_match, MACHINE_END From 0701a48d8b0353fc7338e20c1b14b00e7cae7b1f Mon Sep 17 00:00:00 2001 From: Ashwin Chaugule Date: Thu, 17 Apr 2014 14:38:41 -0400 Subject: [PATCH 006/214] PSCI: Add initial support for PSCIv0.2 functions The PSCIv0.2 spec defines standard values of function IDs and introduces a few new functions. Detect version of PSCI and appropriately select the right PSCI functions. Signed-off-by: Ashwin Chaugule Reviewed-by: Rob Herring Acked-by: Catalin Marinas (cherry picked from commit e71246a23acbc89e9cb4ebf1558d60e65733479f) Signed-off-by: Mark Brown --- arch/arm/include/asm/psci.h | 7 +- arch/arm/kernel/psci.c | 200 ++++++++++++++++++++++++++------- arch/arm64/include/asm/psci.h | 2 +- arch/arm64/kernel/psci.c | 204 +++++++++++++++++++++++++++------- 4 files changed, 332 insertions(+), 81 deletions(-) diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index c4ae171850f8..b93e34a9fdf1 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -29,16 +29,19 @@ struct psci_operations { int (*cpu_off)(struct psci_power_state state); int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); int (*migrate)(unsigned long cpuid); + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); }; extern struct psci_operations psci_ops; extern struct smp_operations psci_smp_ops; #ifdef CONFIG_ARM_PSCI -void psci_init(void); +int psci_init(void); bool psci_smp_available(void); #else -static inline void psci_init(void) { } +static inline int psci_init(void) { } static inline bool psci_smp_available(void) { return false; } #endif diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index 46931880093d..f73891b6b730 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -17,63 +17,58 @@ #include #include +#include +#include +#include #include #include #include #include #include +#include struct psci_operations psci_ops; static int (*invoke_psci_fn)(u32, u32, u32, u32); +typedef int (*psci_initcall_t)(const struct device_node *); enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, PSCI_FN_CPU_OFF, PSCI_FN_MIGRATE, + PSCI_FN_AFFINITY_INFO, + PSCI_FN_MIGRATE_INFO_TYPE, PSCI_FN_MAX, }; static u32 psci_function_id[PSCI_FN_MAX]; -#define PSCI_RET_SUCCESS 0 -#define PSCI_RET_EOPNOTSUPP -1 -#define PSCI_RET_EINVAL -2 -#define PSCI_RET_EPERM -3 - static int psci_to_linux_errno(int errno) { switch (errno) { case PSCI_RET_SUCCESS: return 0; - case PSCI_RET_EOPNOTSUPP: + case PSCI_RET_NOT_SUPPORTED: return -EOPNOTSUPP; - case PSCI_RET_EINVAL: + case PSCI_RET_INVALID_PARAMS: return -EINVAL; - case PSCI_RET_EPERM: + case PSCI_RET_DENIED: return -EPERM; }; return -EINVAL; } -#define PSCI_POWER_STATE_ID_MASK 0xffff -#define PSCI_POWER_STATE_ID_SHIFT 0 -#define PSCI_POWER_STATE_TYPE_MASK 0x1 -#define PSCI_POWER_STATE_TYPE_SHIFT 16 -#define PSCI_POWER_STATE_AFFL_MASK 0x3 -#define PSCI_POWER_STATE_AFFL_SHIFT 24 - static u32 psci_power_state_pack(struct psci_power_state state) { - return ((state.id & PSCI_POWER_STATE_ID_MASK) - << PSCI_POWER_STATE_ID_SHIFT) | - ((state.type & PSCI_POWER_STATE_TYPE_MASK) - << PSCI_POWER_STATE_TYPE_SHIFT) | - ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) - << PSCI_POWER_STATE_AFFL_SHIFT); + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); } /* @@ -110,6 +105,14 @@ static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, return function_id; } +static int psci_get_version(void) +{ + int err; + + err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); + return err; +} + static int psci_cpu_suspend(struct psci_power_state state, unsigned long entry_point) { @@ -153,26 +156,36 @@ static int psci_migrate(unsigned long cpuid) return psci_to_linux_errno(err); } -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", }, - {}, -}; - -void __init psci_init(void) +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; + err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); + return err; +} + +static int psci_migrate_info_type(void) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; + err = invoke_psci_fn(fn, 0, 0, 0); + return err; +} + +static int get_set_conduit_method(struct device_node *np) { - struct device_node *np; const char *method; - u32 id; - np = of_find_matching_node(NULL, psci_of_match); - if (!np) - return; - - pr_info("probing function IDs from device-tree\n"); + pr_info("probing for conduit method from DT.\n"); if (of_property_read_string(np, "method", &method)) { - pr_warning("missing \"method\" property\n"); - goto out_put_node; + pr_warn("missing \"method\" property\n"); + return -ENXIO; } if (!strcmp("hvc", method)) { @@ -180,9 +193,98 @@ void __init psci_init(void) } else if (!strcmp("smc", method)) { invoke_psci_fn = __invoke_psci_fn_smc; } else { - pr_warning("invalid \"method\" property: %s\n", method); - goto out_put_node; + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; } + return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int psci_0_2_init(struct device_node *np) +{ + int err, ver; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + ver = psci_get_version(); + + if (ver == PSCI_RET_NOT_SUPPORTED) { + /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ + pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); + err = -EOPNOTSUPP; + goto out_put_node; + } else { + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && + PSCI_VERSION_MINOR(ver) < 2) { + err = -EINVAL; + pr_err("Conflicting PSCI version detected.\n"); + goto out_put_node; + } + } + + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO; + psci_ops.affinity_info = psci_affinity_info; + + psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = + PSCI_0_2_FN_MIGRATE_INFO_TYPE; + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = psci_sys_reset; + + pm_power_off = psci_sys_poweroff; + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); if (!of_property_read_u32(np, "cpu_suspend", &id)) { psci_function_id[PSCI_FN_CPU_SUSPEND] = id; @@ -206,5 +308,25 @@ void __init psci_init(void) out_put_node: of_node_put(np); - return; + return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); } diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h index d15ab8b46336..e5312ea0ec1a 100644 --- a/arch/arm64/include/asm/psci.h +++ b/arch/arm64/include/asm/psci.h @@ -14,6 +14,6 @@ #ifndef __ASM_PSCI_H #define __ASM_PSCI_H -void psci_init(void); +int psci_init(void); #endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index ea4828a4aa96..90df6e641227 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -18,12 +18,16 @@ #include #include #include +#include +#include +#include #include #include #include #include #include +#include #define PSCI_POWER_STATE_TYPE_STANDBY 0 #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 @@ -40,58 +44,52 @@ struct psci_operations { int (*cpu_off)(struct psci_power_state state); int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); int (*migrate)(unsigned long cpuid); + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); }; static struct psci_operations psci_ops; static int (*invoke_psci_fn)(u64, u64, u64, u64); +typedef int (*psci_initcall_t)(const struct device_node *); enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, PSCI_FN_CPU_OFF, PSCI_FN_MIGRATE, + PSCI_FN_AFFINITY_INFO, + PSCI_FN_MIGRATE_INFO_TYPE, PSCI_FN_MAX, }; static u32 psci_function_id[PSCI_FN_MAX]; -#define PSCI_RET_SUCCESS 0 -#define PSCI_RET_EOPNOTSUPP -1 -#define PSCI_RET_EINVAL -2 -#define PSCI_RET_EPERM -3 - static int psci_to_linux_errno(int errno) { switch (errno) { case PSCI_RET_SUCCESS: return 0; - case PSCI_RET_EOPNOTSUPP: + case PSCI_RET_NOT_SUPPORTED: return -EOPNOTSUPP; - case PSCI_RET_EINVAL: + case PSCI_RET_INVALID_PARAMS: return -EINVAL; - case PSCI_RET_EPERM: + case PSCI_RET_DENIED: return -EPERM; }; return -EINVAL; } -#define PSCI_POWER_STATE_ID_MASK 0xffff -#define PSCI_POWER_STATE_ID_SHIFT 0 -#define PSCI_POWER_STATE_TYPE_MASK 0x1 -#define PSCI_POWER_STATE_TYPE_SHIFT 16 -#define PSCI_POWER_STATE_AFFL_MASK 0x3 -#define PSCI_POWER_STATE_AFFL_SHIFT 24 - static u32 psci_power_state_pack(struct psci_power_state state) { - return ((state.id & PSCI_POWER_STATE_ID_MASK) - << PSCI_POWER_STATE_ID_SHIFT) | - ((state.type & PSCI_POWER_STATE_TYPE_MASK) - << PSCI_POWER_STATE_TYPE_SHIFT) | - ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) - << PSCI_POWER_STATE_AFFL_SHIFT); + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); } /* @@ -128,6 +126,14 @@ static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, return function_id; } +static int psci_get_version(void) +{ + int err; + + err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); + return err; +} + static int psci_cpu_suspend(struct psci_power_state state, unsigned long entry_point) { @@ -171,26 +177,36 @@ static int psci_migrate(unsigned long cpuid) return psci_to_linux_errno(err); } -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", }, - {}, -}; - -void __init psci_init(void) +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; + err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); + return err; +} + +static int psci_migrate_info_type(void) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; + err = invoke_psci_fn(fn, 0, 0, 0); + return err; +} + +static int get_set_conduit_method(struct device_node *np) { - struct device_node *np; const char *method; - u32 id; - np = of_find_matching_node(NULL, psci_of_match); - if (!np) - return; - - pr_info("probing function IDs from device-tree\n"); + pr_info("probing for conduit method from DT.\n"); if (of_property_read_string(np, "method", &method)) { - pr_warning("missing \"method\" property\n"); - goto out_put_node; + pr_warn("missing \"method\" property\n"); + return -ENXIO; } if (!strcmp("hvc", method)) { @@ -198,9 +214,98 @@ void __init psci_init(void) } else if (!strcmp("smc", method)) { invoke_psci_fn = __invoke_psci_fn_smc; } else { - pr_warning("invalid \"method\" property: %s\n", method); - goto out_put_node; + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; } + return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int psci_0_2_init(struct device_node *np) +{ + int err, ver; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + ver = psci_get_version(); + + if (ver == PSCI_RET_NOT_SUPPORTED) { + /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ + pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); + err = -EOPNOTSUPP; + goto out_put_node; + } else { + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && + PSCI_VERSION_MINOR(ver) < 2) { + err = -EINVAL; + pr_err("Conflicting PSCI version detected.\n"); + goto out_put_node; + } + } + + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO; + psci_ops.affinity_info = psci_affinity_info; + + psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = + PSCI_0_2_FN_MIGRATE_INFO_TYPE; + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = psci_sys_reset; + + pm_power_off = psci_sys_poweroff; + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); if (!of_property_read_u32(np, "cpu_suspend", &id)) { psci_function_id[PSCI_FN_CPU_SUSPEND] = id; @@ -224,7 +329,28 @@ void __init psci_init(void) out_put_node: of_node_put(np); - return; + return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); } #ifdef CONFIG_SMP From 9224f461847062111eca24ea7fb9062f1ae0d8fc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 3 Jun 2013 23:09:14 +0100 Subject: [PATCH 007/214] ARM: 7745/1: psci: fix building without HOTPLUG_CPU The cpu_die field in smp_operations is not valid with CONFIG_HOTPLUG_CPU, so we must enclose it in #ifdef, but at least that lets us remove two other lines. Signed-off-by: Arnd Bergmann Cc: Stefano Stabellini Cc: Will Deacon Signed-off-by: Russell King (cherry picked from commit fdeb94b5dc5bf9db7b3e36f3f38089a554f6a108) Signed-off-by: Mark Brown --- arch/arm/kernel/psci_smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 23a11424c568..219f1d73572a 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -68,8 +68,6 @@ void __ref psci_cpu_die(unsigned int cpu) /* We should never return */ panic("psci: cpu %d failed to shutdown\n", cpu); } -#else -#define psci_cpu_die NULL #endif bool __init psci_smp_available(void) @@ -80,5 +78,7 @@ bool __init psci_smp_available(void) struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU .cpu_die = psci_cpu_die, +#endif }; From 6a0d4de350b1a37e6d67b2d8bae030886d291c4a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 14 May 2013 20:51:38 -0500 Subject: [PATCH 008/214] ARM: PSCI: remove unnecessary include of arm-gic.h Now that gic_secondary_init is no longer needed to be called by SMP init functions, the header is not needed. Signed-off-by: Rob Herring Cc: Russell King (cherry picked from commit 97fc4de3d93fea934b0ec8332465b9f1899a9adb) Signed-off-by: Mark Brown --- arch/arm/kernel/psci_smp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 219f1d73572a..1621ada3d068 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -14,7 +14,6 @@ */ #include -#include #include #include From 24629f96c1506bed9c5abecf73e2edb8e79ab379 Mon Sep 17 00:00:00 2001 From: Ashwin Chaugule Date: Wed, 7 May 2014 10:18:36 -0400 Subject: [PATCH 009/214] ARM: Check if a CPU has gone offline PSCIv0.2 adds a new function called AFFINITY_INFO, which can be used to query if a specified CPU has actually gone offline. Calling this function via cpu_kill ensures that a CPU has quiesced after a call to cpu_die. This helps prevent the CPU from doing arbitrary bad things when data or instructions are clobbered (as happens with kexec) in the window between a CPU announcing that it is dead and said CPU leaving the kernel. Signed-off-by: Ashwin Chaugule Signed-off-by: Mark Rutland Reviewed-by: Rob Herring Acked-by: Catalin Marinas (cherry picked from commit c814ca029e1015bb0ecec312f4bb9751ba1a711a) Signed-off-by: Mark Brown Conflicts: arch/arm64/include/asm/cpu_ops.h --- arch/arm/kernel/psci_smp.c | 33 ++++++++++++++++++++++++++++++++ arch/arm64/include/asm/cpu_ops.h | 2 ++ arch/arm64/kernel/psci.c | 31 ++++++++++++++++++++++++++++++ arch/arm64/kernel/smp.c | 22 +++++++++++++++++++++ 4 files changed, 88 insertions(+) diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 1621ada3d068..fc1dbd258255 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -67,6 +69,36 @@ void __ref psci_cpu_die(unsigned int cpu) /* We should never return */ panic("psci: cpu %d failed to shutdown\n", cpu); } + +int __ref psci_cpu_kill(unsigned int cpu) +{ + int err, i; + + if (!psci_ops.affinity_info) + return 1; + /* + * cpu_kill could race with cpu_die and we can + * potentially end up declaring this cpu undead + * while it is dying. So, try again a few times. + */ + + for (i = 0; i < 10; i++) { + err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); + if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { + pr_info("CPU%d killed.\n", cpu); + return 1; + } + + msleep(10); + pr_info("Retrying again to check for CPU kill\n"); + } + + pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", + cpu, err); + /* Make platform_cpu_kill() fail. */ + return 0; +} + #endif bool __init psci_smp_available(void) @@ -79,5 +111,6 @@ struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = psci_cpu_die, + .cpu_kill = psci_cpu_kill, #endif }; diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index c4cdb5e5b73d..27207fee2521 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -39,6 +39,7 @@ struct device_node; * from the cpu to be killed. * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the * cpu being killed. + * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu. */ struct cpu_operations { const char *name; @@ -49,6 +50,7 @@ struct cpu_operations { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_disable)(unsigned int cpu); void (*cpu_die)(unsigned int cpu); + int (*cpu_kill)(unsigned int cpu); #endif }; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 90df6e641227..9e9798f91172 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -403,6 +404,35 @@ static void cpu_psci_cpu_die(unsigned int cpu) pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); } + +static int cpu_psci_cpu_kill(unsigned int cpu) +{ + int err, i; + + if (!psci_ops.affinity_info) + return 1; + /* + * cpu_kill could race with cpu_die and we can + * potentially end up declaring this cpu undead + * while it is dying. So, try again a few times. + */ + + for (i = 0; i < 10; i++) { + err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); + if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { + pr_info("CPU%d killed.\n", cpu); + return 1; + } + + msleep(10); + pr_info("Retrying again to check for CPU kill\n"); + } + + pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", + cpu, err); + /* Make op_cpu_kill() fail. */ + return 0; +} #endif const struct cpu_operations cpu_psci_ops = { @@ -413,6 +443,7 @@ const struct cpu_operations cpu_psci_ops = { #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cpu_psci_cpu_disable, .cpu_die = cpu_psci_cpu_die, + .cpu_kill = cpu_psci_cpu_kill, #endif }; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6555060f9e97..8fbefa6e97a5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -220,6 +220,19 @@ int __cpu_disable(void) return 0; } +static int op_cpu_kill(unsigned int cpu) +{ + /* + * If we have no means of synchronising with the dying CPU, then assume + * that it is really dead. We can only wait for an arbitrary length of + * time and hope that it's dead, so let's skip the wait and just hope. + */ + if (!cpu_ops[cpu]->cpu_kill) + return 1; + + return cpu_ops[cpu]->cpu_kill(cpu); +} + static DECLARE_COMPLETION(cpu_died); /* @@ -233,6 +246,15 @@ void __cpu_die(unsigned int cpu) return; } pr_notice("CPU%u: shutdown\n", cpu); + + /* + * Now that the dying CPU is beyond the point of no return w.r.t. + * in-kernel synchronisation, try to get the firwmare to help us to + * verify that it has really left the kernel before we consider + * clobbering anything it might still be using. + */ + if (!op_cpu_kill(cpu)) + pr_warn("CPU%d may not have shut down cleanly\n", cpu); } /* From 223131ca0e4ed3f34dbdf53cf92ca5a63f9a8219 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Thu, 15 May 2014 15:19:22 +0100 Subject: [PATCH 010/214] arm64: fix pud_huge() for 2-level pagetables The following happens when trying to run a kvm guest on a kernel configured for 64k pages. This doesn't happen with 4k pages: BUG: failure at include/linux/mm.h:297/put_page_testzero()! Kernel panic - not syncing: BUG! CPU: 2 PID: 4228 Comm: qemu-system-aar Tainted: GF 3.13.0-0.rc7.31.sa2.k32v1.aarch64.debug #1 Call trace: [] dump_backtrace+0x0/0x16c [] show_stack+0x14/0x1c [] dump_stack+0x84/0xb0 [] panic+0xf4/0x220 [] free_reserved_area+0x0/0x110 [] free_pages+0x50/0x88 [] kvm_free_stage2_pgd+0x30/0x40 [] kvm_arch_destroy_vm+0x18/0x44 [] kvm_put_kvm+0xf0/0x184 [] kvm_vm_release+0x10/0x1c [] __fput+0xb0/0x288 [] ____fput+0xc/0x14 [] task_work_run+0xa8/0x11c [] do_notify_resume+0x54/0x58 In arch/arm/kvm/mmu.c:unmap_range(), we end up doing an extra put_page() on the stage2 pgd which leads to the BUG in put_page_testzero(). This happens because a pud_huge() test in unmap_range() returns true when it should always be false with 2-level pages tables used by 64k pages. This patch removes support for huge puds if 2-level pagetables are being used. Signed-off-by: Mark Salter [catalin.marinas@arm.com: removed #ifndef around PUD_SIZE check] Signed-off-by: Catalin Marinas Cc: # v3.11+ (cherry picked from commit 4797ec2dc83a43be35bad56037d1b53db9e2b5d5) Signed-off-by: Mark Brown --- arch/arm64/mm/hugetlbpage.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 2fc8258bab2d..023747bf4dd7 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -51,7 +51,11 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { +#ifndef __PAGETABLE_PMD_FOLDED return !(pud_val(pud) & PUD_TABLE_BIT); +#else + return 0; +#endif } static __init int setup_hugepagesz(char *opt) From 8a93ad4ff727a6b7331476293319d55ae6e8e6cd Mon Sep 17 00:00:00 2001 From: "zhichang.yuan" Date: Mon, 28 Apr 2014 06:11:29 +0100 Subject: [PATCH 011/214] arm64: lib: Implement optimized memcpy routine This patch, based on Linaro's Cortex Strings library, improves the performance of the assembly optimized memcpy() function. Signed-off-by: Zhichang Yuan Signed-off-by: Deepak Saxena Signed-off-by: Catalin Marinas (cherry picked from commit 808dbac6b51f3441eb5a07724c0b0d1257046d51) Signed-off-by: Mark Brown --- arch/arm64/lib/memcpy.S | 192 +++++++++++++++++++++++++++++++++++----- 1 file changed, 170 insertions(+), 22 deletions(-) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 27b5003609b6..8a9a96d3ddae 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -1,5 +1,13 @@ /* * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,6 +24,7 @@ #include #include +#include /* * Copy a buffer from src to dest (alignment handled by the hardware) @@ -27,27 +36,166 @@ * Returns: * x0 - dest */ +dstin .req x0 +src .req x1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +tmp3 .req x5 +tmp3w .req w5 +dst .req x6 + +A_l .req x7 +A_h .req x8 +B_l .req x9 +B_h .req x10 +C_l .req x11 +C_h .req x12 +D_l .req x13 +D_h .req x14 + ENTRY(memcpy) - mov x4, x0 - subs x2, x2, #8 - b.mi 2f -1: ldr x3, [x1], #8 - subs x2, x2, #8 - str x3, [x4], #8 - b.pl 1b -2: adds x2, x2, #4 - b.mi 3f - ldr w3, [x1], #4 - sub x2, x2, #4 - str w3, [x4], #4 -3: adds x2, x2, #2 - b.mi 4f - ldrh w3, [x1], #2 - sub x2, x2, #2 - strh w3, [x4], #2 -4: adds x2, x2, #1 - b.mi 5f - ldrb w3, [x1] - strb w3, [x4] -5: ret + mov dst, dstin + cmp count, #16 + /*When memory length is less than 16, the accessed are not aligned.*/ + b.lo .Ltiny15 + + neg tmp2, src + ands tmp2, tmp2, #15/* Bytes to reach alignment. */ + b.eq .LSrcAligned + sub count, count, tmp2 + /* + * Copy the leading memory data from src to dst in an increasing + * address order.By this way,the risk of overwritting the source + * memory data is eliminated when the distance between src and + * dst is less than 16. The memory accesses here are alignment. + */ + tbz tmp2, #0, 1f + ldrb tmp1w, [src], #1 + strb tmp1w, [dst], #1 +1: + tbz tmp2, #1, 2f + ldrh tmp1w, [src], #2 + strh tmp1w, [dst], #2 +2: + tbz tmp2, #2, 3f + ldr tmp1w, [src], #4 + str tmp1w, [dst], #4 +3: + tbz tmp2, #3, .LSrcAligned + ldr tmp1, [src],#8 + str tmp1, [dst],#8 + +.LSrcAligned: + cmp count, #64 + b.ge .Lcpy_over64 + /* + * Deal with small copies quickly by dropping straight into the + * exit block. + */ +.Ltail63: + /* + * Copy up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. + */ + ands tmp1, count, #0x30 + b.eq .Ltiny15 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp A_l, A_h, [src], #16 + stp A_l, A_h, [dst], #16 +1: + ldp A_l, A_h, [src], #16 + stp A_l, A_h, [dst], #16 +2: + ldp A_l, A_h, [src], #16 + stp A_l, A_h, [dst], #16 +.Ltiny15: + /* + * Prefer to break one ldp/stp into several load/store to access + * memory in an increasing address order,rather than to load/store 16 + * bytes from (src-16) to (dst-16) and to backward the src to aligned + * address,which way is used in original cortex memcpy. If keeping + * the original memcpy process here, memmove need to satisfy the + * precondition that src address is at least 16 bytes bigger than dst + * address,otherwise some source data will be overwritten when memove + * call memcpy directly. To make memmove simpler and decouple the + * memcpy's dependency on memmove, withdrew the original process. + */ + tbz count, #3, 1f + ldr tmp1, [src], #8 + str tmp1, [dst], #8 +1: + tbz count, #2, 2f + ldr tmp1w, [src], #4 + str tmp1w, [dst], #4 +2: + tbz count, #1, 3f + ldrh tmp1w, [src], #2 + strh tmp1w, [dst], #2 +3: + tbz count, #0, .Lexitfunc + ldrb tmp1w, [src] + strb tmp1w, [dst] + +.Lexitfunc: + ret + +.Lcpy_over64: + subs count, count, #128 + b.ge .Lcpy_body_large + /* + * Less than 128 bytes to copy, so handle 64 here and then jump + * to the tail. + */ + ldp A_l, A_h, [src],#16 + stp A_l, A_h, [dst],#16 + ldp B_l, B_h, [src],#16 + ldp C_l, C_h, [src],#16 + stp B_l, B_h, [dst],#16 + stp C_l, C_h, [dst],#16 + ldp D_l, D_h, [src],#16 + stp D_l, D_h, [dst],#16 + + tst count, #0x3f + b.ne .Ltail63 + ret + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. + */ + .p2align L1_CACHE_SHIFT +.Lcpy_body_large: + /* pre-get 64 bytes data. */ + ldp A_l, A_h, [src],#16 + ldp B_l, B_h, [src],#16 + ldp C_l, C_h, [src],#16 + ldp D_l, D_h, [src],#16 +1: + /* + * interlace the load of next 64 bytes data block with store of the last + * loaded 64 bytes data. + */ + stp A_l, A_h, [dst],#16 + ldp A_l, A_h, [src],#16 + stp B_l, B_h, [dst],#16 + ldp B_l, B_h, [src],#16 + stp C_l, C_h, [dst],#16 + ldp C_l, C_h, [src],#16 + stp D_l, D_h, [dst],#16 + ldp D_l, D_h, [src],#16 + subs count, count, #64 + b.ge 1b + stp A_l, A_h, [dst],#16 + stp B_l, B_h, [dst],#16 + stp C_l, C_h, [dst],#16 + stp D_l, D_h, [dst],#16 + + tst count, #0x3f + b.ne .Ltail63 + ret ENDPROC(memcpy) From 64278d12423c867b223a7f0756a50ad7d9590599 Mon Sep 17 00:00:00 2001 From: "zhichang.yuan" Date: Mon, 28 Apr 2014 06:11:30 +0100 Subject: [PATCH 012/214] arm64: lib: Implement optimized memmove routine This patch, based on Linaro's Cortex Strings library, improves the performance of the assembly optimized memmove() function. Signed-off-by: Zhichang Yuan Signed-off-by: Deepak Saxena Signed-off-by: Catalin Marinas (cherry picked from commit 280adc1951c0c9fc8f2d85571ff563a1c412b1cd) Signed-off-by: Mark Brown --- arch/arm64/lib/memmove.S | 190 +++++++++++++++++++++++++++++++++------ 1 file changed, 165 insertions(+), 25 deletions(-) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index b79fdfa42d39..57b19ea2dad4 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -1,5 +1,13 @@ /* * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,6 +24,7 @@ #include #include +#include /* * Move a buffer from src to test (alignment handled by the hardware). @@ -28,30 +37,161 @@ * Returns: * x0 - dest */ +dstin .req x0 +src .req x1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +tmp3 .req x5 +tmp3w .req w5 +dst .req x6 + +A_l .req x7 +A_h .req x8 +B_l .req x9 +B_h .req x10 +C_l .req x11 +C_h .req x12 +D_l .req x13 +D_h .req x14 + ENTRY(memmove) - cmp x0, x1 - b.ls memcpy - add x4, x0, x2 - add x1, x1, x2 - subs x2, x2, #8 - b.mi 2f -1: ldr x3, [x1, #-8]! - subs x2, x2, #8 - str x3, [x4, #-8]! - b.pl 1b -2: adds x2, x2, #4 - b.mi 3f - ldr w3, [x1, #-4]! - sub x2, x2, #4 - str w3, [x4, #-4]! -3: adds x2, x2, #2 - b.mi 4f - ldrh w3, [x1, #-2]! - sub x2, x2, #2 - strh w3, [x4, #-2]! -4: adds x2, x2, #1 - b.mi 5f - ldrb w3, [x1, #-1] - strb w3, [x4, #-1] -5: ret + cmp dstin, src + b.lo memcpy + add tmp1, src, count + cmp dstin, tmp1 + b.hs memcpy /* No overlap. */ + + add dst, dstin, count + add src, src, count + cmp count, #16 + b.lo .Ltail15 /*probably non-alignment accesses.*/ + + ands tmp2, src, #15 /* Bytes to reach alignment. */ + b.eq .LSrcAligned + sub count, count, tmp2 + /* + * process the aligned offset length to make the src aligned firstly. + * those extra instructions' cost is acceptable. It also make the + * coming accesses are based on aligned address. + */ + tbz tmp2, #0, 1f + ldrb tmp1w, [src, #-1]! + strb tmp1w, [dst, #-1]! +1: + tbz tmp2, #1, 2f + ldrh tmp1w, [src, #-2]! + strh tmp1w, [dst, #-2]! +2: + tbz tmp2, #2, 3f + ldr tmp1w, [src, #-4]! + str tmp1w, [dst, #-4]! +3: + tbz tmp2, #3, .LSrcAligned + ldr tmp1, [src, #-8]! + str tmp1, [dst, #-8]! + +.LSrcAligned: + cmp count, #64 + b.ge .Lcpy_over64 + + /* + * Deal with small copies quickly by dropping straight into the + * exit block. + */ +.Ltail63: + /* + * Copy up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. + */ + ands tmp1, count, #0x30 + b.eq .Ltail15 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp A_l, A_h, [src, #-16]! + stp A_l, A_h, [dst, #-16]! +1: + ldp A_l, A_h, [src, #-16]! + stp A_l, A_h, [dst, #-16]! +2: + ldp A_l, A_h, [src, #-16]! + stp A_l, A_h, [dst, #-16]! + +.Ltail15: + tbz count, #3, 1f + ldr tmp1, [src, #-8]! + str tmp1, [dst, #-8]! +1: + tbz count, #2, 2f + ldr tmp1w, [src, #-4]! + str tmp1w, [dst, #-4]! +2: + tbz count, #1, 3f + ldrh tmp1w, [src, #-2]! + strh tmp1w, [dst, #-2]! +3: + tbz count, #0, .Lexitfunc + ldrb tmp1w, [src, #-1] + strb tmp1w, [dst, #-1] + +.Lexitfunc: + ret + +.Lcpy_over64: + subs count, count, #128 + b.ge .Lcpy_body_large + /* + * Less than 128 bytes to copy, so handle 64 bytes here and then jump + * to the tail. + */ + ldp A_l, A_h, [src, #-16] + stp A_l, A_h, [dst, #-16] + ldp B_l, B_h, [src, #-32] + ldp C_l, C_h, [src, #-48] + stp B_l, B_h, [dst, #-32] + stp C_l, C_h, [dst, #-48] + ldp D_l, D_h, [src, #-64]! + stp D_l, D_h, [dst, #-64]! + + tst count, #0x3f + b.ne .Ltail63 + ret + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. + */ + .p2align L1_CACHE_SHIFT +.Lcpy_body_large: + /* pre-load 64 bytes data. */ + ldp A_l, A_h, [src, #-16] + ldp B_l, B_h, [src, #-32] + ldp C_l, C_h, [src, #-48] + ldp D_l, D_h, [src, #-64]! +1: + /* + * interlace the load of next 64 bytes data block with store of the last + * loaded 64 bytes data. + */ + stp A_l, A_h, [dst, #-16] + ldp A_l, A_h, [src, #-16] + stp B_l, B_h, [dst, #-32] + ldp B_l, B_h, [src, #-32] + stp C_l, C_h, [dst, #-48] + ldp C_l, C_h, [src, #-48] + stp D_l, D_h, [dst, #-64]! + ldp D_l, D_h, [src, #-64]! + subs count, count, #64 + b.ge 1b + stp A_l, A_h, [dst, #-16] + stp B_l, B_h, [dst, #-32] + stp C_l, C_h, [dst, #-48] + stp D_l, D_h, [dst, #-64]! + + tst count, #0x3f + b.ne .Ltail63 + ret ENDPROC(memmove) From 694e36fc1edf89965cf83df02e6a9ebf10db3c06 Mon Sep 17 00:00:00 2001 From: "zhichang.yuan" Date: Mon, 28 Apr 2014 06:11:31 +0100 Subject: [PATCH 013/214] arm64: lib: Implement optimized memset routine This patch, based on Linaro's Cortex Strings library, improves the performance of the assembly optimized memset() function. Signed-off-by: Zhichang Yuan Signed-off-by: Deepak Saxena Signed-off-by: Catalin Marinas (cherry picked from commit b29a51fe0e0be63157d8661666be8bbfd8f0c5d7) Signed-off-by: Mark Brown --- arch/arm64/lib/memset.S | 207 +++++++++++++++++++++++++++++++++++----- 1 file changed, 185 insertions(+), 22 deletions(-) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 87e4a68fbbbc..7c72dfd36b63 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -1,5 +1,13 @@ /* * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -16,6 +24,7 @@ #include #include +#include /* * Fill in the buffer with character c (alignment handled by the hardware) @@ -27,27 +36,181 @@ * Returns: * x0 - buf */ + +dstin .req x0 +val .req w1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +zva_len_x .req x5 +zva_len .req w5 +zva_bits_x .req x6 + +A_l .req x7 +A_lw .req w7 +dst .req x8 +tmp3w .req w9 +tmp3 .req x9 + ENTRY(memset) - mov x4, x0 - and w1, w1, #0xff - orr w1, w1, w1, lsl #8 - orr w1, w1, w1, lsl #16 - orr x1, x1, x1, lsl #32 - subs x2, x2, #8 - b.mi 2f -1: str x1, [x4], #8 - subs x2, x2, #8 - b.pl 1b -2: adds x2, x2, #4 - b.mi 3f - sub x2, x2, #4 - str w1, [x4], #4 -3: adds x2, x2, #2 - b.mi 4f - sub x2, x2, #2 - strh w1, [x4], #2 -4: adds x2, x2, #1 - b.mi 5f - strb w1, [x4] -5: ret + mov dst, dstin /* Preserve return value. */ + and A_lw, val, #255 + orr A_lw, A_lw, A_lw, lsl #8 + orr A_lw, A_lw, A_lw, lsl #16 + orr A_l, A_l, A_l, lsl #32 + + cmp count, #15 + b.hi .Lover16_proc + /*All store maybe are non-aligned..*/ + tbz count, #3, 1f + str A_l, [dst], #8 +1: + tbz count, #2, 2f + str A_lw, [dst], #4 +2: + tbz count, #1, 3f + strh A_lw, [dst], #2 +3: + tbz count, #0, 4f + strb A_lw, [dst] +4: + ret + +.Lover16_proc: + /*Whether the start address is aligned with 16.*/ + neg tmp2, dst + ands tmp2, tmp2, #15 + b.eq .Laligned +/* +* The count is not less than 16, we can use stp to store the start 16 bytes, +* then adjust the dst aligned with 16.This process will make the current +* memory address at alignment boundary. +*/ + stp A_l, A_l, [dst] /*non-aligned store..*/ + /*make the dst aligned..*/ + sub count, count, tmp2 + add dst, dst, tmp2 + +.Laligned: + cbz A_l, .Lzero_mem + +.Ltail_maybe_long: + cmp count, #64 + b.ge .Lnot_short +.Ltail63: + ands tmp1, count, #0x30 + b.eq 3f + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + stp A_l, A_l, [dst], #16 +1: + stp A_l, A_l, [dst], #16 +2: + stp A_l, A_l, [dst], #16 +/* +* The last store length is less than 16,use stp to write last 16 bytes. +* It will lead some bytes written twice and the access is non-aligned. +*/ +3: + ands count, count, #15 + cbz count, 4f + add dst, dst, count + stp A_l, A_l, [dst, #-16] /* Repeat some/all of last store. */ +4: + ret + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line, this ensures the entire loop is in one line. + */ + .p2align L1_CACHE_SHIFT +.Lnot_short: + sub dst, dst, #16/* Pre-bias. */ + sub count, count, #64 +1: + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + stp A_l, A_l, [dst, #48] + stp A_l, A_l, [dst, #64]! + subs count, count, #64 + b.ge 1b + tst count, #0x3f + add dst, dst, #16 + b.ne .Ltail63 +.Lexitfunc: + ret + + /* + * For zeroing memory, check to see if we can use the ZVA feature to + * zero entire 'cache' lines. + */ +.Lzero_mem: + cmp count, #63 + b.le .Ltail63 + /* + * For zeroing small amounts of memory, it's not worth setting up + * the line-clear code. + */ + cmp count, #128 + b.lt .Lnot_short /*count is at least 128 bytes*/ + + mrs tmp1, dczid_el0 + tbnz tmp1, #4, .Lnot_short + mov tmp3w, #4 + and zva_len, tmp1w, #15 /* Safety: other bits reserved. */ + lsl zva_len, tmp3w, zva_len + + ands tmp3w, zva_len, #63 + /* + * ensure the zva_len is not less than 64. + * It is not meaningful to use ZVA if the block size is less than 64. + */ + b.ne .Lnot_short +.Lzero_by_line: + /* + * Compute how far we need to go to become suitably aligned. We're + * already at quad-word alignment. + */ + cmp count, zva_len_x + b.lt .Lnot_short /* Not enough to reach alignment. */ + sub zva_bits_x, zva_len_x, #1 + neg tmp2, dst + ands tmp2, tmp2, zva_bits_x + b.eq 2f /* Already aligned. */ + /* Not aligned, check that there's enough to copy after alignment.*/ + sub tmp1, count, tmp2 + /* + * grantee the remain length to be ZVA is bigger than 64, + * avoid to make the 2f's process over mem range.*/ + cmp tmp1, #64 + ccmp tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */ + b.lt .Lnot_short + /* + * We know that there's at least 64 bytes to zero and that it's safe + * to overrun by 64 bytes. + */ + mov count, tmp1 +1: + stp A_l, A_l, [dst] + stp A_l, A_l, [dst, #16] + stp A_l, A_l, [dst, #32] + subs tmp2, tmp2, #64 + stp A_l, A_l, [dst, #48] + add dst, dst, #64 + b.ge 1b + /* We've overrun a bit, so adjust dst downwards.*/ + add dst, dst, tmp2 +2: + sub count, count, zva_len_x +3: + dc zva, dst + add dst, dst, zva_len_x + subs count, count, zva_len_x + b.ge 3b + ands count, count, zva_bits_x + b.ne .Ltail_maybe_long + ret ENDPROC(memset) From ae2c5bcd50eeb6ac60889333b56c85db570fe9ec Mon Sep 17 00:00:00 2001 From: "zhichang.yuan" Date: Mon, 28 Apr 2014 06:11:32 +0100 Subject: [PATCH 014/214] arm64: lib: Implement optimized memcmp routine This patch, based on Linaro's Cortex Strings library, adds an assembly optimized memcmp() function. Signed-off-by: Zhichang Yuan Signed-off-by: Deepak Saxena Signed-off-by: Catalin Marinas (cherry picked from commit d875c9b3724083cd2629cd8507e424cd3716cd28) Signed-off-by: Mark Brown --- arch/arm64/include/asm/string.h | 3 + arch/arm64/kernel/arm64ksyms.c | 1 + arch/arm64/lib/Makefile | 2 +- arch/arm64/lib/memcmp.S | 258 ++++++++++++++++++++++++++++++++ 4 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/lib/memcmp.S diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 3ee8b303d9a9..3a43305cda71 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -34,4 +34,7 @@ extern void *memchr(const void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMSET extern void *memset(void *, int, __kernel_size_t); +#define __HAVE_ARCH_MEMCMP +extern int memcmp(const void *, const void *, size_t); + #endif diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 338b568cd8ae..909c18e155ea 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -48,6 +48,7 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); +EXPORT_SYMBOL(memcmp); /* atomic bitops */ EXPORT_SYMBOL(set_bit); diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 328ce1a99daa..112c67f2b109 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,4 +1,4 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ copy_to_user.o copy_in_user.o copy_page.o \ clear_page.o memchr.o memcpy.o memmove.o memset.o \ - strchr.o strrchr.o + memcmp.o strchr.o strrchr.o diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S new file mode 100644 index 000000000000..6ea0776ba6de --- /dev/null +++ b/arch/arm64/lib/memcmp.S @@ -0,0 +1,258 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +/* +* compare memory areas(when two memory areas' offset are different, +* alignment handled by the hardware) +* +* Parameters: +* x0 - const memory area 1 pointer +* x1 - const memory area 2 pointer +* x2 - the maximal compare byte length +* Returns: +* x0 - a compare result, maybe less than, equal to, or greater than ZERO +*/ + +/* Parameters and result. */ +src1 .req x0 +src2 .req x1 +limit .req x2 +result .req x0 + +/* Internal variables. */ +data1 .req x3 +data1w .req w3 +data2 .req x4 +data2w .req w4 +has_nul .req x5 +diff .req x6 +endloop .req x7 +tmp1 .req x8 +tmp2 .req x9 +tmp3 .req x10 +pos .req x11 +limit_wd .req x12 +mask .req x13 + +ENTRY(memcmp) + cbz limit, .Lret0 + eor tmp1, src1, src2 + tst tmp1, #7 + b.ne .Lmisaligned8 + ands tmp1, src1, #7 + b.ne .Lmutual_align + sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ + lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ + /* + * The input source addresses are at alignment boundary. + * Directly compare eight bytes each time. + */ +.Lloop_aligned: + ldr data1, [src1], #8 + ldr data2, [src2], #8 +.Lstart_realigned: + subs limit_wd, limit_wd, #1 + eor diff, data1, data2 /* Non-zero if differences found. */ + csinv endloop, diff, xzr, cs /* Last Dword or differences. */ + cbz endloop, .Lloop_aligned + + /* Not reached the limit, must have found a diff. */ + tbz limit_wd, #63, .Lnot_limit + + /* Limit % 8 == 0 => the diff is in the last 8 bytes. */ + ands limit, limit, #7 + b.eq .Lnot_limit + /* + * The remained bytes less than 8. It is needed to extract valid data + * from last eight bytes of the intended memory range. + */ + lsl limit, limit, #3 /* bytes-> bits. */ + mov mask, #~0 +CPU_BE( lsr mask, mask, limit ) +CPU_LE( lsl mask, mask, limit ) + bic data1, data1, mask + bic data2, data2, mask + + orr diff, diff, mask + b .Lnot_limit + +.Lmutual_align: + /* + * Sources are mutually aligned, but are not currently at an + * alignment boundary. Round down the addresses and then mask off + * the bytes that precede the start point. + */ + bic src1, src1, #7 + bic src2, src2, #7 + ldr data1, [src1], #8 + ldr data2, [src2], #8 + /* + * We can not add limit with alignment offset(tmp1) here. Since the + * addition probably make the limit overflown. + */ + sub limit_wd, limit, #1/*limit != 0, so no underflow.*/ + and tmp3, limit_wd, #7 + lsr limit_wd, limit_wd, #3 + add tmp3, tmp3, tmp1 + add limit_wd, limit_wd, tmp3, lsr #3 + add limit, limit, tmp1/* Adjust the limit for the extra. */ + + lsl tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/ + neg tmp1, tmp1/* Bits to alignment -64. */ + mov tmp2, #~0 + /*mask off the non-intended bytes before the start address.*/ +CPU_BE( lsl tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/ + /* Little-endian. Early bytes are at LSB. */ +CPU_LE( lsr tmp2, tmp2, tmp1 ) + + orr data1, data1, tmp2 + orr data2, data2, tmp2 + b .Lstart_realigned + + /*src1 and src2 have different alignment offset.*/ +.Lmisaligned8: + cmp limit, #8 + b.lo .Ltiny8proc /*limit < 8: compare byte by byte*/ + + and tmp1, src1, #7 + neg tmp1, tmp1 + add tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/ + and tmp2, src2, #7 + neg tmp2, tmp2 + add tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/ + subs tmp3, tmp1, tmp2 + csel pos, tmp1, tmp2, hi /*Choose the maximum.*/ + + sub limit, limit, pos + /*compare the proceeding bytes in the first 8 byte segment.*/ +.Ltinycmp: + ldrb data1w, [src1], #1 + ldrb data2w, [src2], #1 + subs pos, pos, #1 + ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */ + b.eq .Ltinycmp + cbnz pos, 1f /*diff occurred before the last byte.*/ + cmp data1w, data2w + b.eq .Lstart_align +1: + sub result, data1, data2 + ret + +.Lstart_align: + lsr limit_wd, limit, #3 + cbz limit_wd, .Lremain8 + + ands xzr, src1, #7 + b.eq .Lrecal_offset + /*process more leading bytes to make src1 aligned...*/ + add src1, src1, tmp3 /*backwards src1 to alignment boundary*/ + add src2, src2, tmp3 + sub limit, limit, tmp3 + lsr limit_wd, limit, #3 + cbz limit_wd, .Lremain8 + /*load 8 bytes from aligned SRC1..*/ + ldr data1, [src1], #8 + ldr data2, [src2], #8 + + subs limit_wd, limit_wd, #1 + eor diff, data1, data2 /*Non-zero if differences found.*/ + csinv endloop, diff, xzr, ne + cbnz endloop, .Lunequal_proc + /*How far is the current SRC2 from the alignment boundary...*/ + and tmp3, tmp3, #7 + +.Lrecal_offset:/*src1 is aligned now..*/ + neg pos, tmp3 +.Lloopcmp_proc: + /* + * Divide the eight bytes into two parts. First,backwards the src2 + * to an alignment boundary,load eight bytes and compare from + * the SRC2 alignment boundary. If all 8 bytes are equal,then start + * the second part's comparison. Otherwise finish the comparison. + * This special handle can garantee all the accesses are in the + * thread/task space in avoid to overrange access. + */ + ldr data1, [src1,pos] + ldr data2, [src2,pos] + eor diff, data1, data2 /* Non-zero if differences found. */ + cbnz diff, .Lnot_limit + + /*The second part process*/ + ldr data1, [src1], #8 + ldr data2, [src2], #8 + eor diff, data1, data2 /* Non-zero if differences found. */ + subs limit_wd, limit_wd, #1 + csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ + cbz endloop, .Lloopcmp_proc +.Lunequal_proc: + cbz diff, .Lremain8 + +/*There is differnence occured in the latest comparison.*/ +.Lnot_limit: +/* +* For little endian,reverse the low significant equal bits into MSB,then +* following CLZ can find how many equal bits exist. +*/ +CPU_LE( rev diff, diff ) +CPU_LE( rev data1, data1 ) +CPU_LE( rev data2, data2 ) + + /* + * The MS-non-zero bit of DIFF marks either the first bit + * that is different, or the end of the significant data. + * Shifting left now will bring the critical information into the + * top bits. + */ + clz pos, diff + lsl data1, data1, pos + lsl data2, data2, pos + /* + * We need to zero-extend (char is unsigned) the value and then + * perform a signed subtraction. + */ + lsr data1, data1, #56 + sub result, data1, data2, lsr #56 + ret + +.Lremain8: + /* Limit % 8 == 0 =>. all data are equal.*/ + ands limit, limit, #7 + b.eq .Lret0 + +.Ltiny8proc: + ldrb data1w, [src1], #1 + ldrb data2w, [src2], #1 + subs limit, limit, #1 + + ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */ + b.eq .Ltiny8proc + sub result, data1, data2 + ret +.Lret0: + mov result, #0 + ret +ENDPROC(memcmp) From 504c65c8fcd3fa42effda3090ba7b763bad96f03 Mon Sep 17 00:00:00 2001 From: "zhichang.yuan" Date: Mon, 28 Apr 2014 13:11:33 +0800 Subject: [PATCH 015/214] arm64: lib: Implement optimized string compare routines This patch, based on Linaro's Cortex Strings library, adds an assembly optimized strcmp() and strncmp() functions. Signed-off-by: Zhichang Yuan Signed-off-by: Deepak Saxena Signed-off-by: Catalin Marinas (cherry picked from commit 192c4d902f19b66902d7aacc19e9b169bebfb2e5) Signed-off-by: Mark Brown --- arch/arm64/include/asm/string.h | 6 + arch/arm64/kernel/arm64ksyms.c | 2 + arch/arm64/lib/Makefile | 2 +- arch/arm64/lib/strcmp.S | 234 ++++++++++++++++++++++++ arch/arm64/lib/strncmp.S | 310 ++++++++++++++++++++++++++++++++ 5 files changed, 553 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/lib/strcmp.S create mode 100644 arch/arm64/lib/strncmp.S diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 3a43305cda71..6133f4970027 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -22,6 +22,12 @@ extern char *strrchr(const char *, int c); #define __HAVE_ARCH_STRCHR extern char *strchr(const char *, int c); +#define __HAVE_ARCH_STRCMP +extern int strcmp(const char *, const char *); + +#define __HAVE_ARCH_STRNCMP +extern int strncmp(const char *, const char *, __kernel_size_t); + #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 909c18e155ea..2784a79dbdd9 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -44,6 +44,8 @@ EXPORT_SYMBOL(memstart_addr); /* string / mem functions */ EXPORT_SYMBOL(strchr); EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 112c67f2b109..aaaf6180c558 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,4 +1,4 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ copy_to_user.o copy_in_user.o copy_page.o \ clear_page.o memchr.o memcpy.o memmove.o memset.o \ - memcmp.o strchr.o strrchr.o + memcmp.o strcmp.o strncmp.o strchr.o strrchr.o diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S new file mode 100644 index 000000000000..42f828b06c59 --- /dev/null +++ b/arch/arm64/lib/strcmp.S @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +/* + * compare two strings + * + * Parameters: + * x0 - const string 1 pointer + * x1 - const string 2 pointer + * Returns: + * x0 - an integer less than, equal to, or greater than zero + * if s1 is found, respectively, to be less than, to match, + * or be greater than s2. + */ + +#define REP8_01 0x0101010101010101 +#define REP8_7f 0x7f7f7f7f7f7f7f7f +#define REP8_80 0x8080808080808080 + +/* Parameters and result. */ +src1 .req x0 +src2 .req x1 +result .req x0 + +/* Internal variables. */ +data1 .req x2 +data1w .req w2 +data2 .req x3 +data2w .req w3 +has_nul .req x4 +diff .req x5 +syndrome .req x6 +tmp1 .req x7 +tmp2 .req x8 +tmp3 .req x9 +zeroones .req x10 +pos .req x11 + +ENTRY(strcmp) + eor tmp1, src1, src2 + mov zeroones, #REP8_01 + tst tmp1, #7 + b.ne .Lmisaligned8 + ands tmp1, src1, #7 + b.ne .Lmutual_align + + /* + * NUL detection works on the principle that (X - 1) & (~X) & 0x80 + * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + * can be done in parallel across the entire word. + */ +.Lloop_aligned: + ldr data1, [src1], #8 + ldr data2, [src2], #8 +.Lstart_realigned: + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + eor diff, data1, data2 /* Non-zero if differences found. */ + bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ + orr syndrome, diff, has_nul + cbz syndrome, .Lloop_aligned + b .Lcal_cmpresult + +.Lmutual_align: + /* + * Sources are mutually aligned, but are not currently at an + * alignment boundary. Round down the addresses and then mask off + * the bytes that preceed the start point. + */ + bic src1, src1, #7 + bic src2, src2, #7 + lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ + ldr data1, [src1], #8 + neg tmp1, tmp1 /* Bits to alignment -64. */ + ldr data2, [src2], #8 + mov tmp2, #~0 + /* Big-endian. Early bytes are at MSB. */ +CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ + /* Little-endian. Early bytes are at LSB. */ +CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ + + orr data1, data1, tmp2 + orr data2, data2, tmp2 + b .Lstart_realigned + +.Lmisaligned8: + /* + * Get the align offset length to compare per byte first. + * After this process, one string's address will be aligned. + */ + and tmp1, src1, #7 + neg tmp1, tmp1 + add tmp1, tmp1, #8 + and tmp2, src2, #7 + neg tmp2, tmp2 + add tmp2, tmp2, #8 + subs tmp3, tmp1, tmp2 + csel pos, tmp1, tmp2, hi /*Choose the maximum. */ +.Ltinycmp: + ldrb data1w, [src1], #1 + ldrb data2w, [src2], #1 + subs pos, pos, #1 + ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ + ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ + b.eq .Ltinycmp + cbnz pos, 1f /*find the null or unequal...*/ + cmp data1w, #1 + ccmp data1w, data2w, #0, cs + b.eq .Lstart_align /*the last bytes are equal....*/ +1: + sub result, data1, data2 + ret + +.Lstart_align: + ands xzr, src1, #7 + b.eq .Lrecal_offset + /*process more leading bytes to make str1 aligned...*/ + add src1, src1, tmp3 + add src2, src2, tmp3 + /*load 8 bytes from aligned str1 and non-aligned str2..*/ + ldr data1, [src1], #8 + ldr data2, [src2], #8 + + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + bic has_nul, tmp1, tmp2 + eor diff, data1, data2 /* Non-zero if differences found. */ + orr syndrome, diff, has_nul + cbnz syndrome, .Lcal_cmpresult + /*How far is the current str2 from the alignment boundary...*/ + and tmp3, tmp3, #7 +.Lrecal_offset: + neg pos, tmp3 +.Lloopcmp_proc: + /* + * Divide the eight bytes into two parts. First,backwards the src2 + * to an alignment boundary,load eight bytes from the SRC2 alignment + * boundary,then compare with the relative bytes from SRC1. + * If all 8 bytes are equal,then start the second part's comparison. + * Otherwise finish the comparison. + * This special handle can garantee all the accesses are in the + * thread/task space in avoid to overrange access. + */ + ldr data1, [src1,pos] + ldr data2, [src2,pos] + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + bic has_nul, tmp1, tmp2 + eor diff, data1, data2 /* Non-zero if differences found. */ + orr syndrome, diff, has_nul + cbnz syndrome, .Lcal_cmpresult + + /*The second part process*/ + ldr data1, [src1], #8 + ldr data2, [src2], #8 + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + bic has_nul, tmp1, tmp2 + eor diff, data1, data2 /* Non-zero if differences found. */ + orr syndrome, diff, has_nul + cbz syndrome, .Lloopcmp_proc + +.Lcal_cmpresult: + /* + * reversed the byte-order as big-endian,then CLZ can find the most + * significant zero bits. + */ +CPU_LE( rev syndrome, syndrome ) +CPU_LE( rev data1, data1 ) +CPU_LE( rev data2, data2 ) + + /* + * For big-endian we cannot use the trick with the syndrome value + * as carry-propagation can corrupt the upper bits if the trailing + * bytes in the string contain 0x01. + * However, if there is no NUL byte in the dword, we can generate + * the result directly. We ca not just subtract the bytes as the + * MSB might be significant. + */ +CPU_BE( cbnz has_nul, 1f ) +CPU_BE( cmp data1, data2 ) +CPU_BE( cset result, ne ) +CPU_BE( cneg result, result, lo ) +CPU_BE( ret ) +CPU_BE( 1: ) + /*Re-compute the NUL-byte detection, using a byte-reversed value. */ +CPU_BE( rev tmp3, data1 ) +CPU_BE( sub tmp1, tmp3, zeroones ) +CPU_BE( orr tmp2, tmp3, #REP8_7f ) +CPU_BE( bic has_nul, tmp1, tmp2 ) +CPU_BE( rev has_nul, has_nul ) +CPU_BE( orr syndrome, diff, has_nul ) + + clz pos, syndrome + /* + * The MS-non-zero bit of the syndrome marks either the first bit + * that is different, or the top bit of the first zero byte. + * Shifting left now will bring the critical information into the + * top bits. + */ + lsl data1, data1, pos + lsl data2, data2, pos + /* + * But we need to zero-extend (char is unsigned) the value and then + * perform a signed 32-bit subtraction. + */ + lsr data1, data1, #56 + sub result, data1, data2, lsr #56 + ret +ENDPROC(strcmp) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S new file mode 100644 index 000000000000..0224cf5a5533 --- /dev/null +++ b/arch/arm64/lib/strncmp.S @@ -0,0 +1,310 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +/* + * compare two strings + * + * Parameters: + * x0 - const string 1 pointer + * x1 - const string 2 pointer + * x2 - the maximal length to be compared + * Returns: + * x0 - an integer less than, equal to, or greater than zero if s1 is found, + * respectively, to be less than, to match, or be greater than s2. + */ + +#define REP8_01 0x0101010101010101 +#define REP8_7f 0x7f7f7f7f7f7f7f7f +#define REP8_80 0x8080808080808080 + +/* Parameters and result. */ +src1 .req x0 +src2 .req x1 +limit .req x2 +result .req x0 + +/* Internal variables. */ +data1 .req x3 +data1w .req w3 +data2 .req x4 +data2w .req w4 +has_nul .req x5 +diff .req x6 +syndrome .req x7 +tmp1 .req x8 +tmp2 .req x9 +tmp3 .req x10 +zeroones .req x11 +pos .req x12 +limit_wd .req x13 +mask .req x14 +endloop .req x15 + +ENTRY(strncmp) + cbz limit, .Lret0 + eor tmp1, src1, src2 + mov zeroones, #REP8_01 + tst tmp1, #7 + b.ne .Lmisaligned8 + ands tmp1, src1, #7 + b.ne .Lmutual_align + /* Calculate the number of full and partial words -1. */ + /* + * when limit is mulitply of 8, if not sub 1, + * the judgement of last dword will wrong. + */ + sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ + lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ + + /* + * NUL detection works on the principle that (X - 1) & (~X) & 0x80 + * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + * can be done in parallel across the entire word. + */ +.Lloop_aligned: + ldr data1, [src1], #8 + ldr data2, [src2], #8 +.Lstart_realigned: + subs limit_wd, limit_wd, #1 + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + eor diff, data1, data2 /* Non-zero if differences found. */ + csinv endloop, diff, xzr, pl /* Last Dword or differences.*/ + bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ + ccmp endloop, #0, #0, eq + b.eq .Lloop_aligned + + /*Not reached the limit, must have found the end or a diff. */ + tbz limit_wd, #63, .Lnot_limit + + /* Limit % 8 == 0 => all bytes significant. */ + ands limit, limit, #7 + b.eq .Lnot_limit + + lsl limit, limit, #3 /* Bits -> bytes. */ + mov mask, #~0 +CPU_BE( lsr mask, mask, limit ) +CPU_LE( lsl mask, mask, limit ) + bic data1, data1, mask + bic data2, data2, mask + + /* Make sure that the NUL byte is marked in the syndrome. */ + orr has_nul, has_nul, mask + +.Lnot_limit: + orr syndrome, diff, has_nul + b .Lcal_cmpresult + +.Lmutual_align: + /* + * Sources are mutually aligned, but are not currently at an + * alignment boundary. Round down the addresses and then mask off + * the bytes that precede the start point. + * We also need to adjust the limit calculations, but without + * overflowing if the limit is near ULONG_MAX. + */ + bic src1, src1, #7 + bic src2, src2, #7 + ldr data1, [src1], #8 + neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */ + ldr data2, [src2], #8 + mov tmp2, #~0 + sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ + /* Big-endian. Early bytes are at MSB. */ +CPU_BE( lsl tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */ + /* Little-endian. Early bytes are at LSB. */ +CPU_LE( lsr tmp2, tmp2, tmp3 ) /* Shift (tmp1 & 63). */ + + and tmp3, limit_wd, #7 + lsr limit_wd, limit_wd, #3 + /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/ + add limit, limit, tmp1 + add tmp3, tmp3, tmp1 + orr data1, data1, tmp2 + orr data2, data2, tmp2 + add limit_wd, limit_wd, tmp3, lsr #3 + b .Lstart_realigned + +/*when src1 offset is not equal to src2 offset...*/ +.Lmisaligned8: + cmp limit, #8 + b.lo .Ltiny8proc /*limit < 8... */ + /* + * Get the align offset length to compare per byte first. + * After this process, one string's address will be aligned.*/ + and tmp1, src1, #7 + neg tmp1, tmp1 + add tmp1, tmp1, #8 + and tmp2, src2, #7 + neg tmp2, tmp2 + add tmp2, tmp2, #8 + subs tmp3, tmp1, tmp2 + csel pos, tmp1, tmp2, hi /*Choose the maximum. */ + /* + * Here, limit is not less than 8, so directly run .Ltinycmp + * without checking the limit.*/ + sub limit, limit, pos +.Ltinycmp: + ldrb data1w, [src1], #1 + ldrb data2w, [src2], #1 + subs pos, pos, #1 + ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ + ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ + b.eq .Ltinycmp + cbnz pos, 1f /*find the null or unequal...*/ + cmp data1w, #1 + ccmp data1w, data2w, #0, cs + b.eq .Lstart_align /*the last bytes are equal....*/ +1: + sub result, data1, data2 + ret + +.Lstart_align: + lsr limit_wd, limit, #3 + cbz limit_wd, .Lremain8 + /*process more leading bytes to make str1 aligned...*/ + ands xzr, src1, #7 + b.eq .Lrecal_offset + add src1, src1, tmp3 /*tmp3 is positive in this branch.*/ + add src2, src2, tmp3 + ldr data1, [src1], #8 + ldr data2, [src2], #8 + + sub limit, limit, tmp3 + lsr limit_wd, limit, #3 + subs limit_wd, limit_wd, #1 + + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + eor diff, data1, data2 /* Non-zero if differences found. */ + csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ + bics has_nul, tmp1, tmp2 + ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/ + b.ne .Lunequal_proc + /*How far is the current str2 from the alignment boundary...*/ + and tmp3, tmp3, #7 +.Lrecal_offset: + neg pos, tmp3 +.Lloopcmp_proc: + /* + * Divide the eight bytes into two parts. First,backwards the src2 + * to an alignment boundary,load eight bytes from the SRC2 alignment + * boundary,then compare with the relative bytes from SRC1. + * If all 8 bytes are equal,then start the second part's comparison. + * Otherwise finish the comparison. + * This special handle can garantee all the accesses are in the + * thread/task space in avoid to overrange access. + */ + ldr data1, [src1,pos] + ldr data2, [src2,pos] + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ + eor diff, data1, data2 /* Non-zero if differences found. */ + csinv endloop, diff, xzr, eq + cbnz endloop, .Lunequal_proc + + /*The second part process*/ + ldr data1, [src1], #8 + ldr data2, [src2], #8 + subs limit_wd, limit_wd, #1 + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + eor diff, data1, data2 /* Non-zero if differences found. */ + csinv endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/ + bics has_nul, tmp1, tmp2 + ccmp endloop, #0, #0, eq /*has_null is ZERO: no null byte*/ + b.eq .Lloopcmp_proc + +.Lunequal_proc: + orr syndrome, diff, has_nul + cbz syndrome, .Lremain8 +.Lcal_cmpresult: + /* + * reversed the byte-order as big-endian,then CLZ can find the most + * significant zero bits. + */ +CPU_LE( rev syndrome, syndrome ) +CPU_LE( rev data1, data1 ) +CPU_LE( rev data2, data2 ) + /* + * For big-endian we cannot use the trick with the syndrome value + * as carry-propagation can corrupt the upper bits if the trailing + * bytes in the string contain 0x01. + * However, if there is no NUL byte in the dword, we can generate + * the result directly. We can't just subtract the bytes as the + * MSB might be significant. + */ +CPU_BE( cbnz has_nul, 1f ) +CPU_BE( cmp data1, data2 ) +CPU_BE( cset result, ne ) +CPU_BE( cneg result, result, lo ) +CPU_BE( ret ) +CPU_BE( 1: ) + /* Re-compute the NUL-byte detection, using a byte-reversed value.*/ +CPU_BE( rev tmp3, data1 ) +CPU_BE( sub tmp1, tmp3, zeroones ) +CPU_BE( orr tmp2, tmp3, #REP8_7f ) +CPU_BE( bic has_nul, tmp1, tmp2 ) +CPU_BE( rev has_nul, has_nul ) +CPU_BE( orr syndrome, diff, has_nul ) + /* + * The MS-non-zero bit of the syndrome marks either the first bit + * that is different, or the top bit of the first zero byte. + * Shifting left now will bring the critical information into the + * top bits. + */ + clz pos, syndrome + lsl data1, data1, pos + lsl data2, data2, pos + /* + * But we need to zero-extend (char is unsigned) the value and then + * perform a signed 32-bit subtraction. + */ + lsr data1, data1, #56 + sub result, data1, data2, lsr #56 + ret + +.Lremain8: + /* Limit % 8 == 0 => all bytes significant. */ + ands limit, limit, #7 + b.eq .Lret0 +.Ltiny8proc: + ldrb data1w, [src1], #1 + ldrb data2w, [src2], #1 + subs limit, limit, #1 + + ccmp data1w, #1, #0, ne /* NZCV = 0b0000. */ + ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ + b.eq .Ltiny8proc + sub result, data1, data2 + ret + +.Lret0: + mov result, #0 + ret +ENDPROC(strncmp) From 9034246b709cc3640a7a6485fb7f2616a72b5d36 Mon Sep 17 00:00:00 2001 From: "zhichang.yuan" Date: Mon, 28 Apr 2014 13:11:34 +0800 Subject: [PATCH 016/214] arm64: lib: Implement optimized string length routines This patch, based on Linaro's Cortex Strings library, adds an assembly optimized strlen() and strnlen() functions. Signed-off-by: Zhichang Yuan Signed-off-by: Deepak Saxena Signed-off-by: Catalin Marinas (cherry picked from commit 0a42cb0a6fa64cb17db11164a1ad3511b43acefe) Signed-off-by: Mark Brown --- arch/arm64/include/asm/string.h | 6 ++ arch/arm64/kernel/arm64ksyms.c | 2 + arch/arm64/lib/Makefile | 3 +- arch/arm64/lib/strlen.S | 126 +++++++++++++++++++++++ arch/arm64/lib/strnlen.S | 171 ++++++++++++++++++++++++++++++++ 5 files changed, 307 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/lib/strlen.S create mode 100644 arch/arm64/lib/strnlen.S diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 6133f4970027..64d2d4884a9d 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -28,6 +28,12 @@ extern int strcmp(const char *, const char *); #define __HAVE_ARCH_STRNCMP extern int strncmp(const char *, const char *, __kernel_size_t); +#define __HAVE_ARCH_STRLEN +extern __kernel_size_t strlen(const char *); + +#define __HAVE_ARCH_STRNLEN +extern __kernel_size_t strnlen(const char *, __kernel_size_t); + #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 2784a79dbdd9..1edc792b4a1f 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -46,6 +46,8 @@ EXPORT_SYMBOL(strchr); EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(strcmp); EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index aaaf6180c558..d98d3e39879e 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -1,4 +1,5 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ copy_to_user.o copy_in_user.o copy_page.o \ clear_page.o memchr.o memcpy.o memmove.o memset.o \ - memcmp.o strcmp.o strncmp.o strchr.o strrchr.o + memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ + strchr.o strrchr.o diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S new file mode 100644 index 000000000000..987b68b9ce44 --- /dev/null +++ b/arch/arm64/lib/strlen.S @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +/* + * calculate the length of a string + * + * Parameters: + * x0 - const string pointer + * Returns: + * x0 - the return length of specific string + */ + +/* Arguments and results. */ +srcin .req x0 +len .req x0 + +/* Locals and temporaries. */ +src .req x1 +data1 .req x2 +data2 .req x3 +data2a .req x4 +has_nul1 .req x5 +has_nul2 .req x6 +tmp1 .req x7 +tmp2 .req x8 +tmp3 .req x9 +tmp4 .req x10 +zeroones .req x11 +pos .req x12 + +#define REP8_01 0x0101010101010101 +#define REP8_7f 0x7f7f7f7f7f7f7f7f +#define REP8_80 0x8080808080808080 + +ENTRY(strlen) + mov zeroones, #REP8_01 + bic src, srcin, #15 + ands tmp1, srcin, #15 + b.ne .Lmisaligned + /* + * NUL detection works on the principle that (X - 1) & (~X) & 0x80 + * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + * can be done in parallel across the entire word. + */ + /* + * The inner loop deals with two Dwords at a time. This has a + * slightly higher start-up cost, but we should win quite quickly, + * especially on cores with a high number of issue slots per + * cycle, as we get much better parallelism out of the operations. + */ +.Lloop: + ldp data1, data2, [src], #16 +.Lrealigned: + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, #REP8_7f + bic has_nul1, tmp1, tmp2 + bics has_nul2, tmp3, tmp4 + ccmp has_nul1, #0, #0, eq /* NZCV = 0000 */ + b.eq .Lloop + + sub len, src, srcin + cbz has_nul1, .Lnul_in_data2 +CPU_BE( mov data2, data1 ) /*prepare data to re-calculate the syndrome*/ + sub len, len, #8 + mov has_nul2, has_nul1 +.Lnul_in_data2: + /* + * For big-endian, carry propagation (if the final byte in the + * string is 0x01) means we cannot use has_nul directly. The + * easiest way to get the correct byte is to byte-swap the data + * and calculate the syndrome a second time. + */ +CPU_BE( rev data2, data2 ) +CPU_BE( sub tmp1, data2, zeroones ) +CPU_BE( orr tmp2, data2, #REP8_7f ) +CPU_BE( bic has_nul2, tmp1, tmp2 ) + + sub len, len, #8 + rev has_nul2, has_nul2 + clz pos, has_nul2 + add len, len, pos, lsr #3 /* Bits to bytes. */ + ret + +.Lmisaligned: + cmp tmp1, #8 + neg tmp1, tmp1 + ldp data1, data2, [src], #16 + lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ + mov tmp2, #~0 + /* Big-endian. Early bytes are at MSB. */ +CPU_BE( lsl tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ + /* Little-endian. Early bytes are at LSB. */ +CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ + + orr data1, data1, tmp2 + orr data2a, data2, tmp2 + csinv data1, data1, xzr, le + csel data2, data2, data2a, le + b .Lrealigned +ENDPROC(strlen) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S new file mode 100644 index 000000000000..2ca665711bf2 --- /dev/null +++ b/arch/arm64/lib/strnlen.S @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +/* + * determine the length of a fixed-size string + * + * Parameters: + * x0 - const string pointer + * x1 - maximal string length + * Returns: + * x0 - the return length of specific string + */ + +/* Arguments and results. */ +srcin .req x0 +len .req x0 +limit .req x1 + +/* Locals and temporaries. */ +src .req x2 +data1 .req x3 +data2 .req x4 +data2a .req x5 +has_nul1 .req x6 +has_nul2 .req x7 +tmp1 .req x8 +tmp2 .req x9 +tmp3 .req x10 +tmp4 .req x11 +zeroones .req x12 +pos .req x13 +limit_wd .req x14 + +#define REP8_01 0x0101010101010101 +#define REP8_7f 0x7f7f7f7f7f7f7f7f +#define REP8_80 0x8080808080808080 + +ENTRY(strnlen) + cbz limit, .Lhit_limit + mov zeroones, #REP8_01 + bic src, srcin, #15 + ands tmp1, srcin, #15 + b.ne .Lmisaligned + /* Calculate the number of full and partial words -1. */ + sub limit_wd, limit, #1 /* Limit != 0, so no underflow. */ + lsr limit_wd, limit_wd, #4 /* Convert to Qwords. */ + + /* + * NUL detection works on the principle that (X - 1) & (~X) & 0x80 + * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + * can be done in parallel across the entire word. + */ + /* + * The inner loop deals with two Dwords at a time. This has a + * slightly higher start-up cost, but we should win quite quickly, + * especially on cores with a high number of issue slots per + * cycle, as we get much better parallelism out of the operations. + */ +.Lloop: + ldp data1, data2, [src], #16 +.Lrealigned: + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + sub tmp3, data2, zeroones + orr tmp4, data2, #REP8_7f + bic has_nul1, tmp1, tmp2 + bic has_nul2, tmp3, tmp4 + subs limit_wd, limit_wd, #1 + orr tmp1, has_nul1, has_nul2 + ccmp tmp1, #0, #0, pl /* NZCV = 0000 */ + b.eq .Lloop + + cbz tmp1, .Lhit_limit /* No null in final Qword. */ + + /* + * We know there's a null in the final Qword. The easiest thing + * to do now is work out the length of the string and return + * MIN (len, limit). + */ + sub len, src, srcin + cbz has_nul1, .Lnul_in_data2 +CPU_BE( mov data2, data1 ) /*perpare data to re-calculate the syndrome*/ + + sub len, len, #8 + mov has_nul2, has_nul1 +.Lnul_in_data2: + /* + * For big-endian, carry propagation (if the final byte in the + * string is 0x01) means we cannot use has_nul directly. The + * easiest way to get the correct byte is to byte-swap the data + * and calculate the syndrome a second time. + */ +CPU_BE( rev data2, data2 ) +CPU_BE( sub tmp1, data2, zeroones ) +CPU_BE( orr tmp2, data2, #REP8_7f ) +CPU_BE( bic has_nul2, tmp1, tmp2 ) + + sub len, len, #8 + rev has_nul2, has_nul2 + clz pos, has_nul2 + add len, len, pos, lsr #3 /* Bits to bytes. */ + cmp len, limit + csel len, len, limit, ls /* Return the lower value. */ + ret + +.Lmisaligned: + /* + * Deal with a partial first word. + * We're doing two things in parallel here; + * 1) Calculate the number of words (but avoiding overflow if + * limit is near ULONG_MAX) - to do this we need to work out + * limit + tmp1 - 1 as a 65-bit value before shifting it; + * 2) Load and mask the initial data words - we force the bytes + * before the ones we are interested in to 0xff - this ensures + * early bytes will not hit any zero detection. + */ + ldp data1, data2, [src], #16 + + sub limit_wd, limit, #1 + and tmp3, limit_wd, #15 + lsr limit_wd, limit_wd, #4 + + add tmp3, tmp3, tmp1 + add limit_wd, limit_wd, tmp3, lsr #4 + + neg tmp4, tmp1 + lsl tmp4, tmp4, #3 /* Bytes beyond alignment -> bits. */ + + mov tmp2, #~0 + /* Big-endian. Early bytes are at MSB. */ +CPU_BE( lsl tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ + /* Little-endian. Early bytes are at LSB. */ +CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ + + cmp tmp1, #8 + + orr data1, data1, tmp2 + orr data2a, data2, tmp2 + + csinv data1, data1, xzr, le + csel data2, data2, data2a, le + b .Lrealigned + +.Lhit_limit: + mov len, limit + ret +ENDPROC(strnlen) From 86c43586aba61471730a5d3ae7138057b0e161c4 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 16 May 2014 18:26:01 +0100 Subject: [PATCH 017/214] arm64: Fix linker script entry point Change the arm64 linker script ENTRY() command to define _text as the kernel entry point. The arm64 boot protocol specifies that the kernel must be entered at the beginning of the kernel image. The existing ENTRY() command defined the symbol stext as the entry point, which emitted an incorrect entry point, but would not cause a runtime error because the existing entry code immediately jumps to stext. Signed-off-by: Geoff Levand Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit af885f4022622331f17227dfcfbb2dafdee25a43) Signed-off-by: Mark Brown --- arch/arm64/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 55d0e035205f..de93a4bbdb7c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -13,7 +13,7 @@ #define ARM_EXIT_DISCARD(x) x OUTPUT_ARCH(aarch64) -ENTRY(stext) +ENTRY(_text) jiffies = jiffies_64; From ae635c76a615535a1c080f5e1e49b8c3a032c1cd Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 25 Feb 2014 10:02:13 +0000 Subject: [PATCH 018/214] arm64: mm: Route pmd thp functions through pte equivalents Rather than have separate hugetlb and transparent huge page pmd manipulation functions, re-wire our thp functions to simply call the pte equivalents. This allows THP to take advantage of the new PTE_WRITE logic introduced in: c2c93e5 arm64: mm: Introduce PTE_WRITE To represent splitting THPs we use the PTE_SPECIAL bit as this is not used for pmds. Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas (cherry picked from commit 9c7e535fcc1725fc2e2d4f0d9dd14137f0243e23) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 52 +++++++++++++++----------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 225f0398b208..e10198123b4b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -231,36 +231,36 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_PTE_SPECIAL -/* - * Software PMD bits for THP - */ +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} -#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) -#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57) +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} /* * THP definitions. */ -#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) - -#define __HAVE_ARCH_PMD_WRITE -#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) -#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#define pmd_trans_splitting(pmd) pte_special(pmd_pte(pmd)) #endif -#define PMD_BIT_FUNC(fn,op) \ -static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) &= ~PMD_TYPE_MASK)) -PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); -PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); -PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); -PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); -PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); -PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); -PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) @@ -270,15 +270,6 @@ PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) -static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) -{ - const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN | - PMD_SECT_RDONLY | PMD_SECT_PROT_NONE | - PMD_SECT_VALID; - pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); - return pmd; -} - #define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd) static inline int has_transparent_hugepage(void) @@ -392,6 +383,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return pte; } +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); +} + extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; From 263d7734264145c7517194cf06dc84cebb15a2a7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 May 2014 19:11:58 +0100 Subject: [PATCH 019/214] arm64: mm: fix pmd_write CoW brokenness Commit 9c7e535fcc17 ("arm64: mm: Route pmd thp functions through pte equivalents") changed the pmd manipulator and accessor functions to convert the target pmd to a pte, process it with the pte functions, then convert it back. Along the way, we gained support for PTE_WRITE, however this is completely ignored by set_pmd_at, and so we fail to set the PMD_SECT_RDONLY for PMDs, resulting in all sorts of lovely failures (like CoW not working). Partially reverting the offending commit (by making use of PMD_SECT_RDONLY explicitly for pmd_{write,wrprotect,mkwrite} functions) leads to further issues because pmd_write can then return potentially incorrect values for page table entries marked as RDONLY, leading to BUG_ON(pmd_write(entry)) tripping under some THP workloads. This patch fixes the issue by routing set_pmd_at through set_pte_at, which correctly takes the PTE_WRITE flag into account. Given that THP mappings are always anonymous, the additional cache-flushing code in __sync_icache_dcache won't impose any significant overhead as the flush will be skipped. Cc: Catalin Marinas Acked-by: Steve Capper Tested-by: Marc Zyngier Signed-off-by: Will Deacon (cherry picked from commit ceb218359de22e70980801d4fa04fffbfc44adb8) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e10198123b4b..fb4b26509276 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -270,7 +270,7 @@ static inline pmd_t pte_pmd(pte_t pte) #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) -#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd) +#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) static inline int has_transparent_hugepage(void) { From 1d4a16a767d7bc89e6ab3713c1e8485bc30884c2 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 29 May 2014 18:16:54 +0100 Subject: [PATCH 020/214] arm64: kernel: initialize broadcast hrtimer based clock event device On platforms implementing CPU power management, the CPUidle subsystem can allow CPUs to enter idle states where local timers logic is lost on power down. To keep the software timers functional the kernel relies on an always-on broadcast timer to be present in the platform to relay the interrupt signalling the timer expiries. For platforms implementing CPU core gating that do not implement an always-on HW timer or implement it in a broken way, this patch adds code to initialize the kernel hrtimer based clock event device upon boot (which can be chosen as tick broadcast device by the kernel). It relies on a dynamically chosen CPU to be always powered-up. This CPU then relays the timer interrupt to CPUs in deep-idle states through its HW local timer device. Having a CPU always-on has implications on power management platform capabilities and makes CPUidle suboptimal, since at least a CPU is kept always in a shallow idle state by the kernel to relay timer interrupts, but at least leaves the kernel with a functional system with some working power management capabilities. The hrtimer based clock event device is unconditionally registered, but has the lowest possible rating such that any broadcast-capable HW clock event device present will be chosen in preference as the tick broadcast device. Reviewed-by: Preeti U Murthy Acked-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Lorenzo Pieralisi Signed-off-by: Will Deacon (cherry picked from commit 9358d755bd5cba8965ea79f2a446e689323409f9) Signed-off-by: Mark Brown --- arch/arm64/kernel/time.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 03dc3718eb13..1ddb2bd5932a 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -18,6 +18,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -74,6 +75,8 @@ void __init time_init(void) clocksource_of_init(); + tick_setup_hrtimer_broadcast(); + arch_timer_rate = arch_timer_get_rate(); if (!arch_timer_rate) panic("Unable to initialise architected timer.\n"); From f58774ef0b9c226e240ba8bb4f55a295531cd2d5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 9 May 2014 10:33:02 +0100 Subject: [PATCH 021/214] arm64: add ARCH_HAS_OPP to allow enabling OPP library The Operating Performance Point (OPP) Layer library is a generic library used by CPUFREQ and DEVFREQ. It can be enabled only on the platforms that specify ARCH_HAS_OPP option. This patch selects that option in order to allow ARM64 based platforms to use OPP library. Signed-off-by: Sudeep Holla Signed-off-by: Catalin Marinas (cherry picked from commit 333d17e566192efda500769fb55b11da71c6d960) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index de9eeb43f622..65044ae9d238 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_HAS_OPP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION From 8aaf231854e4397874817d9a02783b783ad77716 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 2 Jun 2014 11:47:23 +0100 Subject: [PATCH 022/214] arm64: ptrace: change fs when passing kernel pointer to regset code Our compat PTRACE_POKEUSR implementation simply passes the user data to regset_copy_from_user after some simple range checking. Unfortunately, the data in question has already been copied to the kernel stack by this point, so the subsequent access_ok check fails and the ptrace request returns -EFAULT. This causes problems tracing fork() with older versions of strace. This patch briefly changes the fs to KERNEL_DS, so that the access_ok check passes even with a kernel address. Signed-off-by: Will Deacon Cc: Signed-off-by: Catalin Marinas (cherry picked from commit c168870704bcde6bb63d05f7882b620dd3985a46) Signed-off-by: Mark Brown --- arch/arm64/kernel/ptrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c484d5625ffb..9fa78cd0f092 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -823,6 +823,7 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t val) { int ret; + mm_segment_t old_fs = get_fs(); if (off & 3 || off >= COMPAT_USER_SZ) return -EIO; @@ -830,10 +831,13 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, if (off >= sizeof(compat_elf_gregset_t)) return 0; + set_fs(KERNEL_DS); ret = copy_regset_from_user(tsk, &user_aarch32_view, REGSET_COMPAT_GPR, off, sizeof(compat_ulong_t), &val); + set_fs(old_fs); + return ret; } From 9efcded7f200e658058bf790e4936aa17a989da7 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 3 Jun 2014 19:21:30 +0100 Subject: [PATCH 023/214] arm64: ptrace: fix empty registers set in prstatus of aarch32 process core Currently core file of aarch32 process prstatus note has empty registers set. As result aarch32 core files create by V8 kernel are not very useful. It happens because compat_gpr_get and compat_gpr_set functions can copy registers values to/from either kbuf or ubuf. ELF core file collection function fill_thread_core_info calls compat_gpr_get with kbuf set and ubuf set to 0. But current compat_gpr_get and compat_gpr_set function handle copy to/from only ubuf case. Fix is to handle kbuf and ubuf as two separate cases in similar way as other functions like user_regset_copyout, user_regset_copyin do. Signed-off-by: Victor Kamensky Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas (cherry picked from commit 2227901a0230d8fde81ba9c602d649839390f56b) Signed-off-by: Mark Brown Conflicts: arch/arm64/kernel/ptrace.c --- arch/arm64/kernel/ptrace.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 9fa78cd0f092..71c6a623e226 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -650,12 +650,16 @@ static int compat_gpr_get(struct task_struct *target, reg = (void *)&task_pt_regs(target)->regs[idx]; } - ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); + if (kbuf) { + memcpy(kbuf, ®, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_to_user(ubuf, ®, sizeof(reg)); + if (ret) + break; - if (ret) - break; - else - ubuf += sizeof(compat_ulong_t); + ubuf += sizeof(reg); + } } return ret; @@ -683,7 +687,18 @@ static int compat_gpr_set(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; + + if (kbuf) { + memcpy(®, kbuf, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_from_user(®, ubuf, sizeof(reg)); + if (ret) + return ret; + + ubuf += sizeof(reg); + } switch (idx) { case 15: From 8198661461a12637b0e934aa83d553c531e3b247 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 6 Jun 2014 23:07:16 +0100 Subject: [PATCH 024/214] arm64/dma: Removing ARCH_HAS_DMA_GET_REQUIRED_MASK macro Arm64 does not define dma_get_required_mask() function. Therefore, it should not define the ARCH_HAS_DMA_GET_REQUIRED_MASK. This causes build errors in some device drivers (e.g. mpt2sas) Signed-off-by: Suravee Suthikulpanit Signed-off-by: Catalin Marinas Cc: (cherry picked from commit f3a183cb422574014538017b5b291a416396f97e) Signed-off-by: Mark Brown Conflicts: arch/arm64/include/asm/dma-mapping.h --- arch/arm64/include/asm/dma-mapping.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 00a41aab4a37..fa6a0c5a8de3 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -23,8 +23,6 @@ #include -#define ARCH_HAS_DMA_GET_REQUIRED_MASK - #define DMA_ERROR_CODE (~(dma_addr_t)0) extern struct dma_map_ops *dma_ops; extern struct dma_map_ops coherent_swiotlb_dma_ops; From 84f329fa19404c78233f1325a6acb9a45afb5d08 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Jun 2014 14:06:27 +0100 Subject: [PATCH 025/214] arm64: mm: remove broken &= operator from pmd_mknotpresent This should be a plain old '&' and could easily lead to undefined behaviour if the target of a pmd_mknotpresent invocation was the same as the parameter. Fixes: 9c7e535fcc17 (arm64: mm: Route pmd thp functions through pte equivalents) Signed-off-by: Will Deacon Cc: # v3.15 Signed-off-by: Catalin Marinas (cherry picked from commit e3a920afc3482e954834a4ed95908c4bc5e4c000) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index fb4b26509276..7099e3b84778 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -257,7 +257,7 @@ static inline pmd_t pte_pmd(pte_t pte) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) &= ~PMD_TYPE_MASK)) +#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) From 25057d5e48bf16b7a8bf569ed8c22c999ef30980 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 18 Jun 2014 21:10:09 +0100 Subject: [PATCH 026/214] arm64: implement TASK_SIZE_OF include/linux/sched.h implements TASK_SIZE_OF as TASK_SIZE if it is not set by the architecture headers. TASK_SIZE uses the current task to determine the size of the virtual address space. On a 64-bit kernel this will cause reading /proc/pid/pagemap of a 64-bit process from a 32-bit process to return EOF when it reads past 0xffffffff. Implement TASK_SIZE_OF exactly the same as TASK_SIZE with test_tsk_thread_flag instead of test_thread_flag. Cc: stable@vger.kernel.org Signed-off-by: Colin Cross Acked-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit fa2ec3ea10bd377f9d55772b1dab65178425a1a2) Signed-off-by: Mark Brown --- arch/arm64/include/asm/memory.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 8b656af942c6..71cd41644279 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -56,6 +56,8 @@ #define TASK_SIZE_32 UL(0x100000000) #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ + TASK_SIZE_32 : TASK_SIZE_64) #else #define TASK_SIZE TASK_SIZE_64 #endif /* CONFIG_COMPAT */ From a6683aa2fdd856a9e146e1fe1442f8ec3039c94f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 25 Jun 2014 23:55:03 +0100 Subject: [PATCH 027/214] arm64: Add CONFIG_CC_STACKPROTECTOR arm64 currently lacks support for -fstack-protector. Add similar functionality to arm to detect stack corruption. Acked-by: Will Deacon Acked-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas (cherry picked from commit c0c264ae5112d1cdb7d37d4e208b7a7e766a7418) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/stackprotector.h | 38 +++++++++++++++++++++++++ arch/arm64/kernel/process.c | 6 ++++ 3 files changed, 45 insertions(+) create mode 100644 arch/arm64/include/asm/stackprotector.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 65044ae9d238..4142cab9d4ff 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -25,6 +25,7 @@ config ARM64 select HARDIRQS_SW_RESEND select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_CC_STACKPROTECTOR select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h new file mode 100644 index 000000000000..fe5e287dc56b --- /dev/null +++ b/arch/arm64/include/asm/stackprotector.h @@ -0,0 +1,38 @@ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and gcc expects it to be defined by a global variable called + * "__stack_chk_guard" on ARM. This unfortunately means that on SMP + * we cannot have a different canary value per task. + */ + +#ifndef __ASM_STACKPROTECTOR_H +#define __ASM_STACKPROTECTOR_H + +#include +#include + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. */ + get_random_bytes(&canary, sizeof(canary)); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 57bd961f2917..feb01cf4b255 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -50,6 +50,12 @@ #include #include +#ifdef CONFIG_CC_STACKPROTECTOR +#include +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + static void setup_restart(void) { /* From 04d4eb47acdc3af0c7841487716d6141da242f6e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 10 Jul 2014 11:37:40 +0100 Subject: [PATCH 028/214] arm64: Cast KSTK_(EIP|ESP) to unsigned long This is for similarity with thread_saved_(pc|sp) and to avoid some compiler warnings in the audit code. Signed-off-by: Catalin Marinas (cherry picked from commit ebe6152e722b9df6b487a0d9464aeff216b6d364) Signed-off-by: Mark Brown --- arch/arm64/include/asm/processor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ab239b2c456f..db3112886968 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -131,8 +131,8 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev, #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) -#define KSTK_EIP(tsk) task_pt_regs(tsk)->pc -#define KSTK_ESP(tsk) task_pt_regs(tsk)->sp +#define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc) +#define KSTK_ESP(tsk) ((unsigned long)task_pt_regs(tsk)->sp) /* * Prefetching support From 0065bf4206cd4fecb482aef751fd39360fa8bafa Mon Sep 17 00:00:00 2001 From: Erik Kline Date: Fri, 5 Dec 2014 19:45:10 +0900 Subject: [PATCH 029/214] net: ipv6: allow choosing optimistic addresses with use_optimistic The use_optimistic sysctl makes optimistic IPv6 addresses equivalent to preferred addresses for source address selection (e.g., when calling connect()), but it does not allow an application to bind to optimistic addresses. This behaviour is inconsistent - for example, it doesn't make sense for bind() to an optimistic address fail with EADDRNOTAVAIL, but connect() to choose that address outgoing address on the same socket. Bug: 17769720 Bug: 18609055 Change-Id: I9de0d6c92ac45e29d28e318ac626c71806666f13 Signed-off-by: Erik Kline Signed-off-by: Lorenzo Colitti --- net/ipv6/addrconf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e1381119a6d8..165cee964e75 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1525,7 +1525,9 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && - !(ifp->flags&IFA_F_TENTATIVE) && + (!(ifp->flags&IFA_F_TENTATIVE) || + (ipv6_use_optimistic_addr(ifp->idev) && + ifp->flags&IFA_F_OPTIMISTIC)) && (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock_bh(); From 805863ee7ef93d05cd654e069f5d7f00a0f4b257 Mon Sep 17 00:00:00 2001 From: Lianwei Wang Date: Tue, 2 Dec 2014 17:20:50 -0800 Subject: [PATCH 030/214] cpufreq: interactive: only boost tunable affected cpus It is not correct to boost all the cpus when tunable boost parameters are changed. It also does not need to boost the cpus which is already boosted. Signed-off-by: Lianwei Wang --- drivers/cpufreq/cpufreq_interactive.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c index 3f69013769ed..0d4a4f30efab 100644 --- a/drivers/cpufreq/cpufreq_interactive.c +++ b/drivers/cpufreq/cpufreq_interactive.c @@ -107,6 +107,7 @@ struct cpufreq_interactive_tunables { int boostpulse_duration_val; /* End time of boost pulse in ktime converted to usecs */ u64 boostpulse_endtime; + bool boosted; /* * Max additional time to wait in idle, beyond timer_rate, at speeds * above minimum before wakeup to reduce speed, or -1 if unnecessary. @@ -384,7 +385,6 @@ static void cpufreq_interactive_timer(unsigned long data) unsigned int loadadjfreq; unsigned int index; unsigned long flags; - bool boosted; if (!down_read_trylock(&pcpu->enable_sem)) return; @@ -404,9 +404,9 @@ static void cpufreq_interactive_timer(unsigned long data) do_div(cputime_speedadj, delta_time); loadadjfreq = (unsigned int)cputime_speedadj * 100; cpu_load = loadadjfreq / pcpu->target_freq; - boosted = tunables->boost_val || now < tunables->boostpulse_endtime; + tunables->boosted = tunables->boost_val || now < tunables->boostpulse_endtime; - if (cpu_load >= tunables->go_hispeed_load || boosted) { + if (cpu_load >= tunables->go_hispeed_load || tunables->boosted) { if (pcpu->target_freq < tunables->hispeed_freq) { new_freq = tunables->hispeed_freq; } else { @@ -467,7 +467,7 @@ static void cpufreq_interactive_timer(unsigned long data) * (or the indefinite boost is turned off). */ - if (!boosted || new_freq > tunables->hispeed_freq) { + if (!tunables->boosted || new_freq > tunables->hispeed_freq) { pcpu->floor_freq = new_freq; pcpu->floor_validate_time = now; } @@ -625,19 +625,21 @@ static int cpufreq_interactive_speedchange_task(void *data) return 0; } -static void cpufreq_interactive_boost(void) +static void cpufreq_interactive_boost(struct cpufreq_interactive_tunables *tunables) { int i; int anyboost = 0; unsigned long flags[2]; struct cpufreq_interactive_cpuinfo *pcpu; - struct cpufreq_interactive_tunables *tunables; + + tunables->boosted = true; spin_lock_irqsave(&speedchange_cpumask_lock, flags[0]); for_each_online_cpu(i) { pcpu = &per_cpu(cpuinfo, i); - tunables = pcpu->policy->governor_data; + if (tunables != pcpu->policy->governor_data) + continue; spin_lock_irqsave(&pcpu->target_freq_lock, flags[1]); if (pcpu->target_freq < tunables->hispeed_freq) { @@ -950,7 +952,8 @@ static ssize_t store_boost(struct cpufreq_interactive_tunables *tunables, if (tunables->boost_val) { trace_cpufreq_interactive_boost("on"); - cpufreq_interactive_boost(); + if (!tunables->boosted) + cpufreq_interactive_boost(tunables); } else { tunables->boostpulse_endtime = ktime_to_us(ktime_get()); trace_cpufreq_interactive_unboost("off"); @@ -972,7 +975,8 @@ static ssize_t store_boostpulse(struct cpufreq_interactive_tunables *tunables, tunables->boostpulse_endtime = ktime_to_us(ktime_get()) + tunables->boostpulse_duration_val; trace_cpufreq_interactive_boost("pulse"); - cpufreq_interactive_boost(); + if (!tunables->boosted) + cpufreq_interactive_boost(tunables); return count; } From 22fff283762550ddc75c2ae070e71321559a29ac Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Tue, 2 Dec 2014 15:59:25 -0800 Subject: [PATCH 031/214] mm: frontswap: invalidate expired data on a dup-store failure commit fb993fa1a2f669215fa03a09eed7848f2663e336 upstream. If a frontswap dup-store failed, it should invalidate the expired page in the backend, or it could trigger some data corruption issue. Such as: 1. use zswap as the frontswap backend with writeback feature 2. store a swap page(version_1) to entry A, success 3. dup-store a newer page(version_2) to the same entry A, fail 4. use __swap_writepage() write version_2 page to swapfile, success 5. zswap do shrink, writeback version_1 page to swapfile 6. version_2 page is overwrited by version_1, data corrupt. This patch fixes this issue by invalidating expired data immediately when meet a dup-store failure. Signed-off-by: Weijie Yang Cc: Konrad Rzeszutek Wilk Cc: Seth Jennings Cc: Dan Streetman Cc: Minchan Kim Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/frontswap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/frontswap.c b/mm/frontswap.c index 1b24bdcb3197..a55036a68487 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -244,8 +244,10 @@ int __frontswap_store(struct page *page) the (older) page from frontswap */ inc_frontswap_failed_stores(); - if (dup) + if (dup) { __frontswap_clear(sis, offset); + frontswap_ops->invalidate_page(type, offset); + } } if (frontswap_writethrough_enabled) /* report failure so swap also writes to swap device */ From 4542246879ccdb5f3cc58d8e66955246a74a9494 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 2 Dec 2014 15:59:39 -0800 Subject: [PATCH 032/214] mm: fix swapoff hang after page migration and fork commit 2022b4d18a491a578218ce7a4eca8666db895a73 upstream. I've been seeing swapoff hangs in recent testing: it's cycling around trying unsuccessfully to find an mm for some remaining pages of swap. I have been exercising swap and page migration more heavily recently, and now notice a long-standing error in copy_one_pte(): it's trying to add dst_mm to swapoff's mmlist when it finds a swap entry, but is doing so even when it's a migration entry or an hwpoison entry. Which wouldn't matter much, except it adds dst_mm next to src_mm, assuming src_mm is already on the mmlist: which may not be so. Then if pages are later swapped out from dst_mm, swapoff won't be able to find where to replace them. There's already a !non_swap_entry() test for stats: move that up before the swap_duplicate() and the addition to mmlist. Signed-off-by: Hugh Dickins Cc: Kelley Nielsen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 0984f398d746..0926ccd04d7a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -834,20 +834,20 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (!pte_file(pte)) { swp_entry_t entry = pte_to_swp_entry(pte); - if (swap_duplicate(entry) < 0) - return entry.val; + if (likely(!non_swap_entry(entry))) { + if (swap_duplicate(entry) < 0) + return entry.val; - /* make sure dst_mm is on swapoff's mmlist. */ - if (unlikely(list_empty(&dst_mm->mmlist))) { - spin_lock(&mmlist_lock); - if (list_empty(&dst_mm->mmlist)) - list_add(&dst_mm->mmlist, - &src_mm->mmlist); - spin_unlock(&mmlist_lock); - } - if (likely(!non_swap_entry(entry))) + /* make sure dst_mm is on swapoff's mmlist. */ + if (unlikely(list_empty(&dst_mm->mmlist))) { + spin_lock(&mmlist_lock); + if (list_empty(&dst_mm->mmlist)) + list_add(&dst_mm->mmlist, + &src_mm->mmlist); + spin_unlock(&mmlist_lock); + } rss[MM_SWAPENTS]++; - else if (is_migration_entry(entry)) { + } else if (is_migration_entry(entry)) { page = migration_entry_to_page(entry); if (PageAnon(page)) From 0a8c00c7edd66707bc60fb0dee21bdb2740baf4e Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 25 Nov 2014 20:28:24 -0600 Subject: [PATCH 033/214] xen-netfront: Remove BUGs on paged skb data which crosses a page boundary commit 8d609725d4357f499e2103e46011308b32f53513 upstream. These BUGs can be erroneously triggered by frags which refer to tail pages within a compound page. The data in these pages may overrun the hardware page while still being contained within the compound page, but since compound_order() evaluates to 0 for tail pages the assertion fails. The code already iterates through subsequent pages correctly in this scenario, so the BUGs are unnecessary and can be removed. Fixes: f36c374782e4 ("xen/netfront: handle compound page fragments on transmit") Signed-off-by: Seth Forshee Reviewed-by: David Vrabel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 82e0f1fd2254..a1db958df4a4 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -459,9 +459,6 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, len = skb_frag_size(frag); offset = frag->page_offset; - /* Data must not cross a page boundary. */ - BUG_ON(len + offset > PAGE_SIZE<> PAGE_SHIFT; offset &= ~PAGE_MASK; @@ -469,8 +466,6 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, while (len > 0) { unsigned long bytes; - BUG_ON(offset >= PAGE_SIZE); - bytes = PAGE_SIZE - offset; if (bytes > len) bytes = len; From daeac09879f0b1491bb5ebdb4405428f3ab95a21 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Tue, 18 Nov 2014 21:00:58 +0400 Subject: [PATCH 034/214] i2c: omap: fix NACK and Arbitration Lost irq handling commit 27caca9d2e01c92b26d0690f065aad093fea01c7 upstream. commit 1d7afc95946487945cc7f5019b41255b72224b70 (i2c: omap: ack IRQ in parts) changed the interrupt handler to complete transfers without clearing XRDY (AL case) and ARDY (NACK case) flags. XRDY or ARDY interrupts will be fired again. As a result, ISR keep processing transfer after it was already complete (from the driver code point of view). A didn't see real impacts of the 1d7afc9, but it is really bad idea to have ISR running on user data after transfer was complete. It looks, what 1d7afc9 violate TI specs in what how AL and NACK should be handled (see Note 1, sprugn4r, Figure 17-31 and Figure 17-32). According to specs (if I understood correctly), in case of NACK and AL driver must reset NACK, AL, ARDY, RDR, and RRDY (Master Receive Mode), and NACK, AL, ARDY, and XDR (Master Transmitter Mode). All that is done down the code under the if condition: if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK | OMAP_I2C_STAT_AL)) ... The patch restore pre 1d7afc9 logic of handling NACK and AL interrupts, so no interrupts is fired after ISR informs the rest of driver what transfer complete. Note: instead of removing break under NACK case, we could just replace 'break' with 'continue' and allow NACK transfer to finish using ARDY event. I found that NACK and ARDY bits usually set together. That case confirm TI wiki: http://processors.wiki.ti.com/index.php/I2C_Tips#Detecting_and_handling_NACK In order if someone interested in the event traces for NACK and AL cases, I sent them to mailing list. Tested on Beagleboard XM C. Signed-off-by: Alexander Kochetkov Fixes: 1d7afc9 i2c: omap: ack IRQ in parts Acked-by: Felipe Balbi Tested-by: Aaro Koskinen Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-omap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index b06be8e3bb76..1caa66929da0 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -928,14 +928,12 @@ omap_i2c_isr_thread(int this_irq, void *dev_id) if (stat & OMAP_I2C_STAT_NACK) { err |= OMAP_I2C_STAT_NACK; omap_i2c_ack_stat(dev, OMAP_I2C_STAT_NACK); - break; } if (stat & OMAP_I2C_STAT_AL) { dev_err(dev->dev, "Arbitration lost\n"); err |= OMAP_I2C_STAT_AL; omap_i2c_ack_stat(dev, OMAP_I2C_STAT_AL); - break; } /* From 2cf69220fb211f3d565675dc356bb01697080e5f Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Fri, 21 Nov 2014 04:16:51 +0400 Subject: [PATCH 035/214] i2c: omap: fix i207 errata handling commit ccfc866356674cb3a61829d239c685af6e85f197 upstream. commit 6d9939f651419a63e091105663821f9c7d3fec37 (i2c: omap: split out [XR]DR and [XR]RDY) changed the way how errata i207 (I2C: RDR Flag May Be Incorrectly Set) get handled. 6d9939f6514 code doesn't correspond to workaround provided by errata. According to errata ISR must filter out spurious RDR before data read not after. ISR must read RXSTAT to get number of bytes available to read. Because RDR could be set while there could no data in the receive FIFO. Restored pre 6d9939f6514 way of handling errata. Found by code review. Real impact haven't seen. Tested on Beagleboard XM C. Signed-off-by: Alexander Kochetkov Fixes: 6d9939f651419a63e09110 i2c: omap: split out [XR]DR and [XR]RDY Tested-by: Felipe Balbi Reviewed-by: Felipe Balbi Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-omap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 1caa66929da0..7645924f9f8b 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -958,11 +958,13 @@ omap_i2c_isr_thread(int this_irq, void *dev_id) if (dev->fifo_size) num_bytes = dev->buf_len; - omap_i2c_receive_data(dev, num_bytes, true); - - if (dev->errata & I2C_OMAP_ERRATA_I207) + if (dev->errata & I2C_OMAP_ERRATA_I207) { i2c_omap_errata_i207(dev, stat); + num_bytes = (omap_i2c_read_reg(dev, + OMAP_I2C_BUFSTAT_REG) >> 8) & 0x3F; + } + omap_i2c_receive_data(dev, num_bytes, true); omap_i2c_ack_stat(dev, OMAP_I2C_STAT_RDR); continue; } From 99dfbb31e4544a9a89632240b3ca2bd715636ed8 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Mon, 1 Dec 2014 17:34:04 +0200 Subject: [PATCH 036/214] i2c: davinci: generate STP always when NACK is received MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9ea359f7314132cbcb5a502d2d8ef095be1f45e4 upstream. According to I2C specification the NACK should be handled as follows: "When SDA remains HIGH during this ninth clock pulse, this is defined as the Not Acknowledge signal. The master can then generate either a STOP condition to abort the transfer, or a repeated START condition to start a new transfer." [I2C spec Rev. 6, 3.1.6: http://www.nxp.com/documents/user_manual/UM10204.pdf] Currently the Davinci i2c driver interrupts the transfer on receipt of a NACK but fails to send a STOP in some situations and so makes the bus stuck until next I2C IP reset (idle/enable). For example, the issue will happen during SMBus read transfer which consists from two i2c messages write command/address and read data: S Slave Address Wr A Command Code A Sr Slave Address Rd A D1..Dn A P <--- write -----------------------> <--- read ---------------------> The I2C client device will send NACK if it can't recognize "Command Code" and it's expected from I2C master to generate STP in this case. But now, Davinci i2C driver will just exit with -EREMOTEIO and STP will not be generated. Hence, fix it by generating Stop condition (STP) always when NACK is received. This patch fixes Davinci I2C in the same way it was done for OMAP I2C commit cda2109a26eb ("i2c: omap: query STP always when NACK is received"). Reviewed-by: Uwe Kleine-König Reported-by: Hein Tibosch Signed-off-by: Grygorii Strashko Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-davinci.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index cf20e06a88e1..09f29e92095a 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -414,11 +414,9 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) if (dev->cmd_err & DAVINCI_I2C_STR_NACK) { if (msg->flags & I2C_M_IGNORE_NAK) return msg->len; - if (stop) { - w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG); - w |= DAVINCI_I2C_MDR_STP; - davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, w); - } + w = davinci_i2c_read_reg(dev, DAVINCI_I2C_MDR_REG); + w |= DAVINCI_I2C_MDR_STP; + davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, w); return -EREMOTEIO; } return -EIO; From eae1f51fce481c2d3897ec49e6f457ffa8d7db3b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 27 Nov 2014 16:57:21 +0100 Subject: [PATCH 037/214] drm/radeon: kernel panic in drm_calc_vbltimestamp_from_scanoutpos with 3.18.0-rc6 commit f5475cc43c899e33098d4db44b7c5e710f16589d upstream. I was unable too boot 3.18.0-rc6 because of the following kernel panic in drm_calc_vbltimestamp_from_scanoutpos(): [drm] Initialized drm 1.1.0 20060810 [drm] radeon kernel modesetting enabled. [drm] initializing kernel modesetting (RV100 0x1002:0x515E 0x15D9:0x8080). [drm] register mmio base: 0xC8400000 [drm] register mmio size: 65536 radeon 0000:0b:01.0: VRAM: 128M 0x00000000D0000000 - 0x00000000D7FFFFFF (16M used) radeon 0000:0b:01.0: GTT: 512M 0x00000000B0000000 - 0x00000000CFFFFFFF [drm] Detected VRAM RAM=128M, BAR=128M [drm] RAM width 16bits DDR [TTM] Zone kernel: Available graphics memory: 3829346 kiB [TTM] Zone dma32: Available graphics memory: 2097152 kiB [TTM] Initializing pool allocator [TTM] Initializing DMA pool allocator [drm] radeon: 16M of VRAM memory ready [drm] radeon: 512M of GTT memory ready. [drm] GART: num cpu pages 131072, num gpu pages 131072 [drm] PCI GART of 512M enabled (table at 0x0000000037880000). radeon 0000:0b:01.0: WB disabled radeon 0000:0b:01.0: fence driver on ring 0 use gpu addr 0x00000000b0000000 and cpu addr 0xffff8800bbbfa000 [drm] Supports vblank timestamp caching Rev 2 (21.10.2013). [drm] Driver supports precise vblank timestamp query. [drm] radeon: irq initialized. [drm] Loading R100 Microcode radeon 0000:0b:01.0: Direct firmware load for radeon/R100_cp.bin failed with error -2 radeon_cp: Failed to load firmware "radeon/R100_cp.bin" [drm:r100_cp_init] *ERROR* Failed to load firmware! radeon 0000:0b:01.0: failed initializing CP (-2). radeon 0000:0b:01.0: Disabling GPU acceleration [drm] radeon: cp finalized BUG: unable to handle kernel NULL pointer dereference at 000000000000025c IP: [] drm_calc_vbltimestamp_from_scanoutpos+0x4b/0x320 PGD 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 3.18.0-rc6-4-default #2649 Hardware name: Supermicro X7DB8/X7DB8, BIOS 6.00 07/26/2006 task: ffff880234da2010 ti: ffff880234da4000 task.ti: ffff880234da4000 RIP: 0010:[] [] drm_calc_vbltimestamp_from_scanoutpos+0x4b/0x320 RSP: 0000:ffff880234da7918 EFLAGS: 00010086 RAX: ffffffff81557890 RBX: 0000000000000000 RCX: ffff880234da7a48 RDX: ffff880234da79f4 RSI: 0000000000000000 RDI: ffff880232e15000 RBP: ffff880234da79b8 R08: 0000000000000000 R09: 0000000000000000 R10: 000000000000000a R11: 0000000000000001 R12: ffff880232dda1c0 R13: ffff880232e1518c R14: 0000000000000292 R15: ffff880232e15000 FS: 0000000000000000(0000) GS:ffff88023fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000000000025c CR3: 0000000002014000 CR4: 00000000000007e0 Stack: ffff880234da79d8 0000000000000286 ffff880232dcbc00 0000000000002480 ffff880234da7958 0000000000000296 ffff880234da7998 ffffffff8151b51d ffff880234da7a48 0000000032dcbeb0 ffff880232dcbc00 ffff880232dcbc58 Call Trace: [] ? drm_vma_offset_remove+0x1d/0x110 [] radeon_get_vblank_timestamp_kms+0x38/0x60 [] ? ttm_bo_release_list+0xba/0x180 [] drm_get_last_vbltimestamp+0x41/0x70 [] vblank_disable_and_save+0x73/0x1d0 [] ? try_to_del_timer_sync+0x4f/0x70 [] drm_vblank_cleanup+0x65/0xa0 [] radeon_irq_kms_fini+0x1a/0x70 [] r100_init+0x26e/0x410 [] radeon_device_init+0x7ae/0xb50 [] radeon_driver_load_kms+0x8f/0x210 [] drm_dev_register+0xb5/0x110 [] drm_get_pci_dev+0x8f/0x200 [] radeon_pci_probe+0xad/0xe0 [] local_pci_probe+0x45/0xa0 [] pci_device_probe+0xd1/0x130 [] driver_probe_device+0x12d/0x3e0 [] __driver_attach+0x9b/0xa0 [] ? __device_attach+0x40/0x40 [] bus_for_each_dev+0x63/0xa0 [] driver_attach+0x1e/0x20 [] bus_add_driver+0x180/0x240 [] driver_register+0x64/0xf0 [] __pci_register_driver+0x4c/0x50 [] drm_pci_init+0xf5/0x120 [] ? ttm_init+0x6a/0x6a [] radeon_init+0x97/0xb5 [] do_one_initcall+0xbc/0x1f0 [] ? __wake_up+0x48/0x60 [] kernel_init_freeable+0x18a/0x215 [] ? initcall_blacklist+0xc0/0xc0 [] ? rest_init+0x80/0x80 [] kernel_init+0xe/0xf0 [] ret_from_fork+0x7c/0xb0 [] ? rest_init+0x80/0x80 Code: 45 ac 0f 88 a8 01 00 00 3b b7 d0 01 00 00 49 89 ff 0f 83 99 01 00 00 48 8b 47 20 48 8b 80 88 00 00 00 48 85 c0 0f 84 cd 01 00 00 <41> 8b b1 5c 02 00 00 41 8b 89 58 02 00 00 89 75 98 41 8b b1 60 RIP [] drm_calc_vbltimestamp_from_scanoutpos+0x4b/0x320 RSP CR2: 000000000000025c ---[ end trace ad2c0aadf48e2032 ]--- Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009 It has helped me to add a NULL pointer check that was suggested at http://lists.freedesktop.org/archives/dri-devel/2014-October/070663.html I am not familiar with the code. But the change looks sane and we need something fast at this stage of 3.18 development. Suggested-by: Helge Deller Signed-off-by: Petr Mladek Tested-by: Petr Mladek Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_kms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 1113e8f69137..2c3c4c58a765 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -666,6 +666,8 @@ int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, /* Get associated drm_crtc: */ drmcrtc = &rdev->mode_info.crtcs[crtc]->base; + if (!drmcrtc) + return -EINVAL; /* Helper routine in DRM core does all the work: */ return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc, max_error, From ca372d75eb86d01367ca8e5f6f684eb30a8b8195 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 1 Dec 2014 17:56:54 +0100 Subject: [PATCH 038/214] drm/i915: Unlock panel even when LVDS is disabled commit b0616c5306b342ceca07044dbc4f917d95c4f825 upstream. Otherwise we'll have backtraces in assert_panel_unlocked because the BIOS locks the register. In the reporter's case this regression was introduced in commit c31407a3672aaebb4acddf90944a114fa5c8af7b Author: Chris Wilson Date: Thu Oct 18 21:07:01 2012 +0100 drm/i915: Add no-lvds quirk for Supermicro X7SPA-H Reported-by: Alexey Orishko Cc: Alexey Orishko Cc: Chris Wilson Cc: Francois Tigeot Signed-off-by: Daniel Vetter Tested-by: Alexey Orishko Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lvds.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 08e8e18b3f85..f5d1dc5b5563 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -1097,6 +1097,17 @@ bool intel_lvds_init(struct drm_device *dev) int pipe; u8 pin; + /* + * Unlock registers and just leave them unlocked. Do this before + * checking quirk lists to avoid bogus WARNINGs. + */ + if (HAS_PCH_SPLIT(dev)) { + I915_WRITE(PCH_PP_CONTROL, + I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); + } else { + I915_WRITE(PP_CONTROL, + I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); + } if (!intel_lvds_supported(dev)) return false; @@ -1280,17 +1291,6 @@ out: DRM_DEBUG_KMS("detected %s-link lvds configuration\n", lvds_encoder->is_dual_link ? "dual" : "single"); - /* - * Unlock registers and just - * leave them unlocked - */ - if (HAS_PCH_SPLIT(dev)) { - I915_WRITE(PCH_PP_CONTROL, - I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); - } else { - I915_WRITE(PP_CONTROL, - I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); - } lvds_connector->lid_notifier.notifier_call = intel_lid_notify; if (acpi_lid_notifier_register(&lvds_connector->lid_notifier)) { DRM_DEBUG_KMS("lid notifier registration failed\n"); From df3300033604498941be3519b2c403977c148674 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 6 Nov 2014 17:49:45 -0300 Subject: [PATCH 039/214] media: smiapp: Only some selection targets are settable commit b31eb901c4e5eeef4c83c43dfbc7fe0d4348cb21 upstream. Setting a non-settable selection target caused BUG() to be called. The check for valid selections only takes the selection target into account, but does not tell whether it may be set, or only get. Fix the issue by simply returning an error to the user. Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/smiapp/smiapp-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c index cae4f4683851..b280216de31b 100644 --- a/drivers/media/i2c/smiapp/smiapp-core.c +++ b/drivers/media/i2c/smiapp/smiapp-core.c @@ -2139,7 +2139,7 @@ static int smiapp_set_selection(struct v4l2_subdev *subdev, ret = smiapp_set_compose(subdev, fh, sel); break; default: - BUG(); + ret = -EINVAL; } mutex_unlock(&sensor->mutex); From 4c2098f864b10dbb018ae82d50ffa0dfb0928175 Mon Sep 17 00:00:00 2001 From: Devin Ryles Date: Fri, 7 Nov 2014 17:59:05 -0500 Subject: [PATCH 040/214] AHCI: Add DeviceIDs for Sunrise Point-LP SATA controller commit 249cd0a187ed4ef1d0af7f74362cc2791ec5581b upstream. This patch adds DeviceIDs for Sunrise Point-LP. Signed-off-by: Devin Ryles Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 64150a9ffff3..e19752634b2a 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -320,6 +320,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x8c87), board_ahci }, /* 9 Series RAID */ { PCI_VDEVICE(INTEL, 0x8c8e), board_ahci }, /* 9 Series RAID */ { PCI_VDEVICE(INTEL, 0x8c8f), board_ahci }, /* 9 Series RAID */ + { PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */ + { PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */ { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */ { PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */ From 7d1da805838186a88316ef3b08df458b1acd6296 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 4 Dec 2014 13:13:28 -0500 Subject: [PATCH 041/214] ahci: disable MSI on SAMSUNG 0xa800 SSD commit 2b21ef0aae65f22f5ba86b13c4588f6f0c2dbefb upstream. Just like 0x1600 which got blacklisted by 66a7cbc303f4 ("ahci: disable MSI instead of NCQ on Samsung pci-e SSDs on macbooks"), 0xa800 chokes on NCQ commands if MSI is enabled. Disable MSI. Signed-off-by: Tejun Heo Reported-by: Dominik Mierzejewski Link: https://bugzilla.kernel.org/show_bug.cgi?id=89171 Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e19752634b2a..9064a2f2760c 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -494,6 +494,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { * enabled. https://bugzilla.kernel.org/show_bug.cgi?id=60731 */ { PCI_VDEVICE(SAMSUNG, 0x1600), board_ahci_nomsi }, + { PCI_VDEVICE(SAMSUNG, 0xa800), board_ahci_nomsi }, /* Enmotus */ { PCI_DEVICE(0x1c44, 0x8000), board_ahci }, From 69052167b48fb6878001073cfe44f5b1949538ae Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 14 Nov 2014 13:39:05 -0800 Subject: [PATCH 042/214] sata_fsl: fix error handling of irq_of_parse_and_map commit aad0b624129709c94c2e19e583b6053520353fa8 upstream. irq_of_parse_and_map() returns 0 on error (the result is unsigned int), so testing for negative result never works. Signed-off-by: Dmitry Torokhov Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/sata_fsl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 8401061b4040..38a2389f5b1b 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1501,7 +1501,7 @@ static int sata_fsl_probe(struct platform_device *ofdev) host_priv->csr_base = csr_base; irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (irq < 0) { + if (!irq) { dev_err(&ofdev->dev, "invalid irq from platform\n"); goto error_exit_with_cleanup; } From c556ebe8c839cdf2f4e03d5daab4f9d0df35d935 Mon Sep 17 00:00:00 2001 From: Yuri Chislov Date: Mon, 24 Nov 2014 11:25:15 +0100 Subject: [PATCH 043/214] ipv6: gre: fix wrong skb->protocol in WCCP [ Upstream commit be6572fdb1bfbe23b2624d477de50af50b02f5d6 ] When using GRE redirection in WCCP, it sets the wrong skb->protocol, that is, ETH_P_IP instead of ETH_P_IPV6 for the encapuslated traffic. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Cc: Dmitry Kozlov Signed-off-by: Yuri Chislov Tested-by: Yuri Chislov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6c20f4731f1a..65156a73b3f3 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -512,11 +512,11 @@ static int ip6gre_rcv(struct sk_buff *skb) skb->protocol = gre_proto; /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP + * - Change protocol to IPv6 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; } From a72e06f5ed0af18baecee57adf2659583de8cdbf Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 25 Nov 2014 14:21:11 -0200 Subject: [PATCH 044/214] tg3: fix ring init when there are more TX than RX channels [ Upstream commit a620a6bc1c94c22d6c312892be1e0ae171523125 ] If TX channels are set to 4 and RX channels are set to less than 4, using ethtool -L, the driver will try to initialize more RX channels than it has allocated, causing an oops. This fix only initializes the RX ring if it has been allocated. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 3de4069f020e..5501cad30cfa 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8392,7 +8392,8 @@ static int tg3_init_rings(struct tg3 *tp) if (tnapi->rx_rcb) memset(tnapi->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp)); - if (tg3_rx_prodring_alloc(tp, &tnapi->prodring)) { + if (tnapi->prodring.rx_std && + tg3_rx_prodring_alloc(tp, &tnapi->prodring)) { tg3_free_rings(tp); return -ENOMEM; } From cfcff6abc8bc6c4b9aa9e9cc93c2924a5c96b22d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 25 Nov 2014 11:54:31 +0200 Subject: [PATCH 045/214] net/mlx4_core: Limit count field to 24 bits in qp_alloc_res [ Upstream commit 2d5c57d7fbfaa642fb7f0673df24f32b83d9066c ] Some VF drivers use the upper byte of "param1" (the qp count field) in mlx4_qp_reserve_range() to pass flags which are used to optimize the range allocation. Under the current code, if any of these flags are set, the 32-bit count field yields a count greater than 2^24, which is out of range, and this VF fails. As these flags represent a "best-effort" allocation hint anyway, they may safely be ignored. Therefore, the PF driver may simply mask out the bits. Fixes: c82e9aa0a8 "mlx4_core: resource tracking for HCA resources used by guests" Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1157f028a90f..6cc808865e95 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1207,7 +1207,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, switch (op) { case RES_OP_RESERVE: - count = get_param_l(&in_param); + count = get_param_l(&in_param) & 0xffffff; align = get_param_h(&in_param); err = __mlx4_qp_reserve_range(dev, count, align, &base); if (err) From 1973ac37c15ab85b170aa166b6192f45c03a3a5d Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 27 Nov 2014 10:16:15 +0100 Subject: [PATCH 046/214] rtnetlink: release net refcnt on error in do_setlink() [ Upstream commit e0ebde0e131b529fd721b24f62872def5ec3718c ] rtnl_link_get_net() holds a reference on the 'struct net', we need to release it in case of error. CC: Eric W. Biederman Fixes: b51642f6d77b ("net: Enable a userns root rtnl calls that are safe for unprivilged users") Signed-off-by: Nicolas Dichtel Reviewed-by: "Eric W. Biederman" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ae43dd807bb2..25c4dd563a79 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1318,6 +1318,7 @@ static int do_setlink(const struct sk_buff *skb, goto errout; } if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); err = -EPERM; goto errout; } From 8df2d2d784394efd9ab16d8f05bdc49ce46ac32c Mon Sep 17 00:00:00 2001 From: willy tarreau Date: Tue, 2 Dec 2014 08:13:04 +0100 Subject: [PATCH 047/214] net: mvneta: fix Tx interrupt delay [ Upstream commit aebea2ba0f7495e1a1c9ea5e753d146cb2f6b845 ] The mvneta driver sets the amount of Tx coalesce packets to 16 by default. Normally that does not cause any trouble since the driver uses a much larger Tx ring size (532 packets). But some sockets might run with very small buffers, much smaller than the equivalent of 16 packets. This is what ping is doing for example, by setting SNDBUF to 324 bytes rounded up to 2kB by the kernel. The problem is that there is no documented method to force a specific packet to emit an interrupt (eg: the last of the ring) nor is it possible to make the NIC emit an interrupt after a given delay. In this case, it causes trouble, because when ping sends packets over its raw socket, the few first packets leave the system, and the first 15 packets will be emitted without an IRQ being generated, so without the skbs being freed. And since the socket's buffer is small, there's no way to reach that amount of packets, and the ping ends up with "send: no buffer available" after sending 6 packets. Running with 3 instances of ping in parallel is enough to hide the problem, because with 6 packets per instance, that's 18 packets total, which is enough to grant a Tx interrupt before all are sent. The original driver in the LSP kernel worked around this design flaw by using a software timer to clean up the Tx descriptors. This timer was slow and caused terrible network performance on some Tx-bound workloads (such as routing) but was enough to make tools like ping work correctly. Instead here, we simply set the packet counts before interrupt to 1. This ensures that each packet sent will produce an interrupt. NAPI takes care of coalescing interrupts since the interrupt is disabled once generated. No measurable performance impact nor CPU usage were observed on small nor large packets, including when saturating the link on Tx, and this fixes tools like ping which rely on too small a send buffer. If one wants to increase this value for certain workloads where it is safe to do so, "ethtool -C $dev tx-frames" will override this default setting. This fix needs to be applied to stable kernels starting with 3.10. Tested-By: Maggie Mae Roxas Signed-off-by: Willy Tarreau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f8821ce27802..8b6c9237eda4 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -210,7 +210,7 @@ /* Various constants */ /* Coalescing */ -#define MVNETA_TXDONE_COAL_PKTS 16 +#define MVNETA_TXDONE_COAL_PKTS 1 #define MVNETA_RX_COAL_PKTS 32 #define MVNETA_RX_COAL_USEC 100 From 751e5624915f39d16f935a06002cba2a4712df42 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Dec 2014 12:13:58 +0100 Subject: [PATCH 048/214] net: sctp: use MAX_HEADER for headroom reserve in output path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9772b54c55266ce80c639a80aa68eeb908f8ecf5 ] To accomodate for enough headroom for tunnels, use MAX_HEADER instead of LL_MAX_HEADER. Robert reported that he has hit after roughly 40hrs of trinity an skb_under_panic() via SCTP output path (see reference). I couldn't reproduce it from here, but not using MAX_HEADER as elsewhere in other protocols might be one possible cause for this. In any case, it looks like accounting on chunks themself seems to look good as the skb already passed the SCTP output path and did not hit any skb_over_panic(). Given tunneling was enabled in his .config, the headroom would have been expanded by MAX_HEADER in this case. Reported-by: Robert Święcki Reference: https://lkml.org/lkml/2014/12/1/507 Fixes: 594ccc14dfe4d ("[SCTP] Replace incorrect use of dev_alloc_skb with alloc_skb in sctp_packet_transmit().") Signed-off-by: Daniel Borkmann Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index b6f5fc3127b9..73b8ca51ba14 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -413,12 +413,12 @@ int sctp_packet_transmit(struct sctp_packet *packet) sk = chunk->skb->sk; /* Allocate the new skb. */ - nskb = alloc_skb(packet->size + LL_MAX_HEADER, GFP_ATOMIC); + nskb = alloc_skb(packet->size + MAX_HEADER, GFP_ATOMIC); if (!nskb) goto nomem; /* Make sure the outbound skb has enough header room reserved. */ - skb_reserve(nskb, packet->overhead + LL_MAX_HEADER); + skb_reserve(nskb, packet->overhead + MAX_HEADER); /* Set the owning socket so that we know where to get the * destination IP address. From 4ca178ae0f426c595e62ebde92fc713255ccd101 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Mon, 5 Aug 2013 11:07:17 +0300 Subject: [PATCH 049/214] nEPT: Nested INVEPT commit bfd0a56b90005f8c8a004baf407ad90045c2b11e upstream. If we let L1 use EPT, we should probably also support the INVEPT instruction. In our current nested EPT implementation, when L1 changes its EPT table for L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in the course of this modification already calls INVEPT. But if last level of shadow page is unsync not all L1's changes to EPT12 are intercepted, which means roots need to be synced when L1 calls INVEPT. Global INVEPT should not be different since roots are synced by kvm_mmu_load() each time EPTP02 changes. Reviewed-by: Xiao Guangrong Signed-off-by: Nadav Har'El Signed-off-by: Jun Nakajima Signed-off-by: Xinhao Xu Signed-off-by: Yang Zhang Signed-off-by: Gleb Natapov Signed-off-by: Paolo Bonzini [bwh: Backported to 3.2: - Adjust context, filename - Simplify handle_invept() as recommended by Paolo - nEPT is not supported so we always raise #UD] Signed-off-by: Ben Hutchings Cc: Vinson Lee Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kvm/vmx.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d651082c7cf7..7a34e8fe54bd 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -65,6 +65,7 @@ #define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 #define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 51139ff34917..7112be5f1eaf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6242,6 +6242,12 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) return 1; } +static int handle_invept(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -6286,6 +6292,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, + [EXIT_REASON_INVEPT] = handle_invept, }; static const int kvm_vmx_max_exit_handlers = @@ -6512,6 +6519,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: + case EXIT_REASON_INVEPT: /* * VMX instructions trap unconditionally. This allows L1 to * emulate them for its L2 guest, i.e., allows 3-level nesting! From b083f7cd529b45344f3129d5de9a73a06dd1cc7e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Dec 2013 11:20:06 +0100 Subject: [PATCH 050/214] ext2: Fix oops in ext2_get_block() called from ext2_quota_write() commit df4e7ac0bb70abc97fbfd9ef09671fc084b3f9db upstream. ext2_quota_write() doesn't properly setup bh it passes to ext2_get_block() and thus we hit assertion BUG_ON(maxblocks == 0) in ext2_get_blocks() (or we could actually ask for mapping arbitrary number of blocks depending on whatever value was on stack). Fix ext2_quota_write() to properly fill in number of blocks to map. Reviewed-by: "Theodore Ts'o" Reviewed-by: Christoph Hellwig Reported-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/ext2/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 288534920fe5..20d6697bd638 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, sb->s_blocksize - offset : towrite; tmp_bh.b_state = 0; + tmp_bh.b_size = sb->s_blocksize; err = ext2_get_block(inode, blk, &tmp_bh, 1); if (err < 0) goto out; From e6a18c108e386866325b3b6e986f612ee512787c Mon Sep 17 00:00:00 2001 From: Todd Fujinaka Date: Tue, 17 Jun 2014 06:58:11 +0000 Subject: [PATCH 051/214] igb: bring link up when PHY is powered up commit aec653c43b0c55667355e26d7de1236bda9fb4e3 upstream. Call igb_setup_link() when the PHY is powered up. Signed-off-by: Todd Fujinaka Reported-by: Jeff Westfahl Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Cc: Vincent Donnefort Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/igb/igb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 4d3c8122e2aa..3f342fbe9ccf 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1584,6 +1584,8 @@ void igb_power_up_link(struct igb_adapter *adapter) igb_power_up_phy_copper(&adapter->hw); else igb_power_up_serdes_link_82575(&adapter->hw); + + igb_setup_link(&adapter->hw); } /** From 7f45ce8f415f1aed22af1d0a8c551053ad540c8b Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 17 Jun 2013 15:40:58 -0700 Subject: [PATCH 052/214] ARM: sched_clock: Load cycle count after epoch stabilizes commit 336ae1180df5f69b9e0fb6561bec01c5f64361cf upstream. There is a small race between when the cycle count is read from the hardware and when the epoch stabilizes. Consider this scenario: CPU0 CPU1 ---- ---- cyc = read_sched_clock() cyc_to_sched_clock() update_sched_clock() ... cd.epoch_cyc = cyc; epoch_cyc = cd.epoch_cyc; ... epoch_ns + cyc_to_ns((cyc - epoch_cyc) The cyc on cpu0 was read before the epoch changed. But we calculate the nanoseconds based on the new epoch by subtracting the new epoch from the old cycle count. Since epoch is most likely larger than the old cycle count we calculate a large number that will be converted to nanoseconds and added to epoch_ns, causing time to jump forward too much. Fix this problem by reading the hardware after the epoch has stabilized. Cc: Russell King Signed-off-by: Stephen Boyd Signed-off-by: John Stultz Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/sched_clock.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c index e8edcaa0e432..a57cc5d33540 100644 --- a/arch/arm/kernel/sched_clock.c +++ b/arch/arm/kernel/sched_clock.c @@ -51,10 +51,11 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) return (cyc * mult) >> shift; } -static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) +static unsigned long long notrace sched_clock_32(void) { u64 epoch_ns; u32 epoch_cyc; + u32 cyc; if (cd.suspended) return cd.epoch_ns; @@ -73,7 +74,9 @@ static unsigned long long notrace cyc_to_sched_clock(u32 cyc, u32 mask) smp_rmb(); } while (epoch_cyc != cd.epoch_cyc_copy); - return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, cd.mult, cd.shift); + cyc = read_sched_clock(); + cyc = (cyc - epoch_cyc) & sched_clock_mask; + return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); } /* @@ -165,12 +168,6 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) pr_debug("Registered %pF as sched_clock source\n", read); } -static unsigned long long notrace sched_clock_32(void) -{ - u32 cyc = read_sched_clock(); - return cyc_to_sched_clock(cyc, sched_clock_mask); -} - unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32; unsigned long long notrace sched_clock(void) From 4088246f870e3d64b70cf4ff597910d0612d9893 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 27 Nov 2014 08:11:28 +1100 Subject: [PATCH 053/214] powerpc: 32 bit getcpu VDSO function uses 64 bit instructions commit 152d44a853e42952f6c8a504fb1f8eefd21fd5fd upstream. I used some 64 bit instructions when adding the 32 bit getcpu VDSO function. Fix it. Fixes: 18ad51dd342a ("powerpc: Add VDSO version of getcpu") Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/vdso32/getcpu.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index 47afd08c90f7..fe7e97a1aad9 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -30,8 +30,8 @@ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc mfspr r5,SPRN_USPRG3 - cmpdi cr0,r3,0 - cmpdi cr1,r4,0 + cmpwi cr0,r3,0 + cmpwi cr1,r4,0 clrlwi r6,r5,16 rlwinm r7,r5,16,31-15,31-0 beq cr0,1f From 4b36a914ae129171631ade18ff3e6c417e21a84c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 6 Dec 2014 18:02:55 +0100 Subject: [PATCH 054/214] ALSA: usb-audio: Don't resubmit pending URBs at MIDI error recovery commit 66139a48cee1530c91f37c145384b4ee7043f0b7 upstream. In snd_usbmidi_error_timer(), the driver tries to resubmit MIDI input URBs to reactivate the MIDI stream, but this causes the error when some of URBs are still pending like: WARNING: CPU: 0 PID: 0 at ../drivers/usb/core/urb.c:339 usb_submit_urb+0x5f/0x70() URB ef705c40 submitted while active CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.16.6-2-desktop #1 Hardware name: FOXCONN TPS01/TPS01, BIOS 080015 03/23/2010 c0984bfa f4009ed4 c078deaf f4009ee4 c024c884 c09a135c f4009f00 00000000 c0984bfa 00000153 c061ac4f c061ac4f 00000009 00000001 ef705c40 e854d1c0 f4009eec c024c8d3 00000009 f4009ee4 c09a135c f4009f00 f4009f04 c061ac4f Call Trace: [] try_stack_unwind+0x156/0x170 [] dump_trace+0x5a/0x1b0 [] show_trace_log_lvl+0x46/0x50 [] show_stack_log_lvl+0x51/0xe0 [] show_stack+0x27/0x50 [] dump_stack+0x45/0x65 [] warn_slowpath_common+0x84/0xa0 [] warn_slowpath_fmt+0x33/0x40 [] usb_submit_urb+0x5f/0x70 [] snd_usbmidi_submit_urb+0x14/0x60 [snd_usbmidi_lib] [] snd_usbmidi_error_timer+0x6a/0xa0 [snd_usbmidi_lib] [] call_timer_fn+0x30/0x130 [] run_timer_softirq+0x1c2/0x260 [] __do_softirq+0xc3/0x270 [] do_softirq_own_stack+0x22/0x30 [] irq_exit+0x8d/0xa0 [] smp_apic_timer_interrupt+0x38/0x50 [] apic_timer_interrupt+0x34/0x3c [] cpuidle_enter_state+0x3e/0xd0 [] cpu_idle_loop+0x29d/0x3e0 [] cpu_startup_entry+0x53/0x60 [] start_kernel+0x415/0x41a For avoiding these errors, check the pending URBs and skip resubmitting such ones. Reported-and-tested-by: Stefan Seyfried Acked-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 8e01fa4991c5..93249133aeec 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -364,6 +364,8 @@ static void snd_usbmidi_error_timer(unsigned long data) if (in && in->error_resubmit) { in->error_resubmit = 0; for (j = 0; j < INPUT_URBS; ++j) { + if (atomic_read(&in->urbs[j]->use_count)) + continue; in->urbs[j]->dev = umidi->dev; snd_usbmidi_submit_urb(in->urbs[j], GFP_ATOMIC); } From a472efc75989c7092187fe00f0400e02c495c436 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Dec 2014 09:09:56 -0800 Subject: [PATCH 055/214] Linux 3.10.63 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a0b86250d6ca..9383fe24baa9 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 62 +SUBLEVEL = 63 EXTRAVERSION = NAME = TOSSUG Baby Fish From 49536e1f387afc5b42f447d382c3ea3fe4817520 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Tue, 16 Dec 2014 15:59:51 -0800 Subject: [PATCH 056/214] video: adf: fix wrong bitops in adf_modeinfo_to_fb_videomode() Change-Id: I1296153e382c0b66b713a0e7d09665ed5961f13d Signed-off-by: Greg Hackmann Reported-by: Dan Carpenter --- drivers/video/adf/adf_fbdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/video/adf/adf_fbdev.c b/drivers/video/adf/adf_fbdev.c index 9d3c245850af..a5b53bc08c3f 100644 --- a/drivers/video/adf/adf_fbdev.c +++ b/drivers/video/adf/adf_fbdev.c @@ -136,13 +136,13 @@ void adf_modeinfo_to_fb_videomode(const struct drm_mode_modeinfo *mode, vmode->vsync_len = mode->vsync_end - mode->vsync_start; vmode->sync = 0; - if (mode->flags | DRM_MODE_FLAG_PHSYNC) + if (mode->flags & DRM_MODE_FLAG_PHSYNC) vmode->sync |= FB_SYNC_HOR_HIGH_ACT; - if (mode->flags | DRM_MODE_FLAG_PVSYNC) + if (mode->flags & DRM_MODE_FLAG_PVSYNC) vmode->sync |= FB_SYNC_VERT_HIGH_ACT; - if (mode->flags | DRM_MODE_FLAG_PCSYNC) + if (mode->flags & DRM_MODE_FLAG_PCSYNC) vmode->sync |= FB_SYNC_COMP_HIGH_ACT; - if (mode->flags | DRM_MODE_FLAG_BCAST) + if (mode->flags & DRM_MODE_FLAG_BCAST) vmode->sync |= FB_SYNC_BROADCAST; vmode->vmode = 0; From a6f79c5f0b1f34e8de72834eaa54e7ecba2efad9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Jul 2013 16:05:54 -0700 Subject: [PATCH 057/214] driver core: device.h: add RW and RO attribute macros Make it easier to create attributes without having to always audit the mode settings. Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman (cherry picked from commit aead4f11289457e4c4dd8c65e901740fe18e5750) Signed-off-by: Mathieu Poirier --- include/linux/device.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index c0a126125325..de1d2860fca5 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -47,7 +47,11 @@ struct bus_attribute { }; #define BUS_ATTR(_name, _mode, _show, _store) \ -struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store) + struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define BUS_ATTR_RW(_name) \ + struct bus_attribute bus_attr_##_name = __ATTR_RW(_name) +#define BUS_ATTR_RO(_name) \ + struct bus_attribute bus_attr_##_name = __ATTR_RO(_name) extern int __must_check bus_create_file(struct bus_type *, struct bus_attribute *); @@ -253,9 +257,12 @@ struct driver_attribute { size_t count); }; -#define DRIVER_ATTR(_name, _mode, _show, _store) \ -struct driver_attribute driver_attr_##_name = \ - __ATTR(_name, _mode, _show, _store) +#define DRIVER_ATTR(_name, _mode, _show, _store) \ + struct driver_attribute driver_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define DRIVER_ATTR_RW(_name) \ + struct driver_attribute driver_attr_##_name = __ATTR_RW(_name) +#define DRIVER_ATTR_RO(_name) \ + struct driver_attribute driver_attr_##_name = __ATTR_RO(_name) extern int __must_check driver_create_file(struct device_driver *driver, const struct driver_attribute *attr); @@ -406,8 +413,12 @@ struct class_attribute { const struct class_attribute *attr); }; -#define CLASS_ATTR(_name, _mode, _show, _store) \ -struct class_attribute class_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define CLASS_ATTR(_name, _mode, _show, _store) \ + struct class_attribute class_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define CLASS_ATTR_RW(_name) \ + struct class_attribute class_attr_##_name = __ATTR_RW(_name) +#define CLASS_ATTR_RO(_name) \ + struct class_attribute class_attr_##_name = __ATTR_RO(_name) extern int __must_check class_create_file(struct class *class, const struct class_attribute *attr); @@ -415,7 +426,6 @@ extern void class_remove_file(struct class *class, const struct class_attribute *attr); /* Simple class attribute that is just a static string */ - struct class_attribute_string { struct class_attribute attr; char *str; @@ -504,6 +514,10 @@ ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, #define DEVICE_ATTR(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) +#define DEVICE_ATTR_RW(_name) \ + struct device_attribute dev_attr_##_name = __ATTR_RW(_name) +#define DEVICE_ATTR_RO(_name) \ + struct device_attribute dev_attr_##_name = __ATTR_RO(_name) #define DEVICE_ULONG_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) } From 2990c832abedddddce89e9633cbcbceb355feede Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 8 Oct 2014 12:02:38 -0600 Subject: [PATCH 058/214] of: back-porting v4l2 graph bindings This patch is back-porting the generic _portion_ of fd9fdb78a9bf85b94fb2190c82ff280c8f8375cc to 3.10 Signed-off-by: Mathieu Poirier --- drivers/of/base.c | 120 +++++++++++++++++++++++++++++++++++++++ include/linux/of_graph.h | 46 +++++++++++++++ 2 files changed, 166 insertions(+) create mode 100644 include/linux/of_graph.h diff --git a/drivers/of/base.c b/drivers/of/base.c index a6f584a7f4a1..4caad01b6fec 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1703,3 +1703,123 @@ const char *of_prop_next_string(struct property *prop, const char *cur) return curv; } EXPORT_SYMBOL_GPL(of_prop_next_string); + +/** + * of_graph_get_next_endpoint() - get next endpoint node + * @parent: pointer to the parent device node + * @prev: previous endpoint node, or NULL to get first + * + * Return: An 'endpoint' node pointer with refcount incremented. Refcount + * of the passed @prev node is not decremented, the caller have to use + * of_node_put() on it when done. + */ +struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, + struct device_node *prev) +{ + struct device_node *endpoint; + struct device_node *port; + + if (!parent) + return NULL; + + /* + * Start by locating the port node. If no previous endpoint is specified + * search for the first port node, otherwise get the previous endpoint + * parent port node. + */ + if (!prev) { + struct device_node *node; + + node = of_get_child_by_name(parent, "ports"); + if (node) + parent = node; + + port = of_get_child_by_name(parent, "port"); + of_node_put(node); + + if (!port) { + pr_err("%s(): no port node found in %s\n", + __func__, parent->full_name); + return NULL; + } + } else { + port = of_get_parent(prev); + if (WARN_ONCE(!port, "%s(): endpoint %s has no parent node\n", + __func__, prev->full_name)) + return NULL; + + /* + * Avoid dropping prev node refcount to 0 when getting the next + * child below. + */ + of_node_get(prev); + } + + while (1) { + /* + * Now that we have a port node, get the next endpoint by + * getting the next child. If the previous endpoint is NULL this + * will return the first child. + */ + endpoint = of_get_next_child(port, prev); + if (endpoint) { + of_node_put(port); + return endpoint; + } + + /* No more endpoints under this port, try the next one. */ + prev = NULL; + + do { + port = of_get_next_child(parent, port); + if (!port) + return NULL; + } while (of_node_cmp(port->name, "port")); + } +} +EXPORT_SYMBOL(of_graph_get_next_endpoint); + +/** + * of_graph_get_remote_port_parent() - get remote port's parent node + * @node: pointer to a local endpoint device_node + * + * Return: Remote device node associated with remote endpoint node linked + * to @node. Use of_node_put() on it when done. + */ +struct device_node *of_graph_get_remote_port_parent( + const struct device_node *node) +{ + struct device_node *np; + unsigned int depth; + + /* Get remote endpoint node. */ + np = of_parse_phandle(node, "remote-endpoint", 0); + + /* Walk 3 levels up only if there is 'ports' node. */ + for (depth = 3; depth && np; depth--) { + np = of_get_next_parent(np); + if (depth == 2 && of_node_cmp(np->name, "ports")) + break; + } + return np; +} +EXPORT_SYMBOL(of_graph_get_remote_port_parent); + +/** + * of_graph_get_remote_port() - get remote port node + * @node: pointer to a local endpoint device_node + * + * Return: Remote port node associated with remote endpoint node linked + * to @node. Use of_node_put() on it when done. + */ +struct device_node *of_graph_get_remote_port(const struct device_node *node) +{ + struct device_node *np; + + /* Get remote endpoint node. */ + np = of_parse_phandle(node, "remote-endpoint", 0); + if (!np) + return NULL; + return of_get_next_parent(np); +} +EXPORT_SYMBOL(of_graph_get_remote_port); diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h new file mode 100644 index 000000000000..3bbeb609a360 --- /dev/null +++ b/include/linux/of_graph.h @@ -0,0 +1,46 @@ +/* + * OF graph binding parsing helpers + * + * Copyright (C) 2012 - 2013 Samsung Electronics Co., Ltd. + * Author: Sylwester Nawrocki + * + * Copyright (C) 2012 Renesas Electronics Corp. + * Author: Guennadi Liakhovetski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + */ +#ifndef __LINUX_OF_GRAPH_H +#define __LINUX_OF_GRAPH_H + +#ifdef CONFIG_OF +struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, + struct device_node *previous); +struct device_node *of_graph_get_remote_port_parent( + const struct device_node *node); +struct device_node *of_graph_get_remote_port(const struct device_node *node); +#else + +static inline struct device_node *of_graph_get_next_endpoint( + const struct device_node *parent, + struct device_node *previous) +{ + return NULL; +} + +static inline struct device_node *of_graph_get_remote_port_parent( + const struct device_node *node) +{ + return NULL; +} + +static inline struct device_node *of_graph_get_remote_port( + const struct device_node *node) +{ + return NULL; +} + +#endif /* CONFIG_OF */ + +#endif /* __LINUX_OF_GRAPH_H */ From f746093345ca15c6fb8a05e6343575b345e969d6 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 8 Oct 2014 13:09:30 -0600 Subject: [PATCH 059/214] of: back-porting generic graph bindings This patch is back-porting the generic _portion_ of f2a575f67695dcba9062acd666ae5aab2380b95c to 3.10 Signed-off-by: Mathieu Poirier --- drivers/of/base.c | 32 ++++++++++++++++++++++++++++++++ include/linux/of_graph.h | 19 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index 4caad01b6fec..c66ecf958b67 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -1704,6 +1705,37 @@ const char *of_prop_next_string(struct property *prop, const char *cur) } EXPORT_SYMBOL_GPL(of_prop_next_string); +/** + * of_graph_parse_endpoint() - parse common endpoint node properties + * @node: pointer to endpoint device_node + * @endpoint: pointer to the OF endpoint data structure + * + * The caller should hold a reference to @node. + */ +int of_graph_parse_endpoint(const struct device_node *node, + struct of_endpoint *endpoint) +{ + struct device_node *port_node = of_get_parent(node); + + WARN_ONCE(!port_node, "%s(): endpoint %s has no parent node\n", + __func__, node->full_name); + + memset(endpoint, 0, sizeof(*endpoint)); + + endpoint->local_node = node; + /* + * It doesn't matter whether the two calls below succeed. + * If they don't then the default value 0 is used. + */ + of_property_read_u32(port_node, "reg", &endpoint->port); + of_property_read_u32(node, "reg", &endpoint->id); + + of_node_put(port_node); + + return 0; +} +EXPORT_SYMBOL(of_graph_parse_endpoint); + /** * of_graph_get_next_endpoint() - get next endpoint node * @parent: pointer to the parent device node diff --git a/include/linux/of_graph.h b/include/linux/of_graph.h index 3bbeb609a360..56e0507a0d58 100644 --- a/include/linux/of_graph.h +++ b/include/linux/of_graph.h @@ -14,7 +14,21 @@ #ifndef __LINUX_OF_GRAPH_H #define __LINUX_OF_GRAPH_H +/** + * struct of_endpoint - the OF graph endpoint data structure + * @port: identifier (value of reg property) of a port this endpoint belongs to + * @id: identifier (value of reg property) of this endpoint + * @local_node: pointer to device_node of this endpoint + */ +struct of_endpoint { + unsigned int port; + unsigned int id; + const struct device_node *local_node; +}; + #ifdef CONFIG_OF +int of_graph_parse_endpoint(const struct device_node *node, + struct of_endpoint *endpoint); struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *previous); struct device_node *of_graph_get_remote_port_parent( @@ -22,6 +36,11 @@ struct device_node *of_graph_get_remote_port_parent( struct device_node *of_graph_get_remote_port(const struct device_node *node); #else +static inline int of_graph_parse_endpoint(const struct device_node *node, + struct of_endpoint *endpoint) +{ + return -ENOSYS; +} static inline struct device_node *of_graph_get_next_endpoint( const struct device_node *parent, struct device_node *previous) From 54d37488baccb0b31610458ac9833591b76de1e4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 23 Aug 2013 15:02:56 -0700 Subject: [PATCH 060/214] driver core: create write-only attribute macros for devices and drivers This creates the macros DRIVER_ATTR_WO() and DEVICE_ATTR_WO() for write-only attributes for drivers and devices. Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 25d134e51f990962ecb54e5449c97b82f818ab30) Signed-off-by: Mathieu Poirier --- include/linux/device.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/device.h b/include/linux/device.h index de1d2860fca5..ddb4a1732d20 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -263,6 +263,8 @@ struct driver_attribute { struct driver_attribute driver_attr_##_name = __ATTR_RW(_name) #define DRIVER_ATTR_RO(_name) \ struct driver_attribute driver_attr_##_name = __ATTR_RO(_name) +#define DRIVER_ATTR_WO(_name) \ + struct driver_attribute driver_attr_##_name = __ATTR_WO(_name) extern int __must_check driver_create_file(struct device_driver *driver, const struct driver_attribute *attr); @@ -518,6 +520,8 @@ ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, struct device_attribute dev_attr_##_name = __ATTR_RW(_name) #define DEVICE_ATTR_RO(_name) \ struct device_attribute dev_attr_##_name = __ATTR_RO(_name) +#define DEVICE_ATTR_WO(_name) \ + struct device_attribute dev_attr_##_name = __ATTR_WO(_name) #define DEVICE_ULONG_ATTR(_name, _mode, _var) \ struct dev_ext_attribute dev_attr_##_name = \ { __ATTR(_name, _mode, device_show_ulong, device_store_ulong), &(_var) } From bfab9062c7be44b3a255d478bda5d4a7a3dba7e2 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Tue, 9 Dec 2014 14:38:21 -0700 Subject: [PATCH 061/214] bitops: Introduce a more generic BITMASK macro This patch is back-porting the generic _portion_ of 10ef6b0dffe404bcc54e94cb2ca1a5b18445a66b to 3.10 Signed-off-by: Mathieu Poirier --- include/linux/bitops.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a3b6b82108b9..bd0c4598d03b 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -10,6 +10,14 @@ #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) #endif +/* + * Create a contiguous bitmask starting at bit position @l and ending at + * position @h. For example + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. + */ +#define GENMASK(h, l) (((U32_C(1) << ((h) - (l) + 1)) - 1) << (l)) +#define GENMASK_ULL(h, l) (((U64_C(1) << ((h) - (l) + 1)) - 1) << (l)) + extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); From 452f486a3a98aa1874515569d6347844e103c788 Mon Sep 17 00:00:00 2001 From: Oliver Schinagl Date: Sun, 14 Jul 2013 16:06:00 -0700 Subject: [PATCH 062/214] sysfs: use file mode defines from stat.h With the last patches stat.h was included to the header, and thus those permission defines should be used. Signed-off-by: Oliver Schinagl Signed-off-by: Greg Kroah-Hartman (cherry picked from commit aa01aa3ca205ea04f44423a58bae38aec886fb96) Signed-off-by: Mathieu Poirier Conflicts: include/linux/sysfs.h --- include/linux/sysfs.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index e2cee22f578a..83678c0887ba 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -68,17 +68,20 @@ struct attribute_group { * for examples.. */ -#define __ATTR(_name,_mode,_show,_store) { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ - .show = _show, \ - .store = _store, \ +#define __ATTR(_name,_mode,_show,_store) { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ } -#define __ATTR_RO(_name) { \ - .attr = { .name = __stringify(_name), .mode = 0444 }, \ - .show = _name##_show, \ +#define __ATTR_RO(_name) { \ + .attr = { .name = __stringify(_name), .mode = S_IRUGO }, \ + .show = _name##_show, \ } +#define __ATTR_RW(_name) __ATTR(_name, (S_IWUSR | S_IRUGO), \ + _name##_show, _name##_store) + #define __ATTR_NULL { .attr = { .name = NULL } } #ifdef CONFIG_DEBUG_LOCK_ALLOC From 24006f88a31e8898a5c1067e603ffc6ba237ab91 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 10 Dec 2014 13:55:16 -0700 Subject: [PATCH 063/214] backing-dev: remove compilation warning Removed compilation warning introduced by back porting aa01aa3ca205ea04f44423a58bae38aec886fb96 to 3.10 Signed-off-by: Mathieu Poirier --- mm/backing-dev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 502517492258..8623ceac964b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -232,8 +232,6 @@ static ssize_t stable_pages_required_show(struct device *dev, bdi_cap_stable_pages_required(bdi) ? 1 : 0); } -#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) - static struct device_attribute bdi_dev_attrs[] = { __ATTR_RW(read_ahead_kb), __ATTR_RW(min_ratio), From d1adb5fedc4a30a5eba2b720da34ea1623e6dbf4 Mon Sep 17 00:00:00 2001 From: Oliver Schinagl Date: Sun, 14 Jul 2013 16:05:59 -0700 Subject: [PATCH 064/214] sysfs: add more helper macro's for (bin_)attribute(_groups) With the recent changes to sysfs there's various helper macro's. However there's no RW, RO BIN_ helper macro's. This patch adds them. Signed-off-by: Oliver Schinagl Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3493f69f4c4e8703961919a9a56c2d2e6a25b46f) Signed-off-by: Mathieu Poirier Conflicts: include/linux/sysfs.h --- include/linux/sysfs.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 83678c0887ba..b1dc7aa872fd 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -17,6 +17,7 @@ #include #include #include +#include #include struct kobject; @@ -95,6 +96,18 @@ struct attribute_group { #define __ATTR_IGNORE_LOCKDEP __ATTR #endif +#define __ATTRIBUTE_GROUPS(_name) \ +static const struct attribute_group *_name##_groups[] = { \ + &_name##_group, \ + NULL, \ +} + +#define ATTRIBUTE_GROUPS(_name) \ +static const struct attribute_group _name##_group = { \ + .attrs = _name##_attrs, \ +}; \ +__ATTRIBUTE_GROUPS(_name) + #define attr_name(_attr) (_attr).attr.name struct file; @@ -124,6 +137,36 @@ struct bin_attribute { */ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) +/* macros to create static binary attributes easier */ +#define __BIN_ATTR(_name, _mode, _read, _write, _size) { \ + .attr = { .name = __stringify(_name), .mode = _mode }, \ + .read = _read, \ + .write = _write, \ + .size = _size, \ +} + +#define __BIN_ATTR_RO(_name, _size) { \ + .attr = { .name = __stringify(_name), .mode = S_IRUGO }, \ + .read = _name##_read, \ + .size = _size, \ +} + +#define __BIN_ATTR_RW(_name, _size) __BIN_ATTR(_name, \ + (S_IWUSR | S_IRUGO), _name##_read, \ + _name##_write) + +#define __BIN_ATTR_NULL __ATTR_NULL + +#define BIN_ATTR(_name, _mode, _read, _write, _size) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR(_name, _mode, _read, \ + _write, _size) + +#define BIN_ATTR_RO(_name, _size) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR_RO(_name, _size) + +#define BIN_ATTR_RW(_name, _size) \ +struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size) + struct sysfs_ops { ssize_t (*show)(struct kobject *, struct attribute *,char *); ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t); From cf5b765ecbf6721ac2fefef4fafc86610a95927a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 23 Aug 2013 15:02:01 -0700 Subject: [PATCH 065/214] sysfs: create __ATTR_WO() This creates the macro __ATTR_WO() for write-only attributes, instead of having to "open define" them. Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a65fcce75a75c0d41b938f86d09d42b6f1733309) Signed-off-by: Mathieu Poirier --- include/linux/sysfs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index b1dc7aa872fd..7dd65cbfcdb3 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -80,6 +80,11 @@ struct attribute_group { .show = _name##_show, \ } +#define __ATTR_WO(_name) { \ + .attr = { .name = __stringify(_name), .mode = S_IWUSR }, \ + .store = _name##_store, \ +} + #define __ATTR_RW(_name) __ATTR(_name, (S_IWUSR | S_IRUGO), \ _name##_show, _name##_store) From 62bb2301ad099dae579f145e2fcff409796f7021 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 12 Nov 2014 16:36:59 -0700 Subject: [PATCH 066/214] coresight: bindings for coresight drivers Coresight IP blocks allow for the support of HW assisted tracing on ARM SoCs. Bindings for the currently available blocks are presented herein. Signed-off-by: Pratik Patel Signed-off-by: Mathieu Poirier Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 799656de6f6587cecc0b05477501e41c211a75fa) Signed-off-by: Mathieu Poirier --- .../devicetree/bindings/arm/coresight.txt | 204 ++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/coresight.txt diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt new file mode 100644 index 000000000000..d790f49066f3 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/coresight.txt @@ -0,0 +1,204 @@ +* CoreSight Components: + +CoreSight components are compliant with the ARM CoreSight architecture +specification and can be connected in various topologies to suit a particular +SoCs tracing needs. These trace components can generally be classified as +sinks, links and sources. Trace data produced by one or more sources flows +through the intermediate links connecting the source to the currently selected +sink. Each CoreSight component device should use these properties to describe +its hardware characteristcs. + +* Required properties for all components *except* non-configurable replicators: + + * compatible: These have to be supplemented with "arm,primecell" as + drivers are using the AMBA bus interface. Possible values include: + - "arm,coresight-etb10", "arm,primecell"; + - "arm,coresight-tpiu", "arm,primecell"; + - "arm,coresight-tmc", "arm,primecell"; + - "arm,coresight-funnel", "arm,primecell"; + - "arm,coresight-etm3x", "arm,primecell"; + + * reg: physical base address and length of the register + set(s) of the component. + + * clocks: the clock associated to this component. + + * clock-names: the name of the clock as referenced by the code. + Since we are using the AMBA framework, the name should be + "apb_pclk". + + * port or ports: The representation of the component's port + layout using the generic DT graph presentation found in + "bindings/graph.txt". + +* Required properties for devices that don't show up on the AMBA bus, such as + non-configurable replicators: + + * compatible: Currently supported value is (note the absence of the + AMBA markee): + - "arm,coresight-replicator" + + * id: a unique number that will identify this replicator. + + * port or ports: same as above. + +* Optional properties for ETM/PTMs: + + * arm,cp14: must be present if the system accesses ETM/PTM management + registers via co-processor 14. + + * cpu: the cpu phandle this ETM/PTM is affined to. When omitted the + source is considered to belong to CPU0. + +* Optional property for TMC: + + * arm,buffer-size: size of contiguous buffer space for TMC ETR + (embedded trace router) + + +Example: + +1. Sinks + etb@20010000 { + compatible = "arm,coresight-etb10", "arm,primecell"; + reg = <0 0x20010000 0 0x1000>; + + coresight-default-sink; + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + etb_in_port: endpoint@0 { + slave-mode; + remote-endpoint = <&replicator_out_port0>; + }; + }; + }; + + tpiu@20030000 { + compatible = "arm,coresight-tpiu", "arm,primecell"; + reg = <0 0x20030000 0 0x1000>; + + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + tpiu_in_port: endpoint@0 { + slave-mode; + remote-endpoint = <&replicator_out_port1>; + }; + }; + }; + +2. Links + replicator { + /* non-configurable replicators don't show up on the + * AMBA bus. As such no need to add "arm,primecell". + */ + compatible = "arm,coresight-replicator"; + /* this will show up in debugfs as "0.replicator" */ + id = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* replicator output ports */ + port@0 { + reg = <0>; + replicator_out_port0: endpoint { + remote-endpoint = <&etb_in_port>; + }; + }; + + port@1 { + reg = <1>; + replicator_out_port1: endpoint { + remote-endpoint = <&tpiu_in_port>; + }; + }; + + /* replicator input port */ + port@2 { + reg = <0>; + replicator_in_port0: endpoint { + slave-mode; + remote-endpoint = <&funnel_out_port0>; + }; + }; + }; + }; + + funnel@20040000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0x20040000 0 0x1000>; + + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* funnel output port */ + port@0 { + reg = <0>; + funnel_out_port0: endpoint { + remote-endpoint = + <&replicator_in_port0>; + }; + }; + + /* funnel input ports */ + port@1 { + reg = <0>; + funnel_in_port0: endpoint { + slave-mode; + remote-endpoint = <&ptm0_out_port>; + }; + }; + + port@2 { + reg = <1>; + funnel_in_port1: endpoint { + slave-mode; + remote-endpoint = <&ptm1_out_port>; + }; + }; + + port@3 { + reg = <2>; + funnel_in_port2: endpoint { + slave-mode; + remote-endpoint = <&etm0_out_port>; + }; + }; + + }; + }; + +3. Sources + ptm@2201c000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0x2201c000 0 0x1000>; + + cpu = <&cpu0>; + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + ptm0_out_port: endpoint { + remote-endpoint = <&funnel_in_port0>; + }; + }; + }; + + ptm@2201d000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0x2201d000 0 0x1000>; + + cpu = <&cpu1>; + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + ptm1_out_port: endpoint { + remote-endpoint = <&funnel_in_port1>; + }; + }; + }; From a66ebf16a42afea803d6f9fd36fbcaad90cd0b87 Mon Sep 17 00:00:00 2001 From: Pratik Patel Date: Mon, 3 Nov 2014 11:07:35 -0700 Subject: [PATCH 067/214] coresight: add CoreSight core layer framework CoreSight components are compliant with the ARM CoreSight architecture specification and can be connected in various topologies to suit a particular SoC tracing needs. These trace components can generally be classified as sources, links and sinks. Trace data produced by one or more sources flows through the intermediate links connecting the source to the currently selected sink. The CoreSight framework provides an interface for the CoreSight trace drivers to register themselves with. It's intended to build up a topological view of the CoreSight components and configure the correct serie of components on user input via sysfs. For eg., when enabling a source, the framework builds up a path consisting of all the components connecting the source to the currently selected sink(s) and enables all of them. The framework also supports switching between available sinks and provides status information to user space applications through the debugfs interface. Signed-off-by: Pratik Patel Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a06ae8609b3dd06b957a6e4e965772a8a14d3af5) Signed-off-by: Mathieu Poirier Conflicts: arch/arm/Kconfig.debug drivers/Makefile --- MAINTAINERS | 8 + arch/arm/Kconfig.debug | 20 + drivers/Makefile | 6 + drivers/amba/bus.c | 2 +- drivers/coresight/Makefile | 5 + drivers/coresight/coresight-priv.h | 63 +++ drivers/coresight/coresight.c | 717 +++++++++++++++++++++++++++++ drivers/coresight/of_coresight.c | 204 ++++++++ include/linux/amba/bus.h | 1 + include/linux/coresight.h | 263 +++++++++++ 10 files changed, 1288 insertions(+), 1 deletion(-) create mode 100644 drivers/coresight/Makefile create mode 100644 drivers/coresight/coresight-priv.h create mode 100644 drivers/coresight/coresight.c create mode 100644 drivers/coresight/of_coresight.c create mode 100644 include/linux/coresight.h diff --git a/MAINTAINERS b/MAINTAINERS index ad7e322ad17b..02e914546198 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -783,6 +783,14 @@ M: Hubert Feurstein S: Maintained F: arch/arm/mach-ep93xx/micro9.c +ARM/CORESIGHT FRAMEWORK AND DRIVERS +M: Mathieu Poirier +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: drivers/coresight/* +F: Documentation/trace/coresight.txt +F: Documentation/devicetree/bindings/arm/coresight.txt + ARM/CORGI MACHINE SUPPORT M: Richard Purdie S: Maintained diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 1d41908d5cda..aa103b6b2b7f 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -692,4 +692,24 @@ config PID_IN_CONTEXTIDR additional instructions during context switch. Say Y here only if you are planning to use hardware trace tools with this kernel. +config DEBUG_SET_MODULE_RONX + bool "Set loadable kernel module data as NX and text as RO" + depends on MODULES + ---help--- + This option helps catch unintended modifications to loadable + kernel module's text and read-only data. It also prevents execution + of module data. Such protection may interfere with run-time code + patching and dynamic kernel tracing - and they might also protect + against certain classes of kernel exploits. + If in doubt, say "N". + +menuconfig CORESIGHT + bool "CoreSight Tracing Support" + select ARM_AMBA + help + This framework provides a kernel interface for the CoreSight debug + and trace drivers to register themselves with. It's intended to build + a topological view of the CoreSight components based on a DT + specification and configure the right serie of components when a + trace source gets enabled. endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 130abc1dfd65..ab7c7bb6183f 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -152,3 +152,9 @@ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_IPACK_BUS) += ipack/ obj-$(CONFIG_NTB) += ntb/ +obj-$(CONFIG_FMC) += fmc/ +obj-$(CONFIG_POWERCAP) += powercap/ +obj-$(CONFIG_MCB) += mcb/ +obj-$(CONFIG_RAS) += ras/ +obj-$(CONFIG_THUNDERBOLT) += thunderbolt/ +obj-$(CONFIG_CORESIGHT) += coresight/ diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index cdbad3a454a0..4144a3a999bd 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -506,7 +506,7 @@ int amba_device_add(struct amba_device *dev, struct resource *parent) amba_put_disable_pclk(dev); - if (cid == AMBA_CID) + if (cid == AMBA_CID || cid == CORESIGHT_CID) dev->periphid = pid; if (!dev->periphid) diff --git a/drivers/coresight/Makefile b/drivers/coresight/Makefile new file mode 100644 index 000000000000..218e3b589f93 --- /dev/null +++ b/drivers/coresight/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for CoreSight drivers. +# +obj-$(CONFIG_CORESIGHT) += coresight.o +obj-$(CONFIG_OF) += of_coresight.o diff --git a/drivers/coresight/coresight-priv.h b/drivers/coresight/coresight-priv.h new file mode 100644 index 000000000000..8d1180c47c0b --- /dev/null +++ b/drivers/coresight/coresight-priv.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _CORESIGHT_PRIV_H +#define _CORESIGHT_PRIV_H + +#include +#include +#include + +/* + * Coresight management registers (0xf00-0xfcc) + * 0xfa0 - 0xfa4: Management registers in PFTv1.0 + * Trace registers in PFTv1.1 + */ +#define CORESIGHT_ITCTRL 0xf00 +#define CORESIGHT_CLAIMSET 0xfa0 +#define CORESIGHT_CLAIMCLR 0xfa4 +#define CORESIGHT_LAR 0xfb0 +#define CORESIGHT_LSR 0xfb4 +#define CORESIGHT_AUTHSTATUS 0xfb8 +#define CORESIGHT_DEVID 0xfc8 +#define CORESIGHT_DEVTYPE 0xfcc + +#define TIMEOUT_US 100 +#define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) + +static inline void CS_LOCK(void __iomem *addr) +{ + do { + /* Wait for things to settle */ + mb(); + writel_relaxed(0x0, addr + CORESIGHT_LAR); + } while (0); +} + +static inline void CS_UNLOCK(void __iomem *addr) +{ + do { + writel_relaxed(CORESIGHT_UNLOCK, addr + CORESIGHT_LAR); + /* Make sure eveyone has seen this */ + mb(); + } while (0); +} + +#ifdef CONFIG_CORESIGHT_SOURCE_ETM3X +extern int etm_readl_cp14(u32 off, unsigned int *val); +extern int etm_writel_cp14(u32 off, u32 val); +#else +static inline int etm_readl_cp14(u32 off, unsigned int *val) { return 0; } +static inline int etm_writel_cp14(u32 val, u32 off) { return 0; } +#endif + +#endif diff --git a/drivers/coresight/coresight.c b/drivers/coresight/coresight.c new file mode 100644 index 000000000000..6e0181f84425 --- /dev/null +++ b/drivers/coresight/coresight.c @@ -0,0 +1,717 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coresight-priv.h" + +static DEFINE_MUTEX(coresight_mutex); + +static int coresight_id_match(struct device *dev, void *data) +{ + int trace_id, i_trace_id; + struct coresight_device *csdev, *i_csdev; + + csdev = data; + i_csdev = to_coresight_device(dev); + + /* + * No need to care about oneself and components that are not + * sources or not enabled + */ + if (i_csdev == csdev || !i_csdev->enable || + i_csdev->type != CORESIGHT_DEV_TYPE_SOURCE) + return 0; + + /* Get the source ID for both compoment */ + trace_id = source_ops(csdev)->trace_id(csdev); + i_trace_id = source_ops(i_csdev)->trace_id(i_csdev); + + /* All you need is one */ + if (trace_id == i_trace_id) + return 1; + + return 0; +} + +static int coresight_source_is_unique(struct coresight_device *csdev) +{ + int trace_id = source_ops(csdev)->trace_id(csdev); + + /* this shouldn't happen */ + if (trace_id < 0) + return 0; + + return !bus_for_each_dev(&coresight_bustype, NULL, + csdev, coresight_id_match); +} + +static int coresight_find_link_inport(struct coresight_device *csdev) +{ + int i; + struct coresight_device *parent; + struct coresight_connection *conn; + + parent = container_of(csdev->path_link.next, + struct coresight_device, path_link); + + for (i = 0; i < parent->nr_outport; i++) { + conn = &parent->conns[i]; + if (conn->child_dev == csdev) + return conn->child_port; + } + + dev_err(&csdev->dev, "couldn't find inport, parent: %s, child: %s\n", + dev_name(&parent->dev), dev_name(&csdev->dev)); + + return 0; +} + +static int coresight_find_link_outport(struct coresight_device *csdev) +{ + int i; + struct coresight_device *child; + struct coresight_connection *conn; + + child = container_of(csdev->path_link.prev, + struct coresight_device, path_link); + + for (i = 0; i < csdev->nr_outport; i++) { + conn = &csdev->conns[i]; + if (conn->child_dev == child) + return conn->outport; + } + + dev_err(&csdev->dev, "couldn't find outport, parent: %s, child: %s\n", + dev_name(&csdev->dev), dev_name(&child->dev)); + + return 0; +} + +static int coresight_enable_sink(struct coresight_device *csdev) +{ + int ret; + + if (!csdev->enable) { + if (sink_ops(csdev)->enable) { + ret = sink_ops(csdev)->enable(csdev); + if (ret) + return ret; + } + csdev->enable = true; + } + + atomic_inc(csdev->refcnt); + + return 0; +} + +static void coresight_disable_sink(struct coresight_device *csdev) +{ + if (atomic_dec_return(csdev->refcnt) == 0) { + if (sink_ops(csdev)->disable) { + sink_ops(csdev)->disable(csdev); + csdev->enable = false; + } + } +} + +static int coresight_enable_link(struct coresight_device *csdev) +{ + int ret; + int link_subtype; + int refport, inport, outport; + + inport = coresight_find_link_inport(csdev); + outport = coresight_find_link_outport(csdev); + link_subtype = csdev->subtype.link_subtype; + + if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) + refport = inport; + else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) + refport = outport; + else + refport = 0; + + if (atomic_inc_return(&csdev->refcnt[refport]) == 1) { + if (link_ops(csdev)->enable) { + ret = link_ops(csdev)->enable(csdev, inport, outport); + if (ret) + return ret; + } + } + + csdev->enable = true; + + return 0; +} + +static void coresight_disable_link(struct coresight_device *csdev) +{ + int i, nr_conns; + int link_subtype; + int refport, inport, outport; + + inport = coresight_find_link_inport(csdev); + outport = coresight_find_link_outport(csdev); + link_subtype = csdev->subtype.link_subtype; + + if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) { + refport = inport; + nr_conns = csdev->nr_inport; + } else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) { + refport = outport; + nr_conns = csdev->nr_outport; + } else { + refport = 0; + nr_conns = 1; + } + + if (atomic_dec_return(&csdev->refcnt[refport]) == 0) { + if (link_ops(csdev)->disable) + link_ops(csdev)->disable(csdev, inport, outport); + } + + for (i = 0; i < nr_conns; i++) + if (atomic_read(&csdev->refcnt[i]) != 0) + return; + + csdev->enable = false; +} + +static int coresight_enable_source(struct coresight_device *csdev) +{ + int ret; + + if (!coresight_source_is_unique(csdev)) { + dev_warn(&csdev->dev, "traceID %d not unique\n", + source_ops(csdev)->trace_id(csdev)); + return -EINVAL; + } + + if (!csdev->enable) { + if (source_ops(csdev)->enable) { + ret = source_ops(csdev)->enable(csdev); + if (ret) + return ret; + } + csdev->enable = true; + } + + atomic_inc(csdev->refcnt); + + return 0; +} + +static void coresight_disable_source(struct coresight_device *csdev) +{ + if (atomic_dec_return(csdev->refcnt) == 0) { + if (source_ops(csdev)->disable) { + source_ops(csdev)->disable(csdev); + csdev->enable = false; + } + } +} + +static int coresight_enable_path(struct list_head *path) +{ + int ret = 0; + struct coresight_device *cd; + + list_for_each_entry(cd, path, path_link) { + if (cd == list_first_entry(path, struct coresight_device, + path_link)) { + ret = coresight_enable_sink(cd); + } else if (list_is_last(&cd->path_link, path)) { + /* + * Don't enable the source just yet - this needs to + * happen at the very end when all links and sink + * along the path have been configured properly. + */ + ; + } else { + ret = coresight_enable_link(cd); + } + if (ret) + goto err; + } + + return 0; +err: + list_for_each_entry_continue_reverse(cd, path, path_link) { + if (cd == list_first_entry(path, struct coresight_device, + path_link)) { + coresight_disable_sink(cd); + } else if (list_is_last(&cd->path_link, path)) { + ; + } else { + coresight_disable_link(cd); + } + } + + return ret; +} + +static int coresight_disable_path(struct list_head *path) +{ + struct coresight_device *cd; + + list_for_each_entry_reverse(cd, path, path_link) { + if (cd == list_first_entry(path, struct coresight_device, + path_link)) { + coresight_disable_sink(cd); + } else if (list_is_last(&cd->path_link, path)) { + /* + * The source has already been stopped, no need + * to do it again here. + */ + ; + } else { + coresight_disable_link(cd); + } + } + + return 0; +} + +static int coresight_build_paths(struct coresight_device *csdev, + struct list_head *path, + bool enable) +{ + int i, ret = -EINVAL; + struct coresight_connection *conn; + + list_add(&csdev->path_link, path); + + if (csdev->type == CORESIGHT_DEV_TYPE_SINK && csdev->activated) { + if (enable) + ret = coresight_enable_path(path); + else + ret = coresight_disable_path(path); + } else { + for (i = 0; i < csdev->nr_outport; i++) { + conn = &csdev->conns[i]; + if (coresight_build_paths(conn->child_dev, + path, enable) == 0) + ret = 0; + } + } + + if (list_first_entry(path, struct coresight_device, path_link) != csdev) + dev_err(&csdev->dev, "wrong device in %s\n", __func__); + + list_del(&csdev->path_link); + + return ret; +} + +int coresight_enable(struct coresight_device *csdev) +{ + int ret = 0; + LIST_HEAD(path); + + mutex_lock(&coresight_mutex); + if (csdev->type != CORESIGHT_DEV_TYPE_SOURCE) { + ret = -EINVAL; + dev_err(&csdev->dev, "wrong device type in %s\n", __func__); + goto out; + } + if (csdev->enable) + goto out; + + if (coresight_build_paths(csdev, &path, true)) { + dev_err(&csdev->dev, "building path(s) failed\n"); + goto out; + } + + if (coresight_enable_source(csdev)) + dev_err(&csdev->dev, "source enable failed\n"); +out: + mutex_unlock(&coresight_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(coresight_enable); + +void coresight_disable(struct coresight_device *csdev) +{ + LIST_HEAD(path); + + mutex_lock(&coresight_mutex); + if (csdev->type != CORESIGHT_DEV_TYPE_SOURCE) { + dev_err(&csdev->dev, "wrong device type in %s\n", __func__); + goto out; + } + if (!csdev->enable) + goto out; + + coresight_disable_source(csdev); + if (coresight_build_paths(csdev, &path, false)) + dev_err(&csdev->dev, "releasing path(s) failed\n"); + +out: + mutex_unlock(&coresight_mutex); +} +EXPORT_SYMBOL_GPL(coresight_disable); + +static ssize_t enable_sink_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct coresight_device *csdev = to_coresight_device(dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", (unsigned)csdev->activated); +} + +static ssize_t enable_sink_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct coresight_device *csdev = to_coresight_device(dev); + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val) + csdev->activated = true; + else + csdev->activated = false; + + return size; + +} +static DEVICE_ATTR_RW(enable_sink); + +static ssize_t enable_source_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct coresight_device *csdev = to_coresight_device(dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", (unsigned)csdev->enable); +} + +static ssize_t enable_source_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret = 0; + unsigned long val; + struct coresight_device *csdev = to_coresight_device(dev); + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val) { + ret = coresight_enable(csdev); + if (ret) + return ret; + } else { + coresight_disable(csdev); + } + + return size; +} +static DEVICE_ATTR_RW(enable_source); + +static struct attribute *coresight_sink_attrs[] = { + &dev_attr_enable_sink.attr, + NULL, +}; +ATTRIBUTE_GROUPS(coresight_sink); + +static struct attribute *coresight_source_attrs[] = { + &dev_attr_enable_source.attr, + NULL, +}; +ATTRIBUTE_GROUPS(coresight_source); + +static struct device_type coresight_dev_type[] = { + { + .name = "none", + }, + { + .name = "sink", + .groups = coresight_sink_groups, + }, + { + .name = "link", + }, + { + .name = "linksink", + .groups = coresight_sink_groups, + }, + { + .name = "source", + .groups = coresight_source_groups, + }, +}; + +static void coresight_device_release(struct device *dev) +{ + struct coresight_device *csdev = to_coresight_device(dev); + + kfree(csdev); +} + +static int coresight_orphan_match(struct device *dev, void *data) +{ + int i; + bool still_orphan = false; + struct coresight_device *csdev, *i_csdev; + struct coresight_connection *conn; + + csdev = data; + i_csdev = to_coresight_device(dev); + + /* No need to check oneself */ + if (csdev == i_csdev) + return 0; + + /* Move on to another component if no connection is orphan */ + if (!i_csdev->orphan) + return 0; + /* + * Circle throuch all the connection of that component. If we find + * an orphan connection whose name matches @csdev, link it. + */ + for (i = 0; i < i_csdev->nr_outport; i++) { + conn = &i_csdev->conns[i]; + + /* We have found at least one orphan connection */ + if (conn->child_dev == NULL) { + /* Does it match this newly added device? */ + if (!strcmp(dev_name(&csdev->dev), conn->child_name)) + conn->child_dev = csdev; + } else { + /* Too bad, this component still has an orphan */ + still_orphan = true; + } + } + + i_csdev->orphan = still_orphan; + + /* + * Returning '0' ensures that all known component on the + * bus will be checked. + */ + return 0; +} + +static void coresight_fixup_orphan_conns(struct coresight_device *csdev) +{ + /* + * No need to check for a return value as orphan connection(s) + * are hooked-up with each newly added component. + */ + bus_for_each_dev(&coresight_bustype, NULL, + csdev, coresight_orphan_match); +} + + +static int coresight_name_match(struct device *dev, void *data) +{ + char *to_match; + struct coresight_device *i_csdev; + + to_match = data; + i_csdev = to_coresight_device(dev); + + if (!strcmp(to_match, dev_name(&i_csdev->dev))) + return 1; + + return 0; +} + +static void coresight_fixup_device_conns(struct coresight_device *csdev) +{ + int i; + struct device *dev = NULL; + struct coresight_connection *conn; + + for (i = 0; i < csdev->nr_outport; i++) { + conn = &csdev->conns[i]; + dev = bus_find_device(&coresight_bustype, NULL, + (void *)conn->child_name, + coresight_name_match); + + if (dev) { + conn->child_dev = to_coresight_device(dev); + } else { + csdev->orphan = true; + conn->child_dev = NULL; + } + } +} + +/** + * coresight_timeout - loop until a bit has changed to a specific state. + * @addr: base address of the area of interest. + * @offset: address of a register, starting from @addr. + * @position: the position of the bit of interest. + * @value: the value the bit should have. + * + * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if + * TIMEOUT_US has elapsed, which ever happens first. + */ + +int coresight_timeout(void __iomem *addr, u32 offset, int position, int value) +{ + int i; + u32 val; + + for (i = TIMEOUT_US; i > 0; i--) { + val = __raw_readl(addr + offset); + /* waiting on the bit to go from 0 to 1 */ + if (value) { + if (val & BIT(position)) + return 0; + /* waiting on the bit to go from 1 to 0 */ + } else { + if (!(val & BIT(position))) + return 0; + } + + /* + * Delay is arbitrary - the specification doesn't say how long + * we are expected to wait. Extra check required to make sure + * we don't wait needlessly on the last iteration. + */ + if (i - 1) + udelay(1); + } + + return -EAGAIN; +} + +struct bus_type coresight_bustype = { + .name = "coresight", +}; + +static int __init coresight_init(void) +{ + return bus_register(&coresight_bustype); +} +postcore_initcall(coresight_init); + +struct coresight_device *coresight_register(struct coresight_desc *desc) +{ + int i; + int ret; + int link_subtype; + int nr_refcnts = 1; + atomic_t *refcnts = NULL; + struct coresight_device *csdev; + struct coresight_connection *conns; + + csdev = kzalloc(sizeof(*csdev), GFP_KERNEL); + if (!csdev) { + ret = -ENOMEM; + goto err_kzalloc_csdev; + } + + if (desc->type == CORESIGHT_DEV_TYPE_LINK || + desc->type == CORESIGHT_DEV_TYPE_LINKSINK) { + link_subtype = desc->subtype.link_subtype; + + if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) + nr_refcnts = desc->pdata->nr_inport; + else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) + nr_refcnts = desc->pdata->nr_outport; + } + + refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); + if (!refcnts) { + ret = -ENOMEM; + goto err_kzalloc_refcnts; + } + + csdev->refcnt = refcnts; + + csdev->nr_inport = desc->pdata->nr_inport; + csdev->nr_outport = desc->pdata->nr_outport; + conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL); + if (!conns) { + ret = -ENOMEM; + goto err_kzalloc_conns; + } + + for (i = 0; i < csdev->nr_outport; i++) { + conns[i].outport = desc->pdata->outports[i]; + conns[i].child_name = desc->pdata->child_names[i]; + conns[i].child_port = desc->pdata->child_ports[i]; + } + + csdev->conns = conns; + + csdev->type = desc->type; + csdev->subtype = desc->subtype; + csdev->ops = desc->ops; + csdev->orphan = false; + + csdev->dev.type = &coresight_dev_type[desc->type]; + csdev->dev.groups = desc->groups; + csdev->dev.parent = desc->dev; + csdev->dev.release = coresight_device_release; + csdev->dev.bus = &coresight_bustype; + dev_set_name(&csdev->dev, "%s", desc->pdata->name); + + ret = device_register(&csdev->dev); + if (ret) + goto err_device_register; + + mutex_lock(&coresight_mutex); + + coresight_fixup_device_conns(csdev); + coresight_fixup_orphan_conns(csdev); + + mutex_unlock(&coresight_mutex); + + return csdev; + +err_device_register: + kfree(conns); +err_kzalloc_conns: + kfree(refcnts); +err_kzalloc_refcnts: + kfree(csdev); +err_kzalloc_csdev: + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(coresight_register); + +void coresight_unregister(struct coresight_device *csdev) +{ + mutex_lock(&coresight_mutex); + + kfree(csdev->conns); + device_unregister(&csdev->dev); + + mutex_unlock(&coresight_mutex); +} +EXPORT_SYMBOL_GPL(coresight_unregister); + +MODULE_LICENSE("GPL v2"); diff --git a/drivers/coresight/of_coresight.c b/drivers/coresight/of_coresight.c new file mode 100644 index 000000000000..5030c0734508 --- /dev/null +++ b/drivers/coresight/of_coresight.c @@ -0,0 +1,204 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static int of_dev_node_match(struct device *dev, void *data) +{ + return dev->of_node == data; +} + +static struct device * +of_coresight_get_endpoint_device(struct device_node *endpoint) +{ + struct device *dev = NULL; + + /* + * If we have a non-configuable replicator, it will be found on the + * platform bus. + */ + dev = bus_find_device(&platform_bus_type, NULL, + endpoint, of_dev_node_match); + if (dev) + return dev; + + /* + * We have a configurable component - circle through the AMBA bus + * looking for the device that matches the endpoint node. + */ + return bus_find_device(&amba_bustype, NULL, + endpoint, of_dev_node_match); +} + +static struct device_node *of_get_coresight_endpoint( + const struct device_node *parent, struct device_node *prev) +{ + struct device_node *node = of_graph_get_next_endpoint(parent, prev); + + of_node_put(prev); + return node; +} + +static void of_coresight_get_ports(struct device_node *node, + int *nr_inport, int *nr_outport) +{ + struct device_node *ep = NULL; + int in = 0, out = 0; + + do { + ep = of_get_coresight_endpoint(node, ep); + if (!ep) + break; + + if (of_property_read_bool(ep, "slave-mode")) + in++; + else + out++; + + } while (ep); + + *nr_inport = in; + *nr_outport = out; +} + +static int of_coresight_alloc_memory(struct device *dev, + struct coresight_platform_data *pdata) +{ + /* List of output port on this component */ + pdata->outports = devm_kzalloc(dev, pdata->nr_outport * + sizeof(*pdata->outports), + GFP_KERNEL); + if (!pdata->outports) + return -ENOMEM; + + /* Children connected to this component via @outport */ + pdata->child_names = devm_kzalloc(dev, pdata->nr_outport * + sizeof(*pdata->child_names), + GFP_KERNEL); + if (!pdata->child_names) + return -ENOMEM; + + /* Port number on the child this component is connected to */ + pdata->child_ports = devm_kzalloc(dev, pdata->nr_outport * + sizeof(*pdata->child_ports), + GFP_KERNEL); + if (!pdata->child_ports) + return -ENOMEM; + + return 0; +} + +struct coresight_platform_data *of_get_coresight_platform_data( + struct device *dev, struct device_node *node) +{ + int i = 0, ret = 0; + struct coresight_platform_data *pdata; + struct of_endpoint endpoint, rendpoint; + struct device *rdev; + struct device_node *cpu; + struct device_node *ep = NULL; + struct device_node *rparent = NULL; + struct device_node *rport = NULL; + + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + /* Use device name as debugfs handle */ + pdata->name = dev_name(dev); + + /* Get the number of input and output port for this component */ + of_coresight_get_ports(node, &pdata->nr_inport, &pdata->nr_outport); + + if (pdata->nr_outport) { + ret = of_coresight_alloc_memory(dev, pdata); + if (ret) + return ERR_PTR(ret); + + /* Iterate through each port to discover topology */ + do { + /* Get a handle on a port */ + ep = of_get_coresight_endpoint(node, ep); + if (!ep) + break; + + /* + * No need to deal with input ports, processing for as + * processing for output ports will deal with them. + */ + if (of_find_property(ep, "slave-mode", NULL)) + continue; + + /* Get a handle on the local endpoint */ + ret = of_graph_parse_endpoint(ep, &endpoint); + + if (ret) + continue; + + /* The local out port number */ + pdata->outports[i] = endpoint.id; + + /* + * Get a handle on the remote port and parent + * attached to it. + */ + rparent = of_graph_get_remote_port_parent(ep); + rport = of_graph_get_remote_port(ep); + + if (!rparent || !rport) + continue; + + if (of_graph_parse_endpoint(rport, &rendpoint)) + continue; + + rdev = of_coresight_get_endpoint_device(rparent); + if (!dev) + continue; + + pdata->child_names[i] = dev_name(rdev); + pdata->child_ports[i] = rendpoint.id; + + i++; + } while (ep); + } + + /* Affinity defaults to CPU0 */ + pdata->cpu = 0; + cpu = of_parse_phandle(node, "cpu", 0); + if (cpu) { + const u32 *mpidr; + int len, index; + + mpidr = of_get_property(cpu, "reg", &len); + if (mpidr && len == 4) { + index = get_logical_index(be32_to_cpup(mpidr)); + if (index != -EINVAL) + pdata->cpu = index; + } + } + + return pdata; +} +EXPORT_SYMBOL_GPL(of_get_coresight_platform_data); diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 43ec7e247a80..aafb89c3a3e6 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -23,6 +23,7 @@ #define AMBA_NR_IRQS 2 #define AMBA_CID 0xb105f00d +#define CORESIGHT_CID 0xb105900d struct clk; diff --git a/include/linux/coresight.h b/include/linux/coresight.h new file mode 100644 index 000000000000..bdde4199c74a --- /dev/null +++ b/include/linux/coresight.h @@ -0,0 +1,263 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_CORESIGHT_H +#define _LINUX_CORESIGHT_H + +#include + +/* Peripheral id registers (0xFD0-0xFEC) */ +#define CORESIGHT_PERIPHIDR4 0xfd0 +#define CORESIGHT_PERIPHIDR5 0xfd4 +#define CORESIGHT_PERIPHIDR6 0xfd8 +#define CORESIGHT_PERIPHIDR7 0xfdC +#define CORESIGHT_PERIPHIDR0 0xfe0 +#define CORESIGHT_PERIPHIDR1 0xfe4 +#define CORESIGHT_PERIPHIDR2 0xfe8 +#define CORESIGHT_PERIPHIDR3 0xfeC +/* Component id registers (0xFF0-0xFFC) */ +#define CORESIGHT_COMPIDR0 0xff0 +#define CORESIGHT_COMPIDR1 0xff4 +#define CORESIGHT_COMPIDR2 0xff8 +#define CORESIGHT_COMPIDR3 0xffC + +#define ETM_ARCH_V3_3 0x23 +#define ETM_ARCH_V3_5 0x25 +#define PFT_ARCH_V1_0 0x30 +#define PFT_ARCH_V1_1 0x31 + +#define CORESIGHT_UNLOCK 0xc5acce55 + +extern struct bus_type coresight_bustype; + +enum coresight_dev_type { + CORESIGHT_DEV_TYPE_NONE, + CORESIGHT_DEV_TYPE_SINK, + CORESIGHT_DEV_TYPE_LINK, + CORESIGHT_DEV_TYPE_LINKSINK, + CORESIGHT_DEV_TYPE_SOURCE, +}; + +enum coresight_dev_subtype_sink { + CORESIGHT_DEV_SUBTYPE_SINK_NONE, + CORESIGHT_DEV_SUBTYPE_SINK_PORT, + CORESIGHT_DEV_SUBTYPE_SINK_BUFFER, +}; + +enum coresight_dev_subtype_link { + CORESIGHT_DEV_SUBTYPE_LINK_NONE, + CORESIGHT_DEV_SUBTYPE_LINK_MERG, + CORESIGHT_DEV_SUBTYPE_LINK_SPLIT, + CORESIGHT_DEV_SUBTYPE_LINK_FIFO, +}; + +enum coresight_dev_subtype_source { + CORESIGHT_DEV_SUBTYPE_SOURCE_NONE, + CORESIGHT_DEV_SUBTYPE_SOURCE_PROC, + CORESIGHT_DEV_SUBTYPE_SOURCE_BUS, + CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE, +}; + +/** + * struct coresight_dev_subtype - further characterisation of a type + * @sink_subtype: type of sink this component is, as defined + by @coresight_dev_subtype_sink. + * @link_subtype: type of link this component is, as defined + by @coresight_dev_subtype_link. + * @source_subtype: type of source this component is, as defined + by @coresight_dev_subtype_source. + */ +struct coresight_dev_subtype { + enum coresight_dev_subtype_sink sink_subtype; + enum coresight_dev_subtype_link link_subtype; + enum coresight_dev_subtype_source source_subtype; +}; + +/** + * struct coresight_platform_data - data harvested from the DT specification + * @cpu: the CPU a source belongs to. Only applicable for ETM/PTMs. + * @name: name of the component as shown under sysfs. + * @nr_inport: number of input ports for this component. + * @outports: list of remote enpoint port number. + * @child_names:name of all child components connected to this device. + * @child_ports:child component port number the current component is + connected to. + * @nr_outport: number of output ports for this component. + * @clk: The clock this component is associated to. + */ +struct coresight_platform_data { + int cpu; + const char *name; + int nr_inport; + int *outports; + const char **child_names; + int *child_ports; + int nr_outport; + struct clk *clk; +}; + +/** + * struct coresight_desc - description of a component required from drivers + * @type: as defined by @coresight_dev_type. + * @subtype: as defined by @coresight_dev_subtype. + * @ops: generic operations for this component, as defined + by @coresight_ops. + * @pdata: platform data collected from DT. + * @dev: The device entity associated to this component. + * @groups :operations specific to this component. These will end up + in the component's sysfs sub-directory. + */ +struct coresight_desc { + enum coresight_dev_type type; + struct coresight_dev_subtype subtype; + const struct coresight_ops *ops; + struct coresight_platform_data *pdata; + struct device *dev; + const struct attribute_group **groups; +}; + +/** + * struct coresight_connection - representation of a single connection + * @ref_count: keeping count a port' references. + * @outport: a connection's output port number. + * @chid_name: remote component's name. + * @child_port: remote component's port number @output is connected to. + * @child_dev: a @coresight_device representation of the component + connected to @outport. + */ +struct coresight_connection { + int outport; + const char *child_name; + int child_port; + struct coresight_device *child_dev; +}; + +/** + * struct coresight_device - representation of a device as used by the framework + * @nr_inport: number of input port associated to this component. + * @nr_outport: number of output port associated to this component. + * @type: as defined by @coresight_dev_type. + * @subtype: as defined by @coresight_dev_subtype. + * @ops: generic operations for this component, as defined + by @coresight_ops. + * @dev: The device entity associated to this component. + * @refcnt: keep track of what is in use. + * @path_link: link of current component into the path being enabled. + * @orphan: true if the component has connections that haven't been linked. + * @enable: 'true' if component is currently part of an active path. + * @activated: 'true' only if a _sink_ has been activated. A sink can be + activated but not yet enabled. Enabling for a _sink_ + happens when a source has been selected for that it. + */ +struct coresight_device { + struct coresight_connection *conns; + int nr_inport; + int nr_outport; + enum coresight_dev_type type; + struct coresight_dev_subtype subtype; + const struct coresight_ops *ops; + struct device dev; + atomic_t *refcnt; + struct list_head path_link; + bool orphan; + bool enable; /* true only if configured as part of a path */ + bool activated; /* true only if a sink is part of a path */ +}; + +#define to_coresight_device(d) container_of(d, struct coresight_device, dev) + +#define source_ops(csdev) csdev->ops->source_ops +#define sink_ops(csdev) csdev->ops->sink_ops +#define link_ops(csdev) csdev->ops->link_ops + +#define CORESIGHT_DEBUGFS_ENTRY(__name, __entry_name, \ + __mode, __get, __set, __fmt) \ +DEFINE_SIMPLE_ATTRIBUTE(__name ## _ops, __get, __set, __fmt); \ +static const struct coresight_ops_entry __name ## _entry = { \ + .name = __entry_name, \ + .mode = __mode, \ + .ops = &__name ## _ops \ +} + +/** + * struct coresight_ops_sink - basic operations for a sink + * Operations available for sinks + * @enable: enables the sink. + * @disable: disables the sink. + */ +struct coresight_ops_sink { + int (*enable)(struct coresight_device *csdev); + void (*disable)(struct coresight_device *csdev); +}; + +/** + * struct coresight_ops_link - basic operations for a link + * Operations available for links. + * @enable: enables flow between iport and oport. + * @disable: disables flow between iport and oport. + */ +struct coresight_ops_link { + int (*enable)(struct coresight_device *csdev, int iport, int oport); + void (*disable)(struct coresight_device *csdev, int iport, int oport); +}; + +/** + * struct coresight_ops_source - basic operations for a source + * Operations available for sources. + * @trace_id: returns the value of the component's trace ID as known + to the HW. + * @enable: enables tracing from a source. + * @disable: disables tracing for a source. + */ +struct coresight_ops_source { + int (*trace_id)(struct coresight_device *csdev); + int (*enable)(struct coresight_device *csdev); + void (*disable)(struct coresight_device *csdev); +}; + +struct coresight_ops { + const struct coresight_ops_sink *sink_ops; + const struct coresight_ops_link *link_ops; + const struct coresight_ops_source *source_ops; +}; + +#ifdef CONFIG_CORESIGHT +extern struct coresight_device * +coresight_register(struct coresight_desc *desc); +extern void coresight_unregister(struct coresight_device *csdev); +extern int coresight_enable(struct coresight_device *csdev); +extern void coresight_disable(struct coresight_device *csdev); +extern int coresight_is_bit_set(u32 val, int position, int value); +extern int coresight_timeout(void __iomem *addr, u32 offset, + int position, int value); +#ifdef CONFIG_OF +extern struct coresight_platform_data *of_get_coresight_platform_data( + struct device *dev, struct device_node *node); +#endif +#else +static inline struct coresight_device * +coresight_register(struct coresight_desc *desc) { return NULL; } +static inline void coresight_unregister(struct coresight_device *csdev) {} +static inline int +coresight_enable(struct coresight_device *csdev) { return -ENOSYS; } +static inline void coresight_disable(struct coresight_device *csdev) {} +static inline int coresight_is_bit_set(u32 val, int position, int value) + { return 0; } +static inline int coresight_timeout(void __iomem *addr, u32 offset, + int position, int value) { return 1; } +#ifdef CONFIG_OF +static inline struct coresight_platform_data *of_get_coresight_platform_data( + struct device *dev, struct device_node *node) { return NULL; } +#endif +#endif + +#endif From a74f8a17d82119959da1f69db7260f9fb668a77f Mon Sep 17 00:00:00 2001 From: Pratik Patel Date: Mon, 3 Nov 2014 11:07:36 -0700 Subject: [PATCH 068/214] coresight-tmc: add CoreSight TMC driver This driver manages CoreSight TMC (Trace Memory Controller) which can act as a link or a sink depending upon its configuration. It can present itself as an ETF (Embedded Trace FIFO) or ETR (Embedded Trace Router). ETF when configured in circular buffer mode acts as a trace collection sink. When configured in HW fifo mode it acts as link. ETR always acts as a sink and can be used to route data to memory allocated in RAM. Signed-off-by: Pratik Patel Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit bc4bf7fe98daf4e64cc5ffc6cdc0e820f4d99c14) Signed-off-by: Mathieu Poirier --- arch/arm/Kconfig.debug | 20 + drivers/coresight/Makefile | 1 + drivers/coresight/coresight-tmc.c | 776 ++++++++++++++++++++++++++++++ 3 files changed, 797 insertions(+) create mode 100644 drivers/coresight/coresight-tmc.c diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index aa103b6b2b7f..00c150c767c6 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -712,4 +712,24 @@ menuconfig CORESIGHT a topological view of the CoreSight components based on a DT specification and configure the right serie of components when a trace source gets enabled. + +if CORESIGHT +config CORESIGHT_LINKS_AND_SINKS + bool "CoreSight Link and Sink drivers" + help + This enables support for CoreSight link and sink drivers that are + responsible for transporting and collecting the trace data + respectively. Link and sinks are dynamically aggregated with a trace + entity at run time to form a complete trace path. + +config CORESIGHT_LINK_AND_SINK_TMC + bool "Coresight generic TMC driver" + depends on CORESIGHT_LINKS_AND_SINKS + help + This enables support for the Trace Memory Controller driver. Depending + on its configuration the device can act as a link (embedded trace router + - ETR) or sink (embedded trace FIFO). The driver complies with the + generic implementation of the component without special enhancement or + added features. +endif endmenu diff --git a/drivers/coresight/Makefile b/drivers/coresight/Makefile index 218e3b589f93..d26bd0e78122 100644 --- a/drivers/coresight/Makefile +++ b/drivers/coresight/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_CORESIGHT) += coresight.o obj-$(CONFIG_OF) += of_coresight.o +obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o diff --git a/drivers/coresight/coresight-tmc.c b/drivers/coresight/coresight-tmc.c new file mode 100644 index 000000000000..ce2c293f1707 --- /dev/null +++ b/drivers/coresight/coresight-tmc.c @@ -0,0 +1,776 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coresight-priv.h" + +#define TMC_RSZ 0x004 +#define TMC_STS 0x00c +#define TMC_RRD 0x010 +#define TMC_RRP 0x014 +#define TMC_RWP 0x018 +#define TMC_TRG 0x01c +#define TMC_CTL 0x020 +#define TMC_RWD 0x024 +#define TMC_MODE 0x028 +#define TMC_LBUFLEVEL 0x02c +#define TMC_CBUFLEVEL 0x030 +#define TMC_BUFWM 0x034 +#define TMC_RRPHI 0x038 +#define TMC_RWPHI 0x03c +#define TMC_AXICTL 0x110 +#define TMC_DBALO 0x118 +#define TMC_DBAHI 0x11c +#define TMC_FFSR 0x300 +#define TMC_FFCR 0x304 +#define TMC_PSCR 0x308 +#define TMC_ITMISCOP0 0xee0 +#define TMC_ITTRFLIN 0xee8 +#define TMC_ITATBDATA0 0xeec +#define TMC_ITATBCTR2 0xef0 +#define TMC_ITATBCTR1 0xef4 +#define TMC_ITATBCTR0 0xef8 + +/* register description */ +/* TMC_CTL - 0x020 */ +#define TMC_CTL_CAPT_EN BIT(0) +/* TMC_STS - 0x00C */ +#define TMC_STS_TRIGGERED BIT(1) +/* TMC_AXICTL - 0x110 */ +#define TMC_AXICTL_PROT_CTL_B0 BIT(0) +#define TMC_AXICTL_PROT_CTL_B1 BIT(1) +#define TMC_AXICTL_SCT_GAT_MODE BIT(7) +#define TMC_AXICTL_WR_BURST_LEN 0xF00 +/* TMC_FFCR - 0x304 */ +#define TMC_FFCR_EN_FMT BIT(0) +#define TMC_FFCR_EN_TI BIT(1) +#define TMC_FFCR_FON_FLIN BIT(4) +#define TMC_FFCR_FON_TRIG_EVT BIT(5) +#define TMC_FFCR_FLUSHMAN BIT(6) +#define TMC_FFCR_TRIGON_TRIGIN BIT(8) +#define TMC_FFCR_STOP_ON_FLUSH BIT(12) + +#define TMC_STS_TRIGGERED_BIT 2 +#define TMC_FFCR_FLUSHMAN_BIT 6 + +enum tmc_config_type { + TMC_CONFIG_TYPE_ETB, + TMC_CONFIG_TYPE_ETR, + TMC_CONFIG_TYPE_ETF, +}; + +enum tmc_mode { + TMC_MODE_CIRCULAR_BUFFER, + TMC_MODE_SOFTWARE_FIFO, + TMC_MODE_HARDWARE_FIFO, +}; + +enum tmc_mem_intf_width { + TMC_MEM_INTF_WIDTH_32BITS = 0x2, + TMC_MEM_INTF_WIDTH_64BITS = 0x3, + TMC_MEM_INTF_WIDTH_128BITS = 0x4, + TMC_MEM_INTF_WIDTH_256BITS = 0x5, +}; + +/** + * struct tmc_drvdata - specifics associated to an TMC component + * @base: memory mapped base address for this component. + * @dev: the device entity associated to this component. + * @csdev: component vitals needed by the framework. + * @miscdev: specifics to handle "/dev/xyz.tmc" entry. + * @clk: the clock this component is associated to. + * @spinlock: only one at a time pls. + * @read_count: manages preparation of buffer for reading. + * @buf: area of memory where trace data get sent. + * @paddr: DMA start location in RAM. + * @vaddr: virtual representation of @paddr. + * @size: @buf size. + * @enable: this TMC is being used. + * @config_type: TMC variant, must be of type @tmc_config_type. + * @trigger_cntr: amount of words to store after a trigger. + */ +struct tmc_drvdata { + void __iomem *base; + struct device *dev; + struct coresight_device *csdev; + struct miscdevice miscdev; + struct clk *clk; + spinlock_t spinlock; + int read_count; + bool reading; + char *buf; + dma_addr_t paddr; + void __iomem *vaddr; + u32 size; + bool enable; + enum tmc_config_type config_type; + u32 trigger_cntr; +}; + +static void tmc_wait_for_ready(struct tmc_drvdata *drvdata) +{ + /* Ensure formatter, unformatter and hardware fifo are empty */ + if (coresight_timeout(drvdata->base, + TMC_STS, TMC_STS_TRIGGERED_BIT, 1)) { + dev_err(drvdata->dev, + "timeout observed when probing at offset %#x\n", + TMC_STS); + } +} + +static void tmc_flush_and_stop(struct tmc_drvdata *drvdata) +{ + u32 ffcr; + + ffcr = readl_relaxed(drvdata->base + TMC_FFCR); + ffcr |= TMC_FFCR_STOP_ON_FLUSH; + writel_relaxed(ffcr, drvdata->base + TMC_FFCR); + ffcr |= TMC_FFCR_FLUSHMAN; + writel_relaxed(ffcr, drvdata->base + TMC_FFCR); + /* Ensure flush completes */ + if (coresight_timeout(drvdata->base, + TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) { + dev_err(drvdata->dev, + "timeout observed when probing at offset %#x\n", + TMC_FFCR); + } + + tmc_wait_for_ready(drvdata); +} + +static void tmc_enable_hw(struct tmc_drvdata *drvdata) +{ + writel_relaxed(TMC_CTL_CAPT_EN, drvdata->base + TMC_CTL); +} + +static void tmc_disable_hw(struct tmc_drvdata *drvdata) +{ + writel_relaxed(0x0, drvdata->base + TMC_CTL); +} + +static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata) +{ + /* Zero out the memory to help with debug */ + memset(drvdata->buf, 0, drvdata->size); + + CS_UNLOCK(drvdata->base); + + writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE); + writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI | + TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT | + TMC_FFCR_TRIGON_TRIGIN, + drvdata->base + TMC_FFCR); + + writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG); + tmc_enable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) +{ + u32 axictl; + + /* Zero out the memory to help with debug */ + memset(drvdata->vaddr, 0, drvdata->size); + + CS_UNLOCK(drvdata->base); + + writel_relaxed(drvdata->size / 4, drvdata->base + TMC_RSZ); + writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE); + + axictl = readl_relaxed(drvdata->base + TMC_AXICTL); + axictl |= TMC_AXICTL_WR_BURST_LEN; + writel_relaxed(axictl, drvdata->base + TMC_AXICTL); + axictl &= ~TMC_AXICTL_SCT_GAT_MODE; + writel_relaxed(axictl, drvdata->base + TMC_AXICTL); + axictl = (axictl & + ~(TMC_AXICTL_PROT_CTL_B0 | TMC_AXICTL_PROT_CTL_B1)) | + TMC_AXICTL_PROT_CTL_B1; + writel_relaxed(axictl, drvdata->base + TMC_AXICTL); + + writel_relaxed(drvdata->paddr, drvdata->base + TMC_DBALO); + writel_relaxed(0x0, drvdata->base + TMC_DBAHI); + writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI | + TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT | + TMC_FFCR_TRIGON_TRIGIN, + drvdata->base + TMC_FFCR); + writel_relaxed(drvdata->trigger_cntr, drvdata->base + TMC_TRG); + tmc_enable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + writel_relaxed(TMC_MODE_HARDWARE_FIFO, drvdata->base + TMC_MODE); + writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI, + drvdata->base + TMC_FFCR); + writel_relaxed(0x0, drvdata->base + TMC_BUFWM); + tmc_enable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static int tmc_enable(struct tmc_drvdata *drvdata, enum tmc_mode mode) +{ + int ret; + unsigned long flags; + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (drvdata->reading) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); + clk_disable_unprepare(drvdata->clk); + return -EBUSY; + } + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { + tmc_etb_enable_hw(drvdata); + } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + tmc_etr_enable_hw(drvdata); + } else { + if (mode == TMC_MODE_CIRCULAR_BUFFER) + tmc_etb_enable_hw(drvdata); + else + tmc_etf_enable_hw(drvdata); + } + drvdata->enable = true; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + dev_info(drvdata->dev, "TMC enabled\n"); + return 0; +} + +static int tmc_enable_sink(struct coresight_device *csdev) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + return tmc_enable(drvdata, TMC_MODE_CIRCULAR_BUFFER); +} + +static int tmc_enable_link(struct coresight_device *csdev, int inport, + int outport) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + return tmc_enable(drvdata, TMC_MODE_HARDWARE_FIFO); +} + +static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata) +{ + enum tmc_mem_intf_width memwidth; + u8 memwords; + char *bufp; + u32 read_data; + int i; + + memwidth = BMVAL(readl_relaxed(drvdata->base + CORESIGHT_DEVID), 8, 10); + if (memwidth == TMC_MEM_INTF_WIDTH_32BITS) + memwords = 1; + else if (memwidth == TMC_MEM_INTF_WIDTH_64BITS) + memwords = 2; + else if (memwidth == TMC_MEM_INTF_WIDTH_128BITS) + memwords = 4; + else + memwords = 8; + + bufp = drvdata->buf; + while (1) { + for (i = 0; i < memwords; i++) { + read_data = readl_relaxed(drvdata->base + TMC_RRD); + if (read_data == 0xFFFFFFFF) + return; + memcpy(bufp, &read_data, 4); + bufp += 4; + } + } +} + +static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + tmc_etb_dump_hw(drvdata); + tmc_disable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_etr_dump_hw(struct tmc_drvdata *drvdata) +{ + u32 rwp, val; + + rwp = readl_relaxed(drvdata->base + TMC_RWP); + val = readl_relaxed(drvdata->base + TMC_STS); + + /* How much memory do we still have */ + if (val & BIT(0)) + drvdata->buf = drvdata->vaddr + rwp - drvdata->paddr; + else + drvdata->buf = drvdata->vaddr; +} + +static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + tmc_etr_dump_hw(drvdata); + tmc_disable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + tmc_disable_hw(drvdata); + + CS_LOCK(drvdata->base); +} + +static void tmc_disable(struct tmc_drvdata *drvdata, enum tmc_mode mode) +{ + unsigned long flags; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (drvdata->reading) + goto out; + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { + tmc_etb_disable_hw(drvdata); + } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + tmc_etr_disable_hw(drvdata); + } else { + if (mode == TMC_MODE_CIRCULAR_BUFFER) + tmc_etb_disable_hw(drvdata); + else + tmc_etf_disable_hw(drvdata); + } +out: + drvdata->enable = false; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + clk_disable_unprepare(drvdata->clk); + + dev_info(drvdata->dev, "TMC disabled\n"); +} + +static void tmc_disable_sink(struct coresight_device *csdev) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + tmc_disable(drvdata, TMC_MODE_CIRCULAR_BUFFER); +} + +static void tmc_disable_link(struct coresight_device *csdev, int inport, + int outport) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + tmc_disable(drvdata, TMC_MODE_HARDWARE_FIFO); +} + +static const struct coresight_ops_sink tmc_sink_ops = { + .enable = tmc_enable_sink, + .disable = tmc_disable_sink, +}; + +static const struct coresight_ops_link tmc_link_ops = { + .enable = tmc_enable_link, + .disable = tmc_disable_link, +}; + +static const struct coresight_ops tmc_etb_cs_ops = { + .sink_ops = &tmc_sink_ops, +}; + +static const struct coresight_ops tmc_etr_cs_ops = { + .sink_ops = &tmc_sink_ops, +}; + +static const struct coresight_ops tmc_etf_cs_ops = { + .sink_ops = &tmc_sink_ops, + .link_ops = &tmc_link_ops, +}; + +static int tmc_read_prepare(struct tmc_drvdata *drvdata) +{ + int ret; + unsigned long flags; + enum tmc_mode mode; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (!drvdata->enable) + goto out; + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { + tmc_etb_disable_hw(drvdata); + } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + tmc_etr_disable_hw(drvdata); + } else { + mode = readl_relaxed(drvdata->base + TMC_MODE); + if (mode == TMC_MODE_CIRCULAR_BUFFER) { + tmc_etb_disable_hw(drvdata); + } else { + ret = -ENODEV; + goto err; + } + } +out: + drvdata->reading = true; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + dev_info(drvdata->dev, "TMC read start\n"); + return 0; +err: + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return ret; +} + +static void tmc_read_unprepare(struct tmc_drvdata *drvdata) +{ + unsigned long flags; + enum tmc_mode mode; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (!drvdata->enable) + goto out; + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { + tmc_etb_enable_hw(drvdata); + } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + tmc_etr_enable_hw(drvdata); + } else { + mode = readl_relaxed(drvdata->base + TMC_MODE); + if (mode == TMC_MODE_CIRCULAR_BUFFER) + tmc_etb_enable_hw(drvdata); + } +out: + drvdata->reading = false; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + dev_info(drvdata->dev, "TMC read end\n"); +} + +static int tmc_open(struct inode *inode, struct file *file) +{ + struct tmc_drvdata *drvdata = container_of(file->private_data, + struct tmc_drvdata, miscdev); + int ret = 0; + + if (drvdata->read_count++) + goto out; + + ret = tmc_read_prepare(drvdata); + if (ret) + return ret; +out: + nonseekable_open(inode, file); + + dev_dbg(drvdata->dev, "%s: successfully opened\n", __func__); + return 0; +} + +static ssize_t tmc_read(struct file *file, char __user *data, size_t len, + loff_t *ppos) +{ + struct tmc_drvdata *drvdata = container_of(file->private_data, + struct tmc_drvdata, miscdev); + char *bufp = drvdata->buf + *ppos; + + if (*ppos + len > drvdata->size) + len = drvdata->size - *ppos; + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + if (bufp == (char *)(drvdata->vaddr + drvdata->size)) + bufp = drvdata->vaddr; + else if (bufp > (char *)(drvdata->vaddr + drvdata->size)) + bufp -= drvdata->size; + if ((bufp + len) > (char *)(drvdata->vaddr + drvdata->size)) + len = (char *)(drvdata->vaddr + drvdata->size) - bufp; + } + + if (copy_to_user(data, bufp, len)) { + dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__); + return -EFAULT; + } + + *ppos += len; + + dev_dbg(drvdata->dev, "%s: %d bytes copied, %d bytes left\n", + __func__, len, (int) (drvdata->size - *ppos)); + return len; +} + +static int tmc_release(struct inode *inode, struct file *file) +{ + struct tmc_drvdata *drvdata = container_of(file->private_data, + struct tmc_drvdata, miscdev); + + if (--drvdata->read_count) { + if (drvdata->read_count < 0) { + dev_err(drvdata->dev, "mismatched close\n"); + drvdata->read_count = 0; + } + goto out; + } + + tmc_read_unprepare(drvdata); +out: + dev_dbg(drvdata->dev, "%s: released\n", __func__); + return 0; +} + +static const struct file_operations tmc_fops = { + .owner = THIS_MODULE, + .open = tmc_open, + .read = tmc_read, + .release = tmc_release, + .llseek = no_llseek, +}; + +static ssize_t trigger_cntr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); + unsigned long val = drvdata->trigger_cntr; + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t trigger_cntr_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->trigger_cntr = val; + return size; +} +static DEVICE_ATTR_RW(trigger_cntr); + +static struct attribute *coresight_etb_attrs[] = { + &dev_attr_trigger_cntr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(coresight_etb); + +static struct attribute *coresight_etr_attrs[] = { + &dev_attr_trigger_cntr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(coresight_etr); + +static struct attribute *coresight_etf_attrs[] = { + &dev_attr_trigger_cntr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(coresight_etf); + +static int tmc_probe(struct amba_device *adev, const struct amba_id *id) +{ + int ret = 0; + u32 devid; + void __iomem *base; + struct device *dev = &adev->dev; + struct coresight_platform_data *pdata = NULL; + struct tmc_drvdata *drvdata; + struct resource *res = &adev->res; + struct coresight_desc *desc; + struct device_node *np = adev->dev.of_node; + + if (np) { + pdata = of_get_coresight_platform_data(dev, np); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + adev->dev.platform_data = pdata; + } + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + drvdata->dev = &adev->dev; + dev_set_drvdata(dev, drvdata); + + /* Validity for the resource is already checked by the AMBA core */ + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + drvdata->base = base; + + spin_lock_init(&drvdata->spinlock); + + drvdata->clk = adev->pclk; + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID); + drvdata->config_type = BMVAL(devid, 6, 7); + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + if (np) + ret = of_property_read_u32(np, + "arm,buffer-size", + &drvdata->size); + if (ret) + drvdata->size = SZ_1M; + } else { + drvdata->size = readl_relaxed(drvdata->base + TMC_RSZ) * 4; + } + + clk_disable_unprepare(drvdata->clk); + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + drvdata->vaddr = dma_alloc_coherent(dev, drvdata->size, + &drvdata->paddr, GFP_KERNEL); + if (!drvdata->vaddr) + return -ENOMEM; + + memset(drvdata->vaddr, 0, drvdata->size); + drvdata->buf = drvdata->vaddr; + } else { + drvdata->buf = devm_kzalloc(dev, drvdata->size, GFP_KERNEL); + if (!drvdata->buf) + return -ENOMEM; + } + + desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); + if (!desc) { + ret = -ENOMEM; + goto err_devm_kzalloc; + } + + desc->pdata = pdata; + desc->dev = dev; + desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER; + + if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) { + desc->type = CORESIGHT_DEV_TYPE_SINK; + desc->ops = &tmc_etb_cs_ops; + desc->groups = coresight_etb_groups; + } else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { + desc->type = CORESIGHT_DEV_TYPE_SINK; + desc->ops = &tmc_etr_cs_ops; + desc->groups = coresight_etr_groups; + } else { + desc->type = CORESIGHT_DEV_TYPE_LINKSINK; + desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO; + desc->ops = &tmc_etf_cs_ops; + desc->groups = coresight_etf_groups; + } + + drvdata->csdev = coresight_register(desc); + if (IS_ERR(drvdata->csdev)) { + ret = PTR_ERR(drvdata->csdev); + goto err_devm_kzalloc; + } + + drvdata->miscdev.name = pdata->name; + drvdata->miscdev.minor = MISC_DYNAMIC_MINOR; + drvdata->miscdev.fops = &tmc_fops; + ret = misc_register(&drvdata->miscdev); + if (ret) + goto err_misc_register; + + dev_info(dev, "TMC initialized\n"); + return 0; + +err_misc_register: + coresight_unregister(drvdata->csdev); +err_devm_kzalloc: + if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) + dma_free_coherent(dev, drvdata->size, + &drvdata->paddr, GFP_KERNEL); + return ret; +} + +static int tmc_remove(struct amba_device *adev) +{ + struct tmc_drvdata *drvdata = amba_get_drvdata(adev); + + misc_deregister(&drvdata->miscdev); + coresight_unregister(drvdata->csdev); + if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) + dma_free_coherent(drvdata->dev, drvdata->size, + &drvdata->paddr, GFP_KERNEL); + + return 0; +} + +static struct amba_id tmc_ids[] = { + { + .id = 0x0003b961, + .mask = 0x0003ffff, + }, + { 0, 0}, +}; + +static struct amba_driver tmc_driver = { + .drv = { + .name = "coresight-tmc", + .owner = THIS_MODULE, + }, + .probe = tmc_probe, + .remove = tmc_remove, + .id_table = tmc_ids, +}; + +static int __init tmc_init(void) +{ + return amba_driver_register(&tmc_driver); +} +module_init(tmc_init); + +static void __exit tmc_exit(void) +{ + amba_driver_unregister(&tmc_driver); +} +module_exit(tmc_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight Trace Memory Controller driver"); From d48e6aeceefa4fb98224b34ee084fd945d7cf15e Mon Sep 17 00:00:00 2001 From: Pratik Patel Date: Mon, 3 Nov 2014 11:07:37 -0700 Subject: [PATCH 069/214] coresight-tpiu: add CoreSight TPIU driver This driver manages CoreSight TPIU (Trace Port Interface Unit) which acts as a sink. TPIU is typically connected to some offchip hardware hosting a storage buffer. Signed-off-by: Pratik Patel Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit dc161b9f01142c6b2c985290b7aa3e58e2408036) Signed-off-by: Mathieu Poirier --- arch/arm/Kconfig.debug | 9 ++ drivers/coresight/Makefile | 1 + drivers/coresight/coresight-tpiu.c | 217 +++++++++++++++++++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 drivers/coresight/coresight-tpiu.c diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 00c150c767c6..afedb865f0c0 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -731,5 +731,14 @@ config CORESIGHT_LINK_AND_SINK_TMC - ETR) or sink (embedded trace FIFO). The driver complies with the generic implementation of the component without special enhancement or added features. + +config CORESIGHT_SINK_TPIU + bool "Coresight generic TPIU driver" + depends on CORESIGHT_LINKS_AND_SINKS + help + This enables support for the Trace Port Interface Unit driver, responsible + for bridging the gap between the on-chip coresight components and a trace + port collection engine, typically connected to an external host for use + case capturing more traces than the on-board coresight memory can handle. endif endmenu diff --git a/drivers/coresight/Makefile b/drivers/coresight/Makefile index d26bd0e78122..41ab769b4822 100644 --- a/drivers/coresight/Makefile +++ b/drivers/coresight/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_CORESIGHT) += coresight.o obj-$(CONFIG_OF) += of_coresight.o obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o +obj-$(CONFIG_CORESIGHT_SINK_TPIU) += coresight-tpiu.o diff --git a/drivers/coresight/coresight-tpiu.c b/drivers/coresight/coresight-tpiu.c new file mode 100644 index 000000000000..ae101082791a --- /dev/null +++ b/drivers/coresight/coresight-tpiu.c @@ -0,0 +1,217 @@ +/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coresight-priv.h" + +#define TPIU_SUPP_PORTSZ 0x000 +#define TPIU_CURR_PORTSZ 0x004 +#define TPIU_SUPP_TRIGMODES 0x100 +#define TPIU_TRIG_CNTRVAL 0x104 +#define TPIU_TRIG_MULT 0x108 +#define TPIU_SUPP_TESTPATM 0x200 +#define TPIU_CURR_TESTPATM 0x204 +#define TPIU_TEST_PATREPCNTR 0x208 +#define TPIU_FFSR 0x300 +#define TPIU_FFCR 0x304 +#define TPIU_FSYNC_CNTR 0x308 +#define TPIU_EXTCTL_INPORT 0x400 +#define TPIU_EXTCTL_OUTPORT 0x404 +#define TPIU_ITTRFLINACK 0xee4 +#define TPIU_ITTRFLIN 0xee8 +#define TPIU_ITATBDATA0 0xeec +#define TPIU_ITATBCTR2 0xef0 +#define TPIU_ITATBCTR1 0xef4 +#define TPIU_ITATBCTR0 0xef8 + +/** register definition **/ +/* FFCR - 0x304 */ +#define FFCR_FON_MAN BIT(6) + +/** + * @base: memory mapped base address for this component. + * @dev: the device entity associated to this component. + * @csdev: component vitals needed by the framework. + * @clk: the clock this component is associated to. + */ +struct tpiu_drvdata { + void __iomem *base; + struct device *dev; + struct coresight_device *csdev; + struct clk *clk; +}; + +static void tpiu_enable_hw(struct tpiu_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + /* TODO: fill this up */ + + CS_LOCK(drvdata->base); +} + +static int tpiu_enable(struct coresight_device *csdev) +{ + struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int ret; + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + tpiu_enable_hw(drvdata); + + dev_info(drvdata->dev, "TPIU enabled\n"); + return 0; +} + +static void tpiu_disable_hw(struct tpiu_drvdata *drvdata) +{ + CS_UNLOCK(drvdata->base); + + /* Clear formatter controle reg. */ + writel_relaxed(0x0, drvdata->base + TPIU_FFCR); + /* Generate manual flush */ + writel_relaxed(FFCR_FON_MAN, drvdata->base + TPIU_FFCR); + + CS_LOCK(drvdata->base); +} + +static void tpiu_disable(struct coresight_device *csdev) +{ + struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + tpiu_disable_hw(drvdata); + + clk_disable_unprepare(drvdata->clk); + + dev_info(drvdata->dev, "TPIU disabled\n"); +} + +static const struct coresight_ops_sink tpiu_sink_ops = { + .enable = tpiu_enable, + .disable = tpiu_disable, +}; + +static const struct coresight_ops tpiu_cs_ops = { + .sink_ops = &tpiu_sink_ops, +}; + +static int tpiu_probe(struct amba_device *adev, const struct amba_id *id) +{ + int ret; + void __iomem *base; + struct device *dev = &adev->dev; + struct coresight_platform_data *pdata = NULL; + struct tpiu_drvdata *drvdata; + struct resource *res = &adev->res; + struct coresight_desc *desc; + struct device_node *np = adev->dev.of_node; + + if (np) { + pdata = of_get_coresight_platform_data(dev, np); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + adev->dev.platform_data = pdata; + } + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + drvdata->dev = &adev->dev; + dev_set_drvdata(dev, drvdata); + + /* Validity for the resource is already checked by the AMBA core */ + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + drvdata->base = base; + + drvdata->clk = adev->pclk; + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + /* Disable tpiu to support older devices */ + tpiu_disable_hw(drvdata); + + clk_disable_unprepare(drvdata->clk); + + desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + desc->type = CORESIGHT_DEV_TYPE_SINK; + desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PORT; + desc->ops = &tpiu_cs_ops; + desc->pdata = pdata; + desc->dev = dev; + drvdata->csdev = coresight_register(desc); + if (IS_ERR(drvdata->csdev)) + return PTR_ERR(drvdata->csdev); + + dev_info(dev, "TPIU initialized\n"); + return 0; +} + +static int tpiu_remove(struct amba_device *adev) +{ + struct tpiu_drvdata *drvdata = amba_get_drvdata(adev); + + coresight_unregister(drvdata->csdev); + return 0; +} + +static struct amba_id tpiu_ids[] = { + { + .id = 0x0003b912, + .mask = 0x0003ffff, + }, + { 0, 0}, +}; + +static struct amba_driver tpiu_driver = { + .drv = { + .name = "coresight-tpiu", + .owner = THIS_MODULE, + }, + .probe = tpiu_probe, + .remove = tpiu_remove, + .id_table = tpiu_ids, +}; + +static int __init tpiu_init(void) +{ + return amba_driver_register(&tpiu_driver); +} +module_init(tpiu_init); + +static void __exit tpiu_exit(void) +{ + amba_driver_unregister(&tpiu_driver); +} +module_exit(tpiu_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight Trace Port Interface Unit driver"); From de0d42a918c71b2e8d972505c8cdb63535e2bfe0 Mon Sep 17 00:00:00 2001 From: Pratik Patel Date: Mon, 3 Nov 2014 11:07:38 -0700 Subject: [PATCH 070/214] coresight-etb: add CoreSight ETB driver This driver manages CoreSight ETB (Embedded Trace Buffer) which acts as a circular buffer sink collecting generated trace data. Signed-off-by: Pratik Patel Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit fdfc0d8a06b56c3ca8fc3d7d271b3c8e99e6d55c) Signed-off-by: Mathieu Poirier --- arch/arm/Kconfig.debug | 8 + drivers/coresight/Makefile | 1 + drivers/coresight/coresight-etb10.c | 537 ++++++++++++++++++++++++++++ 3 files changed, 546 insertions(+) create mode 100644 drivers/coresight/coresight-etb10.c diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index afedb865f0c0..1a406b5d1bab 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -740,5 +740,13 @@ config CORESIGHT_SINK_TPIU for bridging the gap between the on-chip coresight components and a trace port collection engine, typically connected to an external host for use case capturing more traces than the on-board coresight memory can handle. + +config CORESIGHT_SINK_ETBV10 + bool "Coresight ETBv1.0 driver" + depends on CORESIGHT_LINKS_AND_SINKS + help + This enables support for the Embedded Trace Buffer version 1.0 driver + that complies with the generic implementation of the component without + special enhancement or added features. endif endmenu diff --git a/drivers/coresight/Makefile b/drivers/coresight/Makefile index 41ab769b4822..4c43dade4c14 100644 --- a/drivers/coresight/Makefile +++ b/drivers/coresight/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_CORESIGHT) += coresight.o obj-$(CONFIG_OF) += of_coresight.o obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o obj-$(CONFIG_CORESIGHT_SINK_TPIU) += coresight-tpiu.o +obj-$(CONFIG_CORESIGHT_SINK_ETBV10) += coresight-etb10.o diff --git a/drivers/coresight/coresight-etb10.c b/drivers/coresight/coresight-etb10.c new file mode 100644 index 000000000000..c922d4aded8a --- /dev/null +++ b/drivers/coresight/coresight-etb10.c @@ -0,0 +1,537 @@ +/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coresight-priv.h" + +#define ETB_RAM_DEPTH_REG 0x004 +#define ETB_STATUS_REG 0x00c +#define ETB_RAM_READ_DATA_REG 0x010 +#define ETB_RAM_READ_POINTER 0x014 +#define ETB_RAM_WRITE_POINTER 0x018 +#define ETB_TRG 0x01c +#define ETB_CTL_REG 0x020 +#define ETB_RWD_REG 0x024 +#define ETB_FFSR 0x300 +#define ETB_FFCR 0x304 +#define ETB_ITMISCOP0 0xee0 +#define ETB_ITTRFLINACK 0xee4 +#define ETB_ITTRFLIN 0xee8 +#define ETB_ITATBDATA0 0xeeC +#define ETB_ITATBCTR2 0xef0 +#define ETB_ITATBCTR1 0xef4 +#define ETB_ITATBCTR0 0xef8 + +/* register description */ +/* STS - 0x00C */ +#define ETB_STATUS_RAM_FULL BIT(0) +/* CTL - 0x020 */ +#define ETB_CTL_CAPT_EN BIT(0) +/* FFCR - 0x304 */ +#define ETB_FFCR_EN_FTC BIT(0) +#define ETB_FFCR_FON_MAN BIT(6) +#define ETB_FFCR_STOP_FI BIT(12) +#define ETB_FFCR_STOP_TRIGGER BIT(13) + +#define ETB_FFCR_BIT 6 +#define ETB_FFSR_BIT 1 +#define ETB_FRAME_SIZE_WORDS 4 + +/** + * struct etb_drvdata - specifics associated to an ETB component + * @base: memory mapped base address for this component. + * @dev: the device entity associated to this component. + * @csdev: component vitals needed by the framework. + * @miscdev: specifics to handle "/dev/xyz.etb" entry. + * @clk: the clock this component is associated to. + * @spinlock: only one at a time pls. + * @in_use: synchronise user space access to etb buffer. + * @buf: area of memory where ETB buffer content gets sent. + * @buffer_depth: size of @buf. + * @enable: this ETB is being used. + * @trigger_cntr: amount of words to store after a trigger. + */ +struct etb_drvdata { + void __iomem *base; + struct device *dev; + struct coresight_device *csdev; + struct miscdevice miscdev; + struct clk *clk; + spinlock_t spinlock; + atomic_t in_use; + u8 *buf; + u32 buffer_depth; + bool enable; + u32 trigger_cntr; +}; + +static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata) +{ + int ret; + u32 depth = 0; + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + /* RO registers don't need locking */ + depth = readl_relaxed(drvdata->base + ETB_RAM_DEPTH_REG); + + clk_disable_unprepare(drvdata->clk); + return depth; +} + +static void etb_enable_hw(struct etb_drvdata *drvdata) +{ + int i; + u32 depth; + + CS_UNLOCK(drvdata->base); + + depth = drvdata->buffer_depth; + /* reset write RAM pointer address */ + writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER); + /* clear entire RAM buffer */ + for (i = 0; i < depth; i++) + writel_relaxed(0x0, drvdata->base + ETB_RWD_REG); + + /* reset write RAM pointer address */ + writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER); + /* reset read RAM pointer address */ + writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER); + + writel_relaxed(drvdata->trigger_cntr, drvdata->base + ETB_TRG); + writel_relaxed(ETB_FFCR_EN_FTC | ETB_FFCR_STOP_TRIGGER, + drvdata->base + ETB_FFCR); + /* ETB trace capture enable */ + writel_relaxed(ETB_CTL_CAPT_EN, drvdata->base + ETB_CTL_REG); + + CS_LOCK(drvdata->base); +} + +static int etb_enable(struct coresight_device *csdev) +{ + struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int ret; + unsigned long flags; + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + spin_lock_irqsave(&drvdata->spinlock, flags); + etb_enable_hw(drvdata); + drvdata->enable = true; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + dev_info(drvdata->dev, "ETB enabled\n"); + return 0; +} + +static void etb_disable_hw(struct etb_drvdata *drvdata) +{ + u32 ffcr; + + CS_UNLOCK(drvdata->base); + + ffcr = readl_relaxed(drvdata->base + ETB_FFCR); + /* stop formatter when a stop has completed */ + ffcr |= ETB_FFCR_STOP_FI; + writel_relaxed(ffcr, drvdata->base + ETB_FFCR); + /* manually generate a flush of the system */ + ffcr |= ETB_FFCR_FON_MAN; + writel_relaxed(ffcr, drvdata->base + ETB_FFCR); + + if (coresight_timeout(drvdata->base, ETB_FFCR, ETB_FFCR_BIT, 0)) { + dev_err(drvdata->dev, + "timeout observed when probing at offset %#x\n", + ETB_FFCR); + } + + /* disable trace capture */ + writel_relaxed(0x0, drvdata->base + ETB_CTL_REG); + + if (coresight_timeout(drvdata->base, ETB_FFSR, ETB_FFSR_BIT, 1)) { + dev_err(drvdata->dev, + "timeout observed when probing at offset %#x\n", + ETB_FFCR); + } + + CS_LOCK(drvdata->base); +} + +static void etb_dump_hw(struct etb_drvdata *drvdata) +{ + int i; + u8 *buf_ptr; + u32 read_data, depth; + u32 read_ptr, write_ptr; + u32 frame_off, frame_endoff; + + CS_UNLOCK(drvdata->base); + + read_ptr = readl_relaxed(drvdata->base + ETB_RAM_READ_POINTER); + write_ptr = readl_relaxed(drvdata->base + ETB_RAM_WRITE_POINTER); + + frame_off = write_ptr % ETB_FRAME_SIZE_WORDS; + frame_endoff = ETB_FRAME_SIZE_WORDS - frame_off; + if (frame_off) { + dev_err(drvdata->dev, + "write_ptr: %lu not aligned to formatter frame size\n", + (unsigned long)write_ptr); + dev_err(drvdata->dev, "frameoff: %lu, frame_endoff: %lu\n", + (unsigned long)frame_off, (unsigned long)frame_endoff); + write_ptr += frame_endoff; + } + + if ((readl_relaxed(drvdata->base + ETB_STATUS_REG) + & ETB_STATUS_RAM_FULL) == 0) + writel_relaxed(0x0, drvdata->base + ETB_RAM_READ_POINTER); + else + writel_relaxed(write_ptr, drvdata->base + ETB_RAM_READ_POINTER); + + depth = drvdata->buffer_depth; + buf_ptr = drvdata->buf; + for (i = 0; i < depth; i++) { + read_data = readl_relaxed(drvdata->base + + ETB_RAM_READ_DATA_REG); + *buf_ptr++ = read_data >> 0; + *buf_ptr++ = read_data >> 8; + *buf_ptr++ = read_data >> 16; + *buf_ptr++ = read_data >> 24; + } + + if (frame_off) { + buf_ptr -= (frame_endoff * 4); + for (i = 0; i < frame_endoff; i++) { + *buf_ptr++ = 0x0; + *buf_ptr++ = 0x0; + *buf_ptr++ = 0x0; + *buf_ptr++ = 0x0; + } + } + + writel_relaxed(read_ptr, drvdata->base + ETB_RAM_READ_POINTER); + + CS_LOCK(drvdata->base); +} + +static void etb_disable(struct coresight_device *csdev) +{ + struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + + spin_lock_irqsave(&drvdata->spinlock, flags); + etb_disable_hw(drvdata); + etb_dump_hw(drvdata); + drvdata->enable = false; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + clk_disable_unprepare(drvdata->clk); + + dev_info(drvdata->dev, "ETB disabled\n"); +} + +static const struct coresight_ops_sink etb_sink_ops = { + .enable = etb_enable, + .disable = etb_disable, +}; + +static const struct coresight_ops etb_cs_ops = { + .sink_ops = &etb_sink_ops, +}; + +static void etb_dump(struct etb_drvdata *drvdata) +{ + unsigned long flags; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (drvdata->enable) { + etb_disable_hw(drvdata); + etb_dump_hw(drvdata); + etb_enable_hw(drvdata); + } + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + dev_info(drvdata->dev, "ETB dumped\n"); +} + +static int etb_open(struct inode *inode, struct file *file) +{ + struct etb_drvdata *drvdata = container_of(file->private_data, + struct etb_drvdata, miscdev); + + if (atomic_cmpxchg(&drvdata->in_use, 0, 1)) + return -EBUSY; + + dev_dbg(drvdata->dev, "%s: successfully opened\n", __func__); + return 0; +} + +static ssize_t etb_read(struct file *file, char __user *data, + size_t len, loff_t *ppos) +{ + u32 depth; + struct etb_drvdata *drvdata = container_of(file->private_data, + struct etb_drvdata, miscdev); + + etb_dump(drvdata); + + depth = drvdata->buffer_depth; + if (*ppos + len > depth * 4) + len = depth * 4 - *ppos; + + if (copy_to_user(data, drvdata->buf + *ppos, len)) { + dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__); + return -EFAULT; + } + + *ppos += len; + + dev_dbg(drvdata->dev, "%s: %d bytes copied, %d bytes left\n", + __func__, len, (int) (depth * 4 - *ppos)); + return len; +} + +static int etb_release(struct inode *inode, struct file *file) +{ + struct etb_drvdata *drvdata = container_of(file->private_data, + struct etb_drvdata, miscdev); + atomic_set(&drvdata->in_use, 0); + + dev_dbg(drvdata->dev, "%s: released\n", __func__); + return 0; +} + +static const struct file_operations etb_fops = { + .owner = THIS_MODULE, + .open = etb_open, + .read = etb_read, + .release = etb_release, + .llseek = no_llseek, +}; + +static ssize_t status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + unsigned long flags; + u32 etb_rdr, etb_sr, etb_rrp, etb_rwp; + u32 etb_trg, etb_cr, etb_ffsr, etb_ffcr; + struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + goto out; + + spin_lock_irqsave(&drvdata->spinlock, flags); + CS_UNLOCK(drvdata->base); + + etb_rdr = readl_relaxed(drvdata->base + ETB_RAM_DEPTH_REG); + etb_sr = readl_relaxed(drvdata->base + ETB_STATUS_REG); + etb_rrp = readl_relaxed(drvdata->base + ETB_RAM_READ_POINTER); + etb_rwp = readl_relaxed(drvdata->base + ETB_RAM_WRITE_POINTER); + etb_trg = readl_relaxed(drvdata->base + ETB_TRG); + etb_cr = readl_relaxed(drvdata->base + ETB_CTL_REG); + etb_ffsr = readl_relaxed(drvdata->base + ETB_FFSR); + etb_ffcr = readl_relaxed(drvdata->base + ETB_FFCR); + + CS_LOCK(drvdata->base); + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + clk_disable_unprepare(drvdata->clk); + + return sprintf(buf, + "Depth:\t\t0x%x\n" + "Status:\t\t0x%x\n" + "RAM read ptr:\t0x%x\n" + "RAM wrt ptr:\t0x%x\n" + "Trigger cnt:\t0x%x\n" + "Control:\t0x%x\n" + "Flush status:\t0x%x\n" + "Flush ctrl:\t0x%x\n", + etb_rdr, etb_sr, etb_rrp, etb_rwp, + etb_trg, etb_cr, etb_ffsr, etb_ffcr); +out: + return -EINVAL; +} +static DEVICE_ATTR_RO(status); + +static ssize_t trigger_cntr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent); + unsigned long val = drvdata->trigger_cntr; + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t trigger_cntr_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etb_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->trigger_cntr = val; + return size; +} +static DEVICE_ATTR_RW(trigger_cntr); + +static struct attribute *coresight_etb_attrs[] = { + &dev_attr_trigger_cntr.attr, + &dev_attr_status.attr, + NULL, +}; +ATTRIBUTE_GROUPS(coresight_etb); + +static int etb_probe(struct amba_device *adev, const struct amba_id *id) +{ + int ret; + void __iomem *base; + struct device *dev = &adev->dev; + struct coresight_platform_data *pdata = NULL; + struct etb_drvdata *drvdata; + struct resource *res = &adev->res; + struct coresight_desc *desc; + struct device_node *np = adev->dev.of_node; + + if (np) { + pdata = of_get_coresight_platform_data(dev, np); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + adev->dev.platform_data = pdata; + } + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + drvdata->dev = &adev->dev; + dev_set_drvdata(dev, drvdata); + + /* validity for the resource is already checked by the AMBA core */ + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + drvdata->base = base; + + spin_lock_init(&drvdata->spinlock); + + drvdata->clk = adev->pclk; + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + drvdata->buffer_depth = etb_get_buffer_depth(drvdata); + clk_disable_unprepare(drvdata->clk); + + if (drvdata->buffer_depth < 0) + return -EINVAL; + + drvdata->buf = devm_kzalloc(dev, + drvdata->buffer_depth * 4, GFP_KERNEL); + if (!drvdata->buf) + return -ENOMEM; + + desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + desc->type = CORESIGHT_DEV_TYPE_SINK; + desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER; + desc->ops = &etb_cs_ops; + desc->pdata = pdata; + desc->dev = dev; + desc->groups = coresight_etb_groups; + drvdata->csdev = coresight_register(desc); + if (IS_ERR(drvdata->csdev)) + return PTR_ERR(drvdata->csdev); + + drvdata->miscdev.name = pdata->name; + drvdata->miscdev.minor = MISC_DYNAMIC_MINOR; + drvdata->miscdev.fops = &etb_fops; + ret = misc_register(&drvdata->miscdev); + if (ret) + goto err_misc_register; + + dev_info(dev, "ETB initialized\n"); + return 0; + +err_misc_register: + coresight_unregister(drvdata->csdev); + return ret; +} + +static int etb_remove(struct amba_device *adev) +{ + struct etb_drvdata *drvdata = amba_get_drvdata(adev); + + misc_deregister(&drvdata->miscdev); + coresight_unregister(drvdata->csdev); + return 0; +} + +static struct amba_id etb_ids[] = { + { + .id = 0x0003b907, + .mask = 0x0003ffff, + }, + { 0, 0}, +}; + +static struct amba_driver etb_driver = { + .drv = { + .name = "coresight-etb10", + .owner = THIS_MODULE, + }, + .probe = etb_probe, + .remove = etb_remove, + .id_table = etb_ids, +}; + +static int __init etb_init(void) +{ + return amba_driver_register(&etb_driver); +} +module_init(etb_init); + +static void __exit etb_exit(void) +{ + amba_driver_unregister(&etb_driver); +} +module_exit(etb_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight Embedded Trace Buffer driver"); From 75c5f8c84ffe053398036a88a73da3280214332b Mon Sep 17 00:00:00 2001 From: Pratik Patel Date: Mon, 3 Nov 2014 11:07:39 -0700 Subject: [PATCH 071/214] coresight-funnel: add CoreSight Funnel driver This driver manages CoreSight Funnel which acts as a link. Funnels have multiple input ports (typically 8) each of which represents an input trace data stream. These multiple input trace data streams are interleaved into a single output stream coming out of the Funnel. Signed-off-by: Pratik Patel Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6e21e3451556af6ada01e2206d5949fc654d75e1) Signed-off-by: Mathieu Poirier --- drivers/coresight/Makefile | 1 + drivers/coresight/coresight-funnel.c | 268 +++++++++++++++++++++++++++ 2 files changed, 269 insertions(+) create mode 100644 drivers/coresight/coresight-funnel.c diff --git a/drivers/coresight/Makefile b/drivers/coresight/Makefile index 4c43dade4c14..05b8a1c75f73 100644 --- a/drivers/coresight/Makefile +++ b/drivers/coresight/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_OF) += of_coresight.o obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o obj-$(CONFIG_CORESIGHT_SINK_TPIU) += coresight-tpiu.o obj-$(CONFIG_CORESIGHT_SINK_ETBV10) += coresight-etb10.o +obj-$(CONFIG_CORESIGHT_LINKS_AND_SINKS) += coresight-funnel.o diff --git a/drivers/coresight/coresight-funnel.c b/drivers/coresight/coresight-funnel.c new file mode 100644 index 000000000000..2108edffe1f4 --- /dev/null +++ b/drivers/coresight/coresight-funnel.c @@ -0,0 +1,268 @@ +/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coresight-priv.h" + +#define FUNNEL_FUNCTL 0x000 +#define FUNNEL_PRICTL 0x004 + +#define FUNNEL_HOLDTIME_MASK 0xf00 +#define FUNNEL_HOLDTIME_SHFT 0x8 +#define FUNNEL_HOLDTIME (0x7 << FUNNEL_HOLDTIME_SHFT) + +/** + * struct funnel_drvdata - specifics associated to a funnel component + * @base: memory mapped base address for this component. + * @dev: the device entity associated to this component. + * @csdev: component vitals needed by the framework. + * @clk: the clock this component is associated to. + * @priority: port selection order. + */ +struct funnel_drvdata { + void __iomem *base; + struct device *dev; + struct coresight_device *csdev; + struct clk *clk; + unsigned long priority; +}; + +static void funnel_enable_hw(struct funnel_drvdata *drvdata, int port) +{ + u32 functl; + + CS_UNLOCK(drvdata->base); + + functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL); + functl &= ~FUNNEL_HOLDTIME_MASK; + functl |= FUNNEL_HOLDTIME; + functl |= (1 << port); + writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL); + writel_relaxed(drvdata->priority, drvdata->base + FUNNEL_PRICTL); + + CS_LOCK(drvdata->base); +} + +static int funnel_enable(struct coresight_device *csdev, int inport, + int outport) +{ + struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int ret; + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + funnel_enable_hw(drvdata, inport); + + dev_info(drvdata->dev, "FUNNEL inport %d enabled\n", inport); + return 0; +} + +static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport) +{ + u32 functl; + + CS_UNLOCK(drvdata->base); + + functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL); + functl &= ~(1 << inport); + writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL); + + CS_LOCK(drvdata->base); +} + +static void funnel_disable(struct coresight_device *csdev, int inport, + int outport) +{ + struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + funnel_disable_hw(drvdata, inport); + + clk_disable_unprepare(drvdata->clk); + + dev_info(drvdata->dev, "FUNNEL inport %d disabled\n", inport); +} + +static const struct coresight_ops_link funnel_link_ops = { + .enable = funnel_enable, + .disable = funnel_disable, +}; + +static const struct coresight_ops funnel_cs_ops = { + .link_ops = &funnel_link_ops, +}; + +static ssize_t priority_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent); + unsigned long val = drvdata->priority; + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t priority_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->priority = val; + return size; +} +static DEVICE_ATTR_RW(priority); + +static u32 get_funnel_ctrl_hw(struct funnel_drvdata *drvdata) +{ + u32 functl; + + CS_UNLOCK(drvdata->base); + functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL); + CS_LOCK(drvdata->base); + + return functl; +} + +static ssize_t funnel_ctrl_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + u32 val; + struct funnel_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + val = get_funnel_ctrl_hw(drvdata); + clk_disable_unprepare(drvdata->clk); + + return sprintf(buf, "%#x\n", val); +} +static DEVICE_ATTR_RO(funnel_ctrl); + +static struct attribute *coresight_funnel_attrs[] = { + &dev_attr_funnel_ctrl.attr, + &dev_attr_priority.attr, + NULL, +}; +ATTRIBUTE_GROUPS(coresight_funnel); + +static int funnel_probe(struct amba_device *adev, const struct amba_id *id) +{ + void __iomem *base; + struct device *dev = &adev->dev; + struct coresight_platform_data *pdata = NULL; + struct funnel_drvdata *drvdata; + struct resource *res = &adev->res; + struct coresight_desc *desc; + struct device_node *np = adev->dev.of_node; + + if (np) { + pdata = of_get_coresight_platform_data(dev, np); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + adev->dev.platform_data = pdata; + } + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + drvdata->dev = &adev->dev; + dev_set_drvdata(dev, drvdata); + + /* Validity for the resource is already checked by the AMBA core */ + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + drvdata->base = base; + + drvdata->clk = adev->pclk; + + desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + desc->type = CORESIGHT_DEV_TYPE_LINK; + desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG; + desc->ops = &funnel_cs_ops; + desc->pdata = pdata; + desc->dev = dev; + desc->groups = coresight_funnel_groups; + drvdata->csdev = coresight_register(desc); + if (IS_ERR(drvdata->csdev)) + return PTR_ERR(drvdata->csdev); + + dev_info(dev, "FUNNEL initialized\n"); + return 0; +} + +static int funnel_remove(struct amba_device *adev) +{ + struct funnel_drvdata *drvdata = amba_get_drvdata(adev); + + coresight_unregister(drvdata->csdev); + return 0; +} + +static struct amba_id funnel_ids[] = { + { + .id = 0x0003b908, + .mask = 0x0003ffff, + }, + { 0, 0}, +}; + +static struct amba_driver funnel_driver = { + .drv = { + .name = "coresight-funnel", + .owner = THIS_MODULE, + }, + .probe = funnel_probe, + .remove = funnel_remove, + .id_table = funnel_ids, +}; + +static int __init funnel_init(void) +{ + return amba_driver_register(&funnel_driver); +} +module_init(funnel_init); + +static void __exit funnel_exit(void) +{ + amba_driver_unregister(&funnel_driver); +} +module_exit(funnel_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight Funnel driver"); From 522ed3f58bb775cc4d87ab37e6c0dd80e44add07 Mon Sep 17 00:00:00 2001 From: Pratik Patel Date: Mon, 3 Nov 2014 11:07:40 -0700 Subject: [PATCH 072/214] coresight-replicator: add CoreSight Replicator driver This driver manages non-configurable CoreSight Replicator that takes a single input trace data stream and replicates it to produce two identical trace data output streams. Replicators are typically used to route single interleaved trace data stream to two or more sinks. Signed-off-by: Pratik Patel Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ceacc1d9b7ae41e4be185596306be17537682fb1) Signed-off-by: Mathieu Poirier --- drivers/coresight/Makefile | 3 +- drivers/coresight/coresight-replicator.c | 138 +++++++++++++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 drivers/coresight/coresight-replicator.c diff --git a/drivers/coresight/Makefile b/drivers/coresight/Makefile index 05b8a1c75f73..574d5fa496fa 100644 --- a/drivers/coresight/Makefile +++ b/drivers/coresight/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_OF) += of_coresight.o obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o obj-$(CONFIG_CORESIGHT_SINK_TPIU) += coresight-tpiu.o obj-$(CONFIG_CORESIGHT_SINK_ETBV10) += coresight-etb10.o -obj-$(CONFIG_CORESIGHT_LINKS_AND_SINKS) += coresight-funnel.o +obj-$(CONFIG_CORESIGHT_LINKS_AND_SINKS) += coresight-funnel.o \ + coresight-replicator.o diff --git a/drivers/coresight/coresight-replicator.c b/drivers/coresight/coresight-replicator.c new file mode 100644 index 000000000000..c879039d59cc --- /dev/null +++ b/drivers/coresight/coresight-replicator.c @@ -0,0 +1,138 @@ +/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coresight-priv.h" + +/** + * struct replicator_drvdata - specifics associated to a replicator component + * @dev: the device entity associated with this component + * @csdev: component vitals needed by the framework + */ +struct replicator_drvdata { + struct device *dev; + struct coresight_device *csdev; +}; + +static int replicator_enable(struct coresight_device *csdev, int inport, + int outport) +{ + struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + dev_info(drvdata->dev, "REPLICATOR enabled\n"); + return 0; +} + +static void replicator_disable(struct coresight_device *csdev, int inport, + int outport) +{ + struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + dev_info(drvdata->dev, "REPLICATOR disabled\n"); +} + +static const struct coresight_ops_link replicator_link_ops = { + .enable = replicator_enable, + .disable = replicator_disable, +}; + +static const struct coresight_ops replicator_cs_ops = { + .link_ops = &replicator_link_ops, +}; + +static int replicator_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct coresight_platform_data *pdata = NULL; + struct replicator_drvdata *drvdata; + struct coresight_desc *desc; + struct device_node *np = pdev->dev.of_node; + + if (np) { + pdata = of_get_coresight_platform_data(dev, np); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + pdev->dev.platform_data = pdata; + } + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + drvdata->dev = &pdev->dev; + platform_set_drvdata(pdev, drvdata); + + desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + desc->type = CORESIGHT_DEV_TYPE_LINK; + desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT; + desc->ops = &replicator_cs_ops; + desc->pdata = pdev->dev.platform_data; + desc->dev = &pdev->dev; + drvdata->csdev = coresight_register(desc); + if (IS_ERR(drvdata->csdev)) + return PTR_ERR(drvdata->csdev); + + dev_info(dev, "REPLICATOR initialized\n"); + return 0; +} + +static int replicator_remove(struct platform_device *pdev) +{ + struct replicator_drvdata *drvdata = platform_get_drvdata(pdev); + + coresight_unregister(drvdata->csdev); + return 0; +} + +static struct of_device_id replicator_match[] = { + {.compatible = "arm,coresight-replicator"}, + {} +}; + +static struct platform_driver replicator_driver = { + .probe = replicator_probe, + .remove = replicator_remove, + .driver = { + .name = "coresight-replicator", + .owner = THIS_MODULE, + .of_match_table = replicator_match, + }, +}; + +static int __init replicator_init(void) +{ + return platform_driver_register(&replicator_driver); +} +module_init(replicator_init); + +static void __exit replicator_exit(void) +{ + platform_driver_unregister(&replicator_driver); +} +module_exit(replicator_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight Replicator driver"); From 29276f394f6f607a56139eba9e22a0a318ab0ce1 Mon Sep 17 00:00:00 2001 From: Pratik Patel Date: Mon, 3 Nov 2014 11:07:41 -0700 Subject: [PATCH 073/214] coresight-etm: add CoreSight ETM/PTM driver This driver manages CoreSight ETM (Embedded Trace Macrocell) that supports processor tracing. Currently supported version are ARM ETMv3.x and PTM1.x. Signed-off-by: Pratik Patel Signed-off-by: Mathieu Poirier coresight-etm3x: adding missing error checking Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a939fc5a71ad531633610242400c262e78731532) Signed-off-by: Mathieu Poirier --- arch/arm/Kconfig.debug | 9 + arch/arm/include/asm/hardware/cp14.h | 542 +++++++ drivers/coresight/Makefile | 1 + drivers/coresight/coresight-etm-cp14.c | 591 ++++++++ drivers/coresight/coresight-etm.h | 251 +++ drivers/coresight/coresight-etm3x.c | 1928 ++++++++++++++++++++++++ 6 files changed, 3322 insertions(+) create mode 100644 arch/arm/include/asm/hardware/cp14.h create mode 100644 drivers/coresight/coresight-etm-cp14.c create mode 100644 drivers/coresight/coresight-etm.h create mode 100644 drivers/coresight/coresight-etm3x.c diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 1a406b5d1bab..6a961376f929 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -748,5 +748,14 @@ config CORESIGHT_SINK_ETBV10 This enables support for the Embedded Trace Buffer version 1.0 driver that complies with the generic implementation of the component without special enhancement or added features. + +config CORESIGHT_SOURCE_ETM3X + bool "CoreSight Embedded Trace Macrocell 3.x driver" + select CORESIGHT_LINKS_AND_SINKS + help + This driver provides support for processor ETM3.x and PTM1.x modules, + which allows tracing the instructions that a processor is executing + This is primarily useful for instruction level tracing. Depending + the ETM version data tracing may also be available. endif endmenu diff --git a/arch/arm/include/asm/hardware/cp14.h b/arch/arm/include/asm/hardware/cp14.h new file mode 100644 index 000000000000..61576dc58ede --- /dev/null +++ b/arch/arm/include/asm/hardware/cp14.h @@ -0,0 +1,542 @@ +/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __ASM_HARDWARE_CP14_H +#define __ASM_HARDWARE_CP14_H + +#include + +/* Accessors for CP14 registers */ +#define dbg_read(reg) RCP14_##reg() +#define dbg_write(val, reg) WCP14_##reg(val) +#define etm_read(reg) RCP14_##reg() +#define etm_write(val, reg) WCP14_##reg(val) + +/* MRC14 and MCR14 */ +#define MRC14(op1, crn, crm, op2) \ +({ \ +u32 val; \ +asm volatile("mrc p14, "#op1", %0, "#crn", "#crm", "#op2 : "=r" (val)); \ +val; \ +}) + +#define MCR14(val, op1, crn, crm, op2) \ +({ \ +asm volatile("mcr p14, "#op1", %0, "#crn", "#crm", "#op2 : : "r" (val));\ +}) + +/* + * Debug Registers + * + * Available only in DBGv7 + * DBGECR, DBGDSCCR, DBGDSMCR, DBGDRCR + * + * Available only in DBGv7.1 + * DBGBXVRm, DBGOSDLR, DBGDEVID2, DBGDEVID1 + * + * Read only + * DBGDIDR, DBGDSCRint, DBGDTRRXint, DBGDRAR, DBGOSLSR, DBGOSSRR, DBGPRSR, + * DBGPRSR, DBGDSAR, DBGAUTHSTATUS, DBGDEVID2, DBGDEVID1, DBGDEVID + * + * Write only + * DBGDTRTXint, DBGOSLAR + */ +#define RCP14_DBGDIDR() MRC14(0, c0, c0, 0) +#define RCP14_DBGDSCRint() MRC14(0, c0, c1, 0) +#define RCP14_DBGDTRRXint() MRC14(0, c0, c5, 0) +#define RCP14_DBGWFAR() MRC14(0, c0, c6, 0) +#define RCP14_DBGVCR() MRC14(0, c0, c7, 0) +#define RCP14_DBGECR() MRC14(0, c0, c9, 0) +#define RCP14_DBGDSCCR() MRC14(0, c0, c10, 0) +#define RCP14_DBGDSMCR() MRC14(0, c0, c11, 0) +#define RCP14_DBGDTRRXext() MRC14(0, c0, c0, 2) +#define RCP14_DBGDSCRext() MRC14(0, c0, c2, 2) +#define RCP14_DBGDTRTXext() MRC14(0, c0, c3, 2) +#define RCP14_DBGDRCR() MRC14(0, c0, c4, 2) +#define RCP14_DBGBVR0() MRC14(0, c0, c0, 4) +#define RCP14_DBGBVR1() MRC14(0, c0, c1, 4) +#define RCP14_DBGBVR2() MRC14(0, c0, c2, 4) +#define RCP14_DBGBVR3() MRC14(0, c0, c3, 4) +#define RCP14_DBGBVR4() MRC14(0, c0, c4, 4) +#define RCP14_DBGBVR5() MRC14(0, c0, c5, 4) +#define RCP14_DBGBVR6() MRC14(0, c0, c6, 4) +#define RCP14_DBGBVR7() MRC14(0, c0, c7, 4) +#define RCP14_DBGBVR8() MRC14(0, c0, c8, 4) +#define RCP14_DBGBVR9() MRC14(0, c0, c9, 4) +#define RCP14_DBGBVR10() MRC14(0, c0, c10, 4) +#define RCP14_DBGBVR11() MRC14(0, c0, c11, 4) +#define RCP14_DBGBVR12() MRC14(0, c0, c12, 4) +#define RCP14_DBGBVR13() MRC14(0, c0, c13, 4) +#define RCP14_DBGBVR14() MRC14(0, c0, c14, 4) +#define RCP14_DBGBVR15() MRC14(0, c0, c15, 4) +#define RCP14_DBGBCR0() MRC14(0, c0, c0, 5) +#define RCP14_DBGBCR1() MRC14(0, c0, c1, 5) +#define RCP14_DBGBCR2() MRC14(0, c0, c2, 5) +#define RCP14_DBGBCR3() MRC14(0, c0, c3, 5) +#define RCP14_DBGBCR4() MRC14(0, c0, c4, 5) +#define RCP14_DBGBCR5() MRC14(0, c0, c5, 5) +#define RCP14_DBGBCR6() MRC14(0, c0, c6, 5) +#define RCP14_DBGBCR7() MRC14(0, c0, c7, 5) +#define RCP14_DBGBCR8() MRC14(0, c0, c8, 5) +#define RCP14_DBGBCR9() MRC14(0, c0, c9, 5) +#define RCP14_DBGBCR10() MRC14(0, c0, c10, 5) +#define RCP14_DBGBCR11() MRC14(0, c0, c11, 5) +#define RCP14_DBGBCR12() MRC14(0, c0, c12, 5) +#define RCP14_DBGBCR13() MRC14(0, c0, c13, 5) +#define RCP14_DBGBCR14() MRC14(0, c0, c14, 5) +#define RCP14_DBGBCR15() MRC14(0, c0, c15, 5) +#define RCP14_DBGWVR0() MRC14(0, c0, c0, 6) +#define RCP14_DBGWVR1() MRC14(0, c0, c1, 6) +#define RCP14_DBGWVR2() MRC14(0, c0, c2, 6) +#define RCP14_DBGWVR3() MRC14(0, c0, c3, 6) +#define RCP14_DBGWVR4() MRC14(0, c0, c4, 6) +#define RCP14_DBGWVR5() MRC14(0, c0, c5, 6) +#define RCP14_DBGWVR6() MRC14(0, c0, c6, 6) +#define RCP14_DBGWVR7() MRC14(0, c0, c7, 6) +#define RCP14_DBGWVR8() MRC14(0, c0, c8, 6) +#define RCP14_DBGWVR9() MRC14(0, c0, c9, 6) +#define RCP14_DBGWVR10() MRC14(0, c0, c10, 6) +#define RCP14_DBGWVR11() MRC14(0, c0, c11, 6) +#define RCP14_DBGWVR12() MRC14(0, c0, c12, 6) +#define RCP14_DBGWVR13() MRC14(0, c0, c13, 6) +#define RCP14_DBGWVR14() MRC14(0, c0, c14, 6) +#define RCP14_DBGWVR15() MRC14(0, c0, c15, 6) +#define RCP14_DBGWCR0() MRC14(0, c0, c0, 7) +#define RCP14_DBGWCR1() MRC14(0, c0, c1, 7) +#define RCP14_DBGWCR2() MRC14(0, c0, c2, 7) +#define RCP14_DBGWCR3() MRC14(0, c0, c3, 7) +#define RCP14_DBGWCR4() MRC14(0, c0, c4, 7) +#define RCP14_DBGWCR5() MRC14(0, c0, c5, 7) +#define RCP14_DBGWCR6() MRC14(0, c0, c6, 7) +#define RCP14_DBGWCR7() MRC14(0, c0, c7, 7) +#define RCP14_DBGWCR8() MRC14(0, c0, c8, 7) +#define RCP14_DBGWCR9() MRC14(0, c0, c9, 7) +#define RCP14_DBGWCR10() MRC14(0, c0, c10, 7) +#define RCP14_DBGWCR11() MRC14(0, c0, c11, 7) +#define RCP14_DBGWCR12() MRC14(0, c0, c12, 7) +#define RCP14_DBGWCR13() MRC14(0, c0, c13, 7) +#define RCP14_DBGWCR14() MRC14(0, c0, c14, 7) +#define RCP14_DBGWCR15() MRC14(0, c0, c15, 7) +#define RCP14_DBGDRAR() MRC14(0, c1, c0, 0) +#define RCP14_DBGBXVR0() MRC14(0, c1, c0, 1) +#define RCP14_DBGBXVR1() MRC14(0, c1, c1, 1) +#define RCP14_DBGBXVR2() MRC14(0, c1, c2, 1) +#define RCP14_DBGBXVR3() MRC14(0, c1, c3, 1) +#define RCP14_DBGBXVR4() MRC14(0, c1, c4, 1) +#define RCP14_DBGBXVR5() MRC14(0, c1, c5, 1) +#define RCP14_DBGBXVR6() MRC14(0, c1, c6, 1) +#define RCP14_DBGBXVR7() MRC14(0, c1, c7, 1) +#define RCP14_DBGBXVR8() MRC14(0, c1, c8, 1) +#define RCP14_DBGBXVR9() MRC14(0, c1, c9, 1) +#define RCP14_DBGBXVR10() MRC14(0, c1, c10, 1) +#define RCP14_DBGBXVR11() MRC14(0, c1, c11, 1) +#define RCP14_DBGBXVR12() MRC14(0, c1, c12, 1) +#define RCP14_DBGBXVR13() MRC14(0, c1, c13, 1) +#define RCP14_DBGBXVR14() MRC14(0, c1, c14, 1) +#define RCP14_DBGBXVR15() MRC14(0, c1, c15, 1) +#define RCP14_DBGOSLSR() MRC14(0, c1, c1, 4) +#define RCP14_DBGOSSRR() MRC14(0, c1, c2, 4) +#define RCP14_DBGOSDLR() MRC14(0, c1, c3, 4) +#define RCP14_DBGPRCR() MRC14(0, c1, c4, 4) +#define RCP14_DBGPRSR() MRC14(0, c1, c5, 4) +#define RCP14_DBGDSAR() MRC14(0, c2, c0, 0) +#define RCP14_DBGITCTRL() MRC14(0, c7, c0, 4) +#define RCP14_DBGCLAIMSET() MRC14(0, c7, c8, 6) +#define RCP14_DBGCLAIMCLR() MRC14(0, c7, c9, 6) +#define RCP14_DBGAUTHSTATUS() MRC14(0, c7, c14, 6) +#define RCP14_DBGDEVID2() MRC14(0, c7, c0, 7) +#define RCP14_DBGDEVID1() MRC14(0, c7, c1, 7) +#define RCP14_DBGDEVID() MRC14(0, c7, c2, 7) + +#define WCP14_DBGDTRTXint(val) MCR14(val, 0, c0, c5, 0) +#define WCP14_DBGWFAR(val) MCR14(val, 0, c0, c6, 0) +#define WCP14_DBGVCR(val) MCR14(val, 0, c0, c7, 0) +#define WCP14_DBGECR(val) MCR14(val, 0, c0, c9, 0) +#define WCP14_DBGDSCCR(val) MCR14(val, 0, c0, c10, 0) +#define WCP14_DBGDSMCR(val) MCR14(val, 0, c0, c11, 0) +#define WCP14_DBGDTRRXext(val) MCR14(val, 0, c0, c0, 2) +#define WCP14_DBGDSCRext(val) MCR14(val, 0, c0, c2, 2) +#define WCP14_DBGDTRTXext(val) MCR14(val, 0, c0, c3, 2) +#define WCP14_DBGDRCR(val) MCR14(val, 0, c0, c4, 2) +#define WCP14_DBGBVR0(val) MCR14(val, 0, c0, c0, 4) +#define WCP14_DBGBVR1(val) MCR14(val, 0, c0, c1, 4) +#define WCP14_DBGBVR2(val) MCR14(val, 0, c0, c2, 4) +#define WCP14_DBGBVR3(val) MCR14(val, 0, c0, c3, 4) +#define WCP14_DBGBVR4(val) MCR14(val, 0, c0, c4, 4) +#define WCP14_DBGBVR5(val) MCR14(val, 0, c0, c5, 4) +#define WCP14_DBGBVR6(val) MCR14(val, 0, c0, c6, 4) +#define WCP14_DBGBVR7(val) MCR14(val, 0, c0, c7, 4) +#define WCP14_DBGBVR8(val) MCR14(val, 0, c0, c8, 4) +#define WCP14_DBGBVR9(val) MCR14(val, 0, c0, c9, 4) +#define WCP14_DBGBVR10(val) MCR14(val, 0, c0, c10, 4) +#define WCP14_DBGBVR11(val) MCR14(val, 0, c0, c11, 4) +#define WCP14_DBGBVR12(val) MCR14(val, 0, c0, c12, 4) +#define WCP14_DBGBVR13(val) MCR14(val, 0, c0, c13, 4) +#define WCP14_DBGBVR14(val) MCR14(val, 0, c0, c14, 4) +#define WCP14_DBGBVR15(val) MCR14(val, 0, c0, c15, 4) +#define WCP14_DBGBCR0(val) MCR14(val, 0, c0, c0, 5) +#define WCP14_DBGBCR1(val) MCR14(val, 0, c0, c1, 5) +#define WCP14_DBGBCR2(val) MCR14(val, 0, c0, c2, 5) +#define WCP14_DBGBCR3(val) MCR14(val, 0, c0, c3, 5) +#define WCP14_DBGBCR4(val) MCR14(val, 0, c0, c4, 5) +#define WCP14_DBGBCR5(val) MCR14(val, 0, c0, c5, 5) +#define WCP14_DBGBCR6(val) MCR14(val, 0, c0, c6, 5) +#define WCP14_DBGBCR7(val) MCR14(val, 0, c0, c7, 5) +#define WCP14_DBGBCR8(val) MCR14(val, 0, c0, c8, 5) +#define WCP14_DBGBCR9(val) MCR14(val, 0, c0, c9, 5) +#define WCP14_DBGBCR10(val) MCR14(val, 0, c0, c10, 5) +#define WCP14_DBGBCR11(val) MCR14(val, 0, c0, c11, 5) +#define WCP14_DBGBCR12(val) MCR14(val, 0, c0, c12, 5) +#define WCP14_DBGBCR13(val) MCR14(val, 0, c0, c13, 5) +#define WCP14_DBGBCR14(val) MCR14(val, 0, c0, c14, 5) +#define WCP14_DBGBCR15(val) MCR14(val, 0, c0, c15, 5) +#define WCP14_DBGWVR0(val) MCR14(val, 0, c0, c0, 6) +#define WCP14_DBGWVR1(val) MCR14(val, 0, c0, c1, 6) +#define WCP14_DBGWVR2(val) MCR14(val, 0, c0, c2, 6) +#define WCP14_DBGWVR3(val) MCR14(val, 0, c0, c3, 6) +#define WCP14_DBGWVR4(val) MCR14(val, 0, c0, c4, 6) +#define WCP14_DBGWVR5(val) MCR14(val, 0, c0, c5, 6) +#define WCP14_DBGWVR6(val) MCR14(val, 0, c0, c6, 6) +#define WCP14_DBGWVR7(val) MCR14(val, 0, c0, c7, 6) +#define WCP14_DBGWVR8(val) MCR14(val, 0, c0, c8, 6) +#define WCP14_DBGWVR9(val) MCR14(val, 0, c0, c9, 6) +#define WCP14_DBGWVR10(val) MCR14(val, 0, c0, c10, 6) +#define WCP14_DBGWVR11(val) MCR14(val, 0, c0, c11, 6) +#define WCP14_DBGWVR12(val) MCR14(val, 0, c0, c12, 6) +#define WCP14_DBGWVR13(val) MCR14(val, 0, c0, c13, 6) +#define WCP14_DBGWVR14(val) MCR14(val, 0, c0, c14, 6) +#define WCP14_DBGWVR15(val) MCR14(val, 0, c0, c15, 6) +#define WCP14_DBGWCR0(val) MCR14(val, 0, c0, c0, 7) +#define WCP14_DBGWCR1(val) MCR14(val, 0, c0, c1, 7) +#define WCP14_DBGWCR2(val) MCR14(val, 0, c0, c2, 7) +#define WCP14_DBGWCR3(val) MCR14(val, 0, c0, c3, 7) +#define WCP14_DBGWCR4(val) MCR14(val, 0, c0, c4, 7) +#define WCP14_DBGWCR5(val) MCR14(val, 0, c0, c5, 7) +#define WCP14_DBGWCR6(val) MCR14(val, 0, c0, c6, 7) +#define WCP14_DBGWCR7(val) MCR14(val, 0, c0, c7, 7) +#define WCP14_DBGWCR8(val) MCR14(val, 0, c0, c8, 7) +#define WCP14_DBGWCR9(val) MCR14(val, 0, c0, c9, 7) +#define WCP14_DBGWCR10(val) MCR14(val, 0, c0, c10, 7) +#define WCP14_DBGWCR11(val) MCR14(val, 0, c0, c11, 7) +#define WCP14_DBGWCR12(val) MCR14(val, 0, c0, c12, 7) +#define WCP14_DBGWCR13(val) MCR14(val, 0, c0, c13, 7) +#define WCP14_DBGWCR14(val) MCR14(val, 0, c0, c14, 7) +#define WCP14_DBGWCR15(val) MCR14(val, 0, c0, c15, 7) +#define WCP14_DBGBXVR0(val) MCR14(val, 0, c1, c0, 1) +#define WCP14_DBGBXVR1(val) MCR14(val, 0, c1, c1, 1) +#define WCP14_DBGBXVR2(val) MCR14(val, 0, c1, c2, 1) +#define WCP14_DBGBXVR3(val) MCR14(val, 0, c1, c3, 1) +#define WCP14_DBGBXVR4(val) MCR14(val, 0, c1, c4, 1) +#define WCP14_DBGBXVR5(val) MCR14(val, 0, c1, c5, 1) +#define WCP14_DBGBXVR6(val) MCR14(val, 0, c1, c6, 1) +#define WCP14_DBGBXVR7(val) MCR14(val, 0, c1, c7, 1) +#define WCP14_DBGBXVR8(val) MCR14(val, 0, c1, c8, 1) +#define WCP14_DBGBXVR9(val) MCR14(val, 0, c1, c9, 1) +#define WCP14_DBGBXVR10(val) MCR14(val, 0, c1, c10, 1) +#define WCP14_DBGBXVR11(val) MCR14(val, 0, c1, c11, 1) +#define WCP14_DBGBXVR12(val) MCR14(val, 0, c1, c12, 1) +#define WCP14_DBGBXVR13(val) MCR14(val, 0, c1, c13, 1) +#define WCP14_DBGBXVR14(val) MCR14(val, 0, c1, c14, 1) +#define WCP14_DBGBXVR15(val) MCR14(val, 0, c1, c15, 1) +#define WCP14_DBGOSLAR(val) MCR14(val, 0, c1, c0, 4) +#define WCP14_DBGOSSRR(val) MCR14(val, 0, c1, c2, 4) +#define WCP14_DBGOSDLR(val) MCR14(val, 0, c1, c3, 4) +#define WCP14_DBGPRCR(val) MCR14(val, 0, c1, c4, 4) +#define WCP14_DBGITCTRL(val) MCR14(val, 0, c7, c0, 4) +#define WCP14_DBGCLAIMSET(val) MCR14(val, 0, c7, c8, 6) +#define WCP14_DBGCLAIMCLR(val) MCR14(val, 0, c7, c9, 6) + +/* + * ETM Registers + * + * Available only in ETMv3.3, 3.4, 3.5 + * ETMASICCR, ETMTECR2, ETMFFRR, ETMVDEVR, ETMVDCR1, ETMVDCR2, ETMVDCR3, + * ETMDCVRn, ETMDCMRn + * + * Available only in ETMv3.5 as read only + * ETMIDR2 + * + * Available only in ETMv3.5, PFTv1.0, 1.1 + * ETMTSEVR, ETMVMIDCVR, ETMPDCR + * + * Read only + * ETMCCR, ETMSCR, ETMIDR, ETMCCER, ETMOSLSR + * ETMLSR, ETMAUTHSTATUS, ETMDEVID, ETMDEVTYPE, ETMPIDR4, ETMPIDR5, ETMPIDR6, + * ETMPIDR7, ETMPIDR0, ETMPIDR1, ETMPIDR2, ETMPIDR2, ETMPIDR3, ETMCIDR0, + * ETMCIDR1, ETMCIDR2, ETMCIDR3 + * + * Write only + * ETMOSLAR, ETMLAR + * Note: ETMCCER[11] controls WO nature of certain regs. Refer ETM arch spec. + */ +#define RCP14_ETMCR() MRC14(1, c0, c0, 0) +#define RCP14_ETMCCR() MRC14(1, c0, c1, 0) +#define RCP14_ETMTRIGGER() MRC14(1, c0, c2, 0) +#define RCP14_ETMASICCR() MRC14(1, c0, c3, 0) +#define RCP14_ETMSR() MRC14(1, c0, c4, 0) +#define RCP14_ETMSCR() MRC14(1, c0, c5, 0) +#define RCP14_ETMTSSCR() MRC14(1, c0, c6, 0) +#define RCP14_ETMTECR2() MRC14(1, c0, c7, 0) +#define RCP14_ETMTEEVR() MRC14(1, c0, c8, 0) +#define RCP14_ETMTECR1() MRC14(1, c0, c9, 0) +#define RCP14_ETMFFRR() MRC14(1, c0, c10, 0) +#define RCP14_ETMFFLR() MRC14(1, c0, c11, 0) +#define RCP14_ETMVDEVR() MRC14(1, c0, c12, 0) +#define RCP14_ETMVDCR1() MRC14(1, c0, c13, 0) +#define RCP14_ETMVDCR2() MRC14(1, c0, c14, 0) +#define RCP14_ETMVDCR3() MRC14(1, c0, c15, 0) +#define RCP14_ETMACVR0() MRC14(1, c0, c0, 1) +#define RCP14_ETMACVR1() MRC14(1, c0, c1, 1) +#define RCP14_ETMACVR2() MRC14(1, c0, c2, 1) +#define RCP14_ETMACVR3() MRC14(1, c0, c3, 1) +#define RCP14_ETMACVR4() MRC14(1, c0, c4, 1) +#define RCP14_ETMACVR5() MRC14(1, c0, c5, 1) +#define RCP14_ETMACVR6() MRC14(1, c0, c6, 1) +#define RCP14_ETMACVR7() MRC14(1, c0, c7, 1) +#define RCP14_ETMACVR8() MRC14(1, c0, c8, 1) +#define RCP14_ETMACVR9() MRC14(1, c0, c9, 1) +#define RCP14_ETMACVR10() MRC14(1, c0, c10, 1) +#define RCP14_ETMACVR11() MRC14(1, c0, c11, 1) +#define RCP14_ETMACVR12() MRC14(1, c0, c12, 1) +#define RCP14_ETMACVR13() MRC14(1, c0, c13, 1) +#define RCP14_ETMACVR14() MRC14(1, c0, c14, 1) +#define RCP14_ETMACVR15() MRC14(1, c0, c15, 1) +#define RCP14_ETMACTR0() MRC14(1, c0, c0, 2) +#define RCP14_ETMACTR1() MRC14(1, c0, c1, 2) +#define RCP14_ETMACTR2() MRC14(1, c0, c2, 2) +#define RCP14_ETMACTR3() MRC14(1, c0, c3, 2) +#define RCP14_ETMACTR4() MRC14(1, c0, c4, 2) +#define RCP14_ETMACTR5() MRC14(1, c0, c5, 2) +#define RCP14_ETMACTR6() MRC14(1, c0, c6, 2) +#define RCP14_ETMACTR7() MRC14(1, c0, c7, 2) +#define RCP14_ETMACTR8() MRC14(1, c0, c8, 2) +#define RCP14_ETMACTR9() MRC14(1, c0, c9, 2) +#define RCP14_ETMACTR10() MRC14(1, c0, c10, 2) +#define RCP14_ETMACTR11() MRC14(1, c0, c11, 2) +#define RCP14_ETMACTR12() MRC14(1, c0, c12, 2) +#define RCP14_ETMACTR13() MRC14(1, c0, c13, 2) +#define RCP14_ETMACTR14() MRC14(1, c0, c14, 2) +#define RCP14_ETMACTR15() MRC14(1, c0, c15, 2) +#define RCP14_ETMDCVR0() MRC14(1, c0, c0, 3) +#define RCP14_ETMDCVR2() MRC14(1, c0, c2, 3) +#define RCP14_ETMDCVR4() MRC14(1, c0, c4, 3) +#define RCP14_ETMDCVR6() MRC14(1, c0, c6, 3) +#define RCP14_ETMDCVR8() MRC14(1, c0, c8, 3) +#define RCP14_ETMDCVR10() MRC14(1, c0, c10, 3) +#define RCP14_ETMDCVR12() MRC14(1, c0, c12, 3) +#define RCP14_ETMDCVR14() MRC14(1, c0, c14, 3) +#define RCP14_ETMDCMR0() MRC14(1, c0, c0, 4) +#define RCP14_ETMDCMR2() MRC14(1, c0, c2, 4) +#define RCP14_ETMDCMR4() MRC14(1, c0, c4, 4) +#define RCP14_ETMDCMR6() MRC14(1, c0, c6, 4) +#define RCP14_ETMDCMR8() MRC14(1, c0, c8, 4) +#define RCP14_ETMDCMR10() MRC14(1, c0, c10, 4) +#define RCP14_ETMDCMR12() MRC14(1, c0, c12, 4) +#define RCP14_ETMDCMR14() MRC14(1, c0, c14, 4) +#define RCP14_ETMCNTRLDVR0() MRC14(1, c0, c0, 5) +#define RCP14_ETMCNTRLDVR1() MRC14(1, c0, c1, 5) +#define RCP14_ETMCNTRLDVR2() MRC14(1, c0, c2, 5) +#define RCP14_ETMCNTRLDVR3() MRC14(1, c0, c3, 5) +#define RCP14_ETMCNTENR0() MRC14(1, c0, c4, 5) +#define RCP14_ETMCNTENR1() MRC14(1, c0, c5, 5) +#define RCP14_ETMCNTENR2() MRC14(1, c0, c6, 5) +#define RCP14_ETMCNTENR3() MRC14(1, c0, c7, 5) +#define RCP14_ETMCNTRLDEVR0() MRC14(1, c0, c8, 5) +#define RCP14_ETMCNTRLDEVR1() MRC14(1, c0, c9, 5) +#define RCP14_ETMCNTRLDEVR2() MRC14(1, c0, c10, 5) +#define RCP14_ETMCNTRLDEVR3() MRC14(1, c0, c11, 5) +#define RCP14_ETMCNTVR0() MRC14(1, c0, c12, 5) +#define RCP14_ETMCNTVR1() MRC14(1, c0, c13, 5) +#define RCP14_ETMCNTVR2() MRC14(1, c0, c14, 5) +#define RCP14_ETMCNTVR3() MRC14(1, c0, c15, 5) +#define RCP14_ETMSQ12EVR() MRC14(1, c0, c0, 6) +#define RCP14_ETMSQ21EVR() MRC14(1, c0, c1, 6) +#define RCP14_ETMSQ23EVR() MRC14(1, c0, c2, 6) +#define RCP14_ETMSQ31EVR() MRC14(1, c0, c3, 6) +#define RCP14_ETMSQ32EVR() MRC14(1, c0, c4, 6) +#define RCP14_ETMSQ13EVR() MRC14(1, c0, c5, 6) +#define RCP14_ETMSQR() MRC14(1, c0, c7, 6) +#define RCP14_ETMEXTOUTEVR0() MRC14(1, c0, c8, 6) +#define RCP14_ETMEXTOUTEVR1() MRC14(1, c0, c9, 6) +#define RCP14_ETMEXTOUTEVR2() MRC14(1, c0, c10, 6) +#define RCP14_ETMEXTOUTEVR3() MRC14(1, c0, c11, 6) +#define RCP14_ETMCIDCVR0() MRC14(1, c0, c12, 6) +#define RCP14_ETMCIDCVR1() MRC14(1, c0, c13, 6) +#define RCP14_ETMCIDCVR2() MRC14(1, c0, c14, 6) +#define RCP14_ETMCIDCMR() MRC14(1, c0, c15, 6) +#define RCP14_ETMIMPSPEC0() MRC14(1, c0, c0, 7) +#define RCP14_ETMIMPSPEC1() MRC14(1, c0, c1, 7) +#define RCP14_ETMIMPSPEC2() MRC14(1, c0, c2, 7) +#define RCP14_ETMIMPSPEC3() MRC14(1, c0, c3, 7) +#define RCP14_ETMIMPSPEC4() MRC14(1, c0, c4, 7) +#define RCP14_ETMIMPSPEC5() MRC14(1, c0, c5, 7) +#define RCP14_ETMIMPSPEC6() MRC14(1, c0, c6, 7) +#define RCP14_ETMIMPSPEC7() MRC14(1, c0, c7, 7) +#define RCP14_ETMSYNCFR() MRC14(1, c0, c8, 7) +#define RCP14_ETMIDR() MRC14(1, c0, c9, 7) +#define RCP14_ETMCCER() MRC14(1, c0, c10, 7) +#define RCP14_ETMEXTINSELR() MRC14(1, c0, c11, 7) +#define RCP14_ETMTESSEICR() MRC14(1, c0, c12, 7) +#define RCP14_ETMEIBCR() MRC14(1, c0, c13, 7) +#define RCP14_ETMTSEVR() MRC14(1, c0, c14, 7) +#define RCP14_ETMAUXCR() MRC14(1, c0, c15, 7) +#define RCP14_ETMTRACEIDR() MRC14(1, c1, c0, 0) +#define RCP14_ETMIDR2() MRC14(1, c1, c2, 0) +#define RCP14_ETMVMIDCVR() MRC14(1, c1, c0, 1) +#define RCP14_ETMOSLSR() MRC14(1, c1, c1, 4) +/* Not available in PFTv1.1 */ +#define RCP14_ETMOSSRR() MRC14(1, c1, c2, 4) +#define RCP14_ETMPDCR() MRC14(1, c1, c4, 4) +#define RCP14_ETMPDSR() MRC14(1, c1, c5, 4) +#define RCP14_ETMITCTRL() MRC14(1, c7, c0, 4) +#define RCP14_ETMCLAIMSET() MRC14(1, c7, c8, 6) +#define RCP14_ETMCLAIMCLR() MRC14(1, c7, c9, 6) +#define RCP14_ETMLSR() MRC14(1, c7, c13, 6) +#define RCP14_ETMAUTHSTATUS() MRC14(1, c7, c14, 6) +#define RCP14_ETMDEVID() MRC14(1, c7, c2, 7) +#define RCP14_ETMDEVTYPE() MRC14(1, c7, c3, 7) +#define RCP14_ETMPIDR4() MRC14(1, c7, c4, 7) +#define RCP14_ETMPIDR5() MRC14(1, c7, c5, 7) +#define RCP14_ETMPIDR6() MRC14(1, c7, c6, 7) +#define RCP14_ETMPIDR7() MRC14(1, c7, c7, 7) +#define RCP14_ETMPIDR0() MRC14(1, c7, c8, 7) +#define RCP14_ETMPIDR1() MRC14(1, c7, c9, 7) +#define RCP14_ETMPIDR2() MRC14(1, c7, c10, 7) +#define RCP14_ETMPIDR3() MRC14(1, c7, c11, 7) +#define RCP14_ETMCIDR0() MRC14(1, c7, c12, 7) +#define RCP14_ETMCIDR1() MRC14(1, c7, c13, 7) +#define RCP14_ETMCIDR2() MRC14(1, c7, c14, 7) +#define RCP14_ETMCIDR3() MRC14(1, c7, c15, 7) + +#define WCP14_ETMCR(val) MCR14(val, 1, c0, c0, 0) +#define WCP14_ETMTRIGGER(val) MCR14(val, 1, c0, c2, 0) +#define WCP14_ETMASICCR(val) MCR14(val, 1, c0, c3, 0) +#define WCP14_ETMSR(val) MCR14(val, 1, c0, c4, 0) +#define WCP14_ETMTSSCR(val) MCR14(val, 1, c0, c6, 0) +#define WCP14_ETMTECR2(val) MCR14(val, 1, c0, c7, 0) +#define WCP14_ETMTEEVR(val) MCR14(val, 1, c0, c8, 0) +#define WCP14_ETMTECR1(val) MCR14(val, 1, c0, c9, 0) +#define WCP14_ETMFFRR(val) MCR14(val, 1, c0, c10, 0) +#define WCP14_ETMFFLR(val) MCR14(val, 1, c0, c11, 0) +#define WCP14_ETMVDEVR(val) MCR14(val, 1, c0, c12, 0) +#define WCP14_ETMVDCR1(val) MCR14(val, 1, c0, c13, 0) +#define WCP14_ETMVDCR2(val) MCR14(val, 1, c0, c14, 0) +#define WCP14_ETMVDCR3(val) MCR14(val, 1, c0, c15, 0) +#define WCP14_ETMACVR0(val) MCR14(val, 1, c0, c0, 1) +#define WCP14_ETMACVR1(val) MCR14(val, 1, c0, c1, 1) +#define WCP14_ETMACVR2(val) MCR14(val, 1, c0, c2, 1) +#define WCP14_ETMACVR3(val) MCR14(val, 1, c0, c3, 1) +#define WCP14_ETMACVR4(val) MCR14(val, 1, c0, c4, 1) +#define WCP14_ETMACVR5(val) MCR14(val, 1, c0, c5, 1) +#define WCP14_ETMACVR6(val) MCR14(val, 1, c0, c6, 1) +#define WCP14_ETMACVR7(val) MCR14(val, 1, c0, c7, 1) +#define WCP14_ETMACVR8(val) MCR14(val, 1, c0, c8, 1) +#define WCP14_ETMACVR9(val) MCR14(val, 1, c0, c9, 1) +#define WCP14_ETMACVR10(val) MCR14(val, 1, c0, c10, 1) +#define WCP14_ETMACVR11(val) MCR14(val, 1, c0, c11, 1) +#define WCP14_ETMACVR12(val) MCR14(val, 1, c0, c12, 1) +#define WCP14_ETMACVR13(val) MCR14(val, 1, c0, c13, 1) +#define WCP14_ETMACVR14(val) MCR14(val, 1, c0, c14, 1) +#define WCP14_ETMACVR15(val) MCR14(val, 1, c0, c15, 1) +#define WCP14_ETMACTR0(val) MCR14(val, 1, c0, c0, 2) +#define WCP14_ETMACTR1(val) MCR14(val, 1, c0, c1, 2) +#define WCP14_ETMACTR2(val) MCR14(val, 1, c0, c2, 2) +#define WCP14_ETMACTR3(val) MCR14(val, 1, c0, c3, 2) +#define WCP14_ETMACTR4(val) MCR14(val, 1, c0, c4, 2) +#define WCP14_ETMACTR5(val) MCR14(val, 1, c0, c5, 2) +#define WCP14_ETMACTR6(val) MCR14(val, 1, c0, c6, 2) +#define WCP14_ETMACTR7(val) MCR14(val, 1, c0, c7, 2) +#define WCP14_ETMACTR8(val) MCR14(val, 1, c0, c8, 2) +#define WCP14_ETMACTR9(val) MCR14(val, 1, c0, c9, 2) +#define WCP14_ETMACTR10(val) MCR14(val, 1, c0, c10, 2) +#define WCP14_ETMACTR11(val) MCR14(val, 1, c0, c11, 2) +#define WCP14_ETMACTR12(val) MCR14(val, 1, c0, c12, 2) +#define WCP14_ETMACTR13(val) MCR14(val, 1, c0, c13, 2) +#define WCP14_ETMACTR14(val) MCR14(val, 1, c0, c14, 2) +#define WCP14_ETMACTR15(val) MCR14(val, 1, c0, c15, 2) +#define WCP14_ETMDCVR0(val) MCR14(val, 1, c0, c0, 3) +#define WCP14_ETMDCVR2(val) MCR14(val, 1, c0, c2, 3) +#define WCP14_ETMDCVR4(val) MCR14(val, 1, c0, c4, 3) +#define WCP14_ETMDCVR6(val) MCR14(val, 1, c0, c6, 3) +#define WCP14_ETMDCVR8(val) MCR14(val, 1, c0, c8, 3) +#define WCP14_ETMDCVR10(val) MCR14(val, 1, c0, c10, 3) +#define WCP14_ETMDCVR12(val) MCR14(val, 1, c0, c12, 3) +#define WCP14_ETMDCVR14(val) MCR14(val, 1, c0, c14, 3) +#define WCP14_ETMDCMR0(val) MCR14(val, 1, c0, c0, 4) +#define WCP14_ETMDCMR2(val) MCR14(val, 1, c0, c2, 4) +#define WCP14_ETMDCMR4(val) MCR14(val, 1, c0, c4, 4) +#define WCP14_ETMDCMR6(val) MCR14(val, 1, c0, c6, 4) +#define WCP14_ETMDCMR8(val) MCR14(val, 1, c0, c8, 4) +#define WCP14_ETMDCMR10(val) MCR14(val, 1, c0, c10, 4) +#define WCP14_ETMDCMR12(val) MCR14(val, 1, c0, c12, 4) +#define WCP14_ETMDCMR14(val) MCR14(val, 1, c0, c14, 4) +#define WCP14_ETMCNTRLDVR0(val) MCR14(val, 1, c0, c0, 5) +#define WCP14_ETMCNTRLDVR1(val) MCR14(val, 1, c0, c1, 5) +#define WCP14_ETMCNTRLDVR2(val) MCR14(val, 1, c0, c2, 5) +#define WCP14_ETMCNTRLDVR3(val) MCR14(val, 1, c0, c3, 5) +#define WCP14_ETMCNTENR0(val) MCR14(val, 1, c0, c4, 5) +#define WCP14_ETMCNTENR1(val) MCR14(val, 1, c0, c5, 5) +#define WCP14_ETMCNTENR2(val) MCR14(val, 1, c0, c6, 5) +#define WCP14_ETMCNTENR3(val) MCR14(val, 1, c0, c7, 5) +#define WCP14_ETMCNTRLDEVR0(val) MCR14(val, 1, c0, c8, 5) +#define WCP14_ETMCNTRLDEVR1(val) MCR14(val, 1, c0, c9, 5) +#define WCP14_ETMCNTRLDEVR2(val) MCR14(val, 1, c0, c10, 5) +#define WCP14_ETMCNTRLDEVR3(val) MCR14(val, 1, c0, c11, 5) +#define WCP14_ETMCNTVR0(val) MCR14(val, 1, c0, c12, 5) +#define WCP14_ETMCNTVR1(val) MCR14(val, 1, c0, c13, 5) +#define WCP14_ETMCNTVR2(val) MCR14(val, 1, c0, c14, 5) +#define WCP14_ETMCNTVR3(val) MCR14(val, 1, c0, c15, 5) +#define WCP14_ETMSQ12EVR(val) MCR14(val, 1, c0, c0, 6) +#define WCP14_ETMSQ21EVR(val) MCR14(val, 1, c0, c1, 6) +#define WCP14_ETMSQ23EVR(val) MCR14(val, 1, c0, c2, 6) +#define WCP14_ETMSQ31EVR(val) MCR14(val, 1, c0, c3, 6) +#define WCP14_ETMSQ32EVR(val) MCR14(val, 1, c0, c4, 6) +#define WCP14_ETMSQ13EVR(val) MCR14(val, 1, c0, c5, 6) +#define WCP14_ETMSQR(val) MCR14(val, 1, c0, c7, 6) +#define WCP14_ETMEXTOUTEVR0(val) MCR14(val, 1, c0, c8, 6) +#define WCP14_ETMEXTOUTEVR1(val) MCR14(val, 1, c0, c9, 6) +#define WCP14_ETMEXTOUTEVR2(val) MCR14(val, 1, c0, c10, 6) +#define WCP14_ETMEXTOUTEVR3(val) MCR14(val, 1, c0, c11, 6) +#define WCP14_ETMCIDCVR0(val) MCR14(val, 1, c0, c12, 6) +#define WCP14_ETMCIDCVR1(val) MCR14(val, 1, c0, c13, 6) +#define WCP14_ETMCIDCVR2(val) MCR14(val, 1, c0, c14, 6) +#define WCP14_ETMCIDCMR(val) MCR14(val, 1, c0, c15, 6) +#define WCP14_ETMIMPSPEC0(val) MCR14(val, 1, c0, c0, 7) +#define WCP14_ETMIMPSPEC1(val) MCR14(val, 1, c0, c1, 7) +#define WCP14_ETMIMPSPEC2(val) MCR14(val, 1, c0, c2, 7) +#define WCP14_ETMIMPSPEC3(val) MCR14(val, 1, c0, c3, 7) +#define WCP14_ETMIMPSPEC4(val) MCR14(val, 1, c0, c4, 7) +#define WCP14_ETMIMPSPEC5(val) MCR14(val, 1, c0, c5, 7) +#define WCP14_ETMIMPSPEC6(val) MCR14(val, 1, c0, c6, 7) +#define WCP14_ETMIMPSPEC7(val) MCR14(val, 1, c0, c7, 7) +/* Can be read only in ETMv3.4, ETMv3.5 */ +#define WCP14_ETMSYNCFR(val) MCR14(val, 1, c0, c8, 7) +#define WCP14_ETMEXTINSELR(val) MCR14(val, 1, c0, c11, 7) +#define WCP14_ETMTESSEICR(val) MCR14(val, 1, c0, c12, 7) +#define WCP14_ETMEIBCR(val) MCR14(val, 1, c0, c13, 7) +#define WCP14_ETMTSEVR(val) MCR14(val, 1, c0, c14, 7) +#define WCP14_ETMAUXCR(val) MCR14(val, 1, c0, c15, 7) +#define WCP14_ETMTRACEIDR(val) MCR14(val, 1, c1, c0, 0) +#define WCP14_ETMIDR2(val) MCR14(val, 1, c1, c2, 0) +#define WCP14_ETMVMIDCVR(val) MCR14(val, 1, c1, c0, 1) +#define WCP14_ETMOSLAR(val) MCR14(val, 1, c1, c0, 4) +/* Not available in PFTv1.1 */ +#define WCP14_ETMOSSRR(val) MCR14(val, 1, c1, c2, 4) +#define WCP14_ETMPDCR(val) MCR14(val, 1, c1, c4, 4) +#define WCP14_ETMPDSR(val) MCR14(val, 1, c1, c5, 4) +#define WCP14_ETMITCTRL(val) MCR14(val, 1, c7, c0, 4) +#define WCP14_ETMCLAIMSET(val) MCR14(val, 1, c7, c8, 6) +#define WCP14_ETMCLAIMCLR(val) MCR14(val, 1, c7, c9, 6) +/* Writes to this from CP14 interface are ignored */ +#define WCP14_ETMLAR(val) MCR14(val, 1, c7, c12, 6) + +#endif diff --git a/drivers/coresight/Makefile b/drivers/coresight/Makefile index 574d5fa496fa..4b4bec890ef5 100644 --- a/drivers/coresight/Makefile +++ b/drivers/coresight/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_CORESIGHT_SINK_TPIU) += coresight-tpiu.o obj-$(CONFIG_CORESIGHT_SINK_ETBV10) += coresight-etb10.o obj-$(CONFIG_CORESIGHT_LINKS_AND_SINKS) += coresight-funnel.o \ coresight-replicator.o +obj-$(CONFIG_CORESIGHT_SOURCE_ETM3X) += coresight-etm3x.o coresight-etm-cp14.o diff --git a/drivers/coresight/coresight-etm-cp14.c b/drivers/coresight/coresight-etm-cp14.c new file mode 100644 index 000000000000..12a220682117 --- /dev/null +++ b/drivers/coresight/coresight-etm-cp14.c @@ -0,0 +1,591 @@ +/* Copyright (c) 2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "coresight-etm.h" + +int etm_readl_cp14(u32 reg, unsigned int *val) +{ + switch (reg) { + case ETMCR: + *val = etm_read(ETMCR); + return 0; + case ETMCCR: + *val = etm_read(ETMCCR); + return 0; + case ETMTRIGGER: + *val = etm_read(ETMTRIGGER); + return 0; + case ETMSR: + *val = etm_read(ETMSR); + return 0; + case ETMSCR: + *val = etm_read(ETMSCR); + return 0; + case ETMTSSCR: + *val = etm_read(ETMTSSCR); + return 0; + case ETMTEEVR: + *val = etm_read(ETMTEEVR); + return 0; + case ETMTECR1: + *val = etm_read(ETMTECR1); + return 0; + case ETMFFLR: + *val = etm_read(ETMFFLR); + return 0; + case ETMACVRn(0): + *val = etm_read(ETMACVR0); + return 0; + case ETMACVRn(1): + *val = etm_read(ETMACVR1); + return 0; + case ETMACVRn(2): + *val = etm_read(ETMACVR2); + return 0; + case ETMACVRn(3): + *val = etm_read(ETMACVR3); + return 0; + case ETMACVRn(4): + *val = etm_read(ETMACVR4); + return 0; + case ETMACVRn(5): + *val = etm_read(ETMACVR5); + return 0; + case ETMACVRn(6): + *val = etm_read(ETMACVR6); + return 0; + case ETMACVRn(7): + *val = etm_read(ETMACVR7); + return 0; + case ETMACVRn(8): + *val = etm_read(ETMACVR8); + return 0; + case ETMACVRn(9): + *val = etm_read(ETMACVR9); + return 0; + case ETMACVRn(10): + *val = etm_read(ETMACVR10); + return 0; + case ETMACVRn(11): + *val = etm_read(ETMACVR11); + return 0; + case ETMACVRn(12): + *val = etm_read(ETMACVR12); + return 0; + case ETMACVRn(13): + *val = etm_read(ETMACVR13); + return 0; + case ETMACVRn(14): + *val = etm_read(ETMACVR14); + return 0; + case ETMACVRn(15): + *val = etm_read(ETMACVR15); + return 0; + case ETMACTRn(0): + *val = etm_read(ETMACTR0); + return 0; + case ETMACTRn(1): + *val = etm_read(ETMACTR1); + return 0; + case ETMACTRn(2): + *val = etm_read(ETMACTR2); + return 0; + case ETMACTRn(3): + *val = etm_read(ETMACTR3); + return 0; + case ETMACTRn(4): + *val = etm_read(ETMACTR4); + return 0; + case ETMACTRn(5): + *val = etm_read(ETMACTR5); + return 0; + case ETMACTRn(6): + *val = etm_read(ETMACTR6); + return 0; + case ETMACTRn(7): + *val = etm_read(ETMACTR7); + return 0; + case ETMACTRn(8): + *val = etm_read(ETMACTR8); + return 0; + case ETMACTRn(9): + *val = etm_read(ETMACTR9); + return 0; + case ETMACTRn(10): + *val = etm_read(ETMACTR10); + return 0; + case ETMACTRn(11): + *val = etm_read(ETMACTR11); + return 0; + case ETMACTRn(12): + *val = etm_read(ETMACTR12); + return 0; + case ETMACTRn(13): + *val = etm_read(ETMACTR13); + return 0; + case ETMACTRn(14): + *val = etm_read(ETMACTR14); + return 0; + case ETMACTRn(15): + *val = etm_read(ETMACTR15); + return 0; + case ETMCNTRLDVRn(0): + *val = etm_read(ETMCNTRLDVR0); + return 0; + case ETMCNTRLDVRn(1): + *val = etm_read(ETMCNTRLDVR1); + return 0; + case ETMCNTRLDVRn(2): + *val = etm_read(ETMCNTRLDVR2); + return 0; + case ETMCNTRLDVRn(3): + *val = etm_read(ETMCNTRLDVR3); + return 0; + case ETMCNTENRn(0): + *val = etm_read(ETMCNTENR0); + return 0; + case ETMCNTENRn(1): + *val = etm_read(ETMCNTENR1); + return 0; + case ETMCNTENRn(2): + *val = etm_read(ETMCNTENR2); + return 0; + case ETMCNTENRn(3): + *val = etm_read(ETMCNTENR3); + return 0; + case ETMCNTRLDEVRn(0): + *val = etm_read(ETMCNTRLDEVR0); + return 0; + case ETMCNTRLDEVRn(1): + *val = etm_read(ETMCNTRLDEVR1); + return 0; + case ETMCNTRLDEVRn(2): + *val = etm_read(ETMCNTRLDEVR2); + return 0; + case ETMCNTRLDEVRn(3): + *val = etm_read(ETMCNTRLDEVR3); + return 0; + case ETMCNTVRn(0): + *val = etm_read(ETMCNTVR0); + return 0; + case ETMCNTVRn(1): + *val = etm_read(ETMCNTVR1); + return 0; + case ETMCNTVRn(2): + *val = etm_read(ETMCNTVR2); + return 0; + case ETMCNTVRn(3): + *val = etm_read(ETMCNTVR3); + return 0; + case ETMSQ12EVR: + *val = etm_read(ETMSQ12EVR); + return 0; + case ETMSQ21EVR: + *val = etm_read(ETMSQ21EVR); + return 0; + case ETMSQ23EVR: + *val = etm_read(ETMSQ23EVR); + return 0; + case ETMSQ31EVR: + *val = etm_read(ETMSQ31EVR); + return 0; + case ETMSQ32EVR: + *val = etm_read(ETMSQ32EVR); + return 0; + case ETMSQ13EVR: + *val = etm_read(ETMSQ13EVR); + return 0; + case ETMSQR: + *val = etm_read(ETMSQR); + return 0; + case ETMEXTOUTEVRn(0): + *val = etm_read(ETMEXTOUTEVR0); + return 0; + case ETMEXTOUTEVRn(1): + *val = etm_read(ETMEXTOUTEVR1); + return 0; + case ETMEXTOUTEVRn(2): + *val = etm_read(ETMEXTOUTEVR2); + return 0; + case ETMEXTOUTEVRn(3): + *val = etm_read(ETMEXTOUTEVR3); + return 0; + case ETMCIDCVRn(0): + *val = etm_read(ETMCIDCVR0); + return 0; + case ETMCIDCVRn(1): + *val = etm_read(ETMCIDCVR1); + return 0; + case ETMCIDCVRn(2): + *val = etm_read(ETMCIDCVR2); + return 0; + case ETMCIDCMR: + *val = etm_read(ETMCIDCMR); + return 0; + case ETMIMPSPEC0: + *val = etm_read(ETMIMPSPEC0); + return 0; + case ETMIMPSPEC1: + *val = etm_read(ETMIMPSPEC1); + return 0; + case ETMIMPSPEC2: + *val = etm_read(ETMIMPSPEC2); + return 0; + case ETMIMPSPEC3: + *val = etm_read(ETMIMPSPEC3); + return 0; + case ETMIMPSPEC4: + *val = etm_read(ETMIMPSPEC4); + return 0; + case ETMIMPSPEC5: + *val = etm_read(ETMIMPSPEC5); + return 0; + case ETMIMPSPEC6: + *val = etm_read(ETMIMPSPEC6); + return 0; + case ETMIMPSPEC7: + *val = etm_read(ETMIMPSPEC7); + return 0; + case ETMSYNCFR: + *val = etm_read(ETMSYNCFR); + return 0; + case ETMIDR: + *val = etm_read(ETMIDR); + return 0; + case ETMCCER: + *val = etm_read(ETMCCER); + return 0; + case ETMEXTINSELR: + *val = etm_read(ETMEXTINSELR); + return 0; + case ETMTESSEICR: + *val = etm_read(ETMTESSEICR); + return 0; + case ETMEIBCR: + *val = etm_read(ETMEIBCR); + return 0; + case ETMTSEVR: + *val = etm_read(ETMTSEVR); + return 0; + case ETMAUXCR: + *val = etm_read(ETMAUXCR); + return 0; + case ETMTRACEIDR: + *val = etm_read(ETMTRACEIDR); + return 0; + case ETMVMIDCVR: + *val = etm_read(ETMVMIDCVR); + return 0; + case ETMOSLSR: + *val = etm_read(ETMOSLSR); + return 0; + case ETMOSSRR: + *val = etm_read(ETMOSSRR); + return 0; + case ETMPDCR: + *val = etm_read(ETMPDCR); + return 0; + case ETMPDSR: + *val = etm_read(ETMPDSR); + return 0; + default: + *val = 0; + return -EINVAL; + } +} + +int etm_writel_cp14(u32 reg, u32 val) +{ + switch (reg) { + case ETMCR: + etm_write(val, ETMCR); + break; + case ETMTRIGGER: + etm_write(val, ETMTRIGGER); + break; + case ETMSR: + etm_write(val, ETMSR); + break; + case ETMTSSCR: + etm_write(val, ETMTSSCR); + break; + case ETMTEEVR: + etm_write(val, ETMTEEVR); + break; + case ETMTECR1: + etm_write(val, ETMTECR1); + break; + case ETMFFLR: + etm_write(val, ETMFFLR); + break; + case ETMACVRn(0): + etm_write(val, ETMACVR0); + break; + case ETMACVRn(1): + etm_write(val, ETMACVR1); + break; + case ETMACVRn(2): + etm_write(val, ETMACVR2); + break; + case ETMACVRn(3): + etm_write(val, ETMACVR3); + break; + case ETMACVRn(4): + etm_write(val, ETMACVR4); + break; + case ETMACVRn(5): + etm_write(val, ETMACVR5); + break; + case ETMACVRn(6): + etm_write(val, ETMACVR6); + break; + case ETMACVRn(7): + etm_write(val, ETMACVR7); + break; + case ETMACVRn(8): + etm_write(val, ETMACVR8); + break; + case ETMACVRn(9): + etm_write(val, ETMACVR9); + break; + case ETMACVRn(10): + etm_write(val, ETMACVR10); + break; + case ETMACVRn(11): + etm_write(val, ETMACVR11); + break; + case ETMACVRn(12): + etm_write(val, ETMACVR12); + break; + case ETMACVRn(13): + etm_write(val, ETMACVR13); + break; + case ETMACVRn(14): + etm_write(val, ETMACVR14); + break; + case ETMACVRn(15): + etm_write(val, ETMACVR15); + break; + case ETMACTRn(0): + etm_write(val, ETMACTR0); + break; + case ETMACTRn(1): + etm_write(val, ETMACTR1); + break; + case ETMACTRn(2): + etm_write(val, ETMACTR2); + break; + case ETMACTRn(3): + etm_write(val, ETMACTR3); + break; + case ETMACTRn(4): + etm_write(val, ETMACTR4); + break; + case ETMACTRn(5): + etm_write(val, ETMACTR5); + break; + case ETMACTRn(6): + etm_write(val, ETMACTR6); + break; + case ETMACTRn(7): + etm_write(val, ETMACTR7); + break; + case ETMACTRn(8): + etm_write(val, ETMACTR8); + break; + case ETMACTRn(9): + etm_write(val, ETMACTR9); + break; + case ETMACTRn(10): + etm_write(val, ETMACTR10); + break; + case ETMACTRn(11): + etm_write(val, ETMACTR11); + break; + case ETMACTRn(12): + etm_write(val, ETMACTR12); + break; + case ETMACTRn(13): + etm_write(val, ETMACTR13); + break; + case ETMACTRn(14): + etm_write(val, ETMACTR14); + break; + case ETMACTRn(15): + etm_write(val, ETMACTR15); + break; + case ETMCNTRLDVRn(0): + etm_write(val, ETMCNTRLDVR0); + break; + case ETMCNTRLDVRn(1): + etm_write(val, ETMCNTRLDVR1); + break; + case ETMCNTRLDVRn(2): + etm_write(val, ETMCNTRLDVR2); + break; + case ETMCNTRLDVRn(3): + etm_write(val, ETMCNTRLDVR3); + break; + case ETMCNTENRn(0): + etm_write(val, ETMCNTENR0); + break; + case ETMCNTENRn(1): + etm_write(val, ETMCNTENR1); + break; + case ETMCNTENRn(2): + etm_write(val, ETMCNTENR2); + break; + case ETMCNTENRn(3): + etm_write(val, ETMCNTENR3); + break; + case ETMCNTRLDEVRn(0): + etm_write(val, ETMCNTRLDEVR0); + break; + case ETMCNTRLDEVRn(1): + etm_write(val, ETMCNTRLDEVR1); + break; + case ETMCNTRLDEVRn(2): + etm_write(val, ETMCNTRLDEVR2); + break; + case ETMCNTRLDEVRn(3): + etm_write(val, ETMCNTRLDEVR3); + break; + case ETMCNTVRn(0): + etm_write(val, ETMCNTVR0); + break; + case ETMCNTVRn(1): + etm_write(val, ETMCNTVR1); + break; + case ETMCNTVRn(2): + etm_write(val, ETMCNTVR2); + break; + case ETMCNTVRn(3): + etm_write(val, ETMCNTVR3); + break; + case ETMSQ12EVR: + etm_write(val, ETMSQ12EVR); + break; + case ETMSQ21EVR: + etm_write(val, ETMSQ21EVR); + break; + case ETMSQ23EVR: + etm_write(val, ETMSQ23EVR); + break; + case ETMSQ31EVR: + etm_write(val, ETMSQ31EVR); + break; + case ETMSQ32EVR: + etm_write(val, ETMSQ32EVR); + break; + case ETMSQ13EVR: + etm_write(val, ETMSQ13EVR); + break; + case ETMSQR: + etm_write(val, ETMSQR); + break; + case ETMEXTOUTEVRn(0): + etm_write(val, ETMEXTOUTEVR0); + break; + case ETMEXTOUTEVRn(1): + etm_write(val, ETMEXTOUTEVR1); + break; + case ETMEXTOUTEVRn(2): + etm_write(val, ETMEXTOUTEVR2); + break; + case ETMEXTOUTEVRn(3): + etm_write(val, ETMEXTOUTEVR3); + break; + case ETMCIDCVRn(0): + etm_write(val, ETMCIDCVR0); + break; + case ETMCIDCVRn(1): + etm_write(val, ETMCIDCVR1); + break; + case ETMCIDCVRn(2): + etm_write(val, ETMCIDCVR2); + break; + case ETMCIDCMR: + etm_write(val, ETMCIDCMR); + break; + case ETMIMPSPEC0: + etm_write(val, ETMIMPSPEC0); + break; + case ETMIMPSPEC1: + etm_write(val, ETMIMPSPEC1); + break; + case ETMIMPSPEC2: + etm_write(val, ETMIMPSPEC2); + break; + case ETMIMPSPEC3: + etm_write(val, ETMIMPSPEC3); + break; + case ETMIMPSPEC4: + etm_write(val, ETMIMPSPEC4); + break; + case ETMIMPSPEC5: + etm_write(val, ETMIMPSPEC5); + break; + case ETMIMPSPEC6: + etm_write(val, ETMIMPSPEC6); + break; + case ETMIMPSPEC7: + etm_write(val, ETMIMPSPEC7); + break; + case ETMSYNCFR: + etm_write(val, ETMSYNCFR); + break; + case ETMEXTINSELR: + etm_write(val, ETMEXTINSELR); + break; + case ETMTESSEICR: + etm_write(val, ETMTESSEICR); + break; + case ETMEIBCR: + etm_write(val, ETMEIBCR); + break; + case ETMTSEVR: + etm_write(val, ETMTSEVR); + break; + case ETMAUXCR: + etm_write(val, ETMAUXCR); + break; + case ETMTRACEIDR: + etm_write(val, ETMTRACEIDR); + break; + case ETMVMIDCVR: + etm_write(val, ETMVMIDCVR); + break; + case ETMOSLAR: + etm_write(val, ETMOSLAR); + break; + case ETMOSSRR: + etm_write(val, ETMOSSRR); + break; + case ETMPDCR: + etm_write(val, ETMPDCR); + break; + case ETMPDSR: + etm_write(val, ETMPDSR); + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/drivers/coresight/coresight-etm.h b/drivers/coresight/coresight-etm.h new file mode 100644 index 000000000000..501c5fac8a45 --- /dev/null +++ b/drivers/coresight/coresight-etm.h @@ -0,0 +1,251 @@ +/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _CORESIGHT_CORESIGHT_ETM_H +#define _CORESIGHT_CORESIGHT_ETM_H + +#include +#include "coresight-priv.h" + +/* + * Device registers: + * 0x000 - 0x2FC: Trace registers + * 0x300 - 0x314: Management registers + * 0x318 - 0xEFC: Trace registers + * + * Coresight registers + * 0xF00 - 0xF9C: Management registers + * 0xFA0 - 0xFA4: Management registers in PFTv1.0 + * Trace registers in PFTv1.1 + * 0xFA8 - 0xFFC: Management registers + */ + +/* Trace registers (0x000-0x2FC) */ +#define ETMCR 0x000 +#define ETMCCR 0x004 +#define ETMTRIGGER 0x008 +#define ETMSR 0x010 +#define ETMSCR 0x014 +#define ETMTSSCR 0x018 +#define ETMTECR2 0x01c +#define ETMTEEVR 0x020 +#define ETMTECR1 0x024 +#define ETMFFLR 0x02c +#define ETMACVRn(n) (0x040 + (n * 4)) +#define ETMACTRn(n) (0x080 + (n * 4)) +#define ETMCNTRLDVRn(n) (0x140 + (n * 4)) +#define ETMCNTENRn(n) (0x150 + (n * 4)) +#define ETMCNTRLDEVRn(n) (0x160 + (n * 4)) +#define ETMCNTVRn(n) (0x170 + (n * 4)) +#define ETMSQ12EVR 0x180 +#define ETMSQ21EVR 0x184 +#define ETMSQ23EVR 0x188 +#define ETMSQ31EVR 0x18c +#define ETMSQ32EVR 0x190 +#define ETMSQ13EVR 0x194 +#define ETMSQR 0x19c +#define ETMEXTOUTEVRn(n) (0x1a0 + (n * 4)) +#define ETMCIDCVRn(n) (0x1b0 + (n * 4)) +#define ETMCIDCMR 0x1bc +#define ETMIMPSPEC0 0x1c0 +#define ETMIMPSPEC1 0x1c4 +#define ETMIMPSPEC2 0x1c8 +#define ETMIMPSPEC3 0x1cc +#define ETMIMPSPEC4 0x1d0 +#define ETMIMPSPEC5 0x1d4 +#define ETMIMPSPEC6 0x1d8 +#define ETMIMPSPEC7 0x1dc +#define ETMSYNCFR 0x1e0 +#define ETMIDR 0x1e4 +#define ETMCCER 0x1e8 +#define ETMEXTINSELR 0x1ec +#define ETMTESSEICR 0x1f0 +#define ETMEIBCR 0x1f4 +#define ETMTSEVR 0x1f8 +#define ETMAUXCR 0x1fc +#define ETMTRACEIDR 0x200 +#define ETMVMIDCVR 0x240 +/* Management registers (0x300-0x314) */ +#define ETMOSLAR 0x300 +#define ETMOSLSR 0x304 +#define ETMOSSRR 0x308 +#define ETMPDCR 0x310 +#define ETMPDSR 0x314 +#define ETM_MAX_ADDR_CMP 16 +#define ETM_MAX_CNTR 4 +#define ETM_MAX_CTXID_CMP 3 + +/* Register definition */ +/* ETMCR - 0x00 */ +#define ETMCR_PWD_DWN BIT(0) +#define ETMCR_STALL_MODE BIT(7) +#define ETMCR_ETM_PRG BIT(10) +#define ETMCR_ETM_EN BIT(11) +#define ETMCR_CYC_ACC BIT(12) +#define ETMCR_CTXID_SIZE (BIT(14)|BIT(15)) +#define ETMCR_TIMESTAMP_EN BIT(28) +/* ETMCCR - 0x04 */ +#define ETMCCR_FIFOFULL BIT(23) +/* ETMPDCR - 0x310 */ +#define ETMPDCR_PWD_UP BIT(3) +/* ETMTECR1 - 0x024 */ +#define ETMTECR1_ADDR_COMP_1 BIT(0) +#define ETMTECR1_INC_EXC BIT(24) +#define ETMTECR1_START_STOP BIT(25) +/* ETMCCER - 0x1E8 */ +#define ETMCCER_TIMESTAMP BIT(22) + +#define ETM_MODE_EXCLUDE BIT(0) +#define ETM_MODE_CYCACC BIT(1) +#define ETM_MODE_STALL BIT(2) +#define ETM_MODE_TIMESTAMP BIT(3) +#define ETM_MODE_CTXID BIT(4) +#define ETM_MODE_ALL 0x1f + +#define ETM_SQR_MASK 0x3 +#define ETM_TRACEID_MASK 0x3f +#define ETM_EVENT_MASK 0x1ffff +#define ETM_SYNC_MASK 0xfff +#define ETM_ALL_MASK 0xffffffff + +#define ETMSR_PROG_BIT 1 +#define ETM_SEQ_STATE_MAX_VAL (0x2) +#define PORT_SIZE_MASK (GENMASK(21, 21) | GENMASK(6, 4)) + +#define ETM_HARD_WIRE_RES_A /* Hard wired, always true */ \ + ((0x0f << 0) | \ + /* Resource index A */ \ + (0x06 << 4)) + +#define ETM_ADD_COMP_0 /* Single addr comparator 1 */ \ + ((0x00 << 7) | \ + /* Resource index B */ \ + (0x00 << 11)) + +#define ETM_EVENT_NOT_A BIT(14) /* NOT(A) */ + +#define ETM_DEFAULT_EVENT_VAL (ETM_HARD_WIRE_RES_A | \ + ETM_ADD_COMP_0 | \ + ETM_EVENT_NOT_A) +/** + * struct etm_drvdata - specifics associated to an ETM component + * @base: memory mapped base address for this component. + * @dev: the device entity associated to this component. + * @csdev: component vitals needed by the framework. + * @clk: the clock this component is associated to. + * @spinlock: only one at a time pls. + * @cpu: the cpu this component is affined to. + * @port_size: port size as reported by ETMCR bit 4-6 and 21. + * @arch: ETM/PTM version number. + * @use_cpu14: true if management registers need to be accessed via CP14. + * @enable: is this ETM/PTM currently tracing. + * @sticky_enable: true if ETM base configuration has been done. + * @boot_enable:true if we should start tracing at boot time. + * @os_unlock: true if access to management registers is allowed. + * @nr_addr_cmp:Number of pairs of address comparators as found in ETMCCR. + * @nr_cntr: Number of counters as found in ETMCCR bit 13-15. + * @nr_ext_inp: Number of external input as found in ETMCCR bit 17-19. + * @nr_ext_out: Number of external output as found in ETMCCR bit 20-22. + * @nr_ctxid_cmp: Number of contextID comparators as found in ETMCCR bit 24-25. + * @etmccr: value of register ETMCCR. + * @etmccer: value of register ETMCCER. + * @traceid: value of the current ID for this component. + * @mode: controls various modes supported by this ETM/PTM. + * @ctrl: used in conjunction with @mode. + * @trigger_event: setting for register ETMTRIGGER. + * @startstop_ctrl: setting for register ETMTSSCR. + * @enable_event: setting for register ETMTEEVR. + * @enable_ctrl1: setting for register ETMTECR1. + * @fifofull_level: setting for register ETMFFLR. + * @addr_idx: index for the address comparator selection. + * @addr_val: value for address comparator register. + * @addr_acctype: access type for address comparator register. + * @addr_type: current status of the comparator register. + * @cntr_idx: index for the counter register selection. + * @cntr_rld_val: reload value of a counter register. + * @cntr_event: control for counter enable register. + * @cntr_rld_event: value for counter reload event register. + * @cntr_val: counter value register. + * @seq_12_event: event causing the transition from 1 to 2. + * @seq_21_event: event causing the transition from 2 to 1. + * @seq_23_event: event causing the transition from 2 to 3. + * @seq_31_event: event causing the transition from 3 to 1. + * @seq_32_event: event causing the transition from 3 to 2. + * @seq_13_event: event causing the transition from 1 to 3. + * @seq_curr_state: current value of the sequencer register. + * @ctxid_idx: index for the context ID registers. + * @ctxid_val: value for the context ID to trigger on. + * @ctxid_mask: mask applicable to all the context IDs. + * @sync_freq: Synchronisation frequency. + * @timestamp_event: Defines an event that requests the insertion + of a timestamp into the trace stream. + */ +struct etm_drvdata { + void __iomem *base; + struct device *dev; + struct coresight_device *csdev; + struct clk *clk; + spinlock_t spinlock; + int cpu; + int port_size; + u8 arch; + bool use_cp14; + bool enable; + bool sticky_enable; + bool boot_enable; + bool os_unlock; + u8 nr_addr_cmp; + u8 nr_cntr; + u8 nr_ext_inp; + u8 nr_ext_out; + u8 nr_ctxid_cmp; + u32 etmccr; + u32 etmccer; + u32 traceid; + u32 mode; + u32 ctrl; + u32 trigger_event; + u32 startstop_ctrl; + u32 enable_event; + u32 enable_ctrl1; + u32 fifofull_level; + u8 addr_idx; + u32 addr_val[ETM_MAX_ADDR_CMP]; + u32 addr_acctype[ETM_MAX_ADDR_CMP]; + u32 addr_type[ETM_MAX_ADDR_CMP]; + u8 cntr_idx; + u32 cntr_rld_val[ETM_MAX_CNTR]; + u32 cntr_event[ETM_MAX_CNTR]; + u32 cntr_rld_event[ETM_MAX_CNTR]; + u32 cntr_val[ETM_MAX_CNTR]; + u32 seq_12_event; + u32 seq_21_event; + u32 seq_23_event; + u32 seq_31_event; + u32 seq_32_event; + u32 seq_13_event; + u32 seq_curr_state; + u8 ctxid_idx; + u32 ctxid_val[ETM_MAX_CTXID_CMP]; + u32 ctxid_mask; + u32 sync_freq; + u32 timestamp_event; +}; + +enum etm_addr_type { + ETM_ADDR_TYPE_NONE, + ETM_ADDR_TYPE_SINGLE, + ETM_ADDR_TYPE_RANGE, + ETM_ADDR_TYPE_START, + ETM_ADDR_TYPE_STOP, +}; +#endif diff --git a/drivers/coresight/coresight-etm3x.c b/drivers/coresight/coresight-etm3x.c new file mode 100644 index 000000000000..d9e3ed6aa857 --- /dev/null +++ b/drivers/coresight/coresight-etm3x.c @@ -0,0 +1,1928 @@ +/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "coresight-etm.h" + +#ifdef CONFIG_CORESIGHT_SOURCE_ETM_DEFAULT_ENABLE +static int boot_enable = 1; +#else +static int boot_enable; +#endif +module_param_named( + boot_enable, boot_enable, int, S_IRUGO +); + +/* The number of ETM/PTM currently registered */ +static int etm_count; +static struct etm_drvdata *etmdrvdata[NR_CPUS]; + +static inline void etm_writel(struct etm_drvdata *drvdata, + u32 val, u32 off) +{ + if (drvdata->use_cp14) { + if (etm_writel_cp14(off, val)) { + dev_err(drvdata->dev, + "invalid CP14 access to ETM reg: %#x", off); + } + } else { + writel_relaxed(val, drvdata->base + off); + } +} + +static inline unsigned int etm_readl(struct etm_drvdata *drvdata, u32 off) +{ + u32 val; + + if (drvdata->use_cp14) { + if (etm_readl_cp14(off, &val)) { + dev_err(drvdata->dev, + "invalid CP14 access to ETM reg: %#x", off); + } + } else { + val = readl_relaxed(drvdata->base + off); + } + + return val; +} + +/* + * Memory mapped writes to clear os lock are not supported on some processors + * and OS lock must be unlocked before any memory mapped access on such + * processors, otherwise memory mapped reads/writes will be invalid. + */ +static void etm_os_unlock(void *info) +{ + struct etm_drvdata *drvdata = (struct etm_drvdata *)info; + /* Writing any value to ETMOSLAR unlocks the trace registers */ + etm_writel(drvdata, 0x0, ETMOSLAR); + isb(); +} + +static void etm_set_pwrdwn(struct etm_drvdata *drvdata) +{ + u32 etmcr; + + /* Ensure pending cp14 accesses complete before setting pwrdwn */ + mb(); + isb(); + etmcr = etm_readl(drvdata, ETMCR); + etmcr |= ETMCR_PWD_DWN; + etm_writel(drvdata, etmcr, ETMCR); +} + +static void etm_clr_pwrdwn(struct etm_drvdata *drvdata) +{ + u32 etmcr; + + etmcr = etm_readl(drvdata, ETMCR); + etmcr &= ~ETMCR_PWD_DWN; + etm_writel(drvdata, etmcr, ETMCR); + /* Ensure pwrup completes before subsequent cp14 accesses */ + mb(); + isb(); +} + +static void etm_set_pwrup(struct etm_drvdata *drvdata) +{ + u32 etmpdcr; + + etmpdcr = readl_relaxed(drvdata->base + ETMPDCR); + etmpdcr |= ETMPDCR_PWD_UP; + writel_relaxed(etmpdcr, drvdata->base + ETMPDCR); + /* Ensure pwrup completes before subsequent cp14 accesses */ + mb(); + isb(); +} + +static void etm_clr_pwrup(struct etm_drvdata *drvdata) +{ + u32 etmpdcr; + + /* Ensure pending cp14 accesses complete before clearing pwrup */ + mb(); + isb(); + etmpdcr = readl_relaxed(drvdata->base + ETMPDCR); + etmpdcr &= ~ETMPDCR_PWD_UP; + writel_relaxed(etmpdcr, drvdata->base + ETMPDCR); +} + +/** + * coresight_timeout_etm - loop until a bit has changed to a specific state. + * @drvdata: etm's private data structure. + * @offset: address of a register, starting from @addr. + * @position: the position of the bit of interest. + * @value: the value the bit should have. + * + * Basically the same as @coresight_timeout except for the register access + * method where we have to account for CP14 configurations. + + * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if + * TIMEOUT_US has elapsed, which ever happens first. + */ + +static int coresight_timeout_etm(struct etm_drvdata *drvdata, u32 offset, + int position, int value) +{ + int i; + u32 val; + + for (i = TIMEOUT_US; i > 0; i--) { + val = etm_readl(drvdata, offset); + /* Waiting on the bit to go from 0 to 1 */ + if (value) { + if (val & BIT(position)) + return 0; + /* Waiting on the bit to go from 1 to 0 */ + } else { + if (!(val & BIT(position))) + return 0; + } + + /* + * Delay is arbitrary - the specification doesn't say how long + * we are expected to wait. Extra check required to make sure + * we don't wait needlessly on the last iteration. + */ + if (i - 1) + udelay(1); + } + + return -EAGAIN; +} + + +static void etm_set_prog(struct etm_drvdata *drvdata) +{ + u32 etmcr; + + etmcr = etm_readl(drvdata, ETMCR); + etmcr |= ETMCR_ETM_PRG; + etm_writel(drvdata, etmcr, ETMCR); + /* + * Recommended by spec for cp14 accesses to ensure etmcr write is + * complete before polling etmsr + */ + isb(); + if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 1)) { + dev_err(drvdata->dev, + "timeout observed when probing at offset %#x\n", ETMSR); + } +} + +static void etm_clr_prog(struct etm_drvdata *drvdata) +{ + u32 etmcr; + + etmcr = etm_readl(drvdata, ETMCR); + etmcr &= ~ETMCR_ETM_PRG; + etm_writel(drvdata, etmcr, ETMCR); + /* + * Recommended by spec for cp14 accesses to ensure etmcr write is + * complete before polling etmsr + */ + isb(); + if (coresight_timeout_etm(drvdata, ETMSR, ETMSR_PROG_BIT, 0)) { + dev_err(drvdata->dev, + "timeout observed when probing at offset %#x\n", ETMSR); + } +} + +static void etm_set_default(struct etm_drvdata *drvdata) +{ + int i; + + drvdata->trigger_event = ETM_DEFAULT_EVENT_VAL; + drvdata->enable_event = ETM_HARD_WIRE_RES_A; + + drvdata->seq_12_event = ETM_DEFAULT_EVENT_VAL; + drvdata->seq_21_event = ETM_DEFAULT_EVENT_VAL; + drvdata->seq_23_event = ETM_DEFAULT_EVENT_VAL; + drvdata->seq_31_event = ETM_DEFAULT_EVENT_VAL; + drvdata->seq_32_event = ETM_DEFAULT_EVENT_VAL; + drvdata->seq_13_event = ETM_DEFAULT_EVENT_VAL; + drvdata->timestamp_event = ETM_DEFAULT_EVENT_VAL; + + for (i = 0; i < drvdata->nr_cntr; i++) { + drvdata->cntr_rld_val[i] = 0x0; + drvdata->cntr_event[i] = ETM_DEFAULT_EVENT_VAL; + drvdata->cntr_rld_event[i] = ETM_DEFAULT_EVENT_VAL; + drvdata->cntr_val[i] = 0x0; + } + + drvdata->seq_curr_state = 0x0; + drvdata->ctxid_idx = 0x0; + for (i = 0; i < drvdata->nr_ctxid_cmp; i++) + drvdata->ctxid_val[i] = 0x0; + drvdata->ctxid_mask = 0x0; +} + +static void etm_enable_hw(void *info) +{ + int i; + u32 etmcr; + struct etm_drvdata *drvdata = info; + + CS_UNLOCK(drvdata->base); + + /* Turn engine on */ + etm_clr_pwrdwn(drvdata); + /* Apply power to trace registers */ + etm_set_pwrup(drvdata); + /* Make sure all registers are accessible */ + etm_os_unlock(drvdata); + + etm_set_prog(drvdata); + + etmcr = etm_readl(drvdata, ETMCR); + etmcr &= (ETMCR_PWD_DWN | ETMCR_ETM_PRG); + etmcr |= drvdata->port_size; + etm_writel(drvdata, drvdata->ctrl | etmcr, ETMCR); + etm_writel(drvdata, drvdata->trigger_event, ETMTRIGGER); + etm_writel(drvdata, drvdata->startstop_ctrl, ETMTSSCR); + etm_writel(drvdata, drvdata->enable_event, ETMTEEVR); + etm_writel(drvdata, drvdata->enable_ctrl1, ETMTECR1); + etm_writel(drvdata, drvdata->fifofull_level, ETMFFLR); + for (i = 0; i < drvdata->nr_addr_cmp; i++) { + etm_writel(drvdata, drvdata->addr_val[i], ETMACVRn(i)); + etm_writel(drvdata, drvdata->addr_acctype[i], ETMACTRn(i)); + } + for (i = 0; i < drvdata->nr_cntr; i++) { + etm_writel(drvdata, drvdata->cntr_rld_val[i], ETMCNTRLDVRn(i)); + etm_writel(drvdata, drvdata->cntr_event[i], ETMCNTENRn(i)); + etm_writel(drvdata, drvdata->cntr_rld_event[i], + ETMCNTRLDEVRn(i)); + etm_writel(drvdata, drvdata->cntr_val[i], ETMCNTVRn(i)); + } + etm_writel(drvdata, drvdata->seq_12_event, ETMSQ12EVR); + etm_writel(drvdata, drvdata->seq_21_event, ETMSQ21EVR); + etm_writel(drvdata, drvdata->seq_23_event, ETMSQ23EVR); + etm_writel(drvdata, drvdata->seq_31_event, ETMSQ31EVR); + etm_writel(drvdata, drvdata->seq_32_event, ETMSQ32EVR); + etm_writel(drvdata, drvdata->seq_13_event, ETMSQ13EVR); + etm_writel(drvdata, drvdata->seq_curr_state, ETMSQR); + for (i = 0; i < drvdata->nr_ext_out; i++) + etm_writel(drvdata, ETM_DEFAULT_EVENT_VAL, ETMEXTOUTEVRn(i)); + for (i = 0; i < drvdata->nr_ctxid_cmp; i++) + etm_writel(drvdata, drvdata->ctxid_val[i], ETMCIDCVRn(i)); + etm_writel(drvdata, drvdata->ctxid_mask, ETMCIDCMR); + etm_writel(drvdata, drvdata->sync_freq, ETMSYNCFR); + /* No external input selected */ + etm_writel(drvdata, 0x0, ETMEXTINSELR); + etm_writel(drvdata, drvdata->timestamp_event, ETMTSEVR); + /* No auxiliary control selected */ + etm_writel(drvdata, 0x0, ETMAUXCR); + etm_writel(drvdata, drvdata->traceid, ETMTRACEIDR); + /* No VMID comparator value selected */ + etm_writel(drvdata, 0x0, ETMVMIDCVR); + + /* Ensures trace output is enabled from this ETM */ + etm_writel(drvdata, drvdata->ctrl | ETMCR_ETM_EN | etmcr, ETMCR); + + etm_clr_prog(drvdata); + CS_LOCK(drvdata->base); + + dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); +} + +static int etm_trace_id_simple(struct etm_drvdata *drvdata) +{ + if (!drvdata->enable) + return drvdata->traceid; + + return (etm_readl(drvdata, ETMTRACEIDR) & ETM_TRACEID_MASK); +} + +static int etm_trace_id(struct coresight_device *csdev) +{ + struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + int trace_id = -1; + + if (!drvdata->enable) + return drvdata->traceid; + + if (clk_prepare_enable(drvdata->clk)) + goto out; + + spin_lock_irqsave(&drvdata->spinlock, flags); + + CS_UNLOCK(drvdata->base); + trace_id = (etm_readl(drvdata, ETMTRACEIDR) & ETM_TRACEID_MASK); + CS_LOCK(drvdata->base); + + spin_unlock_irqrestore(&drvdata->spinlock, flags); + clk_disable_unprepare(drvdata->clk); +out: + return trace_id; +} + +static int etm_enable(struct coresight_device *csdev) +{ + struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int ret; + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + goto err_clk; + + spin_lock(&drvdata->spinlock); + + /* + * Configure the ETM only if the CPU is online. If it isn't online + * hw configuration will take place when 'CPU_STARTING' is received + * in @etm_cpu_callback. + */ + if (cpu_online(drvdata->cpu)) { + ret = smp_call_function_single(drvdata->cpu, + etm_enable_hw, drvdata, 1); + if (ret) + goto err; + } + + drvdata->enable = true; + drvdata->sticky_enable = true; + + spin_unlock(&drvdata->spinlock); + + dev_info(drvdata->dev, "ETM tracing enabled\n"); + return 0; +err: + spin_unlock(&drvdata->spinlock); + clk_disable_unprepare(drvdata->clk); +err_clk: + return ret; +} + +static void etm_disable_hw(void *info) +{ + int i; + struct etm_drvdata *drvdata = info; + + CS_UNLOCK(drvdata->base); + etm_set_prog(drvdata); + + /* Program trace enable to low by using always false event */ + etm_writel(drvdata, ETM_HARD_WIRE_RES_A | ETM_EVENT_NOT_A, ETMTEEVR); + + /* Read back sequencer and counters for post trace analysis */ + drvdata->seq_curr_state = (etm_readl(drvdata, ETMSQR) & ETM_SQR_MASK); + + for (i = 0; i < drvdata->nr_cntr; i++) + drvdata->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i)); + + etm_set_pwrdwn(drvdata); + CS_LOCK(drvdata->base); + + dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu); +} + +static void etm_disable(struct coresight_device *csdev) +{ + struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + /* + * Taking hotplug lock here protects from clocks getting disabled + * with tracing being left on (crash scenario) if user disable occurs + * after cpu online mask indicates the cpu is offline but before the + * DYING hotplug callback is serviced by the ETM driver. + */ + get_online_cpus(); + spin_lock(&drvdata->spinlock); + + /* + * Executing etm_disable_hw on the cpu whose ETM is being disabled + * ensures that register writes occur when cpu is powered. + */ + smp_call_function_single(drvdata->cpu, etm_disable_hw, drvdata, 1); + drvdata->enable = false; + + spin_unlock(&drvdata->spinlock); + put_online_cpus(); + + clk_disable_unprepare(drvdata->clk); + + dev_info(drvdata->dev, "ETM tracing disabled\n"); +} + +static const struct coresight_ops_source etm_source_ops = { + .trace_id = etm_trace_id, + .enable = etm_enable, + .disable = etm_disable, +}; + +static const struct coresight_ops etm_cs_ops = { + .source_ops = &etm_source_ops, +}; + +static ssize_t nr_addr_cmp_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nr_addr_cmp; + return sprintf(buf, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nr_addr_cmp); + +static ssize_t nr_cntr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nr_cntr; + return sprintf(buf, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nr_cntr); + +static ssize_t nr_ctxid_cmp_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->nr_ctxid_cmp; + return sprintf(buf, "%#lx\n", val); +} +static DEVICE_ATTR_RO(nr_ctxid_cmp); + +static ssize_t etmsr_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + unsigned long flags, val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + spin_lock_irqsave(&drvdata->spinlock, flags); + CS_UNLOCK(drvdata->base); + + val = etm_readl(drvdata, ETMSR); + + CS_LOCK(drvdata->base); + spin_unlock_irqrestore(&drvdata->spinlock, flags); + clk_disable_unprepare(drvdata->clk); + + return sprintf(buf, "%#lx\n", val); +} +static DEVICE_ATTR_RO(etmsr); + +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int i, ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + if (val) { + spin_lock(&drvdata->spinlock); + drvdata->mode = ETM_MODE_EXCLUDE; + drvdata->ctrl = 0x0; + drvdata->trigger_event = ETM_DEFAULT_EVENT_VAL; + drvdata->startstop_ctrl = 0x0; + drvdata->addr_idx = 0x0; + for (i = 0; i < drvdata->nr_addr_cmp; i++) { + drvdata->addr_val[i] = 0x0; + drvdata->addr_acctype[i] = 0x0; + drvdata->addr_type[i] = ETM_ADDR_TYPE_NONE; + } + drvdata->cntr_idx = 0x0; + + etm_set_default(drvdata); + spin_unlock(&drvdata->spinlock); + } + + return size; +} +static DEVICE_ATTR_WO(reset); + +static ssize_t mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->mode; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t mode_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + drvdata->mode = val & ETM_MODE_ALL; + + if (drvdata->mode & ETM_MODE_EXCLUDE) + drvdata->enable_ctrl1 |= ETMTECR1_INC_EXC; + else + drvdata->enable_ctrl1 &= ~ETMTECR1_INC_EXC; + + if (drvdata->mode & ETM_MODE_CYCACC) + drvdata->ctrl |= ETMCR_CYC_ACC; + else + drvdata->ctrl &= ~ETMCR_CYC_ACC; + + if (drvdata->mode & ETM_MODE_STALL) { + if (!(drvdata->etmccr & ETMCCR_FIFOFULL)) { + dev_warn(drvdata->dev, "stall mode not supported\n"); + return -EINVAL; + } + drvdata->ctrl |= ETMCR_STALL_MODE; + } else + drvdata->ctrl &= ~ETMCR_STALL_MODE; + + if (drvdata->mode & ETM_MODE_TIMESTAMP) { + if (!(drvdata->etmccer & ETMCCER_TIMESTAMP)) { + dev_warn(drvdata->dev, "timestamp not supported\n"); + return -EINVAL; + } + drvdata->ctrl |= ETMCR_TIMESTAMP_EN; + } else + drvdata->ctrl &= ~ETMCR_TIMESTAMP_EN; + + if (drvdata->mode & ETM_MODE_CTXID) + drvdata->ctrl |= ETMCR_CTXID_SIZE; + else + drvdata->ctrl &= ~ETMCR_CTXID_SIZE; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(mode); + +static ssize_t trigger_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->trigger_event; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t trigger_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->trigger_event = val & ETM_EVENT_MASK; + + return size; +} +static DEVICE_ATTR_RW(trigger_event); + +static ssize_t enable_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->enable_event; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t enable_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->enable_event = val & ETM_EVENT_MASK; + + return size; +} +static DEVICE_ATTR_RW(enable_event); + +static ssize_t fifofull_level_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->fifofull_level; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t fifofull_level_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->fifofull_level = val; + + return size; +} +static DEVICE_ATTR_RW(fifofull_level); + +static ssize_t addr_idx_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->addr_idx; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t addr_idx_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + if (val >= drvdata->nr_addr_cmp) + return -EINVAL; + + /* + * Use spinlock to ensure index doesn't change while it gets + * dereferenced multiple times within a spinlock block elsewhere. + */ + spin_lock(&drvdata->spinlock); + drvdata->addr_idx = val; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(addr_idx); + +static ssize_t addr_single_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 idx; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) { + spin_unlock(&drvdata->spinlock); + return -EINVAL; + } + + val = drvdata->addr_val[idx]; + spin_unlock(&drvdata->spinlock); + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t addr_single_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_SINGLE)) { + spin_unlock(&drvdata->spinlock); + return -EINVAL; + } + + drvdata->addr_val[idx] = val; + drvdata->addr_type[idx] = ETM_ADDR_TYPE_SINGLE; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(addr_single); + +static ssize_t addr_range_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 idx; + unsigned long val1, val2; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (idx % 2 != 0) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + if (!((drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE && + drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) || + (drvdata->addr_type[idx] == ETM_ADDR_TYPE_RANGE && + drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + val1 = drvdata->addr_val[idx]; + val2 = drvdata->addr_val[idx + 1]; + spin_unlock(&drvdata->spinlock); + + return sprintf(buf, "%#lx %#lx\n", val1, val2); +} + +static ssize_t addr_range_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + unsigned long val1, val2; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (sscanf(buf, "%lx %lx", &val1, &val2) != 2) + return -EINVAL; + /* Lower address comparator cannot have a higher address value */ + if (val1 > val2) + return -EINVAL; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (idx % 2 != 0) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + if (!((drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE && + drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_NONE) || + (drvdata->addr_type[idx] == ETM_ADDR_TYPE_RANGE && + drvdata->addr_type[idx + 1] == ETM_ADDR_TYPE_RANGE))) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + drvdata->addr_val[idx] = val1; + drvdata->addr_type[idx] = ETM_ADDR_TYPE_RANGE; + drvdata->addr_val[idx + 1] = val2; + drvdata->addr_type[idx + 1] = ETM_ADDR_TYPE_RANGE; + drvdata->enable_ctrl1 |= (1 << (idx/2)); + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(addr_range); + +static ssize_t addr_start_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 idx; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_START)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + val = drvdata->addr_val[idx]; + spin_unlock(&drvdata->spinlock); + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t addr_start_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_START)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + drvdata->addr_val[idx] = val; + drvdata->addr_type[idx] = ETM_ADDR_TYPE_START; + drvdata->startstop_ctrl |= (1 << idx); + drvdata->enable_ctrl1 |= BIT(25); + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(addr_start); + +static ssize_t addr_stop_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 idx; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_STOP)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + val = drvdata->addr_val[idx]; + spin_unlock(&drvdata->spinlock); + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t addr_stop_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + u8 idx; + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + idx = drvdata->addr_idx; + if (!(drvdata->addr_type[idx] == ETM_ADDR_TYPE_NONE || + drvdata->addr_type[idx] == ETM_ADDR_TYPE_STOP)) { + spin_unlock(&drvdata->spinlock); + return -EPERM; + } + + drvdata->addr_val[idx] = val; + drvdata->addr_type[idx] = ETM_ADDR_TYPE_STOP; + drvdata->startstop_ctrl |= (1 << (idx + 16)); + drvdata->enable_ctrl1 |= ETMTECR1_START_STOP; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(addr_stop); + +static ssize_t addr_acctype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + val = drvdata->addr_acctype[drvdata->addr_idx]; + spin_unlock(&drvdata->spinlock); + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t addr_acctype_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + drvdata->addr_acctype[drvdata->addr_idx] = val; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(addr_acctype); + +static ssize_t cntr_idx_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->cntr_idx; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t cntr_idx_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + if (val >= drvdata->nr_cntr) + return -EINVAL; + /* + * Use spinlock to ensure index doesn't change while it gets + * dereferenced multiple times within a spinlock block elsewhere. + */ + spin_lock(&drvdata->spinlock); + drvdata->cntr_idx = val; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(cntr_idx); + +static ssize_t cntr_rld_val_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + val = drvdata->cntr_rld_val[drvdata->cntr_idx]; + spin_unlock(&drvdata->spinlock); + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t cntr_rld_val_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + drvdata->cntr_rld_val[drvdata->cntr_idx] = val; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(cntr_rld_val); + +static ssize_t cntr_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + val = drvdata->cntr_event[drvdata->cntr_idx]; + spin_unlock(&drvdata->spinlock); + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t cntr_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + drvdata->cntr_event[drvdata->cntr_idx] = val & ETM_EVENT_MASK; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(cntr_event); + +static ssize_t cntr_rld_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + val = drvdata->cntr_rld_event[drvdata->cntr_idx]; + spin_unlock(&drvdata->spinlock); + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t cntr_rld_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + drvdata->cntr_rld_event[drvdata->cntr_idx] = val & ETM_EVENT_MASK; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(cntr_rld_event); + +static ssize_t cntr_val_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int i, ret = 0; + u32 val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (!drvdata->enable) { + spin_lock(&drvdata->spinlock); + for (i = 0; i < drvdata->nr_cntr; i++) + ret += sprintf(buf, "counter %d: %x\n", + i, drvdata->cntr_val[i]); + spin_unlock(&drvdata->spinlock); + return ret; + } + + for (i = 0; i < drvdata->nr_cntr; i++) { + val = etm_readl(drvdata, ETMCNTVRn(i)); + ret += sprintf(buf, "counter %d: %x\n", i, val); + } + + return ret; +} + +static ssize_t cntr_val_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + drvdata->cntr_val[drvdata->cntr_idx] = val; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(cntr_val); + +static ssize_t seq_12_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->seq_12_event; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t seq_12_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->seq_12_event = val & ETM_EVENT_MASK; + return size; +} +static DEVICE_ATTR_RW(seq_12_event); + +static ssize_t seq_21_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->seq_21_event; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t seq_21_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->seq_21_event = val & ETM_EVENT_MASK; + return size; +} +static DEVICE_ATTR_RW(seq_21_event); + +static ssize_t seq_23_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->seq_23_event; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t seq_23_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->seq_23_event = val & ETM_EVENT_MASK; + return size; +} +static DEVICE_ATTR_RW(seq_23_event); + +static ssize_t seq_31_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->seq_31_event; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t seq_31_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->seq_31_event = val & ETM_EVENT_MASK; + return size; +} +static DEVICE_ATTR_RW(seq_31_event); + +static ssize_t seq_32_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->seq_32_event; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t seq_32_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->seq_32_event = val & ETM_EVENT_MASK; + return size; +} +static DEVICE_ATTR_RW(seq_32_event); + +static ssize_t seq_13_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->seq_13_event; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t seq_13_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->seq_13_event = val & ETM_EVENT_MASK; + return size; +} +static DEVICE_ATTR_RW(seq_13_event); + +static ssize_t seq_curr_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + unsigned long val, flags; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (!drvdata->enable) { + val = drvdata->seq_curr_state; + goto out; + } + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + spin_lock_irqsave(&drvdata->spinlock, flags); + + CS_UNLOCK(drvdata->base); + val = (etm_readl(drvdata, ETMSQR) & ETM_SQR_MASK); + CS_LOCK(drvdata->base); + + spin_unlock_irqrestore(&drvdata->spinlock, flags); + clk_disable_unprepare(drvdata->clk); +out: + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t seq_curr_state_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + if (val > ETM_SEQ_STATE_MAX_VAL) + return -EINVAL; + + drvdata->seq_curr_state = val; + + return size; +} +static DEVICE_ATTR_RW(seq_curr_state); + +static ssize_t ctxid_idx_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->ctxid_idx; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t ctxid_idx_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + if (val >= drvdata->nr_ctxid_cmp) + return -EINVAL; + + /* + * Use spinlock to ensure index doesn't change while it gets + * dereferenced multiple times within a spinlock block elsewhere. + */ + spin_lock(&drvdata->spinlock); + drvdata->ctxid_idx = val; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(ctxid_idx); + +static ssize_t ctxid_val_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + spin_lock(&drvdata->spinlock); + val = drvdata->ctxid_val[drvdata->ctxid_idx]; + spin_unlock(&drvdata->spinlock); + + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t ctxid_val_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + spin_lock(&drvdata->spinlock); + drvdata->ctxid_val[drvdata->ctxid_idx] = val; + spin_unlock(&drvdata->spinlock); + + return size; +} +static DEVICE_ATTR_RW(ctxid_val); + +static ssize_t ctxid_mask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->ctxid_mask; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t ctxid_mask_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->ctxid_mask = val; + return size; +} +static DEVICE_ATTR_RW(ctxid_mask); + +static ssize_t sync_freq_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->sync_freq; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t sync_freq_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->sync_freq = val & ETM_SYNC_MASK; + return size; +} +static DEVICE_ATTR_RW(sync_freq); + +static ssize_t timestamp_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + val = drvdata->timestamp_event; + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t timestamp_event_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->timestamp_event = val & ETM_EVENT_MASK; + return size; +} +static DEVICE_ATTR_RW(timestamp_event); + +static ssize_t status_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + unsigned long flags; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + spin_lock_irqsave(&drvdata->spinlock, flags); + + CS_UNLOCK(drvdata->base); + ret = sprintf(buf, + "ETMCCR: 0x%08x\n" + "ETMCCER: 0x%08x\n" + "ETMSCR: 0x%08x\n" + "ETMIDR: 0x%08x\n" + "ETMCR: 0x%08x\n" + "ETMTRACEIDR: 0x%08x\n" + "Enable event: 0x%08x\n" + "Enable start/stop: 0x%08x\n" + "Enable control: CR1 0x%08x CR2 0x%08x\n" + "CPU affinity: %d\n", + drvdata->etmccr, drvdata->etmccer, + etm_readl(drvdata, ETMSCR), etm_readl(drvdata, ETMIDR), + etm_readl(drvdata, ETMCR), etm_trace_id_simple(drvdata), + etm_readl(drvdata, ETMTEEVR), + etm_readl(drvdata, ETMTSSCR), + etm_readl(drvdata, ETMTECR1), + etm_readl(drvdata, ETMTECR2), + drvdata->cpu); + CS_LOCK(drvdata->base); + + spin_unlock_irqrestore(&drvdata->spinlock, flags); + clk_disable_unprepare(drvdata->clk); + + return ret; +} +static DEVICE_ATTR_RO(status); + +static ssize_t traceid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + unsigned long val, flags; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + if (!drvdata->enable) { + val = drvdata->traceid; + goto out; + } + + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + spin_lock_irqsave(&drvdata->spinlock, flags); + CS_UNLOCK(drvdata->base); + + val = (etm_readl(drvdata, ETMTRACEIDR) & ETM_TRACEID_MASK); + + CS_LOCK(drvdata->base); + spin_unlock_irqrestore(&drvdata->spinlock, flags); + clk_disable_unprepare(drvdata->clk); +out: + return sprintf(buf, "%#lx\n", val); +} + +static ssize_t traceid_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); + + ret = kstrtoul(buf, 16, &val); + if (ret) + return ret; + + drvdata->traceid = val & ETM_TRACEID_MASK; + return size; +} +static DEVICE_ATTR_RW(traceid); + +static struct attribute *coresight_etm_attrs[] = { + &dev_attr_nr_addr_cmp.attr, + &dev_attr_nr_cntr.attr, + &dev_attr_nr_ctxid_cmp.attr, + &dev_attr_etmsr.attr, + &dev_attr_reset.attr, + &dev_attr_mode.attr, + &dev_attr_trigger_event.attr, + &dev_attr_enable_event.attr, + &dev_attr_fifofull_level.attr, + &dev_attr_addr_idx.attr, + &dev_attr_addr_single.attr, + &dev_attr_addr_range.attr, + &dev_attr_addr_start.attr, + &dev_attr_addr_stop.attr, + &dev_attr_addr_acctype.attr, + &dev_attr_cntr_idx.attr, + &dev_attr_cntr_rld_val.attr, + &dev_attr_cntr_event.attr, + &dev_attr_cntr_rld_event.attr, + &dev_attr_cntr_val.attr, + &dev_attr_seq_12_event.attr, + &dev_attr_seq_21_event.attr, + &dev_attr_seq_23_event.attr, + &dev_attr_seq_31_event.attr, + &dev_attr_seq_32_event.attr, + &dev_attr_seq_13_event.attr, + &dev_attr_seq_curr_state.attr, + &dev_attr_ctxid_idx.attr, + &dev_attr_ctxid_val.attr, + &dev_attr_ctxid_mask.attr, + &dev_attr_sync_freq.attr, + &dev_attr_timestamp_event.attr, + &dev_attr_status.attr, + &dev_attr_traceid.attr, + NULL, +}; +ATTRIBUTE_GROUPS(coresight_etm); + +static int etm_cpu_callback(struct notifier_block *nfb, unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + if (!etmdrvdata[cpu]) + goto out; + + switch (action & (~CPU_TASKS_FROZEN)) { + case CPU_STARTING: + spin_lock(&etmdrvdata[cpu]->spinlock); + if (!etmdrvdata[cpu]->os_unlock) { + etm_os_unlock(etmdrvdata[cpu]); + etmdrvdata[cpu]->os_unlock = true; + } + + if (etmdrvdata[cpu]->enable) + etm_enable_hw(etmdrvdata[cpu]); + spin_unlock(&etmdrvdata[cpu]->spinlock); + break; + + case CPU_ONLINE: + if (etmdrvdata[cpu]->boot_enable && + !etmdrvdata[cpu]->sticky_enable) + coresight_enable(etmdrvdata[cpu]->csdev); + break; + + case CPU_DYING: + spin_lock(&etmdrvdata[cpu]->spinlock); + if (etmdrvdata[cpu]->enable) + etm_disable_hw(etmdrvdata[cpu]); + spin_unlock(&etmdrvdata[cpu]->spinlock); + break; + } +out: + return NOTIFY_OK; +} + +static struct notifier_block etm_cpu_notifier = { + .notifier_call = etm_cpu_callback, +}; + +static bool etm_arch_supported(u8 arch) +{ + switch (arch) { + case ETM_ARCH_V3_3: + break; + case ETM_ARCH_V3_5: + break; + case PFT_ARCH_V1_0: + break; + case PFT_ARCH_V1_1: + break; + default: + return false; + } + return true; +} + +static void etm_init_arch_data(void *info) +{ + u32 etmidr; + u32 etmccr; + struct etm_drvdata *drvdata = info; + + CS_UNLOCK(drvdata->base); + + /* First dummy read */ + (void)etm_readl(drvdata, ETMPDSR); + /* Provide power to ETM: ETMPDCR[3] == 1 */ + etm_set_pwrup(drvdata); + /* + * Clear power down bit since when this bit is set writes to + * certain registers might be ignored. + */ + etm_clr_pwrdwn(drvdata); + /* + * Set prog bit. It will be set from reset but this is included to + * ensure it is set + */ + etm_set_prog(drvdata); + + /* Find all capabilities */ + etmidr = etm_readl(drvdata, ETMIDR); + drvdata->arch = BMVAL(etmidr, 4, 11); + drvdata->port_size = etm_readl(drvdata, ETMCR) & PORT_SIZE_MASK; + + drvdata->etmccer = etm_readl(drvdata, ETMCCER); + etmccr = etm_readl(drvdata, ETMCCR); + drvdata->etmccr = etmccr; + drvdata->nr_addr_cmp = BMVAL(etmccr, 0, 3) * 2; + drvdata->nr_cntr = BMVAL(etmccr, 13, 15); + drvdata->nr_ext_inp = BMVAL(etmccr, 17, 19); + drvdata->nr_ext_out = BMVAL(etmccr, 20, 22); + drvdata->nr_ctxid_cmp = BMVAL(etmccr, 24, 25); + + etm_set_pwrdwn(drvdata); + etm_clr_pwrup(drvdata); + CS_LOCK(drvdata->base); +} + +static void etm_init_default_data(struct etm_drvdata *drvdata) +{ + static int etm3x_traceid; + + u32 flags = (1 << 0 | /* instruction execute*/ + 3 << 3 | /* ARM instruction */ + 0 << 5 | /* No data value comparison */ + 0 << 7 | /* No exact mach */ + 0 << 8 | /* Ignore context ID */ + 0 << 10); /* Security ignored */ + + /* + * Initial configuration only - guarantees sources handled by + * this driver have a unique ID at startup time but not between + * all other types of sources. For that we lean on the core + * framework. + */ + drvdata->traceid = etm3x_traceid++; + drvdata->ctrl = (ETMCR_CYC_ACC | ETMCR_TIMESTAMP_EN); + drvdata->enable_ctrl1 = ETMTECR1_ADDR_COMP_1; + if (drvdata->nr_addr_cmp >= 2) { + drvdata->addr_val[0] = (u32) _stext; + drvdata->addr_val[1] = (u32) _etext; + drvdata->addr_acctype[0] = flags; + drvdata->addr_acctype[1] = flags; + drvdata->addr_type[0] = ETM_ADDR_TYPE_RANGE; + drvdata->addr_type[1] = ETM_ADDR_TYPE_RANGE; + } + + etm_set_default(drvdata); +} + +static int etm_probe(struct amba_device *adev, const struct amba_id *id) +{ + int ret; + void __iomem *base; + struct device *dev = &adev->dev; + struct coresight_platform_data *pdata = NULL; + struct etm_drvdata *drvdata; + struct resource *res = &adev->res; + struct coresight_desc *desc; + struct device_node *np = adev->dev.of_node; + + desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + if (np) { + pdata = of_get_coresight_platform_data(dev, np); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + + adev->dev.platform_data = pdata; + drvdata->use_cp14 = of_property_read_bool(np, "arm,cp14"); + } + + drvdata->dev = &adev->dev; + dev_set_drvdata(dev, drvdata); + + /* Validity for the resource is already checked by the AMBA core */ + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + drvdata->base = base; + + spin_lock_init(&drvdata->spinlock); + + drvdata->clk = adev->pclk; + ret = clk_prepare_enable(drvdata->clk); + if (ret) + return ret; + + drvdata->cpu = pdata ? pdata->cpu : 0; + + get_online_cpus(); + etmdrvdata[drvdata->cpu] = drvdata; + + if (!smp_call_function_single(drvdata->cpu, etm_os_unlock, drvdata, 1)) + drvdata->os_unlock = true; + + if (smp_call_function_single(drvdata->cpu, + etm_init_arch_data, drvdata, 1)) + dev_err(dev, "ETM arch init failed\n"); + + if (!etm_count++) + register_hotcpu_notifier(&etm_cpu_notifier); + + put_online_cpus(); + + if (etm_arch_supported(drvdata->arch) == false) { + ret = -EINVAL; + goto err_arch_supported; + } + etm_init_default_data(drvdata); + + clk_disable_unprepare(drvdata->clk); + + desc->type = CORESIGHT_DEV_TYPE_SOURCE; + desc->subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC; + desc->ops = &etm_cs_ops; + desc->pdata = pdata; + desc->dev = dev; + desc->groups = coresight_etm_groups; + drvdata->csdev = coresight_register(desc); + if (IS_ERR(drvdata->csdev)) { + ret = PTR_ERR(drvdata->csdev); + goto err_arch_supported; + } + + dev_info(dev, "ETM initialized\n"); + + if (boot_enable) { + coresight_enable(drvdata->csdev); + drvdata->boot_enable = true; + } + + return 0; + +err_arch_supported: + clk_disable_unprepare(drvdata->clk); + if (--etm_count == 0) + unregister_hotcpu_notifier(&etm_cpu_notifier); + return ret; +} + +static int etm_remove(struct amba_device *adev) +{ + struct etm_drvdata *drvdata = amba_get_drvdata(adev); + + coresight_unregister(drvdata->csdev); + if (--etm_count == 0) + unregister_hotcpu_notifier(&etm_cpu_notifier); + + return 0; +} + +static struct amba_id etm_ids[] = { + { /* ETM 3.3 */ + .id = 0x0003b921, + .mask = 0x0003ffff, + }, + { /* ETM 3.5 */ + .id = 0x0003b956, + .mask = 0x0003ffff, + }, + { /* PTM 1.0 */ + .id = 0x0003b950, + .mask = 0x0003ffff, + }, + { /* PTM 1.1 */ + .id = 0x0003b95f, + .mask = 0x0003ffff, + }, + { 0, 0}, +}; + +static struct amba_driver etm_driver = { + .drv = { + .name = "coresight-etm3x", + .owner = THIS_MODULE, + }, + .probe = etm_probe, + .remove = etm_remove, + .id_table = etm_ids, +}; + +int __init etm_init(void) +{ + return amba_driver_register(&etm_driver); +} +module_init(etm_init); + +void __exit etm_exit(void) +{ + amba_driver_unregister(&etm_driver); +} +module_exit(etm_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight Program Flow Trace driver"); From aec6d76966d7a9c87eb33787cb7a92ab9801663a Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 3 Nov 2014 11:07:42 -0700 Subject: [PATCH 074/214] coresight: documentation for coresight framework and drivers Documentation containing an explanation on what the framework provides and the drivers working with it. A minimal example on how to use the functionality is also provided. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 872234d3fb32e26d3377590c396858d2324b8c16) Signed-off-by: Mathieu Poirier --- Documentation/trace/coresight.txt | 299 ++++++++++++++++++++++++++++++ 1 file changed, 299 insertions(+) create mode 100644 Documentation/trace/coresight.txt diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt new file mode 100644 index 000000000000..bba7dbfc49ed --- /dev/null +++ b/Documentation/trace/coresight.txt @@ -0,0 +1,299 @@ + Coresight - HW Assisted Tracing on ARM + ====================================== + + Author: Mathieu Poirier + Date: September 11th, 2014 + +Introduction +------------ + +Coresight is an umbrella of technologies allowing for the debugging of ARM +based SoC. It includes solutions for JTAG and HW assisted tracing. This +document is concerned with the latter. + +HW assisted tracing is becoming increasingly useful when dealing with systems +that have many SoCs and other components like GPU and DMA engines. ARM has +developed a HW assisted tracing solution by means of different components, each +being added to a design at systhesis time to cater to specific tracing needs. +Compoments are generally categorised as source, link and sinks and are +(usually) discovered using the AMBA bus. + +"Sources" generate a compressed stream representing the processor instruction +path based on tracing scenarios as configured by users. From there the stream +flows through the coresight system (via ATB bus) using links that are connecting +the emanating source to a sink(s). Sinks serve as endpoints to the coresight +implementation, either storing the compressed stream in a memory buffer or +creating an interface to the outside world where data can be transferred to a +host without fear of filling up the onboard coresight memory buffer. + +At typical coresight system would look like this: + + ***************************************************************** + **************************** AMBA AXI ****************************===|| + ***************************************************************** || + ^ ^ | || + | | * ** + 0000000 ::::: 0000000 ::::: ::::: @@@@@@@ |||||||||||| + 0 CPU 0<-->: C : 0 CPU 0<-->: C : : C : @ STM @ || System || + |->0000000 : T : |->0000000 : T : : T :<--->@@@@@ || Memory || + | #######<-->: I : | #######<-->: I : : I : @@@<-| |||||||||||| + | # ETM # ::::: | # PTM # ::::: ::::: @ | + | ##### ^ ^ | ##### ^ ! ^ ! . | ||||||||| + | |->### | ! | |->### | ! | ! . | || DAP || + | | # | ! | | # | ! | ! . | ||||||||| + | | . | ! | | . | ! | ! . | | | + | | . | ! | | . | ! | ! . | | * + | | . | ! | | . | ! | ! . | | SWD/ + | | . | ! | | . | ! | ! . | | JTAG + *****************************************************************<-| + *************************** AMBA Debug ABP ************************ + ***************************************************************** + | . ! . ! ! . | + | . * . * * . | + ***************************************************************** + ******************** Cross Trigger Matrix (CTM) ******************* + ***************************************************************** + | . ^ . . | + | * ! * * | + ***************************************************************** + ****************** AMBA Advanced Trace Bus (ATB) ****************** + ***************************************************************** + | ! =============== | + | * ===== F =====<---------| + | ::::::::: ==== U ==== + |-->:: CTI ::&& ETB &&<......II I ======= + | ! &&&&&&&&& II I . + | ! I I . + | ! I REP I<.......... + | ! I I + | !!>&&&&&&&&& II I *Source: ARM ltd. + |------>& TPIU &<......II I DAP = Debug Access Port + &&&&&&&&& IIIIIII ETM = Embedded Trace Macrocell + ; PTM = Program Trace Macrocell + ; CTI = Cross Trigger Interface + * ETB = Embedded Trace Buffer + To trace port TPIU= Trace Port Interface Unit + SWD = Serial Wire Debug + +While on target configuration of the components is done via the ABP bus, +all trace data are carried out-of-band on the ATB bus. The CTM provides +a way to aggregate and distribute signals between CoreSight components. + +The coresight framework provides a central point to represent, configure and +manage coresight devices on a platform. This first implementation centers on +the basic tracing functionality, enabling components such ETM/PTM, funnel, +replicator, TMC, TPIU and ETB. Future work will enable more +intricate IP blocks such as STM and CTI. + + +Acronyms and Classification +--------------------------- + +Acronyms: + +PTM: Program Trace Macrocell +ETM: Embedded Trace Macrocell +STM: System trace Macrocell +ETB: Embedded Trace Buffer +ITM: Instrumentation Trace Macrocell +TPIU: Trace Port Interface Unit +TMC-ETR: Trace Memory Controller, configured as Embedded Trace Router +TMC-ETF: Trace Memory Controller, configured as Embedded Trace FIFO +CTI: Cross Trigger Interface + +Classification: + +Source: + ETMv3.x ETMv4, PTMv1.0, PTMv1.1, STM, STM500, ITM +Link: + Funnel, replicator (intelligent or not), TMC-ETR +Sinks: + ETBv1.0, ETB1.1, TPIU, TMC-ETF +Misc: + CTI + + +Device Tree Bindings +---------------------- + +See Documentation/devicetree/bindings/arm/coresight.txt for details. + +As of this writing drivers for ITM, STMs and CTIs are not provided but are +expected to be added as the solution matures. + + +Framework and implementation +---------------------------- + +The coresight framework provides a central point to represent, configure and +manage coresight devices on a platform. Any coresight compliant device can +register with the framework for as long as they use the right APIs: + +struct coresight_device *coresight_register(struct coresight_desc *desc); +void coresight_unregister(struct coresight_device *csdev); + +The registering function is taking a "struct coresight_device *csdev" and +register the device with the core framework. The unregister function takes +a reference to a "strut coresight_device", obtained at registration time. + +If everything goes well during the registration process the new devices will +show up under /sys/bus/coresight/devices, as showns here for a TC2 platform: + +root:~# ls /sys/bus/coresight/devices/ +replicator 20030000.tpiu 2201c000.ptm 2203c000.etm 2203e000.etm +20010000.etb 20040000.funnel 2201d000.ptm 2203d000.etm +root:~# + +The functions take a "struct coresight_device", which looks like this: + +struct coresight_desc { + enum coresight_dev_type type; + struct coresight_dev_subtype subtype; + const struct coresight_ops *ops; + struct coresight_platform_data *pdata; + struct device *dev; + const struct attribute_group **groups; +}; + + +The "coresight_dev_type" identifies what the device is, i.e, source link or +sink while the "coresight_dev_subtype" will characterise that type further. + +The "struct coresight_ops" is mandatory and will tell the framework how to +perform base operations related to the components, each component having +a different set of requirement. For that "struct coresight_ops_sink", +"struct coresight_ops_link" and "struct coresight_ops_source" have been +provided. + +The next field, "struct coresight_platform_data *pdata" is acquired by calling +"of_get_coresight_platform_data()", as part of the driver's _probe routine and +"struct device *dev" gets the device reference embedded in the "amba_device": + +static int etm_probe(struct amba_device *adev, const struct amba_id *id) +{ + ... + ... + drvdata->dev = &adev->dev; + ... +} + +Specific class of device (source, link, or sink) have generic operations +that can be performed on them (see "struct coresight_ops"). The +"**groups" is a list of sysfs entries pertaining to operations +specific to that component only. "Implementation defined" customisations are +expected to be accessed and controlled using those entries. + +Last but not least, "struct module *owner" is expected to be set to reflect +the information carried in "THIS_MODULE". + +How to use +---------- + +Before trace collection can start, a coresight sink needs to be identify. +There is no limit on the amount of sinks (nor sources) that can be enabled at +any given moment. As a generic operation, all device pertaining to the sink +class will have an "active" entry in sysfs: + +root:/sys/bus/coresight/devices# ls +replicator 20030000.tpiu 2201c000.ptm 2203c000.etm 2203e000.etm +20010000.etb 20040000.funnel 2201d000.ptm 2203d000.etm +root:/sys/bus/coresight/devices# ls 20010000.etb +enable_sink status trigger_cntr +root:/sys/bus/coresight/devices# echo 1 > 20010000.etb/enable_sink +root:/sys/bus/coresight/devices# cat 20010000.etb/enable_sink +1 +root:/sys/bus/coresight/devices# + +At boot time the current etm3x driver will configure the first address +comparator with "_stext" and "_etext", essentially tracing any instruction +that falls within that range. As such "enabling" a source will immediately +trigger a trace capture: + +root:/sys/bus/coresight/devices# echo 1 > 2201c000.ptm/enable_source +root:/sys/bus/coresight/devices# cat 2201c000.ptm/enable_source +1 +root:/sys/bus/coresight/devices# cat 20010000.etb/status +Depth: 0x2000 +Status: 0x1 +RAM read ptr: 0x0 +RAM wrt ptr: 0x19d3 <----- The write pointer is moving +Trigger cnt: 0x0 +Control: 0x1 +Flush status: 0x0 +Flush ctrl: 0x2001 +root:/sys/bus/coresight/devices# + +Trace collection is stopped the same way: + +root:/sys/bus/coresight/devices# echo 0 > 2201c000.ptm/enable_source +root:/sys/bus/coresight/devices# + +The content of the ETB buffer can be harvested directly from /dev: + +root:/sys/bus/coresight/devices# dd if=/dev/20010000.etb \ +of=~/cstrace.bin + +64+0 records in +64+0 records out +32768 bytes (33 kB) copied, 0.00125258 s, 26.2 MB/s +root:/sys/bus/coresight/devices# + +The file cstrace.bin can be decompressed using "ptm2human", DS-5 or Trace32. + +Following is a DS-5 output of an experimental loop that increments a variable up +to a certain value. The example is simple and yet provides a glimpse of the +wealth of possibilities that coresight provides. + +Info Tracing enabled +Instruction 106378866 0x8026B53C E52DE004 false PUSH {lr} +Instruction 0 0x8026B540 E24DD00C false SUB sp,sp,#0xc +Instruction 0 0x8026B544 E3A03000 false MOV r3,#0 +Instruction 0 0x8026B548 E58D3004 false STR r3,[sp,#4] +Instruction 0 0x8026B54C E59D3004 false LDR r3,[sp,#4] +Instruction 0 0x8026B550 E3530004 false CMP r3,#4 +Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1 +Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4] +Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c +Timestamp Timestamp: 17106715833 +Instruction 319 0x8026B54C E59D3004 false LDR r3,[sp,#4] +Instruction 0 0x8026B550 E3530004 false CMP r3,#4 +Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1 +Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4] +Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c +Instruction 9 0x8026B54C E59D3004 false LDR r3,[sp,#4] +Instruction 0 0x8026B550 E3530004 false CMP r3,#4 +Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1 +Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4] +Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c +Instruction 7 0x8026B54C E59D3004 false LDR r3,[sp,#4] +Instruction 0 0x8026B550 E3530004 false CMP r3,#4 +Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1 +Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4] +Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c +Instruction 7 0x8026B54C E59D3004 false LDR r3,[sp,#4] +Instruction 0 0x8026B550 E3530004 false CMP r3,#4 +Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1 +Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4] +Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c +Instruction 10 0x8026B54C E59D3004 false LDR r3,[sp,#4] +Instruction 0 0x8026B550 E3530004 false CMP r3,#4 +Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1 +Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4] +Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c +Instruction 6 0x8026B560 EE1D3F30 false MRC p15,#0x0,r3,c13,c0,#1 +Instruction 0 0x8026B564 E1A0100D false MOV r1,sp +Instruction 0 0x8026B568 E3C12D7F false BIC r2,r1,#0x1fc0 +Instruction 0 0x8026B56C E3C2203F false BIC r2,r2,#0x3f +Instruction 0 0x8026B570 E59D1004 false LDR r1,[sp,#4] +Instruction 0 0x8026B574 E59F0010 false LDR r0,[pc,#16] ; [0x8026B58C] = 0x80550368 +Instruction 0 0x8026B578 E592200C false LDR r2,[r2,#0xc] +Instruction 0 0x8026B57C E59221D0 false LDR r2,[r2,#0x1d0] +Instruction 0 0x8026B580 EB07A4CF true BL {pc}+0x1e9344 ; 0x804548c4 +Info Tracing enabled +Instruction 13570831 0x8026B584 E28DD00C false ADD sp,sp,#0xc +Instruction 0 0x8026B588 E8BD8000 true LDM sp!,{pc} +Timestamp Timestamp: 17107041535 From 63041eff2297676a345478c2b1cc6f76c63102db Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 3 Nov 2014 11:07:44 -0700 Subject: [PATCH 075/214] coresight: adding basic support for Vexpress TC2 Support for the 2 PTMs, 3 ETMs, funnel, TPIU and replicator connected to the ETB are included. Proper handling of the ITM and the replicator linked to it along with the CTIs and SWO are not included. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0bec8d82bd112f6aa0e16cab5d840bd7f5d3a7ce) Signed-off-by: Mathieu Poirier --- arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 199 +++++++++++++++++++++ 1 file changed, 199 insertions(+) diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index d2803be4e1a8..62e629bd3550 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -303,6 +303,205 @@ }; }; + etb@0,20010000 { + compatible = "arm,coresight-etb10", "arm,primecell"; + reg = <0 0x20010000 0 0x1000>; + + coresight-default-sink; + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + etb_in_port: endpoint@0 { + slave-mode; + remote-endpoint = <&replicator_out_port0>; + }; + }; + }; + + tpiu@0,20030000 { + compatible = "arm,coresight-tpiu", "arm,primecell"; + reg = <0 0x20030000 0 0x1000>; + + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + tpiu_in_port: endpoint@0 { + slave-mode; + remote-endpoint = <&replicator_out_port1>; + }; + }; + }; + + replicator { + /* non-configurable replicators don't show up on the + * AMBA bus. As such no need to add "arm,primecell". + */ + compatible = "arm,coresight-replicator"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* replicator output ports */ + port@0 { + reg = <0>; + replicator_out_port0: endpoint { + remote-endpoint = <&etb_in_port>; + }; + }; + + port@1 { + reg = <1>; + replicator_out_port1: endpoint { + remote-endpoint = <&tpiu_in_port>; + }; + }; + + /* replicator input port */ + port@2 { + reg = <0>; + replicator_in_port0: endpoint { + slave-mode; + remote-endpoint = <&funnel_out_port0>; + }; + }; + }; + }; + + funnel@0,20040000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0x20040000 0 0x1000>; + + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* funnel output port */ + port@0 { + reg = <0>; + funnel_out_port0: endpoint { + remote-endpoint = + <&replicator_in_port0>; + }; + }; + + /* funnel input ports */ + port@1 { + reg = <0>; + funnel_in_port0: endpoint { + slave-mode; + remote-endpoint = <&ptm0_out_port>; + }; + }; + + port@2 { + reg = <1>; + funnel_in_port1: endpoint { + slave-mode; + remote-endpoint = <&ptm1_out_port>; + }; + }; + + port@3 { + reg = <2>; + funnel_in_port2: endpoint { + slave-mode; + remote-endpoint = <&etm0_out_port>; + }; + }; + + /* Input port #3 is for ITM, not supported here */ + + port@4 { + reg = <4>; + funnel_in_port4: endpoint { + slave-mode; + remote-endpoint = <&etm1_out_port>; + }; + }; + + port@5 { + reg = <5>; + funnel_in_port5: endpoint { + slave-mode; + remote-endpoint = <&etm2_out_port>; + }; + }; + }; + }; + + ptm@0,2201c000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0x2201c000 0 0x1000>; + + cpu = <&cpu0>; + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + ptm0_out_port: endpoint { + remote-endpoint = <&funnel_in_port0>; + }; + }; + }; + + ptm@0,2201d000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0x2201d000 0 0x1000>; + + cpu = <&cpu1>; + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + ptm1_out_port: endpoint { + remote-endpoint = <&funnel_in_port1>; + }; + }; + }; + + etm@0,2203c000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0x2203c000 0 0x1000>; + + cpu = <&cpu2>; + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + etm0_out_port: endpoint { + remote-endpoint = <&funnel_in_port2>; + }; + }; + }; + + etm@0,2203d000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0x2203d000 0 0x1000>; + + cpu = <&cpu3>; + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + etm1_out_port: endpoint { + remote-endpoint = <&funnel_in_port4>; + }; + }; + }; + + etm@0,2203e000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0x2203e000 0 0x1000>; + + cpu = <&cpu4>; + clocks = <&oscclk6a>; + clock-names = "apb_pclk"; + port { + etm2_out_port: endpoint { + remote-endpoint = <&funnel_in_port5>; + }; + }; + }; + smb { compatible = "simple-bus"; From 3aca7ef7f93894209a35c17786c2857422e2ca84 Mon Sep 17 00:00:00 2001 From: Xia Kaixu Date: Mon, 3 Nov 2014 11:07:45 -0700 Subject: [PATCH 076/214] coresight: adding basic support for D01 board Support for 16 PTMs, funnel, TPIU and replicator connected to the ETB are included. Signed-off-by: Xia Kaixu Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4d5616ca59350c47e4b00d17c1480d8b44d3c535) Signed-off-by: Mathieu Poirier Conflicts: arch/arm/boot/dts/hip04.dtsi --- arch/arm/boot/dts/hip04.dtsi | 984 +++++++++++++++++++++++++++++++++++ 1 file changed, 984 insertions(+) create mode 100644 arch/arm/boot/dts/hip04.dtsi diff --git a/arch/arm/boot/dts/hip04.dtsi b/arch/arm/boot/dts/hip04.dtsi new file mode 100644 index 000000000000..238814596a87 --- /dev/null +++ b/arch/arm/boot/dts/hip04.dtsi @@ -0,0 +1,984 @@ +/* + * Hisilicon Ltd. HiP04 SoC + * + * Copyright (C) 2013-2014 Hisilicon Ltd. + * Copyright (C) 2013-2014 Linaro Ltd. + * + * Author: Haojian Zhuang + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/ { + /* memory bus is 64-bit */ + #address-cells = <2>; + #size-cells = <2>; + + aliases { + serial0 = &uart0; + }; + + bootwrapper { + compatible = "hisilicon,hip04-bootwrapper"; + boot-method = <0x10c00000 0x10000>, <0xe0000100 0x1000>; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu-map { + cluster0 { + core0 { + cpu = <&CPU0>; + }; + core1 { + cpu = <&CPU1>; + }; + core2 { + cpu = <&CPU2>; + }; + core3 { + cpu = <&CPU3>; + }; + }; + cluster1 { + core0 { + cpu = <&CPU4>; + }; + core1 { + cpu = <&CPU5>; + }; + core2 { + cpu = <&CPU6>; + }; + core3 { + cpu = <&CPU7>; + }; + }; + cluster2 { + core0 { + cpu = <&CPU8>; + }; + core1 { + cpu = <&CPU9>; + }; + core2 { + cpu = <&CPU10>; + }; + core3 { + cpu = <&CPU11>; + }; + }; + cluster3 { + core0 { + cpu = <&CPU12>; + }; + core1 { + cpu = <&CPU13>; + }; + core2 { + cpu = <&CPU14>; + }; + core3 { + cpu = <&CPU15>; + }; + }; + }; + CPU0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + CPU1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + }; + CPU2: cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + }; + CPU3: cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + }; + CPU4: cpu@100 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x100>; + }; + CPU5: cpu@101 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x101>; + }; + CPU6: cpu@102 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x102>; + }; + CPU7: cpu@103 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x103>; + }; + CPU8: cpu@200 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x200>; + }; + CPU9: cpu@201 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x201>; + }; + CPU10: cpu@202 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x202>; + }; + CPU11: cpu@203 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x203>; + }; + CPU12: cpu@300 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x300>; + }; + CPU13: cpu@301 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x301>; + }; + CPU14: cpu@302 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x302>; + }; + CPU15: cpu@303 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0x303>; + }; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupt-parent = <&gic>; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + clk_50m: clk_50m { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <50000000>; + }; + + clk_168m: clk_168m { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <168000000>; + }; + + clk_375m: clk_375m { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <375000000>; + }; + + soc { + /* It's a 32-bit SoC. */ + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + interrupt-parent = <&gic>; + ranges = <0 0 0xe0000000 0x10000000>; + + gic: interrupt-controller@c01000 { + compatible = "hisilicon,hip04-intc"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + interrupts = <1 9 0xf04>; + + reg = <0xc01000 0x1000>, <0xc02000 0x1000>, + <0xc04000 0x2000>, <0xc06000 0x2000>; + }; + + sysctrl: sysctrl { + compatible = "hisilicon,sysctrl"; + reg = <0x3e00000 0x00100000>; + }; + + fabric: fabric { + compatible = "hisilicon,hip04-fabric"; + reg = <0x302a000 0x1000>; + }; + + dual_timer0: dual_timer@3000000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x3000000 0x1000>; + interrupts = <0 224 4>; + clocks = <&clk_50m>, <&clk_50m>; + clock-names = "apb_pclk"; + }; + + arm-pmu { + compatible = "arm,cortex-a15-pmu"; + interrupts = <0 64 4>, + <0 65 4>, + <0 66 4>, + <0 67 4>, + <0 68 4>, + <0 69 4>, + <0 70 4>, + <0 71 4>, + <0 72 4>, + <0 73 4>, + <0 74 4>, + <0 75 4>, + <0 76 4>, + <0 77 4>, + <0 78 4>, + <0 79 4>; + }; + + uart0: uart@4007000 { + compatible = "snps,dw-apb-uart"; + reg = <0x4007000 0x1000>; + interrupts = <0 381 4>; + clocks = <&clk_168m>; + clock-names = "uartclk"; + reg-shift = <2>; + status = "disabled"; + }; + + sata0: sata@a000000 { + compatible = "hisilicon,hisi-ahci"; + reg = <0xa000000 0x1000000>; + interrupts = <0 372 4>; + }; + + }; + + etb@0,e3c42000 { + compatible = "arm,coresight-etb10", "arm,primecell"; + reg = <0 0xe3c42000 0 0x1000>; + + coresight-default-sink; + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + port { + etb0_in_port: endpoint@0 { + slave-mode; + remote-endpoint = <&replicator0_out_port0>; + }; + }; + }; + + etb@0,e3c82000 { + compatible = "arm,coresight-etb10", "arm,primecell"; + reg = <0 0xe3c82000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + port { + etb1_in_port: endpoint@0 { + slave-mode; + remote-endpoint = <&replicator1_out_port0>; + }; + }; + }; + + etb@0,e3cc2000 { + compatible = "arm,coresight-etb10", "arm,primecell"; + reg = <0 0xe3cc2000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + port { + etb2_in_port: endpoint@0 { + slave-mode; + remote-endpoint = <&replicator2_out_port0>; + }; + }; + }; + + etb@0,e3d02000 { + compatible = "arm,coresight-etb10", "arm,primecell"; + reg = <0 0xe3d02000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + port { + etb3_in_port: endpoint@0 { + slave-mode; + remote-endpoint = <&replicator3_out_port0>; + }; + }; + }; + + tpiu@0,e3c05000 { + compatible = "arm,coresight-tpiu", "arm,primecell"; + reg = <0 0xe3c05000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + port { + tpiu_in_port: endpoint@0 { + slave-mode; + remote-endpoint = <&funnel4_out_port0>; + }; + }; + }; + + replicator0 { + /* non-configurable replicators don't show up on the + * AMBA bus. As such no need to add "arm,primecell". + */ + compatible = "arm,coresight-replicator"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* replicator output ports */ + port@0 { + reg = <0>; + replicator0_out_port0: endpoint { + remote-endpoint = <&etb0_in_port>; + }; + }; + + port@1 { + reg = <1>; + replicator0_out_port1: endpoint { + remote-endpoint = <&funnel4_in_port0>; + }; + }; + + /* replicator input port */ + port@2 { + reg = <0>; + replicator0_in_port0: endpoint { + slave-mode; + remote-endpoint = <&funnel0_out_port0>; + }; + }; + }; + }; + + replicator1 { + /* non-configurable replicators don't show up on the + * AMBA bus. As such no need to add "arm,primecell". + */ + compatible = "arm,coresight-replicator"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* replicator output ports */ + port@0 { + reg = <0>; + replicator1_out_port0: endpoint { + remote-endpoint = <&etb1_in_port>; + }; + }; + + port@1 { + reg = <1>; + replicator1_out_port1: endpoint { + remote-endpoint = <&funnel4_in_port1>; + }; + }; + + /* replicator input port */ + port@2 { + reg = <0>; + replicator1_in_port0: endpoint { + slave-mode; + remote-endpoint = <&funnel1_out_port0>; + }; + }; + }; + }; + + replicator2 { + /* non-configurable replicators don't show up on the + * AMBA bus. As such no need to add "arm,primecell". + */ + compatible = "arm,coresight-replicator"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* replicator output ports */ + port@0 { + reg = <0>; + replicator2_out_port0: endpoint { + remote-endpoint = <&etb2_in_port>; + }; + }; + + port@1 { + reg = <1>; + replicator2_out_port1: endpoint { + remote-endpoint = <&funnel4_in_port2>; + }; + }; + + /* replicator input port */ + port@2 { + reg = <0>; + replicator2_in_port0: endpoint { + slave-mode; + remote-endpoint = <&funnel2_out_port0>; + }; + }; + }; + }; + + replicator3 { + /* non-configurable replicators don't show up on the + * AMBA bus. As such no need to add "arm,primecell". + */ + compatible = "arm,coresight-replicator"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* replicator output ports */ + port@0 { + reg = <0>; + replicator3_out_port0: endpoint { + remote-endpoint = <&etb3_in_port>; + }; + }; + + port@1 { + reg = <1>; + replicator3_out_port1: endpoint { + remote-endpoint = <&funnel4_in_port3>; + }; + }; + + /* replicator input port */ + port@2 { + reg = <0>; + replicator3_in_port0: endpoint { + slave-mode; + remote-endpoint = <&funnel3_out_port0>; + }; + }; + }; + }; + + funnel@0,e3c41000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0xe3c41000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* funnel output port */ + port@0 { + reg = <0>; + funnel0_out_port0: endpoint { + remote-endpoint = + <&replicator0_in_port0>; + }; + }; + + /* funnel input ports */ + port@1 { + reg = <0>; + funnel0_in_port0: endpoint { + slave-mode; + remote-endpoint = <&ptm0_out_port>; + }; + }; + + port@2 { + reg = <1>; + funnel0_in_port1: endpoint { + slave-mode; + remote-endpoint = <&ptm1_out_port>; + }; + }; + + port@3 { + reg = <2>; + funnel0_in_port2: endpoint { + slave-mode; + remote-endpoint = <&ptm2_out_port>; + }; + }; + + port@4 { + reg = <3>; + funnel0_in_port3: endpoint { + slave-mode; + remote-endpoint = <&ptm3_out_port>; + }; + }; + }; + }; + + funnel@0,e3c81000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0xe3c81000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* funnel output port */ + port@0 { + reg = <0>; + funnel1_out_port0: endpoint { + remote-endpoint = + <&replicator1_in_port0>; + }; + }; + + /* funnel input ports */ + port@1 { + reg = <0>; + funnel1_in_port0: endpoint { + slave-mode; + remote-endpoint = <&ptm4_out_port>; + }; + }; + + port@2 { + reg = <1>; + funnel1_in_port1: endpoint { + slave-mode; + remote-endpoint = <&ptm5_out_port>; + }; + }; + + port@3 { + reg = <2>; + funnel1_in_port2: endpoint { + slave-mode; + remote-endpoint = <&ptm6_out_port>; + }; + }; + + port@4 { + reg = <3>; + funnel1_in_port3: endpoint { + slave-mode; + remote-endpoint = <&ptm7_out_port>; + }; + }; + }; + }; + + funnel@0,e3cc1000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0xe3cc1000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* funnel output port */ + port@0 { + reg = <0>; + funnel2_out_port0: endpoint { + remote-endpoint = + <&replicator2_in_port0>; + }; + }; + + /* funnel input ports */ + port@1 { + reg = <0>; + funnel2_in_port0: endpoint { + slave-mode; + remote-endpoint = <&ptm8_out_port>; + }; + }; + + port@2 { + reg = <1>; + funnel2_in_port1: endpoint { + slave-mode; + remote-endpoint = <&ptm9_out_port>; + }; + }; + + port@3 { + reg = <2>; + funnel2_in_port2: endpoint { + slave-mode; + remote-endpoint = <&ptm10_out_port>; + }; + }; + + port@4 { + reg = <3>; + funnel2_in_port3: endpoint { + slave-mode; + remote-endpoint = <&ptm11_out_port>; + }; + }; + }; + }; + + funnel@0,e3d01000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0xe3d01000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* funnel output port */ + port@0 { + reg = <0>; + funnel3_out_port0: endpoint { + remote-endpoint = + <&replicator3_in_port0>; + }; + }; + + /* funnel input ports */ + port@1 { + reg = <0>; + funnel3_in_port0: endpoint { + slave-mode; + remote-endpoint = <&ptm12_out_port>; + }; + }; + + port@2 { + reg = <1>; + funnel3_in_port1: endpoint { + slave-mode; + remote-endpoint = <&ptm13_out_port>; + }; + }; + + port@3 { + reg = <2>; + funnel3_in_port2: endpoint { + slave-mode; + remote-endpoint = <&ptm14_out_port>; + }; + }; + + port@4 { + reg = <3>; + funnel3_in_port3: endpoint { + slave-mode; + remote-endpoint = <&ptm15_out_port>; + }; + }; + }; + }; + + funnel@0,e3c04000 { + compatible = "arm,coresight-funnel", "arm,primecell"; + reg = <0 0xe3c04000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* funnel output port */ + port@0 { + reg = <0>; + funnel4_out_port0: endpoint { + remote-endpoint = <&tpiu_in_port>; + }; + }; + + /* funnel input ports */ + port@1 { + reg = <0>; + funnel4_in_port0: endpoint { + slave-mode; + remote-endpoint = + <&replicator0_out_port1>; + }; + }; + + port@2 { + reg = <1>; + funnel4_in_port1: endpoint { + slave-mode; + remote-endpoint = + <&replicator1_out_port1>; + }; + }; + + port@3 { + reg = <2>; + funnel4_in_port2: endpoint { + slave-mode; + remote-endpoint = + <&replicator2_out_port1>; + }; + }; + + port@4 { + reg = <3>; + funnel4_in_port3: endpoint { + slave-mode; + remote-endpoint = + <&replicator3_out_port1>; + }; + }; + }; + }; + + ptm@0,e3c7c000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3c7c000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU0>; + port { + ptm0_out_port: endpoint { + remote-endpoint = <&funnel0_in_port0>; + }; + }; + }; + + ptm@0,e3c7d000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3c7d000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU1>; + port { + ptm1_out_port: endpoint { + remote-endpoint = <&funnel0_in_port1>; + }; + }; + }; + + ptm@0,e3c7e000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3c7e000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU2>; + port { + ptm2_out_port: endpoint { + remote-endpoint = <&funnel0_in_port2>; + }; + }; + }; + + ptm@0,e3c7f000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3c7f000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU3>; + port { + ptm3_out_port: endpoint { + remote-endpoint = <&funnel0_in_port3>; + }; + }; + }; + + ptm@0,e3cbc000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3cbc000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU4>; + port { + ptm4_out_port: endpoint { + remote-endpoint = <&funnel1_in_port0>; + }; + }; + }; + + ptm@0,e3cbd000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3cbd000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU5>; + port { + ptm5_out_port: endpoint { + remote-endpoint = <&funnel1_in_port1>; + }; + }; + }; + + ptm@0,e3cbe000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3cbe000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU6>; + port { + ptm6_out_port: endpoint { + remote-endpoint = <&funnel1_in_port2>; + }; + }; + }; + + ptm@0,e3cbf000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3cbf000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU7>; + port { + ptm7_out_port: endpoint { + remote-endpoint = <&funnel1_in_port3>; + }; + }; + }; + + ptm@0,e3cfc000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3cfc000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU8>; + port { + ptm8_out_port: endpoint { + remote-endpoint = <&funnel2_in_port0>; + }; + }; + }; + + ptm@0,e3cfd000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3cfd000 0 0x1000>; + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU9>; + port { + ptm9_out_port: endpoint { + remote-endpoint = <&funnel2_in_port1>; + }; + }; + }; + + ptm@0,e3cfe000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3cfe000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU10>; + port { + ptm10_out_port: endpoint { + remote-endpoint = <&funnel2_in_port2>; + }; + }; + }; + + ptm@0,e3cff000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3cff000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU11>; + port { + ptm11_out_port: endpoint { + remote-endpoint = <&funnel2_in_port3>; + }; + }; + }; + + ptm@0,e3d3c000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3d3c000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU12>; + port { + ptm12_out_port: endpoint { + remote-endpoint = <&funnel3_in_port0>; + }; + }; + }; + + ptm@0,e3d3d000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3d3d000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU13>; + port { + ptm13_out_port: endpoint { + remote-endpoint = <&funnel3_in_port1>; + }; + }; + }; + + ptm@0,e3d3e000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3d3e000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU14>; + port { + ptm14_out_port: endpoint { + remote-endpoint = <&funnel3_in_port2>; + }; + }; + }; + + ptm@0,e3d3f000 { + compatible = "arm,coresight-etm3x", "arm,primecell"; + reg = <0 0xe3d3f000 0 0x1000>; + + clocks = <&clk_375m>; + clock-names = "apb_pclk"; + cpu = <&CPU15>; + port { + ptm15_out_port: endpoint { + remote-endpoint = <&funnel3_in_port3>; + }; + }; + }; +}; From 5491ad2d3c24a3ad42524539b9802aed1c8df649 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Mon, 3 Nov 2014 11:07:46 -0700 Subject: [PATCH 077/214] ARM: removing support for etb/etm in "arch/arm/kernel/" Removing minimal support for etb/etm to favour an implementation that is more flexible, extensible and capable of handling more platforms. Also removing the only client of the old driver. That code can easily be replaced by entries for etb/etm in the device tree. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 184901a06a366d40386e07307bcadc9eeaabbd39) Signed-off-by: Mathieu Poirier Conflicts: arch/arm/include/asm/hardware/coresight.h arch/arm/kernel/Makefile arch/arm/kernel/etm.c arch/arm/mach-omap2/Kconfig --- arch/arm/Kconfig.debug | 8 - arch/arm/include/asm/hardware/coresight.h | 157 ----- arch/arm/kernel/Makefile | 1 - arch/arm/kernel/etm.c | 660 ---------------------- arch/arm/kernel/hw_breakpoint.c | 4 +- arch/arm/mach-omap2/Kconfig | 8 - arch/arm/mach-omap2/Makefile | 1 - arch/arm/mach-omap2/emu.c | 50 -- 8 files changed, 2 insertions(+), 887 deletions(-) delete mode 100644 arch/arm/include/asm/hardware/coresight.h delete mode 100644 arch/arm/kernel/etm.c delete mode 100644 arch/arm/mach-omap2/emu.c diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 6a961376f929..5fdb6dbc5f89 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -669,14 +669,6 @@ config EARLY_PRINTK kernel low-level debugging functions. Add earlyprintk to your kernel parameters to enable this console. -config OC_ETM - bool "On-chip ETM and ETB" - depends on ARM_AMBA - help - Enables the on-chip embedded trace macrocell and embedded trace - buffer driver that will allow you to collect traces of the - kernel code. - config ARM_KPROBES_TEST tristate "Kprobes test module" depends on KPROBES && MODULES diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h deleted file mode 100644 index 0cf7a6b842ff..000000000000 --- a/arch/arm/include/asm/hardware/coresight.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * linux/arch/arm/include/asm/hardware/coresight.h - * - * CoreSight components' registers - * - * Copyright (C) 2009 Nokia Corporation. - * Alexander Shishkin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __ASM_HARDWARE_CORESIGHT_H -#define __ASM_HARDWARE_CORESIGHT_H - -#define TRACER_ACCESSED_BIT 0 -#define TRACER_RUNNING_BIT 1 -#define TRACER_CYCLE_ACC_BIT 2 -#define TRACER_ACCESSED BIT(TRACER_ACCESSED_BIT) -#define TRACER_RUNNING BIT(TRACER_RUNNING_BIT) -#define TRACER_CYCLE_ACC BIT(TRACER_CYCLE_ACC_BIT) - -#define TRACER_TIMEOUT 10000 - -#define etm_writel(t, v, x) \ - (__raw_writel((v), (t)->etm_regs + (x))) -#define etm_readl(t, x) (__raw_readl((t)->etm_regs + (x))) - -/* CoreSight Management Registers */ -#define CSMR_LOCKACCESS 0xfb0 -#define CSMR_LOCKSTATUS 0xfb4 -#define CSMR_AUTHSTATUS 0xfb8 -#define CSMR_DEVID 0xfc8 -#define CSMR_DEVTYPE 0xfcc -/* CoreSight Component Registers */ -#define CSCR_CLASS 0xff4 - -#define CS_LAR_KEY 0xc5acce55 - -/* ETM control register, "ETM Architecture", 3.3.1 */ -#define ETMR_CTRL 0 -#define ETMCTRL_POWERDOWN 1 -#define ETMCTRL_PROGRAM (1 << 10) -#define ETMCTRL_PORTSEL (1 << 11) -#define ETMCTRL_DO_CONTEXTID (3 << 14) -#define ETMCTRL_PORTMASK1 (7 << 4) -#define ETMCTRL_PORTMASK2 (1 << 21) -#define ETMCTRL_PORTMASK (ETMCTRL_PORTMASK1 | ETMCTRL_PORTMASK2) -#define ETMCTRL_PORTSIZE(x) ((((x) & 7) << 4) | (!!((x) & 8)) << 21) -#define ETMCTRL_DO_CPRT (1 << 1) -#define ETMCTRL_DATAMASK (3 << 2) -#define ETMCTRL_DATA_DO_DATA (1 << 2) -#define ETMCTRL_DATA_DO_ADDR (1 << 3) -#define ETMCTRL_DATA_DO_BOTH (ETMCTRL_DATA_DO_DATA | ETMCTRL_DATA_DO_ADDR) -#define ETMCTRL_BRANCH_OUTPUT (1 << 8) -#define ETMCTRL_CYCLEACCURATE (1 << 12) - -/* ETM configuration code register */ -#define ETMR_CONFCODE (0x04) - -/* ETM trace start/stop resource control register */ -#define ETMR_TRACESSCTRL (0x18) - -/* ETM trigger event register */ -#define ETMR_TRIGEVT (0x08) - -/* address access type register bits, "ETM architecture", - * table 3-27 */ -/* - access type */ -#define ETMAAT_IFETCH 0 -#define ETMAAT_IEXEC 1 -#define ETMAAT_IEXECPASS 2 -#define ETMAAT_IEXECFAIL 3 -#define ETMAAT_DLOADSTORE 4 -#define ETMAAT_DLOAD 5 -#define ETMAAT_DSTORE 6 -/* - comparison access size */ -#define ETMAAT_JAVA (0 << 3) -#define ETMAAT_THUMB (1 << 3) -#define ETMAAT_ARM (3 << 3) -/* - data value comparison control */ -#define ETMAAT_NOVALCMP (0 << 5) -#define ETMAAT_VALMATCH (1 << 5) -#define ETMAAT_VALNOMATCH (3 << 5) -/* - exact match */ -#define ETMAAT_EXACTMATCH (1 << 7) -/* - context id comparator control */ -#define ETMAAT_IGNCONTEXTID (0 << 8) -#define ETMAAT_VALUE1 (1 << 8) -#define ETMAAT_VALUE2 (2 << 8) -#define ETMAAT_VALUE3 (3 << 8) -/* - security level control */ -#define ETMAAT_IGNSECURITY (0 << 10) -#define ETMAAT_NSONLY (1 << 10) -#define ETMAAT_SONLY (2 << 10) - -#define ETMR_COMP_VAL(x) (0x40 + (x) * 4) -#define ETMR_COMP_ACC_TYPE(x) (0x80 + (x) * 4) - -/* ETM status register, "ETM Architecture", 3.3.2 */ -#define ETMR_STATUS (0x10) -#define ETMST_OVERFLOW BIT(0) -#define ETMST_PROGBIT BIT(1) -#define ETMST_STARTSTOP BIT(2) -#define ETMST_TRIGGER BIT(3) - -#define etm_progbit(t) (etm_readl((t), ETMR_STATUS) & ETMST_PROGBIT) -#define etm_started(t) (etm_readl((t), ETMR_STATUS) & ETMST_STARTSTOP) -#define etm_triggered(t) (etm_readl((t), ETMR_STATUS) & ETMST_TRIGGER) - -#define ETMR_TRACEENCTRL2 0x1c -#define ETMR_TRACEENCTRL 0x24 -#define ETMTE_INCLEXCL BIT(24) -#define ETMR_TRACEENEVT 0x20 -#define ETMCTRL_OPTS (ETMCTRL_DO_CPRT | \ - ETMCTRL_DATA_DO_ADDR | \ - ETMCTRL_BRANCH_OUTPUT | \ - ETMCTRL_DO_CONTEXTID) - -/* ETM management registers, "ETM Architecture", 3.5.24 */ -#define ETMMR_OSLAR 0x300 -#define ETMMR_OSLSR 0x304 -#define ETMMR_OSSRR 0x308 -#define ETMMR_PDSR 0x314 - -/* ETB registers, "CoreSight Components TRM", 9.3 */ -#define ETBR_DEPTH 0x04 -#define ETBR_STATUS 0x0c -#define ETBR_READMEM 0x10 -#define ETBR_READADDR 0x14 -#define ETBR_WRITEADDR 0x18 -#define ETBR_TRIGGERCOUNT 0x1c -#define ETBR_CTRL 0x20 -#define ETBR_FORMATTERCTRL 0x304 -#define ETBFF_ENFTC 1 -#define ETBFF_ENFCONT BIT(1) -#define ETBFF_FONFLIN BIT(4) -#define ETBFF_MANUAL_FLUSH BIT(6) -#define ETBFF_TRIGIN BIT(8) -#define ETBFF_TRIGEVT BIT(9) -#define ETBFF_TRIGFL BIT(10) - -#define etb_writel(t, v, x) \ - (__raw_writel((v), (t)->etb_regs + (x))) -#define etb_readl(t, x) (__raw_readl((t)->etb_regs + (x))) - -#define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0) -#define etm_unlock(t) \ - do { etm_writel((t), CS_LAR_KEY, CSMR_LOCKACCESS); } while (0) - -#define etb_lock(t) do { etb_writel((t), 0, CSMR_LOCKACCESS); } while (0) -#define etb_unlock(t) \ - do { etb_writel((t), CS_LAR_KEY, CSMR_LOCKACCESS); } while (0) - -#endif /* __ASM_HARDWARE_CORESIGHT_H */ - diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad2..167143aeebf7 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o -obj-$(CONFIG_OC_ETM) += etm.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_FIQ) += fiq.o fiqasm.o diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c deleted file mode 100644 index 8ff0ecdc637f..000000000000 --- a/arch/arm/kernel/etm.c +++ /dev/null @@ -1,660 +0,0 @@ -/* - * linux/arch/arm/kernel/etm.c - * - * Driver for ARM's Embedded Trace Macrocell and Embedded Trace Buffer. - * - * Copyright (C) 2009 Nokia Corporation. - * Alexander Shishkin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Alexander Shishkin"); - -/* - * ETM tracer state - */ -struct tracectx { - unsigned int etb_bufsz; - void __iomem *etb_regs; - void __iomem *etm_regs; - unsigned long flags; - int ncmppairs; - int etm_portsz; - struct device *dev; - struct clk *emu_clk; - struct mutex mutex; -}; - -static struct tracectx tracer; - -static inline bool trace_isrunning(struct tracectx *t) -{ - return !!(t->flags & TRACER_RUNNING); -} - -static int etm_setup_address_range(struct tracectx *t, int n, - unsigned long start, unsigned long end, int exclude, int data) -{ - u32 flags = ETMAAT_ARM | ETMAAT_IGNCONTEXTID | ETMAAT_NSONLY | \ - ETMAAT_NOVALCMP; - - if (n < 1 || n > t->ncmppairs) - return -EINVAL; - - /* comparators and ranges are numbered starting with 1 as opposed - * to bits in a word */ - n--; - - if (data) - flags |= ETMAAT_DLOADSTORE; - else - flags |= ETMAAT_IEXEC; - - /* first comparator for the range */ - etm_writel(t, flags, ETMR_COMP_ACC_TYPE(n * 2)); - etm_writel(t, start, ETMR_COMP_VAL(n * 2)); - - /* second comparator is right next to it */ - etm_writel(t, flags, ETMR_COMP_ACC_TYPE(n * 2 + 1)); - etm_writel(t, end, ETMR_COMP_VAL(n * 2 + 1)); - - flags = exclude ? ETMTE_INCLEXCL : 0; - etm_writel(t, flags | (1 << n), ETMR_TRACEENCTRL); - - return 0; -} - -static int trace_start(struct tracectx *t) -{ - u32 v; - unsigned long timeout = TRACER_TIMEOUT; - - etb_unlock(t); - - etb_writel(t, 0, ETBR_FORMATTERCTRL); - etb_writel(t, 1, ETBR_CTRL); - - etb_lock(t); - - /* configure etm */ - v = ETMCTRL_OPTS | ETMCTRL_PROGRAM | ETMCTRL_PORTSIZE(t->etm_portsz); - - if (t->flags & TRACER_CYCLE_ACC) - v |= ETMCTRL_CYCLEACCURATE; - - etm_unlock(t); - - etm_writel(t, v, ETMR_CTRL); - - while (!(etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout) - ; - if (!timeout) { - dev_dbg(t->dev, "Waiting for progbit to assert timed out\n"); - etm_lock(t); - return -EFAULT; - } - - etm_setup_address_range(t, 1, (unsigned long)_stext, - (unsigned long)_etext, 0, 0); - etm_writel(t, 0, ETMR_TRACEENCTRL2); - etm_writel(t, 0, ETMR_TRACESSCTRL); - etm_writel(t, 0x6f, ETMR_TRACEENEVT); - - v &= ~ETMCTRL_PROGRAM; - v |= ETMCTRL_PORTSEL; - - etm_writel(t, v, ETMR_CTRL); - - timeout = TRACER_TIMEOUT; - while (etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM && --timeout) - ; - if (!timeout) { - dev_dbg(t->dev, "Waiting for progbit to deassert timed out\n"); - etm_lock(t); - return -EFAULT; - } - - etm_lock(t); - - t->flags |= TRACER_RUNNING; - - return 0; -} - -static int trace_stop(struct tracectx *t) -{ - unsigned long timeout = TRACER_TIMEOUT; - - etm_unlock(t); - - etm_writel(t, 0x440, ETMR_CTRL); - while (!(etm_readl(t, ETMR_CTRL) & ETMCTRL_PROGRAM) && --timeout) - ; - if (!timeout) { - dev_dbg(t->dev, "Waiting for progbit to assert timed out\n"); - etm_lock(t); - return -EFAULT; - } - - etm_lock(t); - - etb_unlock(t); - etb_writel(t, ETBFF_MANUAL_FLUSH, ETBR_FORMATTERCTRL); - - timeout = TRACER_TIMEOUT; - while (etb_readl(t, ETBR_FORMATTERCTRL) & - ETBFF_MANUAL_FLUSH && --timeout) - ; - if (!timeout) { - dev_dbg(t->dev, "Waiting for formatter flush to commence " - "timed out\n"); - etb_lock(t); - return -EFAULT; - } - - etb_writel(t, 0, ETBR_CTRL); - - etb_lock(t); - - t->flags &= ~TRACER_RUNNING; - - return 0; -} - -static int etb_getdatalen(struct tracectx *t) -{ - u32 v; - int rp, wp; - - v = etb_readl(t, ETBR_STATUS); - - if (v & 1) - return t->etb_bufsz; - - rp = etb_readl(t, ETBR_READADDR); - wp = etb_readl(t, ETBR_WRITEADDR); - - if (rp > wp) { - etb_writel(t, 0, ETBR_READADDR); - etb_writel(t, 0, ETBR_WRITEADDR); - - return 0; - } - - return wp - rp; -} - -/* sysrq+v will always stop the running trace and leave it at that */ -static void etm_dump(void) -{ - struct tracectx *t = &tracer; - u32 first = 0; - int length; - - if (!t->etb_regs) { - printk(KERN_INFO "No tracing hardware found\n"); - return; - } - - if (trace_isrunning(t)) - trace_stop(t); - - etb_unlock(t); - - length = etb_getdatalen(t); - - if (length == t->etb_bufsz) - first = etb_readl(t, ETBR_WRITEADDR); - - etb_writel(t, first, ETBR_READADDR); - - printk(KERN_INFO "Trace buffer contents length: %d\n", length); - printk(KERN_INFO "--- ETB buffer begin ---\n"); - for (; length; length--) - printk("%08x", cpu_to_be32(etb_readl(t, ETBR_READMEM))); - printk(KERN_INFO "\n--- ETB buffer end ---\n"); - - /* deassert the overflow bit */ - etb_writel(t, 1, ETBR_CTRL); - etb_writel(t, 0, ETBR_CTRL); - - etb_writel(t, 0, ETBR_TRIGGERCOUNT); - etb_writel(t, 0, ETBR_READADDR); - etb_writel(t, 0, ETBR_WRITEADDR); - - etb_lock(t); -} - -static void sysrq_etm_dump(int key) -{ - dev_dbg(tracer.dev, "Dumping ETB buffer\n"); - etm_dump(); -} - -static struct sysrq_key_op sysrq_etm_op = { - .handler = sysrq_etm_dump, - .help_msg = "etm-buffer-dump(v)", - .action_msg = "etm", -}; - -static int etb_open(struct inode *inode, struct file *file) -{ - if (!tracer.etb_regs) - return -ENODEV; - - file->private_data = &tracer; - - return nonseekable_open(inode, file); -} - -static ssize_t etb_read(struct file *file, char __user *data, - size_t len, loff_t *ppos) -{ - int total, i; - long length; - struct tracectx *t = file->private_data; - u32 first = 0; - u32 *buf; - - mutex_lock(&t->mutex); - - if (trace_isrunning(t)) { - length = 0; - goto out; - } - - etb_unlock(t); - - total = etb_getdatalen(t); - if (total == t->etb_bufsz) - first = etb_readl(t, ETBR_WRITEADDR); - - etb_writel(t, first, ETBR_READADDR); - - length = min(total * 4, (int)len); - buf = vmalloc(length); - - dev_dbg(t->dev, "ETB buffer length: %d\n", total); - dev_dbg(t->dev, "ETB status reg: %x\n", etb_readl(t, ETBR_STATUS)); - for (i = 0; i < length / 4; i++) - buf[i] = etb_readl(t, ETBR_READMEM); - - /* the only way to deassert overflow bit in ETB status is this */ - etb_writel(t, 1, ETBR_CTRL); - etb_writel(t, 0, ETBR_CTRL); - - etb_writel(t, 0, ETBR_WRITEADDR); - etb_writel(t, 0, ETBR_READADDR); - etb_writel(t, 0, ETBR_TRIGGERCOUNT); - - etb_lock(t); - - length -= copy_to_user(data, buf, length); - vfree(buf); - -out: - mutex_unlock(&t->mutex); - - return length; -} - -static int etb_release(struct inode *inode, struct file *file) -{ - /* there's nothing to do here, actually */ - return 0; -} - -static const struct file_operations etb_fops = { - .owner = THIS_MODULE, - .read = etb_read, - .open = etb_open, - .release = etb_release, - .llseek = no_llseek, -}; - -static struct miscdevice etb_miscdev = { - .name = "tracebuf", - .minor = 0, - .fops = &etb_fops, -}; - -static int etb_probe(struct amba_device *dev, const struct amba_id *id) -{ - struct tracectx *t = &tracer; - int ret = 0; - - ret = amba_request_regions(dev, NULL); - if (ret) - goto out; - - t->etb_regs = ioremap_nocache(dev->res.start, resource_size(&dev->res)); - if (!t->etb_regs) { - ret = -ENOMEM; - goto out_release; - } - - amba_set_drvdata(dev, t); - - etb_miscdev.parent = &dev->dev; - - ret = misc_register(&etb_miscdev); - if (ret) - goto out_unmap; - - t->emu_clk = clk_get(&dev->dev, "emu_src_ck"); - if (IS_ERR(t->emu_clk)) { - dev_dbg(&dev->dev, "Failed to obtain emu_src_ck.\n"); - return -EFAULT; - } - - clk_enable(t->emu_clk); - - etb_unlock(t); - t->etb_bufsz = etb_readl(t, ETBR_DEPTH); - dev_dbg(&dev->dev, "Size: %x\n", t->etb_bufsz); - - /* make sure trace capture is disabled */ - etb_writel(t, 0, ETBR_CTRL); - etb_writel(t, 0x1000, ETBR_FORMATTERCTRL); - etb_lock(t); - - dev_dbg(&dev->dev, "ETB AMBA driver initialized.\n"); - -out: - return ret; - -out_unmap: - amba_set_drvdata(dev, NULL); - iounmap(t->etb_regs); - -out_release: - amba_release_regions(dev); - - return ret; -} - -static int etb_remove(struct amba_device *dev) -{ - struct tracectx *t = amba_get_drvdata(dev); - - amba_set_drvdata(dev, NULL); - - iounmap(t->etb_regs); - t->etb_regs = NULL; - - clk_disable(t->emu_clk); - clk_put(t->emu_clk); - - amba_release_regions(dev); - - return 0; -} - -static struct amba_id etb_ids[] = { - { - .id = 0x0003b907, - .mask = 0x0007ffff, - }, - { 0, 0 }, -}; - -static struct amba_driver etb_driver = { - .drv = { - .name = "etb", - .owner = THIS_MODULE, - }, - .probe = etb_probe, - .remove = etb_remove, - .id_table = etb_ids, -}; - -/* use a sysfs file "trace_running" to start/stop tracing */ -static ssize_t trace_running_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - return sprintf(buf, "%x\n", trace_isrunning(&tracer)); -} - -static ssize_t trace_running_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t n) -{ - unsigned int value; - int ret; - - if (sscanf(buf, "%u", &value) != 1) - return -EINVAL; - - mutex_lock(&tracer.mutex); - ret = value ? trace_start(&tracer) : trace_stop(&tracer); - mutex_unlock(&tracer.mutex); - - return ret ? : n; -} - -static struct kobj_attribute trace_running_attr = - __ATTR(trace_running, 0644, trace_running_show, trace_running_store); - -static ssize_t trace_info_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - u32 etb_wa, etb_ra, etb_st, etb_fc, etm_ctrl, etm_st; - int datalen; - - etb_unlock(&tracer); - datalen = etb_getdatalen(&tracer); - etb_wa = etb_readl(&tracer, ETBR_WRITEADDR); - etb_ra = etb_readl(&tracer, ETBR_READADDR); - etb_st = etb_readl(&tracer, ETBR_STATUS); - etb_fc = etb_readl(&tracer, ETBR_FORMATTERCTRL); - etb_lock(&tracer); - - etm_unlock(&tracer); - etm_ctrl = etm_readl(&tracer, ETMR_CTRL); - etm_st = etm_readl(&tracer, ETMR_STATUS); - etm_lock(&tracer); - - return sprintf(buf, "Trace buffer len: %d\nComparator pairs: %d\n" - "ETBR_WRITEADDR:\t%08x\n" - "ETBR_READADDR:\t%08x\n" - "ETBR_STATUS:\t%08x\n" - "ETBR_FORMATTERCTRL:\t%08x\n" - "ETMR_CTRL:\t%08x\n" - "ETMR_STATUS:\t%08x\n", - datalen, - tracer.ncmppairs, - etb_wa, - etb_ra, - etb_st, - etb_fc, - etm_ctrl, - etm_st - ); -} - -static struct kobj_attribute trace_info_attr = - __ATTR(trace_info, 0444, trace_info_show, NULL); - -static ssize_t trace_mode_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d %d\n", - !!(tracer.flags & TRACER_CYCLE_ACC), - tracer.etm_portsz); -} - -static ssize_t trace_mode_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t n) -{ - unsigned int cycacc, portsz; - - if (sscanf(buf, "%u %u", &cycacc, &portsz) != 2) - return -EINVAL; - - mutex_lock(&tracer.mutex); - if (cycacc) - tracer.flags |= TRACER_CYCLE_ACC; - else - tracer.flags &= ~TRACER_CYCLE_ACC; - - tracer.etm_portsz = portsz & 0x0f; - mutex_unlock(&tracer.mutex); - - return n; -} - -static struct kobj_attribute trace_mode_attr = - __ATTR(trace_mode, 0644, trace_mode_show, trace_mode_store); - -static int etm_probe(struct amba_device *dev, const struct amba_id *id) -{ - struct tracectx *t = &tracer; - int ret = 0; - - if (t->etm_regs) { - dev_dbg(&dev->dev, "ETM already initialized\n"); - ret = -EBUSY; - goto out; - } - - ret = amba_request_regions(dev, NULL); - if (ret) - goto out; - - t->etm_regs = ioremap_nocache(dev->res.start, resource_size(&dev->res)); - if (!t->etm_regs) { - ret = -ENOMEM; - goto out_release; - } - - amba_set_drvdata(dev, t); - - mutex_init(&t->mutex); - t->dev = &dev->dev; - t->flags = TRACER_CYCLE_ACC; - t->etm_portsz = 1; - - etm_unlock(t); - (void)etm_readl(t, ETMMR_PDSR); - /* dummy first read */ - (void)etm_readl(&tracer, ETMMR_OSSRR); - - t->ncmppairs = etm_readl(t, ETMR_CONFCODE) & 0xf; - etm_writel(t, 0x440, ETMR_CTRL); - etm_lock(t); - - ret = sysfs_create_file(&dev->dev.kobj, - &trace_running_attr.attr); - if (ret) - goto out_unmap; - - /* failing to create any of these two is not fatal */ - ret = sysfs_create_file(&dev->dev.kobj, &trace_info_attr.attr); - if (ret) - dev_dbg(&dev->dev, "Failed to create trace_info in sysfs\n"); - - ret = sysfs_create_file(&dev->dev.kobj, &trace_mode_attr.attr); - if (ret) - dev_dbg(&dev->dev, "Failed to create trace_mode in sysfs\n"); - - dev_dbg(t->dev, "ETM AMBA driver initialized.\n"); - -out: - return ret; - -out_unmap: - amba_set_drvdata(dev, NULL); - iounmap(t->etm_regs); - -out_release: - amba_release_regions(dev); - - return ret; -} - -static int etm_remove(struct amba_device *dev) -{ - struct tracectx *t = amba_get_drvdata(dev); - - amba_set_drvdata(dev, NULL); - - iounmap(t->etm_regs); - t->etm_regs = NULL; - - amba_release_regions(dev); - - sysfs_remove_file(&dev->dev.kobj, &trace_running_attr.attr); - sysfs_remove_file(&dev->dev.kobj, &trace_info_attr.attr); - sysfs_remove_file(&dev->dev.kobj, &trace_mode_attr.attr); - - return 0; -} - -static struct amba_id etm_ids[] = { - { - .id = 0x0003b921, - .mask = 0x0007ffff, - }, - { 0, 0 }, -}; - -static struct amba_driver etm_driver = { - .drv = { - .name = "etm", - .owner = THIS_MODULE, - }, - .probe = etm_probe, - .remove = etm_remove, - .id_table = etm_ids, -}; - -static int __init etm_init(void) -{ - int retval; - - retval = amba_driver_register(&etb_driver); - if (retval) { - printk(KERN_ERR "Failed to register etb\n"); - return retval; - } - - retval = amba_driver_register(&etm_driver); - if (retval) { - amba_driver_unregister(&etb_driver); - printk(KERN_ERR "Failed to probe etm\n"); - return retval; - } - - /* not being able to install this handler is not fatal */ - (void)register_sysrq_key('v', &sysrq_etm_op); - - return 0; -} - -device_initcall(etm_init); - diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 1fd749ee4a1b..2bdcb0f4cf5d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include /* Breakpoint currently in use for each BRP. */ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); @@ -975,7 +975,7 @@ static void reset_ctrl_regs(void *unused) * Unconditionally clear the OS lock by writing a value * other than CS_LAR_KEY to the access register. */ - ARM_DBG_WRITE(c1, c0, 4, ~CS_LAR_KEY); + ARM_DBG_WRITE(c1, c0, 4, ~CORESIGHT_UNLOCK); isb(); /* diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index f49cd51e162a..42afc6682d10 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -394,14 +394,6 @@ config MACH_OMAP4_PANDA select OMAP_PACKAGE_CBS select REGULATOR_FIXED_VOLTAGE if REGULATOR -config OMAP3_EMU - bool "OMAP3 debugging peripherals" - depends on ARCH_OMAP3 - select ARM_AMBA - select OC_ETM - help - Say Y here to enable debugging hardware of omap3 - config OMAP3_SDRC_AC_TIMING bool "Enable SDRC AC timing register changes" depends on ARCH_OMAP3 diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 55a9d6777683..e2f7210a8eab 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -200,7 +200,6 @@ obj-$(CONFIG_SOC_AM33XX) += omap_hwmod_33xx_data.o obj-$(CONFIG_ARCH_OMAP4) += omap_hwmod_44xx_data.o # EMU peripherals -obj-$(CONFIG_OMAP3_EMU) += emu.o obj-$(CONFIG_HW_PERF_EVENTS) += pmu.o obj-$(CONFIG_OMAP_MBOX_FWK) += mailbox_mach.o diff --git a/arch/arm/mach-omap2/emu.c b/arch/arm/mach-omap2/emu.c deleted file mode 100644 index cbeaca2d7695..000000000000 --- a/arch/arm/mach-omap2/emu.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * emu.c - * - * ETM and ETB CoreSight components' resources as found in OMAP3xxx. - * - * Copyright (C) 2009 Nokia Corporation. - * Alexander Shishkin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "soc.h" -#include "iomap.h" - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Alexander Shishkin"); - -/* Cortex CoreSight components within omap3xxx EMU */ -#define ETM_BASE (L4_EMU_34XX_PHYS + 0x10000) -#define DBG_BASE (L4_EMU_34XX_PHYS + 0x11000) -#define ETB_BASE (L4_EMU_34XX_PHYS + 0x1b000) -#define DAPCTL (L4_EMU_34XX_PHYS + 0x1d000) - -static AMBA_APB_DEVICE(omap3_etb, "etb", 0x000bb907, ETB_BASE, { }, NULL); -static AMBA_APB_DEVICE(omap3_etm, "etm", 0x102bb921, ETM_BASE, { }, NULL); - -static int __init emu_init(void) -{ - if (!cpu_is_omap34xx()) - return -ENODEV; - - amba_device_register(&omap3_etb_device, &iomem_resource); - amba_device_register(&omap3_etm_device, &iomem_resource); - - return 0; -} - -omap_subsys_initcall(emu_init); From ade3812385c91985cee85151722d8de9644f3b98 Mon Sep 17 00:00:00 2001 From: Pankaj Dubey Date: Thu, 13 Nov 2014 14:12:47 +0530 Subject: [PATCH 078/214] coresight: fix typo in comment in coresight-priv.h fixes a typo %s/eveyone/everyone/ in function CS_UNLOCK. Signed-off-by: Pankaj Dubey Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 84b763d5b808ee6c23d95314ac81859cab471adb) Signed-off-by: Mathieu Poirier --- drivers/coresight/coresight-priv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/coresight/coresight-priv.h b/drivers/coresight/coresight-priv.h index 8d1180c47c0b..7b3372fca4f6 100644 --- a/drivers/coresight/coresight-priv.h +++ b/drivers/coresight/coresight-priv.h @@ -47,7 +47,7 @@ static inline void CS_UNLOCK(void __iomem *addr) { do { writel_relaxed(CORESIGHT_UNLOCK, addr + CORESIGHT_LAR); - /* Make sure eveyone has seen this */ + /* Make sure everyone has seen this */ mb(); } while (0); } From b8f437e397ad822bd8e9e7e0975467731abfa881 Mon Sep 17 00:00:00 2001 From: Pankaj Dubey Date: Thu, 13 Nov 2014 14:12:49 +0530 Subject: [PATCH 079/214] coresight-replicator: remove .owner field for driver There is no need of .owner field for driver using module_platform_driver. Signed-off-by: Pankaj Dubey Signed-off-by: Greg Kroah-Hartman (cherry picked from commit ac610cd7c2b13d7f916ebb73c2ae6b91c1d53a61) Signed-off-by: Mathieu Poirier --- drivers/coresight/coresight-replicator.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/coresight/coresight-replicator.c b/drivers/coresight/coresight-replicator.c index c879039d59cc..a2dfcf903551 100644 --- a/drivers/coresight/coresight-replicator.c +++ b/drivers/coresight/coresight-replicator.c @@ -117,7 +117,6 @@ static struct platform_driver replicator_driver = { .remove = replicator_remove, .driver = { .name = "coresight-replicator", - .owner = THIS_MODULE, .of_match_table = replicator_match, }, }; From 1fe5620fcd6c2f0a4a927ee10c8e53196da392f3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 15 Dec 2014 14:22:46 +0100 Subject: [PATCH 080/214] isofs: Fix infinite looping over CE entries commit f54e18f1b831c92f6512d2eedb224cd63d607d3d upstream. Rock Ridge extensions define so called Continuation Entries (CE) which define where is further space with Rock Ridge data. Corrupted isofs image can contain arbitrarily long chain of these, including a one containing loop and thus causing kernel to end in an infinite loop when traversing these entries. Limit the traversal to 32 entries which should be more than enough space to store all the Rock Ridge data. Reported-by: P J P Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/isofs/rock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index f488bbae541a..bb63254ed848 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -30,6 +30,7 @@ struct rock_state { int cont_size; int cont_extent; int cont_offset; + int cont_loops; struct inode *inode; }; @@ -73,6 +74,9 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode) rs->inode = inode; } +/* Maximum number of Rock Ridge continuation entries */ +#define RR_MAX_CE_ENTRIES 32 + /* * Returns 0 if the caller should continue scanning, 1 if the scan must end * and -ve on error. @@ -105,6 +109,8 @@ static int rock_continue(struct rock_state *rs) goto out; } ret = -EIO; + if (++rs->cont_loops >= RR_MAX_CE_ENTRIES) + goto out; bh = sb_bread(rs->inode->i_sb, rs->cont_extent); if (bh) { memcpy(rs->buffer, bh->b_data + rs->cont_offset, From 359d2d755e1924fc7231d8423696a7365eccd3e1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Dec 2014 16:48:16 -0800 Subject: [PATCH 081/214] x86/tls: Validate TLS entries to protect espfix commit 41bdc78544b8a93a9c6814b8bbbfef966272abbe upstream. Installing a 16-bit RW data segment into the GDT defeats espfix. AFAICT this will not affect glibc, Wine, or dosemu at all. Signed-off-by: Andy Lutomirski Acked-by: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Willy Tarreau Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tls.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index f7fec09e3e3a..e7650bd71109 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -27,6 +27,21 @@ static int get_free_idx(void) return -ESRCH; } +static bool tls_desc_okay(const struct user_desc *info) +{ + if (LDT_empty(info)) + return true; + + /* + * espfix is required for 16-bit data segments, but espfix + * only works for LDT segments. + */ + if (!info->seg_32bit) + return false; + + return true; +} + static void set_tls_desc(struct task_struct *p, int idx, const struct user_desc *info, int n) { @@ -66,6 +81,9 @@ int do_set_thread_area(struct task_struct *p, int idx, if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; + if (!tls_desc_okay(&info)) + return -EINVAL; + if (idx == -1) idx = info.entry_number; @@ -192,6 +210,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, { struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; const struct user_desc *info; + int i; if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || @@ -205,6 +224,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, else info = infobuf; + for (i = 0; i < count / sizeof(struct user_desc); i++) + if (!tls_desc_okay(info + i)) + return -EINVAL; + set_tls_desc(target, GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)), info, count / sizeof(struct user_desc)); From 84f538365947a5575c6c9d1404fb62386ae994ba Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 4 Dec 2014 16:48:17 -0800 Subject: [PATCH 082/214] x86/tls: Disallow unusual TLS segments commit 0e58af4e1d2166e9e33375a0f121e4867010d4f8 upstream. Users have no business installing custom code segments into the GDT, and segments that are not present but are otherwise valid are a historical source of interesting attacks. For completeness, block attempts to set the L bit. (Prior to this patch, the L bit would have been silently dropped.) This is an ABI break. I've checked glibc, musl, and Wine, and none of them look like they'll have any trouble. Note to stable maintainers: this is a hardening patch that fixes no known bugs. Given the possibility of ABI issues, this probably shouldn't be backported quickly. Signed-off-by: Andy Lutomirski Acked-by: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Willy Tarreau Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tls.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index e7650bd71109..3e551eee87b9 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -39,6 +39,28 @@ static bool tls_desc_okay(const struct user_desc *info) if (!info->seg_32bit) return false; + /* Only allow data segments in the TLS array. */ + if (info->contents > 1) + return false; + + /* + * Non-present segments with DPL 3 present an interesting attack + * surface. The kernel should handle such segments correctly, + * but TLS is very difficult to protect in a sandbox, so prevent + * such segments from being created. + * + * If userspace needs to remove a TLS entry, it can still delete + * it outright. + */ + if (info->seg_not_present) + return false; + +#ifdef CONFIG_X86_64 + /* The L bit makes no sense for data. */ + if (info->lm) + return false; +#endif + return true; } From cb7977a9a8f74fa555a893c052f82a826cc66231 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 8 Dec 2014 13:55:20 -0800 Subject: [PATCH 083/214] x86_64, switch_to(): Load TLS descriptors before switching DS and ES commit f647d7c155f069c1a068030255c300663516420e upstream. Otherwise, if buggy user code points DS or ES into the TLS array, they would be corrupted after a context switch. This also significantly improves the comments and documents some gotchas in the code. Before this patch, the both tests below failed. With this patch, the es test passes, although the gsbase test still fails. ----- begin es test ----- /* * Copyright (c) 2014 Andy Lutomirski * GPL v2 */ static unsigned short GDT3(int idx) { return (idx << 3) | 3; } static int create_tls(int idx, unsigned int base) { struct user_desc desc = { .entry_number = idx, .base_addr = base, .limit = 0xfffff, .seg_32bit = 1, .contents = 0, /* Data, grow-up */ .read_exec_only = 0, .limit_in_pages = 1, .seg_not_present = 0, .useable = 0, }; if (syscall(SYS_set_thread_area, &desc) != 0) err(1, "set_thread_area"); return desc.entry_number; } int main() { int idx = create_tls(-1, 0); printf("Allocated GDT index %d\n", idx); unsigned short orig_es; asm volatile ("mov %%es,%0" : "=rm" (orig_es)); int errors = 0; int total = 1000; for (int i = 0; i < total; i++) { asm volatile ("mov %0,%%es" : : "rm" (GDT3(idx))); usleep(100); unsigned short es; asm volatile ("mov %%es,%0" : "=rm" (es)); asm volatile ("mov %0,%%es" : : "rm" (orig_es)); if (es != GDT3(idx)) { if (errors == 0) printf("[FAIL]\tES changed from 0x%hx to 0x%hx\n", GDT3(idx), es); errors++; } } if (errors) { printf("[FAIL]\tES was corrupted %d/%d times\n", errors, total); return 1; } else { printf("[OK]\tES was preserved\n"); return 0; } } ----- end es test ----- ----- begin gsbase test ----- /* * gsbase.c, a gsbase test * Copyright (c) 2014 Andy Lutomirski * GPL v2 */ static unsigned char *testptr, *testptr2; static unsigned char read_gs_testvals(void) { unsigned char ret; asm volatile ("movb %%gs:%1, %0" : "=r" (ret) : "m" (*testptr)); return ret; } int main() { int errors = 0; testptr = mmap((void *)0x200000000UL, 1, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0); if (testptr == MAP_FAILED) err(1, "mmap"); testptr2 = mmap((void *)0x300000000UL, 1, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS, -1, 0); if (testptr2 == MAP_FAILED) err(1, "mmap"); *testptr = 0; *testptr2 = 1; if (syscall(SYS_arch_prctl, ARCH_SET_GS, (unsigned long)testptr2 - (unsigned long)testptr) != 0) err(1, "ARCH_SET_GS"); usleep(100); if (read_gs_testvals() == 1) { printf("[OK]\tARCH_SET_GS worked\n"); } else { printf("[FAIL]\tARCH_SET_GS failed\n"); errors++; } asm volatile ("mov %0,%%gs" : : "r" (0)); if (read_gs_testvals() == 0) { printf("[OK]\tWriting 0 to gs worked\n"); } else { printf("[FAIL]\tWriting 0 to gs failed\n"); errors++; } usleep(100); if (read_gs_testvals() == 0) { printf("[OK]\tgsbase is still zero\n"); } else { printf("[FAIL]\tgsbase was corrupted\n"); errors++; } return errors == 0 ? 0 : 1; } ----- end gsbase test ----- Signed-off-by: Andy Lutomirski Cc: Andi Kleen Cc: Linus Torvalds Link: http://lkml.kernel.org/r/509d27c9fec78217691c3dad91cec87e1006b34a.1418075657.git.luto@amacapital.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/process_64.c | 101 +++++++++++++++++++++++++---------- 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f99a242730e9..7099ab1e075b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -279,24 +279,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) fpu = switch_fpu_prepare(prev_p, next_p, cpu); - /* - * Reload esp0, LDT and the page table pointer: - */ + /* Reload esp0 and ss1. */ load_sp0(tss, next); - /* - * Switch DS and ES. - * This won't pick up thread selector changes, but I guess that is ok. - */ - savesegment(es, prev->es); - if (unlikely(next->es | prev->es)) - loadsegment(es, next->es); - - savesegment(ds, prev->ds); - if (unlikely(next->ds | prev->ds)) - loadsegment(ds, next->ds); - - /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). * @@ -305,41 +290,101 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) savesegment(fs, fsindex); savesegment(gs, gsindex); + /* + * Load TLS before restoring any segments so that segment loads + * reference the correct GDT entries. + */ load_TLS(next, cpu); /* - * Leave lazy mode, flushing any hypercalls made here. - * This must be done before restoring TLS segments so - * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up - * to date. + * Leave lazy mode, flushing any hypercalls made here. This + * must be done after loading TLS entries in the GDT but before + * loading segments that might reference them, and and it must + * be done before math_state_restore, so the TS bit is up to + * date. */ arch_end_context_switch(next_p); + /* Switch DS and ES. + * + * Reading them only returns the selectors, but writing them (if + * nonzero) loads the full descriptor from the GDT or LDT. The + * LDT for next is loaded in switch_mm, and the GDT is loaded + * above. + * + * We therefore need to write new values to the segment + * registers on every context switch unless both the new and old + * values are zero. + * + * Note that we don't need to do anything for CS and SS, as + * those are saved and restored as part of pt_regs. + */ + savesegment(es, prev->es); + if (unlikely(next->es | prev->es)) + loadsegment(es, next->es); + + savesegment(ds, prev->ds); + if (unlikely(next->ds | prev->ds)) + loadsegment(ds, next->ds); + /* * Switch FS and GS. * - * Segment register != 0 always requires a reload. Also - * reload when it has changed. When prev process used 64bit - * base always reload to avoid an information leak. + * These are even more complicated than FS and GS: they have + * 64-bit bases are that controlled by arch_prctl. Those bases + * only differ from the values in the GDT or LDT if the selector + * is 0. + * + * Loading the segment register resets the hidden base part of + * the register to 0 or the value from the GDT / LDT. If the + * next base address zero, writing 0 to the segment register is + * much faster than using wrmsr to explicitly zero the base. + * + * The thread_struct.fs and thread_struct.gs values are 0 + * if the fs and gs bases respectively are not overridden + * from the values implied by fsindex and gsindex. They + * are nonzero, and store the nonzero base addresses, if + * the bases are overridden. + * + * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should + * be impossible. + * + * Therefore we need to reload the segment registers if either + * the old or new selector is nonzero, and we need to override + * the base address if next thread expects it to be overridden. + * + * This code is unnecessarily slow in the case where the old and + * new indexes are zero and the new base is nonzero -- it will + * unnecessarily write 0 to the selector before writing the new + * base address. + * + * Note: This all depends on arch_prctl being the only way that + * user code can override the segment base. Once wrfsbase and + * wrgsbase are enabled, most of this code will need to change. */ if (unlikely(fsindex | next->fsindex | prev->fs)) { loadsegment(fs, next->fsindex); + /* - * Check if the user used a selector != 0; if yes - * clear 64bit base, since overloaded base is always - * mapped to the Null selector + * If user code wrote a nonzero value to FS, then it also + * cleared the overridden base address. + * + * XXX: if user code wrote 0 to FS and cleared the base + * address itself, we won't notice and we'll incorrectly + * restore the prior base address next time we reschdule + * the process. */ if (fsindex) prev->fs = 0; } - /* when next process has a 64bit base use it */ if (next->fs) wrmsrl(MSR_FS_BASE, next->fs); prev->fsindex = fsindex; if (unlikely(gsindex | next->gsindex | prev->gs)) { load_gs_index(next->gsindex); + + /* This works (and fails) the same way as fsindex above. */ if (gsindex) prev->gs = 0; } From 9d2b6132e6963ccdfb15a4570984382425b96529 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 5 Dec 2014 19:03:28 -0800 Subject: [PATCH 084/214] x86, kvm: Clear paravirt_enabled on KVM guests for espfix32's benefit commit 29fa6825463c97e5157284db80107d1bfac5d77b upstream. paravirt_enabled has the following effects: - Disables the F00F bug workaround warning. There is no F00F bug workaround any more because Linux's standard IDT handling already works around the F00F bug, but the warning still exists. This is only cosmetic, and, in any event, there is no such thing as KVM on a CPU with the F00F bug. - Disables 32-bit APM BIOS detection. On a KVM paravirt system, there should be no APM BIOS anyway. - Disables tboot. I think that the tboot code should check the CPUID hypervisor bit directly if it matters. - paravirt_enabled disables espfix32. espfix32 should *not* be disabled under KVM paravirt. The last point is the purpose of this patch. It fixes a leak of the high 16 bits of the kernel stack address on 32-bit KVM paravirt guests. Fixes CVE-2014-8134. Suggested-by: Konrad Rzeszutek Wilk Signed-off-by: Andy Lutomirski Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kvm.c | 9 ++++++++- arch/x86/kernel/kvmclock.c | 1 - 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index cd6d9a5a42f6..c4ff2a916139 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -279,7 +279,14 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; - pv_info.paravirt_enabled = 1; + + /* + * KVM isn't paravirt in the sense of paravirt_enabled. A KVM + * guest kernel works like a bare metal kernel with additional + * features, and paravirt_enabled is about features that are + * missing. + */ + pv_info.paravirt_enabled = 0; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 3dd37ebd591b..41514f56c241 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -265,7 +265,6 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); - pv_info.paravirt_enabled = 1; pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) From 15225559b32c87614b39713875f6d799df05c9b0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 9 Sep 2014 14:13:51 +1000 Subject: [PATCH 085/214] md/bitmap: always wait for writes on unplug. commit 4b5060ddae2b03c5387321fafc089d242225697a upstream. If two threads call bitmap_unplug at the same time, then one might schedule all the writes, and the other might decide that it doesn't need to wait. But really it does. It rarely hurts to wait when it isn't absolutely necessary, and the current code doesn't really focus on 'absolutely necessary' anyway. So just wait always. This can potentially lead to data corruption if a crash happens at an awkward time and data was written before the bitmap was updated. It is very unlikely, but this should go to -stable just to be safe. Appropriate for any -stable. Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/bitmap.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 5a2c75499824..a79cbd6038f6 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -883,7 +883,6 @@ void bitmap_unplug(struct bitmap *bitmap) { unsigned long i; int dirty, need_write; - int wait = 0; if (!bitmap || !bitmap->storage.filemap || test_bit(BITMAP_STALE, &bitmap->flags)) @@ -901,16 +900,13 @@ void bitmap_unplug(struct bitmap *bitmap) clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); write_page(bitmap, bitmap->storage.filemap[i], 0); } - if (dirty) - wait = 1; - } - if (wait) { /* if any writes were performed, we need to wait on them */ - if (bitmap->storage.file) - wait_event(bitmap->write_wait, - atomic_read(&bitmap->pending_writes)==0); - else - md_super_wait(bitmap->mddev); } + if (bitmap->storage.file) + wait_event(bitmap->write_wait, + atomic_read(&bitmap->pending_writes)==0); + else + md_super_wait(bitmap->mddev); + if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) bitmap_file_kick(bitmap); } From 3e6845bed5e8b6b1938d509486e0fe0b953daa24 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 24 Oct 2014 21:19:57 +0400 Subject: [PATCH 086/214] mfd: tc6393xb: Fail ohci suspend if full state restore is required commit 1a5fb99de4850cba710d91becfa2c65653048589 upstream. Some boards with TC6393XB chip require full state restore during system resume thanks to chip's VCC being cut off during suspend (Sharp SL-6000 tosa is one of them). Failing to do so would result in ohci Oops on resume due to internal memory contentes being changed. Fail ohci suspend on tc6393xb is full state restore is required. Recommended workaround is to unbind tmio-ohci driver before suspend and rebind it after resume. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/tc6393xb.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 15e1463e5e13..17fe83e81ea4 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -263,6 +263,17 @@ static int tc6393xb_ohci_disable(struct platform_device *dev) return 0; } +static int tc6393xb_ohci_suspend(struct platform_device *dev) +{ + struct tc6393xb_platform_data *tcpd = dev_get_platdata(dev->dev.parent); + + /* We can't properly store/restore OHCI state, so fail here */ + if (tcpd->resume_restore) + return -EBUSY; + + return tc6393xb_ohci_disable(dev); +} + static int tc6393xb_fb_enable(struct platform_device *dev) { struct tc6393xb *tc6393xb = dev_get_drvdata(dev->dev.parent); @@ -403,7 +414,7 @@ static struct mfd_cell tc6393xb_cells[] = { .num_resources = ARRAY_SIZE(tc6393xb_ohci_resources), .resources = tc6393xb_ohci_resources, .enable = tc6393xb_ohci_enable, - .suspend = tc6393xb_ohci_disable, + .suspend = tc6393xb_ohci_suspend, .resume = tc6393xb_ohci_enable, .disable = tc6393xb_ohci_disable, }, From 5d316a316386b92dc188b96ff8fc8013a0db4cc7 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 22 Sep 2014 10:12:51 +0300 Subject: [PATCH 087/214] mmc: block: add newline to sysfs display of force_ro commit 0031a98a85e9fca282624bfc887f9531b2768396 upstream. Make force_ro consistent with other sysfs entries. Fixes: 371a689f64b0d ('mmc: MMC boot partitions support') Cc: Andrei Warkentin Signed-off-by: Baruch Siach Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/card/block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 9aca9462a12f..7ad66823d022 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -257,7 +257,7 @@ static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr, int ret; struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev)); - ret = snprintf(buf, PAGE_SIZE, "%d", + ret = snprintf(buf, PAGE_SIZE, "%d\n", get_disk_ro(dev_to_disk(dev)) ^ md->read_only); mmc_blk_put(md); From 72f7a5c969dd7c3f5958e14eb97ee5022fe3dba5 Mon Sep 17 00:00:00 2001 From: "Sumit.Saxena@avagotech.com" Date: Mon, 17 Nov 2014 15:24:23 +0530 Subject: [PATCH 088/214] megaraid_sas: corrected return of wait_event from abort frame path commit 170c238701ec38b1829321b17c70671c101bac55 upstream. Corrected wait_event() call which was waiting for wrong completion status (0xFF). Signed-off-by: Sumit Saxena Signed-off-by: Kashyap Desai Reviewed-by: Tomas Henzl Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 4956c99ed90e..78b4fe845245 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -933,7 +933,7 @@ megasas_issue_blocked_abort_cmd(struct megasas_instance *instance, abort_fr->abort_mfi_phys_addr_hi = 0; cmd->sync_cmd = 1; - cmd->cmd_status = 0xFF; + cmd->cmd_status = ENODATA; instance->instancet->issue_dcmd(instance, cmd); From f30d637d028b4a5344d4651f30c486f9daec6804 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 17 Nov 2014 11:05:17 +0800 Subject: [PATCH 089/214] nfs41: fix nfs4_proc_layoutget error handling commit 4bd5a980de87d2b5af417485bde97b8eb3d6cf6a upstream. nfs4_layoutget_release() drops layout hdr refcnt. Grab the refcnt early so that it is safe to call .release in case nfs4_alloc_pages fails. Signed-off-by: Peng Tao Fixes: a47970ff78147 ("NFSv4.1: Hold reference to layout hdr in layoutget") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 78787948f69d..20ebcfa3c92e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6418,6 +6418,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) dprintk("--> %s\n", __func__); + /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ + pnfs_get_layout_hdr(NFS_I(inode)->layout); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); @@ -6430,9 +6433,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.seq_res.sr_slot = NULL; nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); From e1102c5b030bb0f41bed2f9120695ce0b0165c13 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 25 Nov 2014 17:45:15 -0800 Subject: [PATCH 090/214] dm bufio: fix memleak when using a dm_buffer's inline bio commit 445559cdcb98a141f5de415b94fd6eaccab87e6d upstream. When dm-bufio sets out to use the bio built into a struct dm_buffer to issue an IO, it needs to call bio_reset after it's done with the bio so that we can free things attached to the bio such as the integrity payload. Therefore, inject our own endio callback to take care of the bio_reset after calling submit_io's end_io callback. Test case: 1. modprobe scsi_debug delay=0 dif=1 dix=199 ato=1 dev_size_mb=300 2. Set up a dm-bufio client, e.g. dm-verity, on the scsi_debug device 3. Repeatedly read metadata and watch kmalloc-192 leak! Signed-off-by: Darrick J. Wong Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index c9b4ca9e0696..e855a190270d 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -529,6 +529,19 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block, end_io(&b->bio, r); } +static void inline_endio(struct bio *bio, int error) +{ + bio_end_io_t *end_fn = bio->bi_private; + + /* + * Reset the bio to free any attached resources + * (e.g. bio integrity profiles). + */ + bio_reset(bio); + + end_fn(bio, error); +} + static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, bio_end_io_t *end_io) { @@ -540,7 +553,12 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS; b->bio.bi_sector = block << b->c->sectors_per_block_bits; b->bio.bi_bdev = b->c->bdev; - b->bio.bi_end_io = end_io; + b->bio.bi_end_io = inline_endio; + /* + * Use of .bi_private isn't a problem here because + * the dm_buffer's inline bio is local to bufio. + */ + b->bio.bi_private = end_io; /* * We assume that if len >= PAGE_SIZE ptr is page-aligned. From 81f250e1cf8048e4780e46d5219b0e7f943a336b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 29 Nov 2014 15:50:21 +0300 Subject: [PATCH 091/214] dm space map metadata: fix sm_bootstrap_get_nr_blocks() commit c1c6156fe4d4577444b769d7edd5dd503e57bbc9 upstream. This function isn't right and it causes a static checker warning: drivers/md/dm-thin.c:3016 maybe_resize_data_dev() error: potentially using uninitialized 'sb_data_size'. It should set "*count" and return zero on success the same as the sm_metadata_get_nr_blocks() function does earlier. Fixes: 3241b1d3e0aa ('dm: add persistent data library') Signed-off-by: Dan Carpenter Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-space-map-metadata.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index afb419e514bf..056d09c33af1 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -493,7 +493,9 @@ static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - return smm->ll.nr_blocks; + *count = smm->ll.nr_blocks; + + return 0; } static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) From d011d586b3f9f1b08f3deb080d8542f56963dc28 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 17 Dec 2014 14:48:30 -0800 Subject: [PATCH 092/214] x86/tls: Don't validate lm in set_thread_area() after all commit 3fb2f4237bb452eb4e98f6a5dbd5a445b4fed9d0 upstream. It turns out that there's a lurking ABI issue. GCC, when compiling this in a 32-bit program: struct user_desc desc = { .entry_number = idx, .base_addr = base, .limit = 0xfffff, .seg_32bit = 1, .contents = 0, /* Data, grow-up */ .read_exec_only = 0, .limit_in_pages = 1, .seg_not_present = 0, .useable = 0, }; will leave .lm uninitialized. This means that anything in the kernel that reads user_desc.lm for 32-bit tasks is unreliable. Revert the .lm check in set_thread_area(). The value never did anything in the first place. Fixes: 0e58af4e1d21 ("x86/tls: Disallow unusual TLS segments") Signed-off-by: Andy Lutomirski Acked-by: Thomas Gleixner Cc: Linus Torvalds Link: http://lkml.kernel.org/r/d7875b60e28c512f6a6fc0baf5714d58e7eaadbb.1418856405.git.luto@amacapital.net Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/ldt.h | 7 +++++++ arch/x86/kernel/tls.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index 46727eb37bfe..6e1aaf73852a 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -28,6 +28,13 @@ struct user_desc { unsigned int seg_not_present:1; unsigned int useable:1; #ifdef __x86_64__ + /* + * Because this bit is not present in 32-bit user code, user + * programs can pass uninitialized values here. Therefore, in + * any context in which a user_desc comes from a 32-bit program, + * the kernel must act as though lm == 0, regardless of the + * actual value. + */ unsigned int lm:1; #endif }; diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 3e551eee87b9..4e942f31b1a7 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -55,12 +55,6 @@ static bool tls_desc_okay(const struct user_desc *info) if (info->seg_not_present) return false; -#ifdef CONFIG_X86_64 - /* The L bit makes no sense for data. */ - if (info->lm) - return false; -#endif - return true; } From 684f4c093f182756a1c1f582c415d3120cc7f5e8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 Dec 2014 17:26:10 +0100 Subject: [PATCH 093/214] isofs: Fix unchecked printing of ER records commit 4e2024624e678f0ebb916e6192bd23c1f9fdf696 upstream. We didn't check length of rock ridge ER records before printing them. Thus corrupted isofs image can cause us to access and print some memory behind the buffer with obvious consequences. Reported-and-tested-by: Carl Henrik Lunde Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/isofs/rock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index bb63254ed848..735d7522a3a9 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -362,6 +362,9 @@ repeat: rs.cont_size = isonum_733(rr->u.CE.size); break; case SIG('E', 'R'): + /* Invalid length of ER tag id? */ + if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len) + goto out; ISOFS_SB(inode->i_sb)->s_rock = 1; printk(KERN_DEBUG "ISO 9660 Extensions: "); { From 18b41bd86e6cdcdc3b4cef920a1f3d6f496634e3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 4 Dec 2014 18:25:19 +0100 Subject: [PATCH 094/214] KEYS: Fix stale key registration at error path commit b26bdde5bb27f3f900e25a95e33a0c476c8c2c48 upstream. When loading encrypted-keys module, if the last check of aes_get_sizes() in init_encrypted() fails, the driver just returns an error without unregistering its key type. This results in the stale entry in the list. In addition to memory leaks, this leads to a kernel crash when registering a new key type later. This patch fixes the problem by swapping the calls of aes_get_sizes() and register_key_type(), and releasing resources properly at the error paths. Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=908163 Signed-off-by: Takashi Iwai Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/keys/encrypted-keys/encrypted.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 9e1e005c7596..c4c8df4b214d 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -1018,10 +1018,13 @@ static int __init init_encrypted(void) ret = encrypted_shash_alloc(); if (ret < 0) return ret; + ret = aes_get_sizes(); + if (ret < 0) + goto out; ret = register_key_type(&key_type_encrypted); if (ret < 0) goto out; - return aes_get_sizes(); + return 0; out: encrypted_shash_release(); return ret; From 47e60160ef39bea01a3eb5d3aa1da945400fa76c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20M=C3=BCller?= Date: Fri, 12 Dec 2014 12:11:11 +0100 Subject: [PATCH 095/214] mac80211: fix multicast LED blinking and counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d025933e29872cb1fe19fc54d80e4dfa4ee5779c upstream. As multicast-frames can't be fragmented, "dot11MulticastReceivedFrameCount" stopped being incremented after the use-after-free fix. Furthermore, the RX-LED will be triggered by every multicast frame (which wouldn't happen before) which wouldn't allow the LED to rest at all. Fixes https://bugzilla.kernel.org/show_bug.cgi?id=89431 which also had the patch. Fixes: b8fff407a180 ("mac80211: fix use-after-free in defragmentation") Signed-off-by: Andreas Müller [rewrite commit message] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 85bc6d498b46..9299a38c372e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1585,14 +1585,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) - goto out; - if (is_multicast_ether_addr(hdr->addr1)) { rx->local->dot11MulticastReceivedFrameCount++; - goto out; + goto out_no_led; } + if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) + goto out; + I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) @@ -1683,9 +1683,10 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) status->rx_flags |= IEEE80211_RX_FRAGMENTED; out: + ieee80211_led_rx(rx->local); + out_no_led: if (rx->sta) rx->sta->rx_packets++; - ieee80211_led_rx(rx->local); return RX_CONTINUE; } From 5f37daa4cbd955afb1eba2909bf24daea20fd37c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Dec 2014 13:55:49 +0100 Subject: [PATCH 096/214] mac80211: free management frame keys when removing station commit 28a9bc68124c319b2b3dc861e80828a8865fd1ba upstream. When writing the code to allow per-station GTKs, I neglected to take into account the management frame keys (index 4 and 5) when freeing the station and only added code to free the first four data frame keys. Fix this by iterating the array of keys over the right length. Fixes: e31b82136d1a ("cfg80211/mac80211: allow per-station GTKs") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 67059b88fea5..635d0972b688 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -607,7 +607,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, int i; mutex_lock(&local->key_mtx); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) { key = key_mtx_dereference(local, sta->gtk[i]); if (!key) continue; From 4be461b16020beb18a0cd680ec74ba63f83b677c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 Aug 2014 01:33:38 -0700 Subject: [PATCH 097/214] mnt: Implicitly add MNT_NODEV on remount when it was implicitly added by mount commit 3e1866410f11356a9fd869beb3e95983dc79c067 upstream. Now that remount is properly enforcing the rule that you can't remove nodev at least sandstorm.io is breaking when performing a remount. It turns out that there is an easy intuitive solution implicitly add nodev on remount when nodev was implicitly added on mount. Tested-by: Cedric Bosdonnat Tested-by: Richard Weinberger Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 154822397780..fb2d1ad022bf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1816,7 +1816,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags, } if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && !(mnt_flags & MNT_NODEV)) { - return -EPERM; + /* Was the nodev implicitly added in mount? */ + if ((mnt->mnt_ns->user_ns != &init_user_ns) && + !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) { + mnt_flags |= MNT_NODEV; + } else { + return -EPERM; + } } if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && !(mnt_flags & MNT_NOSUID)) { From 260cb8f431389643881f136d62838334de5fc327 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 22 Aug 2014 16:39:03 -0500 Subject: [PATCH 098/214] mnt: Update unprivileged remount test commit 4a44a19b470a886997d6647a77bb3e38dcbfa8c5 upstream. - MNT_NODEV should be irrelevant except when reading back mount flags, no longer specify MNT_NODEV on remount. - Test MNT_NODEV on devpts where it is meaningful even for unprivileged mounts. - Add a test to verify that remount of a prexisting mount with the same flags is allowed and does not change those flags. - Cleanup up the definitions of MS_REC, MS_RELATIME, MS_STRICTATIME that are used when the code is built in an environment without them. - Correct the test error messages when tests fail. There were not 5 tests that tested MS_RELATIME. Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- .../mount/unprivileged-remount-test.c | 172 +++++++++++++++--- 1 file changed, 142 insertions(+), 30 deletions(-) diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c index 1b3ff2fda4d0..9669d375625a 100644 --- a/tools/testing/selftests/mount/unprivileged-remount-test.c +++ b/tools/testing/selftests/mount/unprivileged-remount-test.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -32,11 +34,14 @@ # define CLONE_NEWPID 0x20000000 #endif +#ifndef MS_REC +# define MS_REC 16384 +#endif #ifndef MS_RELATIME -#define MS_RELATIME (1 << 21) +# define MS_RELATIME (1 << 21) #endif #ifndef MS_STRICTATIME -#define MS_STRICTATIME (1 << 24) +# define MS_STRICTATIME (1 << 24) #endif static void die(char *fmt, ...) @@ -87,6 +92,45 @@ static void write_file(char *filename, char *fmt, ...) } } +static int read_mnt_flags(const char *path) +{ + int ret; + struct statvfs stat; + int mnt_flags; + + ret = statvfs(path, &stat); + if (ret != 0) { + die("statvfs of %s failed: %s\n", + path, strerror(errno)); + } + if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \ + ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \ + ST_SYNCHRONOUS | ST_MANDLOCK)) { + die("Unrecognized mount flags\n"); + } + mnt_flags = 0; + if (stat.f_flag & ST_RDONLY) + mnt_flags |= MS_RDONLY; + if (stat.f_flag & ST_NOSUID) + mnt_flags |= MS_NOSUID; + if (stat.f_flag & ST_NODEV) + mnt_flags |= MS_NODEV; + if (stat.f_flag & ST_NOEXEC) + mnt_flags |= MS_NOEXEC; + if (stat.f_flag & ST_NOATIME) + mnt_flags |= MS_NOATIME; + if (stat.f_flag & ST_NODIRATIME) + mnt_flags |= MS_NODIRATIME; + if (stat.f_flag & ST_RELATIME) + mnt_flags |= MS_RELATIME; + if (stat.f_flag & ST_SYNCHRONOUS) + mnt_flags |= MS_SYNCHRONOUS; + if (stat.f_flag & ST_MANDLOCK) + mnt_flags |= ST_MANDLOCK; + + return mnt_flags; +} + static void create_and_enter_userns(void) { uid_t uid; @@ -118,7 +162,8 @@ static void create_and_enter_userns(void) } static -bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) +bool test_unpriv_remount(const char *fstype, const char *mount_options, + int mount_flags, int remount_flags, int invalid_flags) { pid_t child; @@ -151,9 +196,11 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) strerror(errno)); } - if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) { - die("mount of /tmp failed: %s\n", - strerror(errno)); + if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) { + die("mount of %s with options '%s' on /tmp failed: %s\n", + fstype, + mount_options? mount_options : "", + strerror(errno)); } create_and_enter_userns(); @@ -181,62 +228,127 @@ bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags) static bool test_unpriv_remount_simple(int mount_flags) { - return test_unpriv_remount(mount_flags, mount_flags, 0); + return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0); } static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags) { - return test_unpriv_remount(mount_flags, mount_flags, invalid_flags); + return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, + invalid_flags); +} + +static bool test_priv_mount_unpriv_remount(void) +{ + pid_t child; + int ret; + const char *orig_path = "/dev"; + const char *dest_path = "/tmp"; + int orig_mnt_flags, remount_mnt_flags; + + child = fork(); + if (child == -1) { + die("fork failed: %s\n", + strerror(errno)); + } + if (child != 0) { /* parent */ + pid_t pid; + int status; + pid = waitpid(child, &status, 0); + if (pid == -1) { + die("waitpid failed: %s\n", + strerror(errno)); + } + if (pid != child) { + die("waited for %d got %d\n", + child, pid); + } + if (!WIFEXITED(status)) { + die("child did not terminate cleanly\n"); + } + return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + } + + orig_mnt_flags = read_mnt_flags(orig_path); + + create_and_enter_userns(); + ret = unshare(CLONE_NEWNS); + if (ret != 0) { + die("unshare(CLONE_NEWNS) failed: %s\n", + strerror(errno)); + } + + ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL); + if (ret != 0) { + die("recursive bind mount of %s onto %s failed: %s\n", + orig_path, dest_path, strerror(errno)); + } + + ret = mount(dest_path, dest_path, "none", + MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL); + if (ret != 0) { + /* system("cat /proc/self/mounts"); */ + die("remount of /tmp failed: %s\n", + strerror(errno)); + } + + remount_mnt_flags = read_mnt_flags(dest_path); + if (orig_mnt_flags != remount_mnt_flags) { + die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n", + dest_path, orig_path); + } + exit(EXIT_SUCCESS); } int main(int argc, char **argv) { - if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) { + if (!test_unpriv_remount_simple(MS_RDONLY)) { die("MS_RDONLY malfunctions\n"); } - if (!test_unpriv_remount_simple(MS_NODEV)) { + if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) { die("MS_NODEV malfunctions\n"); } - if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) { + if (!test_unpriv_remount_simple(MS_NOSUID)) { die("MS_NOSUID malfunctions\n"); } - if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) { + if (!test_unpriv_remount_simple(MS_NOEXEC)) { die("MS_NOEXEC malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_RELATIME, + MS_NOATIME)) { die("MS_RELATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_STRICTATIME, + MS_NOATIME)) { die("MS_STRICTATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV, - MS_STRICTATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_NOATIME, + MS_STRICTATIME)) { - die("MS_RELATIME malfunctions\n"); + die("MS_NOATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME, + MS_NOATIME)) { - die("MS_RELATIME malfunctions\n"); + die("MS_RELATIME|MS_NODIRATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME, + MS_NOATIME)) { - die("MS_RELATIME malfunctions\n"); + die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n"); } - if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV, - MS_STRICTATIME|MS_NODEV)) + if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME, + MS_STRICTATIME)) { - die("MS_RELATIME malfunctions\n"); + die("MS_NOATIME|MS_DIRATIME malfunctions\n"); } - if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV, - MS_NOATIME|MS_NODEV)) + if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME)) { die("Default atime malfunctions\n"); } + if (!test_priv_mount_unpriv_remount()) { + die("Mount flags unexpectedly changed after remount\n"); + } return EXIT_SUCCESS; } From c65d3b05d20c15f4421f853cbd2d41b91a12185e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 4 Oct 2014 14:44:03 -0700 Subject: [PATCH 099/214] umount: Disallow unprivileged mount force commit b2f5d4dc38e034eecb7987e513255265ff9aa1cf upstream. Forced unmount affects not just the mount namespace but the underlying superblock as well. Restrict forced unmount to the global root user for now. Otherwise it becomes possible a user in a less privileged mount namespace to force the shutdown of a superblock of a filesystem in a more privileged mount namespace, allowing a DOS attack on root. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/namespace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index fb2d1ad022bf..d0244c8ba09c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1342,6 +1342,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (!check_mnt(mnt)) goto dput_and_out; + retval = -EPERM; + if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) + goto dput_and_out; retval = do_umount(mnt, flags); dput_and_out: From 4accc8c8e2dbb1f5ff3d8836244a9423bd5e11a9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 17:19:27 -0600 Subject: [PATCH 100/214] groups: Consolidate the setgroups permission checks commit 7ff4d90b4c24a03666f296c3d4878cd39001e81e upstream. Today there are 3 instances of setgroups and due to an oversight their permission checking has diverged. Add a common function so that they may all share the same permission checking code. This corrects the current oversight in the current permission checks and adds a helper to avoid this in the future. A user namespace security fix will update this new helper, shortly. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/compat_linux.c | 2 +- include/linux/cred.h | 1 + kernel/groups.c | 9 ++++++++- kernel/uid16.c | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 8b6e4f5288a2..a98afed9348b 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -248,7 +248,7 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) struct group_info *group_info; int retval; - if (!capable(CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/include/linux/cred.h b/include/linux/cred.h index 04421e825365..6c58dd7cb9ac 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -68,6 +68,7 @@ extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); +extern bool may_setgroups(void); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ diff --git a/kernel/groups.c b/kernel/groups.c index 6b2588dd04ff..984bb629c68c 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -223,6 +223,13 @@ out: return i; } +bool may_setgroups(void) +{ + struct user_namespace *user_ns = current_user_ns(); + + return ns_capable(user_ns, CAP_SETGID); +} + /* * SMP: Our groups are copy-on-write. We can set them safely * without another task interfering. @@ -233,7 +240,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!nsown_capable(CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/kernel/uid16.c b/kernel/uid16.c index f6c83d7ef000..d58cc4d8f0d1 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) struct group_info *group_info; int retval; - if (!nsown_capable(CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; From b8a0441b542f6d6bd6fda46cc735ae71392cb845 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 17:51:47 -0600 Subject: [PATCH 101/214] userns: Document what the invariant required for safe unprivileged mappings. commit 0542f17bf2c1f2430d368f44c8fcf2f82ec9e53e upstream. The rule is simple. Don't allow anything that wouldn't be allowed without unprivileged mappings. It was previously overlooked that establishing gid mappings would allow dropping groups and potentially gaining permission to files and directories that had lesser permissions for a specific group than for all other users. This is the rule needed to fix CVE-2014-8989 and prevent any other security issues with new_idmap_permitted. The reason for this rule is that the unix permission model is old and there are programs out there somewhere that take advantage of every little corner of it. So allowing a uid or gid mapping to be established without privielge that would allow anything that would not be allowed without that mapping will result in expectations from some code somewhere being violated. Violated expectations about the behavior of the OS is a long way to say a security issue. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9bea1d7dd21f..59790206ca2e 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -800,7 +800,9 @@ static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { - /* Allow mapping to your own filesystem ids */ + /* Don't allow mappings that would allow anything that wouldn't + * be allowed without the establishment of unprivileged mappings. + */ if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { From fc9b65e3d7703e6d63875b0b233bbe26a4a513ba Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 18:01:11 -0600 Subject: [PATCH 102/214] userns: Don't allow setgroups until a gid mapping has been setablished commit 273d2c67c3e179adb1e74f403d1e9a06e3f841b5 upstream. setgroups is unique in not needing a valid mapping before it can be called, in the case of setgroups(0, NULL) which drops all supplemental groups. The design of the user namespace assumes that CAP_SETGID can not actually be used until a gid mapping is established. Therefore add a helper function to see if the user namespace gid mapping has been established and call that function in the setgroups permission check. This is part of the fix for CVE-2014-8989, being able to drop groups without privilege using user namespaces. Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- include/linux/user_namespace.h | 5 +++++ kernel/groups.c | 4 +++- kernel/user_namespace.c | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 14105c26a836..648cf88c5d6d 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -59,6 +59,7 @@ extern struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); +extern bool userns_may_setgroups(const struct user_namespace *ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -83,6 +84,10 @@ static inline void put_user_ns(struct user_namespace *ns) { } +static inline bool userns_may_setgroups(const struct user_namespace *ns) +{ + return true; +} #endif void update_mnt_policy(struct user_namespace *userns); diff --git a/kernel/groups.c b/kernel/groups.c index 984bb629c68c..67b4ba30475f 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -6,6 +6,7 @@ #include #include #include +#include #include /* init to 2 - one for init_task, one to ensure it is never freed */ @@ -227,7 +228,8 @@ bool may_setgroups(void) { struct user_namespace *user_ns = current_user_ns(); - return ns_capable(user_ns, CAP_SETGID); + return ns_capable(user_ns, CAP_SETGID) && + userns_may_setgroups(user_ns); } /* diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 59790206ca2e..b083fa233daa 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -832,6 +832,20 @@ static bool new_idmap_permitted(const struct file *file, return false; } +bool userns_may_setgroups(const struct user_namespace *ns) +{ + bool allowed; + + mutex_lock(&id_map_mutex); + /* It is not safe to use setgroups until a gid mapping in + * the user namespace has been established. + */ + allowed = ns->gid_map.nr_extents != 0; + mutex_unlock(&id_map_mutex); + + return allowed; +} + static void *userns_get(struct task_struct *task) { struct user_namespace *user_ns; From ba0922adbd2ccffe444608298ae0506401eac4c3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 18:14:19 -0600 Subject: [PATCH 103/214] userns: Don't allow unprivileged creation of gid mappings commit be7c6dba2332cef0677fbabb606e279ae76652c3 upstream. As any gid mapping will allow and must allow for backwards compatibility dropping groups don't allow any gid mappings to be established without CAP_SETGID in the parent user namespace. For a small class of applications this change breaks userspace and removes useful functionality. This small class of applications includes tools/testing/selftests/mount/unprivilged-remount-test.c Most of the removed functionality will be added back with the addition of a one way knob to disable setgroups. Once setgroups is disabled setting the gid_map becomes as safe as setting the uid_map. For more common applications that set the uid_map and the gid_map with privilege this change will have no affect. This is part of a fix for CVE-2014-8989. Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index b083fa233daa..075020fa22fd 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -810,11 +810,6 @@ static bool new_idmap_permitted(const struct file *file, if (uid_eq(uid, file->f_cred->fsuid)) return true; } - else if (cap_setid == CAP_SETGID) { - kgid_t gid = make_kgid(ns->parent, id); - if (gid_eq(gid, file->f_cred->fsgid)) - return true; - } } /* Allow anyone to set a mapping that doesn't require privilege */ From f028f2d73293b65a5e58ee7468a8683b39fd912c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 18:26:30 -0600 Subject: [PATCH 104/214] userns: Check euid no fsuid when establishing an unprivileged uid mapping commit 80dd00a23784b384ccea049bfb3f259d3f973b9d upstream. setresuid allows the euid to be set to any of uid, euid, suid, and fsuid. Therefor it is safe to allow an unprivileged user to map their euid and use CAP_SETUID privileged with exactly that uid, as no new credentials can be obtained. I can not find a combination of existing system calls that allows setting uid, euid, suid, and fsuid from the fsuid making the previous use of fsuid for allowing unprivileged mappings a bug. This is part of a fix for CVE-2014-8989. Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 075020fa22fd..592ab70df216 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -807,7 +807,7 @@ static bool new_idmap_permitted(const struct file *file, u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, file->f_cred->fsuid)) + if (uid_eq(uid, file->f_cred->euid)) return true; } } From a1821391e5072029118857f6ebb27f3cf66b9f33 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 26 Nov 2014 23:22:14 -0600 Subject: [PATCH 105/214] userns: Only allow the creator of the userns unprivileged mappings commit f95d7918bd1e724675de4940039f2865e5eec5fe upstream. If you did not create the user namespace and are allowed to write to uid_map or gid_map you should already have the necessary privilege in the parent user namespace to establish any mapping you want so this will not affect userspace in practice. Limiting unprivileged uid mapping establishment to the creator of the user namespace makes it easier to verify all credentials obtained with the uid mapping can be obtained without the uid mapping without privilege. Limiting unprivileged gid mapping establishment (which is temporarily absent) to the creator of the user namespace also ensures that the combination of uid and gid can already be obtained without privilege. This is part of the fix for CVE-2014-8989. Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 592ab70df216..927d4ea4cd0b 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -800,14 +800,16 @@ static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, struct uid_gid_map *new_map) { + const struct cred *cred = file->f_cred; /* Don't allow mappings that would allow anything that wouldn't * be allowed without the establishment of unprivileged mappings. */ - if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1)) { + if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) && + uid_eq(ns->owner, cred->euid)) { u32 id = new_map->extent[0].lower_first; if (cap_setid == CAP_SETUID) { kuid_t uid = make_kuid(ns->parent, id); - if (uid_eq(uid, file->f_cred->euid)) + if (uid_eq(uid, cred->euid)) return true; } } From 6eaab78729f4e689a86ce6bc4e33453ebff9625d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 9 Dec 2014 14:03:14 -0600 Subject: [PATCH 106/214] userns: Rename id_map_mutex to userns_state_mutex commit f0d62aec931e4ae3333c797d346dc4f188f454ba upstream. Generalize id_map_mutex so it can be used for more state of a user namespace. Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 927d4ea4cd0b..78009bd5efe6 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -24,6 +24,7 @@ #include static struct kmem_cache *user_ns_cachep __read_mostly; +static DEFINE_MUTEX(userns_state_mutex); static bool new_idmap_permitted(const struct file *file, struct user_namespace *ns, int cap_setid, @@ -577,9 +578,6 @@ static bool mappings_overlap(struct uid_gid_map *new_map, struct uid_gid_extent return false; } - -static DEFINE_MUTEX(id_map_mutex); - static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -596,7 +594,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ssize_t ret = -EINVAL; /* - * The id_map_mutex serializes all writes to any given map. + * The userns_state_mutex serializes all writes to any given map. * * Any map is only ever written once. * @@ -614,7 +612,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, * order and smp_rmb() is guaranteed that we don't have crazy * architectures returning stale data. */ - mutex_lock(&id_map_mutex); + mutex_lock(&userns_state_mutex); ret = -EPERM; /* Only allow one successful write to the map */ @@ -741,7 +739,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, *ppos = count; ret = count; out: - mutex_unlock(&id_map_mutex); + mutex_unlock(&userns_state_mutex); if (page) free_page(page); return ret; @@ -833,12 +831,12 @@ bool userns_may_setgroups(const struct user_namespace *ns) { bool allowed; - mutex_lock(&id_map_mutex); + mutex_lock(&userns_state_mutex); /* It is not safe to use setgroups until a gid mapping in * the user namespace has been established. */ allowed = ns->gid_map.nr_extents != 0; - mutex_unlock(&id_map_mutex); + mutex_unlock(&userns_state_mutex); return allowed; } From 1c587ee50ec44a2cf07cd160c214a683608b4d2c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 2 Dec 2014 12:27:26 -0600 Subject: [PATCH 107/214] userns: Add a knob to disable setgroups on a per user namespace basis commit 9cc46516ddf497ea16e8d7cb986ae03a0f6b92f8 upstream. - Expose the knob to user space through a proc file /proc//setgroups A value of "deny" means the setgroups system call is disabled in the current processes user namespace and can not be enabled in the future in this user namespace. A value of "allow" means the segtoups system call is enabled. - Descendant user namespaces inherit the value of setgroups from their parents. - A proc file is used (instead of a sysctl) as sysctls currently do not allow checking the permissions at open time. - Writing to the proc file is restricted to before the gid_map for the user namespace is set. This ensures that disabling setgroups at a user namespace level will never remove the ability to call setgroups from a process that already has that ability. A process may opt in to the setgroups disable for itself by creating, entering and configuring a user namespace or by calling setns on an existing user namespace with setgroups disabled. Processes without privileges already can not call setgroups so this is a noop. Prodcess with privilege become processes without privilege when entering a user namespace and as with any other path to dropping privilege they would not have the ability to call setgroups. So this remains within the bounds of what is possible without a knob to disable setgroups permanently in a user namespace. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/proc/base.c | 53 +++++++++++++++++++++ include/linux/user_namespace.h | 7 +++ kernel/user.c | 1 + kernel/user_namespace.c | 85 ++++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index de12b8128b95..8fc784aef0b8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2612,6 +2612,57 @@ static const struct file_operations proc_projid_map_operations = { .llseek = seq_lseek, .release = proc_id_map_release, }; + +static int proc_setgroups_open(struct inode *inode, struct file *file) +{ + struct user_namespace *ns = NULL; + struct task_struct *task; + int ret; + + ret = -ESRCH; + task = get_proc_task(inode); + if (task) { + rcu_read_lock(); + ns = get_user_ns(task_cred_xxx(task, user_ns)); + rcu_read_unlock(); + put_task_struct(task); + } + if (!ns) + goto err; + + if (file->f_mode & FMODE_WRITE) { + ret = -EACCES; + if (!ns_capable(ns, CAP_SYS_ADMIN)) + goto err_put_ns; + } + + ret = single_open(file, &proc_setgroups_show, ns); + if (ret) + goto err_put_ns; + + return 0; +err_put_ns: + put_user_ns(ns); +err: + return ret; +} + +static int proc_setgroups_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + int ret = single_release(inode, file); + put_user_ns(ns); + return ret; +} + +static const struct file_operations proc_setgroups_operations = { + .open = proc_setgroups_open, + .write = proc_setgroups_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_setgroups_release, +}; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, @@ -2720,6 +2771,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #ifdef CONFIG_CHECKPOINT_RESTORE REG("timers", S_IRUGO, proc_timers_operations), @@ -3073,6 +3125,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), + REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif }; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 648cf88c5d6d..a37081cf59da 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -17,6 +17,10 @@ struct uid_gid_map { /* 64 bytes -- 1 cache line */ } extent[UID_GID_MAP_MAX_EXTENTS]; }; +#define USERNS_SETGROUPS_ALLOWED 1UL + +#define USERNS_INIT_FLAGS USERNS_SETGROUPS_ALLOWED + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -27,6 +31,7 @@ struct user_namespace { kuid_t owner; kgid_t group; unsigned int proc_inum; + unsigned long flags; bool may_mount_sysfs; bool may_mount_proc; }; @@ -59,6 +64,8 @@ extern struct seq_operations proc_projid_seq_operations; extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *); +extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); +extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); #else diff --git a/kernel/user.c b/kernel/user.c index 69b4c3d48cde..6bbef5604101 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -51,6 +51,7 @@ struct user_namespace init_user_ns = { .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, .proc_inum = PROC_USER_INIT_INO, + .flags = USERNS_INIT_FLAGS, .may_mount_sysfs = true, .may_mount_proc = true, }; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 78009bd5efe6..9da8cbbcd985 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -100,6 +100,11 @@ int create_user_ns(struct cred *new) ns->owner = owner; ns->group = group; + /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ + mutex_lock(&userns_state_mutex); + ns->flags = parent_ns->flags; + mutex_unlock(&userns_state_mutex); + set_cred_user_ns(new, ns); update_mnt_policy(ns); @@ -827,6 +832,84 @@ static bool new_idmap_permitted(const struct file *file, return false; } +int proc_setgroups_show(struct seq_file *seq, void *v) +{ + struct user_namespace *ns = seq->private; + unsigned long userns_flags = ACCESS_ONCE(ns->flags); + + seq_printf(seq, "%s\n", + (userns_flags & USERNS_SETGROUPS_ALLOWED) ? + "allow" : "deny"); + return 0; +} + +ssize_t proc_setgroups_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + char kbuf[8], *pos; + bool setgroups_allowed; + ssize_t ret; + + /* Only allow a very narrow range of strings to be written */ + ret = -EINVAL; + if ((*ppos != 0) || (count >= sizeof(kbuf))) + goto out; + + /* What was written? */ + ret = -EFAULT; + if (copy_from_user(kbuf, buf, count)) + goto out; + kbuf[count] = '\0'; + pos = kbuf; + + /* What is being requested? */ + ret = -EINVAL; + if (strncmp(pos, "allow", 5) == 0) { + pos += 5; + setgroups_allowed = true; + } + else if (strncmp(pos, "deny", 4) == 0) { + pos += 4; + setgroups_allowed = false; + } + else + goto out; + + /* Verify there is not trailing junk on the line */ + pos = skip_spaces(pos); + if (*pos != '\0') + goto out; + + ret = -EPERM; + mutex_lock(&userns_state_mutex); + if (setgroups_allowed) { + /* Enabling setgroups after setgroups has been disabled + * is not allowed. + */ + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED)) + goto out_unlock; + } else { + /* Permanently disabling setgroups after setgroups has + * been enabled by writing the gid_map is not allowed. + */ + if (ns->gid_map.nr_extents != 0) + goto out_unlock; + ns->flags &= ~USERNS_SETGROUPS_ALLOWED; + } + mutex_unlock(&userns_state_mutex); + + /* Report a successful write */ + *ppos = count; + ret = count; +out: + return ret; +out_unlock: + mutex_unlock(&userns_state_mutex); + goto out; +} + bool userns_may_setgroups(const struct user_namespace *ns) { bool allowed; @@ -836,6 +919,8 @@ bool userns_may_setgroups(const struct user_namespace *ns) * the user namespace has been established. */ allowed = ns->gid_map.nr_extents != 0; + /* Is setgroups allowed? */ + allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); mutex_unlock(&userns_state_mutex); return allowed; From afb5285388ce7e7623ab9ddb08f287551e70426f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 Dec 2014 19:36:04 -0600 Subject: [PATCH 108/214] userns: Allow setting gid_maps without privilege when setgroups is disabled commit 66d2f338ee4c449396b6f99f5e75cd18eb6df272 upstream. Now that setgroups can be disabled and not reenabled, setting gid_map without privielge can now be enabled when setgroups is disabled. This restores most of the functionality that was lost when unprivileged setting of gid_map was removed. Applications that use this functionality will need to check to see if they use setgroups or init_groups, and if they don't they can be fixed by simply disabling setgroups before writing to gid_map. Reviewed-by: Andy Lutomirski Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- kernel/user_namespace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9da8cbbcd985..3f2fb33d291a 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -814,6 +814,11 @@ static bool new_idmap_permitted(const struct file *file, kuid_t uid = make_kuid(ns->parent, id); if (uid_eq(uid, cred->euid)) return true; + } else if (cap_setid == CAP_SETGID) { + kgid_t gid = make_kgid(ns->parent, id); + if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) && + gid_eq(gid, cred->egid)) + return true; } } From 3150a5ae1c08aac6aba8454cb536316a48eecc52 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 2 Dec 2014 13:56:30 -0600 Subject: [PATCH 109/214] userns: Unbreak the unprivileged remount tests commit db86da7cb76f797a1a8b445166a15cb922c6ff85 upstream. A security fix in caused the way the unprivileged remount tests were using user namespaces to break. Tweak the way user namespaces are being used so the test works again. Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- .../mount/unprivileged-remount-test.c | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c index 9669d375625a..517785052f1c 100644 --- a/tools/testing/selftests/mount/unprivileged-remount-test.c +++ b/tools/testing/selftests/mount/unprivileged-remount-test.c @@ -53,17 +53,14 @@ static void die(char *fmt, ...) exit(EXIT_FAILURE); } -static void write_file(char *filename, char *fmt, ...) +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) { char buf[4096]; int fd; ssize_t written; int buf_len; - va_list ap; - va_start(ap, fmt); buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); - va_end(ap); if (buf_len < 0) { die("vsnprintf failed: %s\n", strerror(errno)); @@ -74,6 +71,8 @@ static void write_file(char *filename, char *fmt, ...) fd = open(filename, O_WRONLY); if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; die("open of %s failed: %s\n", filename, strerror(errno)); } @@ -92,6 +91,26 @@ static void write_file(char *filename, char *fmt, ...) } } +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); + +} + +static void write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); + +} + static int read_mnt_flags(const char *path) { int ret; @@ -144,13 +163,10 @@ static void create_and_enter_userns(void) strerror(errno)); } + maybe_write_file("/proc/self/setgroups", "deny"); write_file("/proc/self/uid_map", "0 %d 1", uid); write_file("/proc/self/gid_map", "0 %d 1", gid); - if (setgroups(0, NULL) != 0) { - die("setgroups failed: %s\n", - strerror(errno)); - } if (setgid(0) != 0) { die ("setgid(0) failed %s\n", strerror(errno)); From 3d53f1f7166c4bc5e936155de143676bf5fc7b0c Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 19 Dec 2014 13:36:08 +0100 Subject: [PATCH 110/214] crypto: af_alg - fix backlog handling commit 7e77bdebff5cb1e9876c561f69710b9ab8fa1f7e upstream. If a request is backlogged, it's complete() handler will get called twice: once with -EINPROGRESS, and once with the final error code. af_alg's complete handler, unlike other users, does not handle the -EINPROGRESS but instead always completes the completion that recvmsg() is waiting on. This can lead to a return to user space while the request is still pending in the driver. If userspace closes the sockets before the requests are handled by the driver, this will lead to use-after-frees (and potential crashes) in the kernel due to the tfm having been freed. The crashes can be easily reproduced (for example) by reducing the max queue length in cryptod.c and running the following (from http://www.chronox.de/libkcapi.html) on AES-NI capable hardware: $ while true; do kcapi -x 1 -e -c '__ecb-aes-aesni' \ -k 00000000000000000000000000000000 \ -p 00000000000000000000000000000000 >/dev/null & done Signed-off-by: Rabin Vincent Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/af_alg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index bf948e134981..6ef6e2ad344e 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -449,6 +449,9 @@ void af_alg_complete(struct crypto_async_request *req, int err) { struct af_alg_completion *completion = req->data; + if (err == -EINPROGRESS) + return; + completion->err = err; complete(&completion->completion); } From cb22aba0ee2bc330033b85447b4d44b1501ee75f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 10 Dec 2014 15:52:22 -0800 Subject: [PATCH 111/214] ncpfs: return proper error from NCP_IOC_SETROOT ioctl commit a682e9c28cac152e6e54c39efcf046e0c8cfcf63 upstream. If some error happens in NCP_IOC_SETROOT ioctl, the appropriate error return value is then (in most cases) just overwritten before we return. This can result in reporting success to userspace although error happened. This bug was introduced by commit 2e54eb96e2c8 ("BKL: Remove BKL from ncpfs"). Propagate the errors correctly. Coverity id: 1226925. Fixes: 2e54eb96e2c80 ("BKL: Remove BKL from ncpfs") Signed-off-by: Jan Kara Cc: Petr Vandrovec Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ncpfs/ioctl.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 60426ccb3b65..2f970de02b16 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -448,7 +448,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg result = -EIO; } } - result = 0; } mutex_unlock(&server->root_setup_lock); From 12279351a42d51638a21ede8a3f6c62aea26491e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 10 Dec 2014 15:55:25 -0800 Subject: [PATCH 112/214] exit: pidns: alloc_pid() leaks pid_namespace if child_reaper is exiting commit 24c037ebf5723d4d9ab0996433cee4f96c292a4d upstream. alloc_pid() does get_pid_ns() beforehand but forgets to put_pid_ns() if it fails because disable_pid_allocation() was called by the exiting child_reaper. We could simply move get_pid_ns() down to successful return, but this fix tries to be as trivial as possible. Signed-off-by: Oleg Nesterov Reviewed-by: "Eric W. Biederman" Cc: Aaron Tomlin Cc: Pavel Emelyanov Cc: Serge Hallyn Cc: Sterling Alexander Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/pid.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/pid.c b/kernel/pid.c index 0eb6d8e8b1da..3cdba5173600 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -335,6 +335,8 @@ out: out_unlock: spin_unlock_irq(&pidmap_lock); + put_pid_ns(ns); + out_free: while (++i <= ns->level) free_pidmap(pid->numbers + i); From 5fdeca1b0acd9790cdd5891a76bb6dfb707d2a1c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 19 Dec 2014 12:21:47 +0100 Subject: [PATCH 113/214] udf: Verify symlink size before loading it commit a1d47b262952a45aae62bd49cfaf33dd76c11a2c upstream. UDF specification allows arbitrarily large symlinks. However we support only symlinks at most one block large. Check the length of the symlink so that we don't access memory beyond end of the symlink block. Reported-by: Carl Henrik Lunde Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/udf/symlink.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index d7c6dbe4194b..d89f324bc387 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -80,11 +80,17 @@ static int udf_symlink_filler(struct file *file, struct page *page) struct inode *inode = page->mapping->host; struct buffer_head *bh = NULL; unsigned char *symlink; - int err = -EIO; + int err; unsigned char *p = kmap(page); struct udf_inode_info *iinfo; uint32_t pos; + /* We don't support symlinks longer than one block */ + if (inode->i_size > inode->i_sb->s_blocksize) { + err = -ENAMETOOLONG; + goto out_unmap; + } + iinfo = UDF_I(inode); pos = udf_block_map(inode, 0); @@ -94,8 +100,10 @@ static int udf_symlink_filler(struct file *file, struct page *page) } else { bh = sb_bread(inode->i_sb, pos); - if (!bh) - goto out; + if (!bh) { + err = -EIO; + goto out_unlock_inode; + } symlink = bh->b_data; } @@ -109,9 +117,10 @@ static int udf_symlink_filler(struct file *file, struct page *page) unlock_page(page); return 0; -out: +out_unlock_inode: up_read(&iinfo->i_data_sem); SetPageError(page); +out_unmap: kunmap(page); unlock_page(page); return err; From ed775f3161684770d506e150073d9f271335d5d3 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 7 Oct 2014 15:51:55 -0500 Subject: [PATCH 114/214] eCryptfs: Force RO mount when encrypted view is enabled commit 332b122d39c9cbff8b799007a825d94b2e7c12f2 upstream. The ecryptfs_encrypted_view mount option greatly changes the functionality of an eCryptfs mount. Instead of encrypting and decrypting lower files, it provides a unified view of the encrypted files in the lower filesystem. The presence of the ecryptfs_encrypted_view mount option is intended to force a read-only mount and modifying files is not supported when the feature is in use. See the following commit for more information: e77a56d [PATCH] eCryptfs: Encrypted passthrough This patch forces the mount to be read-only when the ecryptfs_encrypted_view mount option is specified by setting the MS_RDONLY flag on the superblock. Additionally, this patch removes some broken logic in ecryptfs_open() that attempted to prevent modifications of files when the encrypted view feature was in use. The check in ecryptfs_open() was not sufficient to prevent file modifications using system calls that do not operate on a file descriptor. Signed-off-by: Tyler Hicks Reported-by: Priya Bansal Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/file.c | 12 ------------ fs/ecryptfs/main.c | 16 +++++++++++++--- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index a7abbea2c096..9ff3664bb3ea 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -196,23 +196,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file) { int rc = 0; struct ecryptfs_crypt_stat *crypt_stat = NULL; - struct ecryptfs_mount_crypt_stat *mount_crypt_stat; struct dentry *ecryptfs_dentry = file->f_path.dentry; /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ struct ecryptfs_file_info *file_info; - mount_crypt_stat = &ecryptfs_superblock_to_private( - ecryptfs_dentry->d_sb)->mount_crypt_stat; - if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) - && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR) - || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC) - || (file->f_flags & O_APPEND))) { - printk(KERN_WARNING "Mount has encrypted view enabled; " - "files may only be read\n"); - rc = -EPERM; - goto out; - } /* Released in ecryptfs_release or end of function if failure */ file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL); ecryptfs_set_file_private(file, file_info); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index e924cf45aad9..329a9cc2b2eb 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -494,6 +494,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags { struct super_block *s; struct ecryptfs_sb_info *sbi; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; struct ecryptfs_dentry_info *root_info; const char *err = "Getting sb failed"; struct inode *inode; @@ -512,6 +513,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags err = "Error parsing options"; goto out; } + mount_crypt_stat = &sbi->mount_crypt_stat; s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) { @@ -558,11 +560,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags /** * Set the POSIX ACL flag based on whether they're enabled in the lower - * mount. Force a read-only eCryptfs mount if the lower mount is ro. - * Allow a ro eCryptfs mount even when the lower mount is rw. + * mount. */ s->s_flags = flags & ~MS_POSIXACL; - s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); + s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL; + + /** + * Force a read-only eCryptfs mount when: + * 1) The lower mount is ro + * 2) The ecryptfs_encrypted_view mount option is specified + */ + if (path.dentry->d_sb->s_flags & MS_RDONLY || + mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) + s->s_flags |= MS_RDONLY; s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; From 66012982c8e3344b6fc94defba2909356c607a6d Mon Sep 17 00:00:00 2001 From: Michael Halcrow Date: Wed, 26 Nov 2014 09:09:16 -0800 Subject: [PATCH 115/214] eCryptfs: Remove buggy and unnecessary write in file name decode routine commit 942080643bce061c3dd9d5718d3b745dcb39a8bc upstream. Dmitry Chernenkov used KASAN to discover that eCryptfs writes past the end of the allocated buffer during encrypted filename decoding. This fix corrects the issue by getting rid of the unnecessary 0 write when the current bit offset is 2. Signed-off-by: Michael Halcrow Reported-by: Dmitry Chernenkov Suggested-by: Kees Cook Signed-off-by: Tyler Hicks Signed-off-by: Greg Kroah-Hartman --- fs/ecryptfs/crypto.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f71ec125290d..1da2446bf6b0 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -2102,7 +2102,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size, break; case 2: dst[dst_byte_offset++] |= (src_byte); - dst[dst_byte_offset] = 0; current_bit_offset = 0; break; } From d8bf9ec799b05f5a083e786894e276cdefea470a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 14 Nov 2014 16:16:30 -0500 Subject: [PATCH 116/214] Btrfs: do not move em to modified list when unpinning commit a28046956c71985046474283fa3bcd256915fb72 upstream. We use the modified list to keep track of which extents have been modified so we know which ones are candidates for logging at fsync() time. Newly modified extents are added to the list at modification time, around the same time the ordered extent is created. We do this so that we don't have to wait for ordered extents to complete before we know what we need to log. The problem is when something like this happens log extent 0-4k on inode 1 copy csum for 0-4k from ordered extent into log sync log commit transaction log some other extent on inode 1 ordered extent for 0-4k completes and adds itself onto modified list again log changed extents see ordered extent for 0-4k has already been logged at this point we assume the csum has been copied sync log crash On replay we will see the extent 0-4k in the log, drop the original 0-4k extent which is the same one that we are replaying which also drops the csum, and then we won't find the csum in the log for that bytenr. This of course causes us to have errors about not having csums for certain ranges of our inode. So remove the modified list manipulation in unpin_extent_cache, any modified extents should have been added well before now, and we don't want them re-logged. This fixes my test that I could reliably reproduce this problem with. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_map.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index a4a7a1a8da95..0a3809500599 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -263,8 +263,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, if (!em) goto out; - if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) - list_move(&em->list, &tree->modified_extents); em->generation = gen; clear_bit(EXTENT_FLAG_PINNED, &em->flags); em->mod_start = em->start; From 5f55aee3b8880bca80c64872eadbaea1632bd4ac Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sun, 7 Dec 2014 21:31:47 +0000 Subject: [PATCH 117/214] Btrfs: fix fs corruption on transaction abort if device supports discard commit 678886bdc6378c1cbd5072da2c5a3035000214e3 upstream. When we abort a transaction we iterate over all the ranges marked as dirty in fs_info->freed_extents[0] and fs_info->freed_extents[1], clear them from those trees, add them back (unpin) to the free space caches and, if the fs was mounted with "-o discard", perform a discard on those regions. Also, after adding the regions to the free space caches, a fitrim ioctl call can see those ranges in a block group's free space cache and perform a discard on the ranges, so the same issue can happen without "-o discard" as well. This causes corruption, affecting one or multiple btree nodes (in the worst case leaving the fs unmountable) because some of those ranges (the ones in the fs_info->pinned_extents tree) correspond to btree nodes/leafs that are referred by the last committed super block - breaking the rule that anything that was committed by a transaction is untouched until the next transaction commits successfully. I ran into this while running in a loop (for several hours) the fstest that I recently submitted: [PATCH] fstests: add btrfs test to stress chunk allocation/removal and fstrim The corruption always happened when a transaction aborted and then fsck complained like this: _check_btrfs_filesystem: filesystem on /dev/sdc is inconsistent *** fsck.btrfs output *** Check tree block failed, want=94945280, have=0 Check tree block failed, want=94945280, have=0 Check tree block failed, want=94945280, have=0 Check tree block failed, want=94945280, have=0 Check tree block failed, want=94945280, have=0 read block failed check_tree_block Couldn't open file system In this case 94945280 corresponded to the root of a tree. Using frace what I observed was the following sequence of steps happened: 1) transaction N started, fs_info->pinned_extents pointed to fs_info->freed_extents[0]; 2) node/eb 94945280 is created; 3) eb is persisted to disk; 4) transaction N commit starts, fs_info->pinned_extents now points to fs_info->freed_extents[1], and transaction N completes; 5) transaction N + 1 starts; 6) eb is COWed, and btrfs_free_tree_block() called for this eb; 7) eb range (94945280 to 94945280 + 16Kb) is added to fs_info->pinned_extents (fs_info->freed_extents[1]); 8) Something goes wrong in transaction N + 1, like hitting ENOSPC for example, and the transaction is aborted, turning the fs into readonly mode. The stack trace I got for example: [112065.253935] [] dump_stack+0x4d/0x66 [112065.254271] [] warn_slowpath_common+0x7f/0x98 [112065.254567] [] ? __btrfs_abort_transaction+0x50/0x10b [btrfs] [112065.261674] [] warn_slowpath_fmt+0x48/0x50 [112065.261922] [] ? btrfs_free_path+0x26/0x29 [btrfs] [112065.262211] [] __btrfs_abort_transaction+0x50/0x10b [btrfs] [112065.262545] [] btrfs_remove_chunk+0x537/0x58b [btrfs] [112065.262771] [] btrfs_delete_unused_bgs+0x1de/0x21b [btrfs] [112065.263105] [] cleaner_kthread+0x100/0x12f [btrfs] (...) [112065.264493] ---[ end trace dd7903a975a31a08 ]--- [112065.264673] BTRFS: error (device sdc) in btrfs_remove_chunk:2625: errno=-28 No space left [112065.264997] BTRFS info (device sdc): forced readonly 9) The clear kthread sees that the BTRFS_FS_STATE_ERROR bit is set in fs_info->fs_state and calls btrfs_cleanup_transaction(), which in turn calls btrfs_destroy_pinned_extent(); 10) Then btrfs_destroy_pinned_extent() iterates over all the ranges marked as dirty in fs_info->freed_extents[], and for each one it calls discard, if the fs was mounted with "-o discard", and adds the range to the free space cache of the respective block group; 11) btrfs_trim_block_group(), invoked from the fitrim ioctl code path, sees the free space entries and performs a discard; 12) After an umount and mount (or fsck), our eb's location on disk was full of zeroes, and it should have been untouched, because it was marked as dirty in the fs_info->pinned_extents tree, and therefore used by the trees that the last committed superblock points to. Fix this by not performing a discard and not adding the ranges to the free space caches - it's useless from this point since the fs is now in readonly mode and we won't write free space caches to disk anymore (otherwise we would leak space) nor any new superblock. By not adding the ranges to the free space caches, it prevents other code paths from allocating that space and write to it as well, therefore being safer and simpler. This isn't a new problem, as it's been present since 2011 (git commit acce952b0263825da32cf10489413dec78053347). Signed-off-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 6 ------ fs/btrfs/extent-tree.c | 10 ++++++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index abecce399354..7360f03ddbe1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3857,12 +3857,6 @@ again: if (ret) break; - /* opt_discard */ - if (btrfs_test_opt(root, DISCARD)) - ret = btrfs_error_discard_extent(root, start, - end + 1 - start, - NULL); - clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); cond_resched(); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bbafa05519da..f99c71e40f8b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5277,7 +5277,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, update_global_block_rsv(fs_info); } -static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) +static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end, + const bool return_free_space) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_block_group_cache *cache = NULL; @@ -5301,7 +5302,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) if (start < cache->last_byte_to_unpin) { len = min(len, cache->last_byte_to_unpin - start); - btrfs_add_free_space(cache, start, len); + if (return_free_space) + btrfs_add_free_space(cache, start, len); } start += len; @@ -5364,7 +5366,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, end + 1 - start, NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); - unpin_extent_range(root, start, end); + unpin_extent_range(root, start, end, true); cond_resched(); } @@ -8564,7 +8566,7 @@ out: int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) { - return unpin_extent_range(root, start, end); + return unpin_extent_range(root, start, end, false); } int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, From 46a490cc58e717f8058b878635c11535583840d2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 8 Jan 2015 09:58:30 -0800 Subject: [PATCH 118/214] Linux 3.10.64 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9383fe24baa9..e5b63fb3d0e1 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 63 +SUBLEVEL = 64 EXTRAVERSION = NAME = TOSSUG Baby Fish From e66e0fc5349525ed49935881818b2d5cc27f4468 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 18 Dec 2014 16:17:37 -0800 Subject: [PATCH 119/214] ocfs2: fix journal commit deadlock commit 136f49b9171074872f2a14ad0ab10486d1ba13ca upstream. For buffer write, page lock will be got in write_begin and released in write_end, in ocfs2_write_end_nolock(), before it unlock the page in ocfs2_free_write_ctxt(), it calls ocfs2_run_deallocs(), this will ask for the read lock of journal->j_trans_barrier. Holding page lock and ask for journal->j_trans_barrier breaks the locking order. This will cause a deadlock with journal commit threads, ocfs2cmt will get write lock of journal->j_trans_barrier first, then it wakes up kjournald2 to do the commit work, at last it waits until done. To commit journal, kjournald2 needs flushing data first, it needs get the cache page lock. Since some ocfs2 cluster locks are holding by write process, this deadlock may hung the whole cluster. unlock pages before ocfs2_run_deallocs() can fix the locking order, also put unlock before ocfs2_commit_trans() to make page lock is unlocked before j_trans_barrier to preserve unlocking order. Signed-off-by: Junxiao Bi Reviewed-by: Wengang Wang Reviewed-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/aops.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 20dfec72e903..f998c6009ad4 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -917,7 +917,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) } } -static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) { int i; @@ -938,7 +938,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) page_cache_release(wc->w_target_page); } ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); +} +static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +{ + ocfs2_unlock_pages(wc); brelse(wc->w_di_bh); kfree(wc); } @@ -2060,11 +2064,19 @@ out_write_size: di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); ocfs2_journal_dirty(handle, wc->w_di_bh); + /* unlock pages before dealloc since it needs acquiring j_trans_barrier + * lock, or it will cause a deadlock since journal commit threads holds + * this lock and will ask for the page lock when flushing the data. + * put it here to preserve the unlock order. + */ + ocfs2_unlock_pages(wc); + ocfs2_commit_trans(osb, handle); ocfs2_run_deallocs(osb, &wc->w_dealloc); - ocfs2_free_write_ctxt(wc); + brelse(wc->w_di_bh); + kfree(wc); return copied; } From f8510534c0878bce9c2d019c850ea507d72f8d1d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 30 Nov 2014 20:38:40 +0100 Subject: [PATCH 120/214] ath9k_hw: fix hardware queue allocation commit ad8fdccf9c197a89e2d2fa78c453283dcc2c343f upstream. The driver passes the desired hardware queue index for a WMM data queue in qinfo->tqi_subtype. This was ignored in ath9k_hw_setuptxqueue, which instead relied on the order in which the function is called. Reported-by: Hubert Feurstein Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/mac.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c index 566109a40fb3..941b08b71308 100644 --- a/drivers/net/wireless/ath/ath9k/mac.c +++ b/drivers/net/wireless/ath/ath9k/mac.c @@ -311,14 +311,7 @@ int ath9k_hw_setuptxqueue(struct ath_hw *ah, enum ath9k_tx_queue type, q = ATH9K_NUM_TX_QUEUES - 3; break; case ATH9K_TX_QUEUE_DATA: - for (q = 0; q < ATH9K_NUM_TX_QUEUES; q++) - if (ah->txq[q].tqi_type == - ATH9K_TX_QUEUE_INACTIVE) - break; - if (q == ATH9K_NUM_TX_QUEUES) { - ath_err(common, "No available TX queue\n"); - return -1; - } + q = qinfo->tqi_subtype; break; default: ath_err(common, "Invalid TX queue type: %u\n", type); From a510bea7571d651562bbdc8fec6ce2725a2ad0ab Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 30 Nov 2014 20:38:41 +0100 Subject: [PATCH 121/214] ath9k: fix BE/BK queue order commit 78063d81d353e10cbdd279c490593113b8fdae1c upstream. Hardware queues are ordered by priority. Use queue index 0 for BK, which has lower priority than BE. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/hw.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index ae3034374bc4..d7d9e311089f 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -215,8 +215,8 @@ #define AH_WOW_BEACON_MISS BIT(3) enum ath_hw_txq_subtype { - ATH_TXQ_AC_BE = 0, - ATH_TXQ_AC_BK = 1, + ATH_TXQ_AC_BK = 0, + ATH_TXQ_AC_BE = 1, ATH_TXQ_AC_VI = 2, ATH_TXQ_AC_VO = 3, }; From 5e5740524c3c8c4127d6e0eab62c619f6650e017 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Fri, 28 Nov 2014 13:49:10 +0100 Subject: [PATCH 122/214] can: peak_usb: fix cleanup sequence order in case of error during init commit af35d0f1cce7a990286e2b94c260a2c2d2a0e4b0 upstream. This patch sets the correct reverse sequence order to the instructions set to run, when any failure occurs during the initialization steps. It also adds the missing unregistration call of the can device if the failure appears after having been registered. Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index a0f647f92bf5..3a220d2f2ee1 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -727,7 +727,7 @@ static int peak_usb_create_dev(struct peak_usb_adapter *peak_usb_adapter, dev->cmd_buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); if (!dev->cmd_buf) { err = -ENOMEM; - goto lbl_set_intf_data; + goto lbl_free_candev; } dev->udev = usb_dev; @@ -766,7 +766,7 @@ static int peak_usb_create_dev(struct peak_usb_adapter *peak_usb_adapter, err = register_candev(netdev); if (err) { dev_err(&intf->dev, "couldn't register CAN device: %d\n", err); - goto lbl_free_cmd_buf; + goto lbl_restore_intf_data; } if (dev->prev_siblings) @@ -779,14 +779,14 @@ static int peak_usb_create_dev(struct peak_usb_adapter *peak_usb_adapter, if (dev->adapter->dev_init) { err = dev->adapter->dev_init(dev); if (err) - goto lbl_free_cmd_buf; + goto lbl_unregister_candev; } /* set bus off */ if (dev->adapter->dev_set_bus) { err = dev->adapter->dev_set_bus(dev, 0); if (err) - goto lbl_free_cmd_buf; + goto lbl_unregister_candev; } /* get device number early */ @@ -798,11 +798,14 @@ static int peak_usb_create_dev(struct peak_usb_adapter *peak_usb_adapter, return 0; -lbl_free_cmd_buf: +lbl_unregister_candev: + unregister_candev(netdev); + +lbl_restore_intf_data: + usb_set_intfdata(intf, dev->prev_siblings); kfree(dev->cmd_buf); -lbl_set_intf_data: - usb_set_intfdata(intf, dev->prev_siblings); +lbl_free_candev: free_candev(netdev); return err; From 413706774c9eec19a0484392a278bf3aafd11d3b Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Fri, 28 Nov 2014 14:08:48 +0100 Subject: [PATCH 123/214] can: peak_usb: fix memset() usage commit dc50ddcd4c58a5a0226038307d6ef884bec9f8c2 upstream. This patchs fixes a misplaced call to memset() that fills the request buffer with 0. The problem was with sending PCAN_USBPRO_REQ_FCT requests, the content set by the caller was thus lost. With this patch, the memory area is zeroed only when requesting info from the device. Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c index 263dd921edc4..f7f796a2c50b 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c @@ -333,8 +333,6 @@ static int pcan_usb_pro_send_req(struct peak_usb_device *dev, int req_id, if (!(dev->state & PCAN_USB_STATE_CONNECTED)) return 0; - memset(req_addr, '\0', req_size); - req_type = USB_TYPE_VENDOR | USB_RECIP_OTHER; switch (req_id) { @@ -345,6 +343,7 @@ static int pcan_usb_pro_send_req(struct peak_usb_device *dev, int req_id, default: p = usb_rcvctrlpipe(dev->udev, 0); req_type |= USB_DIR_IN; + memset(req_addr, '\0', req_size); break; } From 81cb80b578c59a52a17cfd7c7a0571be24906b15 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 21 Nov 2014 16:56:12 +0000 Subject: [PATCH 124/214] swiotlb-xen: pass dev_addr to swiotlb_tbl_unmap_single commit 2c3fc8d26dd09b9d7069687eead849ee81c78e46 upstream. Need to pass the pointer within the swiotlb internal buffer to the swiotlb library, that in the case of xen_unmap_single is dev_addr, not paddr. Signed-off-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- drivers/xen/swiotlb-xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 1d94316f0ea4..301b08496478 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -390,7 +390,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, /* NOTE: We use dev_addr here, not paddr! */ if (is_xen_swiotlb_buffer(dev_addr)) { - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); + swiotlb_tbl_unmap_single(hwdev, dev_addr, size, dir); return; } From 68956d1e3aa9b2c417c6b2c0fbd3dcdd31bb1c38 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 30 Nov 2014 21:52:57 +0100 Subject: [PATCH 125/214] ath5k: fix hardware queue index assignment commit 9e4982f6a51a2442f1bb588fee42521b44b4531c upstream. Like with ath9k, ath5k queues also need to be ordered by priority. queue_info->tqi_subtype already contains the correct index, so use it instead of relying on the order of ath5k_hw_setup_tx_queue calls. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath5k/qcu.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath5k/qcu.c b/drivers/net/wireless/ath/ath5k/qcu.c index 65fe929529a8..3bfd0b88016e 100644 --- a/drivers/net/wireless/ath/ath5k/qcu.c +++ b/drivers/net/wireless/ath/ath5k/qcu.c @@ -225,13 +225,7 @@ ath5k_hw_setup_tx_queue(struct ath5k_hw *ah, enum ath5k_tx_queue queue_type, } else { switch (queue_type) { case AR5K_TX_QUEUE_DATA: - for (queue = AR5K_TX_QUEUE_ID_DATA_MIN; - ah->ah_txq[queue].tqi_type != - AR5K_TX_QUEUE_INACTIVE; queue++) { - - if (queue > AR5K_TX_QUEUE_ID_DATA_MAX) - return -EINVAL; - } + queue = queue_info->tqi_subtype; break; case AR5K_TX_QUEUE_UAPSD: queue = AR5K_TX_QUEUE_ID_UAPSD; From c60c3ab7c63c70cf53895eb661e2a3fd2c6faa49 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 19 Nov 2014 18:29:02 +0100 Subject: [PATCH 126/214] ASoC: sigmadsp: Refuse to load firmware files with a non-supported version commit 50c0f21b42dd4cd02b51f82274f66912d9a7fa32 upstream. Make sure to check the version field of the firmware header to make sure to not accidentally try to parse a firmware file with a different layout. Trying to do so can result in loading invalid firmware code to the device. Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/sigmadsp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/sigmadsp.c b/sound/soc/codecs/sigmadsp.c index 4068f2491232..bb3878c9625f 100644 --- a/sound/soc/codecs/sigmadsp.c +++ b/sound/soc/codecs/sigmadsp.c @@ -176,6 +176,13 @@ static int _process_sigma_firmware(struct device *dev, goto done; } + if (ssfw_head->version != 1) { + dev_err(dev, + "Failed to load firmware: Invalid version %d. Supported firmware versions: 1\n", + ssfw_head->version); + goto done; + } + crc = crc32(0, fw->data + sizeof(*ssfw_head), fw->size - sizeof(*ssfw_head)); pr_debug("%s: crc=%x\n", __func__, crc); From ef3a852c749491082bea51e1f0b4ee27afb2b94c Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Mon, 24 Nov 2014 15:32:36 +0200 Subject: [PATCH 127/214] ASoC: max98090: Fix ill-defined sidetone route commit 48826ee590da03e9882922edf96d8d27bdfe9552 upstream. Commit 5fe5b767dc6f ("ASoC: dapm: Do not pretend to support controls for non mixer/mux widgets") revealed ill-defined control in a route between "STENL Mux" and DACs in max98090.c: max98090 i2c-193C9890:00: Control not supported for path STENL Mux -> [NULL] -> DACL max98090 i2c-193C9890:00: ASoC: no dapm match for STENL Mux --> NULL --> DACL max98090 i2c-193C9890:00: ASoC: Failed to add route STENL Mux -> NULL -> DACL max98090 i2c-193C9890:00: Control not supported for path STENL Mux -> [NULL] -> DACR max98090 i2c-193C9890:00: ASoC: no dapm match for STENL Mux --> NULL --> DACR max98090 i2c-193C9890:00: ASoC: Failed to add route STENL Mux -> NULL -> DACR Since there is no control between "STENL Mux" and DACs the control name must be NULL not "NULL". Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/max98090.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index 76bfeb3c3e30..be8de7ce1cda 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -1364,8 +1364,8 @@ static const struct snd_soc_dapm_route max98090_dapm_routes[] = { {"STENL Mux", "Sidetone Left", "DMICL"}, {"STENR Mux", "Sidetone Right", "ADCR"}, {"STENR Mux", "Sidetone Right", "DMICR"}, - {"DACL", "NULL", "STENL Mux"}, - {"DACR", "NULL", "STENL Mux"}, + {"DACL", NULL, "STENL Mux"}, + {"DACR", NULL, "STENL Mux"}, {"AIFINL", NULL, "SHDN"}, {"AIFINR", NULL, "SHDN"}, From 97a3f651913ffb8e13357913d6e93fb8fbacab3a Mon Sep 17 00:00:00 2001 From: Andrew Jackson Date: Fri, 19 Dec 2014 16:18:05 +0000 Subject: [PATCH 128/214] ASoC: dwc: Ensure FIFOs are flushed to prevent channel swap commit 3475c3d034d7f276a474c8bd53f44b48c8bf669d upstream. Flush the FIFOs when the stream is prepared for use. This avoids an inadvertent swapping of the left/right channels if the FIFOs are not empty at startup. Signed-off-by: Andrew Jackson Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/dwc/designware_i2s.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/dwc/designware_i2s.c b/sound/soc/dwc/designware_i2s.c index 593a3ea12d4c..489a9abf112b 100644 --- a/sound/soc/dwc/designware_i2s.c +++ b/sound/soc/dwc/designware_i2s.c @@ -263,6 +263,19 @@ static void dw_i2s_shutdown(struct snd_pcm_substream *substream, snd_soc_dai_set_dma_data(dai, substream, NULL); } +static int dw_i2s_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct dw_i2s_dev *dev = snd_soc_dai_get_drvdata(dai); + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + i2s_write_reg(dev->i2s_base, TXFFR, 1); + else + i2s_write_reg(dev->i2s_base, RXFFR, 1); + + return 0; +} + static int dw_i2s_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { @@ -294,6 +307,7 @@ static struct snd_soc_dai_ops dw_i2s_dai_ops = { .startup = dw_i2s_startup, .shutdown = dw_i2s_shutdown, .hw_params = dw_i2s_hw_params, + .prepare = dw_i2s_prepare, .trigger = dw_i2s_trigger, }; From 9670f1a473dc2ce7620e91445c1fe3d9a275356d Mon Sep 17 00:00:00 2001 From: Myron Stowe Date: Thu, 30 Oct 2014 11:54:37 -0600 Subject: [PATCH 129/214] PCI: Restore detection of read-only BARs commit 36e8164882ca6d3c41cb91e6f09a3ed236841f80 upstream. Commit 6ac665c63dca ("PCI: rewrite PCI BAR reading code") masked off low-order bits from 'l', but not from 'sz'. Both are passed to pci_size(), which compares 'base == maxbase' to check for read-only BARs. The masking of 'l' means that comparison will never be 'true', so the check for read-only BARs no longer works. Resolve this by also masking off the low-order bits of 'sz' before passing it into pci_size() as 'maxbase'. With this change, pci_size() will once again catch the problems that have been encountered to date: - AGP aperture BAR of AMD-7xx host bridges: if the AGP window is disabled, this BAR is read-only and read as 0x00000008 [1] - BARs 0-4 of ALi IDE controllers can be non-zero and read-only [1] - Intel Sandy Bridge - Thermal Management Controller [8086:0103]; BAR 0 returning 0xfed98004 [2] - Intel Xeon E5 v3/Core i7 Power Control Unit [8086:2fc0]; Bar 0 returning 0x00001a [3] Link: [1] https://git.kernel.org/cgit/linux/kernel/git/tglx/history.git/commit/drivers/pci/probe.c?id=1307ef6621991f1c4bc3cec1b5a4ebd6fd3d66b9 ("PCI: probing read-only BARs" (pre-git)) Link: [2] https://bugzilla.kernel.org/show_bug.cgi?id=43331 Link: [3] https://bugzilla.kernel.org/show_bug.cgi?id=85991 Reported-by: William Unruh Reported-by: Martin Lucina Signed-off-by: Myron Stowe Signed-off-by: Bjorn Helgaas CC: Matthew Wilcox Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ea37072e8bf2..034a4d2964d6 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -210,14 +210,17 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, res->flags |= IORESOURCE_SIZEALIGN; if (res->flags & IORESOURCE_IO) { l &= PCI_BASE_ADDRESS_IO_MASK; + sz &= PCI_BASE_ADDRESS_IO_MASK; mask = PCI_BASE_ADDRESS_IO_MASK & (u32) IO_SPACE_LIMIT; } else { l &= PCI_BASE_ADDRESS_MEM_MASK; + sz &= PCI_BASE_ADDRESS_MEM_MASK; mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; } } else { res->flags |= (l & IORESOURCE_ROM_ENABLE); l &= PCI_ROM_ADDRESS_MASK; + sz &= PCI_ROM_ADDRESS_MASK; mask = (u32)PCI_ROM_ADDRESS_MASK; } From af74a863b9999e4d2f32140a5edc50e3de5e5a66 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 12 Sep 2014 11:32:24 -0700 Subject: [PATCH 130/214] pstore-ram: Fix hangs by using write-combine mappings commit 7ae9cb81933515dc7db1aa3c47ef7653717e3090 upstream. Currently trying to use pstore on at least ARMs can hang as we're mapping the peristent RAM with pgprot_noncached(). On ARMs, pgprot_noncached() will actually make the memory strongly ordered, and as the atomic operations pstore uses are implementation defined for strongly ordered memory, they may not work. So basically atomic operations have undefined behavior on ARM for device or strongly ordered memory types. Let's fix the issue by using write-combine variants for mappings. This corresponds to normal, non-cacheable memory on ARM. For many other architectures, this change does not change the mapping type as by default we have: #define pgprot_writecombine pgprot_noncached The reason why pgprot_noncached() was originaly used for pstore is because Colin Cross had observed lost debug prints right before a device hanging write operation on some systems. For the platforms supporting pgprot_noncached(), we can add a an optional configuration option to support that. But let's get pstore working first before adding new features. Cc: Arnd Bergmann Cc: Anton Vorontsov Cc: Colin Cross Cc: Olof Johansson Cc: linux-kernel@vger.kernel.org Acked-by: Kees Cook Signed-off-by: Rob Herring [tony@atomide.com: updated description] Signed-off-by: Tony Lindgren Signed-off-by: Tony Luck Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 59337326e288..50d3dcdd35fb 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -345,7 +345,7 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size) page_start = start - offset_in_page(start); page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE); - prot = pgprot_noncached(PAGE_KERNEL); + prot = pgprot_writecombine(PAGE_KERNEL); pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL); if (!pages) { @@ -372,7 +372,7 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size) return NULL; } - return ioremap(start, size); + return ioremap_wc(start, size); } static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, From c0d9d658fa9410fa8f4a8b7eb216f236102f02f2 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 16 Sep 2014 13:50:01 -0700 Subject: [PATCH 131/214] pstore-ram: Allow optional mapping with pgprot_noncached commit 027bc8b08242c59e19356b4b2c189f2d849ab660 upstream. On some ARMs the memory can be mapped pgprot_noncached() and still be working for atomic operations. As pointed out by Colin Cross , in some cases you do want to use pgprot_noncached() if the SoC supports it to see a debug printk just before a write hanging the system. On ARMs, the atomic operations on strongly ordered memory are implementation defined. So let's provide an optional kernel parameter for configuring pgprot_noncached(), and use pgprot_writecombine() by default. Cc: Arnd Bergmann Cc: Rob Herring Cc: Randy Dunlap Cc: Anton Vorontsov Cc: Colin Cross Cc: Olof Johansson Cc: Russell King Acked-by: Kees Cook Signed-off-by: Tony Lindgren Signed-off-by: Tony Luck Signed-off-by: Greg Kroah-Hartman --- Documentation/ramoops.txt | 13 +++++++++++-- fs/pstore/ram.c | 13 +++++++++++-- fs/pstore/ram_core.c | 31 ++++++++++++++++++++++--------- include/linux/pstore_ram.h | 4 +++- 4 files changed, 47 insertions(+), 14 deletions(-) diff --git a/Documentation/ramoops.txt b/Documentation/ramoops.txt index 69b3cac4749d..5d8675615e59 100644 --- a/Documentation/ramoops.txt +++ b/Documentation/ramoops.txt @@ -14,11 +14,19 @@ survive after a restart. 1. Ramoops concepts -Ramoops uses a predefined memory area to store the dump. The start and size of -the memory area are set using two variables: +Ramoops uses a predefined memory area to store the dump. The start and size +and type of the memory area are set using three variables: * "mem_address" for the start * "mem_size" for the size. The memory size will be rounded down to a power of two. + * "mem_type" to specifiy if the memory type (default is pgprot_writecombine). + +Typically the default value of mem_type=0 should be used as that sets the pstore +mapping to pgprot_writecombine. Setting mem_type=1 attempts to use +pgprot_noncached, which only works on some platforms. This is because pstore +depends on atomic operations. At least on ARM, pgprot_noncached causes the +memory to be mapped strongly ordered, and atomic operations on strongly ordered +memory are implementation defined, and won't work on many ARMs such as omaps. The memory area is divided into "record_size" chunks (also rounded down to power of two) and each oops/panic writes a "record_size" chunk of @@ -55,6 +63,7 @@ Setting the ramoops parameters can be done in 2 different manners: static struct ramoops_platform_data ramoops_data = { .mem_size = <...>, .mem_address = <...>, + .mem_type = <...>, .record_size = <...>, .dump_oops = <...>, .ecc = <...>, diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1376e5a8f0d6..42d5911c7e29 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -61,6 +61,11 @@ module_param(mem_size, ulong, 0400); MODULE_PARM_DESC(mem_size, "size of reserved RAM used to store oops/panic logs"); +static unsigned int mem_type; +module_param(mem_type, uint, 0600); +MODULE_PARM_DESC(mem_type, + "set to 1 to try to use unbuffered memory (default 0)"); + static int dump_oops = 1; module_param(dump_oops, int, 0600); MODULE_PARM_DESC(dump_oops, @@ -79,6 +84,7 @@ struct ramoops_context { struct persistent_ram_zone *fprz; phys_addr_t phys_addr; unsigned long size; + unsigned int memtype; size_t record_size; size_t console_size; size_t ftrace_size; @@ -331,7 +337,8 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, size_t sz = cxt->record_size; cxt->przs[i] = persistent_ram_new(*paddr, sz, 0, - &cxt->ecc_info); + &cxt->ecc_info, + cxt->memtype); if (IS_ERR(cxt->przs[i])) { err = PTR_ERR(cxt->przs[i]); dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n", @@ -361,7 +368,7 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt, return -ENOMEM; } - *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info); + *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype); if (IS_ERR(*prz)) { int err = PTR_ERR(*prz); @@ -411,6 +418,7 @@ static int ramoops_probe(struct platform_device *pdev) cxt->dump_read_cnt = 0; cxt->size = pdata->mem_size; cxt->phys_addr = pdata->mem_address; + cxt->memtype = pdata->mem_type; cxt->record_size = pdata->record_size; cxt->console_size = pdata->console_size; cxt->ftrace_size = pdata->ftrace_size; @@ -541,6 +549,7 @@ static void ramoops_register_dummy(void) dummy_data->mem_size = mem_size; dummy_data->mem_address = mem_address; + dummy_data->mem_type = 0; dummy_data->record_size = record_size; dummy_data->console_size = ramoops_console_size; dummy_data->ftrace_size = ramoops_ftrace_size; diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 50d3dcdd35fb..6ff97553331b 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -333,7 +333,8 @@ void persistent_ram_zap(struct persistent_ram_zone *prz) persistent_ram_update_header_ecc(prz); } -static void *persistent_ram_vmap(phys_addr_t start, size_t size) +static void *persistent_ram_vmap(phys_addr_t start, size_t size, + unsigned int memtype) { struct page **pages; phys_addr_t page_start; @@ -345,7 +346,10 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size) page_start = start - offset_in_page(start); page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE); - prot = pgprot_writecombine(PAGE_KERNEL); + if (memtype) + prot = pgprot_noncached(PAGE_KERNEL); + else + prot = pgprot_writecombine(PAGE_KERNEL); pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL); if (!pages) { @@ -364,27 +368,35 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size) return vaddr; } -static void *persistent_ram_iomap(phys_addr_t start, size_t size) +static void *persistent_ram_iomap(phys_addr_t start, size_t size, + unsigned int memtype) { + void *va; + if (!request_mem_region(start, size, "persistent_ram")) { pr_err("request mem region (0x%llx@0x%llx) failed\n", (unsigned long long)size, (unsigned long long)start); return NULL; } - return ioremap_wc(start, size); + if (memtype) + va = ioremap(start, size); + else + va = ioremap_wc(start, size); + + return va; } static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size, - struct persistent_ram_zone *prz) + struct persistent_ram_zone *prz, int memtype) { prz->paddr = start; prz->size = size; if (pfn_valid(start >> PAGE_SHIFT)) - prz->vaddr = persistent_ram_vmap(start, size); + prz->vaddr = persistent_ram_vmap(start, size, memtype); else - prz->vaddr = persistent_ram_iomap(start, size); + prz->vaddr = persistent_ram_iomap(start, size, memtype); if (!prz->vaddr) { pr_err("%s: Failed to map 0x%llx pages at 0x%llx\n", __func__, @@ -452,7 +464,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz) } struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, - u32 sig, struct persistent_ram_ecc_info *ecc_info) + u32 sig, struct persistent_ram_ecc_info *ecc_info, + unsigned int memtype) { struct persistent_ram_zone *prz; int ret = -ENOMEM; @@ -463,7 +476,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, goto err; } - ret = persistent_ram_buffer_map(start, size, prz); + ret = persistent_ram_buffer_map(start, size, prz, memtype); if (ret) goto err; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9974975d40db..4af3fdc85b01 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -53,7 +53,8 @@ struct persistent_ram_zone { }; struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, - u32 sig, struct persistent_ram_ecc_info *ecc_info); + u32 sig, struct persistent_ram_ecc_info *ecc_info, + unsigned int memtype); void persistent_ram_free(struct persistent_ram_zone *prz); void persistent_ram_zap(struct persistent_ram_zone *prz); @@ -76,6 +77,7 @@ ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, struct ramoops_platform_data { unsigned long mem_size; unsigned long mem_address; + unsigned int mem_type; unsigned long record_size; unsigned long console_size; unsigned long ftrace_size; From 398dd8fd97a3cf7777c5ee0fb1b52b8ec2edf9b8 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 27 Oct 2014 00:46:11 +0100 Subject: [PATCH 132/214] UBI: Fix invalid vfree() commit f38aed975c0c3645bbdfc5ebe35726e64caaf588 upstream. The logic of vfree()'ing vol->upd_buf is tied to vol->updating. In ubi_start_update() vol->updating is set long before vmalloc()'ing vol->upd_buf. If we encounter a write failure in ubi_start_update() before vmalloc() the UBI device release function will try to vfree() vol->upd_buf because vol->updating is set. Fix this by allocating vol->upd_buf directly after setting vol->updating. Fixes: [ 31.559338] UBI warning: vol_cdev_release: update of volume 2 not finished, volume is damaged [ 31.559340] ------------[ cut here ]------------ [ 31.559343] WARNING: CPU: 1 PID: 2747 at mm/vmalloc.c:1446 __vunmap+0xe3/0x110() [ 31.559344] Trying to vfree() nonexistent vm area (ffffc90001f2b000) [ 31.559345] Modules linked in: [ 31.565620] 0000000000000bba ffff88002a0cbdb0 ffffffff818f0497 ffff88003b9ba148 [ 31.566347] ffff88002a0cbde0 ffffffff8156f515 ffff88003b9ba148 0000000000000bba [ 31.567073] 0000000000000000 0000000000000000 ffff88002a0cbe88 ffffffff8156c10a [ 31.567793] Call Trace: [ 31.568034] [] dump_stack+0x4e/0x7a [ 31.568510] [] ubi_io_write_vid_hdr+0x155/0x160 [ 31.569084] [] ubi_eba_write_leb+0x23a/0x870 [ 31.569628] [] vol_cdev_write+0x226/0x380 [ 31.570155] [] vfs_write+0xb5/0x1f0 [ 31.570627] [] SyS_pwrite64+0x6a/0xa0 [ 31.571123] [] system_call_fastpath+0x16/0x1b Signed-off-by: Richard Weinberger Signed-off-by: Artem Bityutskiy Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/upd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index ec2c2dc1c1ca..2a1b6e037e1a 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -133,6 +133,10 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, ubi_assert(!vol->updating && !vol->changing_leb); vol->updating = 1; + vol->upd_buf = vmalloc(ubi->leb_size); + if (!vol->upd_buf) + return -ENOMEM; + err = set_update_marker(ubi, vol); if (err) return err; @@ -152,14 +156,12 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, err = clear_update_marker(ubi, vol, 0); if (err) return err; + + vfree(vol->upd_buf); vol->updating = 0; return 0; } - vol->upd_buf = vmalloc(ubi->leb_size); - if (!vol->upd_buf) - return -ENOMEM; - vol->upd_ebs = div_u64(bytes + vol->usable_leb_size - 1, vol->usable_leb_size); vol->upd_bytes = bytes; From ccebbb7e53c8d028961340d3081d69658f8a5bef Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 6 Nov 2014 16:47:49 +0100 Subject: [PATCH 133/214] UBI: Fix double free after do_sync_erase() commit aa5ad3b6eb8feb2399a5d26c8fb0060561bb9534 upstream. If the erase worker is unable to erase a PEB it will free the ubi_wl_entry itself. The failing ubi_wl_entry must not free()'d again after do_sync_erase() returns. Signed-off-by: Richard Weinberger Signed-off-by: Artem Bityutskiy Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index c95bfb183c62..49e570abe58b 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1209,7 +1209,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, err = do_sync_erase(ubi, e1, vol_id, lnum, 0); if (err) { - kmem_cache_free(ubi_wl_entry_slab, e1); if (e2) kmem_cache_free(ubi_wl_entry_slab, e2); goto out_ro; @@ -1223,10 +1222,8 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase", e2->pnum, vol_id, lnum); err = do_sync_erase(ubi, e2, vol_id, lnum, 0); - if (err) { - kmem_cache_free(ubi_wl_entry_slab, e2); + if (err) goto out_ro; - } } dbg_wl("done"); @@ -1262,10 +1259,9 @@ out_not_moved: ubi_free_vid_hdr(ubi, vid_hdr); err = do_sync_erase(ubi, e2, vol_id, lnum, torture); - if (err) { - kmem_cache_free(ubi_wl_entry_slab, e2); + if (err) goto out_ro; - } + mutex_unlock(&ubi->move_mutex); return 0; From b658f2ad07e5e15a8e29bb95740452bd5b6a8eea Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 26 Nov 2014 09:42:10 +0800 Subject: [PATCH 134/214] iommu/vt-d: Fix an off-by-one bug in __domain_mapping() commit cc4f14aa170d895c9a43bdb56f62070c8a6da908 upstream. There's an off-by-one bug in function __domain_mapping(), which may trigger the BUG_ON(nr_pages < lvl_pages) when (nr_pages + 1) & superpage_mask == 0 The issue was introduced by commit 9051aa0268dc "intel-iommu: Combine domain_pfn_mapping() and domain_sg_mapping()", which sets sg_res to "nr_pages + 1" to avoid some of the 'sg_res==0' code paths. It's safe to remove extra "+1" because sg_res is only used to calculate page size now. Reported-And-Tested-by: Sudeep Dutt Signed-off-by: Jiang Liu Acked-By: David Woodhouse Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6771e3c94801..db4e10d4c7f5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1796,7 +1796,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct dma_pte *first_pte = NULL, *pte = NULL; phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - unsigned long sg_res; + unsigned long sg_res = 0; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; @@ -1807,10 +1807,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; - if (sg) - sg_res = 0; - else { - sg_res = nr_pages + 1; + if (!sg) { + sg_res = nr_pages; pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; } From caa853b3d80473ec7a646a7e163ddf8bc1f4ef46 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Thu, 20 Nov 2014 00:46:37 +0800 Subject: [PATCH 135/214] HID: i2c-hid: fix race condition reading reports commit 6296f4a8eb86f9abcc370fb7a1a116b8441c17fd upstream. Current driver uses a common buffer for reading reports either synchronously in i2c_hid_get_raw_report() and asynchronously in the interrupt handler. There is race condition if an interrupt arrives immediately after the report is received in i2c_hid_get_raw_report(); the common buffer is modified by the interrupt handler with the new report and then i2c_hid_get_raw_report() proceed using wrong data. Fix it by using a separate buffers for synchronous reports. Signed-off-by: Jean-Baptiste Maneyrol [Antonio Borneo: cleanup, rebase to v3.17, submit mainline] Signed-off-by: Antonio Borneo Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 2b1799a3b212..25b9844f7e41 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -134,6 +134,7 @@ struct i2c_hid { * descriptor. */ unsigned int bufsize; /* i2c buffer size */ char *inbuf; /* Input buffer */ + char *rawbuf; /* Raw Input buffer */ char *cmdbuf; /* Command buffer */ char *argsbuf; /* Command arguments buffer */ @@ -471,9 +472,11 @@ static void i2c_hid_find_max_report(struct hid_device *hid, unsigned int type, static void i2c_hid_free_buffers(struct i2c_hid *ihid) { kfree(ihid->inbuf); + kfree(ihid->rawbuf); kfree(ihid->argsbuf); kfree(ihid->cmdbuf); ihid->inbuf = NULL; + ihid->rawbuf = NULL; ihid->cmdbuf = NULL; ihid->argsbuf = NULL; ihid->bufsize = 0; @@ -489,10 +492,11 @@ static int i2c_hid_alloc_buffers(struct i2c_hid *ihid, size_t report_size) report_size; /* report */ ihid->inbuf = kzalloc(report_size, GFP_KERNEL); + ihid->rawbuf = kzalloc(report_size, GFP_KERNEL); ihid->argsbuf = kzalloc(args_len, GFP_KERNEL); ihid->cmdbuf = kzalloc(sizeof(union command) + args_len, GFP_KERNEL); - if (!ihid->inbuf || !ihid->argsbuf || !ihid->cmdbuf) { + if (!ihid->inbuf || !ihid->rawbuf || !ihid->argsbuf || !ihid->cmdbuf) { i2c_hid_free_buffers(ihid); return -ENOMEM; } @@ -519,12 +523,12 @@ static int i2c_hid_get_raw_report(struct hid_device *hid, ret = i2c_hid_get_report(client, report_type == HID_FEATURE_REPORT ? 0x03 : 0x01, - report_number, ihid->inbuf, ask_count); + report_number, ihid->rawbuf, ask_count); if (ret < 0) return ret; - ret_count = ihid->inbuf[0] | (ihid->inbuf[1] << 8); + ret_count = ihid->rawbuf[0] | (ihid->rawbuf[1] << 8); if (ret_count <= 2) return 0; @@ -533,7 +537,7 @@ static int i2c_hid_get_raw_report(struct hid_device *hid, /* The query buffer contains the size, dropping it in the reply */ count = min(count, ret_count - 2); - memcpy(buf, ihid->inbuf + 2, count); + memcpy(buf, ihid->rawbuf + 2, count); return count; } From 32b57c08f4cca3bf821606a2f598fc397a143127 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Thu, 11 Dec 2014 16:02:45 -0800 Subject: [PATCH 136/214] HID: i2c-hid: prevent buffer overflow in early IRQ commit d1c7e29e8d276c669e8790bb8be9f505ddc48888 upstream. Before ->start() is called, bufsize size is set to HID_MIN_BUFFER_SIZE, 64 bytes. While processing the IRQ, we were asking to receive up to wMaxInputLength bytes, which can be bigger than 64 bytes. Later, when ->start is run, a proper bufsize will be calculated. Given wMaxInputLength is said to be unreliable in other part of the code, set to receive only what we can even if it results in truncated reports. Signed-off-by: Gwendal Grignou Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 25b9844f7e41..469daa04dadb 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -341,7 +341,7 @@ static int i2c_hid_hwreset(struct i2c_client *client) static void i2c_hid_get_input(struct i2c_hid *ihid) { int ret, ret_size; - int size = le16_to_cpu(ihid->hdesc.wMaxInputLength); + int size = ihid->bufsize; ret = i2c_master_recv(ihid->client, ihid->inbuf, size); if (ret != size) { From 94bb429ef60ffe198fbd250ce852d57d06fd02e4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Jan 2015 15:32:31 +0300 Subject: [PATCH 137/214] HID: roccat: potential out of bounds in pyra_sysfs_write_settings() commit 606185b20caf4c57d7e41e5a5ea4aff460aef2ab upstream. This is a static checker fix. We write some binary settings to the sysfs file. One of the settings is the "->startup_profile". There isn't any checking to make sure it fits into the pyra->profile_settings[] array in the profile_activated() function. I added a check to pyra_sysfs_write_settings() in both places because I wasn't positive that the other callers were correct. Signed-off-by: Dan Carpenter Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-roccat-pyra.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c index d4f1e3bee590..264ddc4a0118 100644 --- a/drivers/hid/hid-roccat-pyra.c +++ b/drivers/hid/hid-roccat-pyra.c @@ -35,6 +35,8 @@ static struct class *pyra_class; static void profile_activated(struct pyra_device *pyra, unsigned int new_profile) { + if (new_profile >= ARRAY_SIZE(pyra->profile_settings)) + return; pyra->actual_profile = new_profile; pyra->actual_cpi = pyra->profile_settings[pyra->actual_profile].y_cpi; } @@ -236,9 +238,11 @@ static ssize_t pyra_sysfs_write_settings(struct file *fp, if (off != 0 || count != PYRA_SIZE_SETTINGS) return -EINVAL; - mutex_lock(&pyra->pyra_lock); - settings = (struct pyra_settings const *)buf; + if (settings->startup_profile >= ARRAY_SIZE(pyra->profile_settings)) + return -EINVAL; + + mutex_lock(&pyra->pyra_lock); retval = pyra_set_settings(usb_dev, settings); if (retval) { From bee704e2d767aa8936aa2dbb94674a57caa8c866 Mon Sep 17 00:00:00 2001 From: Karl Relton Date: Tue, 16 Dec 2014 15:37:22 +0000 Subject: [PATCH 138/214] HID: add battery quirk for USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO keyboard commit da940db41dcf8c04166f711646df2f35376010aa upstream. Apple bluetooth wireless keyboard (sold in UK) has always reported zero for battery strength no matter what condition the batteries are actually in. With this patch applied (applying same quirk as other Apple keyboards), the battery strength is now correctly reported. Signed-off-by: Karl Relton Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-input.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 012880a2228c..03a6acffed5d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -316,6 +316,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO), + HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, From a0dd9ca4500ed7cb9b21b3e0014ca7beadb4e633 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Sat, 27 Dec 2014 00:28:30 +0200 Subject: [PATCH 139/214] HID: Add a new id 0x501a for Genius MousePen i608X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2bacedada682d5485424f5227f27a3d5d6eb551c upstream. New Genius MousePen i608X devices have a new id 0x501a instead of the old 0x5011 so add a new #define with "_2" appended and change required places. The remaining two checkpatch warnings about line length being over 80 characters are present in the original files too and this patch was made in the same style (no line break). Just adding a new id and changing the required places should make the new device work without any issues according to the bug report in the following url. This patch was made according to and fixes: https://bugzilla.kernel.org/show_bug.cgi?id=67111 Signed-off-by: Giedrius Statkevičius Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + drivers/hid/hid-kye.c | 4 ++++ drivers/hid/usbhid/hid-quirks.c | 1 + 4 files changed, 7 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 81d0e6e1f754..2bd798a7d9aa 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1687,6 +1687,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_ERGO_525V) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_I405X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) }, { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index a1e431f830e3..45c593dbf5cd 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -478,6 +478,7 @@ #define USB_DEVICE_ID_KYE_GPEN_560 0x5003 #define USB_DEVICE_ID_KYE_EASYPEN_I405X 0x5010 #define USB_DEVICE_ID_KYE_MOUSEPEN_I608X 0x5011 +#define USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2 0x501a #define USB_DEVICE_ID_KYE_EASYPEN_M610X 0x5013 #define USB_VENDOR_ID_LABTEC 0x1020 diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index 843f2dd55200..973eed788cc6 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -303,6 +303,7 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, } break; case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: + case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2: if (*rsize == MOUSEPEN_I608X_RDESC_ORIG_SIZE) { rdesc = mousepen_i608x_rdesc_fixed; *rsize = sizeof(mousepen_i608x_rdesc_fixed); @@ -383,6 +384,7 @@ static int kye_probe(struct hid_device *hdev, const struct hid_device_id *id) switch (id->product) { case USB_DEVICE_ID_KYE_EASYPEN_I405X: case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: + case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2: case USB_DEVICE_ID_KYE_EASYPEN_M610X: ret = kye_tablet_enable(hdev); if (ret) { @@ -405,6 +407,8 @@ static const struct hid_device_id kye_devices[] = { USB_DEVICE_ID_KYE_EASYPEN_I405X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, + USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) }, { } diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 0db9a67278ba..5b46a79dcb1f 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -110,6 +110,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_DUOSENSE, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_LTS1, HID_QUIRK_NO_INIT_REPORTS }, From 466ad6591b4c24c5f8a28563dc756d5e2c6d025d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 19 Dec 2014 16:04:11 -0800 Subject: [PATCH 140/214] x86_64, vdso: Fix the vdso address randomization algorithm commit 394f56fe480140877304d342dec46d50dc823d46 upstream. The theory behind vdso randomization is that it's mapped at a random offset above the top of the stack. To avoid wasting a page of memory for an extra page table, the vdso isn't supposed to extend past the lowest PMD into which it can fit. Other than that, the address should be a uniformly distributed address that meets all of the alignment requirements. The current algorithm is buggy: the vdso has about a 50% probability of being at the very end of a PMD. The current algorithm also has a decent chance of failing outright due to incorrect handling of the case where the top of the stack is near the top of its PMD. This fixes the implementation. The paxtest estimate of vdso "randomisation" improves from 11 bits to 18 bits. (Disclaimer: I don't know what the paxtest code is actually calculating.) It's worth noting that this algorithm is inherently biased: the vdso is more likely to end up near the end of its PMD than near the beginning. Ideally we would either nix the PMD sharing requirement or jointly randomize the vdso and the stack to reduce the bias. In the mean time, this is a considerable improvement with basically no risk of compatibility issues, since the allowed outputs of the algorithm are unchanged. As an easy test, doing this: for i in `seq 10000` do grep -P vdso /proc/self/maps |cut -d- -f1 done |sort |uniq -d used to produce lots of output (1445 lines on my most recent run). A tiny subset looks like this: 7fffdfffe000 7fffe01fe000 7fffe05fe000 7fffe07fe000 7fffe09fe000 7fffe0bfe000 7fffe0dfe000 Note the suspicious fe000 endings. With the fix, I get a much more palatable 76 repeated addresses. Reviewed-by: Kees Cook Signed-off-by: Andy Lutomirski Signed-off-by: Greg Kroah-Hartman --- arch/x86/vdso/vma.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 431e87544411..ab6ba35a9357 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -117,30 +117,45 @@ subsys_initcall(init_vdso); struct linux_binprm; -/* Put the vdso above the (randomized) stack with another randomized offset. - This way there is no hole in the middle of address space. - To save memory make sure it is still in the same PTE as the stack top. - This doesn't give that many random bits */ +/* + * Put the vdso above the (randomized) stack with another randomized + * offset. This way there is no hole in the middle of address space. + * To save memory make sure it is still in the same PTE as the stack + * top. This doesn't give that many random bits. + * + * Note that this algorithm is imperfect: the distribution of the vdso + * start address within a PMD is biased toward the end. + * + * Only used for the 64-bit and x32 vdsos. + */ static unsigned long vdso_addr(unsigned long start, unsigned len) { unsigned long addr, end; unsigned offset; - end = (start + PMD_SIZE - 1) & PMD_MASK; + + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. + */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. */ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; if (end >= TASK_SIZE_MAX) end = TASK_SIZE_MAX; end -= len; - /* This loses some more bits than a modulo, but is cheaper */ - offset = get_random_int() & (PTRS_PER_PTE - 1); - addr = start + (offset << PAGE_SHIFT); - if (addr >= end) - addr = end; + + if (end > start) { + offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } /* - * page-align it here so that get_unmapped_area doesn't - * align it wrongfully again to the next page. addr can come in 4K - * unaligned here as a result of stack start randomization. + * Forcibly align the final address in case we have a hardware + * issue that requires alignment for performance reasons. */ - addr = PAGE_ALIGN(addr); addr = align_vdso_addr(addr); return addr; From 04d98e96a02e24987a53f4a879abeae66440e49a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 21 Dec 2014 08:57:46 -0800 Subject: [PATCH 141/214] x86, vdso: Use asm volatile in __getcpu commit 1ddf0b1b11aa8a90cef6706e935fc31c75c406ba upstream. In Linux 3.18 and below, GCC hoists the lsl instructions in the pvclock code all the way to the beginning of __vdso_clock_gettime, slowing the non-paravirt case significantly. For unknown reasons, presumably related to the removal of a branch, the performance issue is gone as of e76b027e6408 x86,vdso: Use LSL unconditionally for vgetcpu but I don't trust GCC enough to expect the problem to stay fixed. There should be no correctness issue, because the __getcpu calls in __vdso_vlock_gettime were never necessary in the first place. Note to stable maintainers: In 3.18 and below, depending on configuration, gcc 4.9.2 generates code like this: 9c3: 44 0f 03 e8 lsl %ax,%r13d 9c7: 45 89 eb mov %r13d,%r11d 9ca: 0f 03 d8 lsl %ax,%ebx This patch won't apply as is to any released kernel, but I'll send a trivial backported version if needed. [ Backported by Andy Lutomirski. Should apply to all affected versions. This fixes a functionality bug as well as a performance bug: buggy kernels can infinite loop in __vdso_clock_gettime on affected compilers. See, for exammple: https://bugzilla.redhat.com/show_bug.cgi?id=1178975 ] Fixes: 51c19b4f5927 x86: vdso: pvclock gettime support Cc: Marcelo Tosatti Acked-by: Paolo Bonzini Signed-off-by: Andy Lutomirski Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/vsyscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 2a46ca720afc..2874be9aef0a 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -34,7 +34,7 @@ static inline unsigned int __getcpu(void) native_read_tscp(&p); } else { /* Load per CPU data from GDT */ - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); } return p; From bdf2a0db176e1de7c93fe7b7c5a74756b976fb33 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 31 Oct 2014 11:13:07 -0600 Subject: [PATCH 142/214] driver core: Fix unbalanced device reference in drivers_probe commit bb34cb6bbd287b57e955bc5cfd42fcde6aaca279 upstream. bus_find_device_by_name() acquires a device reference which is never released. This results in an object leak, which on older kernels results in failure to release all resources of PCI devices. libvirt uses drivers_probe to re-attach devices to the host after assignment and is therefore a common trigger for this leak. Example: # cd /sys/bus/pci/ # dmesg -C # echo 1 > devices/0000\:01\:00.0/sriov_numvfs # echo 0 > devices/0000\:01\:00.0/sriov_numvfs # dmesg | grep 01:10 pci 0000:01:10.0: [8086:10ca] type 00 class 0x020000 kobject: '0000:01:10.0' (ffff8801d79cd0a8): kobject_add_internal: parent: '0000:00:01.0', set: 'devices' kobject: '0000:01:10.0' (ffff8801d79cd0a8): kobject_uevent_env kobject: '0000:01:10.0' (ffff8801d79cd0a8): fill_kobj_path: path = '/devices/pci0000:00/0000:00:01.0/0000:01:10.0' kobject: '0000:01:10.0' (ffff8801d79cd0a8): kobject_uevent_env kobject: '0000:01:10.0' (ffff8801d79cd0a8): fill_kobj_path: path = '/devices/pci0000:00/0000:00:01.0/0000:01:10.0' kobject: '0000:01:10.0' (ffff8801d79cd0a8): kobject_uevent_env kobject: '0000:01:10.0' (ffff8801d79cd0a8): fill_kobj_path: path = '/devices/pci0000:00/0000:00:01.0/0000:01:10.0' kobject: '0000:01:10.0' (ffff8801d79cd0a8): kobject_cleanup, parent (null) kobject: '0000:01:10.0' (ffff8801d79cd0a8): calling ktype release kobject: '0000:01:10.0': free name [kobject freed as expected] # dmesg -C # echo 1 > devices/0000\:01\:00.0/sriov_numvfs # echo 0000:01:10.0 > drivers_probe # echo 0 > devices/0000\:01\:00.0/sriov_numvfs # dmesg | grep 01:10 pci 0000:01:10.0: [8086:10ca] type 00 class 0x020000 kobject: '0000:01:10.0' (ffff8801d79ce0a8): kobject_add_internal: parent: '0000:00:01.0', set: 'devices' kobject: '0000:01:10.0' (ffff8801d79ce0a8): kobject_uevent_env kobject: '0000:01:10.0' (ffff8801d79ce0a8): fill_kobj_path: path = '/devices/pci0000:00/0000:00:01.0/0000:01:10.0' kobject: '0000:01:10.0' (ffff8801d79ce0a8): kobject_uevent_env kobject: '0000:01:10.0' (ffff8801d79ce0a8): fill_kobj_path: path = '/devices/pci0000:00/0000:00:01.0/0000:01:10.0' kobject: '0000:01:10.0' (ffff8801d79ce0a8): kobject_uevent_env kobject: '0000:01:10.0' (ffff8801d79ce0a8): fill_kobj_path: path = '/devices/pci0000:00/0000:00:01.0/0000:01:10.0' [no free] Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index d414331b480e..558d562f4901 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -242,13 +242,15 @@ static ssize_t store_drivers_probe(struct bus_type *bus, const char *buf, size_t count) { struct device *dev; + int err = -EINVAL; dev = bus_find_device_by_name(bus, NULL, buf); if (!dev) return -ENODEV; - if (bus_rescan_devices_helper(dev, NULL) != 0) - return -EINVAL; - return count; + if (bus_rescan_devices_helper(dev, NULL) == 0) + err = count; + put_device(dev); + return err; } static struct device *next_device(struct klist_iter *i) From 5c27da8b2773851c9920b458366c5294c9de2aa6 Mon Sep 17 00:00:00 2001 From: Jiri Jaburek Date: Thu, 18 Dec 2014 02:03:19 +0100 Subject: [PATCH 143/214] ALSA: usb-audio: extend KEF X300A FU 10 tweak to Arcam rPAC commit d70a1b9893f820fdbcdffac408c909c50f2e6b43 upstream. The Arcam rPAC seems to have the same problem - whenever anything (alsamixer, udevd, 3.9+ kernel from 60af3d037eb8c, ..) attempts to access mixer / control interface of the card, the firmware "locks up" the entire device, resulting in SNDRV_PCM_IOCTL_HW_PARAMS failed (-5): Input/output error from alsa-lib. Other operating systems can somehow read the mixer (there seems to be playback volume/mute), but any manipulation is ignored by the device (which has hardware volume controls). Signed-off-by: Jiri Jaburek Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_maps.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 0339d464791a..4df31b0f94a3 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -322,8 +322,11 @@ static struct usbmix_name_map hercules_usb51_map[] = { { 0 } /* terminator */ }; -static const struct usbmix_name_map kef_x300a_map[] = { - { 10, NULL }, /* firmware locks up (?) when we try to access this FU */ +/* some (all?) SCMS USB3318 devices are affected by a firmware lock up + * when anything attempts to access FU 10 (control) + */ +static const struct usbmix_name_map scms_usb3318_map[] = { + { 10, NULL }, { 0 } }; @@ -415,8 +418,14 @@ static struct usbmix_ctl_map usbmix_ctl_maps[] = { .map = ebox44_map, }, { + /* KEF X300A */ .id = USB_ID(0x27ac, 0x1000), - .map = kef_x300a_map, + .map = scms_usb3318_map, + }, + { + /* Arcam rPAC */ + .id = USB_ID(0x25c4, 0x0003), + .map = scms_usb3318_map, }, { 0 } /* terminator */ }; From a26f3d7af44539e6fc1d7c62e03fab95efccdbf9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 27 Nov 2014 01:34:43 +0300 Subject: [PATCH 144/214] ALSA: hda - using uninitialized data commit 69eba10e606a80665f8573221fec589430d9d1cb upstream. In olden times the snd_hda_param_read() function always set "*start_id" but in 2007 we introduced a new return and it causes uninitialized data bugs in a couple of the callers: print_codec_info() and hdmi_parse_codec(). Fixes: e8a7f136f5ed ('[ALSA] hda-intel - Improve HD-audio codec probing robustness') Signed-off-by: Dan Carpenter Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index aeefec74a061..83a0f9b4452b 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -327,8 +327,10 @@ int snd_hda_get_sub_nodes(struct hda_codec *codec, hda_nid_t nid, unsigned int parm; parm = snd_hda_param_read(codec, nid, AC_PAR_NODE_COUNT); - if (parm == -1) + if (parm == -1) { + *start_id = 0; return 0; + } *start_id = (parm >> 16) & 0x7fff; return (int)(parm & 0x7fff); } From c5bcceb0e607ef522d8b244a40d08080f04cde65 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 5 Jan 2015 13:27:33 +0100 Subject: [PATCH 145/214] ALSA: hda - Fix wrong gpio_dir & gpio_mask hint setups for IDT/STAC codecs commit c507de88f6a336bd7296c9ec0073b2d4af8b4f5e upstream. stac_store_hints() does utterly wrong for masking the values for gpio_dir and gpio_data, likely due to copy&paste errors. Fortunately, this feature is used very rarely, so the impact must be really small. Reported-by: Rasmus Villemoes Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 5dd4c4af9c9f..4ae5767a2cf5 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -573,9 +573,9 @@ static void stac_store_hints(struct hda_codec *codec) spec->gpio_mask; } if (get_int_hint(codec, "gpio_dir", &spec->gpio_dir)) - spec->gpio_mask &= spec->gpio_mask; - if (get_int_hint(codec, "gpio_data", &spec->gpio_data)) spec->gpio_dir &= spec->gpio_mask; + if (get_int_hint(codec, "gpio_data", &spec->gpio_data)) + spec->gpio_data &= spec->gpio_mask; if (get_int_hint(codec, "eapd_mask", &spec->eapd_mask)) spec->eapd_mask &= spec->gpio_mask; if (get_int_hint(codec, "gpio_mute", &spec->gpio_mute)) From edefe2069bc0d8ab1fd03dd9b141cbfec98aca8f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 7 Nov 2014 08:48:15 -0800 Subject: [PATCH 146/214] USB: cdc-acm: check for valid interfaces commit 403dff4e2c94f275e24fd85f40b2732ffec268a1 upstream. We need to check that we have both a valid data and control inteface for both types of headers (union and not union.) References: https://bugzilla.kernel.org/show_bug.cgi?id=83551 Reported-by: Simon Schubert <2+kernel@0x2c.org> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 1e71f918eb9f..0228d591a1d5 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1087,10 +1087,11 @@ next_desc: } else { control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0); data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0)); - if (!control_interface || !data_interface) { - dev_dbg(&intf->dev, "no interfaces\n"); - return -ENODEV; - } + } + + if (!control_interface || !data_interface) { + dev_dbg(&intf->dev, "no interfaces\n"); + return -ENODEV; } if (data_interface_num != call_interface_num) From dd4fb6fc5dd2e85e65222f0e5e1e321dec9fc2b7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 19 Nov 2014 13:06:22 -0700 Subject: [PATCH 147/214] genhd: check for int overflow in disk_expand_part_tbl() commit 5fabcb4c33fe11c7e3afdf805fde26c1a54d0953 upstream. We can get here from blkdev_ioctl() -> blkpg_ioctl() -> add_partition() with a user passed in partno value. If we pass in 0x7fffffff, the new target in disk_expand_part_tbl() overflows the 'int' and we access beyond the end of ptbl->part[] and even write to it when we do the rcu_assign_pointer() to assign the new partition. Reported-by: David Ramos Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/genhd.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index e670148c3773..7694dffe9f0e 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1070,9 +1070,16 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno) struct disk_part_tbl *old_ptbl = disk->part_tbl; struct disk_part_tbl *new_ptbl; int len = old_ptbl ? old_ptbl->len : 0; - int target = partno + 1; + int i, target; size_t size; - int i; + + /* + * check for int overflow, since we can get here from blkpg_ioctl() + * with a user passed 'partno'. + */ + target = partno + 1; + if (target < 0) + return -EINVAL; /* disk_max_parts() is zero during initialization, ignore if so */ if (disk_max_parts(disk) && target > disk_max_parts(disk)) From b16c4055b24b1afce2e57270003d443831350fa8 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 20 Nov 2014 14:54:35 +0100 Subject: [PATCH 148/214] cdc-acm: memory leak in error case commit d908f8478a8d18e66c80a12adb27764920c1f1ca upstream. If probe() fails not only the attributes need to be removed but also the memory freed. Reported-by: Ahmed Tamrawi Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 0228d591a1d5..2800776b2e91 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1366,6 +1366,7 @@ alloc_fail8: &dev_attr_wCountryCodes); device_remove_file(&acm->control->dev, &dev_attr_iCountryCodeRelDate); + kfree(acm->country_codes); } device_remove_file(&acm->control->dev, &dev_attr_bmCapabilities); alloc_fail7: From 21fe2674284e19f70a2ce145f812d9329d4f1cc2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 24 Oct 2014 15:38:21 -0400 Subject: [PATCH 149/214] writeback: fix a subtle race condition in I_DIRTY clearing commit 9c6ac78eb3521c5937b2dd8a7d1b300f41092f45 upstream. After invoking ->dirty_inode(), __mark_inode_dirty() does smp_mb() and tests inode->i_state locklessly to see whether it already has all the necessary I_DIRTY bits set. The comment above the barrier doesn't contain any useful information - memory barriers can't ensure "changes are seen by all cpus" by itself. And it sure enough was broken. Please consider the following scenario. CPU 0 CPU 1 ------------------------------------------------------------------------------- enters __writeback_single_inode() grabs inode->i_lock tests PAGECACHE_TAG_DIRTY which is clear enters __set_page_dirty() grabs mapping->tree_lock sets PAGECACHE_TAG_DIRTY releases mapping->tree_lock leaves __set_page_dirty() enters __mark_inode_dirty() smp_mb() sees I_DIRTY_PAGES set leaves __mark_inode_dirty() clears I_DIRTY_PAGES releases inode->i_lock Now @inode has dirty pages w/ I_DIRTY_PAGES clear. This doesn't seem to lead to an immediately critical problem because requeue_inode() later checks PAGECACHE_TAG_DIRTY instead of I_DIRTY_PAGES when deciding whether the inode needs to be requeued for IO and there are enough unintentional memory barriers inbetween, so while the inode ends up with inconsistent I_DIRTY_PAGES flag, it doesn't fall off the IO list. The lack of explicit barrier may also theoretically affect the other I_DIRTY bits which deal with metadata dirtiness. There is no guarantee that a strong enough barrier exists between I_DIRTY_[DATA]SYNC clearing and write_inode() writing out the dirtied inode. Filesystem inode writeout path likely has enough stuff which can behave as full barrier but it's theoretically possible that the writeout may not see all the updates from ->dirty_inode(). Fix it by adding an explicit smp_mb() after I_DIRTY clearing. Note that I_DIRTY_PAGES needs a special treatment as it always needs to be cleared to be interlocked with the lockless test on __mark_inode_dirty() side. It's cleared unconditionally and reinstated after smp_mb() if the mapping still has dirty pages. Also add comments explaining how and why the barriers are paired. Lightly tested. Signed-off-by: Tejun Heo Cc: Jan Kara Cc: Mikulas Patocka Cc: Jens Axboe Cc: Al Viro Reviewed-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 387213ac2608..b44306378193 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -470,12 +470,28 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * write_inode() */ spin_lock(&inode->i_lock); - /* Clear I_DIRTY_PAGES if we've written out all dirty pages */ - if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - inode->i_state &= ~I_DIRTY_PAGES; + dirty = inode->i_state & I_DIRTY; - inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); + inode->i_state &= ~I_DIRTY; + + /* + * Paired with smp_mb() in __mark_inode_dirty(). This allows + * __mark_inode_dirty() to test i_state without grabbing i_lock - + * either they see the I_DIRTY bits cleared or we see the dirtied + * inode. + * + * I_DIRTY_PAGES is always cleared together above even if @mapping + * still has dirty pages. The flag is reinstated after smp_mb() if + * necessary. This guarantees that either __mark_inode_dirty() + * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY. + */ + smp_mb(); + + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) + inode->i_state |= I_DIRTY_PAGES; + spin_unlock(&inode->i_lock); + /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { int err = write_inode(inode, wbc); @@ -1146,12 +1162,11 @@ void __mark_inode_dirty(struct inode *inode, int flags) } /* - * make sure that changes are seen by all cpus before we test i_state - * -- mikulas + * Paired with smp_mb() in __writeback_single_inode() for the + * following lockless i_state test. See there for details. */ smp_mb(); - /* avoid the locking if we can */ if ((inode->i_state & flags) == flags) return; From 6a388a8350c86335b940dea0a3cc63bdbcc238ca Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Mon, 24 Nov 2014 07:56:21 +0100 Subject: [PATCH 150/214] serial: samsung: wait for transfer completion before clock disable commit 1ff383a4c3eda8893ec61b02831826e1b1f46b41 upstream. This patch adds waiting until transmit buffer and shifter will be empty before clock disabling. Without this fix it's possible to have clock disabled while data was not transmited yet, which causes unproper state of TX line and problems in following data transfers. Signed-off-by: Robert Baldyga Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index 0c8a9fa2be6c..b8366b154fb9 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -534,11 +534,15 @@ static void s3c24xx_serial_pm(struct uart_port *port, unsigned int level, unsigned int old) { struct s3c24xx_uart_port *ourport = to_ourport(port); + int timeout = 10000; ourport->pm_level = level; switch (level) { case 3: + while (--timeout && !s3c24xx_serial_txempty_nofifo(port)) + udelay(100); + if (!IS_ERR(ourport->baudclk)) clk_disable_unprepare(ourport->baudclk); From 11e404024797de1db3cbc2479a2f7e89b2a14ad8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 5 Dec 2014 16:40:07 +0100 Subject: [PATCH 151/214] fs: nfsd: Fix signedness bug in compare_blob commit ef17af2a817db97d42dd2ec0a425231748e23dbc upstream. Bugs similar to the one in acbbe6fbb240 (kcmp: fix standard comparison bug) are in rich supply. In this variant, the problem is that struct xdr_netobj::len has type unsigned int, so the expression o1->len - o2->len _also_ has type unsigned int; it has completely well-defined semantics, and the result is some non-negative integer, which is always representable in a long long. But this means that if the conditional triggers, we are guaranteed to return a positive value from compare_blob. In this case it could be fixed by - res = o1->len - o2->len; + res = (long long)o1->len - (long long)o2->len; but I'd rather eliminate the usually broken 'return a - b;' idiom. Reviewed-by: Jeff Layton Signed-off-by: Rasmus Villemoes Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 836307ae1f08..4a58afa99654 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1200,15 +1200,14 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) return 0; } -static long long +static int compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2) { - long long res; - - res = o1->len - o2->len; - if (res) - return res; - return (long long)memcmp(o1->data, o2->data, o1->len); + if (o1->len < o2->len) + return -1; + if (o1->len > o2->len) + return 1; + return memcmp(o1->data, o2->data, o1->len); } static int same_name(const char *n1, const char *n2) @@ -1365,7 +1364,7 @@ add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root) static struct nfs4_client * find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root) { - long long cmp; + int cmp; struct rb_node *node = root->rb_node; struct nfs4_client *clp; From 0171a58775ee1eefeaf0f5a8fd30da50be400d16 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Sun, 7 Dec 2014 16:05:47 -0500 Subject: [PATCH 152/214] nfsd4: fix xdr4 inclusion of escaped char commit 5a64e56976f1ba98743e1678c0029a98e9034c81 upstream. Fix a bug where nfsd4_encode_components_esc() includes the esc_end char as an additional string encoding. Signed-off-by: Benjamin Coddington Fixes: e7a0444aef4a "nfsd: add IPv6 addr escaping to fs_location hosts" Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9b45f0666cfc..acf179d7615f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1743,6 +1743,9 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components, } else end++; + if (found_esc) + end = next; + str = end; } *pp = p; From 344e833f18ddb40252264d38dbf7d82b760ec34a Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 10 Dec 2014 15:54:34 -0800 Subject: [PATCH 153/214] nilfs2: fix the nilfs_iget() vs. nilfs_new_inode() races commit 705304a863cc41585508c0f476f6d3ec28cf7e00 upstream. Same story as in commit 41080b5a2401 ("nfsd race fixes: ext2") (similar ext2 fix) except that nilfs2 needs to use insert_inode_locked4() instead of insert_inode_locked() and a bug of a check for dead inodes needs to be fixed. If nilfs_iget() is called from nfsd after nilfs_new_inode() calls insert_inode_locked4(), nilfs_iget() will wait for unlock_new_inode() at the end of nilfs_mkdir()/nilfs_create()/etc to unlock the inode. If nilfs_iget() is called before nilfs_new_inode() calls insert_inode_locked4(), it will create an in-core inode and read its data from the on-disk inode. But, nilfs_iget() will find i_nlink equals zero and fail at nilfs_read_inode_common(), which will lead it to call iget_failed() and cleanly fail. However, this sanity check doesn't work as expected for reused on-disk inodes because they leave a non-zero value in i_mode field and it hinders the test of i_nlink. This patch also fixes the issue by removing the test on i_mode that nilfs2 doesn't need. Signed-off-by: Ryusuke Konishi Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/inode.c | 32 ++++++++++++++++++++++++-------- fs/nilfs2/namei.c | 15 ++++++++++++--- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 2e1372efbb00..587d699bdc2c 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -49,6 +49,8 @@ struct nilfs_iget_args { int for_gc; }; +static int nilfs_iget_test(struct inode *inode, void *opaque); + void nilfs_inode_add_blocks(struct inode *inode, int n) { struct nilfs_root *root = NILFS_I(inode)->i_root; @@ -347,6 +349,17 @@ const struct address_space_operations nilfs_aops = { .is_partially_uptodate = block_is_partially_uptodate, }; +static int nilfs_insert_inode_locked(struct inode *inode, + struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = 0 + }; + + return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); +} + struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; @@ -382,7 +395,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { err = nilfs_bmap_read(ii->i_bmap, NULL); if (err < 0) - goto failed_bmap; + goto failed_after_creation; set_bit(NILFS_I_BMAP, &ii->i_state); /* No lock is needed; iget() ensures it. */ @@ -398,21 +411,24 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) spin_lock(&nilfs->ns_next_gen_lock); inode->i_generation = nilfs->ns_next_generation++; spin_unlock(&nilfs->ns_next_gen_lock); - insert_inode_hash(inode); + if (nilfs_insert_inode_locked(inode, root, ino) < 0) { + err = -EIO; + goto failed_after_creation; + } err = nilfs_init_acl(inode, dir); if (unlikely(err)) - goto failed_acl; /* never occur. When supporting + goto failed_after_creation; /* never occur. When supporting nilfs_init_acl(), proper cancellation of above jobs should be considered */ return inode; - failed_acl: - failed_bmap: + failed_after_creation: clear_nlink(inode); + unlock_new_inode(inode); iput(inode); /* raw_inode will be deleted through - generic_delete_inode() */ + nilfs_evict_inode() */ goto failed; failed_ifile_create_inode: @@ -460,8 +476,8 @@ int nilfs_read_inode_common(struct inode *inode, inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - if (inode->i_nlink == 0 && inode->i_mode == 0) - return -EINVAL; /* this inode is deleted */ + if (inode->i_nlink == 0) + return -ESTALE; /* this inode is deleted */ inode->i_blocks = le64_to_cpu(raw_inode->i_blocks); ii->i_flags = le32_to_cpu(raw_inode->i_flags); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 9de78f08989e..0f84b257932c 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -51,9 +51,11 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) int err = nilfs_add_link(dentry, inode); if (!err) { d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -182,6 +184,7 @@ out: out_fail: drop_nlink(inode); nilfs_mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -201,11 +204,15 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, inode_inc_link_count(inode); ihold(inode); - err = nilfs_add_nondir(dentry, inode); - if (!err) + err = nilfs_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); err = nilfs_transaction_commit(dir->i_sb); - else + } else { + inode_dec_link_count(inode); + iput(inode); nilfs_transaction_abort(dir->i_sb); + } return err; } @@ -243,6 +250,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) nilfs_mark_inode_dirty(inode); d_instantiate(dentry, inode); + unlock_new_inode(inode); out: if (!err) err = nilfs_transaction_commit(dir->i_sb); @@ -255,6 +263,7 @@ out_fail: drop_nlink(inode); drop_nlink(inode); nilfs_mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); out_dir: drop_nlink(dir); From 697f52b45551255d3e151110cba150107141de56 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 10 Dec 2014 15:41:28 -0800 Subject: [PATCH 154/214] scripts/kernel-doc: don't eat struct members with __aligned commit 7b990789a4c3420fa57596b368733158e432d444 upstream. The change from \d+ to .+ inside __aligned() means that the following structure: struct test { u8 a __aligned(2); u8 b __aligned(2); }; essentially gets modified to struct test { u8 a; }; for purposes of kernel-doc, thus dropping a struct member, which in turns causes warnings and invalid kernel-doc generation. Fix this by replacing the catch-all (".") with anything that's not a semicolon ("[^;]"). Fixes: 9dc30918b23f ("scripts/kernel-doc: handle struct member __aligned without numbers") Signed-off-by: Johannes Berg Cc: Nishanth Menon Cc: Randy Dunlap Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- scripts/kernel-doc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 4305b2f2ec5e..8c0e07b7a70b 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1750,7 +1750,7 @@ sub dump_struct($$) { # strip kmemcheck_bitfield_{begin,end}.*; $members =~ s/kmemcheck_bitfield_.*?;//gos; # strip attributes - $members =~ s/__aligned\s*\(.+\)//gos; + $members =~ s/__aligned\s*\([^;]*\)//gos; create_parameterlist($members, ';', $file); check_sections($file, $declaration_name, "struct", $sectcheck, $struct_actual, $nested); From f35fff85dc8a8ee4fea7802d44822d1f18d035ed Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 13 Nov 2014 10:38:57 +0100 Subject: [PATCH 155/214] ARM: mvebu: disable I/O coherency on non-SMP situations on Armada 370/375/38x/XP commit e55355453600a33bb5ca4f71f2d7214875f3b061 upstream. Enabling the hardware I/O coherency on Armada 370, Armada 375, Armada 38x and Armada XP requires a certain number of conditions: - On Armada 370, the cache policy must be set to write-allocate. - On Armada 375, 38x and XP, the cache policy must be set to write-allocate, the pages must be mapped with the shareable attribute, and the SMP bit must be set Currently, on Armada XP, when CONFIG_SMP is enabled, those conditions are met. However, when Armada XP is used in a !CONFIG_SMP kernel, none of these conditions are met. With Armada 370, the situation is worse: since the processor is single core, regardless of whether CONFIG_SMP or !CONFIG_SMP is used, the cache policy will be set to write-back by the kernel and not write-allocate. Since solving this problem turns out to be quite complicated, and we don't want to let users with a mainline kernel known to have infrequent but existing data corruptions, this commit proposes to simply disable hardware I/O coherency in situations where it is known not to work. And basically, the is_smp() function of the kernel tells us whether it is OK to enable hardware I/O coherency or not, so this commit slightly refactors the coherency_type() function to return COHERENCY_FABRIC_TYPE_NONE when is_smp() is false, or the appropriate type of the coherency fabric in the other case. Thanks to this, the I/O coherency fabric will no longer be used at all in !CONFIG_SMP configurations. It will continue to be used in CONFIG_SMP configurations on Armada XP, Armada 375 and Armada 38x (which are multiple cores processors), but will no longer be used on Armada 370 (which is a single core processor). In the process, it simplifies the implementation of the coherency_type() function, and adds a missing call to of_node_put(). Signed-off-by: Thomas Petazzoni Fixes: e60304f8cb7bb545e79fe62d9b9762460c254ec2 ("arm: mvebu: Add hardware I/O Coherency support") Cc: # v3.8+ Acked-by: Gregory CLEMENT Link: https://lkml.kernel.org/r/1415871540-20302-3-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-mvebu/coherency.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 8278960066c3..3ee701f1d38e 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -141,6 +141,29 @@ int __init coherency_init(void) { struct device_node *np; + /* + * The coherency fabric is needed: + * - For coherency between processors on Armada XP, so only + * when SMP is enabled. + * - For coherency between the processor and I/O devices, but + * this coherency requires many pre-requisites (write + * allocate cache policy, shareable pages, SMP bit set) that + * are only meant in SMP situations. + * + * Note that this means that on Armada 370, there is currently + * no way to use hardware I/O coherency, because even when + * CONFIG_SMP is enabled, is_smp() returns false due to the + * Armada 370 being a single-core processor. To lift this + * limitation, we would have to find a way to make the cache + * policy set to write-allocate (on all Armada SoCs), and to + * set the shareable attribute in page tables (on all Armada + * SoCs except the Armada 370). Unfortunately, such decisions + * are taken very early in the kernel boot process, at a point + * where we don't know yet on which SoC we are running. + */ + if (!is_smp()) + return 0; + np = of_find_matching_node(NULL, of_coherency_table); if (np) { pr_info("Initializing Coherency fabric\n"); From 16e9d54b3f8158479e796188dcf45a6bfed18f03 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 31 Dec 2014 12:18:29 -0500 Subject: [PATCH 156/214] Btrfs: don't delay inode ref updates during log replay commit 6f8960541b1eb6054a642da48daae2320fddba93 upstream. Commit 1d52c78afbb (Btrfs: try not to ENOSPC on log replay) added a check to skip delayed inode updates during log replay because it confuses the enospc code. But the delayed processing will end up ignoring delayed refs from log replay because the inode itself wasn't put through the delayed code. This can end up triggering a warning at commit time: WARNING: CPU: 2 PID: 778 at fs/btrfs/delayed-inode.c:1410 btrfs_assert_delayed_root_empty+0x32/0x34() Which is repeated for each commit because we never process the delayed inode ref update. The fix used here is to change btrfs_delayed_delete_inode_ref to return an error if we're currently in log replay. The caller will do the ref deletion immediately and everything will work properly. Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delayed-inode.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index f26f38ccd194..019fc5a68a14 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1843,6 +1843,14 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, struct btrfs_delayed_node *delayed_node; int ret = 0; + /* + * we don't do delayed inode updates during log recovery because it + * leads to enospc problems. This means we also can't do + * delayed inode refs + */ + if (BTRFS_I(inode)->root->fs_info->log_root_recovering) + return -EAGAIN; + delayed_node = btrfs_get_or_create_delayed_node(inode); if (IS_ERR(delayed_node)) return PTR_ERR(delayed_node); From ac96652da2a760bbd8cb4dc74b6bf1c5d2c8babb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Dec 2014 21:23:50 +0100 Subject: [PATCH 157/214] perf/x86/intel/uncore: Make sure only uncore events are collected commit af91568e762d04931dcbdd6bef4655433d8b9418 upstream. The uncore_collect_events functions assumes that event group might contain only uncore events which is wrong, because it might contain any type of events. This bug leads to uncore framework touching 'not' uncore events, which could end up all sorts of bugs. One was triggered by Vince's perf fuzzer, when the uncore code touched breakpoint event private event space as if it was uncore event and caused BUG: BUG: unable to handle kernel paging request at ffffffff82822068 IP: [] uncore_assign_events+0x188/0x250 ... The code in uncore_assign_events() function was looking for event->hw.idx data while the event was initialized as a breakpoint with different members in event->hw union. This patch forces uncore_collect_events() to collect only uncore events. Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Yan, Zheng Link: http://lkml.kernel.org/r/1418243031-20367-2-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/perf_event_intel_uncore.c | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 8aac56bda7dc..7185af255fb5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2657,6 +2657,17 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); } +/* + * Using uncore_pmu_event_init pmu event_init callback + * as a detection point for uncore events. + */ +static int uncore_pmu_event_init(struct perf_event *event); + +static bool is_uncore_event(struct perf_event *event) +{ + return event->pmu->event_init == uncore_pmu_event_init; +} + static int uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) { @@ -2671,13 +2682,18 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, b return -EINVAL; n = box->n_events; - box->event_list[n] = leader; - n++; + + if (is_uncore_event(leader)) { + box->event_list[n] = leader; + n++; + } + if (!dogrp) return n; list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (event->state <= PERF_EVENT_STATE_OFF) + if (!is_uncore_event(event) || + event->state <= PERF_EVENT_STATE_OFF) continue; if (n >= max_count) From d525563b50aa3b63c7b7cb7908810facb6dc7ac5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 10 Dec 2014 21:23:51 +0100 Subject: [PATCH 158/214] perf: Fix events installation during moving group commit 9fc81d87420d0d3fd62d5e5529972c0ad9eab9cc upstream. We allow PMU driver to change the cpu on which the event should be installed to. This happened in patch: e2d37cd213dc ("perf: Allow the PMU driver to choose the CPU on which to install events") This patch also forces all the group members to follow the currently opened events cpu if the group happened to be moved. This and the change of event->cpu in perf_install_in_context() function introduced in: 0cda4c023132 ("perf: Introduce perf_pmu_migrate_context()") forces group members to change their event->cpu, if the currently-opened-event's PMU changed the cpu and there is a group move. Above behaviour causes problem for breakpoint events, which uses event->cpu to touch cpu specific data for breakpoints accounting. By changing event->cpu, some breakpoints slots were wrongly accounted for given cpu. Vinces's perf fuzzer hit this issue and caused following WARN on my setup: WARNING: CPU: 0 PID: 20214 at arch/x86/kernel/hw_breakpoint.c:119 arch_install_hw_breakpoint+0x142/0x150() Can't find any breakpoint slot [...] This patch changes the group moving code to keep the event's original cpu. Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Cc: Yan, Zheng Link: http://lkml.kernel.org/r/1418243031-20367-3-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3f63ea6464ca..7bf4d519c20f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6887,11 +6887,11 @@ SYSCALL_DEFINE5(perf_event_open, if (move_group) { synchronize_rcu(); - perf_install_in_context(ctx, group_leader, event->cpu); + perf_install_in_context(ctx, group_leader, group_leader->cpu); get_ctx(ctx); list_for_each_entry(sibling, &group_leader->sibling_list, group_entry) { - perf_install_in_context(ctx, sibling, event->cpu); + perf_install_in_context(ctx, sibling, sibling->cpu); get_ctx(ctx); } } From 2788d619a6d20e994dbbdcd91ba6847744bee675 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Nov 2014 16:39:31 +0100 Subject: [PATCH 159/214] perf session: Do not fail on processing out of order event commit f61ff6c06dc8f32c7036013ad802c899ec590607 upstream. Linus reported perf report command being interrupted due to processing of 'out of order' event, with following error: Timestamp below last timeslice flush 0x5733a8 [0x28]: failed to process type: 3 I could reproduce the issue and in my case it was caused by one CPU (mmap) being behind during record and userspace mmap reader seeing the data after other CPUs data were already stored. This is expected under some circumstances because we need to limit the number of events that we queue for reordering when we receive a PERF_RECORD_FINISHED_ROUND or when we force flush due to memory pressure. Reported-by: Linus Torvalds Signed-off-by: Jiri Olsa Acked-by: Ingo Molnar Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Linus Torvalds Cc: Matt Fleming Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1417016371-30249-1-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo [zhangzhiqiang: backport to 3.10: - adjust context - commit f61ff6c06d struct events_stats was defined in tools/perf/util/event.h while 3.10 stable defined in tools/perf/util/hist.h. - 3.10 stable there is no pr_oe_time() which used for debug. - After the above adjustments, becomes same to the original patch: https://github.com/torvalds/linux/commit/f61ff6c06dc8f32c7036013ad802c899ec590607 ] Signed-off-by: Zhiqiang Zhang Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/hist.h | 1 + tools/perf/util/session.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 14c2fe20aa62..20764e01df16 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -34,6 +34,7 @@ struct events_stats { u32 nr_invalid_chains; u32 nr_unknown_id; u32 nr_unprocessable_samples; + u32 nr_unordered_events; }; enum hist_column { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index e392202b96bc..6f593a704ea5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -656,8 +656,7 @@ static int perf_session_queue_event(struct perf_session *s, union perf_event *ev return -ETIME; if (timestamp < s->ordered_samples.last_flush) { - printf("Warning: Timestamp below last timeslice flush\n"); - return -EINVAL; + s->stats.nr_unordered_events++; } if (!list_empty(sc)) { @@ -1057,6 +1056,8 @@ static void perf_session__warn_about_errors(const struct perf_session *session, "Do you have a KVM guest running and not using 'perf kvm'?\n", session->stats.nr_unprocessable_samples); } + if (session->stats.nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", session->stats.nr_unordered_events); } #define session_done() (*(volatile int *)(&session_done)) From 6bb148fb1e6a1211c55f8cc3fcd789c34483c7bb Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 8 Jan 2015 14:32:40 -0800 Subject: [PATCH 160/214] mm, vmscan: prevent kswapd livelock due to pfmemalloc-throttled process being killed commit 9e5e3661727eaf960d3480213f8e87c8d67b6956 upstream. Charles Shirron and Paul Cassella from Cray Inc have reported kswapd stuck in a busy loop with nothing left to balance, but kswapd_try_to_sleep() failing to sleep. Their analysis found the cause to be a combination of several factors: 1. A process is waiting in throttle_direct_reclaim() on pgdat->pfmemalloc_wait 2. The process has been killed (by OOM in this case), but has not yet been scheduled to remove itself from the waitqueue and die. 3. kswapd checks for throttled processes in prepare_kswapd_sleep(): if (waitqueue_active(&pgdat->pfmemalloc_wait)) { wake_up(&pgdat->pfmemalloc_wait); return false; // kswapd will not go to sleep } However, for a process that was already killed, wake_up() does not remove the process from the waitqueue, since try_to_wake_up() checks its state first and returns false when the process is no longer waiting. 4. kswapd is running on the same CPU as the only CPU that the process is allowed to run on (through cpus_allowed, or possibly single-cpu system). 5. CONFIG_PREEMPT_NONE=y kernel is used. If there's nothing to balance, kswapd encounters no voluntary preemption points and repeatedly fails prepare_kswapd_sleep(), blocking the process from running and removing itself from the waitqueue, which would let kswapd sleep. So, the source of the problem is that we prevent kswapd from going to sleep until there are processes waiting on the pfmemalloc_wait queue, and a process waiting on a queue is guaranteed to be removed from the queue only when it gets scheduled. This was done to make sure that no process is left sleeping on pfmemalloc_wait when kswapd itself goes to sleep. However, it isn't necessary to postpone kswapd sleep until the pfmemalloc_wait queue actually empties. To prevent processes from being left sleeping, it's actually enough to guarantee that all processes waiting on pfmemalloc_wait queue have been woken up by the time we put kswapd to sleep. This patch therefore fixes this issue by substituting 'wake_up' with 'wake_up_all' and removing 'return false' in the code snippet from prepare_kswapd_sleep() above. Note that if any process puts itself in the queue after this waitqueue_active() check, or after the wake up itself, it means that the process will also wake up kswapd - and since we are under prepare_to_wait(), the wake up won't be missed. Also we update the comment prepare_kswapd_sleep() to hopefully more clearly describe the races it is preventing. Fixes: 5515061d22f0 ("mm: throttle direct reclaimers if PF_MEMALLOC reserves are low and swap is backed by network storage") Signed-off-by: Vlastimil Babka Signed-off-by: Vladimir Davydov Cc: Mel Gorman Cc: Johannes Weiner Acked-by: Michal Hocko Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 4e89500391dc..a2fd7e759cb7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2631,18 +2631,20 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, return false; /* - * There is a potential race between when kswapd checks its watermarks - * and a process gets throttled. There is also a potential race if - * processes get throttled, kswapd wakes, a large process exits therby - * balancing the zones that causes kswapd to miss a wakeup. If kswapd - * is going to sleep, no process should be sleeping on pfmemalloc_wait - * so wake them now if necessary. If necessary, processes will wake - * kswapd and get throttled again + * The throttled processes are normally woken up in balance_pgdat() as + * soon as pfmemalloc_watermark_ok() is true. But there is a potential + * race between when kswapd checks the watermarks and a process gets + * throttled. There is also a potential race if processes get + * throttled, kswapd wakes, a large process exits thereby balancing the + * zones, which causes kswapd to exit balance_pgdat() before reaching + * the wake up checks. If kswapd is going to sleep, no process should + * be sleeping on pfmemalloc_wait, so wake them now if necessary. If + * the wake up is premature, processes will wake kswapd and get + * throttled again. The difference from wake ups in balance_pgdat() is + * that here we are under prepare_to_wait(). */ - if (waitqueue_active(&pgdat->pfmemalloc_wait)) { - wake_up(&pgdat->pfmemalloc_wait); - return false; - } + if (waitqueue_active(&pgdat->pfmemalloc_wait)) + wake_up_all(&pgdat->pfmemalloc_wait); return pgdat_balanced(pgdat, order, classzone_idx); } From 88b5d12c6413c7b80c3df4527b09c9d5f65c1c0a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 6 Jan 2015 13:00:05 -0800 Subject: [PATCH 161/214] mm: propagate error from stack expansion even for guard page commit fee7e49d45149fba60156f5b59014f764d3e3728 upstream. Jay Foad reports that the address sanitizer test (asan) sometimes gets confused by a stack pointer that ends up being outside the stack vma that is reported by /proc/maps. This happens due to an interaction between RLIMIT_STACK and the guard page: when we do the guard page check, we ignore the potential error from the stack expansion, which effectively results in a missing guard page, since the expected stack expansion won't have been done. And since /proc/maps explicitly ignores the guard page (commit d7824370e263: "mm: fix up some user-visible effects of the stack guard page"), the stack pointer ends up being outside the reported stack area. This is the minimal patch: it just propagates the error. It also effectively makes the guard page part of the stack limit, which in turn measn that the actual real stack is one page less than the stack limit. Let's see if anybody notices. We could teach acct_stack_growth() to allow an extra page for a grow-up/grow-down stack in the rlimit test, but I don't want to add more complexity if it isn't needed. Reported-and-tested-by: Jay Foad Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/mm.h | 2 +- mm/memory.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index d4cdac903468..c4085192c2b6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1630,7 +1630,7 @@ extern int expand_downwards(struct vm_area_struct *vma, #if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); #else - #define expand_upwards(vma, address) do { } while (0) + #define expand_upwards(vma, address) (0) #endif /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ diff --git a/mm/memory.c b/mm/memory.c index 0926ccd04d7a..8b2d75f61b32 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3200,7 +3200,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo if (prev && prev->vm_end == address) return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; - expand_downwards(vma, address - PAGE_SIZE); + return expand_downwards(vma, address - PAGE_SIZE); } if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { struct vm_area_struct *next = vma->vm_next; @@ -3209,7 +3209,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo if (next && next->vm_start == address + PAGE_SIZE) return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; - expand_upwards(vma, address + PAGE_SIZE); + return expand_upwards(vma, address + PAGE_SIZE); } return 0; } From 7d702b4b2b0f10cfa45f596ebc262e092c1808f0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Jan 2015 11:33:57 -0800 Subject: [PATCH 162/214] mm: Don't count the stack guard page towards RLIMIT_STACK commit 690eac53daff34169a4d74fc7bfbd388c4896abb upstream. Commit fee7e49d4514 ("mm: propagate error from stack expansion even for guard page") made sure that we return the error properly for stack growth conditions. It also theorized that counting the guard page towards the stack limit might break something, but also said "Let's see if anybody notices". Somebody did notice. Apparently android-x86 sets the stack limit very close to the limit indeed, and including the guard page in the rlimit check causes the android 'zygote' process problems. So this adds the (fairly trivial) code to make the stack rlimit check be against the actual real stack size, rather than the size of the vma that includes the guard page. Reported-and-tested-by: Chih-Wei Huang Cc: Jay Foad Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mmap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 8f87b14c7968..43a7089c6a7c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2056,14 +2056,17 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns { struct mm_struct *mm = vma->vm_mm; struct rlimit *rlim = current->signal->rlim; - unsigned long new_start; + unsigned long new_start, actual_size; /* address space limit tests */ if (!may_expand_vm(mm, grow)) return -ENOMEM; /* Stack limit test */ - if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) + actual_size = size; + if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN))) + actual_size -= PAGE_SIZE; + if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) return -ENOMEM; /* mlock limit tests */ From 5054319d9fe56ed1ef6c83c00ae37fdc2b277a79 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 16 Jan 2015 07:00:00 -0800 Subject: [PATCH 163/214] Linux 3.10.65 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e5b63fb3d0e1..7889b38766db 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 64 +SUBLEVEL = 65 EXTRAVERSION = NAME = TOSSUG Baby Fish From c0131386abdafa081356c1c02129df6413e685e0 Mon Sep 17 00:00:00 2001 From: Robin Holt Date: Mon, 8 Jul 2013 16:01:35 -0700 Subject: [PATCH 164/214] reboot: x86: prepare reboot_mode for moving to generic kernel code Prepare for the moving the parsing of reboot= to the generic kernel code by making reboot_mode into a more generic form. Signed-off-by: Robin Holt Cc: H. Peter Anvin Cc: Miguel Boton Cc: Russ Anderson Cc: Robin Holt Cc: Russell King Cc: Guan Xuetao Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit edf2b1394611fef7806d4af72179dc3ac101f275) Signed-off-by: Mark Brown --- arch/x86/kernel/reboot.c | 12 +++++++----- include/linux/reboot.h | 5 +++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 76fa1e9a2b39..f7703401d6cb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -36,7 +36,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; -static int reboot_mode; +static enum reboot_mode reboot_mode; enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; @@ -88,11 +88,11 @@ static int __init reboot_setup(char *str) switch (*str) { case 'w': - reboot_mode = 0x1234; + reboot_mode = REBOOT_WARM; break; case 'c': - reboot_mode = 0; + reboot_mode = REBOOT_COLD; break; #ifdef CONFIG_SMP @@ -536,6 +536,7 @@ static void native_machine_emergency_restart(void) int i; int attempt = 0; int orig_reboot_type = reboot_type; + unsigned short mode; if (reboot_emergency) emergency_vmx_disable_all(); @@ -543,7 +544,8 @@ static void native_machine_emergency_restart(void) tboot_shutdown(TB_SHUTDOWN_REBOOT); /* Tell the BIOS if we want cold or warm reboot */ - *((unsigned short *)__va(0x472)) = reboot_mode; + mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0; + *((unsigned short *)__va(0x472)) = mode; for (;;) { /* Could also try the reset bit in the Hammer NB */ @@ -585,7 +587,7 @@ static void native_machine_emergency_restart(void) case BOOT_EFI: if (efi_enabled(EFI_RUNTIME_SERVICES)) - efi.reset_system(reboot_mode ? + efi.reset_system(reboot_mode == REBOOT_WARM ? EFI_RESET_WARM : EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 23b36304cd88..2ca9ed7cfc9b 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -10,6 +10,11 @@ #define SYS_HALT 0x0002 /* Notify of system halt */ #define SYS_POWER_OFF 0x0003 /* Notify of system power off */ +enum reboot_mode { + REBOOT_COLD = 0, + REBOOT_WARM, +}; + extern int register_reboot_notifier(struct notifier_block *); extern int unregister_reboot_notifier(struct notifier_block *); From 19a87fdc2aa7a0e37e28ecc147ee696418f3d942 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 17 Jan 2015 01:25:15 +0000 Subject: [PATCH 165/214] Revert "drivers: cpuidle: CPU idle ARM64 driver" This reverts commit 08461b57c19fb68dde8907b87ff38f38f2585f64. --- drivers/cpuidle/Kconfig | 5 - drivers/cpuidle/Kconfig.arm64 | 13 --- drivers/cpuidle/Makefile | 4 - drivers/cpuidle/cpuidle-arm64.c | 159 -------------------------------- 4 files changed, 181 deletions(-) delete mode 100644 drivers/cpuidle/Kconfig.arm64 delete mode 100644 drivers/cpuidle/cpuidle-arm64.c diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 842d7ba83101..e4d3913fe330 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -48,9 +48,4 @@ config CPU_IDLE_CALXEDA help Select this to enable cpuidle on Calxeda processors. -menu "ARM64 CPU Idle Drivers" -depends on ARM64 -source "drivers/cpuidle/Kconfig.arm64" -endmenu - endif diff --git a/drivers/cpuidle/Kconfig.arm64 b/drivers/cpuidle/Kconfig.arm64 deleted file mode 100644 index b83612c67e6d..000000000000 --- a/drivers/cpuidle/Kconfig.arm64 +++ /dev/null @@ -1,13 +0,0 @@ -# -# ARM64 CPU Idle drivers -# - -config ARM64_CPUIDLE - bool "Generic ARM64 CPU idle Driver" - select OF_IDLE_STATES - help - Select this to enable generic cpuidle driver for ARM v8. - It provides a generic idle driver whose idle states are configured - at run-time through DT nodes. The CPUidle suspend backend is - initialized by the device tree parsing code on matching the entry - method to the respective CPU operations. diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index d3e662cde224..d41366a7838c 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -8,7 +8,3 @@ obj-$(CONFIG_OF_IDLE_STATES) += of_idle_states.o obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o - -############################################################################### -# ARM64 drivers -obj-$(CONFIG_ARM64_CPUIDLE) += cpuidle-arm64.o diff --git a/drivers/cpuidle/cpuidle-arm64.c b/drivers/cpuidle/cpuidle-arm64.c deleted file mode 100644 index 2cfde6ce3086..000000000000 --- a/drivers/cpuidle/cpuidle-arm64.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * ARM64 generic CPU idle driver. - * - * Copyright (C) 2014 ARM Ltd. - * Author: Lorenzo Pieralisi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "of_idle_states.h" - -typedef int (*suspend_init_fn)(struct cpuidle_driver *, - struct device_node *[]); - -struct cpu_suspend_ops { - const char *id; - suspend_init_fn init_fn; -}; - -static const struct cpu_suspend_ops suspend_operations[] __initconst = { - {"arm,psci", psci_dt_register_idle_states}, - {} -}; - -static __init const struct cpu_suspend_ops *get_suspend_ops(const char *str) -{ - int i; - - if (!str) - return NULL; - - for (i = 0; suspend_operations[i].id; i++) - if (!strcmp(suspend_operations[i].id, str)) - return &suspend_operations[i]; - - return NULL; -} - -/* - * arm_enter_idle_state - Programs CPU to enter the specified state - * - * @dev: cpuidle device - * @drv: cpuidle driver - * @idx: state index - * - * Called from the CPUidle framework to program the device to the - * specified target state selected by the governor. - */ -static int arm_enter_idle_state(struct cpuidle_device *dev, - struct cpuidle_driver *drv, int idx) -{ - int ret; - - if (!idx) { - cpu_do_idle(); - return idx; - } - - cpu_pm_enter(); - /* - * Pass idle state index to cpu_suspend which in turn will call - * the CPU ops suspend protocol with idle index as a parameter. - * - * Some states would not require context to be saved and flushed - * to DRAM, so calling cpu_suspend would not be stricly necessary. - * When power domains specifications for ARM CPUs are finalized then - * this code can be optimized to prevent saving registers if not - * needed. - */ - ret = cpu_suspend(idx); - - cpu_pm_exit(); - - return ret ? -1 : idx; -} - -struct cpuidle_driver arm64_idle_driver = { - .name = "arm64_idle", - .owner = THIS_MODULE, -}; - -static struct device_node *state_nodes[CPUIDLE_STATE_MAX] __initdata; - -/* - * arm64_idle_init - * - * Registers the arm64 specific cpuidle driver with the cpuidle - * framework. It relies on core code to parse the idle states - * and initialize them using driver data structures accordingly. - */ -static int __init arm64_idle_init(void) -{ - int i, ret; - const char *entry_method; - struct device_node *idle_states_node; - const struct cpu_suspend_ops *suspend_init; - struct cpuidle_driver *drv = &arm64_idle_driver; - - idle_states_node = of_find_node_by_path("/cpus/idle-states"); - if (!idle_states_node) - return -ENOENT; - - if (of_property_read_string(idle_states_node, "entry-method", - &entry_method)) { - pr_warn(" * %s missing entry-method property\n", - idle_states_node->full_name); - of_node_put(idle_states_node); - return -EOPNOTSUPP; - } - - suspend_init = get_suspend_ops(entry_method); - if (!suspend_init) { - pr_warn("Missing suspend initializer\n"); - of_node_put(idle_states_node); - return -EOPNOTSUPP; - } - - /* - * State at index 0 is standby wfi and considered standard - * on all ARM platforms. If in some platforms simple wfi - * can't be used as "state 0", DT bindings must be implemented - * to work around this issue and allow installing a special - * handler for idle state index 0. - */ - drv->states[0].exit_latency = 1; - drv->states[0].target_residency = 1; - drv->states[0].flags = CPUIDLE_FLAG_TIME_VALID; - strncpy(drv->states[0].name, "ARM WFI", CPUIDLE_NAME_LEN); - strncpy(drv->states[0].desc, "ARM WFI", CPUIDLE_DESC_LEN); - - drv->cpumask = (struct cpumask *) cpu_possible_mask; - /* - * Start at index 1, request idle state nodes to be filled - */ - ret = of_init_idle_driver(drv, state_nodes, 1, true); - if (ret) - return ret; - - if (suspend_init->init_fn(drv, state_nodes)) - return -EOPNOTSUPP; - - for (i = 0; i < drv->state_count; i++) - drv->states[i].enter = arm_enter_idle_state; - - return cpuidle_register(drv, NULL); -} -device_initcall(arm64_idle_init); From f1068f835e9931b1ccf378b088ae5668dd1d5190 Mon Sep 17 00:00:00 2001 From: Sudeep KarkadaNagesha Date: Thu, 18 Jul 2013 11:22:04 +0100 Subject: [PATCH 166/214] of/device: add helper to get cpu device node from logical cpu index Multiple drivers need to get the cpu device node from the cpu logical index and then access the of_node. This patch adds helper function to fetch the device node directly. Acked-by: Rob Herring Signed-off-by: Sudeep KarkadaNagesha (cherry picked from commit bd00860e960e52658cebfd65eeb4748ed6ff039f) Signed-off-by: Mark Brown Conflicts: include/linux/of_device.h --- include/linux/of_device.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 901b7435e890..192c783c2ec6 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -1,6 +1,7 @@ #ifndef _LINUX_OF_DEVICE_H #define _LINUX_OF_DEVICE_H +#include #include #include /* temporary until merge */ @@ -43,6 +44,15 @@ static inline void of_device_node_put(struct device *dev) of_node_put(dev->of_node); } +static inline struct device_node *of_cpu_device_node_get(int cpu) +{ + struct device *cpu_dev; + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return NULL; + return of_node_get(cpu_dev->of_node); +} + #else /* CONFIG_OF_DEVICE */ static inline int of_driver_match_device(struct device *dev, @@ -67,6 +77,11 @@ static inline const struct of_device_id *of_match_device( { return NULL; } + +static inline struct device_node *of_cpu_device_node_get(int cpu) +{ + return NULL; +} #endif /* CONFIG_OF_DEVICE */ #endif /* _LINUX_OF_DEVICE_H */ From ef6595e9651c6c68265c9bad665df1ba397deceb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 18 Jan 2015 12:39:09 +0000 Subject: [PATCH 167/214] Revert "drivers: cpuidle: implement OF based idle states infrastructure" This reverts commit 830b8519a41c5b13b1787ef94f31ad1d3507ec73. --- drivers/cpuidle/Kconfig | 9 - drivers/cpuidle/Makefile | 1 - drivers/cpuidle/of_idle_states.c | 274 ------------------------------- drivers/cpuidle/of_idle_states.h | 8 - 4 files changed, 292 deletions(-) delete mode 100644 drivers/cpuidle/of_idle_states.c delete mode 100644 drivers/cpuidle/of_idle_states.h diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index e4d3913fe330..c4cc27e5c8a5 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -31,15 +31,6 @@ config CPU_IDLE_GOV_MENU config ARCH_NEEDS_CPU_IDLE_COUPLED def_bool n -config OF_IDLE_STATES - bool "Idle states DT support" - depends on ARM || ARM64 - default n - help - Allows the CPU idle framework to initialize CPU idle drivers - state data by using DT provided nodes compliant with idle states - device tree bindings. - if CPU_IDLE config CPU_IDLE_CALXEDA diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index d41366a7838c..0d8bd55e776f 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -4,7 +4,6 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o -obj-$(CONFIG_OF_IDLE_STATES) += of_idle_states.o obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o diff --git a/drivers/cpuidle/of_idle_states.c b/drivers/cpuidle/of_idle_states.c deleted file mode 100644 index eceb1b4c4657..000000000000 --- a/drivers/cpuidle/of_idle_states.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * OF idle states parsing code. - * - * Copyright (C) 2014 ARM Ltd. - * Author: Lorenzo Pieralisi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "of_idle_states.h" - -struct state_elem { - struct list_head list; - struct device_node *node; - int val; -}; - -static struct list_head head __initdata = LIST_HEAD_INIT(head); - -static bool __init state_cpu_valid(struct device_node *state_node, - struct device_node *cpu_node) -{ - int i = 0; - struct device_node *cpu_state; - - while ((cpu_state = of_parse_phandle(cpu_node, - "cpu-idle-states", i++))) { - if (cpu_state && state_node == cpu_state) { - of_node_put(cpu_state); - return true; - } - of_node_put(cpu_state); - } - return false; -} - -static bool __init state_cpus_valid(const cpumask_t *cpus, - struct device_node *state_node) -{ - int cpu; - struct device_node *cpu_node; - - /* - * Check if state is valid on driver cpumask cpus - */ - for_each_cpu(cpu, cpus) { - cpu_node = of_get_cpu_node(cpu, NULL); - - if (!cpu_node) { - pr_err("Missing device node for CPU %d\n", cpu); - return false; - } - - if (!state_cpu_valid(state_node, cpu_node)) - return false; - } - - return true; -} - -static int __init state_cmp(void *priv, struct list_head *a, - struct list_head *b) -{ - struct state_elem *ela, *elb; - - ela = container_of(a, struct state_elem, list); - elb = container_of(b, struct state_elem, list); - - return ela->val - elb->val; -} - -static int __init add_state_node(cpumask_t *cpumask, - struct device_node *state_node) -{ - struct state_elem *el; - u32 val; - - pr_debug(" * %s...\n", state_node->full_name); - - if (!state_cpus_valid(cpumask, state_node)) - return -EINVAL; - /* - * Parse just the value required to sort the states. - */ - if (of_property_read_u32(state_node, "min-residency-us", - &val)) { - pr_debug(" * %s missing min-residency-us property\n", - state_node->full_name); - return -EINVAL; - } - - el = kmalloc(sizeof(*el), GFP_KERNEL); - if (!el) { - pr_err("%s failed to allocate memory\n", __func__); - return -ENOMEM; - } - - el->node = state_node; - el->val = val; - list_add_tail(&el->list, &head); - - return 0; -} - -static void __init init_state_node(struct cpuidle_driver *drv, - struct device_node *state_node, - int *cnt) -{ - struct cpuidle_state *idle_state; - - pr_debug(" * %s...\n", state_node->full_name); - - idle_state = &drv->states[*cnt]; - - if (of_property_read_u32(state_node, "exit-latency-us", - &idle_state->exit_latency)) { - pr_debug(" * %s missing exit-latency-us property\n", - state_node->full_name); - return; - } - - if (of_property_read_u32(state_node, "min-residency-us", - &idle_state->target_residency)) { - pr_debug(" * %s missing min-residency-us property\n", - state_node->full_name); - return; - } - /* - * It is unknown to the idle driver if and when the tick_device - * loses context when the CPU enters the idle states. To solve - * this issue the tick device must be linked to a power domain - * so that the idle driver can check on which states the device - * loses its context. Current code takes the conservative choice - * of defining the idle state as one where the tick device always - * loses its context. On platforms where tick device never loses - * its context (ie it is not a C3STOP device) this turns into - * a nop. On platforms where the tick device does lose context in some - * states, this code can be optimized, when power domain specifications - * for ARM CPUs are finalized. - */ - idle_state->flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TIMER_STOP; - - strncpy(idle_state->name, state_node->name, CPUIDLE_NAME_LEN); - strncpy(idle_state->desc, state_node->name, CPUIDLE_NAME_LEN); - - (*cnt)++; -} - -static int __init init_idle_states(struct cpuidle_driver *drv, - struct device_node *state_nodes[], - unsigned int start_idx, bool init_nodes) -{ - struct state_elem *el; - struct list_head *curr, *tmp; - unsigned int cnt = start_idx; - - list_for_each_entry(el, &head, list) { - /* - * Check if the init function has to fill the - * state_nodes array on behalf of the CPUidle driver. - */ - if (init_nodes) - state_nodes[cnt] = el->node; - /* - * cnt is updated on return if a state was added. - */ - init_state_node(drv, el->node, &cnt); - - if (cnt == CPUIDLE_STATE_MAX) { - pr_warn("State index reached static CPU idle state limit\n"); - break; - } - } - - drv->state_count = cnt; - - list_for_each_safe(curr, tmp, &head) { - list_del(curr); - kfree(container_of(curr, struct state_elem, list)); - } - - /* - * If no idle states are detected, return an error and let the idle - * driver initialization fail accordingly. - */ - return (cnt > start_idx) ? 0 : -ENODATA; -} - -static void __init add_idle_states(struct cpuidle_driver *drv, - struct device_node *idle_states) -{ - struct device_node *state_node; - - for_each_child_of_node(idle_states, state_node) { - if ((!of_device_is_compatible(state_node, "arm,idle-state"))) { - pr_warn(" * %s: children of /cpus/idle-states must be \"arm,idle-state\" compatible\n", - state_node->full_name); - continue; - } - /* - * If memory allocation fails, better bail out. - * Initialized nodes are freed at initialization - * completion in of_init_idle_driver(). - */ - if ((add_state_node(drv->cpumask, state_node) == -ENOMEM)) - break; - } - /* - * Sort the states list before initializing the CPUidle driver - * states array. - */ - list_sort(NULL, &head, state_cmp); -} - -/* - * of_init_idle_driver - Parse the DT idle states and initialize the - * idle driver states array - * - * @drv: Pointer to CPU idle driver to be initialized - * @state_nodes: Array of struct device_nodes to be initialized if - * init_nodes == true. Must be sized CPUIDLE_STATE_MAX - * @start_idx: First idle state index to be initialized - * @init_nodes: Boolean to request device nodes initialization - * - * Returns: - * 0 on success - * <0 on failure - * - * On success the states array in the cpuidle driver contains - * initialized entries in the states array, starting from index start_idx. - * If init_nodes == true, on success the state_nodes array is initialized - * with idle state DT node pointers, starting from index start_idx, - * in a 1:1 relation with the idle driver states array. - */ -int __init of_init_idle_driver(struct cpuidle_driver *drv, - struct device_node *state_nodes[], - unsigned int start_idx, bool init_nodes) -{ - struct device_node *idle_states_node; - int ret; - - if (start_idx >= CPUIDLE_STATE_MAX) { - pr_warn("State index exceeds static CPU idle driver states array size\n"); - return -EINVAL; - } - - if (WARN(init_nodes && !state_nodes, - "Requested nodes stashing in an invalid nodes container\n")) - return -EINVAL; - - idle_states_node = of_find_node_by_path("/cpus/idle-states"); - if (!idle_states_node) - return -ENOENT; - - add_idle_states(drv, idle_states_node); - - ret = init_idle_states(drv, state_nodes, start_idx, init_nodes); - - of_node_put(idle_states_node); - - return ret; -} diff --git a/drivers/cpuidle/of_idle_states.h b/drivers/cpuidle/of_idle_states.h deleted file mode 100644 index 049f94ff4428..000000000000 --- a/drivers/cpuidle/of_idle_states.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __OF_IDLE_STATES -#define __OF_IDLE_STATES - -int __init of_init_idle_driver(struct cpuidle_driver *drv, - struct device_node *state_nodes[], - unsigned int start_idx, - bool init_nodes); -#endif From 89f6097555808374113d23e39b00c0d3c3d144c1 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 14 Feb 2014 14:28:39 +0000 Subject: [PATCH 168/214] drivers: cpuidle: implement DT based idle states infrastructure On most common ARM systems, the low-power states a CPU can be put into are not discoverable in HW and require device tree bindings to describe power down suspend operations and idle states parameters. In order to enable DT based idle states and configure idle drivers, this patch implements the bulk infrastructure required to parse the device tree idle states bindings and initialize the corresponding CPUidle driver states data. The parsing API accepts a start index that defines the first idle state that should be initialized by the parsing code in order to give new and legacy driver flexibility over which states should be parsed using the new DT mechanism. The idle states node(s) is obtained from the phandle list of the first cpu in the driver cpumask; the kernel checks that the idle state node phandle is the same for all CPUs in the driver cpumask before declaring the idle state as valid and start parsing its content. The idle state enter function pointer is initialized through DT match structures passed in by the CPUidle driver, so that ARM legacy code can cope with platform specific idle entry method based on compatible string matching and the code used to initialize the enter function pointer can be moved to the DT generic layer. Acked-by: Catalin Marinas Acked-by: Daniel Lezcano Signed-off-by: Lorenzo Pieralisi Signed-off-by: Daniel Lezcano (cherry picked from commit 9f14da345599c14b329cf5ac9499ad322056dd32) Signed-off-by: Mark Brown Conflicts: drivers/cpuidle/Kconfig --- drivers/cpuidle/Kconfig | 3 + drivers/cpuidle/Makefile | 1 + drivers/cpuidle/dt_idle_states.c | 213 +++++++++++++++++++++++++++++++ drivers/cpuidle/dt_idle_states.h | 7 + 4 files changed, 224 insertions(+) create mode 100644 drivers/cpuidle/dt_idle_states.c create mode 100644 drivers/cpuidle/dt_idle_states.h diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index c4cc27e5c8a5..69b8db87e496 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -31,6 +31,9 @@ config CPU_IDLE_GOV_MENU config ARCH_NEEDS_CPU_IDLE_COUPLED def_bool n +config DT_IDLE_STATES + bool + if CPU_IDLE config CPU_IDLE_CALXEDA diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 0d8bd55e776f..879182f031d4 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -4,6 +4,7 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o +obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c new file mode 100644 index 000000000000..52f4d11bbf3f --- /dev/null +++ b/drivers/cpuidle/dt_idle_states.c @@ -0,0 +1,213 @@ +/* + * DT idle states parsing code. + * + * Copyright (C) 2014 ARM Ltd. + * Author: Lorenzo Pieralisi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) "DT idle-states: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include "dt_idle_states.h" + +static int init_state_node(struct cpuidle_state *idle_state, + const struct of_device_id *matches, + struct device_node *state_node) +{ + int err; + const struct of_device_id *match_id; + + match_id = of_match_node(matches, state_node); + if (!match_id) + return -ENODEV; + /* + * CPUidle drivers are expected to initialize the const void *data + * pointer of the passed in struct of_device_id array to the idle + * state enter function. + */ + idle_state->enter = match_id->data; + + err = of_property_read_u32(state_node, "wakeup-latency-us", + &idle_state->exit_latency); + if (err) { + u32 entry_latency, exit_latency; + + err = of_property_read_u32(state_node, "entry-latency-us", + &entry_latency); + if (err) { + pr_debug(" * %s missing entry-latency-us property\n", + state_node->full_name); + return -EINVAL; + } + + err = of_property_read_u32(state_node, "exit-latency-us", + &exit_latency); + if (err) { + pr_debug(" * %s missing exit-latency-us property\n", + state_node->full_name); + return -EINVAL; + } + /* + * If wakeup-latency-us is missing, default to entry+exit + * latencies as defined in idle states bindings + */ + idle_state->exit_latency = entry_latency + exit_latency; + } + + err = of_property_read_u32(state_node, "min-residency-us", + &idle_state->target_residency); + if (err) { + pr_debug(" * %s missing min-residency-us property\n", + state_node->full_name); + return -EINVAL; + } + + idle_state->flags = CPUIDLE_FLAG_TIME_VALID; + if (of_property_read_bool(state_node, "local-timer-stop")) + idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP; + /* + * TODO: + * replace with kstrdup and pointer assignment when name + * and desc become string pointers + */ + strncpy(idle_state->name, state_node->name, CPUIDLE_NAME_LEN - 1); + strncpy(idle_state->desc, state_node->name, CPUIDLE_DESC_LEN - 1); + return 0; +} + +/* + * Check that the idle state is uniform across all CPUs in the CPUidle driver + * cpumask + */ +static bool idle_state_valid(struct device_node *state_node, unsigned int idx, + const cpumask_t *cpumask) +{ + int cpu; + struct device_node *cpu_node, *curr_state_node; + bool valid = true; + + /* + * Compare idle state phandles for index idx on all CPUs in the + * CPUidle driver cpumask. Start from next logical cpu following + * cpumask_first(cpumask) since that's the CPU state_node was + * retrieved from. If a mismatch is found bail out straight + * away since we certainly hit a firmware misconfiguration. + */ + for (cpu = cpumask_next(cpumask_first(cpumask), cpumask); + cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpumask)) { + cpu_node = of_cpu_device_node_get(cpu); + curr_state_node = of_parse_phandle(cpu_node, "cpu-idle-states", + idx); + if (state_node != curr_state_node) + valid = false; + + of_node_put(curr_state_node); + of_node_put(cpu_node); + if (!valid) + break; + } + + return valid; +} + +/** + * dt_init_idle_driver() - Parse the DT idle states and initialize the + * idle driver states array + * @drv: Pointer to CPU idle driver to be initialized + * @matches: Array of of_device_id match structures to search in for + * compatible idle state nodes. The data pointer for each valid + * struct of_device_id entry in the matches array must point to + * a function with the following signature, that corresponds to + * the CPUidle state enter function signature: + * + * int (*)(struct cpuidle_device *dev, + * struct cpuidle_driver *drv, + * int index); + * + * @start_idx: First idle state index to be initialized + * + * If DT idle states are detected and are valid the state count and states + * array entries in the cpuidle driver are initialized accordingly starting + * from index start_idx. + * + * Return: number of valid DT idle states parsed, <0 on failure + */ +int dt_init_idle_driver(struct cpuidle_driver *drv, + const struct of_device_id *matches, + unsigned int start_idx) +{ + struct cpuidle_state *idle_state; + struct device_node *state_node, *cpu_node; + int i, err = 0; + const cpumask_t *cpumask; + unsigned int state_idx = start_idx; + + if (state_idx >= CPUIDLE_STATE_MAX) + return -EINVAL; + /* + * We get the idle states for the first logical cpu in the + * driver mask (or cpu_possible_mask if the driver cpumask is not set) + * and we check through idle_state_valid() if they are uniform + * across CPUs, otherwise we hit a firmware misconfiguration. + */ + cpumask = drv->cpumask ? : cpu_possible_mask; + cpu_node = of_cpu_device_node_get(cpumask_first(cpumask)); + + for (i = 0; ; i++) { + state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); + if (!state_node) + break; + + if (!idle_state_valid(state_node, i, cpumask)) { + pr_warn("%s idle state not valid, bailing out\n", + state_node->full_name); + err = -EINVAL; + break; + } + + if (state_idx == CPUIDLE_STATE_MAX) { + pr_warn("State index reached static CPU idle driver states array size\n"); + break; + } + + idle_state = &drv->states[state_idx++]; + err = init_state_node(idle_state, matches, state_node); + if (err) { + pr_err("Parsing idle state node %s failed with err %d\n", + state_node->full_name, err); + err = -EINVAL; + break; + } + of_node_put(state_node); + } + + of_node_put(state_node); + of_node_put(cpu_node); + if (err) + return err; + /* + * Update the driver state count only if some valid DT idle states + * were detected + */ + if (i) + drv->state_count = state_idx; + + /* + * Return the number of present and valid DT idle states, which can + * also be 0 on platforms with missing DT idle states or legacy DT + * configuration predating the DT idle states bindings. + */ + return i; +} +EXPORT_SYMBOL_GPL(dt_init_idle_driver); diff --git a/drivers/cpuidle/dt_idle_states.h b/drivers/cpuidle/dt_idle_states.h new file mode 100644 index 000000000000..4818134bc65b --- /dev/null +++ b/drivers/cpuidle/dt_idle_states.h @@ -0,0 +1,7 @@ +#ifndef __DT_IDLE_STATES +#define __DT_IDLE_STATES + +int dt_init_idle_driver(struct cpuidle_driver *drv, + const struct of_device_id *matches, + unsigned int start_idx); +#endif From 29b3c03e0f74df4cbd461765b5271f643fe5c1e1 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Jul 2014 10:30:07 +0100 Subject: [PATCH 169/214] arm64: kernel: introduce cpu_init_idle CPU operation The CPUidle subsystem on ARM64 machines requires the idle states implementation back-end to initialize idle states parameter upon boot. This patch adds a hook in the CPU operations structure that should be initialized by the CPU operations back-end in order to provide a function that initializes cpu idle states. This patch also adds the infrastructure to arm64 kernel required to export the CPU operations based initialization interface, so that drivers (ie CPUidle) can use it when they are initialized at probe time. Reviewed-by: Catalin Marinas Signed-off-by: Lorenzo Pieralisi Signed-off-by: Catalin Marinas (cherry picked from commit d64f84f696463c58e1908510e45b0f5d450f737a) Signed-off-by: Mark Brown Conflicts: arch/arm64/kernel/Makefile --- arch/arm/kernel/Makefile | 1 + arch/arm64/include/asm/cpu_ops.h | 3 +++ arch/arm64/include/asm/cpuidle.h | 13 +++++++++++++ arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/cpuidle.c | 31 +++++++++++++++++++++++++++++++ 5 files changed, 49 insertions(+) create mode 100644 arch/arm64/include/asm/cpuidle.h create mode 100644 arch/arm64/kernel/cpuidle.c diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5f3338eacad2..67b88007bb18 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o +obj-$(CONFIG_ARM_CPU_IDLE) += cpuidle.o obj-$(CONFIG_SMP) += smp.o smp_tlb.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index c4cdb5e5b73d..1229034d185b 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -28,6 +28,8 @@ struct device_node; * enable-method property. * @cpu_init: Reads any data necessary for a specific enable-method from the * devicetree, for a given cpu node and proposed logical id. + * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from + * devicetree, for a given cpu node and proposed logical id. * @cpu_prepare: Early one-time preparation step for a cpu. If there is a * mechanism for doing so, tests whether it is possible to boot * the given CPU. @@ -43,6 +45,7 @@ struct device_node; struct cpu_operations { const char *name; int (*cpu_init)(struct device_node *, unsigned int); + int (*cpu_init_idle)(struct device_node *, unsigned int); int (*cpu_prepare)(unsigned int); int (*cpu_boot)(unsigned int); void (*cpu_postboot)(void); diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h new file mode 100644 index 000000000000..b52a9932e2b1 --- /dev/null +++ b/arch/arm64/include/asm/cpuidle.h @@ -0,0 +1,13 @@ +#ifndef __ASM_CPUIDLE_H +#define __ASM_CPUIDLE_H + +#ifdef CONFIG_CPU_IDLE +extern int cpu_init_idle(unsigned int cpu); +#else +static inline int cpu_init_idle(unsigned int cpu) +{ + return -EOPNOTSUPP; +} +#endif + +#endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index b9b87fa61bac..fc6d27105563 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o arm64-obj-$(CONFIG_KGDB) += kgdb.o obj-y += $(arm64-obj-y) vdso/ diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c new file mode 100644 index 000000000000..19d17f51db37 --- /dev/null +++ b/arch/arm64/kernel/cpuidle.c @@ -0,0 +1,31 @@ +/* + * ARM64 CPU idle arch support + * + * Copyright (C) 2014 ARM Ltd. + * Author: Lorenzo Pieralisi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include + +int cpu_init_idle(unsigned int cpu) +{ + int ret = -EOPNOTSUPP; + struct device_node *cpu_node = of_cpu_device_node_get(cpu); + + if (!cpu_node) + return -ENODEV; + + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_init_idle) + ret = cpu_ops[cpu]->cpu_init_idle(cpu_node, cpu); + + of_node_put(cpu_node); + return ret; +} From 4bb26215c07fb204ae1d66e54406dd6bae793c20 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 28 Feb 2014 13:03:44 +0000 Subject: [PATCH 170/214] drivers: cpuidle: CPU idle ARM64 driver This patch implements a generic CPU idle driver for ARM64 machines. It relies on the DT idle states infrastructure to initialize idle states count and respective parameters. Current code assumes the driver is managing idle states on all possible CPUs but can be easily generalized to support heterogenous systems and build cpumasks at runtime using MIDRs or DT cpu nodes compatible properties. The driver relies on the arm64 CPU operations to call the idle initialization hook used to parse and save suspend back-end specific idle states information upon probing. Idle state index 0 is always initialized as a simple wfi state, ie always considered present and functional on all ARM64 platforms. Idle state indices higher than 0 trigger idle state entry by calling the cpu_suspend function, that triggers the suspend operation through the CPU operations suspend back-end hook. cpu_suspend passes the idle state index as a parameter so that the CPU operations suspend back-end can retrieve the required idle state data by using the idle state index to execute a look-up on its internal data structures. Reviewed-by: Ashwin Chaugule Reviewed-by: Catalin Marinas Signed-off-by: Lorenzo Pieralisi Signed-off-by: Daniel Lezcano (cherry picked from commit 3299b63de384159579143d4abdfb94013e0b5470) Signed-off-by: Mark Brown Conflicts: drivers/cpuidle/Kconfig drivers/cpuidle/Makefile --- drivers/cpuidle/Kconfig | 5 ++ drivers/cpuidle/Kconfig.arm64 | 14 ++++ drivers/cpuidle/Makefile | 4 + drivers/cpuidle/cpuidle-arm64.c | 133 ++++++++++++++++++++++++++++++++ 4 files changed, 156 insertions(+) create mode 100644 drivers/cpuidle/Kconfig.arm64 create mode 100644 drivers/cpuidle/cpuidle-arm64.c diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 69b8db87e496..9625ce7ed5f8 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -36,6 +36,11 @@ config DT_IDLE_STATES if CPU_IDLE +menu "ARM64 CPU Idle Drivers" +depends on ARM64 +source "drivers/cpuidle/Kconfig.arm64" +endmenu + config CPU_IDLE_CALXEDA bool "CPU Idle Driver for Calxeda processors" depends on ARCH_HIGHBANK diff --git a/drivers/cpuidle/Kconfig.arm64 b/drivers/cpuidle/Kconfig.arm64 new file mode 100644 index 000000000000..d0a08ed1b2ee --- /dev/null +++ b/drivers/cpuidle/Kconfig.arm64 @@ -0,0 +1,14 @@ +# +# ARM64 CPU Idle drivers +# + +config ARM64_CPUIDLE + bool "Generic ARM64 CPU idle Driver" + select ARM64_CPU_SUSPEND + select DT_IDLE_STATES + help + Select this to enable generic cpuidle driver for ARM64. + It provides a generic idle driver whose idle states are configured + at run-time through DT nodes. The CPUidle suspend backend is + initialized by calling the CPU operations init idle hook + provided by architecture code. diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 879182f031d4..825433777ef2 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -8,3 +8,7 @@ obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o + +############################################################################### +# ARM64 drivers +obj-$(CONFIG_ARM64_CPUIDLE) += cpuidle-arm64.o diff --git a/drivers/cpuidle/cpuidle-arm64.c b/drivers/cpuidle/cpuidle-arm64.c new file mode 100644 index 000000000000..50997ea942fc --- /dev/null +++ b/drivers/cpuidle/cpuidle-arm64.c @@ -0,0 +1,133 @@ +/* + * ARM64 generic CPU idle driver. + * + * Copyright (C) 2014 ARM Ltd. + * Author: Lorenzo Pieralisi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) "CPUidle arm64: " fmt + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "dt_idle_states.h" + +/* + * arm64_enter_idle_state - Programs CPU to enter the specified state + * + * dev: cpuidle device + * drv: cpuidle driver + * idx: state index + * + * Called from the CPUidle framework to program the device to the + * specified target state selected by the governor. + */ +static int arm64_enter_idle_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + int ret; + + if (!idx) { + cpu_do_idle(); + return idx; + } + + ret = cpu_pm_enter(); + if (!ret) { + /* + * Pass idle state index to cpu_suspend which in turn will + * call the CPU ops suspend protocol with idle index as a + * parameter. + */ + ret = cpu_suspend(idx); + + cpu_pm_exit(); + } + + return ret ? -1 : idx; +} + +static struct cpuidle_driver arm64_idle_driver = { + .name = "arm64_idle", + .owner = THIS_MODULE, + /* + * State at index 0 is standby wfi and considered standard + * on all ARM platforms. If in some platforms simple wfi + * can't be used as "state 0", DT bindings must be implemented + * to work around this issue and allow installing a special + * handler for idle state index 0. + */ + .states[0] = { + .enter = arm64_enter_idle_state, + .exit_latency = 1, + .target_residency = 1, + .power_usage = UINT_MAX, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "WFI", + .desc = "ARM64 WFI", + } +}; + +static const struct of_device_id arm64_idle_state_match[] __initconst = { + { .compatible = "arm,idle-state", + .data = arm64_enter_idle_state }, + { }, +}; + +/* + * arm64_idle_init + * + * Registers the arm64 specific cpuidle driver with the cpuidle + * framework. It relies on core code to parse the idle states + * and initialize them using driver data structures accordingly. + */ +static int __init arm64_idle_init(void) +{ + int cpu, ret; + struct cpuidle_driver *drv = &arm64_idle_driver; + + /* + * Initialize idle states data, starting at index 1. + * This driver is DT only, if no DT idle states are detected (ret == 0) + * let the driver initialization fail accordingly since there is no + * reason to initialize the idle driver if only wfi is supported. + */ + ret = dt_init_idle_driver(drv, arm64_idle_state_match, 1); + if (ret <= 0) { + if (ret) + pr_err("failed to initialize idle states\n"); + return ret ? : -ENODEV; + } + + /* + * Call arch CPU operations in order to initialize + * idle states suspend back-end specific data + */ + for_each_possible_cpu(cpu) { + ret = cpu_init_idle(cpu); + if (ret) { + pr_err("CPU %d failed to init idle CPU ops\n", cpu); + return ret; + } + } + + ret = cpuidle_register(drv, NULL); + if (ret) { + pr_err("failed to register cpuidle driver\n"); + return ret; + } + + return 0; +} +device_initcall(arm64_idle_init); From 2eb736d6b425cb932b038fd555243b9b0e59c036 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 18 Jan 2015 13:13:32 +0000 Subject: [PATCH 171/214] ARM: Revert ARM side of PSCI backports We use the old interfaces on ARM for TC2 and have no pressing need to update. Signed-off-by: Mark Brown --- arch/arm/include/asm/psci.h | 7 +- arch/arm/kernel/Makefile | 1 - arch/arm/kernel/psci.c | 225 +++++++++++------------------------- arch/arm/kernel/psci_smp.c | 39 +------ arch/arm/kernel/setup.c | 10 +- 5 files changed, 79 insertions(+), 203 deletions(-) diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index b1946b7bbe94..f0a8627c9f1c 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -33,19 +33,16 @@ struct psci_operations { int (*cpu_off)(struct psci_power_state state); int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); int (*migrate)(unsigned long cpuid); - int (*affinity_info)(unsigned long target_affinity, - unsigned long lowest_affinity_level); - int (*migrate_info_type)(void); }; extern struct psci_operations psci_ops; extern struct smp_operations psci_smp_ops; #ifdef CONFIG_ARM_PSCI -int psci_init(void); +void psci_init(void); bool psci_smp_available(void); #else -static inline int psci_init(void) { } +static inline void psci_init(void) { } static inline bool psci_smp_available(void) { return false; } #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 39754851a3b1..65160d5fe850 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -32,7 +32,6 @@ obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o -obj-$(CONFIG_ARM_CPU_IDLE) += cpuidle.o obj-$(CONFIG_SMP) += smp.o smp_tlb.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index f705c1b27bb3..0daf4f252284 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -18,16 +18,12 @@ #include #include #include -#include -#include -#include #include #include #include #include #include -#include struct psci_operations psci_ops; @@ -35,47 +31,58 @@ struct psci_operations psci_ops; #define PSCI_SUP_DISABLED 0 #define PSCI_SUP_ENABLED 1 +static unsigned int psci; static int (*invoke_psci_fn)(u32, u32, u32, u32); -typedef int (*psci_initcall_t)(const struct device_node *); enum psci_function { PSCI_FN_CPU_SUSPEND, PSCI_FN_CPU_ON, PSCI_FN_CPU_OFF, PSCI_FN_MIGRATE, - PSCI_FN_AFFINITY_INFO, - PSCI_FN_MIGRATE_INFO_TYPE, PSCI_FN_MAX, }; static u32 psci_function_id[PSCI_FN_MAX]; +#define PSCI_RET_SUCCESS 0 +#define PSCI_RET_EOPNOTSUPP -1 +#define PSCI_RET_EINVAL -2 +#define PSCI_RET_EPERM -3 +#define PSCI_RET_EALREADYON -4 + static int psci_to_linux_errno(int errno) { switch (errno) { case PSCI_RET_SUCCESS: return 0; - case PSCI_RET_NOT_SUPPORTED: + case PSCI_RET_EOPNOTSUPP: return -EOPNOTSUPP; - case PSCI_RET_INVALID_PARAMS: + case PSCI_RET_EINVAL: return -EINVAL; - case PSCI_RET_DENIED: + case PSCI_RET_EPERM: return -EPERM; - case PSCI_RET_ALREADY_ON: + case PSCI_RET_EALREADYON: return -EAGAIN; }; return -EINVAL; } +#define PSCI_POWER_STATE_ID_MASK 0xffff +#define PSCI_POWER_STATE_ID_SHIFT 0 +#define PSCI_POWER_STATE_TYPE_MASK 0x1 +#define PSCI_POWER_STATE_TYPE_SHIFT 16 +#define PSCI_POWER_STATE_AFFL_MASK 0x3 +#define PSCI_POWER_STATE_AFFL_SHIFT 24 + static u32 psci_power_state_pack(struct psci_power_state state) { - return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) - & PSCI_0_2_POWER_STATE_ID_MASK) | - ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) - & PSCI_0_2_POWER_STATE_TYPE_MASK) | - ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) - & PSCI_0_2_POWER_STATE_AFFL_MASK); + return ((state.id & PSCI_POWER_STATE_ID_MASK) + << PSCI_POWER_STATE_ID_SHIFT) | + ((state.type & PSCI_POWER_STATE_TYPE_MASK) + << PSCI_POWER_STATE_TYPE_SHIFT) | + ((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) + << PSCI_POWER_STATE_AFFL_SHIFT); } /* @@ -112,14 +119,6 @@ static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, return function_id; } -static int psci_get_version(void) -{ - int err; - - err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); - return err; -} - static int psci_cpu_suspend(struct psci_power_state state, unsigned long entry_point) { @@ -163,36 +162,29 @@ static int psci_migrate(unsigned long cpuid) return psci_to_linux_errno(err); } -static int psci_affinity_info(unsigned long target_affinity, - unsigned long lowest_affinity_level) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; - err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); - return err; -} - -static int psci_migrate_info_type(void) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; - err = invoke_psci_fn(fn, 0, 0, 0); - return err; -} - -static int get_set_conduit_method(struct device_node *np) +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", }, + {}, +}; + +void __init psci_init(void) { + struct device_node *np; const char *method; + u32 id; - pr_info("probing for conduit method from DT.\n"); + if (psci == PSCI_SUP_DISABLED) + return; + + np = of_find_matching_node(NULL, psci_of_match); + if (!np) + return; + + pr_info("probing function IDs from device-tree\n"); if (of_property_read_string(np, "method", &method)) { - pr_warn("missing \"method\" property\n"); - return -ENXIO; + pr_warning("missing \"method\" property\n"); + goto out_put_node; } if (!strcmp("hvc", method)) { @@ -200,98 +192,9 @@ static int get_set_conduit_method(struct device_node *np) } else if (!strcmp("smc", method)) { invoke_psci_fn = __invoke_psci_fn_smc; } else { - pr_warn("invalid \"method\" property: %s\n", method); - return -EINVAL; + pr_warning("invalid \"method\" property: %s\n", method); + goto out_put_node; } - return 0; -} - -static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); -} - -static void psci_sys_poweroff(void) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); -} - -/* - * PSCI Function IDs for v0.2+ are well defined so use - * standard values. - */ -static int psci_0_2_init(struct device_node *np) -{ - int err, ver; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - ver = psci_get_version(); - - if (ver == PSCI_RET_NOT_SUPPORTED) { - /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ - pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); - err = -EOPNOTSUPP; - goto out_put_node; - } else { - pr_info("PSCIv%d.%d detected in firmware.\n", - PSCI_VERSION_MAJOR(ver), - PSCI_VERSION_MINOR(ver)); - - if (PSCI_VERSION_MAJOR(ver) == 0 && - PSCI_VERSION_MINOR(ver) < 2) { - err = -EINVAL; - pr_err("Conflicting PSCI version detected.\n"); - goto out_put_node; - } - } - - pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND; - psci_ops.cpu_suspend = psci_cpu_suspend; - - psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; - psci_ops.cpu_off = psci_cpu_off; - - psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON; - psci_ops.cpu_on = psci_cpu_on; - - psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE; - psci_ops.migrate = psci_migrate; - - psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO; - psci_ops.affinity_info = psci_affinity_info; - - psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = - PSCI_0_2_FN_MIGRATE_INFO_TYPE; - psci_ops.migrate_info_type = psci_migrate_info_type; - - arm_pm_restart = psci_sys_reset; - - pm_power_off = psci_sys_poweroff; - -out_put_node: - of_node_put(np); - return err; -} - -/* - * PSCI < v0.2 get PSCI Function IDs via DT. - */ -static int psci_0_1_init(struct device_node *np) -{ - u32 id; - int err; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - pr_info("Using PSCI v0.1 Function IDs from DT\n"); if (!of_property_read_u32(np, "cpu_suspend", &id)) { psci_function_id[PSCI_FN_CPU_SUSPEND] = id; @@ -315,25 +218,35 @@ static int psci_0_1_init(struct device_node *np) out_put_node: of_node_put(np); - return err; + return; } -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", .data = psci_0_1_init}, - { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - {}, -}; - -int __init psci_init(void) +int __init psci_probe(void) { struct device_node *np; - const struct of_device_id *matched_np; - psci_initcall_t init_fn; + int ret = -ENODEV; - np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); - if (!np) - return -ENODEV; + if (psci == PSCI_SUP_ENABLED) { + np = of_find_matching_node(NULL, psci_of_match); + if (np) + ret = 0; + } - init_fn = (psci_initcall_t)matched_np->data; - return init_fn(np); + of_node_put(np); + return ret; } + +static int __init early_psci(char *val) +{ + int ret = 0; + + if (strcmp(val, "enable") == 0) + psci = PSCI_SUP_ENABLED; + else if (strcmp(val, "disable") == 0) + psci = PSCI_SUP_DISABLED; + else + ret = -EINVAL; + + return ret; +} +early_param("psci", early_psci); diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c index 3af4190e4d9d..23a11424c568 100644 --- a/arch/arm/kernel/psci_smp.c +++ b/arch/arm/kernel/psci_smp.c @@ -17,10 +17,6 @@ #include #include #include -#include -#include -#include -#include #include #include @@ -72,36 +68,8 @@ void __ref psci_cpu_die(unsigned int cpu) /* We should never return */ panic("psci: cpu %d failed to shutdown\n", cpu); } - -int __ref psci_cpu_kill(unsigned int cpu) -{ - int err, i; - - if (!psci_ops.affinity_info) - return 1; - /* - * cpu_kill could race with cpu_die and we can - * potentially end up declaring this cpu undead - * while it is dying. So, try again a few times. - */ - - for (i = 0; i < 10; i++) { - err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); - if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed.\n", cpu); - return 1; - } - - msleep(10); - pr_info("Retrying again to check for CPU kill\n"); - } - - pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", - cpu, err); - /* Make platform_cpu_kill() fail. */ - return 0; -} - +#else +#define psci_cpu_die NULL #endif bool __init psci_smp_available(void) @@ -112,8 +80,5 @@ bool __init psci_smp_available(void) struct smp_operations __initdata psci_smp_ops = { .smp_boot_secondary = psci_boot_secondary, -#ifdef CONFIG_HOTPLUG_CPU .cpu_die = psci_cpu_die, - .cpu_kill = psci_cpu_kill, -#endif }; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d4d020178d77..29beb8c76560 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -835,10 +835,12 @@ void __init setup_arch(char **cmdline_p) psci_init(); #ifdef CONFIG_SMP if (is_smp()) { - if (psci_smp_available()) - smp_set_ops(&psci_smp_ops); - else if (mdesc->smp) - smp_set_ops(mdesc->smp); + if (!mdesc->smp_init || !mdesc->smp_init()) { + if (psci_smp_available()) + smp_set_ops(&psci_smp_ops); + else if (mdesc->smp) + smp_set_ops(mdesc->smp); + } smp_init_cpus(); } #endif From 6ad33af5f2a607b92b4c60eaf7bced300e57ce9f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2013 20:31:47 +0000 Subject: [PATCH 172/214] clockevents: Get rid of the notifier chain 7+ years and still a single user. Kill it. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130425143436.098520211@linutronix.de Signed-off-by: Thomas Gleixner (cherry picked from commit 7172a286ced0c1f4f239a0fa09db54ed37d3ead2) Signed-off-by: Mark Brown --- include/linux/clockchips.h | 1 - kernel/time/clockevents.c | 35 +++-------------------------------- kernel/time/tick-broadcast.c | 5 ++--- kernel/time/tick-common.c | 30 +++++------------------------- kernel/time/tick-internal.h | 7 ++++--- 5 files changed, 14 insertions(+), 64 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 963d71431388..2f498f66c1bb 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -150,7 +150,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode); -extern int clockevents_register_notifier(struct notifier_block *nb); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, bool force); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c6d6400ee137..dd70b4842c62 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include "tick-internal.h" @@ -23,10 +22,6 @@ /* The registered clock event devices */ static LIST_HEAD(clockevent_devices); static LIST_HEAD(clockevents_released); - -/* Notification for clock events */ -static RAW_NOTIFIER_HEAD(clockevents_chain); - /* Protection for the above */ static DEFINE_RAW_SPINLOCK(clockevents_lock); @@ -232,30 +227,6 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, return (rc && force) ? clockevents_program_min_delta(dev) : rc; } -/** - * clockevents_register_notifier - register a clock events change listener - */ -int clockevents_register_notifier(struct notifier_block *nb) -{ - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&clockevents_lock, flags); - ret = raw_notifier_chain_register(&clockevents_chain, nb); - raw_spin_unlock_irqrestore(&clockevents_lock, flags); - - return ret; -} - -/* - * Notify about a clock event change. Called with clockevents_lock - * held. - */ -static void clockevents_do_notify(unsigned long reason, void *dev) -{ - raw_notifier_call_chain(&clockevents_chain, reason, dev); -} - /* * Called after a notify add to make devices available which were * released from the notifier call. @@ -269,7 +240,7 @@ static void clockevents_notify_released(void) struct clock_event_device, list); list_del(&dev->list); list_add(&dev->list, &clockevent_devices); - clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); + tick_check_new_device(dev); } } @@ -290,7 +261,7 @@ void clockevents_register_device(struct clock_event_device *dev) raw_spin_lock_irqsave(&clockevents_lock, flags); list_add(&dev->list, &clockevent_devices); - clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); + tick_check_new_device(dev); clockevents_notify_released(); raw_spin_unlock_irqrestore(&clockevents_lock, flags); @@ -433,7 +404,7 @@ void clockevents_notify(unsigned long reason, void *arg) int cpu; raw_spin_lock_irqsave(&clockevents_lock, flags); - clockevents_do_notify(reason, arg); + tick_notify(reason, arg); switch (reason) { case CLOCK_EVT_NOTIFY_CPU_DEAD: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 20d6fba70652..0cb9d86c9bdf 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -64,7 +64,7 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) /* * Check, if the device can be utilized as broadcast device: */ -int tick_check_broadcast_device(struct clock_event_device *dev) +void tick_install_broadcast_device(struct clock_event_device *dev) { struct clock_event_device *cur = tick_broadcast_device.evtdev; @@ -72,7 +72,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev) (tick_broadcast_device.evtdev && tick_broadcast_device.evtdev->rating >= dev->rating) || (dev->features & CLOCK_EVT_FEAT_C3STOP)) - return 0; + return; clockevents_exchange_device(tick_broadcast_device.evtdev, dev); if (cur) @@ -90,7 +90,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev) */ if (dev->features & CLOCK_EVT_FEAT_ONESHOT) tick_clock_notify(); - return 1; } /* diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5d3fb100bc06..dbf4e18d5101 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -208,11 +208,11 @@ static void tick_setup_device(struct tick_device *td, /* * Check, if the new registered device should be used. */ -static int tick_check_new_device(struct clock_event_device *newdev) +void tick_check_new_device(struct clock_event_device *newdev) { struct clock_event_device *curdev; struct tick_device *td; - int cpu, ret = NOTIFY_OK; + int cpu; unsigned long flags; raw_spin_lock_irqsave(&tick_device_lock, flags); @@ -275,18 +275,14 @@ static int tick_check_new_device(struct clock_event_device *newdev) tick_oneshot_notify(); raw_spin_unlock_irqrestore(&tick_device_lock, flags); - return NOTIFY_STOP; + return; out_bc: /* * Can the new device be used as a broadcast device ? */ - if (tick_check_broadcast_device(newdev)) - ret = NOTIFY_STOP; - + tick_install_broadcast_device(newdev); raw_spin_unlock_irqrestore(&tick_device_lock, flags); - - return ret; } /* @@ -360,17 +356,10 @@ static void tick_resume(void) raw_spin_unlock_irqrestore(&tick_device_lock, flags); } -/* - * Notification about clock event devices - */ -static int tick_notify(struct notifier_block *nb, unsigned long reason, - void *dev) +void tick_notify(unsigned long reason, void *dev) { switch (reason) { - case CLOCK_EVT_NOTIFY_ADD: - return tick_check_new_device(dev); - case CLOCK_EVT_NOTIFY_BROADCAST_ON: case CLOCK_EVT_NOTIFY_BROADCAST_OFF: case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: @@ -404,21 +393,12 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, default: break; } - - return NOTIFY_OK; } -static struct notifier_block tick_notifier = { - .notifier_call = tick_notify, -}; - /** * tick_init - initialize the tick control - * - * Register the notifier with the clockevents framework */ void __init tick_init(void) { - clockevents_register_notifier(&tick_notifier); tick_broadcast_init(); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f0299eae4602..60742fe6f63d 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -18,6 +18,8 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); +extern void tick_notify(unsigned long reason, void *dev); +extern void tick_check_new_device(struct clock_event_device *dev); extern void clockevents_shutdown(struct clock_event_device *dev); @@ -90,7 +92,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; } */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); -extern int tick_check_broadcast_device(struct clock_event_device *dev); +extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); @@ -102,9 +104,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); #else /* !BROADCAST */ -static inline int tick_check_broadcast_device(struct clock_event_device *dev) +static inline void tick_install_broadcast_device(struct clock_event_device *dev) { - return 0; } static inline int tick_is_broadcast_device(struct clock_event_device *dev) From 32f45d57f49dc2a130b332954a4fa42dd52fb666 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 25 Apr 2013 20:31:48 +0000 Subject: [PATCH 173/214] clockevents: Move the tick_notify() switch case to clockevents_notify() No need to call another function and have duplicated cases. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Magnus Damm Link: http://lkml.kernel.org/r/20130425143436.235746557@linutronix.de Signed-off-by: Thomas Gleixner (cherry picked from commit 8c53daf63f56791ed47fc585206ef3049489612f) Signed-off-by: Mark Brown Conflicts: kernel/time/tick-common.c --- kernel/time/clockevents.c | 28 +++++++++++++++++++++- kernel/time/tick-common.c | 47 ++++--------------------------------- kernel/time/tick-internal.h | 5 +++- 3 files changed, 35 insertions(+), 45 deletions(-) diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index dd70b4842c62..0e3a8448e115 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -404,10 +404,36 @@ void clockevents_notify(unsigned long reason, void *arg) int cpu; raw_spin_lock_irqsave(&clockevents_lock, flags); - tick_notify(reason, arg); switch (reason) { + case CLOCK_EVT_NOTIFY_BROADCAST_ON: + case CLOCK_EVT_NOTIFY_BROADCAST_OFF: + case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: + tick_broadcast_on_off(reason, arg); + break; + + case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: + case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: + tick_broadcast_oneshot_control(reason); + break; + + case CLOCK_EVT_NOTIFY_CPU_DYING: + tick_handover_do_timer(arg); + break; + + case CLOCK_EVT_NOTIFY_SUSPEND: + tick_suspend(); + tick_suspend_broadcast(); + break; + + case CLOCK_EVT_NOTIFY_RESUME: + tick_resume(); + break; + case CLOCK_EVT_NOTIFY_CPU_DEAD: + tick_shutdown_broadcast_oneshot(arg); + tick_shutdown_broadcast(arg); + tick_shutdown(arg); /* * Unregister the clock event devices which were * released from the users in the notify chain. diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index dbf4e18d5101..e5d6c4cdeba9 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -290,7 +290,7 @@ out_bc: * * Called with interrupts disabled. */ -static void tick_handover_do_timer(int *cpup) +void tick_handover_do_timer(int *cpup) { if (*cpup == tick_do_timer_cpu) { int cpu = cpumask_first(cpu_online_mask); @@ -307,7 +307,7 @@ static void tick_handover_do_timer(int *cpup) * access the hardware device itself. * We just set the mode and remove it from the lists. */ -static void tick_shutdown(unsigned int *cpup) +void tick_shutdown(unsigned int *cpup) { struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); struct clock_event_device *dev = td->evtdev; @@ -328,7 +328,7 @@ static void tick_shutdown(unsigned int *cpup) raw_spin_unlock_irqrestore(&tick_device_lock, flags); } -static void tick_suspend(void) +void tick_suspend(void) { struct tick_device *td = &__get_cpu_var(tick_cpu_device); unsigned long flags; @@ -338,7 +338,7 @@ static void tick_suspend(void) raw_spin_unlock_irqrestore(&tick_device_lock, flags); } -static void tick_resume(void) +void tick_resume(void) { struct tick_device *td = &__get_cpu_var(tick_cpu_device); unsigned long flags; @@ -356,45 +356,6 @@ static void tick_resume(void) raw_spin_unlock_irqrestore(&tick_device_lock, flags); } -void tick_notify(unsigned long reason, void *dev) -{ - switch (reason) { - - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: - tick_broadcast_on_off(reason, dev); - break; - - case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: - case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - tick_broadcast_oneshot_control(reason); - break; - - case CLOCK_EVT_NOTIFY_CPU_DYING: - tick_handover_do_timer(dev); - break; - - case CLOCK_EVT_NOTIFY_CPU_DEAD: - tick_shutdown_broadcast_oneshot(dev); - tick_shutdown_broadcast(dev); - tick_shutdown(dev); - break; - - case CLOCK_EVT_NOTIFY_SUSPEND: - tick_suspend(); - tick_suspend_broadcast(); - break; - - case CLOCK_EVT_NOTIFY_RESUME: - tick_resume(); - break; - - default: - break; - } -} - /** * tick_init - initialize the tick control */ diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 60742fe6f63d..06bfc8802dfb 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -18,8 +18,11 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); -extern void tick_notify(unsigned long reason, void *dev); extern void tick_check_new_device(struct clock_event_device *dev); +extern void tick_handover_do_timer(int *cpup); +extern void tick_shutdown(unsigned int *cpup); +extern void tick_suspend(void); +extern void tick_resume(void); extern void clockevents_shutdown(struct clock_event_device *dev); From 05c267173970d7d720eb93e21b779a7002e08eb1 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Fri, 7 Feb 2014 13:36:06 +0530 Subject: [PATCH 174/214] time: Change the return type of clockevents_notify() to integer The broadcast framework can potentially be made use of by archs which do not have an external clock device as well. Then, it is required that one of the CPUs need to handle the broadcasting of wakeup IPIs to the CPUs in deep idle. As a result its local timers should remain functional all the time. For such a CPU, the BROADCAST_ENTER notification has to fail indicating that its clock device cannot be shutdown. To make way for this support, change the return type of tick_broadcast_oneshot_control() and hence clockevents_notify() to indicate such scenarios. Signed-off-by: Preeti U Murthy Cc: deepthi@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: fweisbec@gmail.com Cc: paulus@samba.org Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: svaidy@linux.vnet.ibm.com Cc: peterz@infradead.org Cc: benh@kernel.crashing.org Cc: rafael.j.wysocki@intel.com Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140207080606.17187.78306.stgit@preeti.in.ibm.com Signed-off-by: Thomas Gleixner (cherry picked from commit da7e6f45c34d39186b72328bacc4dd86bff60e0a) Signed-off-by: Mark Brown --- include/linux/clockchips.h | 6 +++--- kernel/time/clockevents.c | 8 +++++--- kernel/time/tick-broadcast.c | 6 ++++-- kernel/time/tick-internal.h | 6 +++--- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 2f498f66c1bb..3390db716ee3 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -181,9 +181,9 @@ static inline int tick_check_broadcast_expired(void) { return 0; } #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS -extern void clockevents_notify(unsigned long reason, void *arg); +extern int clockevents_notify(unsigned long reason, void *arg); #else -static inline void clockevents_notify(unsigned long reason, void *arg) {} +static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } #endif #else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ @@ -191,7 +191,7 @@ static inline void clockevents_notify(unsigned long reason, void *arg) {} static inline void clockevents_suspend(void) {} static inline void clockevents_resume(void) {} -static inline void clockevents_notify(unsigned long reason, void *arg) {} +static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } #endif diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 0e3a8448e115..506864c1f6a2 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -396,12 +396,13 @@ void clockevents_resume(void) #ifdef CONFIG_GENERIC_CLOCKEVENTS /** * clockevents_notify - notification about relevant events + * Returns 0 on success, any other value on error */ -void clockevents_notify(unsigned long reason, void *arg) +int clockevents_notify(unsigned long reason, void *arg) { struct clock_event_device *dev, *tmp; unsigned long flags; - int cpu; + int cpu, ret = 0; raw_spin_lock_irqsave(&clockevents_lock, flags); @@ -414,7 +415,7 @@ void clockevents_notify(unsigned long reason, void *arg) case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - tick_broadcast_oneshot_control(reason); + ret = tick_broadcast_oneshot_control(reason); break; case CLOCK_EVT_NOTIFY_CPU_DYING: @@ -457,6 +458,7 @@ void clockevents_notify(unsigned long reason, void *arg) break; } raw_spin_unlock_irqrestore(&clockevents_lock, flags); + return ret; } EXPORT_SYMBOL_GPL(clockevents_notify); #endif diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 0cb9d86c9bdf..ed01fe1104ee 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -549,14 +549,15 @@ again: /* * Powerstate information: The system enters/leaves a state, where * affected devices might stop + * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. */ -void tick_broadcast_oneshot_control(unsigned long reason) +int tick_broadcast_oneshot_control(unsigned long reason) { struct clock_event_device *bc, *dev; struct tick_device *td; unsigned long flags; ktime_t now; - int cpu; + int cpu, ret = 0; /* * Periodic mode does not care about the enter/exit of power @@ -662,6 +663,7 @@ void tick_broadcast_oneshot_control(unsigned long reason) } out: raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); + return ret; } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 06bfc8802dfb..ae120c263a74 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -39,7 +39,7 @@ extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); extern void tick_resume_oneshot(void); # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern void tick_broadcast_oneshot_control(unsigned long reason); +extern int tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); @@ -51,7 +51,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } -static inline void tick_broadcast_oneshot_control(unsigned long reason) { } +static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } @@ -80,7 +80,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } -static inline void tick_broadcast_oneshot_control(unsigned long reason) { } +static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { From fe20b8ebfda57865520b70c5407d441ee78cbc03 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Fri, 7 Feb 2014 13:36:32 +0530 Subject: [PATCH 175/214] tick: Introduce hrtimer based broadcast On some architectures, in certain CPU deep idle states the local timers stop. An external clock device is used to wakeup these CPUs. The kernel support for the wakeup of these CPUs is provided by the tick broadcast framework by using the external clock device as the wakeup source. However not all implementations of architectures provide such an external clock device. This patch includes support in the broadcast framework to handle the wakeup of the CPUs in deep idle states on such systems by queuing a hrtimer on one of the CPUs, which is meant to handle the wakeup of CPUs in deep idle states. This patchset introduces a pseudo clock device which can be registered by the archs as tick_broadcast_device in the absence of a real external clock device. Once registered, the broadcast framework will work as is for these architectures as long as the archs take care of the BROADCAST_ENTER notification failing for one of the CPUs. This CPU is made the stand by CPU to handle wakeup of the CPUs in deep idle and it *must not enter deep idle states*. The CPU with the earliest wakeup is chosen to be this CPU. Hence this way the stand by CPU dynamically moves around and so does the hrtimer which is queued to trigger at the next earliest wakeup time. This is consistent with the case where an external clock device is present. The smp affinity of this clock device is set to the CPU with the earliest wakeup. This patchset handles the hotplug of the stand by CPU as well by moving the hrtimer on to the CPU handling the CPU_DEAD notification. Originally-from: Thomas Gleixner Signed-off-by: Preeti U Murthy Cc: deepthi@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: fweisbec@gmail.com Cc: paulus@samba.org Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: svaidy@linux.vnet.ibm.com Cc: peterz@infradead.org Cc: benh@kernel.crashing.org Cc: rafael.j.wysocki@intel.com Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140207080632.17187.80532.stgit@preeti.in.ibm.com Signed-off-by: Thomas Gleixner (cherry picked from commit 5d1638acb9f62fa7eb0c07cb85318bbe1f13b227) Signed-off-by: Mark Brown Conflicts: kernel/time/Makefile --- include/linux/clockchips.h | 9 +++ kernel/time/Makefile | 2 +- kernel/time/tick-broadcast-hrtimer.c | 106 +++++++++++++++++++++++++++ kernel/time/tick-broadcast.c | 54 +++++++++++++- 4 files changed, 167 insertions(+), 4 deletions(-) create mode 100644 kernel/time/tick-broadcast-hrtimer.c diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 3390db716ee3..702935e1884c 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -60,6 +60,11 @@ enum clock_event_mode { */ #define CLOCK_EVT_FEAT_DYNIRQ 0x000020 +/* + * Clockevent device is based on a hrtimer for broadcast + */ +#define CLOCK_EVT_FEAT_HRTIMER 0x000080 + /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low @@ -81,6 +86,7 @@ enum clock_event_mode { * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) + * @bound_on: Bound on CPU * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code */ @@ -110,6 +116,7 @@ struct clock_event_device { const char *name; int rating; int irq; + int bound_on; const struct cpumask *cpumask; struct list_head list; } ____cacheline_aligned; @@ -175,9 +182,11 @@ extern int tick_receive_broadcast(void); #endif #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); #else static inline int tick_check_broadcast_expired(void) { return 0; } +static void tick_setup_hrtimer_broadcast(void) {}; #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS diff --git a/kernel/time/Makefile b/kernel/time/Makefile index ff7d9d2ab504..efb64bee1817 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -3,7 +3,7 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o +obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o tick-broadcast-hrtimer.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o obj-$(CONFIG_TIMER_STATS) += timer_stats.o diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c new file mode 100644 index 000000000000..92425279312b --- /dev/null +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -0,0 +1,106 @@ +/* + * linux/kernel/time/tick-broadcast-hrtimer.c + * This file emulates a local clock event device + * via a pseudo clock device. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tick-internal.h" + +static struct hrtimer bctimer; + +static void bc_set_mode(enum clock_event_mode mode, + struct clock_event_device *bc) +{ + switch (mode) { + case CLOCK_EVT_MODE_SHUTDOWN: + /* + * Note, we cannot cancel the timer here as we might + * run into the following live lock scenario: + * + * cpu 0 cpu1 + * lock(broadcast_lock); + * hrtimer_interrupt() + * bc_handler() + * tick_handle_oneshot_broadcast(); + * lock(broadcast_lock); + * hrtimer_cancel() + * wait_for_callback() + */ + hrtimer_try_to_cancel(&bctimer); + break; + default: + break; + } +} + +/* + * This is called from the guts of the broadcast code when the cpu + * which is about to enter idle has the earliest broadcast timer event. + */ +static int bc_set_next(ktime_t expires, struct clock_event_device *bc) +{ + /* + * We try to cancel the timer first. If the callback is on + * flight on some other cpu then we let it handle it. If we + * were able to cancel the timer nothing can rearm it as we + * own broadcast_lock. + * + * However we can also be called from the event handler of + * ce_broadcast_hrtimer itself when it expires. We cannot + * restart the timer because we are in the callback, but we + * can set the expiry time and let the callback return + * HRTIMER_RESTART. + */ + if (hrtimer_try_to_cancel(&bctimer) >= 0) { + hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED); + /* Bind the "device" to the cpu */ + bc->bound_on = smp_processor_id(); + } else if (bc->bound_on == smp_processor_id()) { + hrtimer_set_expires(&bctimer, expires); + } + return 0; +} + +static struct clock_event_device ce_broadcast_hrtimer = { + .set_mode = bc_set_mode, + .set_next_ktime = bc_set_next, + .features = CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_KTIME | + CLOCK_EVT_FEAT_HRTIMER, + .rating = 0, + .bound_on = -1, + .min_delta_ns = 1, + .max_delta_ns = KTIME_MAX, + .min_delta_ticks = 1, + .max_delta_ticks = KTIME_MAX, + .mult = 1, + .shift = 0, + .cpumask = cpu_all_mask, +}; + +static enum hrtimer_restart bc_handler(struct hrtimer *t) +{ + ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer); + + if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX) + return HRTIMER_NORESTART; + + return HRTIMER_RESTART; +} + +void tick_setup_hrtimer_broadcast(void) +{ + hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + bctimer.function = bc_handler; + clockevents_register_device(&ce_broadcast_hrtimer); +} diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index ed01fe1104ee..5d732da73009 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -546,6 +546,42 @@ again: raw_spin_unlock(&tick_broadcast_lock); } +static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu) +{ + if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER)) + return 0; + if (bc->next_event.tv64 == KTIME_MAX) + return 0; + return bc->bound_on == cpu ? -EBUSY : 0; +} + +static void broadcast_shutdown_local(struct clock_event_device *bc, + struct clock_event_device *dev) +{ + /* + * For hrtimer based broadcasting we cannot shutdown the cpu + * local device if our own event is the first one to expire or + * if we own the broadcast timer. + */ + if (bc->features & CLOCK_EVT_FEAT_HRTIMER) { + if (broadcast_needs_cpu(bc, smp_processor_id())) + return; + if (dev->next_event.tv64 < bc->next_event.tv64) + return; + } + clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); +} + +static void broadcast_move_bc(int deadcpu) +{ + struct clock_event_device *bc = tick_broadcast_device.evtdev; + + if (!bc || !broadcast_needs_cpu(bc, deadcpu)) + return; + /* This moves the broadcast assignment to this cpu */ + clockevents_program_event(bc, bc->next_event, 1); +} + /* * Powerstate information: The system enters/leaves a state, where * affected devices might stop @@ -564,7 +600,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) * states */ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - return; + return 0; /* * We are called with preemtion disabled from the depth of the @@ -575,7 +611,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) dev = td->evtdev; if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return; + return 0; bc = tick_broadcast_device.evtdev; @@ -583,7 +619,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + broadcast_shutdown_local(bc, dev); /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and @@ -596,6 +632,16 @@ int tick_broadcast_oneshot_control(unsigned long reason) dev->next_event.tv64 < bc->next_event.tv64) tick_broadcast_set_event(bc, cpu, dev->next_event, 1); } + /* + * If the current CPU owns the hrtimer broadcast + * mechanism, it cannot go deep idle and we remove the + * CPU from the broadcast mask. We don't have to go + * through the EXIT path as the local timer is not + * shutdown. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) + cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); @@ -767,6 +813,8 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) */ cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); + broadcast_move_bc(cpu); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } From a8db4f03c37ad9bee11f6b673d17f42ef7a742f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 16:00:46 +0100 Subject: [PATCH 176/214] time: Fixup fallout from recent clockevent/tick changes Make the stub function static inline instead of static and move the clockevents related function into the proper ifdeffed section. Reported-by: Fengguang Wu Signed-off-by: Thomas Gleixner Cc: Soren Brinkmann Cc: Preeti U Murthy (cherry picked from commit f1689bb7abec8e2e670d8ad11eaa86d54bad8cfd) Signed-off-by: Mark Brown --- include/linux/clockchips.h | 2 +- kernel/time/tick-internal.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 702935e1884c..fd294ee03dcf 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -186,7 +186,7 @@ extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); #else static inline int tick_check_broadcast_expired(void) { return 0; } -static void tick_setup_hrtimer_broadcast(void) {}; +static inline void tick_setup_hrtimer_broadcast(void) {}; #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index ae120c263a74..c85edb18d68e 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -145,6 +145,8 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) return !(dev->features & CLOCK_EVT_FEAT_DUMMY); } +int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); + #endif extern void do_timer(unsigned long ticks); From eab6f41c54bbdfb82ba46f3ee047bfe5dbd74e6b Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Sun, 9 Feb 2014 11:32:22 +0530 Subject: [PATCH 177/214] tick: Fixup more fallout from hrtimer broadcast mode The hrtimer mode of broadcast is supported only when GENERIC_CLOCKEVENTS_BROADCAST and TICK_ONESHOT config options are enabled. Hence compile in the functions for hrtimer mode of broadcast only when these options are selected. Also fix max_delta_ticks value for the pseudo clock device. Reported-by: Fengguang Wu Reported-by: Ingo Molnar Signed-off-by: Preeti U Murthy Link: http://lkml.kernel.org/r/52F719EE.9010304@linux.vnet.ibm.com Signed-off-by: Thomas Gleixner (cherry picked from commit 849401b66d305f3feb75b6db7459b95ad190552a) Signed-off-by: Mark Brown Conflicts: kernel/time/Makefile --- include/linux/clockchips.h | 1 + kernel/time/Makefile | 5 ++++- kernel/time/tick-broadcast-hrtimer.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index fd294ee03dcf..a4520e27f8bd 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -202,6 +202,7 @@ static inline void clockevents_resume(void) {} static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } +static inline void tick_setup_hrtimer_broadcast(void) {}; #endif diff --git a/kernel/time/Makefile b/kernel/time/Makefile index efb64bee1817..a64e0de74c0d 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -3,7 +3,10 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o tick-broadcast-hrtimer.o +ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) + obj-y += tick-broadcast.o + obj-$(CONFIG_TICK_ONESHOT) += tick-broadcast-hrtimer.o +endif obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o obj-$(CONFIG_TIMER_STATS) += timer_stats.o diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index 92425279312b..eb682d5c697c 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -82,7 +82,7 @@ static struct clock_event_device ce_broadcast_hrtimer = { .min_delta_ns = 1, .max_delta_ns = KTIME_MAX, .min_delta_ticks = 1, - .max_delta_ticks = KTIME_MAX, + .max_delta_ticks = ULONG_MAX, .mult = 1, .shift = 0, .cpumask = cpu_all_mask, From 91eba7f91f33524f3b9a68d9a752be1dbb49bc90 Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Mon, 12 May 2014 10:40:30 +0100 Subject: [PATCH 178/214] arm64: Use pr_* instead of printk This patch fixed the following checkpatch complaint as using pr_* instead of printk. WARNING: printk() should include KERN_ facility level Signed-off-by: Jungseok Lee Reviewed-by: Sungjinn Chung Acked-by: Kukjin Kim Signed-off-by: Catalin Marinas (cherry picked from commit ac7b406c1a9d50ddbf5e5cbce8ca4d68d36ac2db) Signed-off-by: Mark Brown --- arch/arm64/kernel/traps.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7ffadddb645d..268ce966afd6 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -156,7 +156,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.pc = thread_saved_pc(tsk); } - printk("Call trace:\n"); + pr_emerg("Call trace:\n"); while (1) { unsigned long where = frame.pc; int ret; @@ -328,17 +328,17 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __pte_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pte %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pte %016lx.\n", file, line, val); } void __pmd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pmd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); } void __pgd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pgd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); } void __init trap_init(void) From 95c91bdd8eafdc74337049c45d74c903b7dac49c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Jul 2014 19:22:12 +0100 Subject: [PATCH 179/214] arm64: vdso: move to _install_special_mapping and remove arch_vma_name _install_special_mapping replaces install_special_mapping and removes the need to detect special VMA in arch_vma_name. This patch moves the vdso and compat vectors page code over to the new API. Cc: Andy Lutomirski Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 2fea7f6c98f5957e539eb8aa0ce849729b900342) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso.c | 80 ++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 45 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 40ba5dea2ed7..ad8dddbffef2 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -85,22 +85,29 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - int ret; + static struct vm_special_mapping spec = { + .name = "[vectors]", + .pages = vectors_page, + + }; + void *ret; down_write(&mm->mmap_sem); current->mm->context.vdso = (void *)addr; /* Map vectors page at the high address. */ - ret = install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, - vectors_page); + ret = _install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, + &spec); up_write(&mm->mmap_sem); - return ret; + return PTR_ERR_OR_ZERO(ret); } #endif /* CONFIG_COMPAT */ +static struct vm_special_mapping vdso_spec[2]; + static int __init vdso_init(void) { int i; @@ -127,6 +134,17 @@ static int __init vdso_init(void) /* Grab the vDSO data page. */ vdso_pagelist[i] = virt_to_page(vdso_data); + /* Populate the special mapping structures */ + vdso_spec[0] = (struct vm_special_mapping) { + .name = "[vdso]", + .pages = vdso_pagelist, + }; + + vdso_spec[1] = (struct vm_special_mapping) { + .name = "[vvar]", + .pages = vdso_pagelist + vdso_pages, + }; + return 0; } arch_initcall(vdso_init); @@ -136,7 +154,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, { struct mm_struct *mm = current->mm; unsigned long vdso_base, vdso_text_len, vdso_mapping_len; - int ret; + void *ret; vdso_text_len = vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ @@ -145,23 +163,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { - ret = vdso_base; + ret = ERR_PTR(vdso_base); goto up_fail; } mm->context.vdso = (void *)vdso_base; - ret = install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (ret) + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &vdso_spec[0]); + if (IS_ERR(ret)) goto up_fail; vdso_base += vdso_text_len; - ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, - vdso_pagelist + vdso_pages); - if (ret) + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + &vdso_spec[1]); + if (IS_ERR(ret)) goto up_fail; up_write(&mm->mmap_sem); @@ -170,35 +188,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, up_fail: mm->context.vdso = NULL; up_write(&mm->mmap_sem); - return ret; -} - -const char *arch_vma_name(struct vm_area_struct *vma) -{ - unsigned long vdso_text; - - if (!vma->vm_mm) - return NULL; - - vdso_text = (unsigned long)vma->vm_mm->context.vdso; - - /* - * We can re-use the vdso pointer in mm_context_t for identifying - * the vectors page for compat applications. The vDSO will always - * sit above TASK_UNMAPPED_BASE and so we don't need to worry about - * it conflicting with the vectors base. - */ - if (vma->vm_start == vdso_text) { -#ifdef CONFIG_COMPAT - if (vma->vm_start == AARCH32_VECTORS_BASE) - return "[vectors]"; -#endif - return "[vdso]"; - } else if (vma->vm_start == (vdso_text + (vdso_pages << PAGE_SHIFT))) { - return "[vvar]"; - } - - return NULL; + return PTR_ERR(ret); } /* From 82a95d0521cb6258559d18dca736da8272ba05a7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Jul 2014 19:22:13 +0100 Subject: [PATCH 180/214] arm64: vdso: move data page before code pages Andy pointed out that binutils generates additional sections in the vdso image (e.g. section string table) which, if our .text section gets big enough, could cross a page boundary and end up screwing up the location where the kernel expects to put the data page. This patch solves the issue in the same manner as x86_32, by moving the data page before the code pages. Cc: Andy Lutomirski Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 601255ae3c98fdeeee3a8bb4696425e4f868b4f1) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso.c | 34 +++++++++++++++---------------- arch/arm64/kernel/vdso/vdso.lds.S | 4 +--- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index ad8dddbffef2..b8289f08795f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -118,8 +118,8 @@ static int __init vdso_init(void) } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; - pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", - vdso_pages + 1, vdso_pages, 1L, &vdso_start); + pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", + vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), @@ -127,22 +127,22 @@ static int __init vdso_init(void) if (vdso_pagelist == NULL) return -ENOMEM; + /* Grab the vDSO data page. */ + vdso_pagelist[0] = virt_to_page(vdso_data); + /* Grab the vDSO code pages. */ for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); - - /* Grab the vDSO data page. */ - vdso_pagelist[i] = virt_to_page(vdso_data); + vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE); /* Populate the special mapping structures */ vdso_spec[0] = (struct vm_special_mapping) { - .name = "[vdso]", + .name = "[vvar]", .pages = vdso_pagelist, }; vdso_spec[1] = (struct vm_special_mapping) { - .name = "[vvar]", - .pages = vdso_pagelist + vdso_pages, + .name = "[vdso]", + .pages = &vdso_pagelist[1], }; return 0; @@ -166,22 +166,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, ret = ERR_PTR(vdso_base); goto up_fail; } - mm->context.vdso = (void *)vdso_base; - - ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, &vdso_spec[0]); if (IS_ERR(ret)) goto up_fail; - vdso_base += vdso_text_len; - ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, + vdso_base += PAGE_SIZE; + mm->context.vdso = (void *)vdso_base; + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_spec[1]); if (IS_ERR(ret)) goto up_fail; + up_write(&mm->mmap_sem); return 0; diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 8154b8d1c826..beca249bc2f3 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -28,6 +28,7 @@ OUTPUT_ARCH(aarch64) SECTIONS { + PROVIDE(_vdso_data = . - PAGE_SIZE); . = VDSO_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -57,9 +58,6 @@ SECTIONS _end = .; PROVIDE(end = .); - . = ALIGN(PAGE_SIZE); - PROVIDE(_vdso_data = .); - /DISCARD/ : { *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) From c106ad82ccd063b34a89fbc5c4a44c9dfd53c930 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Jul 2014 18:19:18 +0100 Subject: [PATCH 181/214] arm64: kernel: enable PSCI cpu operations on UP systems PSCI CPU operations have to be enabled on UP kernels so that calls like eg cpu_suspend can be made functional on UP too. This patch reworks the PSCI CPU operations so that they can be enabled on UP systems. Acked-by: Mark Rutland Signed-off-by: Lorenzo Pieralisi Signed-off-by: Catalin Marinas (cherry picked from commit 756854d9b99a735f86bc3b86df5c19be12e8746e) Signed-off-by: Mark Brown --- arch/arm64/kernel/cpu_ops.c | 2 +- arch/arm64/kernel/psci.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index 04efea8fe4bc..24fb449bc6aa 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -30,8 +30,8 @@ const struct cpu_operations *cpu_ops[NR_CPUS]; static const struct cpu_operations *supported_cpu_ops[] __initconst = { #ifdef CONFIG_SMP &smp_spin_table_ops, - &cpu_psci_ops, #endif + &cpu_psci_ops, NULL, }; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 9e9798f91172..a623c44a6bc4 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -434,9 +434,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu) return 0; } #endif +#endif const struct cpu_operations cpu_psci_ops = { .name = "psci", +#ifdef CONFIG_SMP .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, @@ -445,6 +447,6 @@ const struct cpu_operations cpu_psci_ops = { .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, #endif +#endif }; -#endif From dfbf350f1b30e0d9bf68bce97191603bfd2ce714 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Jul 2014 18:19:19 +0100 Subject: [PATCH 182/214] arm64: kernel: add __init marker to PSCI init functions PSCI init functions must be marked as __init so that they are freed by the kernel upon boot. This patch marks the PSCI init functions as such since they need not be persistent in the kernel address space after the kernel has booted. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Catalin Marinas (cherry picked from commit b9e97ef93c630404f305350d88d09391d1a55648) Signed-off-by: Mark Brown --- arch/arm64/kernel/psci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index a623c44a6bc4..553954771a67 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -235,7 +235,7 @@ static void psci_sys_poweroff(void) * PSCI Function IDs for v0.2+ are well defined so use * standard values. */ -static int psci_0_2_init(struct device_node *np) +static int __init psci_0_2_init(struct device_node *np) { int err, ver; @@ -296,7 +296,7 @@ out_put_node: /* * PSCI < v0.2 get PSCI Function IDs via DT. */ -static int psci_0_1_init(struct device_node *np) +static int __init psci_0_1_init(struct device_node *np) { u32 id; int err; From 29cdbe3b974687b51f1016489455e65751a7fc40 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Jul 2014 16:32:43 +0100 Subject: [PATCH 183/214] arm64: add MIDR_EL1 field accessors The MIDR_EL1 register is composed of a number of bitfields, and uses of the fields has so far involved open-coding of the shifts and masks required. This patch adds shifts and masks for each of the MIDR_EL1 subfields, and also provides accessors built atop of these. Existing uses within cputype.h are updated to use these accessors. The read_cpuid_part_number macro is modified to return the extracted bitfield rather than returning the value in-place with all other fields (including revision) masked out, to better match the other accessors. As the value is only used in comparison with the *_CPU_PART_* macros which are similarly updated, and these values are never exposed to userspace, this change should not affect any functionality. Signed-off-by: Mark Rutland Acked-by: Will Deacon Reviewed-by: Will Deacon Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas (cherry picked from commit 89c4a306e7631bcb71cc537c8a029172af6047fe) Signed-off-by: Mark Brown --- arch/arm64/include/asm/cputype.h | 33 +++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 343f7f737970..632298c2d951 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -36,15 +36,34 @@ __val; \ }) +#define MIDR_REVISION_MASK 0xf +#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) +#define MIDR_PARTNUM_SHIFT 4 +#define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT) +#define MIDR_PARTNUM(midr) \ + (((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT) +#define MIDR_ARCHITECTURE_SHIFT 16 +#define MIDR_ARCHITECTURE_MASK (0xf << MIDR_ARCHITECTURE_SHIFT) +#define MIDR_ARCHITECTURE(midr) \ + (((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT) +#define MIDR_VARIANT_SHIFT 20 +#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) +#define MIDR_VARIANT(midr) \ + (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) +#define MIDR_IMPLEMENTOR_SHIFT 24 +#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR(midr) \ + (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) + #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 -#define ARM_CPU_PART_AEM_V8 0xD0F0 -#define ARM_CPU_PART_FOUNDATION 0xD000 -#define ARM_CPU_PART_CORTEX_A53 0xD030 -#define ARM_CPU_PART_CORTEX_A57 0xD070 +#define ARM_CPU_PART_AEM_V8 0xD0F +#define ARM_CPU_PART_FOUNDATION 0xD00 +#define ARM_CPU_PART_CORTEX_A57 0xD07 +#define ARM_CPU_PART_CORTEX_A53 0xD03 -#define APM_CPU_PART_POTENZA 0x0000 +#define APM_CPU_PART_POTENZA 0x000 #ifndef __ASSEMBLY__ @@ -65,12 +84,12 @@ static inline u64 __attribute_const__ read_cpuid_mpidr(void) static inline unsigned int __attribute_const__ read_cpuid_implementor(void) { - return (read_cpuid_id() & 0xFF000000) >> 24; + return MIDR_IMPLEMENTOR(read_cpuid_id()); } static inline unsigned int __attribute_const__ read_cpuid_part_number(void) { - return (read_cpuid_id() & 0xFFF0); + return MIDR_PARTNUM(read_cpuid_id()); } static inline u32 __attribute_const__ read_cpuid_cachetype(void) From 34c668387658fabfb18c67abdc7f194009954d7e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 18 Jul 2014 11:54:37 +0100 Subject: [PATCH 184/214] arm64: Create non-empty ZONE_DMA when DRAM starts above 4GB ZONE_DMA is created to allow 32-bit only devices to access memory in the absence of an IOMMU. On systems where the memory starts above 4GB, it is expected that some devices have a DMA offset hardwired to be able to access the bottom of the memory. Linux currently supports DT bindings for the DMA offsets but they are not (easily) available early during boot. This patch tries to guess a DMA offset and assumes that ZONE_DMA corresponds to the 32-bit mask above the start of DRAM. Fixes: 2d5a5612bc (arm64: Limit the CMA buffer to 32-bit if ZONE_DMA) Signed-off-by: Catalin Marinas Reported-by: Mark Salter Tested-by: Mark Salter Tested-by: Anup Patel (cherry picked from commit d50314a6b0702c630c35b88148c1acb76d2e4ede) Signed-off-by: Mark Brown Conflicts: arch/arm64/mm/init.c --- arch/arm64/mm/init.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 81bdd29df3a4..6757d02d6976 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -68,6 +68,17 @@ static int __init early_initrd(char *p) } early_param("initrd", early_initrd); +/* + * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It + * currently assumes that for memory starting above 4G, 32-bit devices will + * use a DMA offset. + */ +static phys_addr_t max_zone_dma_phys(void) +{ + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); + return min(offset + (1ULL << 32), memblock_end_of_DRAM()); +} + static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; @@ -78,9 +89,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA)) { - unsigned long max_dma_phys = - (unsigned long)dma_to_phys(NULL, DMA_BIT_MASK(32) + 1); - max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT)); + max_dma = PFN_DOWN(max_zone_dma_phys()); zone_size[ZONE_DMA] = max_dma - min; } zone_size[ZONE_NORMAL] = max - max_dma; @@ -171,7 +180,11 @@ void __init arm64_memblock_init(void) } early_init_fdt_scan_reserved_mem(); - dma_contiguous_reserve(0); + + /* 4GB maximum for 32-bit only capable devices */ + if (IS_ENABLED(CONFIG_ZONE_DMA)) + dma_phys_limit = max_zone_dma_phys(); + dma_contiguous_reserve(dma_phys_limit); memblock_allow_resize(); memblock_dump_all(); From a8ba6a1bc823c07982767189e03a40e203dd98c9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 16 Jul 2014 12:07:17 +0100 Subject: [PATCH 185/214] arm64: Do not initialise the fixmap page tables in head.S The early_ioremap_init() function already handles fixmap pte initialisation, so upgrade this to cover all of pud/pmd/pte and remove one page from swapper_pg_dir. Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee (cherry picked from commit 7edd88ad7e59c2b7b49da0e00f251884fb785d4f) Signed-off-by: Mark Brown --- arch/arm64/include/asm/page.h | 8 ++++---- arch/arm64/kernel/head.S | 7 ------- arch/arm64/mm/ioremap.c | 26 ++++++++++++++++++-------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index a6331e6a92b5..e84ca637af6b 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -33,11 +33,11 @@ /* * The idmap and swapper page tables need some space reserved in the kernel - * image. The idmap only requires a pgd and a next level table to (section) map - * the kernel, while the swapper also maps the FDT and requires an additional - * table to map an early UART. See __create_page_tables for more information. + * image. Both require a pgd and a next level table to (section) map the + * kernel. The the swapper also maps the FDT (see __create_page_tables for + * more information). */ -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) +#define SWAPPER_DIR_SIZE (2 * PAGE_SIZE) #define IDMAP_DIR_SIZE (2 * PAGE_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index f49bb0f5a7f8..7218fefc6ac9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -455,13 +455,6 @@ __create_page_tables: sub x6, x6, #1 // inclusive range create_block_map x0, x7, x3, x5, x6 1: - /* - * Create the pgd entry for the fixed mappings. - */ - ldr x5, =FIXADDR_TOP // Fixed mapping virtual address - add x0, x26, #2 * PAGE_SIZE // section table address - create_pgd_entry x26, x0, x5, x6, x7 - /* * Since the page tables have been populated with non-cacheable * accesses (MMU disabled), invalidate the idmap and swapper page diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 00d315ae1de9..cf25fa9aa68f 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -98,19 +98,25 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) } EXPORT_SYMBOL(ioremap_cache); -#ifndef CONFIG_ARM64_64K_PAGES static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#ifndef CONFIG_ARM64_64K_PAGES +static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; #endif -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) +static inline pud_t * __init early_ioremap_pud(unsigned long addr) { pgd_t *pgd; - pud_t *pud; pgd = pgd_offset_k(addr); BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - pud = pud_offset(pgd, addr); + return pud_offset(pgd, addr); +} + +static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) +{ + pud_t *pud = early_ioremap_pud(addr); + BUG_ON(pud_none(*pud) || pud_bad(*pud)); return pmd_offset(pud, addr); @@ -127,13 +133,17 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) void __init early_ioremap_init(void) { + pgd_t *pgd; + pud_t *pud; pmd_t *pmd; + unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); - pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); -#ifndef CONFIG_ARM64_64K_PAGES - /* need to populate pmd for 4k pagesize only */ + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, bm_pmd); + pmd = pmd_offset(pud, addr); pmd_populate_kernel(&init_mm, pmd, bm_pte); -#endif + /* * The boot-ioremap range spans multiple pmds, for which * we are not prepared: From fb8acafd9091e9864ecc3f62fb4281db5cd52a7c Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Thu, 24 Jul 2014 15:56:15 +0100 Subject: [PATCH 186/214] arm64: fix soft lockup due to large tlb flush range Under certain loads, this soft lockup has been observed: BUG: soft lockup - CPU#2 stuck for 22s! [ip6tables:1016] Modules linked in: ip6t_rpfilter ip6t_REJECT cfg80211 rfkill xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw vfat fat efivarfs xfs libcrc32c CPU: 2 PID: 1016 Comm: ip6tables Not tainted 3.13.0-0.rc7.30.sa2.aarch64 #1 task: fffffe03e81d1400 ti: fffffe03f01f8000 task.ti: fffffe03f01f8000 PC is at __cpu_flush_kern_tlb_range+0xc/0x40 LR is at __purge_vmap_area_lazy+0x28c/0x3ac pc : [] lr : [] pstate: 80000145 sp : fffffe03f01fbb70 x29: fffffe03f01fbb70 x28: fffffe03f01f8000 x27: fffffe0000b19000 x26: 00000000000000d0 x25: 000000000000001c x24: fffffe03f01fbc50 x23: fffffe03f01fbc58 x22: fffffe03f01fbc10 x21: fffffe0000b2a3f8 x20: 0000000000000802 x19: fffffe0000b2a3c8 x18: 000003fffdf52710 x17: 000003ff9d8bb910 x16: fffffe000050fbfc x15: 0000000000005735 x14: 000003ff9d7e1a5c x13: 0000000000000000 x12: 000003ff9d7e1a5c x11: 0000000000000007 x10: fffffe0000c09af0 x9 : fffffe0000ad1000 x8 : 000000000000005c x7 : fffffe03e8624000 x6 : 0000000000000000 x5 : 0000000000000000 x4 : 0000000000000000 x3 : fffffe0000c09cc8 x2 : 0000000000000000 x1 : 000fffffdfffca80 x0 : 000fffffcd742150 The __cpu_flush_kern_tlb_range() function looks like: ENTRY(__cpu_flush_kern_tlb_range) dsb sy lsr x0, x0, #12 lsr x1, x1, #12 1: tlbi vaae1is, x0 add x0, x0, #1 cmp x0, x1 b.lo 1b dsb sy isb ret ENDPROC(__cpu_flush_kern_tlb_range) The above soft lockup shows the PC at tlbi insn with: x0 = 0x000fffffcd742150 x1 = 0x000fffffdfffca80 So __cpu_flush_kern_tlb_range has 0x128ba930 tlbi flushes left after it has already been looping for 23 seconds!. Looking up one frame at __purge_vmap_area_lazy(), there is: ... list_for_each_entry_rcu(va, &vmap_area_list, list) { if (va->flags & VM_LAZY_FREE) { if (va->va_start < *start) *start = va->va_start; if (va->va_end > *end) *end = va->va_end; nr += (va->va_end - va->va_start) >> PAGE_SHIFT; list_add_tail(&va->purge_list, &valist); va->flags |= VM_LAZY_FREEING; va->flags &= ~VM_LAZY_FREE; } } ... if (nr || force_flush) flush_tlb_kernel_range(*start, *end); So if two areas are being freed, the range passed to flush_tlb_kernel_range() may be as large as the vmalloc space. For arm64, this is ~240GB for 4k pagesize and ~2TB for 64kpage size. This patch works around this problem by adding a loop limit. If the range is larger than the limit, use flush_tlb_all() rather than flushing based on individual pages. The limit chosen is arbitrary as the TLB size is implementation specific and not accessible in an architected way. The aim of the arbitrary limit is to avoid soft lockup. Signed-off-by: Mark Salter [catalin.marinas@arm.com: commit log update] [catalin.marinas@arm.com: marginal optimisation] [catalin.marinas@arm.com: changed to MAX_TLB_RANGE and added comment] Signed-off-by: Catalin Marinas (cherry picked from commit 05ac65305437e8ef63d2d19cac704138a2a05aa5) Signed-off-by: Mark Brown --- arch/arm64/include/asm/tlbflush.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 3796ea6bb734..73f0ce570fb3 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -98,8 +98,8 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ish); } -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void __flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; unsigned long addr; @@ -112,7 +112,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, dsb(ish); } -static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; start >>= 12; @@ -125,6 +125,29 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end isb(); } +/* + * This is meant to avoid soft lock-ups on large TLB flushing ranges and not + * necessarily a performance improvement. + */ +#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if ((end - start) <= MAX_TLB_RANGE) + __flush_tlb_range(vma, start, end); + else + flush_tlb_mm(vma->vm_mm); +} + +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + if ((end - start) <= MAX_TLB_RANGE) + __flush_tlb_kernel_range(start, end); + else + flush_tlb_all(); +} + /* * On AArch64, the cache coherency is handled via the set_pte_at() function. */ From 6eb8fed568d2d39d198391012c62c0bd5f36b8ac Mon Sep 17 00:00:00 2001 From: Arun Chandran Date: Thu, 26 Jun 2014 15:16:03 +0530 Subject: [PATCH 187/214] arm64: vdso: fix build error when switching from LE to BE Building a kernel with CPU_BIG_ENDIAN fails if there are stale objects from a !CPU_BIG_ENDIAN build. Due to a missing FORCE prerequisite on an if_changed rule in the VDSO Makefile, we attempt to link a stale LE object into the new BE kernel. According to Documentation/kbuild/makefiles.txt, FORCE is required for if_changed rules and forgetting it is a common mistake, so fix it by 'Forcing' the build of vdso. This patch fixes build errors like these: arch/arm64/kernel/vdso/note.o: compiled for a little endian system and target is big endian failed to merge target specific data of file arch/arm64/kernel/vdso/note.o arch/arm64/kernel/vdso/sigreturn.o: compiled for a little endian system and target is big endian failed to merge target specific data of file arch/arm64/kernel/vdso/sigreturn.o Tested-by: Mark Rutland Signed-off-by: Arun Chandran Signed-off-by: Will Deacon (cherry picked from commit 1915e2ad1cf548217c963121e4076b3d44dd0169) Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 84b942612051..ff3bddea482d 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -43,7 +43,7 @@ $(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files -$(obj-vdso): %.o: %.S +$(obj-vdso): %.o: %.S FORCE $(call if_changed_dep,vdsoas) # Actual build commands From 83f1bff42dfdbf3f4bc70a07f4304db54f89d27d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 31 Jul 2014 11:36:08 +0100 Subject: [PATCH 188/214] arm64: don't call break hooks for BRK exceptions from EL0 Our break hooks are used to handle brk exceptions from kgdb (and potentially kprobes if that code ever resurfaces), so don't bother calling them if the BRK exception comes from userspace. This prevents userspace from trapping to a kdb shell on systems where kgdb is enabled and active. Cc: Reported-by: Omar Sandoval Signed-off-by: Will Deacon (cherry picked from commit c878e0cff5c5e56b216951cbe75f7a3dd500a736) Signed-off-by: Mark Brown Conflicts: arch/arm64/kernel/debug-monitors.c --- arch/arm64/kernel/debug-monitors.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index fea84694fce4..e3b37ee9076a 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -303,23 +303,20 @@ static int brk_handler(unsigned long addr, unsigned int esr, { siginfo_t info; - if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) - return 0; + if (user_mode(regs)) { + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; - pr_warn("unexpected brk exception at %lx, esr=0x%x\n", - (long)instruction_pointer(regs), esr); - - if (!user_mode(regs)) + force_sig_info(SIGTRAP, &info, current); + } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warning("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; + } - info = (siginfo_t) { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); return 0; } From 6d4a4b3a1b64adb217706c9b52e87972d305de08 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 29 May 2014 18:16:54 +0100 Subject: [PATCH 189/214] arm64: kernel: initialize broadcast hrtimer based clock event device On platforms implementing CPU power management, the CPUidle subsystem can allow CPUs to enter idle states where local timers logic is lost on power down. To keep the software timers functional the kernel relies on an always-on broadcast timer to be present in the platform to relay the interrupt signalling the timer expiries. For platforms implementing CPU core gating that do not implement an always-on HW timer or implement it in a broken way, this patch adds code to initialize the kernel hrtimer based clock event device upon boot (which can be chosen as tick broadcast device by the kernel). It relies on a dynamically chosen CPU to be always powered-up. This CPU then relays the timer interrupt to CPUs in deep-idle states through its HW local timer device. Having a CPU always-on has implications on power management platform capabilities and makes CPUidle suboptimal, since at least a CPU is kept always in a shallow idle state by the kernel to relay timer interrupts, but at least leaves the kernel with a functional system with some working power management capabilities. The hrtimer based clock event device is unconditionally registered, but has the lowest possible rating such that any broadcast-capable HW clock event device present will be chosen in preference as the tick broadcast device. Reviewed-by: Preeti U Murthy Acked-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Lorenzo Pieralisi Signed-off-by: Will Deacon (cherry picked from commit 9358d755bd5cba8965ea79f2a446e689323409f9, again after temporary revert) Signed-off-by: Mark Brown --- arch/arm64/kernel/time.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index e1c7a540361b..1ddb2bd5932a 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -75,6 +75,8 @@ void __init time_init(void) clocksource_of_init(); + tick_setup_hrtimer_broadcast(); + arch_timer_rate = arch_timer_get_rate(); if (!arch_timer_rate) panic("Unable to initialise architected timer.\n"); From 432d182423d53bc7f03d3a399cd57482c5ca47b0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 13 Jun 2014 13:41:20 +0100 Subject: [PATCH 190/214] arm64: Limit the CMA buffer to 32-bit if ZONE_DMA When the CMA buffer is allocated, it is too early to know whether devices will require ZONE_DMA memory. This patch limits the CMA buffer to (DMA_BIT_MASK(32) + 1) if CONFIG_ZONE_DMA is enabled. In addition, it computes the dma_to_phys(DMA_BIT_MASK(32)) before the increment (no current functional change). Signed-off-by: Catalin Marinas (cherry picked from commit 2d5a5612bceda8edd25b29f363c4e2c6cda28bab) Signed-off-by: Mark Brown Conflicts: arch/arm64/mm/init.c --- arch/arm64/mm/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 6757d02d6976..d0f86b74fce6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -144,6 +144,7 @@ static void arm64_memory_present(void) void __init arm64_memblock_init(void) { u64 *reserve_map, base, size; + phys_addr_t dma_phys_limit = 0; /* * Register the kernel text, kernel data, initrd, and initial From c86df325d594f33eff078fe6b6014f1994f9bcb7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 13 Aug 2014 18:53:03 +0100 Subject: [PATCH 191/214] arm64: align randomized TEXT_OFFSET on 4 kB boundary When booting via UEFI, the kernel Image is loaded at a 4 kB boundary and the embedded EFI stub is executed in place. The EFI stub relocates the Image to reside TEXT_OFFSET bytes above a 2 MB boundary, and jumps into the kernel proper. In AArch64, PC relative symbol references are emitted using adrp/add or adrp/ldr pairs, where the offset into a 4 kB page is resolved using a separate :lo12: relocation. This implicitly assumes that the code will always be executed at the same relative offset with respect to a 4 kB boundary, or the references will point to the wrong address. This means we should link the kernel at a 4 kB aligned base address in order to remain compatible with the base address the UEFI loader uses when doing the initial load of Image. So update the code that generates TEXT_OFFSET to choose a multiple of 4 kB. At the same time, update the code so it chooses from the interval [0..2MB) as the author originally intended. Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon (cherry picked from commit 4190312beb2acfb7bfb1bb971e24a759aa96b0e8) Signed-off-by: Mark Brown Conflicts: arch/arm64/Makefile arch/arm64/kernel/head.S --- arch/arm64/Makefile | 4 ++++ arch/arm64/kernel/head.S | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7ab6b358cc35..a5004a5b7aa4 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -32,7 +32,11 @@ CHECKFLAGS += -D__aarch64__ head-y := arch/arm64/kernel/head.o # The byte offset of the kernel image in RAM from the start of RAM. +ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y) +TEXT_OFFSET := $(shell awk 'BEGIN {srand(); printf "0x%03x000\n", int(512 * rand())}') +else TEXT_OFFSET := 0x00080000 +endif export TEXT_OFFSET GZFLAGS diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7218fefc6ac9..9129a917203e 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -37,8 +37,12 @@ #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) -#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 -#error KERNEL_RAM_VADDR must start at 0xXXX80000 +#if (TEXT_OFFSET & 0xfff) != 0 +#error TEXT_OFFSET must be at least 4KB aligned +#elif (PAGE_OFFSET & 0x1fffff) != 0 +#error PAGE_OFFSET must be at least 2MB aligned +#elif TEXT_OFFSET > 0x1fffff +#error TEXT_OFFSET must be less than 2MB #endif .macro pgtbl, ttb0, ttb1, virt_to_phys From 7454a3fe586c6ae1272682e6188549916a47562f Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Thu, 14 Aug 2014 20:49:46 +0530 Subject: [PATCH 192/214] arm64: mm: update max pa bits to 48 Now that we support 48-bit physical addressing, update MAX_PHYSMEM_BITS accordingly. Acked-by: Catalin Marinas Signed-off-by: Ganapatrao Kulkarni Signed-off-by: Will Deacon (cherry picked from commit 07a15dd55a3d65f81b4b09eab293f4afc720b082) Signed-off-by: Mark Brown --- arch/arm64/include/asm/sparsemem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 1be62bcb9d47..74a9d301819f 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -17,7 +17,7 @@ #define __ASM_SPARSEMEM_H #ifdef CONFIG_SPARSEMEM -#define MAX_PHYSMEM_BITS 40 +#define MAX_PHYSMEM_BITS 48 #define SECTION_SIZE_BITS 30 #endif From 660ba08887a7f4a0a1ac14f6d1693568fa2623fa Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 22 Aug 2014 20:49:16 +0100 Subject: [PATCH 193/214] arm64: Remove unused variable in head.S Remove an unused local variable from head.S. It seems this was never used even from the initial commit 9703d9d7f77ce129621f7d80a844822e2daa7008 (arm64: Kernel booting and initialisation), and is a left over from a previous implementation of __calc_phys_offset. Signed-off-by: Geoff Levand Signed-off-by: Will Deacon (cherry picked from commit 5843be2279d7a91ef48c20ac31715d1eb9607a84) Signed-off-by: Mark Brown --- arch/arm64/kernel/head.S | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 9129a917203e..b7f76fff32a6 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -231,10 +231,6 @@ ENTRY(__boot_cpu_mode) .long 0 .popsection - .align 3 -2: .quad . - .quad PAGE_OFFSET - #ifdef CONFIG_SMP .align 3 1: .quad . From 2fee2fdbfb9e95f592c6fbfab263d8dc86d5772e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Aug 2014 14:13:24 +0100 Subject: [PATCH 194/214] arm64: ptrace: fix compat hardware watchpoint reporting I'm not sure what I was on when I wrote this, but when iterating over the hardware watchpoint array (hbp_watch_array), our index is off by ARM_MAX_BRP, so we walk off the end of our thread_struct... ... except, a dodgy condition in the loop means that it never executes at all (bp cannot be NULL). This patch fixes the code so that we remove the bp check and use the correct index for accessing the watchpoint structures. Cc: Signed-off-by: Will Deacon (cherry picked from commit 27d7ff273c2aad37b28f6ff0cab2cfa35b51e648) Signed-off-by: Mark Brown --- arch/arm64/include/asm/hw_breakpoint.h | 1 - arch/arm64/kernel/ptrace.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index d064047612b1..52b484b6aa1a 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -79,7 +79,6 @@ static inline void decode_ctrl_reg(u32 reg, */ #define ARM_MAX_BRP 16 #define ARM_MAX_WRP 16 -#define ARM_MAX_HBP_SLOTS (ARM_MAX_BRP + ARM_MAX_WRP) /* Virtual debug register bases. */ #define AARCH64_DBG_REG_BVR 0 diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 71c6a623e226..f9177993d135 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -81,7 +81,8 @@ static void ptrace_hbptriggered(struct perf_event *bp, break; } } - for (i = ARM_MAX_BRP; i < ARM_MAX_HBP_SLOTS && !bp; ++i) { + + for (i = 0; i < ARM_MAX_WRP; ++i) { if (current->thread.debug.hbp_watch[i] == bp) { info.si_errno = -((i << 1) + 1); break; From adf5da773504328b2520ecf63f2098eaef982349 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Aug 2014 14:20:24 +0100 Subject: [PATCH 195/214] arm64: ptrace: fix compat reg getter/setter return values copy_{to,from}_user return the number of bytes remaining on failure, not an error code. This patch returns -EFAULT when the copy operation didn't complete, rather than expose the number of bytes not copied directly to userspace. Signed-off-by: Will Deacon (cherry picked from commit 85487edd252fa04718dcd735bc0f41213bbb9546) Signed-off-by: Mark Brown --- arch/arm64/kernel/ptrace.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f9177993d135..2b453624d5ad 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -656,8 +656,10 @@ static int compat_gpr_get(struct task_struct *target, kbuf += sizeof(reg); } else { ret = copy_to_user(ubuf, ®, sizeof(reg)); - if (ret) + if (ret) { + ret = -EFAULT; break; + } ubuf += sizeof(reg); } @@ -695,8 +697,10 @@ static int compat_gpr_set(struct task_struct *target, kbuf += sizeof(reg); } else { ret = copy_from_user(®, ubuf, sizeof(reg)); - if (ret) - return ret; + if (ret) { + ret = -EFAULT; + break; + } ubuf += sizeof(reg); } From b9725baa1ec2a8827096d3941a38c887cd07ffd9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 Aug 2014 16:11:10 +0100 Subject: [PATCH 196/214] arm64: report correct stack pointer in KSTK_ESP for compat tasks The KSTK_ESP macro is used to determine the user stack pointer for a given task. In particular, this is used to to report the '[stack]' VMA in /proc/self/maps, which is used by Android to determine the stack location for children of the main thread. This patch fixes the macro to use user_stack_pointer instead of directly returning sp. This means that we report w13 instead of sp, since the former is used as the stack pointer when executing in AArch32 state. Cc: Reported-by: Serban Constantinescu Signed-off-by: Will Deacon (cherry picked from commit 3168a743461ecf86adf3e7dcfcd79271828fb263) Signed-off-by: Mark Brown --- arch/arm64/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index db3112886968..3b7bb031f98f 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -132,7 +132,7 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev, ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) #define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc) -#define KSTK_ESP(tsk) ((unsigned long)task_pt_regs(tsk)->sp) +#define KSTK_ESP(tsk) user_stack_pointer(task_pt_regs(tsk)) /* * Prefetching support From ecf06ef82a0be34db53a469c22fd2c95b0b80039 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 2 Sep 2014 11:35:24 +0100 Subject: [PATCH 197/214] arm64: use irq_set_affinity with force=false when migrating irqs The arm64 interrupt migration code on cpu offline calls irqchip.irq_set_affinity() with the argument force=true. Originally this argument had no effect because it was not used by any interrupt chip driver and there was no semantics defined. This changed with commit 01f8fa4f01d8 ("genirq: Allow forcing cpu affinity of interrupts") which made the force argument useful to route interrupts to not yet online cpus without checking the target cpu against the cpu online mask. The following commit ffde1de64012 ("irqchip: gic: Support forced affinity setting") implemented this for the GIC interrupt controller. As a consequence the cpu offline irq migration fails if CPU0 is offlined, because CPU0 is still set in the affinity mask and the validation against cpu online mask is skipped to the force argument being true. The following first_cpu(mask) selection always selects CPU0 as the target. Commit 601c942176d8("arm64: use cpu_online_mask when using forced irq_set_affinity") intended to fix the above mentioned issue but introduced another issue where affinity can be migrated to a wrong CPU due to unconditional copy of cpu_online_mask. As with for arm, solve the issue by calling irq_set_affinity() with force=false from the CPU offline irq migration code so the GIC driver validates the affinity mask against CPU online mask and therefore removes CPU0 from the possible target candidates. Also revert the changes done in the commit 601c942176d8 as it's no longer needed. Tested on Juno platform. Fixes: 601c942176d8("arm64: use cpu_online_mask when using forced irq_set_affinity") Signed-off-by: Sudeep Holla Acked-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Cc: # 3.10.x Signed-off-by: Will Deacon (cherry picked from commit 3d8afe3099ebc602848aa7f09235cce3a9a023ce) Signed-off-by: Mark Brown --- arch/arm64/kernel/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 473e5dbf8f39..dfa6e3e74fdd 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -105,7 +105,7 @@ static bool migrate_one_irq(struct irq_desc *desc) c = irq_data_get_irq_chip(d); if (!c->irq_set_affinity) pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) cpumask_copy(d->affinity, affinity); return ret; From 6d11fa22acd8d5ddcdb80e2e520f3543e0fd6077 Mon Sep 17 00:00:00 2001 From: Arun Chandran Date: Mon, 18 Aug 2014 10:06:58 +0100 Subject: [PATCH 198/214] arm64: convert part of soft_restart() to assembly The current soft_restart() and setup_restart implementations incorrectly assume that compiler will not spill/fill values to/from stack. However this assumption seems to be wrong, revealed by the disassembly of the currently existing code (v3.16) built with Linaro GCC 4.9-2014.05. ffffffc000085224 : ffffffc000085224: a9be7bfd stp x29, x30, [sp,#-32]! ffffffc000085228: 910003fd mov x29, sp ffffffc00008522c: f9000fa0 str x0, [x29,#24] ffffffc000085230: 94003d21 bl ffffffc0000946b4 ffffffc000085234: 94003b33 bl ffffffc000093f00 ffffffc000085238: 94003dfa bl ffffffc000094a20 ffffffc00008523c: 94003b31 bl ffffffc000093f00 ffffffc000085240: b0003321 adrp x1, ffffffc0006ea000 ffffffc000085244: f9400fa0 ldr x0, [x29,#24] ----> spilled addr ffffffc000085248: f942fc22 ldr x2, [x1,#1528] ----> global memstart_addr ffffffc00008524c: f0000061 adrp x1, ffffffc000094000 <__inval_cache_range+0x40> ffffffc000085250: 91290021 add x1, x1, #0xa40 ffffffc000085254: 8b010041 add x1, x2, x1 ffffffc000085258: d2c00802 mov x2, #0x4000000000 // #274877906944 ffffffc00008525c: 8b020021 add x1, x1, x2 ffffffc000085260: d63f0020 blr x1 ... Here the compiler generates memory accesses after the cache is disabled, loading stale values for the spilled value and global variable. As we cannot control when the compiler will access memory we must rewrite the functions in assembly to stash values we need in registers prior to disabling the cache, avoiding the use of memory. Reviewed-by: Mark Rutland Signed-off-by: Arun Chandran Signed-off-by: Will Deacon (cherry picked from commit 5e051531447259e5df95c44bccb69979537c19e4) Signed-off-by: Mark Brown Conflicts: arch/arm64/include/asm/proc-fns.h --- arch/arm64/include/asm/proc-fns.h | 2 ++ arch/arm64/kernel/process.c | 30 ++---------------------------- arch/arm64/mm/proc.S | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 7cdf466fd0c5..a496aa6ff7ef 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -31,6 +31,8 @@ extern void cpu_cache_off(void); extern void cpu_do_idle(void); extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); extern void cpu_reset(unsigned long addr) __attribute__((noreturn)); +void cpu_soft_restart(phys_addr_t cpu_reset, + unsigned long addr) __attribute__((noreturn)); #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index feb01cf4b255..f38f224b04bb 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -56,36 +56,10 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif -static void setup_restart(void) -{ - /* - * Tell the mm system that we are going to reboot - - * we may need it to insert some 1:1 mappings so that - * soft boot works. - */ - setup_mm_for_reboot(); - - /* Clean and invalidate caches */ - flush_cache_all(); - - /* Turn D-cache off */ - cpu_cache_off(); - - /* Push out any further dirty data, and ensure cache is empty */ - flush_cache_all(); -} - void soft_restart(unsigned long addr) { - typedef void (*phys_reset_t)(unsigned long); - phys_reset_t phys_reset; - - setup_restart(); - - /* Switch to the identity mapping */ - phys_reset = (phys_reset_t)virt_to_phys(cpu_reset); - phys_reset(addr); - + setup_mm_for_reboot(); + cpu_soft_restart(virt_to_phys(cpu_reset), addr); /* Should never get here */ BUG(); } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index e85bf0667bd7..cc65d201cd9d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -69,6 +69,21 @@ ENTRY(cpu_reset) ret x0 ENDPROC(cpu_reset) +ENTRY(cpu_soft_restart) + /* Save address of cpu_reset() and reset address */ + mov x19, x0 + mov x20, x1 + + /* Turn D-cache off */ + bl cpu_cache_off + + /* Push out all dirty data, and ensure cache is empty */ + bl flush_cache_all + + mov x0, x20 + ret x19 +ENDPROC(cpu_soft_restart) + /* * cpu_do_idle() * From 73cf4139a89eb67351d5e63209d8f554ada0ae6e Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 19 Aug 2014 20:41:42 +0100 Subject: [PATCH 199/214] arm64: Introduce {set,clear}_pte_bit It's useful to be able to change individual bits in ptes at times. Introduce functions for this and update existing pte_mk* functions to use these primatives. Acked-by: Will Deacon Signed-off-by: Laura Abbott [will: added missing inline keyword for new header functions] Signed-off-by: Will Deacon (cherry picked from commit b6d4f2800b7bad654caf00654f4bff21594ef838) Signed-off-by: Mark Brown --- arch/arm64/include/asm/pgtable.h | 33 ++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7099e3b84778..d86b4d4df1f0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -141,46 +141,51 @@ extern struct page *empty_zero_page; #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) +static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot) +{ + pte_val(pte) &= ~pgprot_val(prot); + return pte; +} + +static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) +{ + pte_val(pte) |= pgprot_val(prot); + return pte; +} + static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~PTE_WRITE; - return pte; + return clear_pte_bit(pte, __pgprot(PTE_WRITE)); } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= PTE_WRITE; - return pte; + return set_pte_bit(pte, __pgprot(PTE_WRITE)); } static inline pte_t pte_mkclean(pte_t pte) { - pte_val(pte) &= ~PTE_DIRTY; - return pte; + return clear_pte_bit(pte, __pgprot(PTE_DIRTY)); } static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= PTE_DIRTY; - return pte; + return set_pte_bit(pte, __pgprot(PTE_DIRTY)); } static inline pte_t pte_mkold(pte_t pte) { - pte_val(pte) &= ~PTE_AF; - return pte; + return clear_pte_bit(pte, __pgprot(PTE_AF)); } static inline pte_t pte_mkyoung(pte_t pte) { - pte_val(pte) |= PTE_AF; - return pte; + return set_pte_bit(pte, __pgprot(PTE_AF)); } static inline pte_t pte_mkspecial(pte_t pte) { - pte_val(pte) |= PTE_SPECIAL; - return pte; + return set_pte_bit(pte, __pgprot(PTE_SPECIAL)); } static inline void set_pte(pte_t *ptep, pte_t pte) From da81aae1834686f559d6e06060d8f3c3f7cdf508 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 19 Aug 2014 20:41:43 +0100 Subject: [PATCH 200/214] arm64: Add CONFIG_DEBUG_SET_MODULE_RONX support In a similar fashion to other architecture, add the infrastructure and Kconfig to enable DEBUG_SET_MODULE_RONX support. When enabled, module ranges will be marked read-only/no-execute as appropriate. Signed-off-by: Laura Abbott [will: fixed off-by-one in module end check] Signed-off-by: Will Deacon (cherry picked from commit 11d91a770f1fff44dafdf88d6089a3451f99c9b6) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig.debug --- arch/arm64/Kconfig.debug | 11 ++++ arch/arm64/include/asm/cacheflush.h | 4 ++ arch/arm64/mm/Makefile | 2 +- arch/arm64/mm/pageattr.c | 96 +++++++++++++++++++++++++++++ 4 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/mm/pageattr.c diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index e1b0c4601b3e..bb55717c8dad 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -44,4 +44,15 @@ config PID_IN_CONTEXTIDR instructions during context switch. Say Y here only if you are planning to use hardware trace tools with this kernel. +config DEBUG_SET_MODULE_RONX + bool "Set loadable kernel module data as NX and text as RO" + depends on MODULES + help + This option helps catch unintended modifications to loadable + kernel module's text and read-only data. It also prevents execution + of module data. Such protection may interfere with run-time code + patching and dynamic kernel tracing - and they might also protect + against certain classes of kernel exploits. + If in doubt, say "N". + endmenu diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index f2defe1c380c..689b6379188c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -148,4 +148,8 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end) { } +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); #endif diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 3ecb56c624d3..c56179ed2c09 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -1,5 +1,5 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ ioremap.o mmap.o pgd.o mmu.o \ - context.o proc.o + context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c new file mode 100644 index 000000000000..75e744e4cec5 --- /dev/null +++ b/arch/arm64/mm/pageattr.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2014, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include +#include +#include +#include + +#include +#include + +struct page_change_data { + pgprot_t set_mask; + pgprot_t clear_mask; +}; + +static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr, + void *data) +{ + struct page_change_data *cdata = data; + pte_t pte = *ptep; + + pte = clear_pte_bit(pte, cdata->clear_mask); + pte = set_pte_bit(pte, cdata->set_mask); + + set_pte(ptep, pte); + return 0; +} + +static int change_memory_common(unsigned long addr, int numpages, + pgprot_t set_mask, pgprot_t clear_mask) +{ + unsigned long start = addr; + unsigned long size = PAGE_SIZE*numpages; + unsigned long end = start + size; + int ret; + struct page_change_data data; + + if (!IS_ALIGNED(addr, PAGE_SIZE)) { + addr &= PAGE_MASK; + WARN_ON_ONCE(1); + } + + if (!is_module_address(start) || !is_module_address(end - 1)) + return -EINVAL; + + data.set_mask = set_mask; + data.clear_mask = clear_mask; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range, + &data); + + flush_tlb_kernel_range(start, end); + return ret; +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + return change_memory_common(addr, numpages, + __pgprot(PTE_RDONLY), + __pgprot(PTE_WRITE)); +} +EXPORT_SYMBOL_GPL(set_memory_ro); + +int set_memory_rw(unsigned long addr, int numpages) +{ + return change_memory_common(addr, numpages, + __pgprot(PTE_WRITE), + __pgprot(PTE_RDONLY)); +} +EXPORT_SYMBOL_GPL(set_memory_rw); + +int set_memory_nx(unsigned long addr, int numpages) +{ + return change_memory_common(addr, numpages, + __pgprot(PTE_PXN), + __pgprot(0)); +} +EXPORT_SYMBOL_GPL(set_memory_nx); + +int set_memory_x(unsigned long addr, int numpages) +{ + return change_memory_common(addr, numpages, + __pgprot(0), + __pgprot(PTE_PXN)); +} +EXPORT_SYMBOL_GPL(set_memory_x); From 98f12c0a80ae19316c1b32f610d094aaae2c5fc0 Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Wed, 27 Aug 2014 05:29:29 +0100 Subject: [PATCH 201/214] arm64: LLVMLinux: Add current_stack_pointer() for arm64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define a global named register for current_stack_pointer. The use of this new variable guarantees that both gcc and clang can access this register in C code. Signed-off-by: Behan Webster Reviewed-by: Jan-Simon Möller Reviewed-by: Mark Charlebois Reviewed-by: Olof Johansson Acked-by: Will Deacon Signed-off-by: Will Deacon (cherry picked from commit 3337a10e0d0cbc9225cefc23aa7a604b698367ed) Signed-off-by: Mark Brown --- arch/arm64/include/asm/thread_info.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 23a3c4791d86..5093806522e1 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -68,6 +68,11 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) +/* + * how to get the current stack pointer from C + */ +register unsigned long current_stack_pointer asm ("sp"); + /* * how to get the thread information struct from C */ From 9b2ff971ff9b5d88712078c6ebafb1d85e1469d7 Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Wed, 27 Aug 2014 05:29:30 +0100 Subject: [PATCH 202/214] arm64: LLVMLinux: Use current_stack_pointer in save_stack_trace_tsk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the global current_stack_pointer to get the value of the stack pointer. This change supports being able to compile the kernel with both gcc and clang. Signed-off-by: Behan Webster Signed-off-by: Mark Charlebois Reviewed-by: Jan-Simon Möller Reviewed-by: Olof Johansson Acked-by: Will Deacon Signed-off-by: Will Deacon (cherry picked from commit bb28cec4ea2f5151c08e061c6de825a8c853bbd6) Signed-off-by: Mark Brown --- arch/arm64/kernel/stacktrace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 38f0558f0c0a..6f44bf97e4cf 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -111,10 +111,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) frame.sp = thread_saved_sp(tsk); frame.pc = thread_saved_pc(tsk); } else { - register unsigned long current_sp asm("sp"); data.no_sched_functions = 0; frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_sp; + frame.sp = current_stack_pointer; frame.pc = (unsigned long)save_stack_trace_tsk; } From 51c50680dbf8a763bd787e0f7e3fff3df7036997 Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Wed, 27 Aug 2014 05:29:31 +0100 Subject: [PATCH 203/214] arm64: LLVMLinux: Calculate current_thread_info from current_stack_pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the global current_stack_pointer to get the value of the stack pointer. This change supports being able to compile the kernel with both gcc and clang. Signed-off-by: Behan Webster Signed-off-by: Mark Charlebois Reviewed-by: Jan-Simon Möller Reviewed-by: Olof Johansson Acked-by: Will Deacon Signed-off-by: Will Deacon (cherry picked from commit 786248705ecf5290f26534e8eef62ba6dd63b806) Signed-off-by: Mark Brown --- arch/arm64/include/asm/thread_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 5093806522e1..d971013f73c1 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -80,8 +80,8 @@ static inline struct thread_info *current_thread_info(void) __attribute_const__; static inline struct thread_info *current_thread_info(void) { - register unsigned long sp asm ("sp"); - return (struct thread_info *)(sp & ~(THREAD_SIZE - 1)); + return (struct thread_info *) + (current_stack_pointer & ~(THREAD_SIZE - 1)); } #define thread_saved_pc(tsk) \ From 29769603ea7b06e98f229b87bc0a67058cea394d Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Wed, 27 Aug 2014 05:29:32 +0100 Subject: [PATCH 204/214] arm64: LLVMLinux: Use current_stack_pointer in kernel/traps.c Use the global current_stack_pointer to get the value of the stack pointer. This change supports being able to compile the kernel with both gcc and clang. Signed-off-by: Behan Webster Signed-off-by: Mark Charlebois Reviewed-by: Olof Johansson Acked-by: Will Deacon Signed-off-by: Will Deacon (cherry picked from commit 2128df143d840a20e12818290eb6e40b95cc4ac0) Signed-off-by: Mark Brown --- arch/arm64/kernel/traps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 268ce966afd6..12cd34ff35d0 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -132,7 +132,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - const register unsigned long current_sp asm ("sp"); pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); @@ -145,7 +144,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.pc = regs->pc; } else if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); - frame.sp = current_sp; + frame.sp = current_stack_pointer; frame.pc = (unsigned long)dump_backtrace; } else { /* From 0fab209aac128615dd2a8e88188073f89fba6dab Mon Sep 17 00:00:00 2001 From: Mark Charlebois Date: Wed, 27 Aug 2014 05:29:33 +0100 Subject: [PATCH 205/214] arm64: LLVMLinux: Use global stack register variable for aarch64 To support both Clang and GCC, use the global stack register variable vs a local register variable. Author: Mark Charlebois Signed-off-by: Mark Charlebois Signed-off-by: Behan Webster Signed-off-by: Will Deacon (cherry picked from commit 34ccf8f455f1ae7761810a74308f82daca67ced1) Signed-off-by: Mark Brown --- arch/arm64/include/asm/percpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 453a179469a3..5279e5733386 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -26,13 +26,13 @@ static inline void set_my_cpu_offset(unsigned long off) static inline unsigned long __my_cpu_offset(void) { unsigned long off; - register unsigned long *sp asm ("sp"); /* * We want to allow caching the value, so avoid using volatile and * instead use a fake stack read to hazard against barrier(). */ - asm("mrs %0, tpidr_el1" : "=r" (off) : "Q" (*sp)); + asm("mrs %0, tpidr_el1" : "=r" (off) : + "Q" (*(const unsigned long *)current_stack_pointer)); return off; } From b4382e715d6240e460c29a730c5fe9740aea68e5 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 9 Oct 2014 15:26:35 -0700 Subject: [PATCH 206/214] lib/genalloc.c: add power aligned algorithm One of the more common algorithms used for allocation is to align the start address of the allocation to the order of size requested. Add this as an algorithm option for genalloc. Signed-off-by: Laura Abbott Acked-by: Will Deacon Acked-by: Olof Johansson Reviewed-by: Catalin Marinas Cc: Arnd Bergmann Cc: David Riley Cc: Ritesh Harjain Cc: Russell King Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 505e3be6c082489a32a88e042f930d047b6415bc) Signed-off-by: Mark Brown --- include/linux/genalloc.h | 4 ++++ lib/genalloc.c | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 661d374aeb2d..d278793da6e7 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -106,6 +106,10 @@ extern void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo, extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data); +extern unsigned long gen_pool_first_fit_order_align(unsigned long *map, + unsigned long size, unsigned long start, unsigned int nr, + void *data); + extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data); diff --git a/lib/genalloc.c b/lib/genalloc.c index b35cfa9bc3d4..095b8f9b1a50 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -446,6 +446,26 @@ unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, } EXPORT_SYMBOL(gen_pool_first_fit); +/** + * gen_pool_first_fit_order_align - find the first available region + * of memory matching the size requirement. The region will be aligned + * to the order of the size specified. + * @map: The address to base the search on + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + * @nr: The number of zeroed bits we're looking for + * @data: additional data - unused + */ +unsigned long gen_pool_first_fit_order_align(unsigned long *map, + unsigned long size, unsigned long start, + unsigned int nr, void *data) +{ + unsigned long align_mask = roundup_pow_of_two(nr) - 1; + + return bitmap_find_next_zero_area(map, size, start, nr, align_mask); +} +EXPORT_SYMBOL(gen_pool_first_fit_order_align); + /** * gen_pool_best_fit - find the best fitting region of memory * macthing the size requirement (no alignment constraint) From e4d45c70b7bad66ada07b19edcc1ffa692c1845b Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 9 Oct 2014 15:26:38 -0700 Subject: [PATCH 207/214] lib/genalloc.c: add genpool range check function After allocating an address from a particular genpool, there is no good way to verify if that address actually belongs to a genpool. Introduce addr_in_gen_pool which will return if an address plus size falls completely within the genpool range. Signed-off-by: Laura Abbott Acked-by: Will Deacon Reviewed-by: Olof Johansson Reviewed-by: Catalin Marinas Cc: Arnd Bergmann Cc: David Riley Cc: Ritesh Harjain Cc: Russell King Cc: Thierry Reding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 9efb3a421d55d30b65fb0dbee05108d15c6c55f7) Signed-off-by: Mark Brown --- include/linux/genalloc.h | 3 +++ lib/genalloc.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index d278793da6e7..bc24f34c6ecb 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -117,6 +117,9 @@ extern struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, int nid); extern struct gen_pool *dev_get_gen_pool(struct device *dev); +bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, + size_t size); + #ifdef CONFIG_OF extern struct gen_pool *of_get_named_gen_pool(struct device_node *np, const char *propname, int index); diff --git a/lib/genalloc.c b/lib/genalloc.c index 095b8f9b1a50..f8b131bd920e 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -368,6 +368,35 @@ void gen_pool_for_each_chunk(struct gen_pool *pool, } EXPORT_SYMBOL(gen_pool_for_each_chunk); +/** + * addr_in_gen_pool - checks if an address falls within the range of a pool + * @pool: the generic memory pool + * @start: start address + * @size: size of the region + * + * Check if the range of addresses falls within the specified pool. Returns + * true if the entire range is contained in the pool and false otherwise. + */ +bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, + size_t size) +{ + bool found = false; + unsigned long end = start + size; + struct gen_pool_chunk *chunk; + + rcu_read_lock(); + list_for_each_entry_rcu(chunk, &(pool)->chunks, next_chunk) { + if (start >= chunk->start_addr && start <= chunk->end_addr) { + if (end <= chunk->end_addr) { + found = true; + break; + } + } + } + rcu_read_unlock(); + return found; +} + /** * gen_pool_avail - get available free space of the pool * @pool: pool to get available free space From dd403c2d2cf5c25eb82310bfe70704c0ea8452b7 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 9 Oct 2014 15:26:40 -0700 Subject: [PATCH 208/214] common: dma-mapping: introduce common remapping functions For architectures without coherent DMA, memory for DMA may need to be remapped with coherent attributes. Factor out the the remapping code from arm and put it in a common location to reduce code duplication. As part of this, the arm APIs are now migrated away from ioremap_page_range to the common APIs which use map_vm_area for remapping. This should be an equivalent change and using map_vm_area is more correct as ioremap_page_range is intended to bring in io addresses into the cpu space and not regular kernel managed memory. Signed-off-by: Laura Abbott Reviewed-by: Catalin Marinas Cc: Arnd Bergmann Cc: David Riley Cc: Olof Johansson Cc: Ritesh Harjain Cc: Russell King Cc: Thierry Reding Cc: Will Deacon Cc: James Hogan Cc: Laura Abbott Cc: Mitchel Humpherys Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 513510ddba9650fc7da456eefeb0ead7632324f6) Signed-off-by: Mark Brown --- arch/arm/mm/dma-mapping.c | 57 +++---------------- drivers/base/dma-mapping.c | 72 ++++++++++++++++++++++++ include/asm-generic/dma-mapping-common.h | 9 +++ 3 files changed, 90 insertions(+), 48 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef3e0f3aac96..b2aa3e4f0e67 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -265,37 +265,19 @@ static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) { - struct vm_struct *area; - unsigned long addr; - /* * DMA allocation can be mapped to user space, so lets * set VM_USERMAP flags too. */ - area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, - caller); - if (!area) - return NULL; - addr = (unsigned long)area->addr; - area->phys_addr = __pfn_to_phys(page_to_pfn(page)); - - if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) { - vunmap((void *)addr); - return NULL; - } - return (void *)addr; + return dma_common_contiguous_remap(page, size, + VM_ARM_DMA_CONSISTENT | VM_USERMAP, + prot, caller); } static void __dma_free_remap(void *cpu_addr, size_t size) { - unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP; - struct vm_struct *area = find_vm_area(cpu_addr); - if (!area || (area->flags & flags) != flags) { - WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); - return; - } - unmap_kernel_range((unsigned long)cpu_addr, size); - vunmap(cpu_addr); + dma_common_free_remap(cpu_addr, size, + VM_ARM_DMA_CONSISTENT | VM_USERMAP); } #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K @@ -1167,29 +1149,8 @@ static void * __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) { - unsigned int i, nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct vm_struct *area; - unsigned long p; - - area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, - caller); - if (!area) - return NULL; - - area->pages = pages; - area->nr_pages = nr_pages; - p = (unsigned long)area->addr; - - for (i = 0; i < nr_pages; i++) { - phys_addr_t phys = __pfn_to_phys(page_to_pfn(pages[i])); - if (ioremap_page_range(p, p + PAGE_SIZE, phys, prot)) - goto err; - p += PAGE_SIZE; - } - return area->addr; -err: - unmap_kernel_range((unsigned long)area->addr, size); - vunmap(area->addr); + return dma_common_pages_remap(pages, size, + VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller); return NULL; } @@ -1386,8 +1347,8 @@ void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, } if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) { - unmap_kernel_range((unsigned long)cpu_addr, size); - vunmap(cpu_addr); + dma_common_free_remap(cpu_addr, size, + VM_ARM_DMA_CONSISTENT | VM_USERMAP); } __iommu_remove_mapping(dev, handle, size); diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 0ce39a33b3c2..9c36ca9ed1dd 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include /* @@ -267,3 +269,73 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } EXPORT_SYMBOL(dma_common_mmap); + +#ifdef CONFIG_MMU +/* + * remaps an array of PAGE_SIZE pages into another vm_area + * Cannot be used in non-sleeping contexts + */ +void *dma_common_pages_remap(struct page **pages, size_t size, + unsigned long vm_flags, pgprot_t prot, + const void *caller) +{ + struct vm_struct *area; + + area = get_vm_area_caller(size, vm_flags, caller); + if (!area) + return NULL; + + area->pages = pages; + + if (map_vm_area(area, prot, pages)) { + vunmap(area->addr); + return NULL; + } + + return area->addr; +} + +/* + * remaps an allocated contiguous region into another vm_area. + * Cannot be used in non-sleeping contexts + */ + +void *dma_common_contiguous_remap(struct page *page, size_t size, + unsigned long vm_flags, + pgprot_t prot, const void *caller) +{ + int i; + struct page **pages; + void *ptr; + unsigned long pfn; + + pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); + if (!pages) + return NULL; + + for (i = 0, pfn = page_to_pfn(page); i < (size >> PAGE_SHIFT); i++) + pages[i] = pfn_to_page(pfn + i); + + ptr = dma_common_pages_remap(pages, size, vm_flags, prot, caller); + + kfree(pages); + + return ptr; +} + +/* + * unmaps a range previously mapped by dma_common_*_remap + */ +void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) +{ + struct vm_struct *area = find_vm_area(cpu_addr); + + if (!area || (area->flags & vm_flags) != vm_flags) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); +} +#endif diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index de8bf89940f8..a9fd248f5d48 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -179,6 +179,15 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size); +void *dma_common_contiguous_remap(struct page *page, size_t size, + unsigned long vm_flags, + pgprot_t prot, const void *caller); + +void *dma_common_pages_remap(struct page **pages, size_t size, + unsigned long vm_flags, pgprot_t prot, + const void *caller); +void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); + /** * dma_mmap_attrs - map a coherent DMA allocation into user space * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices From 598e8b9c75705238d4e0c7ff8532ca4acfefdcde Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 29 Aug 2014 16:08:02 +0100 Subject: [PATCH 209/214] arm64: Add brackets around user_stack_pointer() Commit 5f888a1d33 (ARM64: perf: support dwarf unwinding in compat mode) changes user_stack_pointer() to return the compat SP for 32-bit tasks but without brackets around the whole definition, with possible issues on the call sites (noticed with a subsequent fix for KSTK_ESP). Fixes: 5f888a1d33c4 (ARM64: perf: support dwarf unwinding in compat mode) Reported-by: Sudeep Holla Cc: Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon (cherry picked from commit 2520d039728b2a3c5ae7f79fe2a0e9d182855b12) Signed-off-by: Mark Brown --- arch/arm64/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index b8e7cf199ce6..47d0ab177f7e 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -132,7 +132,7 @@ struct pt_regs { (!((regs)->pstate & PSR_F_BIT)) #define user_stack_pointer(regs) \ - (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp) + (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) /* * Are the current registers suitable for user mode? (used to maintain From 3a65e616ab6f8b07c2c0bf8657ec24f87627a7c1 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Fri, 23 Jan 2015 19:14:19 +0800 Subject: [PATCH 210/214] configs: add kernel internel boot testing configuration The usage likes following: $scripts/kconfig/merge_config.sh linaro/configs/linaro-base.conf linaro/configs/booting-test.conf ... It was tested on lsk-3.10 pandaboard ES. Signed-off-by: Alex Shi --- linaro/configs/booting-test.conf | 66 ++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 linaro/configs/booting-test.conf diff --git a/linaro/configs/booting-test.conf b/linaro/configs/booting-test.conf new file mode 100644 index 000000000000..7592b2185963 --- /dev/null +++ b/linaro/configs/booting-test.conf @@ -0,0 +1,66 @@ +CONFIG_UNUSED_SYMBOLS=y +CONFIG_HEADERS_CHECK=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_SELFTEST=y +CONFIG_DEBUG_OBJECTS_TIMERS=y +CONFIG_DEBUG_OBJECTS_WORK=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y +CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=400 +CONFIG_DEBUG_KMEMLEAK_TEST=m +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_PI_LIST=y +CONFIG_RT_MUTEX_TESTER=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_LOCKING_API_SELFTESTS=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_KOBJECT=y +CONFIG_DEBUG_HIGHMEM=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_VM_RB=y +CONFIG_DEBUG_WRITECOUNT=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_LIST=y +CONFIG_TEST_LIST_SORT=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_DEBUG_BLOCK_EXT_DEVT=y +CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y +CONFIG_DEBUG_PER_CPU_MAPS=y +CONFIG_FAULT_INJECTION=y +CONFIG_TRACER_MAX_TRACE=y +CONFIG_GENERIC_TRACER=y +CONFIG_FUNCTION_TRACER=y +CONFIG_FUNCTION_GRAPH_TRACER=y +CONFIG_IRQSOFF_TRACER=y +CONFIG_SCHED_TRACER=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_TRACER_SNAPSHOT=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_STACK_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_DYNAMIC_FTRACE=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_FTRACE_MCOUNT_RECORD=y +CONFIG_FTRACE_SELFTEST=y +CONFIG_FTRACE_STARTUP_TEST=y +CONFIG_EVENT_TRACE_TEST_SYSCALLS=y +CONFIG_RING_BUFFER_BENCHMARK=y +CONFIG_RING_BUFFER_STARTUP_TEST=y +CONFIG_DYNAMIC_DEBUG=y +CONFIG_DMA_API_DEBUG=y +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_STRING_HELPERS=y +CONFIG_TEST_KSTRTOX=y +CONFIG_STRICT_DEVMEM=y +CONFIG_OLD_MCOUNT=y +CONFIG_DEBUG_USER=y + From 73771b8d2dcd9959ef09c3b486edc4e0333fbd79 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 23 Jan 2015 12:04:03 +0000 Subject: [PATCH 211/214] ARM: Revert "coresight: adding basic support for Vexpress TC2" This reverts commit 63041eff2297676a345478c2b1cc6f76c63102db. For some reason this is causing failures before the serial console comes up on TC2 when booted using UEFI, revert pending investigation. Signed-off-by: Mark Brown --- arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 199 --------------------- 1 file changed, 199 deletions(-) diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 62e629bd3550..d2803be4e1a8 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -303,205 +303,6 @@ }; }; - etb@0,20010000 { - compatible = "arm,coresight-etb10", "arm,primecell"; - reg = <0 0x20010000 0 0x1000>; - - coresight-default-sink; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - etb_in_port: endpoint@0 { - slave-mode; - remote-endpoint = <&replicator_out_port0>; - }; - }; - }; - - tpiu@0,20030000 { - compatible = "arm,coresight-tpiu", "arm,primecell"; - reg = <0 0x20030000 0 0x1000>; - - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - tpiu_in_port: endpoint@0 { - slave-mode; - remote-endpoint = <&replicator_out_port1>; - }; - }; - }; - - replicator { - /* non-configurable replicators don't show up on the - * AMBA bus. As such no need to add "arm,primecell". - */ - compatible = "arm,coresight-replicator"; - - ports { - #address-cells = <1>; - #size-cells = <0>; - - /* replicator output ports */ - port@0 { - reg = <0>; - replicator_out_port0: endpoint { - remote-endpoint = <&etb_in_port>; - }; - }; - - port@1 { - reg = <1>; - replicator_out_port1: endpoint { - remote-endpoint = <&tpiu_in_port>; - }; - }; - - /* replicator input port */ - port@2 { - reg = <0>; - replicator_in_port0: endpoint { - slave-mode; - remote-endpoint = <&funnel_out_port0>; - }; - }; - }; - }; - - funnel@0,20040000 { - compatible = "arm,coresight-funnel", "arm,primecell"; - reg = <0 0x20040000 0 0x1000>; - - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - ports { - #address-cells = <1>; - #size-cells = <0>; - - /* funnel output port */ - port@0 { - reg = <0>; - funnel_out_port0: endpoint { - remote-endpoint = - <&replicator_in_port0>; - }; - }; - - /* funnel input ports */ - port@1 { - reg = <0>; - funnel_in_port0: endpoint { - slave-mode; - remote-endpoint = <&ptm0_out_port>; - }; - }; - - port@2 { - reg = <1>; - funnel_in_port1: endpoint { - slave-mode; - remote-endpoint = <&ptm1_out_port>; - }; - }; - - port@3 { - reg = <2>; - funnel_in_port2: endpoint { - slave-mode; - remote-endpoint = <&etm0_out_port>; - }; - }; - - /* Input port #3 is for ITM, not supported here */ - - port@4 { - reg = <4>; - funnel_in_port4: endpoint { - slave-mode; - remote-endpoint = <&etm1_out_port>; - }; - }; - - port@5 { - reg = <5>; - funnel_in_port5: endpoint { - slave-mode; - remote-endpoint = <&etm2_out_port>; - }; - }; - }; - }; - - ptm@0,2201c000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2201c000 0 0x1000>; - - cpu = <&cpu0>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - ptm0_out_port: endpoint { - remote-endpoint = <&funnel_in_port0>; - }; - }; - }; - - ptm@0,2201d000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2201d000 0 0x1000>; - - cpu = <&cpu1>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - ptm1_out_port: endpoint { - remote-endpoint = <&funnel_in_port1>; - }; - }; - }; - - etm@0,2203c000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2203c000 0 0x1000>; - - cpu = <&cpu2>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - etm0_out_port: endpoint { - remote-endpoint = <&funnel_in_port2>; - }; - }; - }; - - etm@0,2203d000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2203d000 0 0x1000>; - - cpu = <&cpu3>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - etm1_out_port: endpoint { - remote-endpoint = <&funnel_in_port4>; - }; - }; - }; - - etm@0,2203e000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2203e000 0 0x1000>; - - cpu = <&cpu4>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - etm2_out_port: endpoint { - remote-endpoint = <&funnel_in_port5>; - }; - }; - }; - smb { compatible = "simple-bus"; From d74b6d355a010ed2d70da9455f92167f8f5bbd60 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 23 Jan 2015 20:15:15 +0000 Subject: [PATCH 212/214] Revert "arm64: vdso: move data page before code pages" This reverts commit 82a95d0521cb6258559d18dca736da8272ba05a7. Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso.c | 34 +++++++++++++++---------------- arch/arm64/kernel/vdso/vdso.lds.S | 4 +++- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index b8289f08795f..ad8dddbffef2 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -118,8 +118,8 @@ static int __init vdso_init(void) } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; - pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); + pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", + vdso_pages + 1, vdso_pages, 1L, &vdso_start); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), @@ -127,22 +127,22 @@ static int __init vdso_init(void) if (vdso_pagelist == NULL) return -ENOMEM; - /* Grab the vDSO data page. */ - vdso_pagelist[0] = virt_to_page(vdso_data); - /* Grab the vDSO code pages. */ for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); + + /* Grab the vDSO data page. */ + vdso_pagelist[i] = virt_to_page(vdso_data); /* Populate the special mapping structures */ vdso_spec[0] = (struct vm_special_mapping) { - .name = "[vvar]", + .name = "[vdso]", .pages = vdso_pagelist, }; vdso_spec[1] = (struct vm_special_mapping) { - .name = "[vdso]", - .pages = &vdso_pagelist[1], + .name = "[vvar]", + .pages = vdso_pagelist + vdso_pages, }; return 0; @@ -166,22 +166,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, ret = ERR_PTR(vdso_base); goto up_fail; } - ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, + mm->context.vdso = (void *)vdso_base; + + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_spec[0]); if (IS_ERR(ret)) goto up_fail; - vdso_base += PAGE_SIZE; - mm->context.vdso = (void *)vdso_base; - ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_base += vdso_text_len; + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, &vdso_spec[1]); if (IS_ERR(ret)) goto up_fail; - up_write(&mm->mmap_sem); return 0; diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index beca249bc2f3..8154b8d1c826 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -28,7 +28,6 @@ OUTPUT_ARCH(aarch64) SECTIONS { - PROVIDE(_vdso_data = . - PAGE_SIZE); . = VDSO_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -58,6 +57,9 @@ SECTIONS _end = .; PROVIDE(end = .); + . = ALIGN(PAGE_SIZE); + PROVIDE(_vdso_data = .); + /DISCARD/ : { *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) From b749ecc6dac6514265c4a07f3d283c214377fcc5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 23 Jan 2015 20:15:32 +0000 Subject: [PATCH 213/214] Revert "arm64: vdso: move to _install_special_mapping and remove arch_vma_name" This reverts commit 95c91bdd8eafdc74337049c45d74c903b7dac49c. Signed-off-by: Mark Brown --- arch/arm64/kernel/vdso.c | 80 ++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index ad8dddbffef2..40ba5dea2ed7 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -85,29 +85,22 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - static struct vm_special_mapping spec = { - .name = "[vectors]", - .pages = vectors_page, - - }; - void *ret; + int ret; down_write(&mm->mmap_sem); current->mm->context.vdso = (void *)addr; /* Map vectors page at the high address. */ - ret = _install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, - &spec); + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, + vectors_page); up_write(&mm->mmap_sem); - return PTR_ERR_OR_ZERO(ret); + return ret; } #endif /* CONFIG_COMPAT */ -static struct vm_special_mapping vdso_spec[2]; - static int __init vdso_init(void) { int i; @@ -134,17 +127,6 @@ static int __init vdso_init(void) /* Grab the vDSO data page. */ vdso_pagelist[i] = virt_to_page(vdso_data); - /* Populate the special mapping structures */ - vdso_spec[0] = (struct vm_special_mapping) { - .name = "[vdso]", - .pages = vdso_pagelist, - }; - - vdso_spec[1] = (struct vm_special_mapping) { - .name = "[vvar]", - .pages = vdso_pagelist + vdso_pages, - }; - return 0; } arch_initcall(vdso_init); @@ -154,7 +136,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, { struct mm_struct *mm = current->mm; unsigned long vdso_base, vdso_text_len, vdso_mapping_len; - void *ret; + int ret; vdso_text_len = vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ @@ -163,23 +145,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { - ret = ERR_PTR(vdso_base); + ret = vdso_base; goto up_fail; } mm->context.vdso = (void *)vdso_base; - ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &vdso_spec[0]); - if (IS_ERR(ret)) + ret = install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_pagelist); + if (ret) goto up_fail; vdso_base += vdso_text_len; - ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, - &vdso_spec[1]); - if (IS_ERR(ret)) + ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + vdso_pagelist + vdso_pages); + if (ret) goto up_fail; up_write(&mm->mmap_sem); @@ -188,7 +170,35 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, up_fail: mm->context.vdso = NULL; up_write(&mm->mmap_sem); - return PTR_ERR(ret); + return ret; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + unsigned long vdso_text; + + if (!vma->vm_mm) + return NULL; + + vdso_text = (unsigned long)vma->vm_mm->context.vdso; + + /* + * We can re-use the vdso pointer in mm_context_t for identifying + * the vectors page for compat applications. The vDSO will always + * sit above TASK_UNMAPPED_BASE and so we don't need to worry about + * it conflicting with the vectors base. + */ + if (vma->vm_start == vdso_text) { +#ifdef CONFIG_COMPAT + if (vma->vm_start == AARCH32_VECTORS_BASE) + return "[vectors]"; +#endif + return "[vdso]"; + } else if (vma->vm_start == (vdso_text + (vdso_pages << PAGE_SHIFT))) { + return "[vvar]"; + } + + return NULL; } /* From 9698ac7c353d6f22100c9fa24a830cae6a466cb4 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 9 Oct 2014 15:26:44 -0700 Subject: [PATCH 214/214] arm64: add atomic pool for non-coherent and CMA allocations Neither CMA nor noncoherent allocations support atomic allocations. Add a dedicated atomic pool to support this. Reviewed-by: Catalin Marinas Signed-off-by: Laura Abbott Cc: Arnd Bergmann Cc: David Riley Cc: Olof Johansson Cc: Ritesh Harjain Cc: Russell King Cc: Thierry Reding Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit d4932f9e81ae7a7bf3c3967e48373909b9c98ee5) Signed-off-by: Mark Brown --- arch/arm64/Kconfig | 1 + arch/arm64/mm/dma-mapping.c | 164 +++++++++++++++++++++++++++++++----- drivers/base/dma-mapping.c | 2 +- 3 files changed, 147 insertions(+), 20 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4142cab9d4ff..d185ea209ef1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -13,6 +13,7 @@ config ARM64 select CLONE_BACKWARDS select COMMON_CLK select DCACHE_WORD_ACCESS + select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS select GENERIC_EARLY_IOREMAP select GENERIC_IOMAP diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index f39a55d58918..eeb1cf3ff299 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,54 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, return prot; } +static struct gen_pool *atomic_pool; + +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K +static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE; + +static int __init early_coherent_pool(char *p) +{ + atomic_pool_size = memparse(p, &p); + return 0; +} +early_param("coherent_pool", early_coherent_pool); + +static void *__alloc_from_pool(size_t size, struct page **ret_page) +{ + unsigned long val; + void *ptr = NULL; + + if (!atomic_pool) { + WARN(1, "coherent pool not initialised!\n"); + return NULL; + } + + val = gen_pool_alloc(atomic_pool, size); + if (val) { + phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); + + *ret_page = phys_to_page(phys); + ptr = (void *)val; + } + + return ptr; +} + +static bool __in_atomic_pool(void *start, size_t size) +{ + return addr_in_gen_pool(atomic_pool, (unsigned long)start, size); +} + +static int __free_from_pool(void *start, size_t size) +{ + if (!__in_atomic_pool(start, size)) + return 0; + + gen_pool_free(atomic_pool, (unsigned long)start, size); + + return 1; +} + static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) @@ -50,7 +99,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; - if (IS_ENABLED(CONFIG_DMA_CMA)) { + if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) { struct page *page; size = PAGE_ALIGN(size); @@ -70,50 +119,54 @@ static void __dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) { + bool freed; + phys_addr_t paddr = dma_to_phys(dev, dma_handle); + if (dev == NULL) { WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); return; } - if (IS_ENABLED(CONFIG_DMA_CMA)) { - phys_addr_t paddr = dma_to_phys(dev, dma_handle); - - dma_release_from_contiguous(dev, + freed = dma_release_from_contiguous(dev, phys_to_page(paddr), size >> PAGE_SHIFT); - } else { + if (!freed) swiotlb_free_coherent(dev, size, vaddr, dma_handle); - } } static void *__dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) { - struct page *page, **map; + struct page *page; void *ptr, *coherent_ptr; - int order, i; size = PAGE_ALIGN(size); - order = get_order(size); + + if (!(flags & __GFP_WAIT)) { + struct page *page = NULL; + void *addr = __alloc_from_pool(size, &page); + + if (addr) + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + + return addr; + + } ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs); if (!ptr) goto no_mem; - map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA); - if (!map) - goto no_map; /* remove any dirty cache lines on the kernel alias */ __dma_flush_range(ptr, ptr + size); /* create a coherent mapping */ page = virt_to_page(ptr); - for (i = 0; i < (size >> PAGE_SHIFT); i++) - map[i] = page + i; - coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP, - __get_dma_pgprot(attrs, __pgprot(PROT_NORMAL_NC), false)); - kfree(map); + coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, + __get_dma_pgprot(attrs, + __pgprot(PROT_NORMAL_NC), false), + NULL); if (!coherent_ptr) goto no_map; @@ -132,6 +185,8 @@ static void __dma_free_noncoherent(struct device *dev, size_t size, { void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); + if (__free_from_pool(vaddr, size)) + return; vunmap(vaddr); __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs); } @@ -329,6 +384,67 @@ static struct notifier_block amba_bus_nb = { extern int swiotlb_late_init_with_default_size(size_t default_size); +static int __init atomic_pool_init(void) +{ + pgprot_t prot = __pgprot(PROT_NORMAL_NC); + unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT; + struct page *page; + void *addr; + unsigned int pool_size_order = get_order(atomic_pool_size); + + if (dev_get_cma_area(NULL)) + page = dma_alloc_from_contiguous(NULL, nr_pages, + pool_size_order); + else + page = alloc_pages(GFP_DMA, pool_size_order); + + if (page) { + int ret; + void *page_addr = page_address(page); + + memset(page_addr, 0, atomic_pool_size); + __dma_flush_range(page_addr, page_addr + atomic_pool_size); + + atomic_pool = gen_pool_create(PAGE_SHIFT, -1); + if (!atomic_pool) + goto free_page; + + addr = dma_common_contiguous_remap(page, atomic_pool_size, + VM_USERMAP, prot, atomic_pool_init); + + if (!addr) + goto destroy_genpool; + + ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr, + page_to_phys(page), + atomic_pool_size, -1); + if (ret) + goto remove_mapping; + + gen_pool_set_algo(atomic_pool, + gen_pool_first_fit_order_align, + (void *)PAGE_SHIFT); + + pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n", + atomic_pool_size / 1024); + return 0; + } + goto out; + +remove_mapping: + dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP); +destroy_genpool: + gen_pool_destroy(atomic_pool); + atomic_pool = NULL; +free_page: + if (!dma_release_from_contiguous(NULL, page, nr_pages)) + __free_pages(page, pool_size_order); +out: + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", + atomic_pool_size / 1024); + return -ENOMEM; +} + static int __init swiotlb_late_init(void) { size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); @@ -343,7 +459,17 @@ static int __init swiotlb_late_init(void) return swiotlb_late_init_with_default_size(swiotlb_size); } -arch_initcall(swiotlb_late_init); + +static int __init arm64_dma_init(void) +{ + int ret = 0; + + ret |= swiotlb_late_init(); + ret |= atomic_pool_init(); + + return ret; +} +arch_initcall(arm64_dma_init); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 9c36ca9ed1dd..dd302ea3b599 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -287,7 +287,7 @@ void *dma_common_pages_remap(struct page **pages, size_t size, area->pages = pages; - if (map_vm_area(area, prot, pages)) { + if (map_vm_area(area, prot, &pages)) { vunmap(area->addr); return NULL; }