From 34d9769988397a1edf93ad1966c167591ab29e79 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 19 Dec 2013 09:23:06 +0000 Subject: [PATCH 001/296] gator: Version 5.17 Signed-off-by: Jon Medhurst --- drivers/gator/Makefile | 61 ++-- drivers/gator/gator.h | 6 +- drivers/gator/gator_annotate_kernel.c | 107 ++++-- drivers/gator/gator_backtrace.c | 10 +- drivers/gator/gator_cookies.c | 36 +- drivers/gator/gator_events.sh | 19 -- drivers/gator/gator_events_armv6.c | 7 - drivers/gator/gator_events_armv7.c | 7 - drivers/gator/gator_events_block.c | 2 - drivers/gator/gator_events_ccn-504.c | 192 ++++++----- drivers/gator/gator_events_irq.c | 2 - drivers/gator/gator_events_l2c-310.c | 2 - drivers/gator/gator_events_mali_4xx.c | 30 +- drivers/gator/gator_events_mali_common.c | 9 +- drivers/gator/gator_events_mali_common.h | 4 +- drivers/gator/gator_events_mali_t6xx.c | 60 +++- drivers/gator/gator_events_mali_t6xx_hw.c | 46 ++- drivers/gator/gator_events_meminfo.c | 307 +++++++++++++----- ...events_mmaped.c => gator_events_mmapped.c} | 122 +++---- drivers/gator/gator_events_net.c | 5 +- drivers/gator/gator_events_perf_pmu.c | 83 ++++- drivers/gator/gator_events_sched.c | 2 - drivers/gator/gator_events_scorpion.c | 7 - drivers/gator/gator_main.c | 108 +++++- drivers/gator/gator_marshaling.c | 19 ++ drivers/gator/gator_trace_gpu.c | 6 +- drivers/gator/gator_trace_sched.c | 20 +- .../mali/mali_mjollnir_profiling_gator_api.h | 15 +- .../mali/mali_utgard_profiling_gator_api.h | 15 +- drivers/gator/mali_t6xx.mk | 11 +- tools/gator/daemon/Buffer.cpp | 11 + tools/gator/daemon/Buffer.h | 5 + tools/gator/daemon/Child.cpp | 13 +- tools/gator/daemon/Child.h | 4 + tools/gator/daemon/ConfigurationXML.h | 4 + tools/gator/daemon/Driver.h | 6 +- tools/gator/daemon/Fifo.h | 4 + tools/gator/daemon/Hwmon.cpp | 4 + tools/gator/daemon/Hwmon.h | 4 + tools/gator/daemon/OlySocket.cpp | 23 +- tools/gator/daemon/Sender.h | 4 + tools/gator/daemon/SessionData.cpp | 12 +- 
tools/gator/daemon/SessionData.h | 6 +- tools/gator/daemon/SessionXML.h | 4 + tools/gator/daemon/StreamlineSetup.h | 4 + tools/gator/daemon/common.mk | 2 +- tools/gator/daemon/events-CCI-400.xml | 62 +++- tools/gator/daemon/events-Linux.xml | 10 +- tools/gator/daemon/events-Mali-T6xx.xml | 10 + tools/gator/daemon/events-Mali-T6xx_hw.xml | 5 +- tools/gator/daemon/main.cpp | 25 +- 51 files changed, 1058 insertions(+), 484 deletions(-) delete mode 100755 drivers/gator/gator_events.sh rename drivers/gator/{gator_events_mmaped.c => gator_events_mmapped.c} (56%) diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile index 0d4ca68701e0..3dc9d059a4b4 100644 --- a/drivers/gator/Makefile +++ b/drivers/gator/Makefile @@ -3,7 +3,8 @@ ifneq ($(KERNELRELEASE),) # Uncomment the following line to enable kernel stack unwinding within gator, or update gator_backtrace.c # EXTRA_CFLAGS += -DGATOR_KERNEL_STACK_UNWINDING -obj-m := gator.o +CONFIG_GATOR ?= m +obj-$(CONFIG_GATOR) := gator.o gator-y := gator_main.o \ gator_events_irq.o \ @@ -11,23 +12,40 @@ gator-y := gator_main.o \ gator_events_net.o \ gator_events_block.o \ gator_events_meminfo.o \ - gator_events_perf_pmu.o - -gator-y += gator_events_mmaped.o + gator_events_perf_pmu.o \ + gator_events_mmapped.o \ +# Convert the old GATOR_WITH_MALI_SUPPORT to the new kernel flags ifneq ($(GATOR_WITH_MALI_SUPPORT),) -ifeq ($(GATOR_WITH_MALI_SUPPORT),MALI_T6xx) -gator-y += gator_events_mali_t6xx.o \ + CONFIG_GATOR_WITH_MALI_SUPPORT := y + ifeq ($(GATOR_WITH_MALI_SUPPORT),MALI_T6xx) + CONFIG_GATOR_MALI_4XXMP := n + CONFIG_GATOR_MALI_T6XX := y + else + CONFIG_GATOR_MALI_4XXMP := y + CONFIG_GATOR_MALI_T6XX := n + endif + EXTRA_CFLAGS += -DMALI_SUPPORT=$(GATOR_WITH_MALI_SUPPORT) + ifneq ($(GATOR_MALI_INTERFACE_STYLE),) + EXTRA_CFLAGS += -DGATOR_MALI_INTERFACE_STYLE=$(GATOR_MALI_INTERFACE_STYLE) + endif +endif + +ifeq ($(CONFIG_GATOR_WITH_MALI_SUPPORT),y) + ifeq ($(CONFIG_GATOR_MALI_T6XX),y) + gator-y += gator_events_mali_t6xx.o \ 
gator_events_mali_t6xx_hw.o -include $(M)/mali_t6xx.mk -else -gator-y += gator_events_mali_4xx.o -endif -gator-y += gator_events_mali_common.o -EXTRA_CFLAGS += -DMALI_SUPPORT=$(GATOR_WITH_MALI_SUPPORT) -ifneq ($(GATOR_MALI_INTERFACE_STYLE),) -EXTRA_CFLAGS += -DGATOR_MALI_INTERFACE_STYLE=$(GATOR_MALI_INTERFACE_STYLE) -endif + include $(src)/mali_t6xx.mk + else + gator-y += gator_events_mali_4xx.o + endif + gator-y += gator_events_mali_common.o + + ifneq ($(CONFIG_GATOR_MALI_PATH),) + ccflags-y += -I$(CONFIG_GATOR_MALI_PATH) + endif + ccflags-$(CONFIG_GATOR_MALI_4XXMP) += -DMALI_SUPPORT=MALI_4xx + ccflags-$(CONFIG_GATOR_MALI_T6XX) += -DMALI_SUPPORT=MALI_T6xx endif # GATOR_TEST controls whether to include (=1) or exclude (=0) test code. @@ -42,17 +60,6 @@ gator-$(CONFIG_ARM) += gator_events_armv6.o \ gator-$(CONFIG_ARM64) += gator_events_ccn-504.o -$(obj)/gator_main.o: gator_events.h - -clean-files := gator_events.h - - chk_events.h = : - quiet_chk_events.h = echo ' CHK $@' -silent_chk_events.h = : -gator_events.h: FORCE - @$($(quiet)chk_events.h) - $(Q)cd $(obj) ; $(CONFIG_SHELL) $(obj)/gator_events.sh $@ - else all: @@ -63,7 +70,7 @@ all: $(error) clean: - rm -f *.o .*.cmd gator_events.h modules.order Module.symvers gator.ko gator.mod.c + rm -f *.o .*.cmd modules.order Module.symvers gator.ko gator.mod.c rm -rf .tmp_versions endif diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h index 2e122da767d8..d8981ed85a6a 100644 --- a/drivers/gator/gator.h +++ b/drivers/gator/gator.h @@ -112,14 +112,10 @@ struct gator_interface { void (*offline_dispatch)(int cpu, bool migrate); // called in process context but may not be running on core 'cpu' int (*read)(int **buffer); int (*read64)(long long **buffer); + int (*read_proc)(long long **buffer, struct task_struct *); struct list_head list; }; -// gator_events_init is used as a search term in gator_events.sh -#define gator_events_init(initfn) \ - static inline int __gator_events_init_test(void) \ - { return initfn(); } - 
int gator_events_install(struct gator_interface *interface); int gator_events_get_key(void); u32 gator_cpuid(void); diff --git a/drivers/gator/gator_annotate_kernel.c b/drivers/gator/gator_annotate_kernel.c index 4715f64a1865..a406e4882974 100644 --- a/drivers/gator/gator_annotate_kernel.c +++ b/drivers/gator/gator_annotate_kernel.c @@ -29,11 +29,27 @@ static void kannotate_write(const char *ptr, unsigned int size) } } +static void marshal_u16(char *buf, u16 val) { + buf[0] = val & 0xff; + buf[1] = (val >> 8) & 0xff; +} + +static void marshal_u32(char *buf, u32 val) { + buf[0] = val & 0xff; + buf[1] = (val >> 8) & 0xff; + buf[2] = (val >> 16) & 0xff; + buf[3] = (val >> 24) & 0xff; +} + void gator_annotate_channel(int channel, const char *str) { - int str_size = strlen(str) & 0xffff; - long long header = ESCAPE_CODE | (STRING_ANNOTATION << 8) | (channel << 16) | ((long long)str_size << 48); - kannotate_write((char *)&header, sizeof(header)); + const u16 str_size = strlen(str) & 0xffff; + char header[8]; + header[0] = ESCAPE_CODE; + header[1] = STRING_ANNOTATION; + marshal_u32(header + 2, channel); + marshal_u16(header + 6, str_size); + kannotate_write(header, sizeof(header)); kannotate_write(str, str_size); } @@ -48,14 +64,14 @@ EXPORT_SYMBOL(gator_annotate); void gator_annotate_channel_color(int channel, int color, const char *str) { - int str_size = (strlen(str) + 4) & 0xffff; + const u16 str_size = (strlen(str) + 4) & 0xffff; char header[12]; header[0] = ESCAPE_CODE; header[1] = STRING_ANNOTATION; - *(u32 *)(&header[2]) = channel; - *(u16 *)(&header[6]) = str_size; - *(u32 *)(&header[8]) = color; - kannotate_write((char *)&header, sizeof(header)); + marshal_u32(header + 2, channel); + marshal_u16(header + 6, str_size); + marshal_u32(header + 8, color); + kannotate_write(header, sizeof(header)); kannotate_write(str, str_size - 4); } @@ -70,8 +86,12 @@ EXPORT_SYMBOL(gator_annotate_color); void gator_annotate_channel_end(int channel) { - long long header = 
ESCAPE_CODE | (STRING_ANNOTATION << 8) | (channel << 16); - kannotate_write((char *)&header, sizeof(header)); + char header[8]; + header[0] = ESCAPE_CODE; + header[1] = STRING_ANNOTATION; + marshal_u32(header + 2, channel); + marshal_u16(header + 6, 0); + kannotate_write(header, sizeof(header)); } EXPORT_SYMBOL(gator_annotate_channel_end); @@ -85,14 +105,14 @@ EXPORT_SYMBOL(gator_annotate_end); void gator_annotate_name_channel(int channel, int group, const char* str) { - int str_size = strlen(str) & 0xffff; + const u16 str_size = strlen(str) & 0xffff; char header[12]; header[0] = ESCAPE_CODE; header[1] = NAME_CHANNEL_ANNOTATION; - *(u32 *)(&header[2]) = channel; - *(u32 *)(&header[6]) = group; - *(u16 *)(&header[10]) = str_size; - kannotate_write((char *)&header, sizeof(header)); + marshal_u32(header + 2, channel); + marshal_u32(header + 6, group); + marshal_u16(header + 10, str_size); + kannotate_write(header, sizeof(header)); kannotate_write(str, str_size); } @@ -100,9 +120,13 @@ EXPORT_SYMBOL(gator_annotate_name_channel); void gator_annotate_name_group(int group, const char* str) { - int str_size = strlen(str) & 0xffff; - long long header = ESCAPE_CODE | (NAME_GROUP_ANNOTATION << 8) | (group << 16) | ((long long)str_size << 48); - kannotate_write((char *)&header, sizeof(header)); + const u16 str_size = strlen(str) & 0xffff; + char header[8]; + header[0] = ESCAPE_CODE; + header[1] = NAME_GROUP_ANNOTATION; + marshal_u32(header + 2, group); + marshal_u16(header + 6, str_size); + kannotate_write(header, sizeof(header)); kannotate_write(str, str_size); } @@ -110,11 +134,16 @@ EXPORT_SYMBOL(gator_annotate_name_group); void gator_annotate_visual(const char *data, unsigned int length, const char *str) { - int str_size = strlen(str) & 0xffff; - int visual_annotation = ESCAPE_CODE | (VISUAL_ANNOTATION << 8) | (str_size << 16); - kannotate_write((char *)&visual_annotation, sizeof(visual_annotation)); + const u16 str_size = strlen(str) & 0xffff; + char header[4]; + char 
header_length[4]; + header[0] = ESCAPE_CODE; + header[1] = VISUAL_ANNOTATION; + marshal_u16(header + 2, str_size); + marshal_u32(header_length, length); + kannotate_write(header, sizeof(header)); kannotate_write(str, str_size); - kannotate_write((char *)&length, sizeof(length)); + kannotate_write(header_length, sizeof(header_length)); kannotate_write(data, length); } @@ -122,17 +151,23 @@ EXPORT_SYMBOL(gator_annotate_visual); void gator_annotate_marker(void) { - int header = ESCAPE_CODE | (MARKER_ANNOTATION << 8); - kannotate_write((char *)&header, sizeof(header)); + char header[4]; + header[0] = ESCAPE_CODE; + header[1] = MARKER_ANNOTATION; + marshal_u16(header + 2, 0); + kannotate_write(header, sizeof(header)); } EXPORT_SYMBOL(gator_annotate_marker); void gator_annotate_marker_str(const char *str) { - int str_size = strlen(str) & 0xffff; - int header = ESCAPE_CODE | (MARKER_ANNOTATION << 8) | (str_size << 16); - kannotate_write((char *)&header, sizeof(header)); + const u16 str_size = strlen(str) & 0xffff; + char header[4]; + header[0] = ESCAPE_CODE; + header[1] = MARKER_ANNOTATION; + marshal_u16(header + 2, str_size); + kannotate_write(header, sizeof(header)); kannotate_write(str, str_size); } @@ -140,17 +175,25 @@ EXPORT_SYMBOL(gator_annotate_marker_str); void gator_annotate_marker_color(int color) { - long long header = (ESCAPE_CODE | (MARKER_ANNOTATION << 8) | 0x00040000 | ((long long)color << 32)); - kannotate_write((char *)&header, sizeof(header)); + char header[8]; + header[0] = ESCAPE_CODE; + header[1] = MARKER_ANNOTATION; + marshal_u16(header + 2, 4); + marshal_u32(header + 4, color); + kannotate_write(header, sizeof(header)); } EXPORT_SYMBOL(gator_annotate_marker_color); void gator_annotate_marker_color_str(int color, const char *str) { - int str_size = (strlen(str) + 4) & 0xffff; - long long header = ESCAPE_CODE | (MARKER_ANNOTATION << 8) | (str_size << 16) | ((long long)color << 32); - kannotate_write((char *)&header, sizeof(header)); + const u16 
str_size = (strlen(str) + 4) & 0xffff; + char header[8]; + header[0] = ESCAPE_CODE; + header[1] = MARKER_ANNOTATION; + marshal_u16(header + 2, str_size); + marshal_u32(header + 4, color); + kannotate_write(header, sizeof(header)); kannotate_write(str, str_size - 4); } diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c index 0670d6cea9bd..ffacb490194c 100644 --- a/drivers/gator/gator_backtrace.c +++ b/drivers/gator/gator_backtrace.c @@ -132,13 +132,21 @@ static int report_trace(struct stackframe *frame, void *d) // Uncomment the following line to enable kernel stack unwinding within gator, note it can also be defined from the Makefile // #define GATOR_KERNEL_STACK_UNWINDING + +#if (defined(__arm__) || defined(__aarch64__)) && !defined(GATOR_KERNEL_STACK_UNWINDING) +// Disabled by default +MODULE_PARM_DESC(kernel_stack_unwinding, "Allow kernel stack unwinding."); +bool kernel_stack_unwinding = 0; +module_param(kernel_stack_unwinding, bool, 0644); +#endif + static void kernel_backtrace(int cpu, struct pt_regs *const regs) { #if defined(__arm__) || defined(__aarch64__) #ifdef GATOR_KERNEL_STACK_UNWINDING int depth = gator_backtrace_depth; #else - int depth = 1; + int depth = (kernel_stack_unwinding ? 
gator_backtrace_depth : 1); #endif struct stackframe frame; if (depth == 0) diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c index 5f98a1cc309c..91adfdde9be2 100644 --- a/drivers/gator/gator_cookies.c +++ b/drivers/gator/gator_cookies.c @@ -8,7 +8,8 @@ */ #define COOKIEMAP_ENTRIES 1024 /* must be power of 2 */ -#define TRANSLATE_SIZE 256 +#define TRANSLATE_BUFFER_SIZE 512 // must be a power of 2 - 512/4 = 128 entries +#define TRANSLATE_TEXT_SIZE 256 #define MAX_COLLISIONS 2 static uint32_t *gator_crc32_table; @@ -22,7 +23,7 @@ static DEFINE_PER_CPU(int, translate_buffer_read); static DEFINE_PER_CPU(int, translate_buffer_write); static DEFINE_PER_CPU(void **, translate_buffer); -static inline uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq); +static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq); static void wq_cookie_handler(struct work_struct *unused); DECLARE_WORK(cookie_work, wq_cookie_handler); static struct timer_list app_process_wake_up_timer; @@ -107,11 +108,13 @@ static void cookiemap_add(uint64_t key, uint32_t value) values[0] = value; } +#ifndef CONFIG_PREEMPT_RT_FULL static void translate_buffer_write_ptr(int cpu, void *x) { per_cpu(translate_buffer, cpu)[per_cpu(translate_buffer_write, cpu)++] = x; per_cpu(translate_buffer_write, cpu) &= translate_buffer_mask; } +#endif static void *translate_buffer_read_ptr(int cpu) { @@ -124,7 +127,7 @@ static void wq_cookie_handler(struct work_struct *unused) { struct task_struct *task; char *text; - int cpu = get_physical_cpu(); + int cpu = get_physical_cpu(), cookie; unsigned int commit; mutex_lock(&start_mutex); @@ -134,7 +137,8 @@ static void wq_cookie_handler(struct work_struct *unused) while (per_cpu(translate_buffer_read, cpu) != commit) { task = (struct task_struct *)translate_buffer_read_ptr(cpu); text = (char *)translate_buffer_read_ptr(cpu); - get_cookie(cpu, task, text, true); + cookie = get_cookie(cpu, 
task, text, true); + marshal_link(cookie, task->tgid, task->pid); } } @@ -156,15 +160,16 @@ static int translate_app_process(const char **text, int cpu, struct task_struct struct mm_struct *mm; struct page *page = NULL; struct vm_area_struct *page_vma; - int bytes, offset, retval = 0, ptr; + int bytes, offset, retval = 0; char *buf = per_cpu(translate_text, cpu); +#ifndef CONFIG_PREEMPT_RT_FULL // Push work into a work queue if in atomic context as the kernel functions below might sleep // Rely on the in_interrupt variable rather than in_irq() or in_interrupt() kernel functions, as the value of these functions seems // inconsistent during a context switch between android/linux versions if (!from_wq) { // Check if already in buffer - ptr = per_cpu(translate_buffer_read, cpu); + int ptr = per_cpu(translate_buffer_read, cpu); while (ptr != per_cpu(translate_buffer_write, cpu)) { if (per_cpu(translate_buffer, cpu)[ptr] == (void *)task) goto out; @@ -174,9 +179,11 @@ static int translate_app_process(const char **text, int cpu, struct task_struct translate_buffer_write_ptr(cpu, (void *)task); translate_buffer_write_ptr(cpu, (void *)*text); + // Not safe to call in RT-Preempt full in schedule switch context mod_timer(&app_process_wake_up_timer, jiffies + 1); goto out; } +#endif mm = get_task_mm(task); if (!mm) @@ -186,8 +193,8 @@ static int translate_app_process(const char **text, int cpu, struct task_struct addr = mm->arg_start; len = mm->arg_end - mm->arg_start; - if (len > TRANSLATE_SIZE) - len = TRANSLATE_SIZE; + if (len > TRANSLATE_TEXT_SIZE) + len = TRANSLATE_TEXT_SIZE; down_read(&mm->mmap_sem); while (len) { @@ -225,7 +232,7 @@ out: return retval; } -static inline uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq) +static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq) { unsigned long flags, cookie; uint64_t key; @@ -312,8 +319,7 @@ static int cookies_initialize(void) uint32_t crc, poly; int 
i, j, cpu, size, err = 0; - int translate_buffer_size = 512; // must be a power of 2 - translate_buffer_mask = translate_buffer_size / sizeof(per_cpu(translate_buffer, 0)[0]) - 1; + translate_buffer_mask = TRANSLATE_BUFFER_SIZE / sizeof(per_cpu(translate_buffer, 0)[0]) - 1; for_each_present_cpu(cpu) { per_cpu(cookie_next_key, cpu) = nr_cpu_ids + cpu; @@ -334,7 +340,7 @@ static int cookies_initialize(void) } memset(per_cpu(cookie_values, cpu), 0, size); - per_cpu(translate_buffer, cpu) = (void **)kmalloc(translate_buffer_size, GFP_KERNEL); + per_cpu(translate_buffer, cpu) = (void **)kmalloc(TRANSLATE_BUFFER_SIZE, GFP_KERNEL); if (!per_cpu(translate_buffer, cpu)) { err = -ENOMEM; goto cookie_setup_error; @@ -343,7 +349,7 @@ static int cookies_initialize(void) per_cpu(translate_buffer_write, cpu) = 0; per_cpu(translate_buffer_read, cpu) = 0; - per_cpu(translate_text, cpu) = (char *)kmalloc(TRANSLATE_SIZE, GFP_KERNEL); + per_cpu(translate_text, cpu) = (char *)kmalloc(TRANSLATE_TEXT_SIZE, GFP_KERNEL); if (!per_cpu(translate_text, cpu)) { err = -ENOMEM; goto cookie_setup_error; @@ -353,6 +359,10 @@ static int cookies_initialize(void) // build CRC32 table poly = 0x04c11db7; gator_crc32_table = (uint32_t *)kmalloc(256 * sizeof(uint32_t), GFP_KERNEL); + if (!gator_crc32_table) { + err = -ENOMEM; + goto cookie_setup_error; + } for (i = 0; i < 256; i++) { crc = i; for (j = 8; j > 0; j--) { diff --git a/drivers/gator/gator_events.sh b/drivers/gator/gator_events.sh deleted file mode 100755 index 5467dd6d17d6..000000000000 --- a/drivers/gator/gator_events.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh - -EVENTS=`grep gator_events_init *.c | sed 's/.\+gator_events_init(\(.\+\)).\+/\1/'` - -( - echo /\* This file is auto generated \*/ - echo - for EVENT in $EVENTS; do - echo __weak int $EVENT\(void\)\; - done - echo - echo static int \(*gator_events_list[]\)\(void\) = { - for EVENT in $EVENTS; do - echo \ $EVENT, - done - echo }\; -) > $1.tmp - -cmp -s $1 $1.tmp && rm $1.tmp || mv 
$1.tmp $1 diff --git a/drivers/gator/gator_events_armv6.c b/drivers/gator/gator_events_armv6.c index 4f1bca6e2dbe..dd7974090b82 100644 --- a/drivers/gator/gator_events_armv6.c +++ b/drivers/gator/gator_events_armv6.c @@ -234,11 +234,4 @@ int gator_events_armv6_init(void) return gator_events_install(&gator_events_armv6_interface); } -gator_events_init(gator_events_armv6_init); - -#else -int gator_events_armv6_init(void) -{ - return -1; -} #endif diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c index 58f29566eeab..30881c8fd3fd 100644 --- a/drivers/gator/gator_events_armv7.c +++ b/drivers/gator/gator_events_armv7.c @@ -309,11 +309,4 @@ int gator_events_armv7_init(void) return gator_events_install(&gator_events_armv7_interface); } -gator_events_init(gator_events_armv7_init); - -#else -int gator_events_armv7_init(void) -{ - return -1; -} #endif diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c index 56c6a6736529..691ef2574536 100644 --- a/drivers/gator/gator_events_block.c +++ b/drivers/gator/gator_events_block.c @@ -151,5 +151,3 @@ int gator_events_block_init(void) return gator_events_install(&gator_events_block_interface); } - -gator_events_init(gator_events_block_init); diff --git a/drivers/gator/gator_events_ccn-504.c b/drivers/gator/gator_events_ccn-504.c index b91a9a149b90..b89231967c75 100644 --- a/drivers/gator/gator_events_ccn-504.c +++ b/drivers/gator/gator_events_ccn-504.c @@ -6,23 +6,16 @@ * published by the Free Software Foundation. */ -/******************************************************************************* - * WARNING: This code is an experimental implementation of the CCN-504 hardware - * counters which has not been tested on the hardware. Commented debug - * statements are present and can be uncommented for diagnostic purposes. 
- ******************************************************************************/ - #include #include #include "gator.h" -#define PERIPHBASE 0x2E000000 - #define NUM_REGIONS 256 #define REGION_SIZE (64*1024) #define REGION_DEBUG 1 #define REGION_XP 64 +#define NUM_XPS 11 // DT (Debug) region #define PMEVCNTSR0 0x0150 @@ -34,27 +27,86 @@ // XP region #define DT_CONFIG 0x0300 +#define DT_CONTROL 0x0370 // Multiple #define PMU_EVENT_SEL 0x0600 #define OLY_ID 0xFF00 #define CCNT 4 -#define CNTMAX (4 + 1) +#define CNTMAX (CCNT + 1) #define get_pmu_event_id(event) (((event) >> 0) & 0xFF) #define get_node_type(event) (((event) >> 8) & 0xFF) #define get_region(event) (((event) >> 16) & 0xFF) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) + +// From kernel/params.c +#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \ + int param_set_##name(const char *val, struct kernel_param *kp) \ + { \ + tmptype l; \ + int ret; \ + \ + if (!val) return -EINVAL; \ + ret = strtolfn(val, 0, &l); \ + if (ret == -EINVAL || ((type)l != l)) \ + return -EINVAL; \ + *((type *)kp->arg) = l; \ + return 0; \ + } \ + int param_get_##name(char *buffer, struct kernel_param *kp) \ + { \ + return sprintf(buffer, format, *((type *)kp->arg)); \ + } + +#else + +// From kernel/params.c +#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \ + int param_set_##name(const char *val, const struct kernel_param *kp) \ + { \ + tmptype l; \ + int ret; \ + \ + ret = strtolfn(val, 0, &l); \ + if (ret < 0 || ((type)l != l)) \ + return ret < 0 ? 
ret : -EINVAL; \ + *((type *)kp->arg) = l; \ + return 0; \ + } \ + int param_get_##name(char *buffer, const struct kernel_param *kp) \ + { \ + return scnprintf(buffer, PAGE_SIZE, format, \ + *((type *)kp->arg)); \ + } \ + struct kernel_param_ops param_ops_##name = { \ + .set = param_set_##name, \ + .get = param_get_##name, \ + }; \ + EXPORT_SYMBOL(param_set_##name); \ + EXPORT_SYMBOL(param_get_##name); \ + EXPORT_SYMBOL(param_ops_##name) + +#endif + +STANDARD_PARAM_DEF(u64, u64, "%llu", u64, strict_strtoull); + +// From include/linux/moduleparam.h +#define param_check_u64(name, p) __param_check(name, p, u64) + MODULE_PARM_DESC(ccn504_addr, "CCN-504 physical base address"); -static unsigned long ccn504_addr = 0; -module_param(ccn504_addr, ulong, 0444); +static u64 ccn504_addr = 0; +module_param(ccn504_addr, u64, 0444); static void __iomem *gator_events_ccn504_base; +static bool gator_events_ccn504_global_enabled; static unsigned long gator_events_ccn504_enabled[CNTMAX]; static unsigned long gator_events_ccn504_event[CNTMAX]; static unsigned long gator_events_ccn504_key[CNTMAX]; static int gator_events_ccn504_buffer[2*CNTMAX]; +static int gator_events_ccn504_prev[CNTMAX]; static void gator_events_ccn504_create_shutdown(void) { @@ -96,7 +148,6 @@ static void gator_events_ccn504_set_dt_config(int xp_node_id, int event_num, int dt_config = readl(gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG); dt_config |= (value + event_num) << (4*event_num); - //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, dt_config, (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG); writel(dt_config, gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG); } @@ -104,6 +155,20 @@ static int gator_events_ccn504_start(void) { int i; + gator_events_ccn504_global_enabled = 0; + for (i = 0; i < CNTMAX; ++i) { + if (gator_events_ccn504_enabled[i]) { + gator_events_ccn504_global_enabled = 1; + break; + } + } + + if 
(!gator_events_ccn504_global_enabled) { + return 0; + } + + memset(&gator_events_ccn504_prev, 0x80, sizeof(gator_events_ccn504_prev)); + // Disable INTREQ on overflow // [6] ovfl_intr_en = 0 // perhaps set to 1? @@ -112,9 +177,22 @@ static int gator_events_ccn504_start(void) // [4:1] cntcfg = 0 // Enable PMU features // [0] pmu_en = 1 - //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, 0x1, REGION_DEBUG*REGION_SIZE + PMCR); writel(0x1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMCR); + // Configure the XPs + for (i = 0; i < NUM_XPS; ++i) { + int dt_control; + + // Pass on all events + writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG); + + // Enable PMU capability + // [0] dt_enable = 1 + dt_control = readl(gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL); + dt_control |= 0x1; + writel(dt_control, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL); + } + // Assume no other pmu_event_sel registers are set // cycle counter does not need to be enabled @@ -134,15 +212,14 @@ static int gator_events_ccn504_start(void) pmu_event_id = get_pmu_event_id(gator_events_ccn504_event[i]); node_type = get_node_type(gator_events_ccn504_event[i]); region = get_region(gator_events_ccn504_event[i]); - //printk(KERN_ERR "%s(%s:%i) pmu_event_id: %x node_type: %x region: %x\n", __FUNCTION__, __FILE__, __LINE__, pmu_event_id, node_type, region); // Verify the node_type oly_id_whole = readl(gator_events_ccn504_base + region*REGION_SIZE + OLY_ID); oly_id = oly_id_whole & 0x1F; node_id = (oly_id_whole >> 8) & 0x7F; if ((oly_id != node_type) || - ((node_type == 0x16) && ((oly_id == 0x14) || (oly_id == 0x15) || (oly_id == 0x16) || (oly_id == 0x18) || (oly_id == 0x19) || (oly_id == 0x1A)))) { - printk(KERN_ERR "%s(%s:%i) oly_id is %x expected %x\n", __FUNCTION__, __FILE__, __LINE__, oly_id, node_type); + ((node_type == 0x16) && ((oly_id != 0x14) && (oly_id != 0x15) && (oly_id != 0x16) && 
(oly_id != 0x18) && (oly_id != 0x19) && (oly_id != 0x1A)))) { + printk(KERN_ERR "gator: oly_id is 0x%x expected 0x%x\n", oly_id, node_type); return -1; } @@ -160,7 +237,6 @@ static int gator_events_ccn504_start(void) gator_events_ccn504_set_dt_config(node_id/2, i, (node_id & 1) == 0 ? 0x8 : 0xC); break; } - //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, pmu_event_sel, region*REGION_SIZE + PMU_EVENT_SEL); writel(pmu_event_sel, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL); } @@ -171,21 +247,25 @@ static void gator_events_ccn504_stop(void) { int i; + if (!gator_events_ccn504_global_enabled) { + return; + } + // cycle counter does not need to be disabled for (i = 0; i < CCNT; ++i) { - int node_type; int region; - node_type = get_node_type(gator_events_ccn504_event[i]); + if (!gator_events_ccn504_enabled[i]) { + continue; + } + region = get_region(gator_events_ccn504_event[i]); - //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, 0, region*REGION_SIZE + PMU_EVENT_SEL); writel(0, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL); } // Clear dt_config - for (i = 0; i < 11; ++i) { - //printk(KERN_ERR "%s(%s:%i) writel %x %x\n", __FUNCTION__, __FILE__, __LINE__, 0, (REGION_XP + i)*REGION_SIZE + DT_CONFIG); + for (i = 0; i < NUM_XPS; ++i) { writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG); } } @@ -194,27 +274,20 @@ static int gator_events_ccn504_read(int **buffer) { int i; int len = 0; + int value; - if (!on_primary_core()) { + if (!on_primary_core() || !gator_events_ccn504_global_enabled) { return 0; } // Verify the pmsr register is zero - //i = 0; - while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) != 0) { - //++i; - } - //printk(KERN_ERR "%s(%s:%i) %i\n", __FUNCTION__, __FILE__, __LINE__, i); + while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) != 0); // Request a PMU snapshot writel(1, 
gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_REQ); // Wait for the snapshot - //i = 0; - while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) == 0) { - //++i; - } - //printk(KERN_ERR "%s(%s:%i) %i\n", __FUNCTION__, __FILE__, __LINE__, i); + while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) == 0); // Read the shadow registers for (i = 0; i < CNTMAX; ++i) { @@ -222,8 +295,12 @@ static int gator_events_ccn504_read(int **buffer) continue; } - gator_events_ccn504_buffer[len++] = gator_events_ccn504_key[i]; - gator_events_ccn504_buffer[len++] = readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + (i == CCNT ? PMCCNTRSR : PMEVCNTSR0 + 8*i)); + value = readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + (i == CCNT ? PMCCNTRSR : PMEVCNTSR0 + 8*i)); + if (gator_events_ccn504_prev[i] != 0x80808080) { + gator_events_ccn504_buffer[len++] = gator_events_ccn504_key[i]; + gator_events_ccn504_buffer[len++] = value - gator_events_ccn504_prev[i]; + } + gator_events_ccn504_prev[i] = value; // Are the counters registers cleared when read? Is that what the cntr_rst bit on the pmcr register does? 
} @@ -231,20 +308,12 @@ static int gator_events_ccn504_read(int **buffer) // Clear the PMU snapshot status writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_CLR); + if (buffer) + *buffer = gator_events_ccn504_buffer; + return len; } -static void __maybe_unused gator_events_ccn504_enumerate(int pos, int size) -{ - int i; - u32 oly_id; - - for (i = pos; i < pos + size; ++i) { - oly_id = readl(gator_events_ccn504_base + i*REGION_SIZE + OLY_ID); - printk(KERN_ERR "%s(%s:%i) %i %08x\n", __FUNCTION__, __FILE__, __LINE__, i, oly_id); - } -} - static struct gator_interface gator_events_ccn504_interface = { .shutdown = gator_events_ccn504_create_shutdown, .create_files = gator_events_ccn504_create_files, @@ -263,36 +332,9 @@ int gator_events_ccn504_init(void) gator_events_ccn504_base = ioremap(ccn504_addr, NUM_REGIONS*REGION_SIZE); if (gator_events_ccn504_base == NULL) { - printk(KERN_ERR "%s(%s:%i) ioremap returned NULL\n", __FUNCTION__, __FILE__, __LINE__); + printk(KERN_ERR "gator: ioremap returned NULL\n"); return -1; } - //printk(KERN_ERR "%s(%s:%i)\n", __FUNCTION__, __FILE__, __LINE__); - - // Test - can memory be read - { - //gator_events_ccn504_enumerate(0, NUM_REGIONS); - -#if 0 - // DT - gator_events_ccn504_enumerate(1, 1); - // HN-F - gator_events_ccn504_enumerate(32, 8); - // XP - gator_events_ccn504_enumerate(64, 11); - // RN-I - gator_events_ccn504_enumerate(128, 1); - gator_events_ccn504_enumerate(130, 1); - gator_events_ccn504_enumerate(134, 1); - gator_events_ccn504_enumerate(140, 1); - gator_events_ccn504_enumerate(144, 1); - gator_events_ccn504_enumerate(148, 1); - // SBAS - gator_events_ccn504_enumerate(129, 1); - gator_events_ccn504_enumerate(137, 1); - gator_events_ccn504_enumerate(139, 1); - gator_events_ccn504_enumerate(147, 1); -#endif - } for (i = 0; i < CNTMAX; ++i) { gator_events_ccn504_enabled[i] = 0; @@ -302,5 +344,3 @@ int gator_events_ccn504_init(void) return gator_events_install(&gator_events_ccn504_interface); } - 
-gator_events_init(gator_events_ccn504_init); diff --git a/drivers/gator/gator_events_irq.c b/drivers/gator/gator_events_irq.c index b4df7faefff8..b11879a248f8 100644 --- a/drivers/gator/gator_events_irq.c +++ b/drivers/gator/gator_events_irq.c @@ -163,5 +163,3 @@ int gator_events_irq_init(void) return gator_events_install(&gator_events_irq_interface); } - -gator_events_init(gator_events_irq_init); diff --git a/drivers/gator/gator_events_l2c-310.c b/drivers/gator/gator_events_l2c-310.c index 21aa4a214d97..ee521af22517 100644 --- a/drivers/gator/gator_events_l2c-310.c +++ b/drivers/gator/gator_events_l2c-310.c @@ -206,5 +206,3 @@ int gator_events_l2c310_init(void) return gator_events_install(&gator_events_l2c310_interface); } - -gator_events_init(gator_events_l2c310_init); diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c index dd275f70b700..6719c1ec73a2 100644 --- a/drivers/gator/gator_events_mali_4xx.c +++ b/drivers/gator/gator_events_mali_4xx.c @@ -415,25 +415,12 @@ static void mali_counter_initialize(void) int i; int core_id; - mali_osk_fb_control_set_type *mali_set_fb_event; mali_profiling_control_type *mali_control; init_counters(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores) - 1); init_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1); init_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1); - mali_set_fb_event = symbol_get(_mali_osk_fb_control_set); - - if (mali_set_fb_event) { - pr_debug("gator: mali online _mali_osk_fb_control_set symbol @ %p\n", mali_set_fb_event); - - mali_set_fb_event(0, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0)); - - symbol_put(_mali_osk_fb_control_set); - } else { - printk("gator: mali online _mali_osk_fb_control_set symbol not found\n"); - } - /* Generic control interface for Mali DDK. 
*/ mali_control = symbol_get(_mali_profiling_control); if (mali_control) { @@ -491,7 +478,6 @@ static void mali_counter_initialize(void) static void mali_counter_deinitialize(void) { mali_profiling_set_event_type *mali_set_hw_event; - mali_osk_fb_control_set_type *mali_set_fb_event; mali_profiling_control_type *mali_control; mali_set_hw_event = symbol_get(_mali_profiling_set_event); @@ -509,23 +495,11 @@ static void mali_counter_deinitialize(void) printk("gator: mali offline _mali_profiling_set_event symbol not found\n"); } - mali_set_fb_event = symbol_get(_mali_osk_fb_control_set); - - if (mali_set_fb_event) { - pr_debug("gator: mali offline _mali_osk_fb_control_set symbol @ %p\n", mali_set_fb_event); - - mali_set_fb_event(0, 0); - - symbol_put(_mali_osk_fb_control_set); - } else { - printk("gator: mali offline _mali_osk_fb_control_set symbol not found\n"); - } - /* Generic control interface for Mali DDK. */ mali_control = symbol_get(_mali_profiling_control); if (mali_control) { - pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_set_fb_event); + pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control); /* Reset the DDK state - disable counter collection */ mali_control(SW_COUNTER_ENABLE, 0); @@ -747,5 +721,3 @@ int gator_events_mali_init(void) return gator_events_install(&gator_events_mali_interface); } - -gator_events_init(gator_events_mali_init); diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c index 5a98b3745fc2..466ca1683c7e 100644 --- a/drivers/gator/gator_events_mali_common.c +++ b/drivers/gator/gator_events_mali_common.c @@ -28,7 +28,7 @@ extern const char *gator_mali_get_mali_name(void) } } -extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter) +extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, 
struct dentry *root, mali_counter *counter, unsigned long *event) { int err; char buf[255]; @@ -56,6 +56,13 @@ extern int gator_mali_create_file_system(const char *mali_name, const char *even pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf); return -1; } + if (event != NULL) { + err = gatorfs_create_ulong(sb, dir, "event", event); + if (err != 0) { + pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf); + return -1; + } + } } return 0; diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h index d67ee2d245ad..509f9b61884a 100644 --- a/drivers/gator/gator_events_mali_common.h +++ b/drivers/gator/gator_events_mali_common.h @@ -43,7 +43,6 @@ typedef struct { * Mali-4xx */ typedef int mali_profiling_set_event_type(unsigned int, int); -typedef void mali_osk_fb_control_set_type(unsigned int, unsigned int); typedef void mali_profiling_control_type(unsigned int, unsigned int); typedef void mali_profiling_get_counters_type(unsigned int *, unsigned int *, unsigned int *, unsigned int *); @@ -51,7 +50,6 @@ typedef void mali_profiling_get_counters_type(unsigned int *, unsigned int *, un * Driver entry points for functions called directly by gator. */ extern int _mali_profiling_set_event(unsigned int, int); -extern void _mali_osk_fb_control_set(unsigned int, unsigned int); extern void _mali_profiling_control(unsigned int, unsigned int); extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigned int *, unsigned int *); @@ -75,7 +73,7 @@ extern const char *gator_mali_get_mali_name(void); * * @return 0 if entry point was created, non-zero if not. 
*/ -extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter); +extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event); /** * Initializes the counter array. diff --git a/drivers/gator/gator_events_mali_t6xx.c b/drivers/gator/gator_events_mali_t6xx.c index 2576a99a126a..7bf7d6a6dbf9 100644 --- a/drivers/gator/gator_events_mali_t6xx.c +++ b/drivers/gator/gator_events_mali_t6xx.c @@ -109,12 +109,14 @@ enum { #define NUMBER_OF_SOFTWARE_COUNTERS (sizeof(software_counter_names) / sizeof(software_counter_names[0])) #define FIRST_ACCUMULATOR (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS) #define NUMBER_OF_ACCUMULATORS (sizeof(accumulators_names) / sizeof(accumulators_names[0])) -#define NUMBER_OF_EVENTS (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS) +#define FILMSTRIP (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS) +#define NUMBER_OF_EVENTS (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS + 1) /* * gatorfs variables for counter enable state */ static mali_counter counters[NUMBER_OF_EVENTS]; +static unsigned long filmstrip_event; /* An array used to return the data we recorded * as key,value pairs hence the *2 @@ -285,28 +287,37 @@ static int create_files(struct super_block *sb, struct dentry *root) */ int counter_index = 0; const char *mali_name = gator_mali_get_mali_name(); + mali_profiling_control_type *mali_control; for (event = FIRST_TIMELINE_EVENT; event < FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS; event++) { - if (gator_mali_create_file_system(mali_name, timeline_event_names[counter_index], sb, root, &counters[event]) != 0) { + if (gator_mali_create_file_system(mali_name, timeline_event_names[counter_index], sb, root, 
&counters[event], NULL) != 0) { return -1; } counter_index++; } counter_index = 0; for (event = FIRST_SOFTWARE_COUNTER; event < FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS; event++) { - if (gator_mali_create_file_system(mali_name, software_counter_names[counter_index], sb, root, &counters[event]) != 0) { + if (gator_mali_create_file_system(mali_name, software_counter_names[counter_index], sb, root, &counters[event], NULL) != 0) { return -1; } counter_index++; } counter_index = 0; for (event = FIRST_ACCUMULATOR; event < FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS; event++) { - if (gator_mali_create_file_system(mali_name, accumulators_names[counter_index], sb, root, &counters[event]) != 0) { + if (gator_mali_create_file_system(mali_name, accumulators_names[counter_index], sb, root, &counters[event], NULL) != 0) { return -1; } counter_index++; } + mali_control = symbol_get(_mali_profiling_control); + if (mali_control) { + if (gator_mali_create_file_system(mali_name, "Filmstrip_cnt0", sb, root, &counters[FILMSTRIP], &filmstrip_event) != 0) { + return -1; + } + symbol_put(_mali_profiling_control); + } + return 0; } @@ -350,6 +361,7 @@ static int register_tracepoints(void) static int start(void) { unsigned int cnt; + mali_profiling_control_type *mali_control; /* Clean all data for the next capture */ for (cnt = 0; cnt < NUMBER_OF_TIMELINE_EVENTS; cnt++) { @@ -370,6 +382,30 @@ static int start(void) return -1; } + /* Generic control interface for Mali DDK. */ + mali_control = symbol_get(_mali_profiling_control); + if (mali_control) { + /* The event attribute in the XML file keeps the actual frame rate. */ + unsigned int enabled = counters[FILMSTRIP].enabled ? 
1 : 0; + unsigned int rate = filmstrip_event & 0xff; + unsigned int resize_factor = (filmstrip_event >> 8) & 0xff; + + pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control); + +#define FBDUMP_CONTROL_ENABLE (1) +#define FBDUMP_CONTROL_RATE (2) +#define FBDUMP_CONTROL_RESIZE_FACTOR (4) + mali_control(FBDUMP_CONTROL_ENABLE, enabled); + mali_control(FBDUMP_CONTROL_RATE, rate); + mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor); + + pr_debug("gator: sent mali_control enabled=%d, rate=%d, resize_factor=%d\n", enabled, rate, resize_factor); + + symbol_put(_mali_profiling_control); + } else { + printk("gator: mali online _mali_profiling_control symbol not found\n"); + } + /* * Set the first timestamp for calculating the sample interval. The first interval could be quite long, * since it will be the time between 'start' and the first 'read'. @@ -382,6 +418,8 @@ static int start(void) static void stop(void) { + mali_profiling_control_type *mali_control; + pr_debug("gator: Mali-T6xx: stop\n"); /* @@ -402,6 +440,18 @@ static void stop(void) GATOR_UNREGISTER_TRACE(mali_total_alloc_pages_change); pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint deactivated\n"); + + /* Generic control interface for Mali DDK. 
*/ + mali_control = symbol_get(_mali_profiling_control); + if (mali_control) { + pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control); + + mali_control(FBDUMP_CONTROL_ENABLE, 0); + + symbol_put(_mali_profiling_control); + } else { + printk("gator: mali offline _mali_profiling_control symbol not found\n"); + } } static int read(int **buffer) @@ -508,5 +558,3 @@ extern int gator_events_mali_t6xx_init(void) return gator_events_install(&gator_events_mali_t6xx_interface); } - -gator_events_init(gator_events_mali_t6xx_init); diff --git a/drivers/gator/gator_events_mali_t6xx_hw.c b/drivers/gator/gator_events_mali_t6xx_hw.c index f557350eb9bc..e406991398d9 100644 --- a/drivers/gator/gator_events_mali_t6xx_hw.c +++ b/drivers/gator/gator_events_mali_t6xx_hw.c @@ -63,6 +63,8 @@ static kbase_instr_hwcnt_disable_type *kbase_instr_hwcnt_disable_symbol; static kbase_va_free_type *kbase_va_free_symbol; static kbase_destroy_context_type *kbase_destroy_context_symbol; +static long shader_present_low = 0; + /** The interval between reads, in ns. 
* * Earlier we introduced @@ -496,6 +498,7 @@ static int start(void) mali_error err; int cnt; u16 bitmask[] = { 0, 0, 0, 0 }; + unsigned long long shadersPresent = 0; /* Setup HW counters */ num_hardware_counters_enabled = 0; @@ -539,6 +542,11 @@ static int start(void) goto out; } + + /* See if we can get the number of shader cores */ + shadersPresent = kbdevice->shader_present_bitmap; + shader_present_low = (unsigned long)shadersPresent; + /* * The amount of memory needed to store the dump (bytes) * DUMP_SIZE = number of core groups @@ -679,21 +687,41 @@ static int read(int **buffer) kbase_device_busy = false; if (success == MALI_TRUE) { + /* Cycle through hardware counters and accumulate totals */ for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) { const mali_counter *counter = &counters[cnt]; if (counter->enabled) { const int block = GET_HW_BLOCK(cnt); const int counter_offset = GET_COUNTER_OFFSET(cnt); - const u32 *counter_block = (u32 *) ((uintptr_t)kernel_dump_buffer + vithar_blocks[block]); - const u32 *counter_address = counter_block + counter_offset; - value = *counter_address; + const char* block_base_address = (char*)kernel_dump_buffer + vithar_blocks[block]; + /* If counter belongs to shader block need to take into account all cores */ if (block == SHADER_BLOCK) { - /* (counter_address + 0x000) has already been accounted-for above. 
*/ - value += *(counter_address + 0x100); - value += *(counter_address + 0x200); - value += *(counter_address + 0x300); + int i = 0; + int shader_core_count = 0; + value = 0; + + for (i = 0; i < 4; i++) { + if ((shader_present_low >> i) & 1) { + value += *((u32*) (block_base_address + (0x100 * i)) + counter_offset); + shader_core_count++; + } + } + + for (i = 0; i < 4; i++) { + if((shader_present_low >> (i+4)) & 1) { + value += *((u32*)(block_base_address + (0x100 * i) + 0x800) + counter_offset); + shader_core_count++; + } + } + + /* Need to total by number of cores to produce an average */ + if (shader_core_count != 0) { + value /= shader_core_count; + } + } else { + value = *((u32*)block_base_address + counter_offset); } counter_dump[len++] = counter->key; @@ -727,7 +755,7 @@ static int create_files(struct super_block *sb, struct dentry *root) const char *mali_name = gator_mali_get_mali_name(); for (event = 0; event < NUMBER_OF_HARDWARE_COUNTERS; event++) { - if (gator_mali_create_file_system(mali_name, hardware_counter_names[counter_index], sb, root, &counters[event]) != 0) + if (gator_mali_create_file_system(mali_name, hardware_counter_names[counter_index], sb, root, &counters[event], NULL) != 0) return -1; counter_index++; } @@ -754,5 +782,3 @@ int gator_events_mali_t6xx_hw_init(void) return gator_events_install(&gator_events_mali_t6xx_interface); } - -gator_events_init(gator_events_mali_t6xx_hw_init); diff --git a/drivers/gator/gator_events_meminfo.c b/drivers/gator/gator_events_meminfo.c index c1e360d12895..451290d9af17 100644 --- a/drivers/gator/gator_events_meminfo.c +++ b/drivers/gator/gator_events_meminfo.c @@ -8,27 +8,62 @@ */ #include "gator.h" + +#include +#include +#include +#include #include #include -#include -#define MEMINFO_MEMFREE 0 -#define MEMINFO_MEMUSED 1 -#define MEMINFO_BUFFERRAM 2 -#define MEMINFO_TOTAL 3 +enum { + MEMINFO_MEMFREE, + MEMINFO_MEMUSED, + MEMINFO_BUFFERRAM, + MEMINFO_TOTAL, +}; -static ulong meminfo_global_enabled; +enum { + 
PROC_SIZE, + PROC_SHARE, + PROC_TEXT, + PROC_DATA, + PROC_COUNT, +}; + +static const char * const meminfo_names[] = { + "Linux_meminfo_memfree", + "Linux_meminfo_memused", + "Linux_meminfo_bufferram", +}; + +static const char * const proc_names[] = { + "Linux_proc_statm_size", + "Linux_proc_statm_share", + "Linux_proc_statm_text", + "Linux_proc_statm_data", +}; + +static bool meminfo_global_enabled; static ulong meminfo_enabled[MEMINFO_TOTAL]; -static ulong meminfo_key[MEMINFO_TOTAL]; -static unsigned long long meminfo_buffer[MEMINFO_TOTAL * 2]; +static ulong meminfo_keys[MEMINFO_TOTAL]; +static long long meminfo_buffer[2 * (MEMINFO_TOTAL + 2)]; static int meminfo_length = 0; -static unsigned int mem_event = 0; static bool new_data_avail; -static void wq_sched_handler(struct work_struct *wsptr); -DECLARE_WORK(work, wq_sched_handler); -static struct timer_list meminfo_wake_up_timer; -static void meminfo_wake_up_handler(unsigned long unused_data); +static bool proc_global_enabled; +static ulong proc_enabled[PROC_COUNT]; +static ulong proc_keys[PROC_COUNT]; +static DEFINE_PER_CPU(long long, proc_buffer[2 * (PROC_COUNT + 3)]); + +static int gator_meminfo_func(void *data); +static bool gator_meminfo_run; +// Initialize semaphore unlocked to initialize memory values +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) +static DECLARE_MUTEX(gator_meminfo_sem); +#else +static DEFINE_SEMAPHORE(gator_meminfo_sem); +#endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) GATOR_DEFINE_PROBE(mm_page_free_direct, TP_PROTO(struct page *page, unsigned int order)) @@ -36,7 +71,7 @@ GATOR_DEFINE_PROBE(mm_page_free_direct, TP_PROTO(struct page *page, unsigned int GATOR_DEFINE_PROBE(mm_page_free, TP_PROTO(struct page *page, unsigned int order)) #endif { - mem_event++; + up(&gator_meminfo_sem); } #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) @@ -45,12 +80,12 @@ GATOR_DEFINE_PROBE(mm_pagevec_free, TP_PROTO(struct page *page, int cold)) GATOR_DEFINE_PROBE(mm_page_free_batched, 
TP_PROTO(struct page *page, int cold)) #endif { - mem_event++; + up(&gator_meminfo_sem); } GATOR_DEFINE_PROBE(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype)) { - mem_event++; + up(&gator_meminfo_sem); } static int gator_events_meminfo_create_files(struct super_block *sb, struct dentry *root) @@ -59,24 +94,21 @@ static int gator_events_meminfo_create_files(struct super_block *sb, struct dent int i; for (i = 0; i < MEMINFO_TOTAL; i++) { - switch (i) { - case MEMINFO_MEMFREE: - dir = gatorfs_mkdir(sb, root, "Linux_meminfo_memfree"); - break; - case MEMINFO_MEMUSED: - dir = gatorfs_mkdir(sb, root, "Linux_meminfo_memused"); - break; - case MEMINFO_BUFFERRAM: - dir = gatorfs_mkdir(sb, root, "Linux_meminfo_bufferram"); - break; - default: - return -1; - } + dir = gatorfs_mkdir(sb, root, meminfo_names[i]); if (!dir) { return -1; } gatorfs_create_ulong(sb, dir, "enabled", &meminfo_enabled[i]); - gatorfs_create_ro_ulong(sb, dir, "key", &meminfo_key[i]); + gatorfs_create_ro_ulong(sb, dir, "key", &meminfo_keys[i]); + } + + for (i = 0; i < PROC_COUNT; ++i) { + dir = gatorfs_mkdir(sb, root, proc_names[i]); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &proc_enabled[i]); + gatorfs_create_ro_ulong(sb, dir, "key", &proc_keys[i]); } return 0; @@ -86,13 +118,26 @@ static int gator_events_meminfo_start(void) { int i; - new_data_avail = true; + new_data_avail = false; + meminfo_global_enabled = 0; for (i = 0; i < MEMINFO_TOTAL; i++) { if (meminfo_enabled[i]) { meminfo_global_enabled = 1; + break; } } + proc_global_enabled = 0; + for (i = 0; i < PROC_COUNT; ++i) { + if (proc_enabled[i]) { + proc_global_enabled = 1; + break; + } + } + if (meminfo_enabled[MEMINFO_MEMUSED]) { + proc_global_enabled = 1; + } + if (meminfo_global_enabled == 0) return 0; @@ -111,9 +156,16 @@ static int gator_events_meminfo_start(void) if (GATOR_REGISTER_TRACE(mm_page_alloc)) goto mm_page_alloc_exit; - 
setup_timer(&meminfo_wake_up_timer, meminfo_wake_up_handler, 0); + // Start worker thread + gator_meminfo_run = true; + // Since the mutex starts unlocked, memory values will be initialized + if (IS_ERR(kthread_run(gator_meminfo_func, NULL, "gator_meminfo"))) + goto kthread_run_exit; + return 0; +kthread_run_exit: + GATOR_UNREGISTER_TRACE(mm_page_alloc); mm_page_alloc_exit: #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) GATOR_UNREGISTER_TRACE(mm_pagevec_free); @@ -132,8 +184,6 @@ mm_page_free_exit: static void gator_events_meminfo_stop(void) { - int i; - if (meminfo_global_enabled) { #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) GATOR_UNREGISTER_TRACE(mm_page_free_direct); @@ -144,68 +194,75 @@ static void gator_events_meminfo_stop(void) #endif GATOR_UNREGISTER_TRACE(mm_page_alloc); - del_timer_sync(&meminfo_wake_up_timer); - } - - meminfo_global_enabled = 0; - for (i = 0; i < MEMINFO_TOTAL; i++) { - meminfo_enabled[i] = 0; + // Stop worker thread + gator_meminfo_run = false; + up(&gator_meminfo_sem); } } // Must be run in process context as the kernel function si_meminfo() can sleep -static void wq_sched_handler(struct work_struct *wsptr) +static int gator_meminfo_func(void *data) { struct sysinfo info; int i, len; unsigned long long value; - meminfo_length = len = 0; - - si_meminfo(&info); - for (i = 0; i < MEMINFO_TOTAL; i++) { - if (meminfo_enabled[i]) { - switch (i) { - case MEMINFO_MEMFREE: - value = info.freeram * PAGE_SIZE; - break; - case MEMINFO_MEMUSED: - value = (info.totalram - info.freeram) * PAGE_SIZE; - break; - case MEMINFO_BUFFERRAM: - value = info.bufferram * PAGE_SIZE; - break; - default: - value = 0; - break; - } - meminfo_buffer[len++] = (unsigned long long)meminfo_key[i]; - meminfo_buffer[len++] = value; + for (;;) { + if (down_killable(&gator_meminfo_sem)) { + break; } + + // Eat up any pending events + while (!down_trylock(&gator_meminfo_sem)); + + if (!gator_meminfo_run) { + break; + } + + meminfo_length = len = 0; + + 
si_meminfo(&info); + for (i = 0; i < MEMINFO_TOTAL; i++) { + if (meminfo_enabled[i]) { + switch (i) { + case MEMINFO_MEMFREE: + value = info.freeram * PAGE_SIZE; + break; + case MEMINFO_MEMUSED: + // pid -1 means system wide + meminfo_buffer[len++] = 1; + meminfo_buffer[len++] = -1; + // Emit value + meminfo_buffer[len++] = meminfo_keys[MEMINFO_MEMUSED]; + meminfo_buffer[len++] = (info.totalram - info.freeram) * PAGE_SIZE; + // Clear pid + meminfo_buffer[len++] = 1; + meminfo_buffer[len++] = 0; + continue; + case MEMINFO_BUFFERRAM: + value = info.bufferram * PAGE_SIZE; + break; + default: + value = 0; + break; + } + meminfo_buffer[len++] = meminfo_keys[i]; + meminfo_buffer[len++] = value; + } + } + + meminfo_length = len; + new_data_avail = true; } - meminfo_length = len; - new_data_avail = true; -} - -static void meminfo_wake_up_handler(unsigned long unused_data) -{ - // had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater - schedule_work(&work); + return 0; } static int gator_events_meminfo_read(long long **buffer) { - static unsigned int last_mem_event = 0; - if (!on_primary_core() || !meminfo_global_enabled) return 0; - if (last_mem_event != mem_event) { - last_mem_event = mem_event; - mod_timer(&meminfo_wake_up_timer, jiffies + 1); - } - if (!new_data_avail) return 0; @@ -217,11 +274,97 @@ static int gator_events_meminfo_read(long long **buffer) return meminfo_length; } +static int gator_events_meminfo_read_proc(long long **buffer, struct task_struct *task) +{ + struct mm_struct *mm; + u64 share = 0; + int i; + long long value; + int len = 0; + int cpu = get_physical_cpu(); + long long *buf = per_cpu(proc_buffer, cpu); + + if (!proc_global_enabled) { + return 0; + } + + // Collect the memory stats of the process instead of the thread + if (task->group_leader != NULL) { + task = task->group_leader; + } + + // get_task_mm/mmput is not needed in this context because the task and its
mm are required as part of the sched_switch + mm = task->mm; + if (mm == NULL) { + return 0; + } + + // Derived from task_statm in fs/proc/task_mmu.c + if (meminfo_enabled[MEMINFO_MEMUSED] || proc_enabled[PROC_SHARE]) { + share = get_mm_counter(mm, +#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32) + file_rss +#else + MM_FILEPAGES +#endif + ); + } + + // key of 1 indicates a pid + buf[len++] = 1; + buf[len++] = task->pid; + + for (i = 0; i < PROC_COUNT; ++i) { + if (proc_enabled[i]) { + switch (i) { + case PROC_SIZE: + value = mm->total_vm; + break; + case PROC_SHARE: + value = share; + break; + case PROC_TEXT: + value = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; + break; + case PROC_DATA: + value = mm->total_vm - mm->shared_vm; + break; + } + + buf[len++] = proc_keys[i]; + buf[len++] = value * PAGE_SIZE; + } + } + + if (meminfo_enabled[MEMINFO_MEMUSED]) { + value = share + get_mm_counter(mm, +#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32) + anon_rss +#else + MM_ANONPAGES +#endif + ); + // Send resident for this pid + buf[len++] = meminfo_keys[MEMINFO_MEMUSED]; + buf[len++] = value * PAGE_SIZE; + } + + // Clear pid + buf[len++] = 1; + buf[len++] = 0; + + if (buffer) + *buffer = buf; + + return len; +} + static struct gator_interface gator_events_meminfo_interface = { .create_files = gator_events_meminfo_create_files, .start = gator_events_meminfo_start, .stop = gator_events_meminfo_stop, .read64 = gator_events_meminfo_read, + .read_proc = gator_events_meminfo_read_proc, }; int gator_events_meminfo_init(void) @@ -231,10 +374,14 @@ int gator_events_meminfo_init(void) meminfo_global_enabled = 0; for (i = 0; i < MEMINFO_TOTAL; i++) { meminfo_enabled[i] = 0; - meminfo_key[i] = gator_events_get_key(); + meminfo_keys[i] = gator_events_get_key(); + } + + proc_global_enabled = 0; + for (i = 0; i < PROC_COUNT; ++i) { + proc_enabled[i] = 0; + proc_keys[i] = gator_events_get_key(); } return 
gator_events_install(&gator_events_meminfo_interface); } - -gator_events_init(gator_events_meminfo_init); diff --git a/drivers/gator/gator_events_mmaped.c b/drivers/gator/gator_events_mmapped.c similarity index 56% rename from drivers/gator/gator_events_mmaped.c rename to drivers/gator/gator_events_mmapped.c index f7670f62a258..f055e48d317a 100644 --- a/drivers/gator/gator_events_mmaped.c +++ b/drivers/gator/gator_events_mmapped.c @@ -11,12 +11,18 @@ * To add them to the events.xml, create an events-mmap.xml with the * following contents and rebuild gatord: * - * - * + * + * * * * * + * + * When adding custom events, be sure to do the following + * - add any needed .c files to the gator driver Makefile + * - call gator_events_install in the events init function + * - add the init function to GATOR_EVENTS_LIST in gator_main.c + * - add a new events-*.xml file to the gator daemon and rebuild */ #include @@ -25,79 +31,71 @@ #include "gator.h" -#define MMAPED_COUNTERS_NUM 3 +#define MMAPPED_COUNTERS_NUM 3 + +static int mmapped_global_enabled; static struct { unsigned long enabled; unsigned long event; unsigned long key; -} mmaped_counters[MMAPED_COUNTERS_NUM]; +} mmapped_counters[MMAPPED_COUNTERS_NUM]; -static int mmaped_buffer[MMAPED_COUNTERS_NUM * 2]; +static int mmapped_buffer[MMAPPED_COUNTERS_NUM * 2]; -#ifdef TODO -static void __iomem *mmaped_base; -#endif - -#ifndef TODO static s64 prev_time; -#endif -/* Adds mmaped_cntX directories and enabled, event, and key files to /dev/gator/events */ -static int gator_events_mmaped_create_files(struct super_block *sb, +/* Adds mmapped_cntX directories and enabled, event, and key files to /dev/gator/events */ +static int gator_events_mmapped_create_files(struct super_block *sb, struct dentry *root) { int i; - for (i = 0; i < MMAPED_COUNTERS_NUM; i++) { + for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) { char buf[16]; struct dentry *dir; - snprintf(buf, sizeof(buf), "mmaped_cnt%d", i); + snprintf(buf, sizeof(buf), "mmapped_cnt%d",
i); dir = gatorfs_mkdir(sb, root, buf); if (WARN_ON(!dir)) return -1; gatorfs_create_ulong(sb, dir, "enabled", - &mmaped_counters[i].enabled); + &mmapped_counters[i].enabled); gatorfs_create_ulong(sb, dir, "event", - &mmaped_counters[i].event); + &mmapped_counters[i].event); gatorfs_create_ro_ulong(sb, dir, "key", - &mmaped_counters[i].key); + &mmapped_counters[i].key); } return 0; } -static int gator_events_mmaped_start(void) +static int gator_events_mmapped_start(void) { -#ifdef TODO - for (i = 0; i < MMAPED_COUNTERS_NUM; i++) - writel(mmaped_counters[i].event, - mmaped_base + COUNTERS_CONFIG_OFFSET[i]); - - writel(ENABLED, COUNTERS_CONTROL_OFFSET); -#endif - -#ifndef TODO + int i; struct timespec ts; + getnstimeofday(&ts); prev_time = timespec_to_ns(&ts); -#endif + + mmapped_global_enabled = 0; + for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) { + if (mmapped_counters[i].enabled) { + mmapped_global_enabled = 1; + break; + } + } return 0; } -static void gator_events_mmaped_stop(void) +static void gator_events_mmapped_stop(void) { -#ifdef TODO - writel(DISABLED, COUNTERS_CONTROL_OFFSET); -#endif } -#ifndef TODO /* This function "simulates" counters, generating values of fancy * functions like sine or triangle... 
*/ -static int mmaped_simulate(int counter, int delta_in_us) +static int mmapped_simulate(int counter, int delta_in_us) { int result = 0; @@ -157,73 +155,55 @@ static int mmaped_simulate(int counter, int delta_in_us) return result; } -#endif -static int gator_events_mmaped_read(int **buffer) +static int gator_events_mmapped_read(int **buffer) { int i; int len = 0; -#ifndef TODO int delta_in_us; struct timespec ts; s64 time; -#endif /* System wide counters - read from one core only */ - if (!on_primary_core()) + if (!on_primary_core() || !mmapped_global_enabled) return 0; -#ifndef TODO getnstimeofday(&ts); time = timespec_to_ns(&ts); delta_in_us = (int)(time - prev_time) / 1000; prev_time = time; -#endif - for (i = 0; i < MMAPED_COUNTERS_NUM; i++) { - if (mmaped_counters[i].enabled) { - mmaped_buffer[len++] = mmaped_counters[i].key; -#ifdef TODO - mmaped_buffer[len++] = - readl(mmaped_base + COUNTERS_VALUE_OFFSET[i]); -#else - mmaped_buffer[len++] = - mmaped_simulate(mmaped_counters[i].event, + for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) { + if (mmapped_counters[i].enabled) { + mmapped_buffer[len++] = mmapped_counters[i].key; + mmapped_buffer[len++] = + mmapped_simulate(mmapped_counters[i].event, delta_in_us); -#endif } } if (buffer) - *buffer = mmaped_buffer; + *buffer = mmapped_buffer; return len; } -static struct gator_interface gator_events_mmaped_interface = { - .create_files = gator_events_mmaped_create_files, - .start = gator_events_mmaped_start, - .stop = gator_events_mmaped_stop, - .read = gator_events_mmaped_read, +static struct gator_interface gator_events_mmapped_interface = { + .create_files = gator_events_mmapped_create_files, + .start = gator_events_mmapped_start, + .stop = gator_events_mmapped_stop, + .read = gator_events_mmapped_read, }; /* Must not be static! 
*/ -int __init gator_events_mmaped_init(void) +int __init gator_events_mmapped_init(void) { int i; -#ifdef TODO - mmaped_base = ioremap(COUNTERS_PHYS_ADDR, SZ_4K); - if (!mmaped_base) - return -ENOMEM; -#endif - - for (i = 0; i < MMAPED_COUNTERS_NUM; i++) { - mmaped_counters[i].enabled = 0; - mmaped_counters[i].key = gator_events_get_key(); + for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) { + mmapped_counters[i].enabled = 0; + mmapped_counters[i].key = gator_events_get_key(); } - return gator_events_install(&gator_events_mmaped_interface); + return gator_events_install(&gator_events_mmapped_interface); } - -gator_events_init(gator_events_mmaped_init); diff --git a/drivers/gator/gator_events_net.c b/drivers/gator/gator_events_net.c index 80cdee41ae3d..9c8d3a43eaeb 100644 --- a/drivers/gator/gator_events_net.c +++ b/drivers/gator/gator_events_net.c @@ -73,6 +73,8 @@ static void calculate_delta(int *rx, int *tx) static int gator_events_net_create_files(struct super_block *sb, struct dentry *root) { + // Network counters are not currently supported in RT-Preempt full because mod_timer is used +#ifndef CONFIG_PREEMPT_RT_FULL struct dentry *dir; dir = gatorfs_mkdir(sb, root, "Linux_net_rx"); @@ -88,6 +90,7 @@ static int gator_events_net_create_files(struct super_block *sb, struct dentry * } gatorfs_create_ulong(sb, dir, "enabled", &nettx_enabled); gatorfs_create_ro_ulong(sb, dir, "key", &nettx_key); +#endif return 0; } @@ -167,5 +170,3 @@ int gator_events_net_init(void) return gator_events_install(&gator_events_net_interface); } - -gator_events_init(gator_events_net_init); diff --git a/drivers/gator/gator_events_perf_pmu.c b/drivers/gator/gator_events_perf_pmu.c index 53b2d0a5afbf..d472df918ab0 100644 --- a/drivers/gator/gator_events_perf_pmu.c +++ b/drivers/gator/gator_events_perf_pmu.c @@ -6,13 +6,18 @@ * published by the Free Software Foundation. 
*/ -#include -#include #include "gator.h" // gator_events_armvX.c is used for Linux 2.6.x #if GATOR_PERF_PMU_SUPPORT +#include +#ifdef CONFIG_OF +#include +#endif +#include +#include + extern bool event_based_sampling; // Maximum number of per-core counters - currently reserves enough space for two full hardware PMUs for big.LITTLE @@ -22,6 +27,9 @@ extern bool event_based_sampling; // + 1 for the cci-400 cycles counter #define UCCNT (CCI_400 + 1) +// Default to 0 if unable to probe the revision which was the previous behavior +#define DEFAULT_CCI_REVISION 0 + // A gator_attr is needed for every counter struct gator_attr { // Set once in gator_events_perf_pmu_*_init - the name of the event in the gatorfs @@ -404,17 +412,81 @@ static void __attr_init(struct gator_attr *const attr) attr->key = gator_events_get_key(); } +#ifdef CONFIG_OF + +static const struct of_device_id arm_cci_matches[] = { + {.compatible = "arm,cci-400" }, + {}, +}; + +static int probe_cci_revision(void) +{ + struct device_node *np; + struct resource res; + void __iomem *cci_ctrl_base; + int rev; + int ret = DEFAULT_CCI_REVISION; + + np = of_find_matching_node(NULL, arm_cci_matches); + if (!np) { + return ret; + } + + if (of_address_to_resource(np, 0, &res)) { + goto node_put; + } + + cci_ctrl_base = ioremap(res.start, resource_size(&res)); + + rev = (readl_relaxed(cci_ctrl_base + 0xfe8) >> 4) & 0xf; + + if (rev <= 4) { + ret = 0; + } else if (rev <= 6) { + ret = 1; + } + + iounmap(cci_ctrl_base); + + node_put: + of_node_put(np); + + return ret; +} + +#else + +static int probe_cci_revision(void) +{ + return DEFAULT_CCI_REVISION; +} + +#endif + static void gator_events_perf_pmu_cci_init(const int type) { int cnt; + const char *cci_name; - strncpy(uc_attrs[uc_attr_count].name, "cci-400_ccnt", sizeof(uc_attrs[uc_attr_count].name)); + switch (probe_cci_revision()) { + case 0: + cci_name = "cci-400"; + break; + case 1: + cci_name = "cci-400-r1"; + break; + default: + pr_debug("gator: unrecognized 
cci-400 revision\n"); + return; + } + + snprintf(uc_attrs[uc_attr_count].name, sizeof(uc_attrs[uc_attr_count].name), "%s_ccnt", cci_name); uc_attrs[uc_attr_count].type = type; ++uc_attr_count; for (cnt = 0; cnt < CCI_400; ++cnt, ++uc_attr_count) { struct gator_attr *const attr = &uc_attrs[uc_attr_count]; - snprintf(attr->name, sizeof(attr->name), "cci-400_cnt%d", cnt); + snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", cci_name, cnt); attr->type = type; } } @@ -477,7 +549,7 @@ int gator_events_perf_pmu_init(void) } if (pe->pmu != NULL && type == pe->pmu->type) { - if (strcmp("CCI", pe->pmu->name) == 0) { + if (strcmp("CCI", pe->pmu->name) == 0 || strcmp("CCI_400", pe->pmu->name) == 0) { gator_events_perf_pmu_cci_init(type); } else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) { found_cpu = true; @@ -512,5 +584,4 @@ int gator_events_perf_pmu_init(void) return gator_events_install(&gator_events_perf_pmu_interface); } -gator_events_init(gator_events_perf_pmu_init); #endif diff --git a/drivers/gator/gator_events_sched.c b/drivers/gator/gator_events_sched.c index 461a0511143d..29f4e39e261c 100644 --- a/drivers/gator/gator_events_sched.c +++ b/drivers/gator/gator_events_sched.c @@ -111,5 +111,3 @@ int gator_events_sched_init(void) return gator_events_install(&gator_events_sched_interface); } - -gator_events_init(gator_events_sched_init); diff --git a/drivers/gator/gator_events_scorpion.c b/drivers/gator/gator_events_scorpion.c index aaf306a4b4c8..c91db1219d08 100644 --- a/drivers/gator/gator_events_scorpion.c +++ b/drivers/gator/gator_events_scorpion.c @@ -666,11 +666,4 @@ int gator_events_scorpion_init(void) return gator_events_install(&gator_events_scorpion_interface); } -gator_events_init(gator_events_scorpion_init); - -#else -int gator_events_scorpion_init(void) -{ - return -1; -} #endif diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c index 7dd70d9eccf9..9773ae24d6f2 100644 --- a/drivers/gator/gator_main.c +++ 
b/drivers/gator/gator_main.c @@ -8,7 +8,8 @@ */ // This version must match the gator daemon version -static unsigned long gator_protocol_version = 16; +#define PROTOCOL_VERSION 17 +static unsigned long gator_protocol_version = PROTOCOL_VERSION; #include #include @@ -22,16 +23,20 @@ static unsigned long gator_protocol_version = 16; #include #include #include +#include #include #include #include "gator.h" -#include "gator_events.h" #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) #error kernels prior to 2.6.32 are not supported #endif +#if defined(MODULE) && !defined(CONFIG_MODULES) +#error Cannot build a module against a kernel that does not support modules. To resolve, either rebuild the kernel to support modules or build gator as part of the kernel. +#endif + #if !defined(CONFIG_GENERIC_TRACER) && !defined(CONFIG_TRACING) #error gator requires the kernel to have CONFIG_GENERIC_TRACER or CONFIG_TRACING defined #endif @@ -44,7 +49,7 @@ static unsigned long gator_protocol_version = 16; #error gator requires the kernel to have CONFIG_HIGH_RES_TIMERS defined to support PC sampling #endif -#if defined(__arm__) && defined(CONFIG_SMP) && !defined(CONFIG_LOCAL_TIMERS) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && defined(__arm__) && defined(CONFIG_SMP) && !defined(CONFIG_LOCAL_TIMERS) #error gator requires the kernel to have CONFIG_LOCAL_TIMERS defined on SMP systems #endif @@ -87,6 +92,7 @@ static unsigned long gator_protocol_version = 16; #define MESSAGE_COOKIE 1 #define MESSAGE_THREAD_NAME 2 #define HRTIMER_CORE_NAME 3 +#define MESSAGE_LINK 4 #define MESSAGE_GPU_START 1 #define MESSAGE_GPU_STOP 2 @@ -136,6 +142,7 @@ static u64 gator_live_rate; static unsigned long gator_started; static u64 gator_monotonic_started; +static u64 gator_hibernate_time; static unsigned long gator_buffer_opened; static unsigned long gator_timer_count; static unsigned long gator_response_type; @@ -147,6 +154,8 @@ bool event_based_sampling; static 
DECLARE_WAIT_QUEUE_HEAD(gator_buffer_wait); static DECLARE_WAIT_QUEUE_HEAD(gator_annotate_wait); static struct timer_list gator_buffer_wake_up_timer; +static bool gator_buffer_wake_stop; +static struct task_struct *gator_buffer_wake_thread; static LIST_HEAD(gator_events); static DEFINE_PER_CPU(u64, last_timestamp); @@ -189,6 +198,34 @@ static DEFINE_PER_CPU(char *[NUM_GATOR_BUFS], gator_buffer); // The time after which the buffer should be committed for live display static DEFINE_PER_CPU(u64, gator_buffer_commit_time); +// List of all gator events - new events must be added to this list +#define GATOR_EVENTS_LIST \ + GATOR_EVENT(gator_events_armv6_init) \ + GATOR_EVENT(gator_events_armv7_init) \ + GATOR_EVENT(gator_events_block_init) \ + GATOR_EVENT(gator_events_ccn504_init) \ + GATOR_EVENT(gator_events_irq_init) \ + GATOR_EVENT(gator_events_l2c310_init) \ + GATOR_EVENT(gator_events_mali_init) \ + GATOR_EVENT(gator_events_mali_t6xx_hw_init) \ + GATOR_EVENT(gator_events_mali_t6xx_init) \ + GATOR_EVENT(gator_events_meminfo_init) \ + GATOR_EVENT(gator_events_mmapped_init) \ + GATOR_EVENT(gator_events_net_init) \ + GATOR_EVENT(gator_events_perf_pmu_init) \ + GATOR_EVENT(gator_events_sched_init) \ + GATOR_EVENT(gator_events_scorpion_init) \ + +#define GATOR_EVENT(EVENT_INIT) __weak int EVENT_INIT(void); +GATOR_EVENTS_LIST +#undef GATOR_EVENT + +static int (*gator_events_list[])(void) = { +#define GATOR_EVENT(EVENT_INIT) EVENT_INIT, +GATOR_EVENTS_LIST +#undef GATOR_EVENT +}; + /****************************************************************************** * Application Includes ******************************************************************************/ @@ -392,6 +429,21 @@ static void gator_buffer_wake_up(unsigned long data) wake_up(&gator_buffer_wait); } +static int gator_buffer_wake_func(void *data) +{ + while (!gator_buffer_wake_stop) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + if (gator_buffer_wake_stop) { + break; + } + + gator_buffer_wake_up(0); 
+ } + + return 0; +} + /****************************************************************************** * Commit interface ******************************************************************************/ @@ -517,7 +569,14 @@ static void gator_commit_buffer(int cpu, int buftype, u64 time) marshal_frame(cpu, buftype); // had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater - mod_timer(&gator_buffer_wake_up_timer, jiffies + 1); + if (per_cpu(in_scheduler_context, cpu)) { +#ifndef CONFIG_PREEMPT_RT_FULL + // mod_timer can not be used in interrupt context in RT-Preempt full + mod_timer(&gator_buffer_wake_up_timer, jiffies + 1); +#endif + } else { + wake_up_process(gator_buffer_wake_thread); + } } static void buffer_check(int cpu, int buftype, u64 time) @@ -590,8 +649,13 @@ void gator_backtrace_handler(struct pt_regs *const regs) // Collect counters if (!per_cpu(collecting, cpu)) { - collect_counters(time); + collect_counters(time, NULL); } + + // No buffer flushing occurs during sched switch for RT-Preempt full. 
The block counter frame will be flushed by collect_counters, but the sched buffer needs to be explicitly flushed +#ifdef CONFIG_PREEMPT_RT_FULL + buffer_check(cpu, SCHED_TRACE_BUF, time); +#endif } static int gator_running; @@ -815,6 +879,7 @@ static struct notifier_block __refdata gator_hotcpu_notifier = { static int gator_pm_notify(struct notifier_block *nb, unsigned long event, void *dummy) { int cpu; + struct timespec ts; switch (event) { case PM_HIBERNATION_PREPARE: @@ -825,9 +890,20 @@ static int gator_pm_notify(struct notifier_block *nb, unsigned long event, void for_each_online_cpu(cpu) { gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false); } + + // Record the wallclock hibernate time + getnstimeofday(&ts); + gator_hibernate_time = timespec_to_ns(&ts) - gator_get_time(); break; case PM_POST_HIBERNATION: case PM_POST_SUSPEND: + // Adjust gator_monotonic_started for the time spent sleeping, as gator_get_time does not account for it + if (gator_hibernate_time > 0) { + getnstimeofday(&ts); + gator_monotonic_started += gator_hibernate_time + gator_get_time() - timespec_to_ns(&ts); + gator_hibernate_time = 0; + } + for_each_online_cpu(cpu) { gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false); } @@ -902,8 +978,10 @@ int gator_events_install(struct gator_interface *interface) int gator_events_get_key(void) { - // key of zero is reserved as a timestamp - static int key = 1; + // key 0 is reserved as a timestamp + // key 1 is reserved as the marker for thread specific counters + // Odd keys are assigned by the driver, even keys by the daemon + static int key = 3; const int ret = key; key += 2; @@ -916,7 +994,7 @@ static int gator_init(void) calc_first_cluster_size(); - // events sources (gator_events.h, generated by gator_events.sh) + // events sources for (i = 0; i < ARRAY_SIZE(gator_events_list); i++) if (gator_events_list[i]) gator_events_list[i](); @@ -941,6 +1019,11 @@ static int gator_start(void) unsigned long cpu, i; struct gator_interface *gi; + 
gator_buffer_wake_stop = false; + if (IS_ERR(gator_buffer_wake_thread = kthread_run(gator_buffer_wake_func, NULL, "gator_bwake"))) { + goto bwake_failure; + } + if (gator_migrate_start()) goto migrate_failure; @@ -1011,6 +1094,9 @@ cookies_failure: events_failure: gator_migrate_stop(); migrate_failure: + gator_buffer_wake_stop = true; + wake_up_process(gator_buffer_wake_thread); +bwake_failure: return -1; } @@ -1034,6 +1120,9 @@ static void gator_stop(void) gi->stop(); gator_migrate_stop(); + + gator_buffer_wake_stop = true; + wake_up_process(gator_buffer_wake_thread); } /****************************************************************************** @@ -1438,3 +1527,6 @@ module_exit(gator_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("ARM Ltd"); MODULE_DESCRIPTION("Gator system profiler"); +#define STRIFY2(ARG) #ARG +#define STRIFY(ARG) STRIFY2(ARG) +MODULE_VERSION(STRIFY(PROTOCOL_VERSION)); diff --git a/drivers/gator/gator_marshaling.c b/drivers/gator/gator_marshaling.c index 3282de843f71..af80ff62e712 100644 --- a/drivers/gator/gator_marshaling.c +++ b/drivers/gator/gator_marshaling.c @@ -89,6 +89,25 @@ static void marshal_thread_name(int pid, char *name) local_irq_restore(flags); } +static void marshal_link(int cookie, int tgid, int pid) +{ + unsigned long cpu = get_physical_cpu(), flags; + u64 time; + + local_irq_save(flags); + time = gator_get_time(); + if (buffer_check_space(cpu, NAME_BUF, MAXSIZE_PACK64 + 5 * MAXSIZE_PACK32)) { + gator_buffer_write_packed_int(cpu, NAME_BUF, MESSAGE_LINK); + gator_buffer_write_packed_int64(cpu, NAME_BUF, time); + gator_buffer_write_packed_int(cpu, NAME_BUF, cookie); + gator_buffer_write_packed_int(cpu, NAME_BUF, tgid); + gator_buffer_write_packed_int(cpu, NAME_BUF, pid); + } + // Check and commit; commit is set to occur once buffer is 3/4 full + buffer_check(cpu, NAME_BUF, time); + local_irq_restore(flags); +} + static bool marshal_backtrace_header(int exec_cookie, int tgid, int pid, int inKernel, u64 time) { int cpu = 
get_physical_cpu(); diff --git a/drivers/gator/gator_trace_gpu.c b/drivers/gator/gator_trace_gpu.c index 12623c4036ee..be135b4aac56 100644 --- a/drivers/gator/gator_trace_gpu.c +++ b/drivers/gator/gator_trace_gpu.c @@ -85,7 +85,7 @@ static void mali_gpu_stop(int unit, int core) int count; int last_tgid = 0; int last_pid = 0; - int last_job_id = 0; + //int last_job_id = 0; spin_lock(&mali_gpu_jobs_lock); if (mali_gpu_jobs[unit][core].count == 0) { @@ -97,7 +97,7 @@ static void mali_gpu_stop(int unit, int core) if (count) { last_tgid = mali_gpu_jobs[unit][core].last_tgid; last_pid = mali_gpu_jobs[unit][core].last_pid; - last_job_id = mali_gpu_jobs[unit][core].last_job_id; + //last_job_id = mali_gpu_jobs[unit][core].last_job_id; } spin_unlock(&mali_gpu_jobs_lock); @@ -242,7 +242,7 @@ int gator_trace_gpu_start(void) * Absence of gpu trace points is not an error */ - memset(&mali_gpu_jobs, sizeof(mali_gpu_jobs), 0); + memset(&mali_gpu_jobs, 0, sizeof(mali_gpu_jobs)); gpu_trace_registered = mali_timeline_trace_registered = mali_job_slots_trace_registered = 0; #if defined(MALI_SUPPORT) && (MALI_SUPPORT != MALI_T6xx) diff --git a/drivers/gator/gator_trace_sched.c b/drivers/gator/gator_trace_sched.c index e98815e25b9c..332b3f6ba965 100644 --- a/drivers/gator/gator_trace_sched.c +++ b/drivers/gator/gator_trace_sched.c @@ -22,6 +22,7 @@ enum { static DEFINE_PER_CPU(uint64_t *, taskname_keys); static DEFINE_PER_CPU(int, collecting); +static DEFINE_PER_CPU(bool, in_scheduler_context); // this array is never read as the cpu wait charts are derived counters // the files are needed, nonetheless, to show that these counters are available @@ -89,7 +90,7 @@ void emit_pid_name(struct task_struct *task) } } -static void collect_counters(u64 time) +static void collect_counters(u64 time, struct task_struct *task) { int *buffer, len, cpu = get_physical_cpu(); long long *buffer64; @@ -104,17 +105,26 @@ static void collect_counters(u64 time) len = gi->read64(&buffer64); marshal_event64(len, 
buffer64); } + if (gi->read_proc && task != NULL) { + len = gi->read_proc(&buffer64, task); + marshal_event64(len, buffer64); + } } // Only check after writing all counters so that time and corresponding counters appear in the same frame buffer_check(cpu, BLOCK_COUNTER_BUF, time); // Commit buffers on timeout if (gator_live_rate > 0 && time >= per_cpu(gator_buffer_commit_time, cpu)) { - static const int buftypes[] = { COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF }; + static const int buftypes[] = { NAME_BUF, COUNTER_BUF, BLOCK_COUNTER_BUF, SCHED_TRACE_BUF }; + unsigned long flags; int i; + + local_irq_save(flags); for (i = 0; i < ARRAY_SIZE(buftypes); ++i) { gator_commit_buffer(cpu, buftypes[i], time); } + local_irq_restore(flags); + // Try to preemptively flush the annotate buffer to reduce the chance of the buffer being full if (on_primary_core() && spin_trylock(&annotate_lock)) { gator_commit_buffer(0, ANNOTATE_BUF, time); @@ -151,6 +161,8 @@ GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_ int state; int cpu = get_physical_cpu(); + per_cpu(in_scheduler_context, cpu) = true; + // do as much work as possible before disabling interrupts cookie = get_exec_cookie(cpu, next); emit_pid_name(next); @@ -163,10 +175,12 @@ GATOR_DEFINE_PROBE(sched_switch, TP_PROTO(struct task_struct *prev, struct task_ } per_cpu(collecting, cpu) = 1; - collect_counters(gator_get_time()); + collect_counters(gator_get_time(), prev); per_cpu(collecting, cpu) = 0; marshal_sched_trace_switch(next->tgid, next->pid, cookie, state); + + per_cpu(in_scheduler_context, cpu) = false; } GATOR_DEFINE_PROBE(sched_process_free, TP_PROTO(struct task_struct *p)) diff --git a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h index 3db454371d59..347a4fe404bc 100644 --- a/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h +++ b/drivers/gator/mali/mali_mjollnir_profiling_gator_api.h @@ -1,11 +1,10 @@ -/* - * 
This confidential and proprietary software may be used only as - * authorised by a licensing agreement from ARM Limited - * (C) COPYRIGHT 2013 ARM Limited - * ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorised - * copies and copies may only be made to the extent permitted - * by a licensing agreement from ARM Limited. +/** + * Copyright (C) ARM Limited 2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * */ #ifndef __MALI_MJOLLNIR_PROFILING_GATOR_API_H__ diff --git a/drivers/gator/mali/mali_utgard_profiling_gator_api.h b/drivers/gator/mali/mali_utgard_profiling_gator_api.h index c02a1a43efff..559647a76d29 100644 --- a/drivers/gator/mali/mali_utgard_profiling_gator_api.h +++ b/drivers/gator/mali/mali_utgard_profiling_gator_api.h @@ -1,11 +1,10 @@ -/* - * This confidential and proprietary software may be used only as - * authorised by a licensing agreement from ARM Limited - * (C) COPYRIGHT 2013 ARM Limited - * ALL RIGHTS RESERVED - * The entire notice above must be reproduced on all authorised - * copies and copies may only be made to the extent permitted - * by a licensing agreement from ARM Limited. +/** + * Copyright (C) ARM Limited 2013. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * */ #ifndef __MALI_UTGARD_PROFILING_GATOR_API_H__ diff --git a/drivers/gator/mali_t6xx.mk b/drivers/gator/mali_t6xx.mk index 2cc64113ea2c..1a98c1c6a73f 100644 --- a/drivers/gator/mali_t6xx.mk +++ b/drivers/gator/mali_t6xx.mk @@ -9,16 +9,17 @@ EXTRA_CFLAGS += -DMALI_USE_UMP=1 \ -DMALI_BACKEND_KERNEL=1 \ -DMALI_NO_MALI=0 -KBASE_DIR = $(DDK_DIR)/kernel/drivers/gpu/arm/t6xx/kbase -OSK_DIR = $(DDK_DIR)/kernel/drivers/gpu/arm/t6xx/kbase/osk -UMP_DIR = $(DDK_DIR)/kernel/include/linux +DDK_DIR ?= . +KBASE_DIR = $(DDK_DIR)/drivers/gpu/arm/t6xx/kbase +OSK_DIR = $(DDK_DIR)/drivers/gpu/arm/t6xx/kbase/osk +UMP_DIR = $(DDK_DIR)/include/linux # Include directories in the DDK -EXTRA_CFLAGS += -I$(DDK_DIR) \ +EXTRA_CFLAGS += -I$(KBASE_DIR)/ \ -I$(KBASE_DIR)/.. \ -I$(OSK_DIR)/.. \ -I$(UMP_DIR)/.. \ - -I$(DDK_DIR)/kernel/include \ + -I$(DDK_DIR)/include \ -I$(KBASE_DIR)/osk/src/linux/include \ -I$(KBASE_DIR)/platform_dummy \ -I$(KBASE_DIR)/src diff --git a/tools/gator/daemon/Buffer.cpp b/tools/gator/daemon/Buffer.cpp index c7abbf3a1820..090a71553277 100644 --- a/tools/gator/daemon/Buffer.cpp +++ b/tools/gator/daemon/Buffer.cpp @@ -193,6 +193,17 @@ bool Buffer::eventHeader (const uint64_t curr_time) { return retval; } +bool Buffer::eventTid (const int tid) { + bool retval = false; + if (checkSpace(2*MAXSIZE_PACK32)) { + packInt(1); // key of 1 indicates a tid + packInt(tid); + retval = true; + } + + return retval; +} + void Buffer::event (const int32_t key, const int32_t value) { if (checkSpace(2 * MAXSIZE_PACK32)) { packInt(key); diff --git a/tools/gator/daemon/Buffer.h b/tools/gator/daemon/Buffer.h index f820cfd851e3..b3c8d78cf758 100644 --- a/tools/gator/daemon/Buffer.h +++ b/tools/gator/daemon/Buffer.h @@ -32,6 +32,7 @@ public: void frame (); bool eventHeader (uint64_t curr_time); + bool eventTid (int tid); void event (int32_t key, int32_t value); void event64 (int64_t key, int64_t value); @@ -56,6 +57,10 @@ private: char *const buf; uint64_t commitTime; sem_t *const readerSem; 
+ + // Intentionally unimplemented + Buffer(const Buffer &); + Buffer &operator=(const Buffer &); }; #endif // BUFFER_H diff --git a/tools/gator/daemon/Child.cpp b/tools/gator/daemon/Child.cpp index c0540762698f..9ee2ef8afb9d 100644 --- a/tools/gator/daemon/Child.cpp +++ b/tools/gator/daemon/Child.cpp @@ -86,7 +86,7 @@ static void child_handler(int signum) { } } -static void* durationThread(void* pVoid) { +static void *durationThread(void *) { prctl(PR_SET_NAME, (unsigned long)&"gatord-duration", 0, 0, 0); sem_wait(&startProfile); if (gSessionData->mSessionIsActive) { @@ -102,7 +102,7 @@ static void* durationThread(void* pVoid) { return 0; } -static void* stopThread(void* pVoid) { +static void *stopThread(void *) { OlySocket* socket = child->socket; prctl(PR_SET_NAME, (unsigned long)&"gatord-stopper", 0, 0, 0); @@ -139,7 +139,7 @@ static void* stopThread(void* pVoid) { return 0; } -void* countersThread(void* pVoid) { +static void *countersThread(void *) { prctl(PR_SET_NAME, (unsigned long)&"gatord-counters", 0, 0, 0); gSessionData->hwmon.start(); @@ -192,7 +192,7 @@ void* countersThread(void* pVoid) { return NULL; } -static void* senderThread(void* pVoid) { +static void *senderThread(void *) { int length = 1; char* data; char end_sequence[] = {RESPONSE_APC_DATA, 0, 0, 0, 0}; @@ -340,7 +340,8 @@ void Child::run() { thread_creation_success = false; } - if (gSessionData->hwmon.countersEnabled()) { + bool startcountersThread = gSessionData->hwmon.countersEnabled(); + if (startcountersThread) { if (pthread_create(&countersThreadID, NULL, countersThread, this)) { thread_creation_success = false; } @@ -378,7 +379,7 @@ void Child::run() { } while (bytesCollected > 0); logg->logMessage("Exit collect data loop"); - if (gSessionData->hwmon.countersEnabled()) { + if (startcountersThread) { pthread_join(countersThreadID, NULL); } diff --git a/tools/gator/daemon/Child.h b/tools/gator/daemon/Child.h index e39d18276407..0330e9d78027 100644 --- a/tools/gator/daemon/Child.h +++ 
b/tools/gator/daemon/Child.h @@ -26,6 +26,10 @@ private: int mNumConnections; void initialization(); + + // Intentionally unimplemented + Child(const Child &); + Child &operator=(const Child &); }; #endif //__CHILD_H__ diff --git a/tools/gator/daemon/ConfigurationXML.h b/tools/gator/daemon/ConfigurationXML.h index eba7dc4bac46..5650f487b990 100644 --- a/tools/gator/daemon/ConfigurationXML.h +++ b/tools/gator/daemon/ConfigurationXML.h @@ -29,6 +29,10 @@ private: int parse(const char* xmlFile); int configurationsTag(mxml_node_t *node); void configurationTag(mxml_node_t *node); + + // Intentionally unimplemented + ConfigurationXML(const ConfigurationXML &); + ConfigurationXML &operator=(const ConfigurationXML &); }; #endif // COUNTERS_H diff --git a/tools/gator/daemon/Driver.h b/tools/gator/daemon/Driver.h index dd1dc27d1cdb..f3a932f852cb 100644 --- a/tools/gator/daemon/Driver.h +++ b/tools/gator/daemon/Driver.h @@ -29,7 +29,7 @@ public: // Emits available counters virtual void writeCounters(mxml_node_t *root) const = 0; // Emits possible dynamically generated events/counters - virtual void writeEvents(mxml_node_t *root) const {} + virtual void writeEvents(mxml_node_t *) const {} Driver *getNext() const { return next; } @@ -39,6 +39,10 @@ protected: private: static Driver *head; Driver *next; + + // Intentionally unimplemented + Driver(const Driver &); + Driver &operator=(const Driver &); }; #endif // DRIVER_H diff --git a/tools/gator/daemon/Fifo.h b/tools/gator/daemon/Fifo.h index ada42b9fb584..d25cd6882561 100644 --- a/tools/gator/daemon/Fifo.h +++ b/tools/gator/daemon/Fifo.h @@ -39,6 +39,10 @@ private: sem_t* mReaderSem; char* mBuffer; bool mEnd; + + // Intentionally unimplemented + Fifo(const Fifo &); + Fifo &operator=(const Fifo &); }; #endif //__FIFO_H__ diff --git a/tools/gator/daemon/Hwmon.cpp b/tools/gator/daemon/Hwmon.cpp index 07925680c1f6..1d7c0da9cc83 100644 --- a/tools/gator/daemon/Hwmon.cpp +++ b/tools/gator/daemon/Hwmon.cpp @@ -63,6 +63,10 @@ private: 
double previous_value; sensors_subfeature_type input; + + // Intentionally unimplemented + HwmonCounter(const HwmonCounter &); + HwmonCounter &operator=(const HwmonCounter &); }; HwmonCounter::HwmonCounter(HwmonCounter *next, int key, const sensors_chip_name *chip, const sensors_feature *feature) : next(next), key(key), polled(false), readable(false), enabled(false), duplicate(false), chip(chip), feature(feature) { diff --git a/tools/gator/daemon/Hwmon.h b/tools/gator/daemon/Hwmon.h index 35981dc3d9ad..46bb42e898d7 100644 --- a/tools/gator/daemon/Hwmon.h +++ b/tools/gator/daemon/Hwmon.h @@ -34,6 +34,10 @@ private: HwmonCounter *findCounter(const Counter &counter) const; HwmonCounter *counters; + + // Intentionally unimplemented + Hwmon(const Hwmon &); + Hwmon &operator=(const Hwmon &); }; #endif // HWMON_H diff --git a/tools/gator/daemon/OlySocket.cpp b/tools/gator/daemon/OlySocket.cpp index 132510df584a..ab5c3c2c8938 100644 --- a/tools/gator/daemon/OlySocket.cpp +++ b/tools/gator/daemon/OlySocket.cpp @@ -11,6 +11,7 @@ #include #ifdef WIN32 #include +#include #else #include #include @@ -126,11 +127,17 @@ void OlySocket::createSingleServerConnection(int port) { } void OlySocket::createServerSocket(int port) { + int family = AF_INET6; + // Create socket - mFDServer = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + mFDServer = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP); if (mFDServer < 0) { - logg->logError(__FILE__, __LINE__, "Error creating server socket"); - handleException(); + family = AF_INET; + mFDServer = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (mFDServer < 0) { + logg->logError(__FILE__, __LINE__, "Error creating server socket"); + handleException(); + } } // Enable address reuse, another solution would be to create the server socket once and only close it when the object exits @@ -141,11 +148,11 @@ void OlySocket::createServerSocket(int port) { } // Create sockaddr_in structure, ensuring non-populated fields are zero - struct sockaddr_in sockaddr; - 
memset((void*)&sockaddr, 0, sizeof(struct sockaddr_in)); - sockaddr.sin_family = AF_INET; - sockaddr.sin_port = htons(port); - sockaddr.sin_addr.s_addr = INADDR_ANY; + struct sockaddr_in6 sockaddr; + memset((void*)&sockaddr, 0, sizeof(sockaddr)); + sockaddr.sin6_family = family; + sockaddr.sin6_port = htons(port); + sockaddr.sin6_addr = in6addr_any; // Bind the socket to an address if (bind(mFDServer, (const struct sockaddr*)&sockaddr, sizeof(sockaddr)) < 0) { diff --git a/tools/gator/daemon/Sender.h b/tools/gator/daemon/Sender.h index 8f23361a5def..b388f039bad7 100644 --- a/tools/gator/daemon/Sender.h +++ b/tools/gator/daemon/Sender.h @@ -33,6 +33,10 @@ private: FILE* mDataFile; char* mDataFileName; pthread_mutex_t mSendMutex; + + // Intentionally unimplemented + Sender(const Sender &); + Sender &operator=(const Sender &); }; #endif //__SENDER_H__ diff --git a/tools/gator/daemon/SessionData.cpp b/tools/gator/daemon/SessionData.cpp index 4068d4e957f0..cf844075401f 100644 --- a/tools/gator/daemon/SessionData.cpp +++ b/tools/gator/daemon/SessionData.cpp @@ -44,13 +44,13 @@ void SessionData::parseSessionXML(char* xmlString) { SessionXML session(xmlString); session.parse(); - // Set session data values + // Set session data values - use prime numbers just below the desired value to reduce the chance of events firing at the same time if (strcmp(session.parameters.sample_rate, "high") == 0) { - mSampleRate = 10000; + mSampleRate = 9973; // 10000 } else if (strcmp(session.parameters.sample_rate, "normal") == 0) { - mSampleRate = 1000; + mSampleRate = 997; // 1000 } else if (strcmp(session.parameters.sample_rate, "low") == 0) { - mSampleRate = 100; + mSampleRate = 97; // 100 } else if (strcmp(session.parameters.sample_rate, "none") == 0) { mSampleRate = 0; } else { @@ -139,7 +139,9 @@ void SessionData::readCpuInfo() { } int getEventKey() { - // Start one after the gator.ko's value of 1 + // key 0 is reserved as a timestamp + // key 1 is reserved as the marker for thread 
specific counters + // Odd keys are assigned by the driver, even keys by the daemon static int key = 2; const int ret = key; diff --git a/tools/gator/daemon/SessionData.h b/tools/gator/daemon/SessionData.h index e72fa5d7c5ed..c834251527cf 100644 --- a/tools/gator/daemon/SessionData.h +++ b/tools/gator/daemon/SessionData.h @@ -16,7 +16,7 @@ #define MAX_PERFORMANCE_COUNTERS 50 -#define PROTOCOL_VERSION 16 +#define PROTOCOL_VERSION 17 #define PROTOCOL_DEV 1000 // Differentiates development versions (timestamp) from release versions struct ImageLinkList { @@ -62,6 +62,10 @@ public: private: void readCpuInfo(); + + // Intentionally unimplemented + SessionData(const SessionData &); + SessionData &operator=(const SessionData &); }; extern SessionData* gSessionData; diff --git a/tools/gator/daemon/SessionXML.h b/tools/gator/daemon/SessionXML.h index c7e3798d6950..0fb03bd6627c 100644 --- a/tools/gator/daemon/SessionXML.h +++ b/tools/gator/daemon/SessionXML.h @@ -33,6 +33,10 @@ private: char* mPath; void sessionTag(mxml_node_t *tree, mxml_node_t *node); void sessionImage(mxml_node_t *node); + + // Intentionally unimplemented + SessionXML(const SessionXML &); + SessionXML &operator=(const SessionXML &); }; #endif // SESSION_XML_H diff --git a/tools/gator/daemon/StreamlineSetup.h b/tools/gator/daemon/StreamlineSetup.h index 092d956ec99f..d6d9a6ea2991 100644 --- a/tools/gator/daemon/StreamlineSetup.h +++ b/tools/gator/daemon/StreamlineSetup.h @@ -38,6 +38,10 @@ private: void sendDefaults(); void sendCounters(); void writeConfiguration(char* xml); + + // Intentionally unimplemented + StreamlineSetup(const StreamlineSetup &); + StreamlineSetup &operator=(const StreamlineSetup &); }; #endif //__STREAMLINE_SETUP_H__ diff --git a/tools/gator/daemon/common.mk b/tools/gator/daemon/common.mk index ee2415b8825c..031d16906881 100644 --- a/tools/gator/daemon/common.mk +++ b/tools/gator/daemon/common.mk @@ -6,7 +6,7 @@ # -std=c++0x is the planned new c++ standard # -std=c++98 is the 1998 
c++ standard CFLAGS += -O3 -Wall -fno-exceptions -pthread -MMD -DETCDIR=\"/etc\" -Ilibsensors -CXXFLAGS += -fno-rtti +CXXFLAGS += -fno-rtti -Wextra # -Weffc++ ifeq ($(WERROR),1) CFLAGS += -Werror endif diff --git a/tools/gator/daemon/events-CCI-400.xml b/tools/gator/daemon/events-CCI-400.xml index 86db2087e1f5..4fa77117d2d8 100644 --- a/tools/gator/daemon/events-CCI-400.xml +++ b/tools/gator/daemon/events-CCI-400.xml @@ -17,7 +17,7 @@ - + @@ -45,3 +45,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/gator/daemon/events-Linux.xml b/tools/gator/daemon/events-Linux.xml index 4a30ad6ec4e6..31a90a1d6335 100644 --- a/tools/gator/daemon/events-Linux.xml +++ b/tools/gator/daemon/events-Linux.xml @@ -6,11 +6,11 @@ - - - - - + + + + + diff --git a/tools/gator/daemon/events-Mali-T6xx.xml b/tools/gator/daemon/events-Mali-T6xx.xml index 647e3d5b0fcf..2465238a8bda 100644 --- a/tools/gator/daemon/events-Mali-T6xx.xml +++ b/tools/gator/daemon/events-Mali-T6xx.xml @@ -36,3 +36,13 @@ + + + + + + + diff --git a/tools/gator/daemon/events-Mali-T6xx_hw.xml b/tools/gator/daemon/events-Mali-T6xx_hw.xml index 8cfe7c3084d5..03566cbb06ab 100644 --- a/tools/gator/daemon/events-Mali-T6xx_hw.xml +++ b/tools/gator/daemon/events-Mali-T6xx_hw.xml @@ -60,12 +60,15 @@ - + + + + diff --git a/tools/gator/daemon/main.cpp b/tools/gator/daemon/main.cpp index d1b0913aa78f..bfd36b98766c 100644 --- a/tools/gator/daemon/main.cpp +++ b/tools/gator/daemon/main.cpp @@ -93,7 +93,7 @@ static void handler(int signum) { } // Child exit Signal Handler -static void child_exit(int signum) { +static void child_exit(int) { int status; int pid = wait(&status); if (pid != -1) { @@ -106,13 +106,18 @@ static void child_exit(int signum) { static int udpPort(int port) { int s; - struct sockaddr_in sockaddr; + struct sockaddr_in6 sockaddr; int on; + int family = AF_INET6; - s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + s = socket(AF_INET6, 
SOCK_DGRAM, IPPROTO_UDP); if (s == -1) { - logg->logError(__FILE__, __LINE__, "socket failed"); - handleException(); + family = AF_INET; + s = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (s == -1) { + logg->logError(__FILE__, __LINE__, "socket failed"); + handleException(); + } } on = 1; @@ -122,9 +127,9 @@ static int udpPort(int port) { } memset((void*)&sockaddr, 0, sizeof(sockaddr)); - sockaddr.sin_family = AF_INET; - sockaddr.sin_port = htons(port); - sockaddr.sin_addr.s_addr = INADDR_ANY; + sockaddr.sin6_family = family; + sockaddr.sin6_port = htons(port); + sockaddr.sin6_addr = in6addr_any; if (bind(s, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) { logg->logError(__FILE__, __LINE__, "socket failed"); handleException(); @@ -173,7 +178,7 @@ static void* answerThread(void* pVoid) { for (;;) { char buf[128]; - struct sockaddr_in sockaddr; + struct sockaddr_in6 sockaddr; socklen_t addrlen; int read; addrlen = sizeof(sockaddr); @@ -386,7 +391,7 @@ static struct cmdline_t parseCommandLine(int argc, char** argv) { } // Gator data flow: collector -> collector fifo -> sender -int main(int argc, char** argv, char* envp[]) { +int main(int argc, char** argv) { // Ensure proper signal handling by making gatord the process group leader // e.g. it may not be the group leader when launched as 'sudo gatord' setsid(); From a4e47900af0ce31d8c2807df94b2e182add8c0c4 Mon Sep 17 00:00:00 2001 From: Drew Richardson Date: Tue, 17 Dec 2013 23:40:56 +0000 Subject: [PATCH 002/296] gator-driver: Handle task struct correctly Use put_task_struct/get_task_struct to ensure the task_struct pointer is still valid. Change translate_buffer to ensure that both arguments are written atomically. Drop additional requests if the translate_buffer is full. 
Signed-off-by: Drew Richardson Signed-off-by: Pawel Moll --- drivers/gator/gator_cookies.c | 74 +++++++++++++++++++++++------------ drivers/gator/gator_iks.c | 2 + 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c index 91adfdde9be2..eb9b946170c1 100644 --- a/drivers/gator/gator_cookies.c +++ b/drivers/gator/gator_cookies.c @@ -15,13 +15,18 @@ static uint32_t *gator_crc32_table; static unsigned int translate_buffer_mask; +struct cookie_args { + struct task_struct *task; + const char *text; +}; + static DEFINE_PER_CPU(char *, translate_text); static DEFINE_PER_CPU(uint32_t, cookie_next_key); static DEFINE_PER_CPU(uint64_t *, cookie_keys); static DEFINE_PER_CPU(uint32_t *, cookie_values); static DEFINE_PER_CPU(int, translate_buffer_read); static DEFINE_PER_CPU(int, translate_buffer_write); -static DEFINE_PER_CPU(void **, translate_buffer); +static DEFINE_PER_CPU(struct cookie_args *, translate_buffer); static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq); static void wq_cookie_handler(struct work_struct *unused); @@ -109,36 +114,58 @@ static void cookiemap_add(uint64_t key, uint32_t value) } #ifndef CONFIG_PREEMPT_RT_FULL -static void translate_buffer_write_ptr(int cpu, void *x) +static void translate_buffer_write_args(int cpu, struct task_struct *task, const char *text) { - per_cpu(translate_buffer, cpu)[per_cpu(translate_buffer_write, cpu)++] = x; - per_cpu(translate_buffer_write, cpu) &= translate_buffer_mask; + unsigned long flags; + int write; + int next_write; + struct cookie_args *args; + + local_irq_save(flags); + + write = per_cpu(translate_buffer_write, cpu); + next_write = (write + 1) & translate_buffer_mask; + + // At least one entry must always remain available as when read == write, the queue is empty not full + if (next_write != per_cpu(translate_buffer_read, cpu)) { + args = &per_cpu(translate_buffer, cpu)[write]; + args->task = task; + 
args->text = text; + get_task_struct(task); + per_cpu(translate_buffer_write, cpu) = next_write; + } + + local_irq_restore(flags); } #endif -static void *translate_buffer_read_ptr(int cpu) +static void translate_buffer_read_args(int cpu, struct cookie_args *args) { - void *value = per_cpu(translate_buffer, cpu)[per_cpu(translate_buffer_read, cpu)++]; - per_cpu(translate_buffer_read, cpu) &= translate_buffer_mask; - return value; + unsigned long flags; + int read; + + local_irq_save(flags); + + read = per_cpu(translate_buffer_read, cpu); + *args = per_cpu(translate_buffer, cpu)[read]; + per_cpu(translate_buffer_read, cpu) = (read + 1) & translate_buffer_mask; + + local_irq_restore(flags); } static void wq_cookie_handler(struct work_struct *unused) { - struct task_struct *task; - char *text; + struct cookie_args args; int cpu = get_physical_cpu(), cookie; - unsigned int commit; mutex_lock(&start_mutex); if (gator_started != 0) { - commit = per_cpu(translate_buffer_write, cpu); - while (per_cpu(translate_buffer_read, cpu) != commit) { - task = (struct task_struct *)translate_buffer_read_ptr(cpu); - text = (char *)translate_buffer_read_ptr(cpu); - cookie = get_cookie(cpu, task, text, true); - marshal_link(cookie, task->tgid, task->pid); + while (per_cpu(translate_buffer_read, cpu) != per_cpu(translate_buffer_write, cpu)) { + translate_buffer_read_args(cpu, &args); + cookie = get_cookie(cpu, args.task, args.text, true); + marshal_link(cookie, args.task->tgid, args.task->pid); + put_task_struct(args.task); } } @@ -169,15 +196,14 @@ static int translate_app_process(const char **text, int cpu, struct task_struct // inconsistent during a context switch between android/linux versions if (!from_wq) { // Check if already in buffer - int ptr = per_cpu(translate_buffer_read, cpu); - while (ptr != per_cpu(translate_buffer_write, cpu)) { - if (per_cpu(translate_buffer, cpu)[ptr] == (void *)task) + int pos = per_cpu(translate_buffer_read, cpu); + while (pos != 
per_cpu(translate_buffer_write, cpu)) { + if (per_cpu(translate_buffer, cpu)[pos].task == task) goto out; - ptr = (ptr + 2) & translate_buffer_mask; + pos = (pos + 1) & translate_buffer_mask; } - translate_buffer_write_ptr(cpu, (void *)task); - translate_buffer_write_ptr(cpu, (void *)*text); + translate_buffer_write_args(cpu, task, *text); // Not safe to call in RT-Preempt full in schedule switch context mod_timer(&app_process_wake_up_timer, jiffies + 1); @@ -340,7 +366,7 @@ static int cookies_initialize(void) } memset(per_cpu(cookie_values, cpu), 0, size); - per_cpu(translate_buffer, cpu) = (void **)kmalloc(TRANSLATE_BUFFER_SIZE, GFP_KERNEL); + per_cpu(translate_buffer, cpu) = (struct cookie_args *)kmalloc(TRANSLATE_BUFFER_SIZE, GFP_KERNEL); if (!per_cpu(translate_buffer, cpu)) { err = -ENOMEM; goto cookie_setup_error; diff --git a/drivers/gator/gator_iks.c b/drivers/gator/gator_iks.c index 24233d775581..0a90bdd1904e 100644 --- a/drivers/gator/gator_iks.c +++ b/drivers/gator/gator_iks.c @@ -147,11 +147,13 @@ static void gator_send_iks_core_names(void) { int cpu; // Send the cpu names + preempt_disable(); for (cpu = 0; cpu < nr_cpu_ids; ++cpu) { if (mpidr_cpus[cpu] != NULL) { gator_send_core_name(cpu, mpidr_cpus[cpu]->cpuid, mpidr_cpus[cpu]); } } + preempt_enable(); } static int gator_migrate_start(void) From 45030a56ba386e4ea0597b8f167f494b2d4ee259 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Fri, 18 May 2012 09:06:15 +0100 Subject: [PATCH 003/296] gator: Revert #error about lack of CONFIG_PERF_EVENTS Make this a #warning as in version 5.9. This lets Gator build (with much reduced functionality) on platforms which require CONFIG_PERF_EVENTS to be disabled for whatever reason. 
Signed-off-by: Jon Medhurst --- drivers/gator/gator_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gator/gator_main.c b/drivers/gator/gator_main.c index 9773ae24d6f2..19f51c7cd8ee 100644 --- a/drivers/gator/gator_main.c +++ b/drivers/gator/gator_main.c @@ -55,9 +55,9 @@ static unsigned long gator_protocol_version = PROTOCOL_VERSION; #if (GATOR_PERF_SUPPORT) && (!(GATOR_PERF_PMU_SUPPORT)) #ifndef CONFIG_PERF_EVENTS -#error gator requires the kernel to have CONFIG_PERF_EVENTS defined to support pmu hardware counters +#warning gator requires the kernel to have CONFIG_PERF_EVENTS defined to support pmu hardware counters #elif !defined CONFIG_HW_PERF_EVENTS -#error gator requires the kernel to have CONFIG_HW_PERF_EVENTS defined to support pmu hardware counters +#warning gator requires the kernel to have CONFIG_HW_PERF_EVENTS defined to support pmu hardware counters #endif #endif From 29d5b5842b26f7923c206265a0fb81004813aa78 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 10 May 2012 17:35:03 +0100 Subject: [PATCH 004/296] gator: Add config for building the module in-tree Signed-off-by: Jon Medhurst --- drivers/Kconfig | 2 ++ drivers/Makefile | 2 ++ drivers/gator/Kconfig | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 drivers/gator/Kconfig diff --git a/drivers/Kconfig b/drivers/Kconfig index 9953a42809ec..d27feb5460f3 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -166,4 +166,6 @@ source "drivers/ipack/Kconfig" source "drivers/reset/Kconfig" +source "drivers/gator/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 130abc1dfd65..092a62e79688 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -152,3 +152,5 @@ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_IPACK_BUS) += ipack/ obj-$(CONFIG_NTB) += ntb/ + +obj-$(CONFIG_GATOR) += gator/ diff --git a/drivers/gator/Kconfig b/drivers/gator/Kconfig new file mode 100644 index 
000000000000..7ea0fcc3d01d --- /dev/null +++ b/drivers/gator/Kconfig @@ -0,0 +1,33 @@ +config GATOR + tristate "Gator module for ARM's Streamline Performance Analyzer" + default m if (ARM || ARM64) + depends on PROFILING + depends on HIGH_RES_TIMERS + depends on LOCAL_TIMERS || !(ARM && SMP) + select TRACING + +config GATOR_WITH_MALI_SUPPORT + bool + +choice + prompt "Enable Mali GPU support in Gator" + depends on GATOR + optional + +config GATOR_MALI_400MP + bool "Mali-400MP" + select GATOR_WITH_MALI_SUPPORT + +config GATOR_MALI_T6XX + bool "Mali-T604 or Mali-T658" + select GATOR_WITH_MALI_SUPPORT + +endchoice + +config GATOR_MALI_PATH + string "Path to Mali driver" + depends on GATOR_WITH_MALI_SUPPORT + default "drivers/gpu/arm/mali400mp" + help + The gator code adds this to its include path so it can get the Mali + trace headers with: #include "linux/mali_linux_trace.h" From c73e263bac89f90d65fdf42b667bb281faec7345 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 23 Apr 2013 12:35:02 +0100 Subject: [PATCH 005/296] mm: hugetlb: Copy huge_pmd_share from x86 to mm. Under x86, multiple puds can be made to reference the same bank of huge pmds provided that they represent a full PUD_SIZE of shared huge memory that is aligned to a PUD_SIZE boundary. The code to share pmds does not require any architecture specific knowledge other than the fact that pmds can be indexed, thus can be beneficial to some other architectures. 
This patch copies the huge pmd sharing (and unsharing) logic from x86/ to mm/ and introduces a new config option to activate it: CONFIG_ARCH_WANT_HUGE_PMD_SHARE Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++ mm/hugetlb.c | 122 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c8958f563116..dca09c0cd3cc 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -70,6 +70,10 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); int dequeue_hwpoisoned_huge_page(struct page *page); void copy_huge_page(struct page *dst, struct page *src); +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); +#endif + extern unsigned long hugepages_treat_as_movable; extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index aa3b9a63394b..7fd37a8b5194 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3210,6 +3210,128 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) hugetlb_acct_memory(h, -(chg - freed)); } +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +static unsigned long page_table_shareable(struct vm_area_struct *svma, + struct vm_area_struct *vma, + unsigned long addr, pgoff_t idx) +{ + unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + + svma->vm_start; + unsigned long sbase = saddr & PUD_MASK; + unsigned long s_end = sbase + PUD_SIZE; + + /* Allow segments to share if only one is marked locked */ + unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; + unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; + + /* + * match the virtual addresses, permission and the alignment of the + * page table page.
+ */ + if (pmd_index(addr) != pmd_index(saddr) || + vm_flags != svm_flags || + sbase < svma->vm_start || svma->vm_end < s_end) + return 0; + + return saddr; +} + +static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long base = addr & PUD_MASK; + unsigned long end = base + PUD_SIZE; + + /* + * check on proper vm_flags and page table alignment + */ + if (vma->vm_flags & VM_MAYSHARE && + vma->vm_start <= base && end <= vma->vm_end) + return 1; + return 0; +} + +/* + * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() + * and returns the corresponding pte. While this is not necessary for the + * !shared pmd case because we can allocate the pmd later as well, it makes the + * code much cleaner. pmd allocation is essential for the shared case because + * pud has to be populated inside the same i_mmap_mutex section - otherwise + * racing tasks could either miss the sharing (see huge_pte_offset) or select a + * bad pmd for sharing. + */ +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + struct vm_area_struct *vma = find_vma(mm, addr); + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff; + struct vm_area_struct *svma; + unsigned long saddr; + pte_t *spte = NULL; + pte_t *pte; + + if (!vma_shareable(vma, addr)) + return (pte_t *)pmd_alloc(mm, pud, addr); + + mutex_lock(&mapping->i_mmap_mutex); + vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { + if (svma == vma) + continue; + + saddr = page_table_shareable(svma, vma, addr, idx); + if (saddr) { + spte = huge_pte_offset(svma->vm_mm, saddr); + if (spte) { + get_page(virt_to_page(spte)); + break; + } + } + } + + if (!spte) + goto out; + + spin_lock(&mm->page_table_lock); + if (pud_none(*pud)) + pud_populate(mm, pud, + (pmd_t *)((unsigned long)spte & PAGE_MASK)); + else + put_page(virt_to_page(spte)); + spin_unlock(&mm->page_table_lock); +out: + pte 
= (pte_t *)pmd_alloc(mm, pud, addr); + mutex_unlock(&mapping->i_mmap_mutex); + return pte; +} + +/* + * unmap huge page backed by shared pte. + * + * Hugetlb pte page is ref counted at the time of mapping. If pte is shared + * indicated by page_count > 1, unmap is achieved by clearing pud and + * decrementing the ref count. If count == 1, the pte page is not shared. + * + * called with vma->vm_mm->page_table_lock held. + * + * returns: 1 successfully unmapped a shared pte page + * 0 the underlying pte page is not shared, or it is the last user + */ +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + pgd_t *pgd = pgd_offset(mm, *addr); + pud_t *pud = pud_offset(pgd, *addr); + + BUG_ON(page_count(virt_to_page(ptep)) == 0); + if (page_count(virt_to_page(ptep)) == 1) + return 0; + + pud_clear(pud); + put_page(virt_to_page(ptep)); + *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; + return 1; +} +#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ + #ifdef CONFIG_MEMORY_FAILURE /* Should be called in hugetlb_lock */ From 8be95218cb747eb5951b63cef9a928be0617704e Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Mon, 29 Apr 2013 14:29:48 +0100 Subject: [PATCH 006/296] x86: mm: Remove x86 version of huge_pmd_share. The huge_pmd_share code has been copied over to mm/hugetlb.c to make it accessible to other architectures. Remove the x86 copy of the huge_pmd_share code and enable the ARCH_WANT_HUGE_PMD_SHARE config flag. That way we reference the general one. 
Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- arch/x86/Kconfig | 3 + arch/x86/mm/hugetlbpage.c | 120 -------------------------------------- 2 files changed, 3 insertions(+), 120 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe120da25625..56d606b2497c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -207,6 +207,9 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y + config ZONE_DMA32 bool default X86_64 diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index ae1aa71d0115..7e522a359972 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,126 +16,6 @@ #include #include -static unsigned long page_table_shareable(struct vm_area_struct *svma, - struct vm_area_struct *vma, - unsigned long addr, pgoff_t idx) -{ - unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + - svma->vm_start; - unsigned long sbase = saddr & PUD_MASK; - unsigned long s_end = sbase + PUD_SIZE; - - /* Allow segments to share if only one is marked locked */ - unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED; - unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED; - - /* - * match the virtual addresses, permission and the alignment of the - * page table page. - */ - if (pmd_index(addr) != pmd_index(saddr) || - vm_flags != svm_flags || - sbase < svma->vm_start || svma->vm_end < s_end) - return 0; - - return saddr; -} - -static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) -{ - unsigned long base = addr & PUD_MASK; - unsigned long end = base + PUD_SIZE; - - /* - * check on proper vm_flags and page table alignment - */ - if (vma->vm_flags & VM_MAYSHARE && - vma->vm_start <= base && end <= vma->vm_end) - return 1; - return 0; -} - -/* - * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() - * and returns the corresponding pte. 
While this is not necessary for the - * !shared pmd case because we can allocate the pmd later as well, it makes the - * code much cleaner. pmd allocation is essential for the shared case because - * pud has to be populated inside the same i_mmap_mutex section - otherwise - * racing tasks could either miss the sharing (see huge_pte_offset) or select a - * bad pmd for sharing. - */ -static pte_t * -huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) -{ - struct vm_area_struct *vma = find_vma(mm, addr); - struct address_space *mapping = vma->vm_file->f_mapping; - pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + - vma->vm_pgoff; - struct vm_area_struct *svma; - unsigned long saddr; - pte_t *spte = NULL; - pte_t *pte; - - if (!vma_shareable(vma, addr)) - return (pte_t *)pmd_alloc(mm, pud, addr); - - mutex_lock(&mapping->i_mmap_mutex); - vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { - if (svma == vma) - continue; - - saddr = page_table_shareable(svma, vma, addr, idx); - if (saddr) { - spte = huge_pte_offset(svma->vm_mm, saddr); - if (spte) { - get_page(virt_to_page(spte)); - break; - } - } - } - - if (!spte) - goto out; - - spin_lock(&mm->page_table_lock); - if (pud_none(*pud)) - pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); - else - put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); -out: - pte = (pte_t *)pmd_alloc(mm, pud, addr); - mutex_unlock(&mapping->i_mmap_mutex); - return pte; -} - -/* - * unmap huge page backed by shared pte. - * - * Hugetlb pte page is ref counted at the time of mapping. If pte is shared - * indicated by page_count > 1, unmap is achieved by clearing pud and - * decrementing the ref count. If count == 1, the pte page is not shared. - * - * called with vma->vm_mm->page_table_lock held. 
- * - * returns: 1 successfully unmapped a shared pte page - * 0 the underlying pte page is not shared, or it is the last user - */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - pgd_t *pgd = pgd_offset(mm, *addr); - pud_t *pud = pud_offset(pgd, *addr); - - BUG_ON(page_count(virt_to_page(ptep)) == 0); - if (page_count(virt_to_page(ptep)) == 1) - return 0; - - pud_clear(pud); - put_page(virt_to_page(ptep)); - *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; - return 1; -} - pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { From 55de0a786c4b021e0e32cc03c24e8e25249b4ae3 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 08:02:03 +0100 Subject: [PATCH 007/296] mm: hugetlb: Copy general hugetlb code from x86 to mm. The huge_pte_alloc, huge_pte_offset and follow_huge_p[mu]d functions in x86/mm/hugetlbpage.c do not rely on any architecture specific knowledge other than the fact that pmds and puds can be treated as huge ptes. To allow other architectures to use this code (and reduce the need for code duplication), this patch copies these functions into mm, replaces the use of pud_large with pud_huge and provides a config flag to activate them: CONFIG_ARCH_WANT_GENERAL_HUGETLB If CONFIG_ARCH_WANT_HUGE_PMD_SHARE is also active then the huge_pmd_share code will be called by huge_pte_alloc (otherwise we call pmd_alloc and skip the sharing code).
Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- mm/hugetlb.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7fd37a8b5194..52caa3d25b6f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2972,15 +2972,6 @@ out_mutex: return ret; } -/* Can be overriden by architectures */ -__attribute__((weak)) struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - BUG(); - return NULL; -} - long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, unsigned long *nr_pages, @@ -3330,8 +3321,96 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; return 1; } +#define want_pmd_share() (1) +#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ +pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + return NULL; +} +#define want_pmd_share() (0) #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ +#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) { + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + } else { + BUG_ON(sz != PMD_SIZE); + if (want_pmd_share() && pud_none(*pud)) + pte = huge_pmd_share(mm, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + } + } + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) { + if (pud_huge(*pud)) + return (pte_t *)pud; + 
pmd = pmd_offset(pud, addr); + } + } + return (pte_t *) pmd; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + return page; +} + +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pud); + if (page) + page += ((address & ~PUD_MASK) >> PAGE_SHIFT); + return page; +} + +#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + +/* Can be overriden by architectures */ +__attribute__((weak)) struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + BUG(); + return NULL; +} + +#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + #ifdef CONFIG_MEMORY_FAILURE /* Should be called in hugetlb_lock */ From 409fba2624e5fe66eb1e86005801acbba65eb64c Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 08:03:42 +0100 Subject: [PATCH 008/296] x86: mm: Remove general hugetlb code from x86. huge_pte_alloc, huge_pte_offset and follow_huge_p[mu]d have already been copied over to mm. 
This patch removes the x86 copies of these functions and activates the general ones by enabling: CONFIG_ARCH_WANT_GENERAL_HUGETLB Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- arch/x86/Kconfig | 3 ++ arch/x86/mm/hugetlbpage.c | 67 --------------------------------------- 2 files changed, 3 insertions(+), 67 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 56d606b2497c..787072769a80 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -210,6 +210,9 @@ config ARCH_SUSPEND_POSSIBLE config ARCH_WANT_HUGE_PMD_SHARE def_bool y +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + config ZONE_DMA32 bool default X86_64 diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 7e522a359972..7e73e8c69096 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -16,49 +16,6 @@ #include #include -pte_t *huge_pte_alloc(struct mm_struct *mm, - unsigned long addr, unsigned long sz) -{ - pgd_t *pgd; - pud_t *pud; - pte_t *pte = NULL; - - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - if (pud) { - if (sz == PUD_SIZE) { - pte = (pte_t *)pud; - } else { - BUG_ON(sz != PMD_SIZE); - if (pud_none(*pud)) - pte = huge_pmd_share(mm, addr, pud); - else - pte = (pte_t *)pmd_alloc(mm, pud, addr); - } - } - BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - - return pte; -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd = NULL; - - pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { - pud = pud_offset(pgd, addr); - if (pud_present(*pud)) { - if (pud_large(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - } - } - return (pte_t *) pmd; -} - #if 0 /* This is just for testing */ struct page * follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) @@ -120,30 +77,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_PSE); } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long 
address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; -} - -struct page * -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; -} - #endif /* x86_64 also uses this file */ From 855b2b7fea77794f3bec69095785e94577447211 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 7 May 2013 14:46:03 +0100 Subject: [PATCH 009/296] mm: thp: Correct the HPAGE_PMD_ORDER check. All Transparent Huge Pages are allocated by the buddy allocator. A compile time check is in place that fails when the order of a transparent huge page is too large to be allocated by the buddy allocator. Unfortunately that compile time check passes when: HPAGE_PMD_ORDER == MAX_ORDER ( which is incorrect as the buddy allocator can only allocate memory of order strictly less than MAX_ORDER. ) This patch updates the compile time check to fail in the above case. 
Signed-off-by: Steve Capper Acked-by: Catalin Marinas Acked-by: Andrew Morton --- include/linux/huge_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 528454c2caa9..26ee56c80dc7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -123,7 +123,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma, } while (0) extern void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, pmd_t *pmd); -#if HPAGE_PMD_ORDER > MAX_ORDER +#if HPAGE_PMD_ORDER >= MAX_ORDER #error "hugepages can't be allocated by the buddy allocator" #endif extern int hugepage_madvise(struct vm_area_struct *vma, From 4142fa151d8534e29b1d490567f45354976c6c61 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 30 Apr 2013 11:00:33 +0100 Subject: [PATCH 010/296] ARM64: mm: Restore memblock limit when map_mem finished. In paging_init the memblock limit is set to restrict any addresses returned by early_alloc to fit within the initial direct kernel mapping in swapper_pg_dir. This allows map_mem to allocate puds, pmds and ptes from the initial direct kernel mapping. The limit stays low after paging_init() though, meaning any bootmem allocations will be from a restricted subset of memory. Gigabyte huge pages, for instance, are normally allocated from bootmem as their order (18) is too large for the default buddy allocator (MAX_ORDER = 11). This patch restores the memblock limit when map_mem has finished, allowing gigabyte huge pages (and other objects) to be allocated from all of bootmem. 
Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ba7477efad5c..49961d1fa033 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -305,6 +305,16 @@ static void __init map_mem(void) { struct memblock_region *reg; + /* + * Temporarily limit the memblock range. We need to do this as + * create_mapping requires puds, pmds and ptes to be allocated from + * memory addressable from the initial direct kernel mapping. + * + * The initial direct kernel mapping, located at swapper_pg_dir, + * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (aligned). + */ + memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); + /* map all the memory banks */ for_each_memblock(memory, reg) { phys_addr_t start = reg->base; @@ -315,6 +325,9 @@ static void __init map_mem(void) create_mapping(start, __phys_to_virt(start), end - start); } + + /* Limit no longer required. */ + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } /* @@ -325,12 +338,6 @@ void __init paging_init(void) { void *zero_page; - /* - * Maximum PGDIR_SIZE addressable via the initial direct kernel - * mapping in swapper_pg_dir. - */ - memblock_set_current_limit((PHYS_OFFSET & PGDIR_MASK) + PGDIR_SIZE); - init_mem_pgprot(); map_mem(); From 2f43620c2d249a83d45b89a3143f135498cf30ea Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 2 May 2013 16:25:42 +0100 Subject: [PATCH 011/296] ARM64: mm: Make PAGE_NONE pages read only and no-execute. If we consider the following code sequence: my_pte = pte_modify(entry, myprot); x = pte_write(my_pte); y = pte_exec(my_pte); If myprot comes from a PROT_NONE page, then x and y will both be true which is undesirable behaviour. This patch sets the no-execute and read-only bits for PAGE_NONE such that the code above will return false for both x and y.
Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e9a1a1d81892..dc0f67de9e8a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -66,7 +66,7 @@ extern pgprot_t pgprot_default; #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE) +#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) #define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) @@ -76,7 +76,7 @@ extern pgprot_t pgprot_default; #define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) #define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE) +#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) #define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) #define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) From 8f363037a23789d8a33b914b671940f32c6f4ed1 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 28 May 2013 13:35:51 +0100 Subject: [PATCH 012/296] ARM64: mm: Move PTE_PROT_NONE bit. Under ARM64, PTEs can be broadly categorised as follows: - Present and valid: Bit #0 is set. The PTE is valid and memory access to the region may fault. - Present and invalid: Bit #0 is clear and bit #1 is set. 
Represents present memory with PROT_NONE protection. The PTE is an invalid entry, and the user fault handler will raise a SIGSEGV. - Not present (file or swap): Bits #0 and #1 are clear. Memory represented has been paged out. The PTE is an invalid entry, and the fault handler will try and re-populate the memory where necessary. Huge PTEs are block descriptors that have bit #1 clear. If we wish to represent PROT_NONE huge PTEs we then run into a problem as there is no way to distinguish between regular and huge PTEs if we set bit #1. To resolve this ambiguity this patch moves PTE_PROT_NONE from bit #1 to bit #2 and moves PTE_FILE from bit #2 to bit #3. The number of swap/file bits is reduced by 1 as a consequence, leaving 60 bits for file and swap entries. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index dc0f67de9e8a..0f3aa1bc7ac8 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,8 +25,8 @@ * Software defined PTE bits definition. 
*/ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 1) /* only when !PTE_VALID */ -#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ +#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) @@ -281,12 +281,12 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-8: swap type + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-8: swap type * bits 9-63: swap offset */ -#define __SWP_TYPE_SHIFT 3 +#define __SWP_TYPE_SHIFT 4 #define __SWP_TYPE_BITS 6 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) @@ -306,15 +306,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a file entry: - * bits 0-1: present (must be zero) - * bit 2: PTE_FILE - * bits 3-63: file offset / PAGE_SIZE + * bits 0, 2: present (must both be zero) + * bit 3: PTE_FILE + * bits 4-63: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 3) -#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 4) +#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) -#define PTE_FILE_MAX_BITS 61 +#define PTE_FILE_MAX_BITS 60 extern int kern_addr_valid(unsigned long addr); From 7f7f783676eaaffde97ca5467206e6cb8f6ecd1a Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 10 Apr 2013 13:48:00 +0100 Subject: [PATCH 013/296] ARM64: mm: HugeTLB support. Add huge page support to ARM64, different huge page sizes are supported depending on the size of normal pages: PAGE_SIZE is 4KB: 2MB - (pmds) these can be allocated at any time. 
1024MB - (puds) usually allocated on bootup with the command line with something like: hugepagesz=1G hugepages=6 PAGE_SIZE is 64KB: 512MB - (pmds) usually allocated on bootup via command line. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 9 ++ arch/arm64/include/asm/hugetlb.h | 117 +++++++++++++++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 8 ++ arch/arm64/include/asm/pgtable.h | 13 ++- arch/arm64/mm/Makefile | 1 + arch/arm64/mm/fault.c | 19 +--- arch/arm64/mm/hugetlbpage.c | 70 +++++++++++++++ 7 files changed, 220 insertions(+), 17 deletions(-) create mode 100644 arch/arm64/include/asm/hugetlb.h create mode 100644 arch/arm64/mm/hugetlbpage.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..cd6eca84a21c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -180,6 +180,15 @@ config HW_PERF_EVENTS Enable hardware performance counter support for perf events. If disabled, perf events will use software events only. +config SYS_SUPPORTS_HUGETLBFS + def_bool y + +config ARCH_WANT_GENERAL_HUGETLB + def_bool y + +config ARCH_WANT_HUGE_PMD_SHARE + def_bool y if !ARM64_64K_PAGES + source "mm/Kconfig" endmenu diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h new file mode 100644 index 000000000000..5b7ca8ace95f --- /dev/null +++ b/arch/arm64/include/asm/hugetlb.h @@ -0,0 +1,117 @@ +/* + * arch/arm64/include/asm/hugetlb.h + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_HUGETLB_H +#define __ASM_HUGETLB_H + +#include +#include + +static inline pte_t huge_ptep_get(pte_t *ptep) +{ + return *ptep; +} + +static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + set_pte_at(mm, addr, ptep, pte); +} + +static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + ptep_clear_flush(vma, addr, ptep); +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + ptep_set_wrprotect(mm, addr, ptep); +} + +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + return ptep_get_and_clear(mm, addr, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); +} + +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, + unsigned long ceiling) +{ + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (addr & ~huge_page_mask(h)) + return -EINVAL; + return 0; +} + +static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ +} + +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return 
pte_wrprotect(pte); +} + +static inline int arch_prepare_hugepage(struct page *page) +{ + return 0; +} + +static inline void arch_release_hugepage(struct page *page) +{ +} + +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} + +#endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 75fd13d289b9..e6e0a0d4cf9a 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -25,12 +25,19 @@ /* * Hardware page table definitions. * + * Level 1 descriptor (PUD). + */ + +#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) + +/* * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) +#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) /* * Section @@ -53,6 +60,7 @@ #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) #define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) +#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ #define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0f3aa1bc7ac8..29eac20dee5a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -173,8 +173,17 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, /* * Huge pte definitions. */ -#define pte_huge(pte) ((pte_val(pte) & PTE_TYPE_MASK) == PTE_TYPE_HUGEPAGE) -#define pte_mkhuge(pte) (__pte((pte_val(pte) & ~PTE_TYPE_MASK) | PTE_TYPE_HUGEPAGE)) +#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) + +/* + * Hugetlb definitions. 
+ */ +#define HUGE_MAX_HSTATE 2 +#define HPAGE_SHIFT PMD_SHIFT +#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define __HAVE_ARCH_PTE_SPECIAL diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 3140a2abcdc2..b51d36401d83 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -2,3 +2,4 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o tlb.o proc.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f51d669c8ebd..6c8ba25bf6bb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -358,17 +358,6 @@ static int __kprobes do_translation_fault(unsigned long addr, return 0; } -/* - * Some section permission faults need to be handled gracefully. They can - * happen due to a __{get,put}_user during an oops. - */ -static int do_sect_fault(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - do_bad_area(addr, esr, regs); - return 0; -} - /* * This abort handler always returns "fault". 
*/ @@ -392,12 +381,12 @@ static struct fault_info { { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, { do_bad, SIGBUS, 0, "reserved access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, { do_bad, SIGBUS, 0, "reserved permission fault" }, - { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, - { do_sect_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, { do_bad, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "asynchronous external abort" }, diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c new file mode 100644 index 000000000000..2fc8258bab2d --- /dev/null +++ b/arch/arm64/mm/hugetlbpage.c @@ -0,0 +1,70 @@ +/* + * arch/arm64/mm/hugetlbpage.c + * + * Copyright (C) 2013 Linaro Ltd. + * + * Based on arch/x86/mm/hugetlbpage.c. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} +#endif + +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return !(pmd_val(pmd) & PMD_TABLE_BIT); +} + +int pud_huge(pud_t pud) +{ + return !(pud_val(pud) & PUD_TABLE_BIT); +} + +static __init int setup_hugepagesz(char *opt) +{ + unsigned long ps = memparse(opt, &opt); + if (ps == PMD_SIZE) { + hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); + } else if (ps == PUD_SIZE) { + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + } else { + pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20); + return 0; + } + return 1; +} +__setup("hugepagesz=", setup_hugepagesz); From bc54bbc17d6fa0907c5d239aa89a09a03a77d7a4 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 25 Apr 2013 15:19:21 +0100 Subject: [PATCH 014/296] ARM64: mm: Raise MAX_ORDER for 64KB pages and THP. The buddy allocator has a default MAX_ORDER of 11, which is too low to allocate enough memory for 512MB Transparent HugePages if our base page size is 64KB. This patch introduces MAX_ZONE_ORDER and sets it to 14 when 64KB pages are used in conjunction with THP, otherwise the default value of 11 is used.
Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cd6eca84a21c..10607d63b945 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -191,6 +191,11 @@ config ARCH_WANT_HUGE_PMD_SHARE source "mm/Kconfig" +config FORCE_MAX_ZONEORDER + int + default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) + default "11" + endmenu menu "Boot options" From e66c9ccc07b6b6b88d8f9a29f516e9b7dbc4d52b Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 19 Apr 2013 16:23:57 +0100 Subject: [PATCH 015/296] ARM64: mm: THP support. Bring Transparent HugePage support to ARM. The size of a transparent huge page depends on the normal page size. A transparent huge page is always represented as a pmd. If PAGE_SIZE is 4KB, THPs are 2MB. If PAGE_SIZE is 64KB, THPs are 512MB. Signed-off-by: Steve Capper Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 3 ++ arch/arm64/include/asm/pgtable-hwdef.h | 4 ++ arch/arm64/include/asm/pgtable.h | 55 ++++++++++++++++++++++++++ arch/arm64/include/asm/tlb.h | 6 +++ arch/arm64/include/asm/tlbflush.h | 2 + 5 files changed, 70 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 10607d63b945..308a55636f76 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -189,6 +189,9 @@ config ARCH_WANT_GENERAL_HUGETLB config ARCH_WANT_HUGE_PMD_SHARE def_bool y if !ARM64_64K_PAGES +config HAVE_ARCH_TRANSPARENT_HUGEPAGE + def_bool y + source "mm/Kconfig" config FORCE_MAX_ZONEORDER diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index e6e0a0d4cf9a..63c9d0de05bb 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -42,6 +42,10 @@ /* * Section */ +#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) +#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ +#define PMD_SECT_RDONLY 
(_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 29eac20dee5a..12297a29cb90 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -187,6 +187,61 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_PTE_SPECIAL +/* + * Software PMD bits for THP + */ + +#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57) + +/* + * THP definitions. + */ +#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) + +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) +#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#endif + +#define PMD_BIT_FUNC(fn,op) \ +static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } + +PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); +PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); +PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); + +#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) + +#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) + +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) + +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN | + PMD_SECT_RDONLY | PMD_SECT_PROT_NONE | + PMD_SECT_VALID; + 
pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); + return pmd; +} + +#define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd) + +static inline int has_transparent_hugepage(void) +{ + return 1; +} + /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 5546653e5cc8..717031a762c2 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -190,4 +190,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, #define tlb_migrate_finish(mm) do { } while (0) +static inline void +tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) +{ + tlb_add_flush(tlb, addr); +} + #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 122d6320f745..8b482035cfc2 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -117,6 +117,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, dsb(); } +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif #endif From f28d02f4858d071a8c8a479be7ab81636f7f4c32 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 27 Nov 2013 16:59:27 +0000 Subject: [PATCH 016/296] arm64: Move PTE_PROT_NONE higher up PTE_PROT_NONE means that a pte is present but does not have any read/write attributes. However, setting the memory type like pgprot_writecombine() is allowed and such bits overlap with PTE_PROT_NONE. This causes mmap/munmap issues in drivers that change the vma->vm_pg_prot on PROT_NONE mappings. This patch reverts the PTE_FILE/PTE_PROT_NONE shift in commit 59911ca4325d (ARM64: mm: Move PTE_PROT_NONE bit) and moves PTE_PROT_NONE together with the other software bits. 
Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas Tested-by: Steve Capper Cc: # 3.11+ --- arch/arm64/include/asm/pgtable.h | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 12297a29cb90..8a81c8daf5b4 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -25,10 +25,11 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 2) /* only when !PTE_VALID */ -#define PTE_FILE (_AT(pteval_t, 1) << 3) /* only when !pte_present() */ +#define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) + /* bit 57 for PMD_SECT_SPLITTING */ +#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. @@ -345,18 +346,20 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a swap entry: - * bits 0, 2: present (must both be zero) - * bit 3: PTE_FILE - * bits 4-8: swap type - * bits 9-63: swap offset + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-8: swap type + * bits 9-57: swap offset */ -#define __SWP_TYPE_SHIFT 4 +#define __SWP_TYPE_SHIFT 3 #define __SWP_TYPE_BITS 6 +#define __SWP_OFFSET_BITS 49 #define __SWP_TYPE_MASK ((1 << __SWP_TYPE_BITS) - 1) #define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT) +#define __SWP_OFFSET_MASK ((1UL << __SWP_OFFSET_BITS) - 1) #define __swp_type(x) (((x).val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK) -#define __swp_offset(x) ((x).val >> __SWP_OFFSET_SHIFT) +#define __swp_offset(x) (((x).val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK) #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) @@ 
-370,15 +373,15 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; /* * Encode and decode a file entry: - * bits 0, 2: present (must both be zero) - * bit 3: PTE_FILE - * bits 4-63: file offset / PAGE_SIZE + * bits 0-1: present (must be zero) + * bit 2: PTE_FILE + * bits 3-57: file offset / PAGE_SIZE */ #define pte_file(pte) (pte_val(pte) & PTE_FILE) -#define pte_to_pgoff(x) (pte_val(x) >> 4) -#define pgoff_to_pte(x) __pte(((x) << 4) | PTE_FILE) +#define pte_to_pgoff(x) (pte_val(x) >> 3) +#define pgoff_to_pte(x) __pte(((x) << 3) | PTE_FILE) -#define PTE_FILE_MAX_BITS 60 +#define PTE_FILE_MAX_BITS 55 extern int kern_addr_valid(unsigned long addr); From e087bc5eb1ff9c47e9700c78f9d9d72b865f30ab Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 5 Dec 2013 12:04:51 +0000 Subject: [PATCH 017/296] arm64: mm: Fix PMD_SECT_PROT_NONE definition Modify the value of PMD_SECT_PROT_NONE to match that of PTE_NONE. This should have been in commit 3676f9ef5481 (Move PTE_PROT_NONE higher up). Signed-off-by: Steve Capper Cc: # 3.11+: 3676f9ef5481: arm64: Move PTE_PROT_NONE higher up Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable-hwdef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 63c9d0de05bb..2294f2330960 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -43,7 +43,7 @@ * Section */ #define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) -#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 2) +#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) From 1bf5c38e784b8a4fd7fcc820ea79366e9716b447 Mon Sep 17 00:00:00 2001 From: Andreas Sandberg Date: Tue, 21 Jan 2014 15:49:09 -0800 Subject: [PATCH 018/296] mm/hugetlb.c: call MMU notifiers when copying a hugetlb page range 
When copy_hugetlb_page_range() is called to copy a range of hugetlb mappings, the secondary MMUs are not notified if there is a protection downgrade, which breaks COW semantics in KVM. This patch adds the necessary MMU notifier calls. Signed-off-by: Andreas Sandberg Acked-by: Steve Capper Acked-by: Marc Zyngier Cc: Mel Gorman Cc: Rik van Riel Cc: Hugh Dickins Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 52caa3d25b6f..96cfebd0d67a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2336,16 +2336,26 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, int cow; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); + unsigned long mmun_start; /* For mmu_notifiers */ + unsigned long mmun_end; /* For mmu_notifiers */ + int ret = 0; cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + mmun_start = vma->vm_start; + mmun_end = vma->vm_end; + if (cow) + mmu_notifier_invalidate_range_start(src, mmun_start, mmun_end); + for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { src_pte = huge_pte_offset(src, addr); if (!src_pte) continue; dst_pte = huge_pte_alloc(dst, addr, sz); - if (!dst_pte) - goto nomem; + if (!dst_pte) { + ret = -ENOMEM; + break; + } /* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) @@ -2365,10 +2375,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, spin_unlock(&src->page_table_lock); spin_unlock(&dst->page_table_lock); } - return 0; -nomem: - return -ENOMEM; + if (cow) + mmu_notifier_invalidate_range_end(src, mmun_start, mmun_end); + + return ret; } static int is_hugetlb_entry_migration(pte_t pte) From 26e700376b6028c66c2c3ea70b47f3325ec916e5 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 15 Jan 2014 14:07:12 +0000 Subject: [PATCH 019/296] arm64: mm: 
Remove PTE_BIT_FUNC macro Expand out the pte manipulation functions. This makes our life easier when using things like tags and cscope. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 49 ++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 8a81c8daf5b4..57ed0035656c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -140,16 +140,47 @@ extern struct page *empty_zero_page; #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) -#define PTE_BIT_FUNC(fn,op) \ -static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + pte_val(pte) |= PTE_RDONLY; + return pte; +} -PTE_BIT_FUNC(wrprotect, |= PTE_RDONLY); -PTE_BIT_FUNC(mkwrite, &= ~PTE_RDONLY); -PTE_BIT_FUNC(mkclean, &= ~PTE_DIRTY); -PTE_BIT_FUNC(mkdirty, |= PTE_DIRTY); -PTE_BIT_FUNC(mkold, &= ~PTE_AF); -PTE_BIT_FUNC(mkyoung, |= PTE_AF); -PTE_BIT_FUNC(mkspecial, |= PTE_SPECIAL); +static inline pte_t pte_mkwrite(pte_t pte) +{ + pte_val(pte) &= ~PTE_RDONLY; + return pte; +} + +static inline pte_t pte_mkclean(pte_t pte) +{ + pte_val(pte) &= ~PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkdirty(pte_t pte) +{ + pte_val(pte) |= PTE_DIRTY; + return pte; +} + +static inline pte_t pte_mkold(pte_t pte) +{ + pte_val(pte) &= ~PTE_AF; + return pte; +} + +static inline pte_t pte_mkyoung(pte_t pte) +{ + pte_val(pte) |= PTE_AF; + return pte; +} + +static inline pte_t pte_mkspecial(pte_t pte) +{ + pte_val(pte) |= PTE_SPECIAL; + return pte; +} static inline void set_pte(pte_t *ptep, pte_t pte) { From 1c0f6f904655f94757977b647bb99aa15d716eee Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 15 Jan 2014 14:07:13 +0000 Subject: [PATCH 020/296] arm64: mm: Introduce PTE_WRITE We have the 
following means for encoding writable or dirty ptes: PTE_DIRTY PTE_RDONLY !pte_dirty && !pte_write 0 1 !pte_dirty && pte_write 0 1 pte_dirty && !pte_write 1 1 pte_dirty && pte_write 1 0 So we can't distinguish between writable clean ptes and read only ptes. This can cause problems with ptes being incorrectly flagged as read only when they are writable but not dirty. This patch introduces a new software bit PTE_WRITE which allows us to correctly identify writable ptes. PTE_RDONLY is now only clear for valid ptes where a page is both writable and dirty. Signed-off-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas Conflicts: arch/arm64/include/asm/pgtable.h --- arch/arm64/include/asm/pgtable.h | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 57ed0035656c..775fe212ff5c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -28,7 +28,7 @@ #define PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !pte_present() */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) - /* bit 57 for PMD_SECT_SPLITTING */ +#define PTE_WRITE (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* @@ -67,23 +67,23 @@ extern pgprot_t pgprot_default; #define _MOD_PROT(p, b) __pgprot_modify(p, 0, b) -#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) -#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) -#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN 
| PTE_UXN | PTE_RDONLY) -#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY) -#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY) +#define PAGE_NONE __pgprot_modify(pgprot_default, PTE_TYPE_MASK, PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_SHARED _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define PAGE_COPY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_COPY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_READONLY _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_default, PTE_USER | PTE_NG | PTE_PXN) +#define PAGE_KERNEL _MOD_PROT(pgprot_default, PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC _MOD_PROT(pgprot_default, PTE_UXN | PTE_DIRTY | PTE_WRITE) -#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) -#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) -#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) -#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_RDONLY) -#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_RDONLY) +#define __PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define __PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define __PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT |
PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +#define __PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define __PAGE_COPY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) +#define __PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) +#define __PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN) #endif /* __ASSEMBLY__ */ @@ -134,7 +134,7 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) #define pte_young(pte) (pte_val(pte) & PTE_AF) #define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_write(pte) (pte_val(pte) & PTE_WRITE) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ @@ -142,13 +142,13 @@ extern struct page *empty_zero_page; static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) |= PTE_RDONLY; + pte_val(pte) &= ~PTE_WRITE; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) &= ~PTE_RDONLY; + pte_val(pte) |= PTE_WRITE; return pte; } @@ -195,8 +195,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_valid_user(pte)) { if (pte_exec(pte)) __sync_icache_dcache(pte, addr); - if (!pte_dirty(pte)) - pte = pte_wrprotect(pte); + if (pte_dirty(pte) && pte_write(pte)) + pte_val(pte) &= ~PTE_RDONLY; + else + pte_val(pte) |= PTE_RDONLY; } set_pte(ptep, pte); @@ -364,7 +366,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } From 52b07c2707864bd8fa67093912dc6a4d2d602493 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 25 Feb 2014 11:38:53 +0000 Subject: [PATCH 021/296] arm64: mm: Add double logical 
invert to pte accessors Page table entries on ARM64 are 64 bits, and some pte functions such as pte_dirty return a bitwise-and of a flag with the pte value. If the flag to be tested resides in the upper 32 bits of the pte, then we run into the danger of the result being dropped if downcast. For example: gather_stats(page, md, pte_dirty(*pte), 1); where pte_dirty(*pte) is downcast to an int. This patch adds a double logical invert to all the pte_ accessors to ensure predictable downcasting. Signed-off-by: Steve Capper Cc: Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 775fe212ff5c..513fb3402d65 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -130,11 +130,11 @@ extern struct page *empty_zero_page; /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) -#define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & PTE_AF) -#define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (pte_val(pte) & PTE_WRITE) +#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) +#define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) +#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) +#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ From 8b531976d51c33d5ef83ca2637b08afc2ed51802 Mon Sep 17 00:00:00 2001 From: Ruchi Kandoi Date: Fri, 7 Mar 2014 12:54:30 -0800 Subject: [PATCH 022/296] Power: Add guard condition for maximum wakeup reasons Ensure the array for the wakeup reason IRQs does not overflow. 
Change-Id: Iddc57a3aeb1888f39d4e7b004164611803a4d37c Signed-off-by: Ruchi Kandoi (cherry picked from commit b5ea40cdfcf38296535f931a7e5e7bf47b6fad7f) --- kernel/power/wakeup_reason.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index 82e69fe52d0b..caf44213b14c 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -79,6 +79,13 @@ void log_wakeup_reason(int irq) printk(KERN_INFO "Resume caused by IRQ %d\n", irq); spin_lock(&resume_reason_lock); + if (irq_count == MAX_WAKEUP_REASON_IRQS) { + spin_unlock(&resume_reason_lock); + printk(KERN_WARNING "Resume caused by more than %d IRQs\n", + MAX_WAKEUP_REASON_IRQS); + return; + } + irq_list[irq_count++] = irq; spin_unlock(&resume_reason_lock); } From c9331cabfdfec4b8b0086ed155bb28e46330b14f Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 10 Mar 2014 14:21:30 -0700 Subject: [PATCH 023/296] power: wakeup_reason: rename irq_count to irqcount On x86, irq_count conflicts with a declaration in arch/x86/include/asm/processor.h Change-Id: I3e4fde0ff64ef59ff5ed2adc0ea3a644641ee0b7 Signed-off-by: Greg Hackmann --- kernel/power/wakeup_reason.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c index caf44213b14c..188a6bfacf5a 100644 --- a/kernel/power/wakeup_reason.c +++ b/kernel/power/wakeup_reason.c @@ -30,7 +30,7 @@ #define MAX_WAKEUP_REASON_IRQS 32 static int irq_list[MAX_WAKEUP_REASON_IRQS]; -static int irq_count; +static int irqcount; static struct kobject *wakeup_reason; static spinlock_t resume_reason_lock; @@ -40,7 +40,7 @@ static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr, int irq_no, buf_offset = 0; struct irq_desc *desc; spin_lock(&resume_reason_lock); - for (irq_no = 0; irq_no < irq_count; irq_no++) { + for (irq_no = 0; irq_no < irqcount; irq_no++) { desc = irq_to_desc(irq_list[irq_no]); if (desc && 
desc->action && desc->action->name) buf_offset += sprintf(buf + buf_offset, "%d %s\n", @@ -79,14 +79,14 @@ void log_wakeup_reason(int irq) printk(KERN_INFO "Resume caused by IRQ %d\n", irq); spin_lock(&resume_reason_lock); - if (irq_count == MAX_WAKEUP_REASON_IRQS) { + if (irqcount == MAX_WAKEUP_REASON_IRQS) { spin_unlock(&resume_reason_lock); printk(KERN_WARNING "Resume caused by more than %d IRQs\n", MAX_WAKEUP_REASON_IRQS); return; } - irq_list[irq_count++] = irq; + irq_list[irqcount++] = irq; spin_unlock(&resume_reason_lock); } @@ -97,7 +97,7 @@ static int wakeup_reason_pm_event(struct notifier_block *notifier, switch (pm_event) { case PM_SUSPEND_PREPARE: spin_lock(&resume_reason_lock); - irq_count = 0; + irqcount = 0; spin_unlock(&resume_reason_lock); break; default: From b0359d4fc547ed60187c302e906f26a9b64ec79b Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 10 Mar 2014 17:00:25 -0700 Subject: [PATCH 024/296] video: adf: export the adf_attachment_allow symbol to modules. There are no in-tree users of adf_attachment_allow, but out-of-tree modules want to use it. It looks like this function should be EXPORT_SYMBOL. Change-Id: Iad522dc5d32ac09fec6483bbc317db8ecae12e97 Signed-off-by: Alistair Strachan --- drivers/video/adf/adf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/video/adf/adf.c b/drivers/video/adf/adf.c index 933e74ac8098..2d20024fd251 100644 --- a/drivers/video/adf/adf.c +++ b/drivers/video/adf/adf.c @@ -920,6 +920,7 @@ done: return ret; } +EXPORT_SYMBOL(adf_attachment_allow); /** * adf_obj_type_str - string representation of an adf_obj_type From 6b04956b0ec9ebc3ec730905780fe64195167a00 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 23 Jun 2013 23:47:15 +0900 Subject: [PATCH 025/296] staging: android: Fix typo in staging/android Fix "with with" in debug message. 
Issue: ABIT-21 Change-Id: Icd0b195524f4c77272276991a1e8a00aecef65c9 Signed-off-by: Masanari Iida Signed-off-by: Greg Kroah-Hartman Signed-off-by: Qiming Shi Signed-off-by: Jun Tian --- drivers/staging/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 55195ea28de6..d4e529001934 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -792,7 +792,7 @@ static void binder_delete_free_buffer(struct binder_proc *proc, list_del(&buffer->entry); if (free_page_start || free_page_end) { binder_debug(BINDER_DEBUG_BUFFER_ALLOC, - "%d: merge free, buffer %p do not share page%s%s with with %p or %p\n", + "%d: merge free, buffer %p do not share page%s%s with %p or %p\n", proc->pid, buffer, free_page_start ? "" : " end", free_page_end ? "" : " start", prev, next); binder_update_page_range(proc, 0, free_page_start ? From 61382e516c9015d5c91334e1f28db551af9e9198 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 1 Feb 2013 09:41:37 +0000 Subject: [PATCH 026/296] ARM: fix ARCH_IXP4xx usage of ARCH_SUPPORTS_BIG_ENDIAN The Kconfig for arch/arm/mach-ixp4xx has a local definition of ARCH_SUPPORTS_BIG_ENDIAN which could be used elsewhere. This means that if IXP4xx is selected and this symbol is selected elsewhere then a warning is produced.
Clean the following error up by making the symbol be selected by the main ARCH_IXP4XX definition and have a common definition in arch/arm/mm/Kconfig warning: (ARCH_xxx) selects ARCH_SUPPORTS_BIG_ENDIAN which has unmet direct dependencies (ARCH_IXP4XX) warning: (ARCH_xxx) selects ARCH_SUPPORTS_BIG_ENDIAN which has unmet direct dependencies (ARCH_IXP4XX) Signed-off-by: Ben Dooks (cherry picked from commit d10d2d485497cdc62a7660cd981f8f1ae0dffe7d) Signed-off-by: Victor Kamensky --- arch/arm/Kconfig | 1 + arch/arm/mach-ixp4xx/Kconfig | 4 ---- arch/arm/mm/Kconfig | 6 ++++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1116be551be5..1e9cca81eeac 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -474,6 +474,7 @@ config ARCH_IXP4XX bool "IXP4xx-based" depends on MMU select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_SUPPORTS_BIG_ENDIAN select ARCH_REQUIRE_GPIOLIB select CLKSRC_MMIO select CPU_XSCALE diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index 73a2d905af8a..72de05f09cb8 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -1,9 +1,5 @@ if ARCH_IXP4XX -config ARCH_SUPPORTS_BIG_ENDIAN - bool - default y - menu "Intel IXP4xx Implementation Options" comment "IXP4xx Platforms" diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 08c9fe917d1f..7763900dee84 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -932,3 +932,9 @@ config ARCH_HAS_BARRIERS help This option allows the use of custom mandatory barriers included via the mach/barriers.h file. + +config ARCH_SUPPORTS_BIG_ENDIAN + bool + help + This option specifies the architecture can support big endian + operation. 
From 87d7e2fbb75c47fb83363953b8f865a6d4d359aa Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 12 Feb 2013 18:59:57 +0000 Subject: [PATCH 027/296] ARM: asm: Add ARM_BE8() assembly helper Add ARM_BE8() helper to wrap any code conditional on being compile when CONFIG_ARM_ENDIAN_BE8 is selected and convert existing places where this is to use it. Acked-by: Nicolas Pitre Reviewed-by: Will Deacon Signed-off-by: Ben Dooks (cherry picked from commit 457c2403c513c74f60d5757fd11ae927e5554a38) Signed-off-by: Victor Kamensky --- arch/arm/boot/compressed/head.S | 8 ++------ arch/arm/include/asm/assembler.h | 7 +++++++ arch/arm/kernel/entry-armv.S | 5 ++--- arch/arm/kernel/entry-common.S | 4 +--- arch/arm/mm/abort-ev6.S | 5 ++--- arch/arm/mm/proc-v6.S | 4 +--- arch/arm/mm/proc-v7.S | 4 +--- 7 files changed, 16 insertions(+), 21 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 032a8d987148..4eb8364edc12 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -679,9 +679,7 @@ __armv4_mmu_cache_on: mrc p15, 0, r0, c1, c0, 0 @ read control reg orr r0, r0, #0x5000 @ I-cache enable, RR cache replacement orr r0, r0, #0x0030 -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r0, r0, #1 << 25 @ big-endian page tables -#endif + ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables bl __common_mmu_cache_on mov r0, #0 mcr p15, 0, r0, c8, c7, 0 @ flush I,D TLBs @@ -708,9 +706,7 @@ __armv7_mmu_cache_on: orr r0, r0, #1 << 22 @ U (v6 unaligned access model) @ (needed for ARM1176) #ifdef CONFIG_MMU -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r0, r0, #1 << 25 @ big-endian page tables -#endif + ARM_BE8( orr r0, r0, #1 << 25 ) @ big-endian page tables mrcne p15, 0, r6, c2, c0, 2 @ read ttb control reg orrne r0, r0, #1 @ MMU enabled movne r1, #0xfffffffd @ domain 0 = client diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 05ee9eebad6b..e780afbcee54 100644 --- a/arch/arm/include/asm/assembler.h +++ 
b/arch/arm/include/asm/assembler.h @@ -53,6 +53,13 @@ #define put_byte_3 lsl #0 #endif +/* Select code for any configuration running in BE8 mode */ +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define ARM_BE8(code...) code +#else +#define ARM_BE8(code...) +#endif + /* * Data preload for architectures that support it */ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 32640ae7750f..45a68d6bb2a3 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -416,9 +416,8 @@ __und_usr: bne __und_usr_thumb sub r4, r2, #4 @ ARM instr at LR - 4 1: ldrt r0, [r4] -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r0, r0 @ little endian instruction -#endif + ARM_BE8(rev r0, r0) @ little endian instruction + @ r0 = 32-bit ARM instruction which caused the exception @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index bc5bc0a97131..8c79344552d5 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -379,9 +379,7 @@ ENTRY(vector_swi) #else ldr r10, [lr, #-4] @ get SWI instruction #endif -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r10, r10 @ little endian instruction -#endif + ARM_BE8(rev r10, r10) @ little endian instruction #elif defined(CONFIG_AEABI) diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 80741992a9fc..3815a8262af0 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -38,9 +38,8 @@ ENTRY(v6_early_abort) bne do_DataAbort bic r1, r1, #1 << 11 @ clear bit 11 of FSR ldr r3, [r4] @ read aborted ARM instruction -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r3, r3 -#endif + ARM_BE8(rev r3, r3) + do_ldrd_abort tmp=ip, insn=r3 tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. 
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index d07352819580..b96c6e64943e 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -219,9 +219,7 @@ __v6_setup: @ complete invalidations adr r5, v6_crval ldmia r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r6, r6, #1 << 25 @ big-endian page tables -#endif + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 19da84172cc3..769496e6e8e9 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -352,9 +352,7 @@ __v7_setup: #endif adr r5, v7_crval ldmia r5, {r5, r6} -#ifdef CONFIG_CPU_ENDIAN_BE8 - orr r6, r6, #1 << 25 @ big-endian page tables -#endif + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables #ifdef CONFIG_SWP_EMULATE orr r5, r5, #(1 << 10) @ set SW bit in "clear" bic r6, r6, #(1 << 10) @ clear it in "mmuset" From c145b15891c0964d86d0ed3b6c79f5457f28db2d Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 1 Feb 2013 16:23:08 +0100 Subject: [PATCH 028/296] ARM: fixup_pv_table bug when CPU_ENDIAN_BE8 The fixup_pv_table assumes that the instructions are in the same endian configuration as the data, but when the CPU is running in BE8 the instructions stay in little-endian format. Make sure if CONFIG_CPU_ENDIAN_BE8 is set that we do all the alterations to the instructions taking in to account the LDR/STR will be swapping the data endian-ness. Since the code is only modifying a byte, we avoid dual-swapping the data, and just change the bits we clear and ORR in (in the case where the code is not thumb2). For thumb2, we add the necessary rev16 instructions to ensure that the instructions are processed in the correct format, as it was easier than re-writing the code to contain a mask and shift. 
Signed-off-by: Ben Dooks Reviewed-by: Dave Martin Tested-by: Thomas Petazzoni (cherry picked from commit 2f9bf9beddb1649485b47302a5aba9761cbc9084) Signed-off-by: Victor Kamensky --- arch/arm/kernel/head.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 2725c87fade6..807a9f5a458c 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -583,8 +583,10 @@ __fixup_a_pv_table: b 2f 1: add r7, r3 ldrh ip, [r7, #2] +ARM_BE8(rev16 ip, ip) and ip, 0x8f00 orr ip, r6 @ mask in offset bits 31-24 +ARM_BE8(rev16 ip, ip) strh ip, [r7, #2] 2: cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot @@ -593,8 +595,14 @@ __fixup_a_pv_table: #else b 2f 1: ldr ip, [r7, r3] +#ifdef CONFIG_CPU_ENDIAN_BE8 + @ in BE8, we load data in BE, but instructions still in LE + bic ip, ip, #0xff000000 + orr ip, ip, r6, lsl#24 +#else bic ip, ip, #0x000000ff orr ip, ip, r6 @ mask in offset bits 31-24 +#endif str ip, [r7, r3] 2: cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot From 47287dde86da7a2fceb0973b99b20399d13cd38d Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 1 Feb 2013 09:40:42 +0000 Subject: [PATCH 029/296] ARM: set BE8 if LE in head code If we are booting in LE and compiled for BE8, then add code to set the state to bE8. Since the instruction stream is always LE, we do not need to do anything special to the instruction. Also ensure that the secondary processors are started in the same mode. Note, we do add about 20 bytes to the kernel image, but it seems easier to do this than adding another configuration to change. 
Signed-off-by: Ben Dooks Reviewed-by: Dave Martin Tested-by: Thomas Petazzoni (cherry picked from commit 97bcb0fea590d3d704f985bec08f342d28992634) Signed-off-by: Victor Kamensky Conflicts: arch/arm/kernel/sleep.S --- arch/arm/boot/compressed/head.S | 1 + arch/arm/kernel/head.S | 4 ++++ arch/arm/kernel/sleep.S | 1 + 3 files changed, 6 insertions(+) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 4eb8364edc12..f6e34be012ff 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -135,6 +135,7 @@ start: .word _edata @ zImage end address THUMB( .thumb ) 1: + ARM_BE8( setend be ) @ go BE8 if compiled for BE8 mrs r9, cpsr #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install @ get into SVC mode, reversibly diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 807a9f5a458c..11284e744c80 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -77,6 +77,7 @@ __HEAD ENTRY(stext) + ARM_BE8(setend be ) @ ensure we are in BE8 mode THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, @@ -350,6 +351,9 @@ ENTRY(secondary_startup) * the processor type - there is no need to check the machine type * as it has already been validated by the primary processor. 
*/ + + ARM_BE8(setend be) @ ensure we are in BE8 mode + #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install_secondary #endif diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index b5c1e636ed85..baf4d28213a5 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -81,6 +81,7 @@ ENDPROC(cpu_resume_after_mmu) .data .align ENTRY(cpu_resume) +ARM_BE8(setend be) @ ensure we are in BE mode #ifdef CONFIG_SMP mov r1, #0 @ fall-back logical index for UP ALT_SMP(mrc p15, 0, r0, c0, c0, 5) From 1eff66bbede9cab18f5cafcb376ebebdc6a3f95c Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 6 Feb 2013 18:25:36 +0000 Subject: [PATCH 030/296] ARM: pl01x debug code endian fix The PL01X debug code needs to take into account which endian mode the processor is running in. If it is big-endian, ensure the data is swapped appropriately. Note, we could do this slightly more efficiently if we have an macro to do the necessary swap for the bits used by test. Reviewed-by: Will Deacon Signed-off-by: Ben Dooks (cherry picked from commit 76e3faf156fa95b6465e747d702b94faf67117fc) Signed-off-by: Victor Kamensky Conflicts: arch/arm/include/debug/pl01x.S --- arch/arm/include/asm/hardware/debug-pl01x.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/include/asm/hardware/debug-pl01x.S b/arch/arm/include/asm/hardware/debug-pl01x.S index f9fd083eff63..6489d1ffe3c8 100644 --- a/arch/arm/include/asm/hardware/debug-pl01x.S +++ b/arch/arm/include/asm/hardware/debug-pl01x.S @@ -18,12 +18,14 @@ .macro waituart,rd,rx 1001: ldr \rd, [\rx, #UART01x_FR] + ARM_BE8( rev \rd, \rd ) tst \rd, #UART01x_FR_TXFF bne 1001b .endm .macro busyuart,rd,rx 1001: ldr \rd, [\rx, #UART01x_FR] + ARM_BE8( rev \rd, \rd ) tst \rd, #UART01x_FR_BUSY bne 1001b .endm From bd4b0c40b0d0858f59127056a4a9f93b9a4af500 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 6 Feb 2013 18:44:20 +0000 Subject: [PATCH 031/296] ARM: twd: data endian fix Ensure the twd driver uses the correct calls to access the 
hardware to ensure that we do not end up with data in the wrong endian format. Reviewed-by: Will Deacon Signed-off-by: Ben Dooks (cherry picked from commit 2e874ea342146130206f8b39f2103f33690a7547) Signed-off-by: Victor Kamensky Conflicts: arch/arm/kernel/smp_twd.c --- arch/arm/kernel/smp_twd.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index f6fd1d4398c6..4971ccf012ca 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -45,7 +45,7 @@ static void twd_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_PERIODIC: ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE | TWD_TIMER_CONTROL_PERIODIC; - __raw_writel(DIV_ROUND_CLOSEST(twd_timer_rate, HZ), + writel_relaxed(DIV_ROUND_CLOSEST(twd_timer_rate, HZ), twd_base + TWD_TIMER_LOAD); break; case CLOCK_EVT_MODE_ONESHOT: @@ -58,18 +58,18 @@ static void twd_set_mode(enum clock_event_mode mode, ctrl = 0; } - __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); } static int twd_set_next_event(unsigned long evt, struct clock_event_device *unused) { - unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL); + unsigned long ctrl = readl_relaxed(twd_base + TWD_TIMER_CONTROL); ctrl |= TWD_TIMER_CONTROL_ENABLE; - __raw_writel(evt, twd_base + TWD_TIMER_COUNTER); - __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(evt, twd_base + TWD_TIMER_COUNTER); + writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); return 0; } @@ -82,8 +82,8 @@ static int twd_set_next_event(unsigned long evt, */ static int twd_timer_ack(void) { - if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) { - __raw_writel(1, twd_base + TWD_TIMER_INTSTAT); + if (readl_relaxed(twd_base + TWD_TIMER_INTSTAT)) { + writel_relaxed(1, twd_base + TWD_TIMER_INTSTAT); return 1; } @@ -209,15 +209,15 @@ static void __cpuinit twd_calibrate_rate(void) waitjiffies += 5; /* enable, 
no interrupt or reload */ - __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0x1, twd_base + TWD_TIMER_CONTROL); /* maximum value */ - __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); + writel_relaxed(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); while (get_jiffies_64() < waitjiffies) udelay(10); - count = __raw_readl(twd_base + TWD_TIMER_COUNTER); + count = readl_relaxed(twd_base + TWD_TIMER_COUNTER); twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5); @@ -275,7 +275,7 @@ static int __cpuinit twd_timer_setup(struct clock_event_device *clk) * bother with the below. */ if (per_cpu(percpu_setup_called, cpu)) { - __raw_writel(0, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); clockevents_register_device(*__this_cpu_ptr(twd_evt)); enable_percpu_irq(clk->irq, 0); return 0; @@ -288,7 +288,7 @@ static int __cpuinit twd_timer_setup(struct clock_event_device *clk) * The following is done once per CPU the first time .setup() is * called. */ - __raw_writel(0, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); clk->name = "local_timer"; clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | From ed0d40f6dba757f9689583724dcc392010bce4aa Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 7 Feb 2013 11:14:21 +0000 Subject: [PATCH 032/296] ARM: smp_scu: data endian fixes The smp_scu driver needs to use the relaxed readl/write accessors to avoid any issues with the endian mode the processor core is in. 
Reviewed-by: Will Deacon Signed-off-by: Ben Dooks (cherry picked from commit 099a4809133dc6548d37cc143ab0cb9c2eba97bb) Signed-off-by: Victor Kamensky --- arch/arm/kernel/smp_scu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 5bc1a63284e3..1aafa0d785eb 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -28,7 +28,7 @@ */ unsigned int __init scu_get_core_count(void __iomem *scu_base) { - unsigned int ncores = __raw_readl(scu_base + SCU_CONFIG); + unsigned int ncores = readl_relaxed(scu_base + SCU_CONFIG); return (ncores & 0x03) + 1; } @@ -42,19 +42,19 @@ void scu_enable(void __iomem *scu_base) #ifdef CONFIG_ARM_ERRATA_764369 /* Cortex-A9 only */ if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) { - scu_ctrl = __raw_readl(scu_base + 0x30); + scu_ctrl = readl_relaxed(scu_base + 0x30); if (!(scu_ctrl & 1)) - __raw_writel(scu_ctrl | 0x1, scu_base + 0x30); + writel_relaxed(scu_ctrl | 0x1, scu_base + 0x30); } #endif - scu_ctrl = __raw_readl(scu_base + SCU_CTRL); + scu_ctrl = readl_relaxed(scu_base + SCU_CTRL); /* already enabled? 
*/ if (scu_ctrl & 1) return; scu_ctrl |= 1; - __raw_writel(scu_ctrl, scu_base + SCU_CTRL); + writel_relaxed(scu_ctrl, scu_base + SCU_CTRL); /* * Ensure that the data accessed by CPU0 before the SCU was @@ -80,9 +80,9 @@ int scu_power_mode(void __iomem *scu_base, unsigned int mode) if (mode > 3 || mode == 1 || cpu > 3) return -EINVAL; - val = __raw_readb(scu_base + SCU_CPU_STATUS + cpu) & ~0x03; + val = readb_relaxed(scu_base + SCU_CPU_STATUS + cpu) & ~0x03; val |= mode; - __raw_writeb(val, scu_base + SCU_CPU_STATUS + cpu); + writeb_relaxed(val, scu_base + SCU_CPU_STATUS + cpu); return 0; } From cfccea19fe8983134b45ea0a495a4adabc4973b8 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 1 Feb 2013 09:31:34 +0000 Subject: [PATCH 033/296] ARM: highbank: enable big-endian Apart from a xgmac driver issue, the highbank seems to work correctly in big-endian mode. Allow the selection of big-endian in the system. Signed-off-by: Ben Dooks Acked-by: Rob Herring (cherry picked from commit 50eec2fce45ed48575f1c0582b748e409da08511) Signed-off-by: Victor Kamensky --- arch/arm/mach-highbank/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index cd9fcb1cd7ab..b8466fb00f55 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -2,6 +2,7 @@ config ARCH_HIGHBANK bool "Calxeda ECX-1000/2000 (Highbank/Midway)" if ARCH_MULTI_V7 select ARCH_HAS_CPUFREQ select ARCH_HAS_OPP + select ARCH_SUPPORTS_BIG_ENDIAN select ARCH_WANT_OPTIONAL_GPIOLIB select ARM_AMBA select ARM_GIC From 197a3541d781995a80ea3d2f167d8d97d0cbdc9b Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 1 Feb 2013 10:36:22 +0000 Subject: [PATCH 034/296] ARM: mvebu: support running big-endian Add indication we can run these cores in BE mode, and ensure that the secondary CPU is set to big-endian mode in the initialisation code as the initial code runs little-endian. 
Signed-off-by: Ben Dooks Tested-by: Thomas Petazzoni Acked-by: Jason Cooper (cherry picked from commit bca028e7c2537fea9f401c20dd7b2103358b5efe) Signed-off-by: Victor Kamensky Conflicts: arch/arm/mach-mvebu/headsmp.S --- arch/arm/mach-mvebu/Kconfig | 1 + arch/arm/mach-mvebu/coherency_ll.S | 3 +++ arch/arm/mach-mvebu/headsmp.S | 3 +++ 3 files changed, 7 insertions(+) diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 80a8bcacd9d5..317cdb800099 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -1,5 +1,6 @@ config ARCH_MVEBU bool "Marvell SOCs with Device Tree support" if ARCH_MULTI_V7 + select ARCH_SUPPORTS_BIG_ENDIAN select CLKSRC_MMIO select COMMON_CLK select GENERIC_CLOCKEVENTS diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 5476669ba905..ee7598fe75db 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -20,6 +20,8 @@ #define ARMADA_XP_CFB_CTL_REG_OFFSET 0x0 #define ARMADA_XP_CFB_CFG_REG_OFFSET 0x4 +#include + .text /* * r0: Coherency fabric base register address @@ -29,6 +31,7 @@ ENTRY(ll_set_cpu_coherent) /* Create bit by cpu index */ mov r3, #(1 << 24) lsl r1, r3, r1 +ARM_BE8(rev r1, r1) /* Add CPU to SMP group - Atomic */ add r3, r0, #ARMADA_XP_CFB_CTL_REG_OFFSET diff --git a/arch/arm/mach-mvebu/headsmp.S b/arch/arm/mach-mvebu/headsmp.S index a06e0ede8c08..458ed3fb2626 100644 --- a/arch/arm/mach-mvebu/headsmp.S +++ b/arch/arm/mach-mvebu/headsmp.S @@ -21,6 +21,8 @@ #include #include +#include + /* * At this stage the secondary CPUs don't have acces yet to the MMU, so * we have to provide physical addresses @@ -35,6 +37,7 @@ * startup */ ENTRY(armada_xp_secondary_startup) + ARM_BE8(setend be ) @ go BE8 if entered LE /* Read CPU id */ mrc p15, 0, r1, c0, c0, 5 From 17225ab47c37bc578fa4ef5aac5f2368fc503c37 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 28 May 2013 21:34:50 +0100 Subject: [PATCH 035/296] ARM: vexpress: add big 
endian support Add support for the versatile express systems to boot big-endian. Signed-off-by: Ben Dooks (cherry picked from commit 98dec91fa36a4a74f7c44dd2dfb000203656f4f4) Signed-off-by: Victor Kamensky --- arch/arm/mach-vexpress/Kconfig | 1 + arch/arm/plat-versatile/headsmp.S | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index dd3d5975a5c1..39858ba03084 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -3,6 +3,7 @@ config ARCH_VEXPRESS select ARCH_HAS_CPUFREQ select ARCH_HAS_OPP select ARCH_REQUIRE_GPIOLIB + select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC select ARM_TIMER_SP804 diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S index 2677bc3762d7..40f27e52de75 100644 --- a/arch/arm/plat-versatile/headsmp.S +++ b/arch/arm/plat-versatile/headsmp.S @@ -10,6 +10,7 @@ */ #include #include +#include /* * Realview/Versatile Express specific entry point for secondary CPUs. @@ -17,6 +18,7 @@ * until we're ready for them to initialise. */ ENTRY(versatile_secondary_startup) + ARM_BE8(setend be) mrc p15, 0, r0, c0, c0, 5 bic r0, #0xff000000 adr r4, 1f From 2037b6f8fdba30601e92c10b3d9f79e81d82e9bd Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 18 Jul 2013 21:10:56 +0100 Subject: [PATCH 036/296] ARM: alignment: correctly decode instructions in BE8 mode. If we are in BE8 mode, we must deal with the instruction stream being in LE order when data is being loaded in BE order. Ensure the data is swapped before processing to avoid the following: Change to using <asm/opcodes.h> to provide the necessary conversion functions to change the byte ordering.
This stops the following warning messages from the kernel on a fault: Unhandled fault: alignment exception (0x001) at 0xbfa09567 Alignment trap: not handling instruction 030091e8 at [<80333e8c>] Signed-off-by: Ben Dooks Reviewed-by: Dave Martin Tested-by: Thomas Petazzoni (cherry picked from commit 8592edf0dec8159fde379eb7e056eaddbbd697f2) Signed-off-by: Victor Kamensky --- arch/arm/mm/alignment.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f4585b89078..924036473b16 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "fault.h" @@ -762,21 +763,25 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (thumb_mode(regs)) { u16 *ptr = (u16 *)(instrptr & ~1); fault = probe_kernel_address(ptr, tinstr); + tinstr = __mem_to_opcode_thumb16(tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ u16 tinst2 = 0; fault = probe_kernel_address(ptr + 1, tinst2); - instr = (tinstr << 16) | tinst2; + tinst2 = __mem_to_opcode_thumb16(tinst2); + instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { isize = 2; instr = thumb2arm(tinstr); } } - } else + } else { fault = probe_kernel_address(instrptr, instr); + instr = __mem_to_opcode_arm(instr); + } if (fault) { type = TYPE_FAULT; From 2a08f5bc8607f1fcc8345c2a9d60f16210debeb5 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 19 Jul 2013 17:12:05 +0100 Subject: [PATCH 037/296] ARM: traps: use to get correct instruction order The trap handler needs to take into account the endian configuration of the system when loading instructions. Use to provide the necessary conversion functions. 
Signed-off-by: Ben Dooks Tested-by: Thomas Petazzoni (cherry picked from commit a79a0cb1d35ec422dcf493cef1bebf9fdfcfdb9a) Signed-off-by: Victor Kamensky --- arch/arm/kernel/traps.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index d6a0fdb6c2ee..5989b0f317dd 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -34,6 +34,7 @@ #include #include #include +#include static const char *handler[]= { "prefetch abort", @@ -408,25 +409,28 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (processor_mode(regs) == SVC_MODE) { #ifdef CONFIG_THUMB2_KERNEL if (thumb_mode(regs)) { - instr = ((u16 *)pc)[0]; + instr = __mem_to_opcode_thumb16(((u16 *)pc)[0]); if (is_wide_instruction(instr)) { - instr <<= 16; - instr |= ((u16 *)pc)[1]; + u16 inst2; + inst2 = __mem_to_opcode_thumb16(((u16 *)pc)[1]); + instr = __opcode_thumb32_compose(instr, inst2); } } else #endif - instr = *(u32 *) pc; + instr = __mem_to_opcode_arm(*(u32 *) pc); } else if (thumb_mode(regs)) { if (get_user(instr, (u16 __user *)pc)) goto die_sig; + instr = __mem_to_opcode_thumb16(instr); if (is_wide_instruction(instr)) { unsigned int instr2; if (get_user(instr2, (u16 __user *)pc+1)) goto die_sig; - instr <<= 16; - instr |= instr2; + instr2 = __mem_to_opcode_thumb16(instr2); + instr = __opcode_thumb32_compose(instr, instr2); } } else if (get_user(instr, (u32 __user *)pc)) { + instr = __mem_to_opcode_arm(instr); goto die_sig; } From a973c8bf09c8312c5127662f12cd7b1267064ef7 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 19 Jul 2013 18:27:23 +0100 Subject: [PATCH 038/296] ARM: module: correctly relocate instructions in BE8 When in BE8 mode, our instructions are not in the same ordering as the data, so use to take this into account. 
Note, also requires modules to be built --be8 Signed-off-by: Ben Dooks Reviewed-by: Dave Martin (cherry picked from commit f592d323bc2353db871d1e840f05b27e0730fb10) Signed-off-by: Victor Kamensky --- arch/arm/kernel/module.c | 57 ++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 1e9be5d25e56..7e137873083d 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -24,6 +24,7 @@ #include #include #include +#include #ifdef CONFIG_XIP_KERNEL /* @@ -60,6 +61,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, Elf32_Sym *sym; const char *symname; s32 offset; + u32 tmp; #ifdef CONFIG_THUMB2_KERNEL u32 upper, lower, sign, j1, j2; #endif @@ -95,7 +97,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: - offset = (*(u32 *)loc & 0x00ffffff) << 2; + offset = __mem_to_opcode_arm(*(u32 *)loc); + offset = (offset & 0x00ffffff) << 2; if (offset & 0x02000000) offset -= 0x04000000; @@ -111,9 +114,10 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, } offset >>= 2; + offset &= 0x00ffffff; - *(u32 *)loc &= 0xff000000; - *(u32 *)loc |= offset & 0x00ffffff; + *(u32 *)loc &= __opcode_to_mem_arm(0xff000000); + *(u32 *)loc |= __opcode_to_mem_arm(offset); break; case R_ARM_V4BX: @@ -121,8 +125,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, * other bits to re-code instruction as * MOV PC,Rm. 
*/ - *(u32 *)loc &= 0xf000000f; - *(u32 *)loc |= 0x01a0f000; + *(u32 *)loc &= __opcode_to_mem_arm(0xf000000f); + *(u32 *)loc |= __opcode_to_mem_arm(0x01a0f000); break; case R_ARM_PREL31: @@ -132,7 +136,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: - offset = *(u32 *)loc; + offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); offset = (offset ^ 0x8000) - 0x8000; @@ -140,16 +144,18 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) offset >>= 16; - *(u32 *)loc &= 0xfff0f000; - *(u32 *)loc |= ((offset & 0xf000) << 4) | - (offset & 0x0fff); + tmp &= 0xfff0f000; + tmp |= ((offset & 0xf000) << 4) | + (offset & 0x0fff); + + *(u32 *)loc = __opcode_to_mem_arm(tmp); break; #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: - upper = *(u16 *)loc; - lower = *(u16 *)(loc + 2); + upper = __mem_to_opcode_thumb16(*(u16 *)loc); + lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); /* * 25 bit signed address range (Thumb-2 BL and B.W @@ -198,17 +204,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, sign = (offset >> 24) & 1; j1 = sign ^ (~(offset >> 23) & 1); j2 = sign ^ (~(offset >> 22) & 1); - *(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) | + upper = (u16)((upper & 0xf800) | (sign << 10) | ((offset >> 12) & 0x03ff)); - *(u16 *)(loc + 2) = (u16)((lower & 0xd000) | - (j1 << 13) | (j2 << 11) | - ((offset >> 1) & 0x07ff)); + lower = (u16)((lower & 0xd000) | + (j1 << 13) | (j2 << 11) | + ((offset >> 1) & 0x07ff)); + + *(u16 *)loc = __opcode_to_mem_thumb16(upper); + *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); break; case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: - upper = *(u16 *)loc; - lower = *(u16 *)(loc + 2); + upper = __mem_to_opcode_thumb16(*(u16 *)loc); + lower = __mem_to_opcode_thumb16(*(u16 
*)(loc + 2)); /* * MOVT/MOVW instructions encoding in Thumb-2: @@ -229,12 +238,14 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) offset >>= 16; - *(u16 *)loc = (u16)((upper & 0xfbf0) | - ((offset & 0xf000) >> 12) | - ((offset & 0x0800) >> 1)); - *(u16 *)(loc + 2) = (u16)((lower & 0x8f00) | - ((offset & 0x0700) << 4) | - (offset & 0x00ff)); + upper = (u16)((upper & 0xfbf0) | + ((offset & 0xf000) >> 12) | + ((offset & 0x0800) >> 1)); + lower = (u16)((lower & 0x8f00) | + ((offset & 0x0700) << 4) | + (offset & 0x00ff)); + *(u16 *)loc = __opcode_to_mem_thumb16(upper); + *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); break; #endif From f948a286a732c65e1b21588ebdd429aee1b79492 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Mon, 22 Jul 2013 16:32:19 +0100 Subject: [PATCH 039/296] ARM: set --be8 when linking modules To avoid having to make every text section swap the instruction order of all instructions, make sure modules are also built with --be8 (as is the current kernel final link). If we do not do this, we would end up having to swap all instructions when loading a module, instead of just the instructions that we are applying ELF relocations to.
Signed-off-by: Ben Dooks Reviewed-by: Dave Martin (cherry picked from commit 0ab89d0bf8054c3146ec06df357946bb87f36729) Signed-off-by: Victor Kamensky --- arch/arm/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 1ba358ba16b8..70bc19e2274f 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -16,6 +16,7 @@ LDFLAGS := LDFLAGS_vmlinux :=-p --no-undefined -X ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 +LDFLAGS_MODULE += --be8 endif OBJCOPYFLAGS :=-O binary -R .comment -S From fb2feb1831b02408515b00de10b6824fb758d88f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 24 Jul 2013 16:09:57 +0100 Subject: [PATCH 040/296] ARM: hardware: fix endian-ness in <hardware/coresight.h> The <hardware/coresight.h> needs to take into account the endian-ness of the processor when reading and writing data, so change to using the readl/writel relaxed variants from the raw ones. Signed-off-by: Ben Dooks (cherry picked from commit bfdef3b32d2f36bf137c039de9a545cdfcfbafe2) Signed-off-by: Victor Kamensky --- arch/arm/include/asm/hardware/coresight.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/hardware/coresight.h b/arch/arm/include/asm/hardware/coresight.h index 0cf7a6b842ff..ad774f37c47c 100644 --- a/arch/arm/include/asm/hardware/coresight.h +++ b/arch/arm/include/asm/hardware/coresight.h @@ -24,8 +24,8 @@ #define TRACER_TIMEOUT 10000 #define etm_writel(t, v, x) \ - (__raw_writel((v), (t)->etm_regs + (x))) -#define etm_readl(t, x) (__raw_readl((t)->etm_regs + (x))) + (writel_relaxed((v), (t)->etm_regs + (x))) +#define etm_readl(t, x) (readl_relaxed((t)->etm_regs + (x))) /* CoreSight Management Registers */ #define CSMR_LOCKACCESS 0xfb0 @@ -142,8 +142,8 @@ #define ETBFF_TRIGFL BIT(10) #define etb_writel(t, v, x) \ - (__raw_writel((v), (t)->etb_regs + (x))) -#define etb_readl(t, x) (__raw_readl((t)->etb_regs + (x))) + (writel_relaxed((v), (t)->etb_regs + (x))) +#define etb_readl(t, x) (readl_relaxed((t)->etb_regs
+ (x))) #define etm_lock(t) do { etm_writel((t), 0, CSMR_LOCKACCESS); } while (0) #define etm_unlock(t) \ From 9884db0b95b316311fc47dcc9456b6d678631e27 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 24 Jul 2013 15:44:56 +0100 Subject: [PATCH 041/296] ARM: net: fix arm instruction endian-ness in bpf_jit_32.c Use <asm/opcodes.h> to correctly transform instruction byte ordering into in-memory ordering. Signed-off-by: Ben Dooks Reviewed-by: Dave Martin (cherry picked from commit 3460743e025addc1ecbd496db2231181a2431774) Signed-off-by: Victor Kamensky --- arch/arm/net/bpf_jit_32.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6de423dbd385..78351ca8d51e 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "bpf_jit_32.h" @@ -113,8 +114,11 @@ static u32 jit_udiv(u32 dividend, u32 divisor) static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) { + inst |= (cond << 28); + inst = __opcode_to_mem_arm(inst); + if (ctx->target != NULL) - ctx->target[ctx->idx] = inst | (cond << 28); + ctx->target[ctx->idx] = inst; ctx->idx++; } From 56cbe5efe6f28a7efb459620281717fd7c109e05 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 25 Jul 2013 14:38:03 +0100 Subject: [PATCH 042/296] ARM: Correct BUG() assembly to ensure it is endian-agnostic Currently BUG() uses .word or .hword to create the necessary illegal instructions. However if we are building BE8 then these get swapped by the linker into different illegal instructions in the text. This means that the BUG() macro does not get trapped properly. Change to using <asm/opcodes.h> to provide the necessary ARM instruction building as we cannot rely on gcc/gas having the `.inst` instructions which were added to try and resolve this issue (reported by Dave Martin).
Signed-off-by: Ben Dooks Reviewed-by: Dave Martin (cherry picked from commit 63328070eff2f4fd730c86966a0dbc976147c39f) Signed-off-by: Victor Kamensky --- arch/arm/include/asm/bug.h | 10 ++++++---- arch/arm/kernel/traps.c | 8 +++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index 7af5c6c3653a..b274bde24905 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -2,6 +2,8 @@ #define _ASMARM_BUG_H #include +#include +#include #ifdef CONFIG_BUG @@ -12,10 +14,10 @@ */ #ifdef CONFIG_THUMB2_KERNEL #define BUG_INSTR_VALUE 0xde02 -#define BUG_INSTR_TYPE ".hword " +#define BUG_INSTR(__value) __inst_thumb16(__value) #else #define BUG_INSTR_VALUE 0xe7f001f2 -#define BUG_INSTR_TYPE ".word " +#define BUG_INSTR(__value) __inst_arm(__value) #endif @@ -33,7 +35,7 @@ #define __BUG(__file, __line, __value) \ do { \ - asm volatile("1:\t" BUG_INSTR_TYPE #__value "\n" \ + asm volatile("1:\t" BUG_INSTR(__value) "\n" \ ".pushsection .rodata.str, \"aMS\", %progbits, 1\n" \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ @@ -48,7 +50,7 @@ do { \ #define __BUG(__file, __line, __value) \ do { \ - asm volatile(BUG_INSTR_TYPE #__value); \ + asm volatile(BUG_INSTR(__value) "\n"); \ unreachable(); \ } while (0) #endif /* CONFIG_DEBUG_BUGVERBOSE */ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5989b0f317dd..b4fd850c34b2 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -348,15 +348,17 @@ void arm_notify_die(const char *str, struct pt_regs *regs, int is_valid_bugaddr(unsigned long pc) { #ifdef CONFIG_THUMB2_KERNEL - unsigned short bkpt; + u16 bkpt; + u16 insn = __opcode_to_mem_thumb16(BUG_INSTR_VALUE); #else - unsigned long bkpt; + u32 bkpt; + u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif if (probe_kernel_address((unsigned *)pc, bkpt)) return 0; - return bkpt == BUG_INSTR_VALUE; + return bkpt == insn; } #endif From 
124fd249b01ee8234be028ba0fefac8473cd5036 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 25 Jul 2013 15:47:40 +0100 Subject: [PATCH 043/296] ARM: kdgb: use <asm/opcodes.h> for data to be assembled as intruction The arch_kgdb_breakpoint() function uses an inline assembly directive to assemble a specific instruction using .word. This means the linker will not treat it as an instruction, and therefore incorrectly swap the endian-ness if running BE8. As noted, this code means that kgdb is really only usable on arm32 kernels, and should be made dependent on not being a thumb2 kernel until fixed. However this is not something to be added to this patch. Signed-off-by: Ben Dooks Reviewed-by: Dave Martin (cherry picked from commit 5a8b93fc9457be90adfa10d3df6497393c5e2dc2) Signed-off-by: Victor Kamensky --- arch/arm/include/asm/kgdb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h index 48066ce9ea34..0a9d5dd93294 100644 --- a/arch/arm/include/asm/kgdb.h +++ b/arch/arm/include/asm/kgdb.h @@ -11,6 +11,7 @@ #define __ARM_KGDB_H__ #include +#include /* * GDB assumes that we're a user process being debugged, so @@ -41,7 +42,7 @@ static inline void arch_kgdb_breakpoint(void) { - asm(".word 0xe7ffdeff"); + asm(__inst_arm(0xe7ffdeff)); } extern void kgdb_handle_bus_error(void); From d66e226e5f9197e89511c6ddae2394ee53cb5d02 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Fri, 26 Jul 2013 09:28:53 -0700 Subject: [PATCH 044/296] ARM: atomic64: fix endian-ness in atomic.h Fix inline asm for atomic64_xxx functions in arm atomic.h. Instead of %H operand specifiers code should use %Q for least significant part of the value, and %R for the most significant part of the value. %H always returns the higher of the two register numbers, and therefore it is not endian neutral. %H should be used with ldrexd and strexd instructions.
Signed-off-by: Victor Kamensky Acked-by: Will Deacon Signed-off-by: Ben Dooks (cherry picked from commit 2245f92498b216b50e744423bde17626287409d8) Signed-off-by: Victor Kamensky --- arch/arm/include/asm/atomic.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index da1c77d39327..6447a0b7b127 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -301,8 +301,8 @@ static inline void atomic64_add(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_add\n" "1: ldrexd %0, %H0, [%3]\n" -" adds %0, %0, %4\n" -" adc %H0, %H0, %H4\n" +" adds %Q0, %Q0, %Q4\n" +" adc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -320,8 +320,8 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_add_return\n" "1: ldrexd %0, %H0, [%3]\n" -" adds %0, %0, %4\n" -" adc %H0, %H0, %H4\n" +" adds %Q0, %Q0, %Q4\n" +" adc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -341,8 +341,8 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_sub\n" "1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, %4\n" -" sbc %H0, %H0, %H4\n" +" subs %Q0, %Q0, %Q4\n" +" sbc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -360,8 +360,8 @@ static inline u64 atomic64_sub_return(u64 i, atomic64_t *v) __asm__ __volatile__("@ atomic64_sub_return\n" "1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, %4\n" -" sbc %H0, %H0, %H4\n" +" subs %Q0, %Q0, %Q4\n" +" sbc %R0, %R0, %R4\n" " strexd %1, %0, %H0, [%3]\n" " teq %1, #0\n" " bne 1b" @@ -428,9 +428,9 @@ static inline u64 atomic64_dec_if_positive(atomic64_t *v) __asm__ __volatile__("@ atomic64_dec_if_positive\n" "1: ldrexd %0, %H0, [%3]\n" -" subs %0, %0, #1\n" -" sbc %H0, %H0, #0\n" -" teq %H0, #0\n" +" subs %Q0, %Q0, #1\n" +" sbc %R0, %R0, #0\n" +" teq %R0, #0\n" " bmi 2f\n" " strexd %1, %0, %H0, 
[%3]\n" " teq %1, #0\n" @@ -459,8 +459,8 @@ static inline int atomic64_add_unless(atomic64_t *v, u64 a, u64 u) " teqeq %H0, %H5\n" " moveq %1, #0\n" " beq 2f\n" -" adds %0, %0, %6\n" -" adc %H0, %H0, %H6\n" +" adds %Q0, %Q0, %Q6\n" +" adc %R0, %R0, %R6\n" " strexd %2, %0, %H0, [%4]\n" " teq %2, #0\n" " bne 1b\n" From 43fb56589e8a57efc166673dae4da94bfca32f08 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 27 Aug 2013 22:41:57 -0700 Subject: [PATCH 045/296] ARM: signal: sigreturn_codes should be endian neutral to work in BE8 In case of BE8 kernel data is in BE order whereas code stays in LE order. Move sigreturn_codes to separate .S file and use proper assembler mnemonics for these code snippets. In this case compiler will take care of proper instructions byteswaps for BE8 case. Change assumes that sufficiently Thumb-capable tools are used to build kernel. Problem was discovered during ltp testing of BE system: all rt_sig* tests failed. Tested against the same tests in both BE and LE modes. 
Signed-off-by: Victor Kamensky Reviewed-by: Dave Martin Signed-off-by: Ben Dooks (cherry picked from commit 574e2b5111e13827da501771b27d92e6e3f2e3d7) Signed-off-by: Victor Kamensky Conflicts: arch/arm/kernel/Makefile --- arch/arm/kernel/Makefile | 3 +- arch/arm/kernel/signal.c | 24 +--------- arch/arm/kernel/sigreturn_codes.S | 80 +++++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 24 deletions(-) create mode 100644 arch/arm/kernel/sigreturn_codes.S diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index dd9d90ab65d0..aa775438388c 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -17,7 +17,8 @@ CFLAGS_REMOVE_return_address.o = -pg obj-y := elf.o entry-armv.o entry-common.o irq.o opcodes.o \ process.o ptrace.o return_address.o sched_clock.o \ - setup.o signal.o stacktrace.o sys_arm.o time.o traps.o + setup.o signal.o sigreturn_codes.o \ + stacktrace.o sys_arm.o time.o traps.o obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 5a42c12767af..3c23086dc8e2 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -21,29 +21,7 @@ #include #include -/* - * For ARM syscalls, we encode the syscall number into the instruction. - */ -#define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)) -#define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)) - -/* - * With EABI, the syscall number has to be loaded into r7. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | (__NR_sigreturn - __NR_SYSCALL_BASE)) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) - -/* - * For Thumb syscalls, we pass the syscall number via r7. We therefore - * need two 16-bit instructions. 
- */ -#define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE)) -#define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) - -static const unsigned long sigreturn_codes[7] = { - MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN, -}; +extern const unsigned long sigreturn_codes[7]; static unsigned long signal_return_offset; diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S new file mode 100644 index 000000000000..3c5d0f2170fd --- /dev/null +++ b/arch/arm/kernel/sigreturn_codes.S @@ -0,0 +1,80 @@ +/* + * sigreturn_codes.S - code sinpets for sigreturn syscalls + * + * Created by: Victor Kamensky, 2013-08-13 + * Copyright: (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include + +/* + * For ARM syscalls, we encode the syscall number into the instruction. + * With EABI, the syscall number has to be loaded into r7. As result + * ARM syscall sequence snippet will have move and svc in .arm encoding + * + * For Thumb syscalls, we pass the syscall number via r7. We therefore + * need two 16-bit instructions in .thumb encoding + * + * Please note sigreturn_codes code are not executed in place. Instead + * they just copied by kernel into appropriate places. Code inside of + * arch/arm/kernel/signal.c is very sensitive to layout of these code + * snippets. 
+ */ + +#if __LINUX_ARM_ARCH__ <= 4 + /* + * Note we manually set minimally required arch that supports + * required thumb opcodes for early arch versions. It is OK + * for this file to be used in combination with other + * lower arch variants, since these code snippets are only + * used as input data. + */ + .arch armv4t +#endif + + .section .rodata + .global sigreturn_codes + .type sigreturn_codes, #object + + .arm + +sigreturn_codes: + + /* ARM sigreturn syscall code snippet */ + mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) + swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE) + + /* Thumb sigreturn syscall code snippet */ + .thumb + movs r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) + swi #0 + + /* ARM sigreturn_rt syscall code snippet */ + .arm + mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) + swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) + + /* Thumb sigreturn_rt syscall code snippet */ + .thumb + movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) + swi #0 + + /* + * Note on addtional space: setup_return in signal.c + * algorithm uses two words copy regardless whether + * it is thumb case or not, so we need additional + * word after real last entry. + */ + .arm + .space 4 + + .size sigreturn_codes, . - sigreturn_codes From f00f697ba020680765d95e0587e1dbf3ed9ed9f3 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Mon, 7 Oct 2013 21:37:19 -0700 Subject: [PATCH 046/296] ARM: mcpm: fix big endian issue in mcpm startup code In big endian mode mcpm_entry_point is first function that called on secondaries CPU. First it should switch CPU into big endian code. 
[ben.dooks@codethink.co.uk: merge fix patch from Victor into this] Signed-off-by: Victor Kamensky Acked-by: Nicolas Pitre Reviewed-by: Dave Martin Signed-off-by: Ben Dooks (cherry picked from commit 519ceb9fd10cd7e836d0aa97b2068cc9e97f463b) Signed-off-by: Victor Kamensky --- arch/arm/common/mcpm_head.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S index 057e9c5a9e1f..0decb3c07165 100644 --- a/arch/arm/common/mcpm_head.S +++ b/arch/arm/common/mcpm_head.S @@ -15,6 +15,7 @@ #include #include +#include #include "vlock.h" @@ -47,6 +48,7 @@ ENTRY(mcpm_entry_point) + ARM_BE8(setend be) THUMB( adr r12, BSYM(1f) ) THUMB( bx r12 ) THUMB( .thumb ) From b0e8afd6ffd73719ad05dfbcfd0e0a6ec674b0c2 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Mon, 7 Oct 2013 08:48:23 -0700 Subject: [PATCH 047/296] ARM: tlb: ASID macro should give 32bit result for BE correct operation In order for ASID macro to be used as expression passed to inline asm as 'r' operand it needs to give 32 bit unsigned result, not unsigned 64bit expression. Otherwise when 64bit ASID is passed to inline assembler statement as 'r' operand (32bit) compiler behavior is not well specified. For example when __flush_tlb_mm function compiled in big endian case, and ASID is passed to tlb_op macro directly, 0 will be passed as 'mcr 15, 0, r4, cr8, cr3, {2}' argument in r4, unless ASID macro changed to produce 32 bit result. 
Signed-off-by: Victor Kamensky Acked-by: Will Deacon Signed-off-by: Ben Dooks (cherry picked from commit a1af3474487cc3b8731b990dceac6b6aad7f3ed8) Signed-off-by: Victor Kamensky --- arch/arm/include/asm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h index 6f18da09668b..64fd15159b7d 100644 --- a/arch/arm/include/asm/mmu.h +++ b/arch/arm/include/asm/mmu.h @@ -16,7 +16,7 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID #define ASID_BITS 8 #define ASID_MASK ((~0ULL) << ASID_BITS) -#define ASID(mm) ((mm)->context.id.counter & ~ASID_MASK) +#define ASID(mm) ((unsigned int)((mm)->context.id.counter & ~ASID_MASK)) #else #define ASID(mm) (0) #endif From eb192460ccd50e73475b6092a8953ef7945921c8 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Tue, 15 Oct 2013 21:50:34 -0700 Subject: [PATCH 048/296] ARM: cci driver need big endian fixes in asm code cci_enable_port_for_self written in asm and it works with h/w registers that are in little endian format. 
When run in big endian mode it needs byte-swapped constants before/after it writes/reads to/from such registers. Signed-off-by: Victor Kamensky Acked-by: Nicolas Pitre Signed-off-by: Ben Dooks (cherry picked from commit fdb07aee0b2b9d7d1893c97f5ce79ec355caaf1f) Signed-off-by: Victor Kamensky --- drivers/bus/arm-cci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 2d1387bf1724..d9d954eb0fa0 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -705,7 +705,7 @@ asmlinkage void __naked cci_enable_port_for_self(void) /* Enable the CCI port */ " ldr r0, [r0, %[offsetof_port_phys]] \n" -" mov r3, #"__stringify(CCI_ENABLE_REQ)" \n" +" mov r3, %[cci_enable_req]\n" " str r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n" /* poll the status reg for completion */ @@ -713,7 +713,7 @@ asmlinkage void __naked cci_enable_port_for_self(void) " ldr r0, [r1] \n" " ldr r0, [r0, r1] @ cci_ctrl_base \n" "4: ldr r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n" -" tst r1, #1 \n" +" tst r1, %[cci_control_status_bits] \n" " bne 4b \n" " mov r0, #0 \n" @@ -726,6 +726,8 @@ asmlinkage void __naked cci_enable_port_for_self(void) "7: .word cci_ctrl_phys - . \n" : : [sizeof_cpu_port] "i" (sizeof(cpu_port)), + [cci_enable_req] "i" cpu_to_le32(CCI_ENABLE_REQ), + [cci_control_status_bits] "i" cpu_to_le32(1), #ifndef __ARMEB__ [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)), #else From 1c3fe1f0b34efee5687b55448e463f567e4b7061 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Wed, 25 Sep 2013 16:33:13 +0100 Subject: [PATCH 049/296] arm64: use correct register width when retrieving ASID The ASID is represented as an unsigned int in mm_context_t and we currently use the mmid assembler macro to access this element of the struct. This should be accessed with a register of 32-bit width.
If the incorrect register width is used the ASID will be returned in bits[32:63] of the register when running under big-endian. Fix a use of the mmid macro in tlb.S to use a 32-bit access. Signed-off-by: Will Deacon Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas (cherry picked from commit fc18047c732f6becba92618a397555927687efd3) Signed-off-by: Victor Kamensky --- arch/arm64/mm/tlb.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/tlb.S b/arch/arm64/mm/tlb.S index 8ae80a18e8ec..19da91e0cd27 100644 --- a/arch/arm64/mm/tlb.S +++ b/arch/arm64/mm/tlb.S @@ -35,7 +35,7 @@ */ ENTRY(__cpu_flush_user_tlb_range) vma_vm_mm x3, x2 // get vma->vm_mm - mmid x3, x3 // get vm_mm->context.id + mmid w3, x3 // get vm_mm->context.id dsb sy lsr x0, x0, #12 // align address lsr x1, x1, #12 From 00767d21ba2a58cc3ca74c53ee1559d9aa581157 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:20 +0100 Subject: [PATCH 050/296] net: smc91x: dont't use SMC_outw for fixing up halfword-aligned data SMC_outw invokes an endian-aware I/O accessor, which may change the data endianness before writing to the device. This is not suitable for data transfers where the memory buffer is simply a string of bytes that does not require any byte-swapping. This patch fixes the smc91x SMC_PUSH_DATA macro so that it uses the string I/O accessor for outputting the leading or trailing halfwords on halfword-aligned buffers. Cc: Cc: Nicolas Pitre Cc: David S. Miller Signed-off-by: Will Deacon Acked-by: Nicolas Pitre Signed-off-by: David S.
Miller (cherry picked from commit e9e4ea74f06635f2ffc1dffe5ef40c854faa0a90) Signed-off-by: Victor Kamensky --- drivers/net/ethernet/smsc/smc91x.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index fcb94806f4e9..51285872481c 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -1110,8 +1110,7 @@ static const char * chip_ids[ 16 ] = { void __iomem *__ioaddr = ioaddr; \ if (__len >= 2 && (unsigned long)__ptr & 2) { \ __len -= 2; \ - SMC_outw(*(u16 *)__ptr, ioaddr, \ - DATA_REG(lp)); \ + SMC_outsw(ioaddr, DATA_REG(lp), __ptr, 1); \ __ptr += 2; \ } \ if (SMC_CAN_USE_DATACS && lp->datacs) \ @@ -1119,8 +1118,7 @@ static const char * chip_ids[ 16 ] = { SMC_outsl(__ioaddr, DATA_REG(lp), __ptr, __len>>2); \ if (__len & 2) { \ __ptr += (__len & ~3); \ - SMC_outw(*((u16 *)__ptr), ioaddr, \ - DATA_REG(lp)); \ + SMC_outsw(ioaddr, DATA_REG(lp), __ptr, 1); \ } \ } else if (SMC_16BIT(lp)) \ SMC_outsw(ioaddr, DATA_REG(lp), p, (l) >> 1); \ From 4a6928be0d11377ff178fa7a355a98adfd8e4a29 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:08 +0100 Subject: [PATCH 051/296] arm64: big-endian: add big-endian support to top-level arch Makefile This patch adds big-endian support to the AArch64 top-level Makefile. This currently just passes the relevant flags to the toolchain and is predicated on a Kconfig option that will be introduced later on. 
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit a0974e6e217aead196033d72f898e2acb575304d) Signed-off-by: Victor Kamensky --- arch/arm64/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c95c5cb212fd..3a926eb93c95 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -20,9 +20,15 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) KBUILD_DEFCONFIG := defconfig KBUILD_CFLAGS += -mgeneral-regs-only +ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) +KBUILD_CPPFLAGS += -mbig-endian +AS += -EB +LD += -EB +else KBUILD_CPPFLAGS += -mlittle-endian AS += -EL LD += -EL +endif comma = , From a07e8ed3b05f8424439c028853c568f79fbf1512 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:09 +0100 Subject: [PATCH 052/296] arm64: big-endian: fix byteorder include For big-endian processors, we must include linux/byteorder/big_endian.h to get the relevant definitions for swabbing between CPU order and a defined endianness. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit c194520ada7c8f2eddec5ebf24982483b49736a0) Signed-off-by: Victor Kamensky --- arch/arm64/include/uapi/asm/byteorder.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/uapi/asm/byteorder.h b/arch/arm64/include/uapi/asm/byteorder.h index 2b92046aafc5..dc19e9537f0d 100644 --- a/arch/arm64/include/uapi/asm/byteorder.h +++ b/arch/arm64/include/uapi/asm/byteorder.h @@ -16,6 +16,10 @@ #ifndef __ASM_BYTEORDER_H #define __ASM_BYTEORDER_H +#ifdef __AARCH64EB__ +#include +#else #include +#endif #endif /* __ASM_BYTEORDER_H */ From 041f3e992bdf0ec10e984ce6a60a44e67a9c428b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:10 +0100 Subject: [PATCH 053/296] arm64: ELF: add support for big-endian executables This patch adds support for the aarch64_be ELF format to the AArch64 ELF loader. 
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 5436b5c8305b4ed37c5d11f96c1aaccca63c9ab2) Signed-off-by: Victor Kamensky --- arch/arm64/include/asm/elf.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index fe32c0e4ac01..aa57b15c9e86 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -92,11 +92,24 @@ typedef struct user_fpsimd_state elf_fpregset_t; * These are used to set parameters in the core dumps. */ #define ELF_CLASS ELFCLASS64 +#ifdef __AARCH64EB__ +#define ELF_DATA ELFDATA2MSB +#else #define ELF_DATA ELFDATA2LSB +#endif #define ELF_ARCH EM_AARCH64 +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + */ #define ELF_PLATFORM_SIZE 16 +#ifdef __AARCH64EB__ +#define ELF_PLATFORM ("aarch64_be") +#else #define ELF_PLATFORM ("aarch64") +#endif /* * This is used to ensure we don't load something for the wrong architecture. From 42004860c3b58dea05b24fd8089a2c4cd33fef6f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:11 +0100 Subject: [PATCH 054/296] arm64: setup: report ELF_PLATFORM as the machine for utsname uname -m reports the machine field from the current utsname, which should reflect the endianness of the system. This patch reports ELF_PLATFORM for the field, so that everything appears consistent from userspace. 
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 94ed1f2cb5d46533f10262b1b760db7dbec9cf10) Signed-off-by: Victor Kamensky --- arch/arm64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 85afdae9cc05..31e1160bce30 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -124,7 +124,7 @@ static void __init setup_processor(void) printk("CPU: %s [%08x] revision %d\n", cpu_name, read_cpuid_id(), read_cpuid_id() & 15); - sprintf(init_utsname()->machine, "aarch64"); + sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; } From 9daa58e68226eaec5d4d0b259e05394c71bb9b3f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:12 +0100 Subject: [PATCH 055/296] arm64: compat: add support for big-endian (BE8) AArch32 binaries This patch adds support for BE8 AArch32 tasks to the compat layer. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit a795a38eb91cf72c4a05e72a9c84e317ee179a48) Signed-off-by: Victor Kamensky Conflicts: arch/arm64/include/asm/elf.h --- arch/arm64/include/asm/compat.h | 14 ++++++++++++++ arch/arm64/include/asm/elf.h | 5 +++++ arch/arm64/include/asm/processor.h | 5 +++++ arch/arm64/include/asm/ptrace.h | 1 + 4 files changed, 25 insertions(+) diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 899af807ef0f..fda2704b3f9f 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -26,7 +26,11 @@ #include #define COMPAT_USER_HZ 100 +#ifdef __AARCH64EB__ +#define COMPAT_UTS_MACHINE "armv8b\0\0" +#else #define COMPAT_UTS_MACHINE "armv8l\0\0" +#endif typedef u32 compat_size_t; typedef s32 compat_ssize_t; @@ -73,13 +77,23 @@ struct compat_timeval { }; struct compat_stat { +#ifdef __AARCH64EB__ + short st_dev; + short __pad1; +#else compat_dev_t st_dev; +#endif compat_ino_t st_ino; compat_mode_t st_mode; 
compat_ushort_t st_nlink; __compat_uid16_t st_uid; __compat_gid16_t st_gid; +#ifdef __AARCH64EB__ + short st_rdev; + short __pad2; +#else compat_dev_t st_rdev; +#endif compat_off_t st_size; compat_off_t st_blksize; compat_off_t st_blocks; diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index aa57b15c9e86..212b0b615b4a 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -165,7 +165,12 @@ extern unsigned long arch_randomize_brk(struct mm_struct *mm); #ifdef CONFIG_COMPAT #define EM_ARM 40 + +#ifdef __AARCH64EB__ +#define COMPAT_ELF_PLATFORM ("v8b") +#else #define COMPAT_ELF_PLATFORM ("v8l") +#endif #define COMPAT_ELF_ET_DYN_BASE (randomize_et_dyn(2 * TASK_SIZE_32 / 3)) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ab239b2c456f..45b20cd6cbca 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -107,6 +107,11 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate = COMPAT_PSR_MODE_USR; if (pc & 1) regs->pstate |= COMPAT_PSR_T_BIT; + +#ifdef __AARCH64EB__ + regs->pstate |= COMPAT_PSR_E_BIT; +#endif + regs->compat_sp = sp; } #endif diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41a71ee4c3df..491bea413fb1 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -42,6 +42,7 @@ #define COMPAT_PSR_MODE_UND 0x0000001b #define COMPAT_PSR_MODE_SYS 0x0000001f #define COMPAT_PSR_T_BIT 0x00000020 +#define COMPAT_PSR_E_BIT 0x00000200 #define COMPAT_PSR_F_BIT 0x00000040 #define COMPAT_PSR_I_BIT 0x00000080 #define COMPAT_PSR_A_BIT 0x00000100 From a443bff684c1abd8d95c7d3c634066db0ee29271 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:13 +0100 Subject: [PATCH 056/296] arm64: compat: correct register concatenation for syscall wrappers The arm64 port contains wrappers for arm32 syscalls that pass 64-bit values. 
These wrappers concatenate the two registers to hold a 64-bit value in a single X register. On BE, however, the lower and higher words are swapped. Create a new assembler macro, regs_to_64, that when on BE systems swaps the registers in the orr instruction. Acked-by: Will Deacon Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas (cherry picked from commit 55b89540b0d8d031f90e3d711ec0df3f797ecc61) Signed-off-by: Victor Kamensky --- arch/arm64/include/asm/assembler.h | 12 ++++++++++++ arch/arm64/kernel/sys32.S | 22 +++++++++++----------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 5aceb83b3f5c..381b935e74cd 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -115,3 +115,15 @@ lr .req x30 // link register .align 7 b \label .endm +/* + * Define a macro that constructs a 64-bit value by concatenating two + * 32-bit registers. Note that on big endian systems the order of the + * registers is swapped. + */ +#ifndef CONFIG_CPU_BIG_ENDIAN + .macro regs_to_64, rd, lbits, hbits +#else + .macro regs_to_64, rd, hbits, lbits +#endif + orr \rd, \lbits, \hbits, lsl #32 + .endm diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S index a1b19ed7467c..423a5b3fc2be 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/sys32.S @@ -59,48 +59,48 @@ ENDPROC(compat_sys_fstatfs64_wrapper) * extension. 
*/ compat_sys_pread64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pread64 ENDPROC(compat_sys_pread64_wrapper) compat_sys_pwrite64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pwrite64 ENDPROC(compat_sys_pwrite64_wrapper) compat_sys_truncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_truncate ENDPROC(compat_sys_truncate64_wrapper) compat_sys_ftruncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_ftruncate ENDPROC(compat_sys_ftruncate64_wrapper) compat_sys_readahead_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 mov w2, w4 b sys_readahead ENDPROC(compat_sys_readahead_wrapper) compat_sys_fadvise64_64_wrapper: mov w6, w1 - orr x1, x2, x3, lsl #32 - orr x2, x4, x5, lsl #32 + regs_to_64 x1, x2, x3 + regs_to_64 x2, x4, x5 mov w3, w6 b sys_fadvise64_64 ENDPROC(compat_sys_fadvise64_64_wrapper) compat_sys_sync_file_range2_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_sync_file_range2 ENDPROC(compat_sys_sync_file_range2_wrapper) compat_sys_fallocate_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_fallocate ENDPROC(compat_sys_fallocate_wrapper) From 5cf270ce3fbe6e8c493bce717418c62c00db13b3 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:14 +0100 Subject: [PATCH 057/296] arm64: big-endian: don't treat code as data when copying sigret code Currently the sigreturn compat code is copied to an offset in the vectors table. When using a BE kernel this data will be stored in the wrong endianess so when returning from a signal on a 32-bit BE system, arbitrary code will be executed. Instead of declaring the code inside a struct and copying that, use the assembler's .byte directives to store the code in the correct endianess regardless of platform endianess. 
Acked-by: Will Deacon Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas (cherry picked from commit a1d5ebaf8ccdd100f45042ce32c591867de04ac3) Signed-off-by: Victor Kamensky --- arch/arm64/kernel/kuser32.S | 42 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/signal32.c | 28 ------------------------ arch/arm64/kernel/vdso.c | 5 ++++- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 8b69ecb1d8bc..1e4905d52d30 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -27,6 +27,9 @@ * * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ + +#include <asm/unistd32.h> + .align 5 .globl __kuser_helper_start __kuser_helper_start: @@ -75,3 +78,42 @@ __kuser_helper_version: // 0xffff0ffc .word ((__kuser_helper_end - __kuser_helper_start) >> 5) .globl __kuser_helper_end __kuser_helper_end: + +/* + * AArch32 sigreturn code + * + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions.
+ */ + .globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + + /* + * ARM Code + */ + .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_sigreturn, 0x27 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0xdf // svc #__NR_compat_sigreturn + + /* + * ARM code + */ + .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_rt_sigreturn, 0x27 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0xdf // svc #__NR_compat_rt_sigreturn + + .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e393174fe859..e8772c07cf5c 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -100,34 +100,6 @@ struct compat_rt_sigframe { #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -/* - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | __NR_compat_sigreturn) -#define SVC_SYS_SIGRETURN (0xef000000 | __NR_compat_sigreturn) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | __NR_compat_rt_sigreturn) -#define SVC_SYS_RT_SIGRETURN (0xef000000 | __NR_compat_rt_sigreturn) - -/* - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SVC_THUMB_SIGRETURN (((0xdf00 | __NR_compat_sigreturn) << 16) | \ - 0x2700 | __NR_compat_sigreturn) -#define SVC_THUMB_RT_SIGRETURN (((0xdf00 | __NR_compat_rt_sigreturn) << 16) | \ - 0x2700 | __NR_compat_rt_sigreturn) - -const compat_ulong_t aarch32_sigret_code[6] = { - /* - * AArch32 sigreturn code.
- * We don't construct an OABI SWI - instead we just set the imm24 field - * to the EABI syscall number so that we create a sane disassembly. - */ - MOV_R7_NR_SIGRETURN, SVC_SYS_SIGRETURN, SVC_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN, -}; - static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) { compat_sigset_t cset; diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 0ea7a22bcdf2..a7149cae1615 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -58,7 +58,10 @@ static struct page *vectors_page[1]; static int alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; + extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long vpage; vpage = get_zeroed_page(GFP_ATOMIC); @@ -72,7 +75,7 @@ static int alloc_vectors_page(void) /* sigreturn code */ memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, - aarch32_sigret_code, sizeof(aarch32_sigret_code)); + __aarch32_sigret_code_start, sigret_sz); flush_icache_range(vpage, vpage + PAGE_SIZE); vectors_page[0] = virt_to_page(vpage); From 68a8504b75607ca83ad5e665727d4bfd19c3dd42 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:15 +0100 Subject: [PATCH 058/296] arm64: asm: add CPU_LE & CPU_BE assembler helpers Add CPU_LE and CPU_BE to select assembler code in little and big endian configurations respectively. 
Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas (cherry picked from commit e68bedaa03c950ae8045e7899e7a6b2a97d1bf41) Signed-off-by: Victor Kamensky --- arch/arm64/include/asm/assembler.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 381b935e74cd..fd3e3924041b 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -115,6 +115,25 @@ lr .req x30 // link register .align 7 b \label .endm + +/* + * Select code when configured for BE. + */ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define CPU_BE(code...) code +#else +#define CPU_BE(code...) +#endif + +/* + * Select code when configured for LE. + */ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define CPU_LE(code...) +#else +#define CPU_LE(code...) code +#endif + /* * Define a macro that constructs a 64-bit value by concatenating two * 32-bit registers. Note that on big endian systems the order of the From b0d60128f0c6d990f670ad01bdced70676c2137b Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:16 +0100 Subject: [PATCH 059/296] arm64: head: create a new function for setting the boot_cpu_mode flag Currently, the code for setting the __cpu_boot_mode flag is munged in with el2_setup. This makes things difficult on a BE bringup as a memory access has to have occurred before el2_setup which is the place that we'd like to set the endianess on the current EL. Create a new function for setting __cpu_boot_mode and have el2_setup return the mode the CPU. Also define a new constant in virt.h, BOOT_CPU_MODE_EL1, for readability. 
Acked-by: Marc Zyngier Acked-by: Will Deacon Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas (cherry picked from commit 828e9834e9a5b7e61046aa3c5f603a4fecba2fb4) Signed-off-by: Victor Kamensky --- arch/arm64/include/asm/virt.h | 3 ++- arch/arm64/kernel/head.S | 34 +++++++++++++++++++++++++--------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 26e310c54344..130e2be952cf 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -18,7 +18,8 @@ #ifndef __ASM__VIRT_H #define __ASM__VIRT_H -#define BOOT_CPU_MODE_EL2 (0x0e12b007) +#define BOOT_CPU_MODE_EL1 (0xe11) +#define BOOT_CPU_MODE_EL2 (0xe12) #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 3532ca613718..21caac9dfed8 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -115,8 +115,9 @@ ENTRY(stext) mov x21, x0 // x21=FDT + bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET - bl el2_setup // Drop to EL1 + bl set_cpu_boot_mode_flag mrs x22, midr_el1 // x22=cpuid mov x0, x22 bl lookup_processor_type @@ -142,21 +143,20 @@ ENDPROC(stext) /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively. */ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #PSR_MODE_EL2t ccmp x0, #PSR_MODE_EL2h, #0x4, ne - ldr x0, =__boot_cpu_mode // Compute __boot_cpu_mode - add x0, x0, x28 b.eq 1f - str wzr, [x0] // Remember we don't have EL2... + mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 ret /* Hyp configuration. */ -1: ldr w1, =BOOT_CPU_MODE_EL2 - str w1, [x0, #4] // This CPU has EL2 - mov x0, #(1 << 31) // 64-bit EL1 +1: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. 
*/ @@ -196,9 +196,24 @@ ENTRY(el2_setup) PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) +/* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info. + */ +ENTRY(set_cpu_boot_mode_flag) + ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode + add x1, x1, x28 + cmp w20, #BOOT_CPU_MODE_EL2 + b.ne 1f + add x1, x1, #4 +1: str w20, [x1] // This CPU has booted in EL1 + ret +ENDPROC(set_cpu_boot_mode_flag) + /* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. @@ -226,8 +241,9 @@ ENTRY(__boot_cpu_mode) * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl __calc_phys_offset // x24=phys offset - bl el2_setup // Drop to EL1 + bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 From bfe6e4a7171eb42cf20e0c0aca680ea2d42f9cd2 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:17 +0100 Subject: [PATCH 060/296] arm64: big-endian: set correct endianess on kernel entry The endianness of memory accesses at EL2 and EL1 are configured by SCTLR_EL2.EE and SCTLR_EL1.EE respectively. When the kernel is booted, the state of SCTLR_EL{2,1}.EE is unknown, and thus the kernel must ensure that they are set before performing any memory accesses. This patch ensures that SCTLR_EL{2,1} are configured appropriately at boot for kernels of either endianness. 
Acked-by: Will Deacon Signed-off-by: Matthew Leach [catalin.marinas@arm.com: fix SCTLR_EL1.E0E bit setting in head.S] Signed-off-by: Catalin Marinas (cherry picked from commit 9cf71728931a4073b9e3a4bcbf9dada86bc98370) Signed-off-by: Victor Kamensky --- arch/arm64/kernel/head.S | 17 ++++++++++++++--- arch/arm64/mm/proc.S | 4 ++-- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 21caac9dfed8..999504b50c30 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -151,12 +151,22 @@ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #PSR_MODE_EL2t ccmp x0, #PSR_MODE_EL2h, #0x4, ne - b.eq 1f + b.ne 1f + mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + b 2f +1: mrs x0, sctlr_el1 +CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 +CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + msr sctlr_el1, x0 mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + isb ret /* Hyp configuration. */ -1: mov x0, #(1 << 31) // 64-bit EL1 +2: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. */ @@ -173,7 +183,8 @@ ENTRY(el2_setup) /* sctlr_el1 */ mov x0, #0x0800 // Set/clear RES{1,0} bits - movk x0, #0x30d0, lsl #16 +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr sctlr_el1, x0 /* Coprocessor traps. */ diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index f84fcf71f129..729a0020d4b6 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -162,9 +162,9 @@ ENDPROC(__cpu_setup) * CE0 XWHW CZ ME TEEA S * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved - * .... .100 .... 01.1 11.1 ..01 0001 1101 < software settings + * .... .1.. .... 
01.1 11.1 ..01 0001 1101 < software settings */ .type crval, #object crval: - .word 0x030802e2 // clear + .word 0x000802e2 // clear .word 0x0405d11d // set From 67944dd8f2fcffe7084d077228287c6de70888b8 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Fri, 11 Oct 2013 14:52:18 +0100 Subject: [PATCH 061/296] arm64: big-endian: write CPU holding pen address as LE Currently when CPUs are brought online via a spin-table, the address they should jump to is written to the cpu-release-addr in the kernel's native endianness. As the kernel may switch endianness, secondaries might read the value byte-reversed from what was intended, and they would jump to the wrong address. As the only current arm64 spin-table implementations are little-endian, stricten up the arm64 spin-table definition such that the value written to cpu-release-addr is _always_ little-endian regardless of the endianness of any CPU. If a spinning CPU is operating big-endian, it must byte-reverse the value before jumping to handle this. Signed-off-by: Matthew Leach Signed-off-by: Catalin Marinas (cherry picked from commit 710be9ac4ea0d2e02a2c4aa625795e65bf3db5b1) Signed-off-by: Victor Kamensky --- arch/arm64/kernel/smp_spin_table.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 27f08367a6e7..44c22805d2e2 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -72,7 +72,16 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) return -ENODEV; release_addr = __va(cpu_release_addr[cpu]); - release_addr[0] = (void *)__pa(secondary_holding_pen); + + /* + * We write the release address as LE regardless of the native + * endianess of the kernel. Therefore, any boot-loaders that + * read this address need to convert this address to the + * boot-loader's endianess before jumping. This is mandated by + * the boot protocol. 
+ */ + release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen)); + __flush_dcache_area(release_addr, sizeof(release_addr[0])); /* From a806cbf415fa56f0422c792bde0ba8baf2e19f45 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Oct 2013 14:52:19 +0100 Subject: [PATCH 062/296] arm64: kconfig: allow CPU_BIG_ENDIAN to be selected This patch wires up CONFIG_CPU_BIG_ENDIAN for the AArch64 kernel configuration. Selecting this option builds a big-endian kernel which can boot into a big-endian userspace. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit a872013d6d03ab63736a01dcd9747580be3a6b70) Signed-off-by: Victor Kamensky --- arch/arm64/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a95dc907a139..d22b183db226 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -130,6 +130,11 @@ config ARM64_64K_PAGES look-up. AArch32 emulation is not available when this feature is enabled. +config CPU_BIG_ENDIAN + bool "Build big-endian kernel" + help + Say Y if you plan on running a kernel in big-endian mode. + config SMP bool "Symmetric Multi-Processing" select USE_GENERIC_SMP_HELPERS From 36bada1e73f23f948283f97caf775f3428e56af8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 5 Nov 2013 10:16:52 +0000 Subject: [PATCH 063/296] arm64: module: ensure instruction is little-endian before manipulation Relocations that require an instruction immediate to be re-encoded must ensure that the instruction pattern is represented in a little-endian format for the manipulation code to work correctly. This patch converts the loaded instruction into native-endianess prior to encoding and then converts back to little-endian byteorder before updating memory. 
Signed-off-by: Will Deacon Tested-by: Matthew Leach Signed-off-by: Catalin Marinas (cherry picked from commit 122e2fa0d310d262cb85cf0b003032e5d2bc2ae7) Signed-off-by: Victor Kamensky --- arch/arm64/kernel/module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index ca0e3d55da99..2c28a6cf93e6 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -111,6 +111,9 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) u32 immlo, immhi, lomask, himask, mask; int shift; + /* The instruction stream is always little endian. */ + insn = le32_to_cpu(insn); + switch (type) { case INSN_IMM_MOVNZ: /* @@ -179,7 +182,7 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) insn &= ~(mask << shift); insn |= (imm & mask) << shift; - return insn; + return cpu_to_le32(insn); } static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, From ba854bd8b9794ed883392dd443de6e7a7a70850a Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Thu, 28 Nov 2013 12:07:22 +0000 Subject: [PATCH 064/296] arm64: ptrace: fix compat registes get/set to be endian clean On a BE system the wrong half of the X registers is retrieved/written when attempting to get/set the value of aarch32 registers through ptrace. Ensure that types are the correct width so that the relevant casting occurs. 
Signed-off-by: Matthew Leach Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas (cherry picked from commit 6a2e5e521c333a0b56cb60dc5587e3f90859c5e7) Signed-off-by: Victor Kamensky --- arch/arm64/kernel/ptrace.c | 40 ++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 5341534b6d04..7041be26d4af 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -656,28 +656,27 @@ static int compat_gpr_get(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; switch (idx) { case 15: - reg = (void *)&task_pt_regs(target)->pc; + reg = task_pt_regs(target)->pc; break; case 16: - reg = (void *)&task_pt_regs(target)->pstate; + reg = task_pt_regs(target)->pstate; break; case 17: - reg = (void *)&task_pt_regs(target)->orig_x0; + reg = task_pt_regs(target)->orig_x0; break; default: - reg = (void *)&task_pt_regs(target)->regs[idx]; + reg = task_pt_regs(target)->regs[idx]; } - ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); - + ret = copy_to_user(ubuf, &reg, sizeof(reg)); if (ret) break; - else - ubuf += sizeof(compat_ulong_t); + + ubuf += sizeof(reg); } return ret; @@ -705,28 +704,28 @@ static int compat_gpr_set(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; + + ret = copy_from_user(&reg, ubuf, sizeof(reg)); + if (ret) + return ret; + + ubuf += sizeof(reg); switch (idx) { case 15: - reg = (void *)&newregs.pc; + newregs.pc = reg; break; case 16: - reg = (void *)&newregs.pstate; + newregs.pstate = reg; break; case 17: - reg = (void *)&newregs.orig_x0; + newregs.orig_x0 = reg; break; default: - reg = (void *)&newregs.regs[idx]; + newregs.regs[idx] = reg; } - ret = copy_from_user(reg, ubuf, sizeof(compat_ulong_t)); - - if (ret) - goto out; - else - ubuf += sizeof(compat_ulong_t); } if
(valid_user_regs(&newregs.user_regs)) @@ -734,7 +733,6 @@ static int compat_gpr_set(struct task_struct *target, else ret = -EINVAL; -out: return ret; } From d3222d77b27a07a4b200d1b0b792e8e3bdf30396 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 17 Dec 2013 23:37:01 +0000 Subject: [PATCH 065/296] video: vgacon: Don't build on arm64 arm64 is unlikely to have a VGA console and does not export screen_info causing build failures if the driver is build, for example in all*config. Add a dependency on !ARM64 to prevent this. This list is getting quite long, it may be easier to depend on a symbol which architectures that do support the driver can select. Signed-off-by: Mark Brown [tomi.valkeinen@ti.com: moved && to first modified line] Signed-off-by: Tomi Valkeinen (cherry picked from commit ee23794b86689e655cedd616e98c03bc3c74f5ec) Conflicts: drivers/video/console/Kconfig --- drivers/video/console/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index bc922c47d046..d5fa5f3fe6d1 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -6,7 +6,7 @@ menu "Console display driver support" config VGA_CONSOLE bool "VGA text console" if EXPERT || !X86 - depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) + depends on !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !SUPERH && !BLACKFIN && !AVR32 && !MN10300 && (!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && !ARM64 default y help Saying Y here will allow you to use Linux in text mode through a From cd1b5c286992a746f3d7b5817d38988383839ec8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 14 Mar 2014 20:20:35 +0000 Subject: [PATCH 066/296] configs: Add big endian config fragment Signed-off-by: Mark Brown --- linaro/configs/bigendian.conf | 4 ++++ 1 file changed, 4 
insertions(+) create mode 100644 linaro/configs/bigendian.conf diff --git a/linaro/configs/bigendian.conf b/linaro/configs/bigendian.conf new file mode 100644 index 000000000000..6a1020299e85 --- /dev/null +++ b/linaro/configs/bigendian.conf @@ -0,0 +1,4 @@ +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_CPU_ENDIAN_BE8=y +# CONFIG_VIRTUALIZATION is not set +# CONFIG_MMC_DW_IDMAC is not set From e8cec2c1f77a8a90f701fd2cebdea4feba7640bf Mon Sep 17 00:00:00 2001 From: Alex Ray Date: Mon, 17 Mar 2014 13:44:01 -0700 Subject: [PATCH 067/296] ARM64: add option to build Image.gz/dtb combo Allows a defconfig to set a list of dtbs to concatenate with an Image.gz to create a Image.gz-dtb. Change-Id: I0dc3935e57f01b517aa64eda0c27b0101e9ea3b2 Signed-off-by: Alex Ray --- arch/arm64/Kconfig | 15 +++++++++++++++ arch/arm64/Makefile | 8 ++++++++ arch/arm64/boot/.gitignore | 1 + arch/arm64/boot/Makefile | 13 +++++++++++++ arch/arm64/boot/dts/Makefile | 11 +++++++++-- 5 files changed, 46 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 56b3f6d447ae..b87502546318 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -202,6 +202,21 @@ config CMDLINE_FORCE This is useful if you cannot or don't want to change the command-line options your boot loader passes to the kernel. +config BUILD_ARM64_APPENDED_DTB_IMAGE + bool "Build a concatenated Image.gz/dtb by default" + depends on OF + help + Enabling this option will cause a concatenated Image.gz and list of + DTBs to be built by default (instead of a standalone Image.gz.) + The image will built in arch/arm64/boot/Image.gz-dtb + +config BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES + string "Default dtb names" + depends on BUILD_ARM64_APPENDED_DTB_IMAGE + help + Space separated list of names of dtbs to append when + building a concatenated Image.gz-dtb. 
+ endmenu menu "Userspace binary formats" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index c95c5cb212fd..d69354dbd789 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -41,7 +41,12 @@ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) # Default target when executing plain make +ifeq ($(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE),y) +KBUILD_IMAGE := Image.gz-dtb +else KBUILD_IMAGE := Image.gz +endif + KBUILD_DTBS := dtbs all: $(KBUILD_IMAGE) $(KBUILD_DTBS) @@ -60,6 +65,9 @@ zinstall install: vmlinux dtbs: scripts $(Q)$(MAKE) $(build)=$(boot)/dts dtbs +Image.gz-dtb: vmlinux scripts dtbs + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm64/boot/.gitignore b/arch/arm64/boot/.gitignore index 8dab0bb6ae66..eb3551131b1e 100644 --- a/arch/arm64/boot/.gitignore +++ b/arch/arm64/boot/.gitignore @@ -1,2 +1,3 @@ Image Image.gz +Image.gz-dtb diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 5a0e3ab854a5..df519849fa00 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -14,14 +14,27 @@ # Based on the ia64 boot/Makefile. 
# +include $(srctree)/arch/arm64/boot/dts/Makefile + targets := Image Image.gz +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif +DTB_OBJS := $(addprefix $(obj)/dts/,$(DTB_LIST)) + $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) $(obj)/Image.gz: $(obj)/Image FORCE $(call if_changed,gzip) +$(obj)/Image.gz-dtb: $(obj)/Image.gz $(DTB_OBJS) FORCE + $(call if_changed,cat) + install: $(obj)/Image $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)" diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile index 68457e9e0975..8dc5d8e28a01 100644 --- a/arch/arm64/boot/dts/Makefile +++ b/arch/arm64/boot/dts/Makefile @@ -1,8 +1,15 @@ dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb targets += dtbs -targets += $(dtb-y) -dtbs: $(addprefix $(obj)/, $(dtb-y)) +DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES)) +ifneq ($(DTB_NAMES),) +DTB_LIST := $(addsuffix .dtb,$(DTB_NAMES)) +else +DTB_LIST := $(dtb-y) +endif +targets += $(DTB_LIST) + +dtbs: $(addprefix $(obj)/, $(DTB_LIST)) clean-files := *.dtb From 3e8e77367e833de476abd739847ef47b53dced11 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 3 Mar 2014 15:38:32 -0800 Subject: [PATCH 068/296] ocfs2: fix quota file corruption commit 15c34a760630ca2c803848fba90ca0646a9907dd upstream. Global quota files are accessed from different nodes. Thus we cannot cache offset of quota structure in the quota file after we drop our node reference count to it because after that moment quota structure may be freed and reallocated elsewhere by a different node resulting in corruption of quota file. Fix the problem by clearing dq_off when we are releasing dquot structure. We also remove the DB_READ_B handling because it is useless - DQ_ACTIVE_B is set iff DQ_READ_B is set. 
Signed-off-by: Jan Kara Cc: Goldwyn Rodrigues Cc: Joel Becker Reviewed-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/quota_global.c | 27 +++++++++++++++++---------- fs/ocfs2/quota_local.c | 4 ---- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 332a281f217e..e49b4f1cb26b 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dquot *dquot) */ if (status < 0) mlog_errno(status); + /* + * Clear dq_off so that we search for the structure in quota file next + * time we acquire it. The structure might be deleted and reallocated + * elsewhere by another node while our dquot structure is on freelist. + */ + dquot->dq_off = 0; clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_trans: ocfs2_commit_trans(osb, handle); @@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) status = ocfs2_lock_global_qf(info, 1); if (status < 0) goto out; - if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { - status = ocfs2_qinfo_lock(info, 0); - if (status < 0) - goto out_dq; - status = qtree_read_dquot(&info->dqi_gi, dquot); - ocfs2_qinfo_unlock(info, 0); - if (status < 0) - goto out_dq; - } - set_bit(DQ_READ_B, &dquot->dq_flags); + status = ocfs2_qinfo_lock(info, 0); + if (status < 0) + goto out_dq; + /* + * We always want to read dquot structure from disk because we don't + * know what happened with it while it was on freelist. 
+ */ + status = qtree_read_dquot(&info->dqi_gi, dquot); + ocfs2_qinfo_unlock(info, 0); + if (status < 0) + goto out_dq; OCFS2_DQUOT(dquot)->dq_use_count++; OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 27fe7ee4874c..d0f323da0b5c 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); out: - /* Clear the read bit so that next time someone uses this - * dquot he reads fresh info from disk and allocates local - * dquot structure */ - clear_bit(DQ_READ_B, &dquot->dq_flags); return status; } From 52ed96fc71877b15d1090313cc7b63264bdb466b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Feb 2014 15:18:55 -0500 Subject: [PATCH 069/296] ocfs2 syncs the wrong range... commit 1b56e98990bcdbb20b9fab163654b9315bf158e8 upstream. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ff54014a24ec..46387e49aa46 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2374,8 +2374,8 @@ out_dio: if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, *ppos, + *ppos + count - 1); if (ret < 0) written = ret; @@ -2388,8 +2388,8 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, *ppos, + *ppos + count - 1); } /* From 84bb5b645ec5a54744180a1edc5dc72adc862457 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Tue, 18 Feb 2014 17:56:51 -0600 Subject: [PATCH 070/296] sched: Fix double normalization of vruntime commit 
791c9e0292671a3bfa95286bb5c08129d8605618 upstream. dequeue_entity() is called when p->on_rq and sets se->on_rq = 0 which appears to guarantee that the !se->on_rq condition is met. If the task has done set_current_state(TASK_INTERRUPTIBLE) without schedule() the second condition will be met and vruntime will be incorrectly adjusted twice. In certain cases this can result in the task's vruntime never increasing past the vruntime of other tasks on the CFS' run queue, starving them of CPU time. This patch changes switched_from_fair() to use !p->on_rq instead of !se->on_rq. I'm able to cause a task with a priority of 120 to starve all other tasks with the same priority on an ARM platform running 3.2.51-rt72 PREEMPT RT by writing one character at a time to a serial tty (16550 UART) in a tight loop. I'm also able to verify making this change corrects the problem on that platform and kernel version. Signed-off-by: George McCollister Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392767811-28916-1-git-send-email-george.mccollister@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 305ef886219e..c7ab8eab5427 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5862,15 +5862,15 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p) struct cfs_rq *cfs_rq = cfs_rq_of(se); /* - * Ensure the task's vruntime is normalized, so that when its + * Ensure the task's vruntime is normalized, so that when it's * switched back to the fair class the enqueue_entity(.flags=0) will * do the right thing. 
* - * If it was on_rq, then the dequeue_entity(.flags=0) will already - * have normalized the vruntime, if it was !on_rq, then only when + * If it's on_rq, then the dequeue_entity(.flags=0) will already + * have normalized the vruntime, if it's !on_rq, then only when * the task is sleeping will it still have non-normalized vruntime. */ - if (!se->on_rq && p->state != TASK_RUNNING) { + if (!p->on_rq && p->state != TASK_RUNNING) { /* * Fix up our vruntime so that the current sleep doesn't * cause 'unlimited' sleep bonus. From dabdb40f7f934e78b9c0ea45d114d1494e4cd44a Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Mon, 3 Mar 2014 15:38:36 -0800 Subject: [PATCH 071/296] rapidio/tsi721: fix tasklet termination in dma channel release commit 04379dffdd4da820d51a1566ad2e86f3b1ad97ed upstream. This patch is a modification of the patch originally proposed by Xiaotian Feng : https://lkml.org/lkml/2012/11/5/413 This new version disables DMA channel interrupts and ensures that the tasklet will not be scheduled again before calling tasklet_kill(). Unfortunately the updated patch was not released at that time due to planned rework of Tsi721 mport driver to use threaded interrupts (which has yet to happen). Recently the issue was reported again: https://lkml.org/lkml/2014/2/19/762. Description from the original Xiaotian's patch: "Some drivers use tasklet_disable in device remove/release process, tasklet_disable will inc tasklet->count and return. If the tasklet is not handled yet under some softirq pressure, the tasklet will be placed on the tasklet_vec, never have a chance to be executed. This might lead to a heavy loaded ksoftirqd, wakeup with pending_softirq, but tasklet is disabled. tasklet_kill should be used in this case." This patch is applicable to kernel versions starting from v3.5. 
Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Xiaotian Feng Reviewed-by: Thomas Gleixner Cc: Mike Galbraith Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/rapidio/devices/tsi721.h | 1 + drivers/rapidio/devices/tsi721_dma.c | 27 ++++++++++++++++++--------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h index b4b0d83f9ef6..7061ac0ad428 100644 --- a/drivers/rapidio/devices/tsi721.h +++ b/drivers/rapidio/devices/tsi721.h @@ -678,6 +678,7 @@ struct tsi721_bdma_chan { struct list_head free_list; dma_cookie_t completed_cookie; struct tasklet_struct tasklet; + bool active; }; #endif /* CONFIG_RAPIDIO_DMA_ENGINE */ diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c index 502663f5f7c6..91245f5dbe81 100644 --- a/drivers/rapidio/devices/tsi721_dma.c +++ b/drivers/rapidio/devices/tsi721_dma.c @@ -206,8 +206,8 @@ void tsi721_bdma_handler(struct tsi721_bdma_chan *bdma_chan) { /* Disable BDMA channel interrupts */ iowrite32(0, bdma_chan->regs + TSI721_DMAC_INTE); - - tasklet_schedule(&bdma_chan->tasklet); + if (bdma_chan->active) + tasklet_schedule(&bdma_chan->tasklet); } #ifdef CONFIG_PCI_MSI @@ -562,7 +562,7 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan) } #endif /* CONFIG_PCI_MSI */ - tasklet_enable(&bdma_chan->tasklet); + bdma_chan->active = true; tsi721_bdma_interrupt_enable(bdma_chan, 1); return bdma_chan->bd_num - 1; @@ -576,9 +576,7 @@ err_out: static void tsi721_free_chan_resources(struct dma_chan *dchan) { struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan); -#ifdef CONFIG_PCI_MSI struct tsi721_device *priv = to_tsi721(dchan->device); -#endif LIST_HEAD(list); dev_dbg(dchan->device->dev, "%s: Entry\n", __func__); @@ -589,14 +587,25 @@ static void tsi721_free_chan_resources(struct dma_chan *dchan) BUG_ON(!list_empty(&bdma_chan->active_list)); 
BUG_ON(!list_empty(&bdma_chan->queue)); - tasklet_disable(&bdma_chan->tasklet); + tsi721_bdma_interrupt_enable(bdma_chan, 0); + bdma_chan->active = false; + +#ifdef CONFIG_PCI_MSI + if (priv->flags & TSI721_USING_MSIX) { + synchronize_irq(priv->msix[TSI721_VECT_DMA0_DONE + + bdma_chan->id].vector); + synchronize_irq(priv->msix[TSI721_VECT_DMA0_INT + + bdma_chan->id].vector); + } else +#endif + synchronize_irq(priv->pdev->irq); + + tasklet_kill(&bdma_chan->tasklet); spin_lock_bh(&bdma_chan->lock); list_splice_init(&bdma_chan->free_list, &list); spin_unlock_bh(&bdma_chan->lock); - tsi721_bdma_interrupt_enable(bdma_chan, 0); - #ifdef CONFIG_PCI_MSI if (priv->flags & TSI721_USING_MSIX) { free_irq(priv->msix[TSI721_VECT_DMA0_DONE + @@ -790,6 +799,7 @@ int tsi721_register_dma(struct tsi721_device *priv) bdma_chan->dchan.cookie = 1; bdma_chan->dchan.chan_id = i; bdma_chan->id = i; + bdma_chan->active = false; spin_lock_init(&bdma_chan->lock); @@ -799,7 +809,6 @@ int tsi721_register_dma(struct tsi721_device *priv) tasklet_init(&bdma_chan->tasklet, tsi721_dma_tasklet, (unsigned long)bdma_chan); - tasklet_disable(&bdma_chan->tasklet); list_add_tail(&bdma_chan->dchan.device_node, &mport->dma.channels); } From fe42b170afae5978dc90641f29f2b39aefaa47fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Feb 2014 10:09:18 -0800 Subject: [PATCH 072/296] net-tcp: fastopen: fix high order allocations [ Upstream commit f5ddcbbb40aa0ba7fbfe22355d287603dbeeaaac ] This patch fixes two bugs in fastopen : 1) The tcp_sendmsg(..., @size) argument was ignored. Code was relying on user not fooling the kernel with iovec mismatches 2) When MTU is about 64KB, tcp_send_syn_data() attempts order-5 allocations, which are likely to fail when memory gets fragmented. Fixes: 783237e8daf13 ("net-tcp: Fast Open client - sending SYN-data") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Tested-by: Yuchung Cheng Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 3 ++- net/ipv4/tcp.c | 8 +++++--- net/ipv4/tcp_output.c | 7 ++++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 3fc77e90624a..6f87f0873843 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1308,7 +1308,8 @@ struct tcp_fastopen_request { /* Fast Open cookie. Size 0 means a cookie request */ struct tcp_fastopen_cookie cookie; struct msghdr *data; /* data in MSG_FASTOPEN */ - u16 copied; /* queued in tcp_connect() */ + size_t size; + int copied; /* queued in tcp_connect() */ }; void tcp_free_fastopen_req(struct tcp_sock *tp); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1a2e249cef49..39bdb14b3214 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1001,7 +1001,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp) } } -static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) +static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + int *copied, size_t size) { struct tcp_sock *tp = tcp_sk(sk); int err, flags; @@ -1016,11 +1017,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) if (unlikely(tp->fastopen_req == NULL)) return -ENOBUFS; tp->fastopen_req->data = msg; + tp->fastopen_req->size = size; flags = (msg->msg_flags & MSG_DONTWAIT) ? 
O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, msg->msg_namelen, flags); - *size = tp->fastopen_req->copied; + *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); return err; } @@ -1040,7 +1042,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flags = msg->msg_flags; if (flags & MSG_FASTOPEN) { - err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); + err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) goto out; else if (err) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d2df17940e07..6da3d94a114b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2892,7 +2892,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; - syn_data = skb_copy_expand(syn, skb_headroom(syn), space, + space = min_t(size_t, space, fo->size); + + /* limit to order-0 allocations */ + space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); + + syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, sk->sk_allocation); if (syn_data == NULL) goto fallback; From 913cfa947d296bd0a0671e87f102a49c24683fd8 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Thu, 27 Feb 2014 17:14:41 +0800 Subject: [PATCH 073/296] neigh: recompute reachabletime before returning from neigh_periodic_work() [ Upstream commit feff9ab2e7fa773b6a3965f77375fe89f7fd85cf ] If the neigh table's entries is less than gc_thresh1, the function will return directly, and the reachabletime will not be recomputed, so the reachabletime can be guessed. Signed-off-by: Duan Jiong Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 49aeab86f317..b49e8bafab17 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -764,9 +764,6 @@ static void neigh_periodic_work(struct work_struct *work) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - if (atomic_read(&tbl->entries) < tbl->gc_thresh1) - goto out; - /* * periodically recompute ReachableTime from random function */ @@ -779,6 +776,9 @@ static void neigh_periodic_work(struct work_struct *work) neigh_rand_reach_time(p->base_reachable_time); } + if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + goto out; + for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; From 73c992581e00ea2177e8b885ed0ed314daac64d9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 21 Feb 2014 13:08:04 +0800 Subject: [PATCH 074/296] virtio-net: alloc big buffers also when guest can receive UFO [ Upstream commit 0e7ede80d929ff0f830c44a543daa1acd590c749 ] We should alloc big buffers also when guest can receive UFO packets to let the big packets fit into guest rx buffer. Fixes 5c5167515d80f78f6bb538492c423adcae31ad65 (virtio-net: Allow UFO feature to be set and advertised.) Cc: Rusty Russell Cc: Michael S. Tsirkin Cc: Sridhar Samudrala Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Acked-by: Rusty Russell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a0c05e07feeb..2835bfe151b1 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1597,7 +1597,8 @@ static int virtnet_probe(struct virtio_device *vdev) /* If we can receive ANY GSO packets, we must allocate large ones. 
*/ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) || - virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) || + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO)) vi->big_packets = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) From 150d6bdb9d2c1bbdf95ba9502c35b5f4ddc4bd7e Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 21 Feb 2014 02:55:35 +0100 Subject: [PATCH 075/296] ipv6: reuse ip6_frag_id from ip6_ufo_append_data [ Upstream commit 916e4cf46d0204806c062c8c6c4d1f633852c5b6 ] Currently we generate a new fragmentation id on UFO segmentation. It is pretty hairy to identify the correct net namespace and dst there. Especially tunnels use IFF_XMIT_DST_RELEASE and thus have no skb_dst available at all. This causes unreliable or very predictable ipv6 fragmentation id generation while segmentation. Luckily we already have pregenerated the ip6_frag_id in ip6_ufo_append_data and can use it here. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/udp_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 3696aa28784a..2f65b022627b 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -108,7 +108,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. 
ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() From 43800d3eeded6b9016fd16b18cbc62154a3ec31e Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 25 Feb 2014 13:17:59 +0000 Subject: [PATCH 076/296] sfc: check for NULL efx->ptp_data in efx_ptp_event [ Upstream commit 8f355e5cee63c2c0c145d8206c4245d0189f47ff ] If we receive a PTP event from the NIC when we haven't set up PTP state in the driver, we attempt to read through a NULL pointer efx->ptp_data, triggering a panic. Signed-off-by: Edward Cree Acked-by: Shradha Shah Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/ptp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 9a95abf2dedf..540ad16d7807 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1319,6 +1319,13 @@ void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) struct efx_ptp_data *ptp = efx->ptp_data; int code = EFX_QWORD_FIELD(*ev, MCDI_EVENT_CODE); + if (!ptp) { + if (net_ratelimit()) + netif_warn(efx, drv, efx->net_dev, + "Received PTP event but PTP not set up\n"); + return; + } + if (!ptp->enabled) return; From d1f7dd4e9b53146b6049de3a6b4c2b74472e26e5 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 27 Feb 2014 12:57:58 +0100 Subject: [PATCH 077/296] ipv6: ipv6_find_hdr restore prev functionality [ Upstream commit accfe0e356327da5bd53da8852b93fc22de9b5fc ] The commit 9195bb8e381d81d5a315f911904cdf0cfcc919b8 ("ipv6: improve ipv6_find_hdr() to skip empty routing headers") broke ipv6_find_hdr(). When a target is specified like IPPROTO_ICMPV6 ipv6_find_hdr() returns -ENOENT when it's found, not the header as expected. A part of IPVS is broken and possibly also nft_exthdr_eval(). When target is -1 which it is in most cases, it works. This patch exits the do while loop if the specific header is found so the nexthdr could be returned as expected. 
Reported-by: Art -kwaak- van Breemen Signed-off-by: Hans Schillstrom CC:Ansis Atteka Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/exthdrs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index c5e83fae4df4..51af9d0d019a 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, found = (nexthdr == target); if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { - if (target < 0) + if (target < 0 || found) break; return -ENOENT; } From e16d498213afb9dd92d3c7b7ce02f6721d66ab7d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 28 Feb 2014 15:05:10 -0800 Subject: [PATCH 078/296] tg3: Don't check undefined error bits in RXBD [ Upstream commit d7b95315cc7f441418845a165ee56df723941487 ] Redefine the RXD_ERR_MASK to include only relevant error bits. This fixes a customer reported issue of randomly dropping packets on the 5719. Signed-off-by: Michael Chan Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 3 +-- drivers/net/ethernet/broadcom/tg3.h | 6 +++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 68e9dc453e11..e27d5c839be5 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6687,8 +6687,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) work_mask |= opaque_key; - if ((desc->err_vlan & RXD_ERR_MASK) != 0 && - (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII)) { + if (desc->err_vlan & RXD_ERR_MASK) { drop_it: tg3_recycle_rx(tnapi, tpr, opaque_key, desc_idx, *post_ptr); diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index ff6e30eeae35..046059c56713 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2587,7 +2587,11 @@ struct tg3_rx_buffer_desc { #define RXD_ERR_TOO_SMALL 0x00400000 #define RXD_ERR_NO_RESOURCES 0x00800000 #define RXD_ERR_HUGE_FRAME 0x01000000 -#define RXD_ERR_MASK 0xffff0000 + +#define RXD_ERR_MASK (RXD_ERR_BAD_CRC | RXD_ERR_COLLISION | \ + RXD_ERR_LINK_LOST | RXD_ERR_PHY_DECODE | \ + RXD_ERR_MAC_ABRT | RXD_ERR_TOO_SMALL | \ + RXD_ERR_NO_RESOURCES | RXD_ERR_HUGE_FRAME) u32 reserved; u32 opaque; From 892b46a6a68fa1ae1f85f3c2ba475382b7b6acc5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 3 Mar 2014 17:23:04 +0100 Subject: [PATCH 079/296] net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable [ Upstream commit ec0223ec48a90cb605244b45f7c62de856403729 ] RFC4895 introduced AUTH chunks for SCTP; during the SCTP handshake RANDOM; CHUNKS; HMAC-ALGO are negotiated (CHUNKS being optional though): ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- -------------------- COOKIE-ECHO --------------------> <-------------------- COOKIE-ACK --------------------- A special 
case is when an endpoint requires COOKIE-ECHO chunks to be authenticated: ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- ------------------ AUTH; COOKIE-ECHO ----------------> <-------------------- COOKIE-ACK --------------------- RFC4895, section 6.3. Receiving Authenticated Chunks says: The receiver MUST use the HMAC algorithm indicated in the HMAC Identifier field. If this algorithm was not specified by the receiver in the HMAC-ALGO parameter in the INIT or INIT-ACK chunk during association setup, the AUTH chunk and all the chunks after it MUST be discarded and an ERROR chunk SHOULD be sent with the error cause defined in Section 4.1. [...] If no endpoint pair shared key has been configured for that Shared Key Identifier, all authenticated chunks MUST be silently discarded. [...] When an endpoint requires COOKIE-ECHO chunks to be authenticated, some special procedures have to be followed because the reception of a COOKIE-ECHO chunk might result in the creation of an SCTP association. If a packet arrives containing an AUTH chunk as a first chunk, a COOKIE-ECHO chunk as the second chunk, and possibly more chunks after them, and the receiver does not have an STCB for that packet, then authentication is based on the contents of the COOKIE-ECHO chunk. In this situation, the receiver MUST authenticate the chunks in the packet by using the RANDOM parameters, CHUNKS parameters and HMAC_ALGO parameters obtained from the COOKIE-ECHO chunk, and possibly a local shared secret as inputs to the authentication procedure specified in Section 6.3. If authentication fails, then the packet is discarded. If the authentication is successful, the COOKIE-ECHO and all the chunks after the COOKIE-ECHO MUST be processed. If the receiver has an STCB, it MUST process the AUTH chunk as described above using the STCB from the existing association to authenticate the COOKIE-ECHO chunk and all the chunks after it. [...] 
Commit bbd0d59809f9 introduced the possibility to receive and verification of AUTH chunk, including the edge case for authenticated COOKIE-ECHO. On reception of COOKIE-ECHO, the function sctp_sf_do_5_1D_ce() handles processing, unpacks and creates a new association if it passed sanity checks and also tests for authentication chunks being present. After a new association has been processed, it invokes sctp_process_init() on the new association and walks through the parameter list it received from the INIT chunk. It checks SCTP_PARAM_RANDOM, SCTP_PARAM_HMAC_ALGO and SCTP_PARAM_CHUNKS, and copies them into asoc->peer meta data (peer_random, peer_hmacs, peer_chunks) in case sysctl -w net.sctp.auth_enable=1 is set. If in INIT's SCTP_PARAM_SUPPORTED_EXT parameter SCTP_CID_AUTH is set, peer_random != NULL and peer_hmacs != NULL the peer is to be assumed asoc->peer.auth_capable=1, in any other case asoc->peer.auth_capable=0. Now, if in sctp_sf_do_5_1D_ce() chunk->auth_chunk is available, we set up a fake auth chunk and pass that on to sctp_sf_authenticate(), which at latest in sctp_auth_calculate_hmac() reliably dereferences a NULL pointer at position 0..0008 when setting up the crypto key in crypto_hash_setkey() by using asoc->asoc_shared_key that is NULL as condition key_id == asoc->active_key_id is true if the AUTH chunk was injected correctly from remote. This happens no matter what net.sctp.auth_enable sysctl says. The fix is to check for net->sctp.auth_enable and for asoc->peer.auth_capable before doing any operations like sctp_sf_authenticate() as no key is activated in sctp_auth_asoc_init_active_key() for each case. Now as RFC4895 section 6.3 states that if the used HMAC-ALGO passed from the INIT chunk was not used in the AUTH chunk, we SHOULD send an error; however in this case it would be better to just silently discard such a maliciously prepared handshake as we didn't even receive a parameter at all. 
Also, as our endpoint has no shared key configured, section 6.3 says that MUST silently discard, which we are doing from now onwards. Before calling sctp_sf_pdiscard(), we need not only to free the association, but also the chunk->auth_chunk skb, as commit bbd0d59809f9 created a skb clone in that case. I have tested this locally by using netfilter's nfqueue and re-injecting packets into the local stack after maliciously modifying the INIT chunk (removing RANDOM; HMAC-ALGO param) and the SCTP packet containing the COOKIE_ECHO (injecting AUTH chunk before COOKIE_ECHO). Fixed with this patch applied. Fixes: bbd0d59809f9 ("[SCTP]: Implement the receive and verification of AUTH chunk") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Cc: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_statefuns.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index de1a0138317f..7ceb25ba85b8 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -765,6 +765,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, struct sctp_chunk auth; sctp_ierror_t ret; + /* Make sure that we and the peer are AUTH capable */ + if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { + kfree_skb(chunk->auth_chunk); + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + /* set-up our fake chunk so that we can process it */ auth.skb = chunk->auth_chunk; auth.asoc = chunk->asoc; From b59ed9d77e700c22ca6ab042004a48f078de5989 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 11 Feb 2014 16:02:47 +0100 Subject: [PATCH 080/296] mac80211: send control port protocol frames to the VO queue commit 1bf4bbb4024dcdab5e57634dd8ae1072d42a53ac upstream. Improves reliability of wifi connections with WPA, since authentication frames are prioritized over normal traffic and also typically exempt from aggregation. 
Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/wme.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index afba19cb6f87..a282fddf8b00 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -153,6 +153,11 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, return IEEE80211_AC_BE; } + if (skb->protocol == sdata->control_port_protocol) { + skb->priority = 7; + return ieee80211_downgrade_queue(sdata, skb); + } + /* use the data classifier to determine what 802.1d tag the * data frame has */ skb->priority = cfg80211_classify8021d(skb); From a7ee1a84a81555b19ec3d02f104bfd70cf0b668a Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 20 Feb 2014 09:22:11 +0200 Subject: [PATCH 081/296] mac80211: fix AP powersave TX vs. wakeup race commit 1d147bfa64293b2723c4fec50922168658e613ba upstream. There is a race between the TX path and the STA wakeup: while a station is sleeping, mac80211 buffers frames until it wakes up, then the frames are transmitted. However, the RX and TX paths are concurrent, so the packet indicating wakeup can be processed while a packet is being transmitted. This can lead to a situation where the buffered frames list is emptied on the one side, while a frame is being added on the other side, as the station is still seen as sleeping in the TX path. As a result, the newly added frame will not be sent anytime soon. It might be sent much later (and out of order) when the station goes to sleep and wakes up the next time. Additionally, it can lead to the crash below. Fix all this by synchronising both paths with a new lock. Both paths are not fastpath since they handle PS situations. In a later patch we'll remove the extra skb queue locks to reduce locking overhead. 
BUG: unable to handle kernel NULL pointer dereference at 000000b0 IP: [] ieee80211_report_used_skb+0x11/0x3e0 [mac80211] *pde = 00000000 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC EIP: 0060:[] EFLAGS: 00210282 CPU: 1 EIP is at ieee80211_report_used_skb+0x11/0x3e0 [mac80211] EAX: e5900da0 EBX: 00000000 ECX: 00000001 EDX: 00000000 ESI: e41d00c0 EDI: e5900da0 EBP: ebe458e4 ESP: ebe458b0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 CR0: 8005003b CR2: 000000b0 CR3: 25a78000 CR4: 000407d0 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff0ff0 DR7: 00000400 Process iperf (pid: 3934, ti=ebe44000 task=e757c0b0 task.ti=ebe44000) iwlwifi 0000:02:00.0: I iwl_pcie_enqueue_hcmd Sending command LQ_CMD (#4e), seq: 0x0903, 92 bytes at 3[3]:9 Stack: e403b32c ebe458c4 00200002 00200286 e403b338 ebe458cc c10960bb e5900da0 ff76a6ec ebe458d8 00000000 e41d00c0 e5900da0 ebe458f0 ff6f1b75 e403b210 ebe4598c ff723dc1 00000000 ff76a6ec e597c978 e403b758 00000002 00000002 Call Trace: [] ieee80211_free_txskb+0x15/0x20 [mac80211] [] invoke_tx_handlers+0x1661/0x1780 [mac80211] [] ieee80211_tx+0x75/0x100 [mac80211] [] ieee80211_xmit+0x8f/0xc0 [mac80211] [] ieee80211_subif_start_xmit+0x4fe/0xe20 [mac80211] [] dev_hard_start_xmit+0x450/0x950 [] sch_direct_xmit+0xa9/0x250 [] __qdisc_run+0x4b/0x150 [] dev_queue_xmit+0x2c2/0xca0 Reported-by: Yaara Rozenblum Signed-off-by: Emmanuel Grumbach Reviewed-by: Stanislaw Gruszka [reword commit log, use a separate lock] Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/sta_info.c | 4 ++++ net/mac80211/sta_info.h | 7 +++---- net/mac80211/tx.c | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 11216bc13b27..a66d0068a664 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -339,6 +339,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, return NULL; spin_lock_init(&sta->lock); + 
spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -1045,6 +1046,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) skb_queue_head_init(&pending); + /* sync with ieee80211_tx_h_unicast_ps_buf */ + spin_lock(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; @@ -1064,6 +1067,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) } ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta); + spin_unlock(&sta->ps_lock); local->total_ps_buffered -= buffered; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index adc30045f99e..3184b2b2853c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -244,6 +244,7 @@ struct sta_ampdu_mlme { * @drv_unblock_wk: used for driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly + * @ps_lock: used for powersave (when mac80211 is the AP) related locking * @ps_tx_buf: buffers (per AC) of frames to transmit to this station * when it leaves power saving state or polls * @tx_filtered: buffers (per AC) of frames we already tried to @@ -324,10 +325,8 @@ struct sta_info { /* use the accessors defined below */ unsigned long _flags; - /* - * STA powersave frame queues, no more than the internal - * locking required. 
- */ + /* STA powersave lock and frame queues */ + spinlock_t ps_lock; struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; unsigned long driver_buffered_tids; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fe9d6e7b904b..6d5791d735f3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -461,6 +461,20 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) purge_old_ps_buffers(tx->local); + + /* sync with ieee80211_sta_ps_deliver_wakeup */ + spin_lock(&sta->ps_lock); + /* + * STA woke up the meantime and all the frames on ps_tx_buf have + * been queued to pending queue. No reordering can happen, go + * ahead and Tx the packet. + */ + if (!test_sta_flag(sta, WLAN_STA_PS_STA) && + !test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { + spin_unlock(&sta->ps_lock); + return TX_CONTINUE; + } + if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) { struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]); ps_dbg(tx->sdata, @@ -474,6 +488,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb); + spin_unlock(&sta->ps_lock); if (!timer_pending(&local->sta_cleanup)) mod_timer(&local->sta_cleanup, From e713fe3d4bde100807377a2699587d05721f909b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2014 20:34:34 +0100 Subject: [PATCH 082/296] mac80211: don't validate unchanged AP bandwidth while tracking commit 963a1852fbac4f75a2d938fa2e734ef1e6d4c044 upstream. The MLME code in mac80211 must track whether or not the AP changed bandwidth, but if there's no change while tracking it shouldn't do anything, otherwise regulatory updates can make it impossible to connect to certain APs if the regulatory database doesn't match the information from the AP. 
See the precise scenario described in the code. This still leaves some possible problems with CSA or if the AP actually changed bandwidth, but those cases are less common and won't completely prevent using it. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=70881 Reported-and-tested-by: Nate Carlson Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5b4328dcbe4e..400b219ca090 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -359,6 +359,28 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret = 0; out: + /* + * When tracking the current AP, don't do any further checks if the + * new chandef is identical to the one we're currently using for the + * connection. This keeps us from playing ping-pong with regulatory, + * without it the following can happen (for example): + * - connect to an AP with 80 MHz, world regdom allows 80 MHz + * - AP advertises regdom US + * - CRDA loads regdom US with 80 MHz prohibited (old database) + * - the code below detects an unsupported channel, downgrades, and + * we disconnect from the AP in the caller + * - disconnect causes CRDA to reload world regdomain and the game + * starts anew. + * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881) + * + * It seems possible that there are still scenarios with CSA or real + * bandwidth changes where this could happen, but those cases are + * less common and wouldn't completely prevent using the AP.
+ */ + if (tracking && + cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef)) + return ret; + /* don't print the message below for VHT mismatch if VHT is disabled */ if (ret & IEEE80211_STA_DISABLE_VHT) vht_chandef = *chandef; From c02d9a9697134e6fd6527c33ae0d78a8faad6e41 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Feb 2014 20:47:53 +0100 Subject: [PATCH 083/296] mac80211: fix association to 20/40 MHz VHT networks commit cb664981607a6b5b3d670ad57bbda893b2528d96 upstream. When a VHT network uses 20 or 40 MHz as per the HT operation information, the channel center frequency segment 0 field in the VHT operation information is reserved, so ignore it. This fixes association with such networks when the AP puts 0 into the field, previously we'd disconnect due to an invalid channel with the message wlan0: AP VHT information is invalid, disable VHT Fixes: f2d9d270c15ae ("mac80211: support VHT association") Reported-by: Tim Nelson Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 400b219ca090..49bc2246bd86 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -310,6 +310,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, switch (vht_oper->chan_width) { case IEEE80211_VHT_CHANWIDTH_USE_HT: vht_chandef.width = chandef->width; + vht_chandef.center_freq1 = chandef->center_freq1; break; case IEEE80211_VHT_CHANWIDTH_80MHZ: vht_chandef.width = NL80211_CHAN_WIDTH_80; From 30f754e9d9e0508024e6f872dfc888551102875f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Mar 2014 13:46:53 +0100 Subject: [PATCH 084/296] mac80211: clear sequence/fragment number in QoS-null frames commit 864a6040f395464003af8dd0d8ca86fed19866d4 upstream. 
Avoid leaking data by sending uninitialized memory and setting an invalid (non-zero) fragment number (the sequence number is ignored anyway) by setting the seq_ctrl field to zero. Fixes: 3f52b7e328c5 ("mac80211: mesh power save basics") Fixes: ce662b44ce22 ("mac80211: send (QoS) Null if no buffered frames") Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mesh_ps.c | 1 + net/mac80211/sta_info.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 3b7bfc01ee36..ddda201832b3 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -36,6 +36,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta) sdata->vif.addr); nullfunc->frame_control = fc; nullfunc->duration_id = 0; + nullfunc->seq_ctrl = 0; /* no address resolution for this frame -> set addr 1 immediately */ memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a66d0068a664..0418777c361f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1114,6 +1114,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + nullfunc->seq_ctrl = 0; skb->priority = tid; skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); From 078daa8b20b5ffe790f8b3cdf9a4df6ee965c30a Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Fri, 14 Feb 2014 08:15:20 +0530 Subject: [PATCH 085/296] ath9k: Fix ETSI compliance for AR9462 2.0 commit b3050248c167871ca52cfdb2ce78aa2460249346 upstream. The minimum CCA power threshold values have to be adjusted for existing cards to be in compliance with new regulations. 
Newer cards will make use of the values obtained from EEPROM, support for this was added earlier. To make sure that cards that are already in use and don't have proper values in EEPROM, do not violate regulations, use the initvals instead. Reported-by: Jeang Daniel Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h b/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h index 999ab08c34e6..4ae3cf7283ea 100644 --- a/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h +++ b/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h @@ -56,7 +56,7 @@ static const u32 ar9462_2p0_baseband_postamble[][5] = { {0x00009e14, 0x37b95d5e, 0x37b9605e, 0x3236605e, 0x32365a5e}, {0x00009e18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, {0x00009e1c, 0x0001cf9c, 0x0001cf9c, 0x00021f9c, 0x00021f9c}, - {0x00009e20, 0x000003b5, 0x000003b5, 0x000003ce, 0x000003ce}, + {0x00009e20, 0x000003a5, 0x000003a5, 0x000003a5, 0x000003a5}, {0x00009e2c, 0x0000001c, 0x0000001c, 0x00000021, 0x00000021}, {0x00009e3c, 0xcf946220, 0xcf946220, 0xcfd5c782, 0xcfd5c282}, {0x00009e44, 0x62321e27, 0x62321e27, 0xfe291e27, 0xfe291e27}, @@ -95,7 +95,7 @@ static const u32 ar9462_2p0_baseband_postamble[][5] = { {0x0000ae04, 0x001c0000, 0x001c0000, 0x001c0000, 0x00100000}, {0x0000ae18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, {0x0000ae1c, 0x0000019c, 0x0000019c, 0x0000019c, 0x0000019c}, - {0x0000ae20, 0x000001b5, 0x000001b5, 0x000001ce, 0x000001ce}, + {0x0000ae20, 0x000001a6, 0x000001a6, 0x000001aa, 0x000001aa}, {0x0000b284, 0x00000000, 0x00000000, 0x00000550, 0x00000550}, }; From 6856fbe8d4847fd6b5dd1fe86f8758b2fa912e31 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 18 Feb 2014 10:30:18 +0200 Subject: [PATCH 086/296] iwlwifi: dvm: clear IWL_STA_UCODE_INPROGRESS when assoc 
fails commit ec6f678c74dbdb06a6a775bbb00f1d26c17c404b upstream. We set IWL_STA_UCODE_INPROGRESS flag when we add a station and clear it when we send the LQ command for it. But the LQ command is sent only when the association succeeds. If the association doesn't succeed, we would leave this flag set and that wouldn't indicate the station entry as vacant. This probably fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1065663 Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/dvm/sta.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/dvm/sta.c b/drivers/net/wireless/iwlwifi/dvm/sta.c index c3c13ce96eb0..e800002d6158 100644 --- a/drivers/net/wireless/iwlwifi/dvm/sta.c +++ b/drivers/net/wireless/iwlwifi/dvm/sta.c @@ -590,6 +590,7 @@ void iwl_deactivate_station(struct iwl_priv *priv, const u8 sta_id, sizeof(priv->tid_data[sta_id][tid])); priv->stations[sta_id].used &= ~IWL_STA_DRIVER_ACTIVE; + priv->stations[sta_id].used &= ~IWL_STA_UCODE_INPROGRESS; priv->num_stations--; From 07af579b413d2c991d67a79813e8b5cc4055331c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 25 Feb 2014 10:37:15 +0100 Subject: [PATCH 087/296] iwlwifi: fix TX status for aggregated packets commit 143582c6847cb285b361804c613127c25de60ca4 upstream. Only the first packet is currently handled correctly, but then all others are assumed to have failed which is problematic. Fix this, marking them all successful instead (since if they're not then the firmware will have transmitted them as single frames.) This fixes the lost packet reporting. Also do a tiny variable scoping cleanup. 
Signed-off-by: Johannes Berg [Add the dvm part] Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/dvm/tx.c | 14 +++++++++----- drivers/net/wireless/iwlwifi/mvm/tx.c | 18 +++++++++--------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/tx.c b/drivers/net/wireless/iwlwifi/dvm/tx.c index 20e65d3cc3bd..2b5dbff9eadb 100644 --- a/drivers/net/wireless/iwlwifi/dvm/tx.c +++ b/drivers/net/wireless/iwlwifi/dvm/tx.c @@ -1322,8 +1322,6 @@ int iwlagn_rx_reply_compressed_ba(struct iwl_priv *priv, struct iwl_compressed_ba_resp *ba_resp = (void *)pkt->data; struct iwl_ht_agg *agg; struct sk_buff_head reclaimed_skbs; - struct ieee80211_tx_info *info; - struct ieee80211_hdr *hdr; struct sk_buff *skb; int sta_id; int tid; @@ -1410,22 +1408,28 @@ int iwlagn_rx_reply_compressed_ba(struct iwl_priv *priv, freed = 0; skb_queue_walk(&reclaimed_skbs, skb) { - hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_hdr *hdr = (void *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (ieee80211_is_data_qos(hdr->frame_control)) freed++; else WARN_ON_ONCE(1); - info = IEEE80211_SKB_CB(skb); iwl_trans_free_tx_cmd(priv->trans, info->driver_data[1]); + memset(&info->status, 0, sizeof(info->status)); + /* Packet was transmitted successfully, failures come as single + * frames because before failing a frame the firmware transmits + * it without aggregation at least once. 
+ */ + info->flags |= IEEE80211_TX_STAT_ACK; + if (freed == 1) { /* this is the first skb we deliver in this batch */ /* put the rate scaling data there */ info = IEEE80211_SKB_CB(skb); memset(&info->status, 0, sizeof(info->status)); - info->flags |= IEEE80211_TX_STAT_ACK; info->flags |= IEEE80211_TX_STAT_AMPDU; info->status.ampdu_ack_len = ba_resp->txed_2_done; info->status.ampdu_len = ba_resp->txed; diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index a2e6112e91e9..4ec8385e4307 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -819,16 +819,12 @@ int iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb, struct iwl_mvm_ba_notif *ba_notif = (void *)pkt->data; struct sk_buff_head reclaimed_skbs; struct iwl_mvm_tid_data *tid_data; - struct ieee80211_tx_info *info; struct ieee80211_sta *sta; struct iwl_mvm_sta *mvmsta; - struct ieee80211_hdr *hdr; struct sk_buff *skb; int sta_id, tid, freed; - /* "flow" corresponds to Tx queue */ u16 scd_flow = le16_to_cpu(ba_notif->scd_flow); - /* "ssn" is start of block-ack Tx window, corresponds to index * (in Tx queue's circular buffer) of first TFD/frame in window */ u16 ba_resp_scd_ssn = le16_to_cpu(ba_notif->scd_ssn); @@ -885,22 +881,26 @@ int iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb, freed = 0; skb_queue_walk(&reclaimed_skbs, skb) { - hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_hdr *hdr = (void *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (ieee80211_is_data_qos(hdr->frame_control)) freed++; else WARN_ON_ONCE(1); - info = IEEE80211_SKB_CB(skb); iwl_trans_free_tx_cmd(mvm->trans, info->driver_data[1]); + memset(&info->status, 0, sizeof(info->status)); + /* Packet was transmitted successfully, failures come as single + * frames because before failing a frame the firmware transmits + * it without aggregation at least once. 
+ */ + info->flags |= IEEE80211_TX_STAT_ACK; + if (freed == 1) { /* this is the first skb we deliver in this batch */ /* put the rate scaling data there */ - info = IEEE80211_SKB_CB(skb); - memset(&info->status, 0, sizeof(info->status)); - info->flags |= IEEE80211_TX_STAT_ACK; info->flags |= IEEE80211_TX_STAT_AMPDU; info->status.ampdu_ack_len = ba_notif->txed_2_done; info->status.ampdu_len = ba_notif->txed; From bace752109d73fd3f5dc4d592224c6f686b9a4eb Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 12 Feb 2014 15:15:05 +0200 Subject: [PATCH 088/296] iwlwifi: disable TX AMPDU by default for iwldvm commit 205e2210daa975d92ace485a65a31ccc4077fe1a upstream. NICs supported by iwldvm don't handle well TX AMPDU. Disable it by default, still leave the possibility to the user to force enable it with a debug parameter. NICs supported by iwlmvm don't suffer from the same issue, leave TX AMPDU enabled by default for these. Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/dvm/mac80211.c | 22 ++++++++++++++++++-- drivers/net/wireless/iwlwifi/iwl-drv.c | 2 +- drivers/net/wireless/iwlwifi/iwl-modparams.h | 11 ++++++---- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 22 ++++++++++++++++++-- 4 files changed, 48 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index e04f3da1ccb3..e9d09f19f856 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -739,6 +739,24 @@ static int iwlagn_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, return ret; } +static inline bool iwl_enable_rx_ampdu(const struct iwl_cfg *cfg) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) + return false; + return true; +} + +static inline bool iwl_enable_tx_ampdu(const struct iwl_cfg *cfg) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) + return false; + if 
(iwlwifi_mod_params.disable_11n & IWL_ENABLE_HT_TXAGG) + return true; + + /* disabled by default */ + return false; +} + static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, @@ -760,7 +778,7 @@ static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, switch (action) { case IEEE80211_AMPDU_RX_START: - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) + if (!iwl_enable_rx_ampdu(priv->cfg)) break; IWL_DEBUG_HT(priv, "start Rx\n"); ret = iwl_sta_rx_agg_start(priv, sta, tid, *ssn); @@ -772,7 +790,7 @@ static int iwlagn_mac_ampdu_action(struct ieee80211_hw *hw, case IEEE80211_AMPDU_TX_START: if (!priv->trans->ops->txq_enable) break; - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) + if (!iwl_enable_tx_ampdu(priv->cfg)) break; IWL_DEBUG_HT(priv, "start Tx\n"); ret = iwlagn_tx_agg_start(priv, vif, sta, tid, ssn); diff --git a/drivers/net/wireless/iwlwifi/iwl-drv.c b/drivers/net/wireless/iwlwifi/iwl-drv.c index 40fed1f511e2..96050e6c3d57 100644 --- a/drivers/net/wireless/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/iwlwifi/iwl-drv.c @@ -1211,7 +1211,7 @@ module_param_named(swcrypto, iwlwifi_mod_params.sw_crypto, int, S_IRUGO); MODULE_PARM_DESC(swcrypto, "using crypto in software (default 0 [hardware])"); module_param_named(11n_disable, iwlwifi_mod_params.disable_11n, uint, S_IRUGO); MODULE_PARM_DESC(11n_disable, - "disable 11n functionality, bitmap: 1: full, 2: agg TX, 4: agg RX"); + "disable 11n functionality, bitmap: 1: full, 2: disable agg TX, 4: disable agg RX, 8 enable agg TX"); module_param_named(amsdu_size_8K, iwlwifi_mod_params.amsdu_size_8K, int, S_IRUGO); MODULE_PARM_DESC(amsdu_size_8K, "enable 8K amsdu size (default 0)"); diff --git a/drivers/net/wireless/iwlwifi/iwl-modparams.h b/drivers/net/wireless/iwlwifi/iwl-modparams.h index d6f6c37c09fd..e99bc55046e5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-modparams.h +++ b/drivers/net/wireless/iwlwifi/iwl-modparams.h 
@@ -79,9 +79,12 @@ enum iwl_power_level { IWL_POWER_NUM }; -#define IWL_DISABLE_HT_ALL BIT(0) -#define IWL_DISABLE_HT_TXAGG BIT(1) -#define IWL_DISABLE_HT_RXAGG BIT(2) +enum iwl_disable_11n { + IWL_DISABLE_HT_ALL = BIT(0), + IWL_DISABLE_HT_TXAGG = BIT(1), + IWL_DISABLE_HT_RXAGG = BIT(2), + IWL_ENABLE_HT_TXAGG = BIT(3), +}; /** * struct iwl_mod_params @@ -90,7 +93,7 @@ enum iwl_power_level { * * @sw_crypto: using hardware encryption, default = 0 * @disable_11n: disable 11n capabilities, default = 0, - * use IWL_DISABLE_HT_* constants + * use IWL_[DIS,EN]ABLE_HT_* constants * @amsdu_size_8K: enable 8K amsdu size, default = 0 * @restart_fw: restart firmware, default = 1 * @plcp_check: enable plcp health check, default = true diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index f7545e06ce2a..88b9c0964696 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -278,6 +278,24 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, ieee80211_free_txskb(hw, skb); } +static inline bool iwl_enable_rx_ampdu(const struct iwl_cfg *cfg) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) + return false; + return true; +} + +static inline bool iwl_enable_tx_ampdu(const struct iwl_cfg *cfg) +{ + if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) + return false; + if (iwlwifi_mod_params.disable_11n & IWL_ENABLE_HT_TXAGG) + return true; + + /* enabled by default */ + return true; +} + static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum ieee80211_ampdu_mlme_action action, @@ -297,7 +315,7 @@ static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, switch (action) { case IEEE80211_AMPDU_RX_START: - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_RXAGG) { + if (!iwl_enable_rx_ampdu(mvm->cfg)) { ret = -EINVAL; break; } @@ -307,7 +325,7 @@ static int iwl_mvm_mac_ampdu_action(struct ieee80211_hw *hw, ret = 
iwl_mvm_sta_rx_agg(mvm, sta, tid, 0, false); break; case IEEE80211_AMPDU_TX_START: - if (iwlwifi_mod_params.disable_11n & IWL_DISABLE_HT_TXAGG) { + if (!iwl_enable_tx_ampdu(mvm->cfg)) { ret = -EINVAL; break; } From 036a9770c1ab0fe8fc6af256c002319935145f13 Mon Sep 17 00:00:00 2001 From: Avinash Patil Date: Tue, 18 Feb 2014 15:41:54 -0800 Subject: [PATCH 089/296] mwifiex: clean pcie ring only when device is present commit 4f7ba432202c8330cc03ab959c6228d0de5dc4a3 upstream. Write io memory to clean PCIe buffer only when PCIe device is present else this results into crash because of invalid memory access. Signed-off-by: Avinash Patil Signed-off-by: Bing Zhao Signed-off-by: John W. Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/wmm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c index ae31e8df44d7..80f72f6b6d56 100644 --- a/drivers/net/wireless/mwifiex/wmm.c +++ b/drivers/net/wireless/mwifiex/wmm.c @@ -556,7 +556,8 @@ mwifiex_clean_txrx(struct mwifiex_private *priv) mwifiex_wmm_delete_all_ralist(priv); memcpy(tos_to_tid, ac_to_tid, sizeof(tos_to_tid)); - if (priv->adapter->if_ops.clean_pcie_ring) + if (priv->adapter->if_ops.clean_pcie_ring && + !priv->adapter->surprise_removed) priv->adapter->if_ops.clean_pcie_ring(priv->adapter); spin_unlock_irqrestore(&priv->wmm.ra_list_spinlock, flags); } From ef8f92b153230230d437e32573c0560bae9a8f13 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 18 Feb 2014 15:41:55 -0800 Subject: [PATCH 090/296] mwifiex: add NULL check for PCIe Rx skb commit bb8e6a1ee881d131e404f0f1f5e8dc9281002771 upstream. We may get a NULL pointer here if skb allocation for Rx packet was failed earlier. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/pcie.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c index 20c9c4c7b0b2..1662b99dd148 100644 --- a/drivers/net/wireless/mwifiex/pcie.c +++ b/drivers/net/wireless/mwifiex/pcie.c @@ -1195,6 +1195,12 @@ static int mwifiex_pcie_process_recv_data(struct mwifiex_adapter *adapter) rd_index = card->rxbd_rdptr & reg->rx_mask; skb_data = card->rx_buf_list[rd_index]; + /* If skb allocation was failed earlier for Rx packet, + * rx_buf_list[rd_index] would have been left with a NULL. + */ + if (!skb_data) + return -ENOMEM; + MWIFIEX_SKB_PACB(skb_data, &buf_pa); pci_unmap_single(card->dev, buf_pa, MWIFIEX_RX_DATA_BUF_SIZE, PCI_DMA_FROMDEVICE); From 02395fb0f850f04631988ddf532dc74f538d35de Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 18 Feb 2014 15:41:56 -0800 Subject: [PATCH 091/296] mwifiex: fix cmd and Tx data timeout issue for PCIe cards commit 1c97560f6d751a620978504a4a888c631192b71a upstream. We are sending sleep confirm done interrupt in the middle of sleep handshake. There is a corner case when Tx done interrupt is received from firmware during sleep handshake due to which host and firmware power states go out of sync causing cmd and Tx data timeout problem. Hence sleep confirm done interrupt is sent at the end of sleep handshake to fix the problem. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/pcie.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c index 1662b99dd148..801c709656f9 100644 --- a/drivers/net/wireless/mwifiex/pcie.c +++ b/drivers/net/wireless/mwifiex/pcie.c @@ -1515,6 +1515,14 @@ static int mwifiex_pcie_process_cmd_complete(struct mwifiex_adapter *adapter) if (adapter->ps_state == PS_STATE_SLEEP_CFM) { mwifiex_process_sleep_confirm_resp(adapter, skb->data, skb->len); + mwifiex_pcie_enable_host_int(adapter); + if (mwifiex_write_reg(adapter, + PCIE_CPU_INT_EVENT, + CPU_INTR_SLEEP_CFM_DONE)) { + dev_warn(adapter->dev, + "Write register failed\n"); + return -1; + } while (reg->sleep_cookie && (count++ < 10) && mwifiex_pcie_ok_to_access_hw(adapter)) usleep_range(50, 60); @@ -1985,23 +1993,9 @@ static void mwifiex_interrupt_status(struct mwifiex_adapter *adapter) adapter->int_status |= pcie_ireg; spin_unlock_irqrestore(&adapter->int_lock, flags); - if (pcie_ireg & HOST_INTR_CMD_DONE) { - if ((adapter->ps_state == PS_STATE_SLEEP_CFM) || - (adapter->ps_state == PS_STATE_SLEEP)) { - mwifiex_pcie_enable_host_int(adapter); - if (mwifiex_write_reg(adapter, - PCIE_CPU_INT_EVENT, - CPU_INTR_SLEEP_CFM_DONE) - ) { - dev_warn(adapter->dev, - "Write register failed\n"); - return; - - } - } - } else if (!adapter->pps_uapsd_mode && - adapter->ps_state == PS_STATE_SLEEP && - mwifiex_pcie_ok_to_access_hw(adapter)) { + if (!adapter->pps_uapsd_mode && + adapter->ps_state == PS_STATE_SLEEP && + mwifiex_pcie_ok_to_access_hw(adapter)) { /* Potentially for PCIe we could get other * interrupts like shared. 
Don't change power * state until cookie is set */ From 23436be689aa00750bfa3e7b99dfac26f45c4f0f Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Wed, 26 Feb 2014 20:11:22 -0800 Subject: [PATCH 092/296] mwifiex: do not advertise usb autosuspend support commit adb07df1e039e9fe43e66aeea8b4771f83659dbb upstream. As many Surface Pro I & II users have found out, the mwifiex_usb doesn't support usb autosuspend, and it has caused some system stability issues. Bug 69661 - mwifiex_usb on MS Surface Pro 1 is unstable Bug 60815 - Interface hangs in mwifiex_usb Bug 64111 - mwifiex_usb USB8797 crash failed to get signal information USB autosuspend get triggered when Surface Pro's AC power is removed or powertop enables power saving on USB8797 device. Driver's suspend handler is called here, but resume handler won't be called until the AC power is put back on or powertop disables power saving for USB8797. We need to refactor the suspend/resume handlers to support usb autosuspend properly. For now let's just remove it. Signed-off-by: Bing Zhao Signed-off-by: Amitkumar Karwar Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/usb.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c index b7adf3d46463..923e348dda70 100644 --- a/drivers/net/wireless/mwifiex/usb.c +++ b/drivers/net/wireless/mwifiex/usb.c @@ -511,13 +511,6 @@ static int mwifiex_usb_resume(struct usb_interface *intf) MWIFIEX_BSS_ROLE_ANY), MWIFIEX_ASYNC_CMD); -#ifdef CONFIG_PM - /* Resume handler may be called due to remote wakeup, - * force to exit suspend anyway - */ - usb_disable_autosuspend(card->udev); -#endif /* CONFIG_PM */ - return 0; } @@ -576,7 +569,6 @@ static struct usb_driver mwifiex_usb_driver = { .id_table = mwifiex_usb_table, .suspend = mwifiex_usb_suspend, .resume = mwifiex_usb_resume, - .supports_autosuspend = 1, }; static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter) From eab0ff3d06510c626d121d51e599d4012847fd0f Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 4 Mar 2014 18:43:13 -0800 Subject: [PATCH 093/296] mwifiex: copy AP's HT capability info correctly commit c99b1861c232e1f641f13b8645e0febb3712cc71 upstream. While preparing association request, intersection of device's HT capability information and corresponding fields advertised by AP is used. This patch fixes an error while copying this field from AP's beacon. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/11n.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mwifiex/11n.c b/drivers/net/wireless/mwifiex/11n.c index 41e9d25a2d8e..2658c8cda443 100644 --- a/drivers/net/wireless/mwifiex/11n.c +++ b/drivers/net/wireless/mwifiex/11n.c @@ -307,8 +307,7 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv, ht_cap->header.len = cpu_to_le16(sizeof(struct ieee80211_ht_cap)); memcpy((u8 *) ht_cap + sizeof(struct mwifiex_ie_types_header), - (u8 *) bss_desc->bcn_ht_cap + - sizeof(struct ieee_types_header), + (u8 *)bss_desc->bcn_ht_cap, le16_to_cpu(ht_cap->header.len)); mwifiex_fill_cap_info(priv, radio_type, ht_cap); From 5184c0b7c349f6783986d1848a5f1cdf333075b1 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 4 Mar 2014 18:43:14 -0800 Subject: [PATCH 094/296] mwifiex: save and copy AP's VHT capability info correctly commit d51246481c7f28bbfa1f814ded2da65e531cd4b2 upstream. While preparing association request, intersection of device's VHT capability information and corresponding field advertised by AP is used. This patch fixes a couple errors while saving and copying vht_cap and vht_oper fields from AP's beacon. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. 
Linville Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/11ac.c | 3 +-- drivers/net/wireless/mwifiex/scan.c | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mwifiex/11ac.c b/drivers/net/wireless/mwifiex/11ac.c index 5e0eec4d71c7..5d9a8084665d 100644 --- a/drivers/net/wireless/mwifiex/11ac.c +++ b/drivers/net/wireless/mwifiex/11ac.c @@ -189,8 +189,7 @@ int mwifiex_cmd_append_11ac_tlv(struct mwifiex_private *priv, vht_cap->header.len = cpu_to_le16(sizeof(struct ieee80211_vht_cap)); memcpy((u8 *)vht_cap + sizeof(struct mwifiex_ie_types_header), - (u8 *)bss_desc->bcn_vht_cap + - sizeof(struct ieee_types_header), + (u8 *)bss_desc->bcn_vht_cap, le16_to_cpu(vht_cap->header.len)); mwifiex_fill_vht_cap_tlv(priv, vht_cap, bss_desc->bss_band); diff --git a/drivers/net/wireless/mwifiex/scan.c b/drivers/net/wireless/mwifiex/scan.c index 50b2fe53219d..470347a0a729 100644 --- a/drivers/net/wireless/mwifiex/scan.c +++ b/drivers/net/wireless/mwifiex/scan.c @@ -2040,12 +2040,12 @@ mwifiex_save_curr_bcn(struct mwifiex_private *priv) curr_bss->ht_info_offset); if (curr_bss->bcn_vht_cap) - curr_bss->bcn_ht_cap = (void *)(curr_bss->beacon_buf + - curr_bss->vht_cap_offset); + curr_bss->bcn_vht_cap = (void *)(curr_bss->beacon_buf + + curr_bss->vht_cap_offset); if (curr_bss->bcn_vht_oper) - curr_bss->bcn_ht_oper = (void *)(curr_bss->beacon_buf + - curr_bss->vht_info_offset); + curr_bss->bcn_vht_oper = (void *)(curr_bss->beacon_buf + + curr_bss->vht_info_offset); if (curr_bss->bcn_bss_co_2040) curr_bss->bcn_bss_co_2040 = From 9400319cc170bec3ff02f0adcb978552682725af Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Aug 2013 18:03:26 +0100 Subject: [PATCH 095/296] ARM: 7811/1: locks: use early clobber in arch_spin_trylock commit afa31d8eb86fc2f25083e675d57ac8173a98f999 upstream. 
The res variable is written before we've finished with the input operands (namely the lock address), so ensure that we mark it as `early clobber' to avoid unintended register sharing. Signed-off-by: Will Deacon Signed-off-by: Russell King Cc: Wang Weidong Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index dd64cc6f9cba..b07c09e5a0ac 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -107,7 +107,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) " subs %1, %0, %0, ror #16\n" " addeq %0, %0, %4\n" " strexeq %2, %0, [%3]" - : "=&r" (slock), "=&r" (contended), "=r" (res) + : "=&r" (slock), "=&r" (contended), "=&r" (res) : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) : "cc"); } while (res); From 9804b5d36e7635cb5a11a5209a1ae8b7cd655d74 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 12 Mar 2014 10:59:37 -0400 Subject: [PATCH 096/296] drm/ttm: don't oops if no invalidate_caches() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9ef7506f7eff3fc42724269f62e30164c141661f upstream. A few of the simpler TTM drivers (cirrus, ast, mgag200) do not implement this function. 
Yet can end up somehow with an evicted bo: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) PGD 16e761067 PUD 16e6cf067 PMD 0 Oops: 0010 [#1] SMP Modules linked in: bnep bluetooth rfkill fuse ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables sg btrfs zlib_deflate raid6_pq xor dm_queue_length iTCO_wdt iTCO_vendor_support coretemp kvm dcdbas dm_service_time microcode serio_raw pcspkr lpc_ich mfd_core i7core_edac edac_core ses enclosure ipmi_si ipmi_msghandler shpchp acpi_power_meter mperf nfsd auth_rpcgss nfs_acl lockd uinput sunrpc dm_multipath xfs libcrc32c ata_generic pata_acpi sr_mod cdrom sd_mod usb_storage mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit lpfc drm_kms_helper ttm crc32c_intel ata_piix bfa drm ixgbe libata i2c_core mdio crc_t10dif ptp crct10dif_common pps_core scsi_transport_fc dca scsi_tgt megaraid_sas bnx2 dm_mirror dm_region_hash dm_log dm_mod CPU: 16 PID: 2572 Comm: X Not tainted 3.10.0-86.el7.x86_64 #1 Hardware name: Dell Inc. 
PowerEdge R810/0H235N, BIOS 0.3.0 11/14/2009 task: ffff8801799dabc0 ti: ffff88016c884000 task.ti: ffff88016c884000 RIP: 0010:[<0000000000000000>] [< (null)>] (null) RSP: 0018:ffff88016c885ad8 EFLAGS: 00010202 RAX: ffffffffa04e94c0 RBX: ffff880178937a20 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000240004 RDI: ffff880178937a00 RBP: ffff88016c885b60 R08: 00000000000171a0 R09: ffff88007cf171a0 R10: ffffea0005842540 R11: ffffffff810487b9 R12: ffff880178937b30 R13: ffff880178937a00 R14: ffff88016c885b78 R15: ffff880179929400 FS: 00007f81ba2ef980(0000) GS:ffff88007cf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000016e763000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffffa0306fae ffff8801799295c0 0000000000260004 0000000000000001 ffff88016c885b60 ffffffffa0307669 00ff88007cf17738 ffff88017cf17700 ffff880178937a00 ffff880100000000 ffff880100000000 0000000079929400 Call Trace: [] ? ttm_bo_handle_move_mem+0x54e/0x5b0 [ttm] [] ? ttm_bo_mem_space+0x169/0x340 [ttm] [] ttm_bo_move_buffer+0x117/0x130 [ttm] [] ? perf_event_init_context+0x141/0x220 [] ttm_bo_validate+0xc1/0x130 [ttm] [] mgag200_bo_pin+0x87/0xc0 [mgag200] [] mga_crtc_cursor_set+0x474/0xbb0 [mgag200] [] ? __mem_cgroup_commit_charge+0x152/0x3b0 [] ? mutex_lock+0x12/0x2f [] drm_mode_cursor_common+0x123/0x170 [drm] [] drm_mode_cursor_ioctl+0x41/0x50 [drm] [] drm_ioctl+0x502/0x630 [drm] [] ? __do_page_fault+0x1f4/0x510 [] ? __restore_xstate_sig+0x218/0x4f0 [] do_vfs_ioctl+0x2e5/0x4d0 [] ? file_has_perm+0x8e/0xa0 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. 
RIP [< (null)>] (null) RSP CR2: 0000000000000000 Signed-off-by: Rob Clark Reviewed-by: Jérôme Glisse Reviewed-by: Thomas Hellstrom Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 8697abd7b173..0ac0a88860a4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -498,9 +498,11 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, moved: if (bo->evicted) { - ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement); - if (ret) - pr_err("Can not flush read caches\n"); + if (bdev->driver->invalidate_caches) { + ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement); + if (ret) + pr_err("Can not flush read caches\n"); + } bo->evicted = false; } From fd642c2b8f42e90563b6ef9f268aec6d5378befb Mon Sep 17 00:00:00 2001 From: Roman Volkov Date: Fri, 24 Jan 2014 16:18:14 +0400 Subject: [PATCH 097/296] ALSA: oxygen: Xonar DG(X): capture from I2S channel 1, not 2 commit 3dd77654fb1d7f68b9739f3039bad8dbbc0739f8 upstream. Actually CS4245 connected to the I2S channel 1 for capture, not channel 2. Otherwise capturing and playback does not work for CS4245. 
Signed-off-by: Roman Volkov Signed-off-by: Clemens Ladisch Signed-off-by: Greg Kroah-Hartman --- sound/pci/oxygen/xonar_dg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/oxygen/xonar_dg.c b/sound/pci/oxygen/xonar_dg.c index 77acd790ea47..5519b8f0dad0 100644 --- a/sound/pci/oxygen/xonar_dg.c +++ b/sound/pci/oxygen/xonar_dg.c @@ -596,7 +596,7 @@ struct oxygen_model model_xonar_dg = { .model_data_size = sizeof(struct dg), .device_config = PLAYBACK_0_TO_I2S | PLAYBACK_1_TO_SPDIF | - CAPTURE_0_FROM_I2S_2 | + CAPTURE_0_FROM_I2S_1 | CAPTURE_1_FROM_SPDIF, .dac_channels_pcm = 6, .dac_channels_mixer = 0, From d4fa4f9c62ec91fd835717ae1579c8618d0fe039 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Mar 2014 12:34:39 +0100 Subject: [PATCH 098/296] ALSA: usb-audio: Add quirk for Logitech Webcam C500 commit e805ca8b0a9b6c91099c0eaa4b160a1196a4ae25 upstream. Logitech C500 (046d:0807) needs the same workaround like other Logitech Webcams. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 95558ef4a7a0..be4db47cb2d9 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -883,6 +883,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; + case USB_ID(0x046d, 0x0807): /* Logitech Webcam C500 */ case USB_ID(0x046d, 0x0808): case USB_ID(0x046d, 0x0809): case USB_ID(0x046d, 0x081b): /* HD Webcam c310 */ From 9bfc5d3d4844bcb66405f2ffbbf172c50b90414e Mon Sep 17 00:00:00 2001 From: Marius Knaust Date: Mon, 3 Mar 2014 01:48:58 +0100 Subject: [PATCH 099/296] ALSA: hda - Added inverted digital-mic handling for Acer TravelMate 8371 commit a6b92b6650d010d58b6e6fe42c6271266e0b1134 upstream. 
Signed-off-by: Marius Knaust Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e0bdcb3ecf0e..34548589f419 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3648,6 +3648,7 @@ static const struct hda_fixup alc269_fixups[] = { }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1025, 0x0283, "Acer TravelMate 8371", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1028, 0x05bd, "Dell", ALC269_FIXUP_DELL2_MIC_NO_PRESENCE), From 6f06330c7a0464591a4d43532654dbaf5dd509d1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Mar 2014 11:52:24 +0100 Subject: [PATCH 100/296] ALSA: hda - Add missing loopback merge path for AD1884/1984 codecs commit c5eda4c1bf6214332c46fb2f4e7c42a85e5e5643 upstream. The mixer widget (NID 0x20) of AD1884 and AD1984 codecs isn't connected directly to the actual I/O paths but only via another mixer widget (NID 0x21). We need a similar fix as we did for AD1882. 
Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_analog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 5a6527668c07..290e09825b82 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -3667,6 +3667,7 @@ static int ad1884_parse_auto_config(struct hda_codec *codec) spec = codec->spec; spec->gen.mixer_nid = 0x20; + spec->gen.mixer_merge_nid = 0x21; spec->gen.beep_nid = 0x10; set_beep_amp(spec, 0x10, 0, HDA_OUTPUT); From 27fdae48c06e3d82abe9bce30447512a57a533f8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 4 Mar 2014 08:31:24 +1100 Subject: [PATCH 101/296] powerpc: Align p_dyn, p_rela and p_st symbols commit a5b2cf5b1af424ee3dd9e3ce6d5cea18cb927e67 upstream. The 64bit relocation code places a few symbols in the text segment. These symbols are only 4 byte aligned where they need to be 8 byte aligned. Add an explicit alignment. Signed-off-by: Anton Blanchard Tested-by: Laurent Dufour Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/reloc_64.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index b47a0e1ab001..c712ecec13ba 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -81,6 +81,7 @@ _GLOBAL(relocate) 6: blr +.balign 8 p_dyn: .llong __dynamic_start - 0b p_rela: .llong __rela_dyn_start - 0b p_st: .llong _stext - 0b From f5a82d2f4c8c46d753a739679d905191ca23c443 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 Feb 2014 22:41:41 +0100 Subject: [PATCH 102/296] ARM: 7991/1: sa1100: fix compile problem on Collie commit 052450fdc55894a39fbae93d9bbe43947956f663 upstream. Due to a problem in the MFD Kconfig it was not possible to compile the UCB battery driver for the Collie SA1100 system, in turn making it impossible to compile in the battery driver. 
(See patch "mfd: include all drivers in subsystem menu".) After fixing the MFD Kconfig (separate patch) a compile error appears in the Collie battery driver due to <mach/collie.h> implicitly requiring <mach/hardware.h> through <linux/gpio.h> via <mach/gpio.h> prior to commit 40ca061b "ARM: 7841/1: sa1100: remove complex GPIO interface". Fix this up by including the required header into <mach/collie.h>. Cc: Andrea Adami Cc: Dmitry Eremin-Solenikov Signed-off-by: Linus Walleij Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-sa1100/include/mach/collie.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h index f33679d2d3ee..50e1d850ee2e 100644 --- a/arch/arm/mach-sa1100/include/mach/collie.h +++ b/arch/arm/mach-sa1100/include/mach/collie.h @@ -13,6 +13,8 @@ #ifndef __ASM_ARCH_COLLIE_H #define __ASM_ARCH_COLLIE_H +#include "hardware.h" /* Gives GPIO_MAX */ + extern void locomolcd_power(int on); #define COLLIE_SCOOP_GPIO_BASE (GPIO_MAX + 1) From 783d444c6dbca17d2b15918ef6b30add2ba586b8 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Thu, 20 Feb 2014 17:36:03 +0100 Subject: [PATCH 103/296] regulator: core: Replace direct ops->enable usage commit 30c219710358c5cca2f8bd2e9e547c6aadf7cf8b upstream. There are some direct ops->enable in the regulator core driver. This is a potential issue as the function _regulator_do_enable() handles gpio regulators and the normal ops->enable calls. These gpio regulators are simply ignored when ops->enable is called directly. One possible bug is that boot-on and always-on gpio regulators are not enabled on registration. This patch replaces all ops->enable calls by _regulator_do_enable.
[Handle missing enable operations -- broonie] Signed-off-by: Markus Pargmann Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 815d6df8bd5f..c59cc6ed7adb 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -919,6 +919,8 @@ static int machine_constraints_voltage(struct regulator_dev *rdev, return 0; } +static int _regulator_do_enable(struct regulator_dev *rdev); + /** * set_machine_constraints - sets regulator constraints * @rdev: regulator source @@ -975,10 +977,9 @@ static int set_machine_constraints(struct regulator_dev *rdev, /* If the constraints say the regulator should be on at this point * and we have control then make sure it is enabled. */ - if ((rdev->constraints->always_on || rdev->constraints->boot_on) && - ops->enable) { - ret = ops->enable(rdev); - if (ret < 0) { + if (rdev->constraints->always_on || rdev->constraints->boot_on) { + ret = _regulator_do_enable(rdev); + if (ret < 0 && ret != -EINVAL) { rdev_err(rdev, "failed to enable\n"); goto out; } @@ -3790,9 +3791,8 @@ int regulator_suspend_finish(void) struct regulator_ops *ops = rdev->desc->ops; mutex_lock(&rdev->mutex); - if ((rdev->use_count > 0 || rdev->constraints->always_on) && - ops->enable) { - error = ops->enable(rdev); + if (rdev->use_count > 0 || rdev->constraints->always_on) { + error = _regulator_do_enable(rdev); if (error) ret = error; } else { From a56c57c055ecc75f4375d60c783a0b578edd763c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 7 Mar 2014 15:05:20 -0800 Subject: [PATCH 104/296] x86: Ignore NMIs that come in during early boot commit 5fa10196bdb5f190f595ebd048490ee52dddea0f upstream. Don Zickus reports: A customer generated an external NMI using their iLO to test kdump worked. Unfortunately, the machine hung. Disabling the nmi_watchdog made things work. 
I speculated the external NMI fired, caused the machine to panic (as expected) and the perf NMI from the watchdog came in and was latched. My guess was this somehow caused the hang. ---- It appears that the latched NMI stays latched until the early page table generation on 64 bits, which causes exceptions to happen which end in IRET, which re-enable NMI. Therefore, ignore NMIs that come in during early execution, until we have proper exception handling. Reported-and-tested-by: Don Zickus Link: http://lkml.kernel.org/r/1394221143-29713-1-git-send-email-dzickus@redhat.com Signed-off-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_32.S | 7 ++++++- arch/x86/kernel/head_64.S | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 73afd11799ca..e194842b0937 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -566,6 +566,10 @@ ENDPROC(early_idt_handlers) /* This is global to keep gas from relaxing the jumps */ ENTRY(early_idt_handler) cld + + cmpl $X86_TRAP_NMI,(%esp) + je is_nmi # Ignore NMI + cmpl $2,%ss:early_recursion_flag je hlt_loop incl %ss:early_recursion_flag @@ -616,8 +620,9 @@ ex_entry: pop %edx pop %ecx pop %eax - addl $8,%esp /* drop vector number and error code */ decl %ss:early_recursion_flag +is_nmi: + addl $8,%esp /* drop vector number and error code */ iret ENDPROC(early_idt_handler) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a8368608ab41..9035aefdfc3a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -343,6 +343,9 @@ early_idt_handlers: ENTRY(early_idt_handler) cld + cmpl $X86_TRAP_NMI,(%rsp) + je is_nmi # Ignore NMI + cmpl $2,early_recursion_flag(%rip) jz 1f incl early_recursion_flag(%rip) @@ -405,8 +408,9 @@ ENTRY(early_idt_handler) popq %rdx popq %rcx popq %rax - addq $16,%rsp # drop vector number and error code decl early_recursion_flag(%rip) +is_nmi: + addq $16,%rsp # 
drop vector number and error code INTERRUPT_RETURN ENDPROC(early_idt_handler) From 4a24592bfc9b33c917cfbdba1f365216938fdbc9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 7 Mar 2014 18:58:40 -0800 Subject: [PATCH 105/296] x86: fix compile error due to X86_TRAP_NMI use in asm files commit b01d4e68933ec23e43b1046fa35d593cefcf37d1 upstream. It's an enum, not a #define, you can't use it in asm files. Introduced in commit 5fa10196bdb5 ("x86: Ignore NMIs that come in during early boot"), and sadly I didn't compile-test things like I should have before pushing out. My weak excuse is that the x86 tree generally doesn't introduce stupid things like this (and the ARM pull afterwards doesn't cause me to do a compile-test either, since I don't cross-compile). Cc: Don Zickus Cc: H. Peter Anvin Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_32.S | 2 +- arch/x86/kernel/head_64.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e194842b0937..df63cae573e0 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -567,7 +567,7 @@ ENDPROC(early_idt_handlers) ENTRY(early_idt_handler) cld - cmpl $X86_TRAP_NMI,(%esp) + cmpl $2,(%esp) # X86_TRAP_NMI je is_nmi # Ignore NMI cmpl $2,%ss:early_recursion_flag diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9035aefdfc3a..f2a9a2aa98f3 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -343,7 +343,7 @@ early_idt_handlers: ENTRY(early_idt_handler) cld - cmpl $X86_TRAP_NMI,(%rsp) + cmpl $2,(%rsp) # X86_TRAP_NMI je is_nmi # Ignore NMI cmpl $2,early_recursion_flag(%rip) From b569c493e79db75017c8f505f7235459bd1a6f65 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Thu, 13 Mar 2014 19:43:01 +0800 Subject: [PATCH 106/296] x86/amd/numa: Fix northbridge quirk to assign correct NUMA node commit 847d7970defb45540735b3fb4e88471c27cacd85 upstream. 
For systems with multiple servers and routed fabric, all northbridges get assigned to the first server. Fix this by also using the node reported from the PCI bus. For single-fabric systems, the northbridges are on PCI bus 0 by definition, which is on NUMA node 0 by definition, so this is invariant on most systems. Tested on fam10h and fam15h single and multi-fabric systems and candidate for stable. Signed-off-by: Daniel J Blueman Acked-by: Steffen Persvold Acked-by: Borislav Petkov Link: http://lkml.kernel.org/r/1394710981-3596-1-git-send-email-daniel@numascale.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 04ee1e2e4c02..52dbf1e400dc 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -529,7 +529,7 @@ static void quirk_amd_nb_node(struct pci_dev *dev) return; pci_read_config_dword(nb_ht, 0x60, &val); - node = val & 7; + node = pcibus_to_node(dev->bus) | (val & 7); /* * Some hardware may return an invalid node ID, * so check it first: From d6f10324218d3edbb7f64c85ff5f36c2942b1163 Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 4 Mar 2014 10:52:39 -0800 Subject: [PATCH 107/296] usb: Add device quirk for Logitech HD Pro Webcams C920 and C930e commit e0429362ab15c46ea4d64c3f8c9e0933e48a143a upstream. We've encountered a rare issue when enumerating two Logitech webcams after a reboot that doesn't power cycle the USB ports. They are spewing random data (possibly some leftover UVC buffers) on the second (full-sized) Get Configuration request of the enumeration phase. Since the data is random this can potentially cause all kinds of odd behavior, and since it occasionally happens multiple times (after the kernel issues another reset due to the garbled configuration descriptor), it is not always recoverable. Set the USB_QUIRK_DELAY_INIT quirk that seems to work around the issue.
Signed-off-by: Julius Werner Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 01fe36273f3b..1053eb651b2f 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -46,6 +46,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Logitech HD Pro Webcams C920 and C930e */ + { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, From 3ec34db27ae6cc05ebb61732cbdeecce38c486c1 Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 4 Mar 2014 11:27:38 -0800 Subject: [PATCH 108/296] usb: Make DELAY_INIT quirk wait 100ms between Get Configuration requests commit d86db25e53fa69e3e97f3b55dd82a70689787c5d upstream. The DELAY_INIT quirk only reduces the frequency of enumeration failures with the Logitech HD Pro C920 and C930e webcams, but does not quite eliminate them. We have found that adding a delay of 100ms between the first and second Get Configuration request makes the device enumerate perfectly reliably even after several weeks of extensive testing. The reasons for that are anyone's guess, but since the DELAY_INIT quirk already delays enumeration by a whole second, waiting for another 10th of that isn't really a big deal for the one other device that uses it, and it will resolve the problems with these webcams.
Signed-off-by: Julius Werner Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 548d1996590f..652438325197 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -718,6 +718,10 @@ int usb_get_configuration(struct usb_device *dev) result = -ENOMEM; goto err; } + + if (dev->quirks & USB_QUIRK_DELAY_INIT) + msleep(100); + result = usb_get_descriptor(dev, USB_DT_CONFIG, cfgno, bigbuffer, length); if (result < 0) { From 56f1c4124bd0c769591071916abc5358b8811c1a Mon Sep 17 00:00:00 2001 From: Chuansheng Liu Date: Mon, 24 Feb 2014 11:29:50 +0800 Subject: [PATCH 109/296] genirq: Remove racy waitqueue_active check commit c685689fd24d310343ac33942e9a54a974ae9c43 upstream. We hit one rare case below: T1 calling disable_irq(), but hanging at synchronize_irq() always; The corresponding irq thread is in sleeping state; And all CPUs are in idle state; After analysis, we found there is one possible scenario which causes T1 to wait there forever (steps listed in time order): [CPU0] synchronize_irq() -> wait_event() -> spin_lock(); [CPU1] atomic_dec_and_test(&threads_active); [CPU0] insert the __wait into queue; [CPU0] spin_unlock(); [CPU1] if(waitqueue_active); [CPU0] atomic_read(&threads_active); [CPU1] wake_up(). Here, after the __wait entry is inserted into the queue on CPU0 and before CPU1 tests whether the queue is empty, there is no barrier, so the update may not be immediately visible to CPU1 even though CPU0 has already updated the queue list. The same applies to CPU0's atomic_read() of threads_active. So we would need an smp_mb() before the waitqueue_active() check, but removing the waitqueue_active() check solves it as well and makes things simple and clear.
Signed-off-by: Chuansheng Liu Cc: Xiaoming Wang Link: http://lkml.kernel.org/r/1393212590-32543-1-git-send-email-chuansheng.liu@intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/irq/manage.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index dc4db3228dcd..9bd5c8a6c8ee 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -802,8 +802,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, static void wake_threads_waitq(struct irq_desc *desc) { - if (atomic_dec_and_test(&desc->threads_active) && - waitqueue_active(&desc->wait_for_threads)) + if (atomic_dec_and_test(&desc->threads_active)) wake_up(&desc->wait_for_threads); } From 4bdd401e8b7384a685606f2254e634805580a2aa Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 27 Feb 2014 18:19:36 +0800 Subject: [PATCH 110/296] cpuset: fix a race condition in __cpuset_node_allowed_softwall() commit 99afb0fd5f05aac467ffa85c36778fec4396209b upstream. It's not safe to access task's cpuset after releasing task_lock(). Holding callback_mutex won't help. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- kernel/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d313870dcd02..d9dd521ddd6b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2422,9 +2422,9 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) task_lock(current); cs = nearest_hardwall_ancestor(task_cs(current)); + allowed = node_isset(node, cs->mems_allowed); task_unlock(current); - allowed = node_isset(node, cs->mems_allowed); mutex_unlock(&callback_mutex); return allowed; } From 1f7dc3c002c02493219f7e6ee5930a30f7a57175 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 27 Feb 2014 11:37:15 +0800 Subject: [PATCH 111/296] ACPI / resources: ignore invalid ACPI device resources commit b355cee88e3b1a193f0e9a81db810f6f83ad728b upstream. 
ACPI table may export resource entry with 0 length. But the current code interprets this kind of resource in a wrong way. It will create a resource structure with res->end = acpi_resource->start + acpi_resource->len - 1; This patch fixes a problem on my machine that a platform device fails to be created because one of its ACPI IO resource entry (start = 0, end = 0, length = 0) is translated into a generic resource with start = 0, end = 0xffffffff. Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/resource.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 3322b47ab7ca..c2dd598e25a2 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -77,18 +77,24 @@ bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res) switch (ares->type) { case ACPI_RESOURCE_TYPE_MEMORY24: memory24 = &ares->data.memory24; + if (!memory24->address_length) + return false; acpi_dev_get_memresource(res, memory24->minimum, memory24->address_length, memory24->write_protect); break; case ACPI_RESOURCE_TYPE_MEMORY32: memory32 = &ares->data.memory32; + if (!memory32->address_length) + return false; acpi_dev_get_memresource(res, memory32->minimum, memory32->address_length, memory32->write_protect); break; case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: fixed_memory32 = &ares->data.fixed_memory32; + if (!fixed_memory32->address_length) + return false; acpi_dev_get_memresource(res, fixed_memory32->address, fixed_memory32->address_length, fixed_memory32->write_protect); @@ -144,12 +150,16 @@ bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res) switch (ares->type) { case ACPI_RESOURCE_TYPE_IO: io = &ares->data.io; + if (!io->address_length) + return false; acpi_dev_get_ioresource(res, io->minimum, io->address_length, io->io_decode); break; case ACPI_RESOURCE_TYPE_FIXED_IO: fixed_io = &ares->data.fixed_io; + if 
(!fixed_io->address_length) + return false; acpi_dev_get_ioresource(res, fixed_io->address, fixed_io->address_length, ACPI_DECODE_10); From d6a6d1f38ce55aa5a7d8aab972176660b19fd7ab Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 26 Feb 2014 13:37:38 -0500 Subject: [PATCH 112/296] tracing: Do not add event files for modules that fail tracepoints commit 45ab2813d40d88fc575e753c38478de242d03f88 upstream. If a module fails to add its tracepoints due to module tainting, do not create the module event infrastructure in the debugfs directory. As the events will not work and worse yet, they will silently fail, making the user wonder why the events they enable do not display anything. Having a warning on module load and the events not visible to the users will make the cause of the problem much clearer. Link: http://lkml.kernel.org/r/20140227154923.265882695@goodmis.org Fixes: 6d723736e472 "tracing/events: add support for modules to TRACE_EVENT" Acked-by: Mathieu Desnoyers Cc: Rusty Russell Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- include/linux/tracepoint.h | 6 ++++++ kernel/trace/trace_events.c | 10 ++++++++++ kernel/tracepoint.c | 7 ++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index f8e084d0fc77..ba605015c4d8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,6 +60,12 @@ struct tp_module { unsigned int num_tracepoints; struct tracepoint * const *tracepoints_ptrs; }; +bool trace_module_has_bad_taint(struct module *mod); +#else +static inline bool trace_module_has_bad_taint(struct module *mod) +{ + return false; +} #endif /* CONFIG_MODULES */ struct tracepoint_iter { diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 3d18aadef493..2f4b185bfc23 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1860,6 +1860,16 @@ static void trace_module_add_events(struct module 
*mod) struct ftrace_module_file_ops *file_ops = NULL; struct ftrace_event_call **call, **start, **end; + if (!mod->num_trace_events) + return; + + /* Don't add infrastructure for mods without tracepoints */ + if (trace_module_has_bad_taint(mod)) { + pr_err("%s: module has bad taint, not creating trace events\n", + mod->name); + return; + } + start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 29f26540e9c9..031cc5655a51 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -631,6 +631,11 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter) EXPORT_SYMBOL_GPL(tracepoint_iter_reset); #ifdef CONFIG_MODULES +bool trace_module_has_bad_taint(struct module *mod) +{ + return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP)); +} + static int tracepoint_module_coming(struct module *mod) { struct tp_module *tp_mod, *iter; @@ -641,7 +646,7 @@ static int tracepoint_module_coming(struct module *mod) * module headers (for forced load), to make sure we don't cause a crash. * Staging and out-of-tree GPL modules are fine. */ - if (mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP))) + if (trace_module_has_bad_taint(mod)) return 0; mutex_lock(&tracepoints_mutex); tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); From f5befc2c193dbe92826a382e16cb50e8640a4f62 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 18 Feb 2014 22:25:15 +0100 Subject: [PATCH 113/296] firewire: net: fix use after free commit 8987583366ae9e03c306c2b7d73bdb952df1d08d upstream. Commit 8408dc1c14c1 "firewire: net: use dev_printk API" introduced a use-after-free in a failure path. fwnet_transmit_packet_failed(ptask) may free ptask, then the dev_err() call dereferenced it. The fix is straightforward; simply reorder the two calls. 
Reported-by: Dan Carpenter Signed-off-by: Stefan Richter Signed-off-by: Greg Kroah-Hartman --- drivers/firewire/net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 815b0fcbe918..7bdb6fe63236 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -929,8 +929,6 @@ static void fwnet_write_complete(struct fw_card *card, int rcode, if (rcode == RCODE_COMPLETE) { fwnet_transmit_packet_done(ptask); } else { - fwnet_transmit_packet_failed(ptask); - if (printk_timed_ratelimit(&j, 1000) || rcode != last_rcode) { dev_err(&ptask->dev->netdev->dev, "fwnet_write_complete failed: %x (skipped %d)\n", @@ -938,8 +936,10 @@ static void fwnet_write_complete(struct fw_card *card, int rcode, errors_skipped = 0; last_rcode = rcode; - } else + } else { errors_skipped++; + } + fwnet_transmit_packet_failed(ptask); } } From 9dfce5a3e2f985cca75c05dd714958b9d0ad8ab1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:19:57 -0500 Subject: [PATCH 114/296] firewire: don't use PREPARE_DELAYED_WORK commit 70044d71d31d6973665ced5be04ef39ac1c09a48 upstream. PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. firewire core-device and sbp2 have been multiplexing work items with multiple work functions. Introduce fw_device_workfn() and sbp2_lu_workfn() which invoke fw_device->workfn and sbp2_logical_unit->workfn respectively and always use the two functions as the work functions and update the users to set the ->workfn fields instead of overriding work functions using PREPARE_DELAYED_WORK(). This fixes a variety of possible regressions since a2c1c57be8d9 "workqueue: consider work function when searching for busy work items" due to which fw_workqueue lost its required non-reentrancy property.
Signed-off-by: Tejun Heo Acked-by: Stefan Richter Cc: linux1394-devel@lists.sourceforge.net Signed-off-by: Greg Kroah-Hartman --- drivers/firewire/core-device.c | 22 +++++++++++++++------- drivers/firewire/sbp2.c | 17 +++++++++++++---- include/linux/firewire.h | 1 + 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 664a6ff0a823..392ad513dc04 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -895,7 +895,7 @@ static int lookup_existing_device(struct device *dev, void *data) old->config_rom_retries = 0; fw_notice(card, "rediscovered device %s\n", dev_name(dev)); - PREPARE_DELAYED_WORK(&old->work, fw_device_update); + old->workfn = fw_device_update; fw_schedule_device_work(old, 0); if (current_node == card->root_node) @@ -1054,7 +1054,7 @@ static void fw_device_init(struct work_struct *work) if (atomic_cmpxchg(&device->state, FW_DEVICE_INITIALIZING, FW_DEVICE_RUNNING) == FW_DEVICE_GONE) { - PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); + device->workfn = fw_device_shutdown; fw_schedule_device_work(device, SHUTDOWN_DELAY); } else { fw_notice(card, "created device %s: GUID %08x%08x, S%d00\n", @@ -1175,13 +1175,20 @@ static void fw_device_refresh(struct work_struct *work) dev_name(&device->device), fw_rcode_string(ret)); gone: atomic_set(&device->state, FW_DEVICE_GONE); - PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); + device->workfn = fw_device_shutdown; fw_schedule_device_work(device, SHUTDOWN_DELAY); out: if (node_id == card->root_node->node_id) fw_schedule_bm_work(card, 0); } +static void fw_device_workfn(struct work_struct *work) +{ + struct fw_device *device = container_of(to_delayed_work(work), + struct fw_device, work); + device->workfn(work); +} + void fw_node_event(struct fw_card *card, struct fw_node *node, int event) { struct fw_device *device; @@ -1231,7 +1238,8 @@ void fw_node_event(struct fw_card *card, struct fw_node 
*node, int event) * power-up after getting plugged in. We schedule the * first config rom scan half a second after bus reset. */ - INIT_DELAYED_WORK(&device->work, fw_device_init); + device->workfn = fw_device_init; + INIT_DELAYED_WORK(&device->work, fw_device_workfn); fw_schedule_device_work(device, INITIAL_DELAY); break; @@ -1247,7 +1255,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) if (atomic_cmpxchg(&device->state, FW_DEVICE_RUNNING, FW_DEVICE_INITIALIZING) == FW_DEVICE_RUNNING) { - PREPARE_DELAYED_WORK(&device->work, fw_device_refresh); + device->workfn = fw_device_refresh; fw_schedule_device_work(device, device->is_local ? 0 : INITIAL_DELAY); } @@ -1262,7 +1270,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) smp_wmb(); /* update node_id before generation */ device->generation = card->generation; if (atomic_read(&device->state) == FW_DEVICE_RUNNING) { - PREPARE_DELAYED_WORK(&device->work, fw_device_update); + device->workfn = fw_device_update; fw_schedule_device_work(device, 0); } break; @@ -1287,7 +1295,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) device = node->data; if (atomic_xchg(&device->state, FW_DEVICE_GONE) == FW_DEVICE_RUNNING) { - PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); + device->workfn = fw_device_shutdown; fw_schedule_device_work(device, list_empty(&card->link) ? 
0 : SHUTDOWN_DELAY); } diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 47674b913843..1b1c37dd830b 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -146,6 +146,7 @@ struct sbp2_logical_unit { */ int generation; int retries; + work_func_t workfn; struct delayed_work work; bool has_sdev; bool blocked; @@ -864,7 +865,7 @@ static void sbp2_login(struct work_struct *work) /* set appropriate retry limit(s) in BUSY_TIMEOUT register */ sbp2_set_busy_timeout(lu); - PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect); + lu->workfn = sbp2_reconnect; sbp2_agent_reset(lu); /* This was a re-login. */ @@ -918,7 +919,7 @@ static void sbp2_login(struct work_struct *work) * If a bus reset happened, sbp2_update will have requeued * lu->work already. Reset the work from reconnect to login. */ - PREPARE_DELAYED_WORK(&lu->work, sbp2_login); + lu->workfn = sbp2_login; } static void sbp2_reconnect(struct work_struct *work) @@ -952,7 +953,7 @@ static void sbp2_reconnect(struct work_struct *work) lu->retries++ >= 5) { dev_err(tgt_dev(tgt), "failed to reconnect\n"); lu->retries = 0; - PREPARE_DELAYED_WORK(&lu->work, sbp2_login); + lu->workfn = sbp2_login; } sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5)); @@ -972,6 +973,13 @@ static void sbp2_reconnect(struct work_struct *work) sbp2_conditionally_unblock(lu); } +static void sbp2_lu_workfn(struct work_struct *work) +{ + struct sbp2_logical_unit *lu = container_of(to_delayed_work(work), + struct sbp2_logical_unit, work); + lu->workfn(work); +} + static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) { struct sbp2_logical_unit *lu; @@ -998,7 +1006,8 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) lu->blocked = false; ++tgt->dont_block; INIT_LIST_HEAD(&lu->orb_list); - INIT_DELAYED_WORK(&lu->work, sbp2_login); + lu->workfn = sbp2_login; + INIT_DELAYED_WORK(&lu->work, sbp2_lu_workfn); list_add_tail(&lu->link, &tgt->lu_list); return 0; diff --git a/include/linux/firewire.h 
b/include/linux/firewire.h index 217e4b42b7c8..5d838bf10cbd 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -200,6 +200,7 @@ struct fw_device { unsigned irmc:1; unsigned bc_implemented:2; + work_func_t workfn; struct delayed_work work; struct fw_attribute_group attribute_group; }; From 2e44c866bfcea9dcd77dda2c77f03f6d042aade7 Mon Sep 17 00:00:00 2001 From: Michele Baldessari Date: Fri, 7 Mar 2014 16:34:29 +0000 Subject: [PATCH 115/296] libata: add ATA_HORKAGE_BROKEN_FPDMA_AA quirk for Seagate Momentus SpinPoint M8 (2BA30001) commit b28a613e9138e4b3a64649bd60b13436f4b4b49b upstream. Via commit 87809942d3fa "libata: add ATA_HORKAGE_BROKEN_FPDMA_AA quirk for Seagate Momentus SpinPoint M8" we added a quirk for disks named "ST1000LM024 HN-M101MBB" with firmware revision "2AR10001". As reported on https://bugzilla.redhat.com/show_bug.cgi?id=1073901, we need to also add firmware revision 2BA30001 as it is broken as well. Reported-by: Nicholas Signed-off-by: Michele Baldessari Tested-by: Guilherme Amadio Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 15518fda2d2a..8b8cbe9bcb92 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4152,6 +4152,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Seagate Momentus SpinPoint M8 seem to have FPMDA_AA issues */ { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA }, + { "ST1000LM024 HN-M101MBB", "2BA30001", ATA_HORKAGE_BROKEN_FPDMA_AA }, /* Blacklist entries taken from Silicon Image 3124/3132 Windows driver .inf file - also several Linux problem reports */ From cb67b44a0ddb8c1387ad2df82508b17b9b3f546d Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Sun, 2 Mar 2014 20:54:42 +0100 Subject: [PATCH 116/296] spi: spi-ath79: fix initial GPIO CS line setup commit 61d1cf163c8653934cc8cd5d0b2a562d0990c265 
upstream. The 'ath79_spi_setup_cs' function initializes the chip select line of a given SPI device in order to make sure that the device is inactive. If the SPI_CS_HIGH bit is set for a given device, it means that the CS line of that device is active HIGH so it must be set to LOW initially. In case of GPIO CS lines, the 'ath79_spi_setup_cs' function does the opposite of that due to the wrong GPIO flags. Fix the code to use the correct GPIO flags. Reported-by: Ronald Wahl Signed-off-by: Gabor Juhos Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-ath79.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c index e504b7636058..23f1ba6e9ccf 100644 --- a/drivers/spi/spi-ath79.c +++ b/drivers/spi/spi-ath79.c @@ -132,9 +132,9 @@ static int ath79_spi_setup_cs(struct spi_device *spi) flags = GPIOF_DIR_OUT; if (spi->mode & SPI_CS_HIGH) - flags |= GPIOF_INIT_HIGH; - else flags |= GPIOF_INIT_LOW; + else + flags |= GPIOF_INIT_HIGH; status = gpio_request_one(cdata->gpio, flags, dev_name(&spi->dev)); From 40cb674a5a0cff49a46af150fb862229e545e908 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Mar 2014 22:03:12 -0500 Subject: [PATCH 117/296] NFS: Fix a delegation callback race commit 755a48a7a4eb05b9c8424e3017d947b2961a60e0 upstream. The clean-up in commit 36281caa839f ended up removing a NULL pointer check that is needed in order to prevent an Oops in nfs_async_inode_return_delegation(). 
Reported-by: "Yan, Zheng" Link: http://lkml.kernel.org/r/5313E9F6.2020405@intel.com Fixes: 36281caa839f (NFSv4: Further clean-ups of delegation stateid validation) Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/delegation.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 57db3244f4d9..4b49a8c6ccad 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -656,16 +656,19 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL) + goto out_enoent; - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { - rcu_read_unlock(); - return -ENOENT; - } + if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); return 0; +out_enoent: + rcu_read_unlock(); + return -ENOENT; } static struct inode * From c8dd8fdf0bd8c858d30ba3889104e226e865cade Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Mar 2014 08:44:23 -0500 Subject: [PATCH 118/296] NFSv4: nfs4_stateid_is_current should return 'true' for an invalid stateid commit e1253be0ece1a95a02c7f5843194877471af8179 upstream. When nfs4_set_rw_stateid() fails by returning EIO to indicate that the stateid is completely invalid, then it makes no sense to have it trigger a retry of the READ or WRITE operation. Instead, we should just have it fall through and attempt a recovery. This fixes an infinite loop in which the client keeps replaying the same bad stateid back to the server.
Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26e71bdb5b33..1ae7dd5956c5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3607,8 +3607,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid, { nfs4_stateid current_stateid; - if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode)) - return false; + /* If the current stateid represents a lost lock, then exit */ + if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO) + return true; return nfs4_stateid_match(stateid, &current_stateid); } From d4be842bb4b516bcc3a8e84dab18121b376a6eb7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 13 Mar 2014 22:11:39 +0100 Subject: [PATCH 119/296] ACPI / sleep: Add extra checks for HW Reduced ACPI mode sleep states commit a4e90bed511220ff601d064c9e5d583e91308f65 upstream. If the HW Reduced ACPI mode bit is set in the FADT, ACPICA uses the optional sleep control and sleep status registers for making the system enter sleep states (including S5), so it is not possible to use system sleep states or power it off using ACPI if the HW Reduced ACPI mode bit is set and those registers are not available. For this reason, add a new function, acpi_sleep_state_supported(), checking if the HW Reduced ACPI mode bit is set and whether or not system sleep states are usable in that case in addition to checking the return value of acpi_get_sleep_type_data() and make the ACPI sleep setup routines use that function to check the availability of system sleep states. Among other things, this prevents the kernel from attempting to use ACPI for powering off HW Reduced ACPI systems without the sleep control and sleep status registers, because ACPI power off doesn't have a chance to work on them.
That allows alternative power off mechanisms that may actually work to be used on those systems. The affected machines include Dell Venue 8 Pro, Asus T100TA, Haswell Desktop SDP and Ivy Bridge EP Demo depot. References: https://bugzilla.kernel.org/show_bug.cgi?id=70931 Reported-by: Adam Williamson Tested-by: Aubrey Li Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sleep.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 9c1a435d10e6..035920f2ab4d 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -78,6 +78,17 @@ static int acpi_sleep_prepare(u32 acpi_state) return 0; } +static bool acpi_sleep_state_supported(u8 sleep_state) +{ + acpi_status status; + u8 type_a, type_b; + + status = acpi_get_sleep_type_data(sleep_state, &type_a, &type_b); + return ACPI_SUCCESS(status) && (!acpi_gbl_reduced_hardware + || (acpi_gbl_FADT.sleep_control.address + && acpi_gbl_FADT.sleep_status.address)); +} + #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; @@ -600,15 +611,9 @@ static void acpi_sleep_suspend_setup(void) { int i; - for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++) { - acpi_status status; - u8 type_a, type_b; - - status = acpi_get_sleep_type_data(i, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { + for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++) + if (acpi_sleep_state_supported(i)) sleep_states[i] = 1; - } - } suspend_set_ops(old_suspend_ordering ? &acpi_suspend_ops_old : &acpi_suspend_ops); @@ -739,11 +744,7 @@ static const struct platform_hibernation_ops acpi_hibernation_ops_old = { static void acpi_sleep_hibernate_setup(void) { - acpi_status status; - u8 type_a, type_b; - - status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b); - if (ACPI_FAILURE(status)) + if (!acpi_sleep_state_supported(ACPI_STATE_S4)) return; hibernation_set_ops(old_suspend_ordering ? 
@@ -792,8 +793,6 @@ static void acpi_power_off(void) int __init acpi_sleep_init(void) { - acpi_status status; - u8 type_a, type_b; char supported[ACPI_S_STATE_COUNT * 3 + 1]; char *pos = supported; int i; @@ -808,8 +807,7 @@ int __init acpi_sleep_init(void) acpi_sleep_suspend_setup(); acpi_sleep_hibernate_setup(); - status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { + if (acpi_sleep_state_supported(ACPI_STATE_S5)) { sleep_states[ACPI_STATE_S5] = 1; pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; From 42890f74fac83d16035cdd201187a4d3113a9996 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 26 Feb 2014 03:09:41 -0800 Subject: [PATCH 120/296] iscsi-target: Fix iscsit_get_tpg_from_np tpg_state bug commit a2a99cea5ec7c1e47825559f0e75a4efbcf8aee3 upstream. This patch fixes a bug in iscsit_get_tpg_from_np() where the tpg->tpg_state sanity check was looking for TPG_STATE_FREE, instead of != TPG_STATE_ACTIVE. The latter is expected during a normal TPG shutdown once the tpg_state goes into TPG_STATE_INACTIVE in order to reject any new incoming login attempts. 
Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_tpg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 439260b7d87f..f31b4c5cdf3f 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -138,7 +138,7 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np( list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) { spin_lock(&tpg->tpg_state_lock); - if (tpg->tpg_state == TPG_STATE_FREE) { + if (tpg->tpg_state != TPG_STATE_ACTIVE) { spin_unlock(&tpg->tpg_state_lock); continue; } From 63490f40e071a1b03b6ceca4b654a4d5aeff851d Mon Sep 17 00:00:00 2001 From: Artem Fetishev Date: Mon, 10 Mar 2014 15:49:45 -0700 Subject: [PATCH 121/296] fs/proc/base.c: fix GPF in /proc/$PID/map_files commit 70335abb2689c8cd5df91bf2d95a65649addf50b upstream. The expected logic of proc_map_files_get_link() is either to return 0 and initialize 'path' or return an error and leave 'path' uninitialized. By the time dname_to_vma_addr() returns 0 the corresponding vma may already be gone. In this case the path is not initialized but the return value is still 0. This results in 'general protection fault' inside d_path(). Steps to reproduce: CONFIG_CHECKPOINT_RESTORE=y fd = open(...); while (1) { mmap(fd, ...); munmap(fd, ...); } ls -la /proc/$PID/map_files Addresses https://bugzilla.kernel.org/show_bug.cgi?id=68991 Signed-off-by: Artem Fetishev Signed-off-by: Aleksandr Terekhov Reported-by: Acked-by: Pavel Emelyanov Acked-by: Cyrill Gorcunov Reviewed-by: "Eric W.
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/proc/base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index c3834dad09b3..de12b8128b95 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1825,6 +1825,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = -ENOENT; down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { From 680f38a3d12c7f1130635cd59180481072fc9518 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 6 Mar 2014 18:09:52 -0500 Subject: [PATCH 122/296] drm/radeon/atom: select the proper number of lanes in transmitter setup commit d03874c881a049a50e12f285077ab1f9fc2686e1 upstream. We need to check for DVI vs. HDMI when setting up duallink since HDMI is single link only. Fixes 4k modes on newer asics. bug: https://bugs.freedesktop.org/show_bug.cgi?id=75223 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_encoders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 4c81e9faa635..1f7f3ce875c8 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1281,7 +1281,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t } if (is_dp) args.v5.ucLaneNum = dp_lane_count; - else if (radeon_encoder->pixel_clock > 165000) + else if (radeon_dig_monitor_is_duallink(encoder, radeon_encoder->pixel_clock)) args.v5.ucLaneNum = 8; else args.v5.ucLaneNum = 4; From 0f8141267dce8a719b294b6fcade653eaae7aa54 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 10 Mar 2014 14:46:07 +0100 Subject: [PATCH 123/296] ipc: Fix 2 bugs in msgrcv() MSG_COPY implementation commit 4f87dac386cc43d5525da7a939d4b4e7edbea22c upstream. 
While testing and documenting the msgrcv() MSG_COPY flag that Stanislav Kinsbursky added in commit 4a674f34ba04 ("ipc: introduce message queue copy feature" => kernel 3.8), I discovered a couple of bugs in the implementation. The two bugs concern MSG_COPY interactions with other msgrcv() flags, namely: (A) MSG_COPY + MSG_EXCEPT (B) MSG_COPY + !IPC_NOWAIT The bugs are distinct (and the fix for the first one is obvious), however my fix for both is a single-line patch, which is why I'm combining them in a single mail, rather than writing two mails+patches. ===== (A) MSG_COPY + MSG_EXCEPT ===== With the addition of the MSG_COPY flag, there are now two msgrcv() flags--MSG_COPY and MSG_EXCEPT--that modify the meaning of the 'msgtyp' argument in unrelated ways. Specifying both in the same call is a logical error that is currently permitted, with the effect that MSG_COPY has priority and MSG_EXCEPT is ignored. The call should give an error if both flags are specified. The patch below implements that behavior. ===== (B) MSG_COPY + !IPC_NOWAIT ===== The test code that was submitted in commit 3a665531a3b7 ("selftests: IPC message queue copy feature test") shows MSG_COPY being used in conjunction with IPC_NOWAIT. In other words, if there is no message at the position 'msgtyp', return immediately with the error ENOMSG. What was not (fully) tested is the behavior if MSG_COPY is specified *without* IPC_NOWAIT, and there is an odd behavior. If the queue contains less than 'msgtyp' messages, then the call blocks until the next message is written to the queue. At that point, the msgrcv() call returns a copy of the newly added message, regardless of whether that message is at the ordinal position 'msgtyp'. This is clearly bogus, and problematic for applications that might want to make use of the MSG_COPY flag. I considered the following possible solutions to this problem: (1) Force the call to block until a message *does* appear at the position 'msgtyp'.
(2) If the MSG_COPY flag is specified, the kernel should implicitly add IPC_NOWAIT, so that the call fails with ENOMSG for this case. (3) If the MSG_COPY flag is specified, but IPC_NOWAIT is not, generate an error (probably, EINVAL is the right one). I do not know if any application would really want to have the functionality of solution (1), especially since an application can determine in advance the number of messages in the queue using msgctl() IPC_STAT. Obviously, this solution would be the most work to implement. Solution (2) would have the effect of silently fixing any applications that tried to employ broken behavior. However, it would mean that if we later decided to implement solution (1), then user-space could not easily detect what the kernel supports (but, since I'm somewhat doubtful that solution (1) is needed, I'm not sure that this is much of a problem). Solution (3) would have the effect of informing broken applications that they are doing something broken. The downside is that this would cause an ABI breakage for any applications that are currently employing the broken behavior. However: a) Those applications are almost certainly not getting the results they expect. b) Possibly, those applications don't even exist, because MSG_COPY is currently hidden behind CONFIG_CHECKPOINT_RESTORE. The upside of solution (3) is that if we later decided to implement solution (1), user-space could determine what the kernel supports, via the error return. In my view, solution (3) is mildly preferable to solution (2), and solution (1) could still be done later if anyone really cares. The patch below implements solution (3). PS. For anyone out there still listening, it's the usual story: documenting an API (and the thinking about, and the testing of the API, that documentation entails) is one of the single best ways of finding bugs in the API, as I've learned from a lot of experience. Best to do that documentation before releasing the API.
Signed-off-by: Michael Kerrisk Acked-by: Stanislav Kinsbursky Cc: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- ipc/msg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ipc/msg.c b/ipc/msg.c index 558aa91186b6..52770bfde2a5 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -885,6 +885,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl return -EINVAL; if (msgflg & MSG_COPY) { + if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT)) + return -EINVAL; copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); if (IS_ERR(copy)) return PTR_ERR(copy); From c516a4c127a90e5889b72f1ad1b9da4005bba65b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 11 Mar 2014 19:11:18 +0100 Subject: [PATCH 124/296] KVM: SVM: fix cr8 intercept window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 596f3142d2b7be307a1652d59e7b93adab918437 upstream. We always disable cr8 intercept in its handler, but only re-enable it if handling KVM_REQ_EVENT, so there can be a window where we do not intercept cr8 writes, which allows an interrupt to disrupt a higher priority task. Fix this by disabling intercepts in the same function that re-enables them when needed. This fixes BSOD in Windows 2008. 
Signed-off-by: Radim Krčmář Reviewed-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a14a6eaf871d..765210d4d925 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2985,10 +2985,8 @@ static int cr8_write_interception(struct vcpu_svm *svm) u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(svm); - if (irqchip_in_kernel(svm->vcpu.kvm)) { - clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irqchip_in_kernel(svm->vcpu.kvm)) return r; - } if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) return r; kvm_run->exit_reason = KVM_EXIT_SET_TPR; @@ -3550,6 +3548,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) return; + clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irr == -1) return; From 10ff94faa6ec1a76d86302a358cc8a8eeb36e74d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 11 Mar 2014 14:22:19 -0600 Subject: [PATCH 125/296] PCI: Enable INTx in pci_reenable_device() only when MSI/MSI-X not enabled commit 3cdeb713dc66057b50682048c151eae07b186c42 upstream. Andreas reported that after 1f42db786b14 ("PCI: Enable INTx if BIOS left them disabled"), pciehp surprise removal stopped working. This happens because pci_reenable_device() on the hotplug bridge (used in the pciehp_configure_device() path) clears the Interrupt Disable bit, which apparently breaks the bridge's MSI hotplug event reporting. Previously we cleared the Interrupt Disable bit in do_pci_enable_device(), which is used by both pci_enable_device() and pci_reenable_device(). But we use pci_reenable_device() after the driver may have enabled MSI or MSI-X, and we *set* Interrupt Disable as part of enabling MSI/MSI-X. 
This patch clears Interrupt Disable only when MSI/MSI-X has not been enabled. Fixes: 1f42db786b14 PCI: Enable INTx if BIOS left them disabled Link: https://bugzilla.kernel.org/show_bug.cgi?id=71691 Reported-and-tested-by: Andreas Noever Signed-off-by: Bjorn Helgaas CC: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0bb7bfd49bf6..f30acaa84037 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1130,6 +1130,9 @@ static int do_pci_enable_device(struct pci_dev *dev, int bars) return err; pci_fixup_device(pci_fixup_enable, dev); + if (dev->msi_enabled || dev->msix_enabled) + return 0; + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (pin) { pci_read_config_word(dev, PCI_COMMAND, &cmd); From 757319d8bb00395874ac77cc81f474581a4644b1 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 10 Mar 2014 06:55:55 -0400 Subject: [PATCH 126/296] vmxnet3: fix netpoll race condition commit d25f06ea466ea521b563b76661180b4e44714ae6 upstream. vmxnet3's netpoll driver is incorrectly coded. It directly calls vmxnet3_do_poll, which is the driver internal napi poll routine. As the netpoll controller method doesn't block real napi polls in any way, there is a potential for race conditions in which the netpoll controller method and the napi poll method run concurrently. 
The result is data corruption causing panics such as this one recently observed: PID: 1371 TASK: ffff88023762caa0 CPU: 1 COMMAND: "rs:main Q:Reg" #0 [ffff88023abd5780] machine_kexec at ffffffff81038f3b #1 [ffff88023abd57e0] crash_kexec at ffffffff810c5d92 #2 [ffff88023abd58b0] oops_end at ffffffff8152b570 #3 [ffff88023abd58e0] die at ffffffff81010e0b #4 [ffff88023abd5910] do_trap at ffffffff8152add4 #5 [ffff88023abd5970] do_invalid_op at ffffffff8100cf95 #6 [ffff88023abd5a10] invalid_op at ffffffff8100bf9b [exception RIP: vmxnet3_rq_rx_complete+1968] RIP: ffffffffa00f1e80 RSP: ffff88023abd5ac8 RFLAGS: 00010086 RAX: 0000000000000000 RBX: ffff88023b5dcee0 RCX: 00000000000000c0 RDX: 0000000000000000 RSI: 00000000000005f2 RDI: ffff88023b5dcee0 RBP: ffff88023abd5b48 R8: 0000000000000000 R9: ffff88023a3b6048 R10: 0000000000000000 R11: 0000000000000002 R12: ffff8802398d4cd8 R13: ffff88023af35140 R14: ffff88023b60c890 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffff88023abd5b50] vmxnet3_do_poll at ffffffffa00f204a [vmxnet3] #8 [ffff88023abd5b80] vmxnet3_netpoll at ffffffffa00f209c [vmxnet3] #9 [ffff88023abd5ba0] netpoll_poll_dev at ffffffff81472bb7 The fix is to do as other drivers do, and have the poll controller call the top half interrupt handler, which schedules a napi poll properly to receive frames Tested by myself, successfully. Signed-off-by: Neil Horman CC: Shreyas Bhatewara CC: "VMware, Inc." CC: "David S. Miller" Reviewed-by: Shreyas N Bhatewara Signed-off-by: David S.
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vmxnet3/vmxnet3_drv.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 55a62cae2cb4..434837f82a3e 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1740,12 +1740,18 @@ static void vmxnet3_netpoll(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); + int i; - if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE) - vmxnet3_disable_all_intrs(adapter); - - vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size); - vmxnet3_enable_all_intrs(adapter); + switch (adapter->intr.type) { + case VMXNET3_IT_MSIX: + for (i = 0; i < adapter->num_rx_queues; i++) + vmxnet3_msix_rx(0, &adapter->rx_queue[i]); + break; + case VMXNET3_IT_MSI: + default: + vmxnet3_intr(0, adapter->netdev); + break; + } } #endif /* CONFIG_NET_POLL_CONTROLLER */ From 40d2b82b4cb5b65473a7f3a6a93d7b0f8ad439ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 13 Mar 2014 10:44:34 +0100 Subject: [PATCH 127/296] vmxnet3: fix building without CONFIG_PCI_MSI commit 0a8d8c446b5429d15ff2d48f46e00d8a08552303 upstream. Since commit d25f06ea466e "vmxnet3: fix netpoll race condition", the vmxnet3 driver fails to build when CONFIG_PCI_MSI is disabled, because it unconditionally references the vmxnet3_msix_rx() function. To fix this, use the same #ifdef in the caller that exists around the function definition. Signed-off-by: Arnd Bergmann Cc: Neil Horman Cc: Shreyas Bhatewara Cc: "VMware, Inc." Cc: "David S. Miller" Acked-by: Neil Horman Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vmxnet3/vmxnet3_drv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 434837f82a3e..d0815855d877 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1740,13 +1740,16 @@ static void vmxnet3_netpoll(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); - int i; switch (adapter->intr.type) { - case VMXNET3_IT_MSIX: +#ifdef CONFIG_PCI_MSI + case VMXNET3_IT_MSIX: { + int i; for (i = 0; i < adapter->num_rx_queues; i++) vmxnet3_msix_rx(0, &adapter->rx_queue[i]); break; + } +#endif case VMXNET3_IT_MSI: default: vmxnet3_intr(0, adapter->netdev); From 8c54de8fefe718cba1f1e8ba60beb1d28f3acf46 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 10 Mar 2014 15:49:44 -0700 Subject: [PATCH 128/296] mm/compaction: break out of loop on !PageBuddy in isolate_freepages_block commit 2af120bc040c5ebcda156df6be6a66610ab6957f upstream. We received several reports of bad page state when freeing CMA pages previously allocated with alloc_contig_range: BUG: Bad page state in process Binder_A pfn:63202 page:d21130b0 count:0 mapcount:1 mapping: (null) index:0x7dfbf page flags: 0x40080068(uptodate|lru|active|swapbacked) Based on the page state, it looks like the page was still in use. The page flags do not make sense for the use case though. Further debugging showed that despite alloc_contig_range returning success, at least one page in the range still remained in the buddy allocator. There is an issue with isolate_freepages_block. In strict mode (which CMA uses), if any pages in the range cannot be isolated, isolate_freepages_block should return failure 0. 
The current check keeps track of the total number of isolated pages and compares against the size of the range: if (strict && nr_strict_required > total_isolated) total_isolated = 0; After taking the zone lock, if one of the pages in the range is not in the buddy allocator, we continue through the loop and do not increment total_isolated. If in the last iteration of the loop we isolate more than one page (e.g. last page needed is a higher order page), the check for total_isolated may pass and we fail to detect that a page was skipped. The fix is to bail out of the loop immediately if we are in strict mode. There's no benefit to continuing anyway since we need all pages to be isolated. Additionally, drop the error checking based on nr_strict_required and just check the pfn ranges. This matches with what isolate_freepages_range does. Signed-off-by: Laura Abbott Acked-by: Minchan Kim Cc: Mel Gorman Acked-by: Vlastimil Babka Cc: Joonsoo Kim Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Michal Nazarewicz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/compaction.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index 9a3e351da29b..18a90b4d0bfc 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -252,7 +252,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, { int nr_scanned = 0, total_isolated = 0; struct page *cursor, *valid_page = NULL; - unsigned long nr_strict_required = end_pfn - blockpfn; unsigned long flags; bool locked = false; @@ -265,11 +264,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, nr_scanned++; if (!pfn_valid_within(blockpfn)) - continue; + goto isolate_fail; + if (!valid_page) valid_page = page; if (!PageBuddy(page)) - continue; + goto isolate_fail; /* * The zone lock must be held to isolate freepages.
@@ -290,12 +290,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, /* Recheck this is a buddy page under lock */ if (!PageBuddy(page)) - continue; + goto isolate_fail; /* Found a free page, break it into order-0 pages */ isolated = split_free_page(page); - if (!isolated && strict) - break; total_isolated += isolated; for (i = 0; i < isolated; i++) { list_add(&page->lru, freelist); @@ -306,7 +304,15 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, if (isolated) { blockpfn += isolated - 1; cursor += isolated - 1; + continue; } + +isolate_fail: + if (strict) + break; + else + continue; + } trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated); @@ -316,7 +322,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, * pages requested were isolated. If there were any failures, 0 is * returned and CMA will fail. */ - if (strict && nr_strict_required > total_isolated) + if (strict && blockpfn < end_pfn) total_isolated = 0; if (locked) From e88217a8ee592adc45e14558091f254a2ce1868d Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 12 Mar 2014 00:40:05 +0100 Subject: [PATCH 129/296] dm cache: fix truncation bug when copying a block to/from >2TB fast device commit 8b9d96666529a979acf4825391efcc7c8a3e9f12 upstream. During demotion or promotion to a cache's >2TB fast device we must not truncate the cache block's associated sector to 32bits. The 32bit temporary result of from_cblock() caused a 32bit multiplication when calculating the sector of the fast device in issue_copy_real(). Use an intermediate 64bit type to store the 32bit from_cblock() to allow for proper 64bit multiplication. Here is an example of how this bug manifests on an ext4 filesystem: EXT4-fs error (device dm-0): ext4_mb_generate_buddy:756: group 17136, 32768 clusters in bitmap, 30688 in gd; block bitmap corrupt. JBD2: Spotted dirty metadata buffer (dev = dm-0, blocknr = 0). 
There's a risk of filesystem corruption in case of system crash. Signed-off-by: Heinz Mauelshagen Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 516f9c922bb2..265f83419de2 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -861,12 +861,13 @@ static void issue_copy_real(struct dm_cache_migration *mg) int r; struct dm_io_region o_region, c_region; struct cache *cache = mg->cache; + sector_t cblock = from_cblock(mg->cblock); o_region.bdev = cache->origin_dev->bdev; o_region.count = cache->sectors_per_block; c_region.bdev = cache->cache_dev->bdev; - c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block; + c_region.sector = cblock * cache->sectors_per_block; c_region.count = cache->sectors_per_block; if (mg->writeback || mg->demote) { From bd86e7cb779a11a18ce2f33ccdde50c9568c3f6d Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 12 Mar 2014 16:13:39 +0100 Subject: [PATCH 130/296] dm cache: fix access beyond end of origin device commit e893fba90c09f9b57fb97daae204ea9cc2c52fa5 upstream. In order to avoid wasting cache space a partial block at the end of the origin device is not cached. Unfortunately, the check for such a partial block at the end of the origin device was flawed. Fix accesses beyond the end of the origin device that occurred due to attempted promotion of an undetected partial block by: - initializing the per bio data struct to allow cache_end_io to work properly - recognizing access to the partial block at the end of the origin device - avoiding out of bounds access to the discard bitset Otherwise, users can experience errors like the following: attempt to access beyond end of device dm-5: rw=0, want=20971520, limit=20971456 ...
device-mapper: cache: promotion failed; couldn't copy block Signed-off-by: Heinz Mauelshagen Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 265f83419de2..1a75869d3a82 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2175,20 +2175,18 @@ static int cache_map(struct dm_target *ti, struct bio *bio) bool discarded_block; struct dm_bio_prison_cell *cell; struct policy_result lookup_result; - struct per_bio_data *pb; + struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); - if (from_oblock(block) > from_oblock(cache->origin_blocks)) { + if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { /* * This can only occur if the io goes to a partial block at * the end of the origin device. We don't cache these. * Just remap to the origin and carry on. */ - remap_to_origin_clear_discard(cache, bio, block); + remap_to_origin(cache, bio); return DM_MAPIO_REMAPPED; } - pb = init_per_bio_data(bio, pb_data_size); - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { defer_bio(cache, bio); return DM_MAPIO_SUBMITTED; From 83bb80f4ade8b6fa1e62a46aa67d52cc9fa12f0b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 5 Mar 2014 14:29:58 +1100 Subject: [PATCH 131/296] net: unix socket code abuses csum_partial commit 0a13404dd3bf4ea870e3d96270b5a382edca85c0 upstream. The unix socket code is using the result of csum_partial to hash into a lookup table: unix_hash_fold(csum_partial(sunaddr, len, 0)); csum_partial is only guaranteed to produce something that can be folded into a checksum, as its prototype explains: * returns a 32-bit number suitable for feeding into itself * or csum_tcpudp_magic The 32bit value should not be used directly. 
Depending on the alignment, the ppc64 csum_partial will return different 32bit partial checksums that will fold into the same 16bit checksum. This difference causes the following testcase (courtesy of Gustavo) to sometimes fail: #include <sys/socket.h> #include <stdio.h> int main() { int fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); int i = 1; setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &i, 4); struct sockaddr addr; addr.sa_family = AF_LOCAL; bind(fd, &addr, 2); listen(fd, 128); struct sockaddr_storage ss; socklen_t sslen = (socklen_t)sizeof(ss); getsockname(fd, (struct sockaddr*)&ss, &sslen); fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); if (connect(fd, (struct sockaddr*)&ss, sslen) == -1){ perror(NULL); return 1; } printf("OK\n"); return 0; } As suggested by davem, fix this by using csum_fold to fold the partial 32bit checksum into a 16bit checksum before using it. Signed-off-by: Anton Blanchard Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3ca7927520b0..94d334781554 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -160,9 +160,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) static inline unsigned int unix_hash_fold(__wsum n) { - unsigned int hash = (__force unsigned int)n; + unsigned int hash = (__force unsigned int)csum_fold(n); - hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); } From 7184df3a509dcf56c01d19b21709a7860d918600 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 28 Feb 2014 14:52:01 +0100 Subject: [PATCH 132/296] can: flexcan: flexcan_open(): fix error path if flexcan_chip_start() fails commit 7e9e148af01ef388efb6e2490805970be4622792 upstream. If flexcan_chip_start() in flexcan_open() fails, the interrupt is not freed, this patch adds the missing cleanup.
Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/flexcan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index f63169d6af26..6d388cff8455 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -862,7 +862,7 @@ static int flexcan_open(struct net_device *dev) /* start chip and queuing */ err = flexcan_chip_start(dev); if (err) - goto out_close; + goto out_free_irq; can_led_event(dev, CAN_LED_EVENT_OPEN); @@ -871,6 +871,8 @@ static int flexcan_open(struct net_device *dev) return 0; + out_free_irq: + free_irq(dev->irq, dev); out_close: close_candev(dev); out: From a5748c58fc4200a5ec7a8ceadaa845ffb15cf56b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 6 Feb 2014 12:23:01 -0800 Subject: [PATCH 133/296] SCSI: isci: fix reset timeout handling commit ddfadd7736b677de2d4ca2cd5b4b655368c85a7a upstream. Remove an erroneous BUG_ON() in the case of a hard reset timeout. The reset timeout handler puts the port into the "awaiting link-up" state. The timeout causes the device to be disconnected and we need to be in the awaiting link-up state to re-connect the port. The BUG_ON() made the incorrect assumption that resets never timeout and we always complete the reset in the "resetting" state. Testing this patch also uncovered that libata continues to attempt to reset the port long after the driver has torn down the context. Once the driver has committed to abandoning the link it must indicate to libata that recovery ends by returning -ENODEV from ->lldd_I_T_nexus_reset(). 
Acked-by: Lukasz Dorau Reported-by: David Milburn Reported-by: Xun Ni Tested-by: Xun Ni Signed-off-by: Dan Williams Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/isci/port_config.c | 7 ------- drivers/scsi/isci/task.c | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/scsi/isci/port_config.c b/drivers/scsi/isci/port_config.c index cd962da4a57a..5017bde3b366 100644 --- a/drivers/scsi/isci/port_config.c +++ b/drivers/scsi/isci/port_config.c @@ -615,13 +615,6 @@ static void sci_apc_agent_link_up(struct isci_host *ihost, SCIC_SDS_APC_WAIT_LINK_UP_NOTIFICATION); } else { /* the phy is already the part of the port */ - u32 port_state = iport->sm.current_state_id; - - /* if the PORT'S state is resetting then the link up is from - * port hard reset in this case, we need to tell the port - * that link up is recieved - */ - BUG_ON(port_state != SCI_PORT_RESETTING); port_agent->phy_ready_mask |= 1 << phy_index; sci_port_link_up(iport, iphy); } diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 0d30ca849e8f..5d6fda72d659 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -801,7 +801,7 @@ int isci_task_I_T_nexus_reset(struct domain_device *dev) /* XXX: need to cleanup any ireqs targeting this * domain_device */ - ret = TMF_RESP_FUNC_COMPLETE; + ret = -ENODEV; goto out; } From be92db5e00d862acda06ff116d8f4728d24e3b49 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 6 Feb 2014 12:23:20 -0800 Subject: [PATCH 134/296] SCSI: isci: correct erroneous for_each_isci_host macro commit c59053a23d586675c25d789a7494adfdc02fba57 upstream. In the first place, the loop 'for' in the macro 'for_each_isci_host' (drivers/scsi/isci/host.h:314) is incorrect, because it accesses the 3rd element of 2 element array. After the 2nd iteration it executes the instruction: ihost = to_pci_info(pdev)->hosts[2] (while the size of the 'hosts' array equals 2) and reads an out of range element. 
In the second place, this loop is incorrectly optimized by GCC v4.8 (see http://marc.info/?l=linux-kernel&m=138998871911336&w=2). As a result, on platforms with two SCU controllers, the loop is executed more times than it can be (for i=0,1 and 2). It causes kernel panic during entering the S3 state and the following oops after 'rmmod isci': BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __list_add+0x1b/0xc0 Oops: 0000 [#1] SMP RIP: 0010:[] [] __list_add+0x1b/0xc0 Call Trace: [] __mutex_lock_slowpath+0x114/0x1b0 [] mutex_lock+0x1f/0x30 [] sas_disable_events+0x1b/0x50 [libsas] [] sas_unregister_ha+0x18/0x60 [libsas] [] isci_unregister+0x1e/0x40 [isci] [] isci_pci_remove+0x5d/0x100 [isci] [] pci_device_remove+0x3b/0xb0 [] __device_release_driver+0x7f/0xf0 [] driver_detach+0xa8/0xb0 [] bus_remove_driver+0x9b/0x120 [] driver_unregister+0x2c/0x50 [] pci_unregister_driver+0x23/0x80 [] isci_exit+0x10/0x1e [isci] [] SyS_delete_module+0x16b/0x2d0 [] ? do_notify_resume+0x61/0xa0 [] system_call_fastpath+0x16/0x1b The loop has been corrected. This patch fixes kernel panic during entering the S3 state and the above oops. 
Signed-off-by: Lukasz Dorau Reviewed-by: Maciej Patelczyk Tested-by: Lukasz Dorau Signed-off-by: Dan Williams Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/isci/host.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h index 4911310a38f5..22a9bb1abae1 100644 --- a/drivers/scsi/isci/host.h +++ b/drivers/scsi/isci/host.h @@ -311,9 +311,8 @@ static inline struct Scsi_Host *to_shost(struct isci_host *ihost) } #define for_each_isci_host(id, ihost, pdev) \ - for (id = 0, ihost = to_pci_info(pdev)->hosts[id]; \ - id < ARRAY_SIZE(to_pci_info(pdev)->hosts) && ihost; \ - ihost = to_pci_info(pdev)->hosts[++id]) + for (id = 0; id < SCI_MAX_CONTROLLERS && \ + (ihost = to_pci_info(pdev)->hosts[id]); id++) static inline void wait_for_start(struct isci_host *ihost) { From 2848c03a56d600f6d010f5472d8223fb126fff73 Mon Sep 17 00:00:00 2001 From: Giridhar Malavali Date: Wed, 26 Feb 2014 04:15:12 -0500 Subject: [PATCH 135/296] SCSI: qla2xxx: Poll during initialization for ISP25xx and ISP83xx commit b77ed25c9f8402e8b3e49e220edb4ef09ecfbb53 upstream. 
Signed-off-by: Giridhar Malavali Signed-off-by: Saurav Kashyap Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_def.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index c32efc753229..799c266b0bb5 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2980,8 +2980,7 @@ struct qla_hw_data { IS_QLA25XX(ha) || IS_QLA81XX(ha) || \ IS_QLA82XX(ha) || IS_QLA83XX(ha)) #define IS_MSIX_NACK_CAPABLE(ha) (IS_QLA81XX(ha) || IS_QLA83XX(ha)) -#define IS_NOPOLLING_TYPE(ha) ((IS_QLA25XX(ha) || IS_QLA81XX(ha) || \ - IS_QLA83XX(ha)) && (ha)->flags.msix_enabled) +#define IS_NOPOLLING_TYPE(ha) (IS_QLA81XX(ha) && (ha)->flags.msix_enabled) #define IS_FAC_REQUIRED(ha) (IS_QLA81XX(ha) || IS_QLA83XX(ha)) #define IS_NOCACHE_VPD_TYPE(ha) (IS_QLA81XX(ha) || IS_QLA83XX(ha)) #define IS_ALOGIO_CAPABLE(ha) (IS_QLA23XX(ha) || IS_FWI2_CAPABLE(ha)) From 535dba0ec5df095ce7aca036f78110356187c419 Mon Sep 17 00:00:00 2001 From: Ales Novak Date: Thu, 27 Feb 2014 11:03:30 +0100 Subject: [PATCH 136/296] SCSI: storvsc: NULL pointer dereference fix commit b12bb60d6c350b348a4e1460cd68f97ccae9822e upstream. If the initialization of storvsc fails, the storvsc_device_destroy() causes NULL pointer dereference. storvsc_bus_scan() scsi_scan_target() __scsi_scan_target() scsi_probe_and_add_lun(hostdata=NULL) scsi_alloc_sdev(hostdata=NULL) sdev->hostdata = hostdata now the host allocation fails __scsi_remove_device(sdev) calls sdev->host->hostt->slave_destroy() == storvsc_device_destroy(sdev) access of sdev->hostdata->request_mempool Signed-off-by: Ales Novak Signed-off-by: Thomas Abraham Reviewed-by: Jiri Kosina Acked-by: K. Y. 
Srinivasan Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index fb7437dd5b7a..91b76cea3e3c 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1189,6 +1189,9 @@ static void storvsc_device_destroy(struct scsi_device *sdevice) { struct stor_mem_pools *memp = sdevice->hostdata; + if (!memp) + return; + mempool_destroy(memp->request_mempool); kmem_cache_destroy(memp->request_pool); kfree(memp); From d2c6966a096ad50c33825a52e6521102ffeda1ff Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sun, 2 Feb 2014 22:56:23 -0800 Subject: [PATCH 137/296] x86, fpu: Check tsk_used_math() in kernel_fpu_end() for eager FPU commit 731bd6a93a6e9172094a2322bd0ee964bb1f4d63 upstream. For non-eager fpu mode, thread's fpu state is allocated during the first fpu usage (in the context of device not available exception). This (math_state_restore()) can be a blocking call and hence we enable interrupts (which were originally disabled when the exception happened), allocate memory and disable interrupts etc. But the eager-fpu mode calls the same math_state_restore() from kernel_fpu_end(). The assumption being that tsk_used_math() is always set for the eager-fpu mode and thus avoid the code path of enabling interrupts, allocating fpu state using blocking call and disable interrupts etc. But the below issue was noticed by Maarten Baert, Nate Eldredge and a few others: If a user process dumps core on an ecrypt fs while aesni-intel is loaded, we get a BUG() in __find_get_block() complaining that it was called with interrupts disabled; then all further accesses to our ecrypt fs hang and we have to reboot. The aesni-intel code (encrypting the core file that we are writing) needs the FPU and quite properly wraps its code in kernel_fpu_{begin,end}(), the latter of which calls math_state_restore().
So after kernel_fpu_end(), interrupts may be disabled, which nobody seems to expect, and they stay that way until we eventually get to __find_get_block() which barfs. For eager fpu, most the time, tsk_used_math() is true. At few instances during thread exit, signal return handling etc, tsk_used_math() might be false. In kernel_fpu_end(), for eager-fpu, call math_state_restore() only if tsk_used_math() is set. Otherwise, don't bother. Kernel code path which cleared tsk_used_math() knows what needs to be done with the fpu state. Reported-by: Maarten Baert Reported-by: Nate Eldredge Suggested-by: Linus Torvalds Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1391410583.3801.6.camel@europa Cc: George Spelvin Signed-off-by: H. Peter Anvin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/i387.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f7ea30dce238..b03ff1842547 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -86,10 +86,19 @@ EXPORT_SYMBOL(__kernel_fpu_begin); void __kernel_fpu_end(void) { - if (use_eager_fpu()) - math_state_restore(); - else + if (use_eager_fpu()) { + /* + * For eager fpu, most the time, tsk_used_math() is true. + * Restore the user math as we are done with the kernel usage. + * At few instances during thread exit, signal handling etc, + * tsk_used_math() is false. Those few places will take proper + * actions, so we don't need to restore the math here. + */ + if (likely(tsk_used_math(current))) + math_state_restore(); + } else { stts(); + } } EXPORT_SYMBOL(__kernel_fpu_end); From 0e48f06cf0a1b55cd2dcc432ccfb13174705fcc2 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 8 Feb 2014 15:47:46 +0000 Subject: [PATCH 138/296] Btrfs: fix data corruption when reading/updating compressed extents commit a2aa75e18a21b21952dc6daa9bac7c9f4426f81f upstream. 
When using a mix of compressed file extents and prealloc extents, it is possible to fill a page of a file with random, garbage data from some unrelated previous use of the page, instead of a sequence of zeroes. A simple sequence of steps to get into such case, taken from the test case I made for xfstests, is: _scratch_mkfs _scratch_mount "-o compress-force=lzo" $XFS_IO_PROG -f -c "pwrite -S 0x06 -b 18670 266978 18670" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "falloc 26450 665194" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "truncate 542872" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar This results in the following file items in the fs tree: item 4 key (257 INODE_ITEM 0) itemoff 15879 itemsize 160 inode generation 6 transid 6 size 542872 block group 0 mode 100600 item 5 key (257 INODE_REF 256) itemoff 15863 itemsize 16 inode ref index 2 namelen 6 name: foobar item 6 key (257 EXTENT_DATA 0) itemoff 15810 itemsize 53 extent data disk byte 0 nr 0 gen 6 extent data offset 0 nr 24576 ram 266240 extent compression 0 item 7 key (257 EXTENT_DATA 24576) itemoff 15757 itemsize 53 prealloc data disk byte 12849152 nr 241664 gen 6 prealloc data offset 0 nr 241664 item 8 key (257 EXTENT_DATA 266240) itemoff 15704 itemsize 53 extent data disk byte 12845056 nr 4096 gen 6 extent data offset 0 nr 20480 ram 20480 extent compression 2 item 9 key (257 EXTENT_DATA 286720) itemoff 15651 itemsize 53 prealloc data disk byte 13090816 nr 405504 gen 6 prealloc data offset 0 nr 258048 The on disk extent at offset 266240 (which corresponds to 1 single disk block), contains 5 compressed chunks of file data. Each of the first 4 compress 4096 bytes of file data, while the last one only compresses 3024 bytes of file data. Therefore a read into the file region [285648 ; 286720[ (length = 4096 - 3024 = 1072 bytes) should always return zeroes (our next extent is a prealloc one). 
The solution here is the compression code path to zero the remaining (untouched) bytes of the last page it uncompressed data into, as the information about how much space the file data consumes in the last page is not known in the upper layer fs/btrfs/extent_io.c:__do_readpage(). In __do_readpage we were correctly zeroing the remainder of the page but only if it corresponds to the last page of the inode and if the inode's size is not a multiple of the page size. This would cause not only returning random data on reads, but also permanently storing random data when updating parts of the region that should be zeroed. For the example above, it means updating a single byte in the region [285648 ; 286720[ would store that byte correctly but also store random data on disk. A test case for xfstests follows soon. Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/compression.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b189bd1e7a3e..ce7067881d36 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1009,6 +1009,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); + if (*pg_index == (vcnt - 1) && *pg_offset == 0) + memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); kunmap_atomic(kaddr); flush_dcache_page(page_out); From 4010fe8e338981f0f6192cb80758d7f4abdbe712 Mon Sep 17 00:00:00 2001 From: Roman Volkov Date: Fri, 24 Jan 2014 16:18:11 +0400 Subject: [PATCH 139/296] ALSA: oxygen: modify adjust_dg_dac_routing function commit 1f91ecc14deea9461aca93273d78871ec4d98fcd upstream. When selecting the audio output destinations (headphones, FP headphones, multichannel output), the channel routing should be changed depending on what destination selected. Also unnecessary I2S channels are digitally muted. 
This function called when the user selects the destination in the ALSA mixer. Signed-off-by: Roman Volkov Signed-off-by: Clemens Ladisch Signed-off-by: Greg Kroah-Hartman --- sound/pci/oxygen/xonar_dg.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/oxygen/xonar_dg.c b/sound/pci/oxygen/xonar_dg.c index 5519b8f0dad0..eb7ad7706205 100644 --- a/sound/pci/oxygen/xonar_dg.c +++ b/sound/pci/oxygen/xonar_dg.c @@ -294,6 +294,16 @@ static int output_switch_put(struct snd_kcontrol *ctl, oxygen_write16_masked(chip, OXYGEN_GPIO_DATA, data->output_sel == 1 ? GPIO_HP_REAR : 0, GPIO_HP_REAR); + oxygen_write8_masked(chip, OXYGEN_PLAY_ROUTING, + data->output_sel == 0 ? + OXYGEN_PLAY_MUTE01 : + OXYGEN_PLAY_MUTE23 | + OXYGEN_PLAY_MUTE45 | + OXYGEN_PLAY_MUTE67, + OXYGEN_PLAY_MUTE01 | + OXYGEN_PLAY_MUTE23 | + OXYGEN_PLAY_MUTE45 | + OXYGEN_PLAY_MUTE67); } mutex_unlock(&chip->mutex); return changed; From 5a0b9c33b0a361a7b82fc4ae509bfc1df004f2c0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 27 Jul 2013 03:53:54 -0700 Subject: [PATCH 140/296] jiffies: Avoid undefined behavior from signed overflow commit 5a581b367b5df0531265311fc681c2abd377e5e6 upstream. According to the C standard 3.4.3p3, overflow of a signed integer results in undefined behavior. This commit therefore changes the definitions of time_after(), time_after_eq(), time_after64(), and time_after_eq64() to avoid this undefined behavior. The trick is that the subtraction is done using unsigned arithmetic, which according to 6.2.5p9 cannot overflow because it is defined as modulo arithmetic. This has the added (though admittedly quite small) benefit of shortening four lines of code by four characters each. Note that the C standard considers the cast from unsigned to signed to be implementation-defined, see 6.3.1.3p3. 
However, on a two's-complement system, an implementation that defines anything other than a reinterpretation of the bits is free to come to me, and I will be happy to act as a witness for its being committed to an insane asylum. (Although I have nothing against saturating arithmetic or signals in some cases, these things really should not be the default when compiling an operating-system kernel.) Signed-off-by: Paul E. McKenney Cc: John Stultz Cc: "David S. Miller" Cc: Arnd Bergmann Cc: Ingo Molnar Cc: Linus Torvalds Cc: Eric Dumazet Cc: Kevin Easton [ paulmck: Included time_after64() and time_after_eq64(), as suggested by Eric Dumazet, also fixed commit message.] Reviewed-by: Josh Triplett Ruchi Kandoi Signed-off-by: Greg Kroah-Hartman --- include/linux/jiffies.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 8fb8edf12417..7b5d4a8ab199 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -101,13 +101,13 @@ static inline u64 get_jiffies_64(void) #define time_after(a,b) \ (typecheck(unsigned long, a) && \ typecheck(unsigned long, b) && \ - ((long)(b) - (long)(a) < 0)) + ((long)((b) - (a)) < 0)) #define time_before(a,b) time_after(b,a) #define time_after_eq(a,b) \ (typecheck(unsigned long, a) && \ typecheck(unsigned long, b) && \ - ((long)(a) - (long)(b) >= 0)) + ((long)((a) - (b)) >= 0)) #define time_before_eq(a,b) time_after_eq(b,a) /* @@ -130,13 +130,13 @@ static inline u64 get_jiffies_64(void) #define time_after64(a,b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ - ((__s64)(b) - (__s64)(a) < 0)) + ((__s64)((b) - (a)) < 0)) #define time_before64(a,b) time_after64(b,a) #define time_after_eq64(a,b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ - ((__s64)(a) - (__s64)(b) >= 0)) + ((__s64)((a) - (b)) >= 0)) #define time_before_eq64(a,b) time_after_eq64(b,a) /* From ed93fb01a3991ced713a37bc844ed2f2abef8ea8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens 
Date: Thu, 31 Oct 2013 13:24:28 +0100 Subject: [PATCH 141/296] s390/dasd: hold request queue sysfs lock when calling elevator_init() commit ef0899410ff630b2e75306da49996dbbfa318165 upstream. "elevator: Fix a race in elevator switching and md device initialization" changed the semantics of elevator_init() in a way that now enforces to hold the corresponding request queue's sysfs_lock when calling elevator_init() to fix a race. The patch did not convert the s390 dasd device driver which is the only device driver which also calls elevator_init(). So add the missing locking. Cc: Tomoki Sekiyama Cc: Jens Axboe Signed-off-by: Heiko Carstens Signed-off-by: Jens Axboe Cc: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d72a9216ee2e..e91ec8cd9b09 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2879,12 +2879,12 @@ static int dasd_alloc_queue(struct dasd_block *block) elevator_exit(block->request_queue->elevator); block->request_queue->elevator = NULL; + mutex_lock(&block->request_queue->sysfs_lock); rc = elevator_init(block->request_queue, "deadline"); - if (rc) { + if (rc) blk_cleanup_queue(block->request_queue); - return rc; - } - return 0; + mutex_unlock(&block->request_queue->sysfs_lock); + return rc; } /* From f1352fb2a38e6d57c9ec7f96c5449db616e04ef2 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Fri, 31 Jan 2014 15:41:58 -0500 Subject: [PATCH 142/296] Fix mountpoint reference leakage in linkat commit d22e6338db7f613dd4f6095c190682fcc519e4b7 upstream. Recent changes to retry on ESTALE in linkat (commit 442e31ca5a49e398351b2954b51f578353fdf210) introduced a mountpoint reference leak and a small memory leak in case a filesystem link operation returns ESTALE which is pretty normal for distributed filesystems like lustre, nfs and so on. Free old_path in such a case. 
[AV: there was another missing path_put() nearby - on the previous goto retry] Signed-off-by: Oleg Drokin: Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/namei.c b/fs/namei.c index cccaf77e76c5..1211ee5a1cb3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3655,6 +3655,7 @@ retry: out_dput: done_path_create(&new_path, new_dentry); if (retry_estale(error, how)) { + path_put(&old_path); how |= LOOKUP_REVAL; goto retry; } From ba9a72974fd85701aea843b276d9773809286e74 Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Mon, 3 Mar 2014 15:38:25 -0800 Subject: [PATCH 143/296] memcg: reparent charges of children before processing parent commit 4fb1a86fb5e4209a7d4426d4e586c58e9edc74ac upstream. Sometimes the cleanup after memcg hierarchy testing gets stuck in mem_cgroup_reparent_charges(), unable to bring non-kmem usage down to 0. There may turn out to be several causes, but a major cause is this: the workitem to offline parent can get run before workitem to offline child; parent's mem_cgroup_reparent_charges() circles around waiting for the child's pages to be reparented to its lrus, but it's holding cgroup_mutex which prevents the child from reaching its mem_cgroup_reparent_charges(). Further testing showed that an ordered workqueue for cgroup_destroy_wq is not always good enough: percpu_ref_kill_and_confirm's call_rcu_sched stage on the way can mess up the order before reaching the workqueue. Instead, when offlining a memcg, call mem_cgroup_reparent_charges() on all its children (and grandchildren, in the correct order) to have their charges reparented first. [The version for 3.10.34 (or perhaps now 3.10.35) is this below. 
Yes, more differences, and the old mem_cgroup_reparent_charges line is intentionally left in for 3.10 whereas it was removed for 3.12+: that's because the css/cgroup iterator changed in between, it used not to supply the root of the subtree, but nowadays it does - Hugh] Fixes: e5fca243abae ("cgroup: use a dedicated workqueue for cgroup destruction") Signed-off-by: Filipe Brandenburger Signed-off-by: Hugh Dickins Reviewed-by: Tejun Heo Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6115b2bbd6ea..f45e21ab9cea 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6326,9 +6326,23 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) static void mem_cgroup_css_offline(struct cgroup *cont) { struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + struct cgroup *iter; mem_cgroup_invalidate_reclaim_iterators(memcg); + + /* + * This requires that offlining is serialized. Right now that is + * guaranteed because css_killed_work_fn() holds the cgroup_mutex. + */ + rcu_read_lock(); + cgroup_for_each_descendant_post(iter, cont) { + rcu_read_unlock(); + mem_cgroup_reparent_charges(mem_cgroup_from_cont(iter)); + rcu_read_lock(); + } + rcu_read_unlock(); mem_cgroup_reparent_charges(memcg); + mem_cgroup_destroy_all_caches(memcg); } From 99a65d40dfa32a1f6ae205d3d72b64b9c863f608 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Mon, 9 Dec 2013 09:49:45 +0000 Subject: [PATCH 144/296] MIPS: include linux/types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 87c99203fea897fbdd84b681ad9fced2517dcf98 upstream. 
The file uses u16 type but doesn't include its definition explicitly I was getting this error when including this header in my driver: arch/mips/include/asm/mipsregs.h:644:33: error: unknown type name ‘u16’ Signed-off-by: Qais Yousef Reviewed-by: Steven J. Hill Acked-by: David Daney Signed-off-by: John Crispin Patchwork: http://patchwork.linux-mips.org/patch/6212/ Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/mipsregs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 87e6207b05e4..3d0074e10595 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -14,6 +14,7 @@ #define _ASM_MIPSREGS_H #include +#include #include #include From 68c52c3ef819b289473200e232bb940dbe87fc48 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 21 Jan 2014 20:32:05 -0800 Subject: [PATCH 145/296] bio-integrity: Fix bio_integrity_verify segment start bug commit 5837c80e870bc3b12ac6a98cdc9ce7a9522a8fb6 upstream. This patch addresses a bug in bio_integrity_verify() code that has been causing DIF READ verify operations to be silently skipped. The issue is that bio->bi_idx will have been incremented within bio_advance() code in the normal blk_update_request() -> req_bio_endio() completion path, and bio_integrity_verify() is using bio_for_each_segment() which starts the bio segment walk at the current bio->bi_idx. So instead use bio_for_each_segment_all() to always start the bio segment walk from zero, regardless of the current bio->bi_idx value after bio_advance() has been called. (Context change for v3.10.y -> v3.13.y code - nab) Cc: Martin K. 
Petersen Cc: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/bio-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8dccf73025b3..433c3b828e1d 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -458,7 +458,7 @@ static int bio_integrity_verify(struct bio *bio) bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; - bio_for_each_segment(bv, bio, i) { + bio_for_each_segment_all(bv, bio, i) { void *kaddr = kmap_atomic(bv->bv_page); bix.data_buf = kaddr + bv->bv_offset; bix.data_size = bv->bv_len; From a191212af8f4895d6a40c9d53fa84e9ae575ecd0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Jun 2013 12:17:32 +0200 Subject: [PATCH 146/296] tick: Make oneshot broadcast robust vs. CPU offlining commit c9b5a266b103af873abb9ac03bc3d067702c8f4b upstream. In periodic mode we remove offline cpus from the broadcast propagation mask. In oneshot mode we fail to do so. This was not a problem so far, but the recent changes to the broadcast propagation introduced a constellation which can result in a NULL pointer dereference. What happens is: CPU0 CPU1 idle() arch_idle() tick_broadcast_oneshot_control(OFF); set cpu1 in tick_broadcast_force_mask if (cpu_offline()) arch_cpu_dead() cpu_dead_cleanup(cpu1) cpu1 tickdevice pointer = NULL broadcast interrupt dereference cpu1 tickdevice pointer -> OOPS We dereference the pointer because cpu1 is still set in tick_broadcast_force_mask and tick_do_broadcast() expects a valid cpumask and therefore lacks any further checks. Remove the cpu from the tick_broadcast_force_mask before we set the tick device pointer to NULL. Also add a sanity check to the oneshot broadcast function, so we can detect such issues w/o crashing the machine.
Reported-by: Prarit Bhargava Cc: athorlton@sgi.com Cc: CAI Qian Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1306261303260.4013@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Preeti U Murthy Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-broadcast.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f681da32a2ff..19ee339a1d0d 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -593,6 +593,13 @@ again: cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask); cpumask_clear(tick_broadcast_force_mask); + /* + * Sanity check. Catch the case where we try to broadcast to + * offline cpus. + */ + if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask))) + cpumask_and(tmpmask, tmpmask, cpu_online_mask); + /* * Wakeup the cpus which have an expired event. */ @@ -834,10 +841,12 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) raw_spin_lock_irqsave(&tick_broadcast_lock, flags); /* - * Clear the broadcast mask flag for the dead cpu, but do not - * stop the broadcast device! + * Clear the broadcast masks for the dead cpu, but do not stop + * the broadcast device! */ cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); + cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); + cpumask_clear_cpu(cpu, tick_broadcast_force_mask); raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } From 798610b539f631feef5af5bd713d197712e08ff1 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 4 Mar 2014 10:28:23 +0200 Subject: [PATCH 147/296] iwlwifi: mvm: don't WARN when statistics are handled late commit 1e9291996c4eedf79883f47ec635235e39d3d6cd upstream. Since the statistics handler is asynchronous, it can very well be that we will handle the statistics (hence the RSSI fluctuation) when we already disassociated. Don't WARN on this case.
This solves: https://bugzilla.redhat.com/show_bug.cgi?id=1071998 Fixes: 2b76ef13086f ("iwlwifi: mvm: implement reduced Tx power") Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/bt-coex.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/bt-coex.c b/drivers/net/wireless/iwlwifi/mvm/bt-coex.c index 810bfa5f6de0..9649f511bd5b 100644 --- a/drivers/net/wireless/iwlwifi/mvm/bt-coex.c +++ b/drivers/net/wireless/iwlwifi/mvm/bt-coex.c @@ -523,8 +523,11 @@ void iwl_mvm_bt_rssi_event(struct iwl_mvm *mvm, struct ieee80211_vif *vif, mutex_lock(&mvm->mutex); - /* Rssi update while not associated ?! */ - if (WARN_ON_ONCE(mvmvif->ap_sta_id == IWL_MVM_STATION_COUNT)) + /* + * Rssi update while not associated - can happen since the statistics + * are handled asynchronously + */ + if (mvmvif->ap_sta_id == IWL_MVM_STATION_COUNT) goto out_unlock; /* No open connection - reports should be disabled */ From 307af156795a3009398e609719cd557f58a20907 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 22 Oct 2013 17:59:54 +0100 Subject: [PATCH 148/296] ARM: 7864/1: Handle 64-bit memory in case of 32-bit phys_addr_t commit 6d7d5da7d75c6df676c8b72d32b02ff024438f0c upstream. Use CONFIG_ARCH_PHYS_ADDR_T_64BIT to determine if ignoring or truncating of memory banks is necessary. This may be needed in the case of 64-bit memory bank addresses but when phys_addr_t is kept 32-bit.
Signed-off-by: Magnus Damm Signed-off-by: Russell King Cc: Wang Nan Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/setup.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index b4b1d397592b..3ba573cb7167 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -530,6 +530,7 @@ void __init dump_machine_table(void) int __init arm_add_memory(phys_addr_t start, phys_addr_t size) { struct membank *bank = &meminfo.bank[meminfo.nr_banks]; + u64 aligned_start; if (meminfo.nr_banks >= NR_BANKS) { printk(KERN_CRIT "NR_BANKS too low, " @@ -542,10 +543,16 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) * Size is appropriately rounded down, start is rounded up. */ size -= start & ~PAGE_MASK; - bank->start = PAGE_ALIGN(start); + aligned_start = PAGE_ALIGN(start); -#ifndef CONFIG_ARM_LPAE - if (bank->start + size < bank->start) { +#ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT + if (aligned_start > ULONG_MAX) { + printk(KERN_CRIT "Ignoring memory at 0x%08llx outside " + "32-bit physical address space\n", (long long)start); + return -EINVAL; + } + + if (aligned_start + size > ULONG_MAX) { printk(KERN_CRIT "Truncating memory at 0x%08llx to fit in " "32-bit physical address space\n", (long long)start); /* @@ -553,10 +560,11 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) * 32 bits, we use ULONG_MAX as the upper limit rather than 4GB. * This means we lose a page after masking. */ - size = ULONG_MAX - bank->start; + size = ULONG_MAX - aligned_start; } #endif + bank->start = aligned_start; bank->size = size & ~(phys_addr_t)(PAGE_SIZE - 1); /* From 22fc72288f35219585f11fd40e663c0d3a30a28a Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 11 Jan 2014 11:22:18 +0000 Subject: [PATCH 149/296] ARM: ignore memory below PHYS_OFFSET commit 571b14375019c3a66ef70d4d4a7083f4238aca30 upstream. 
If the kernel is loaded higher in physical memory than normal, and we calculate PHYS_OFFSET higher than the start of RAM, this leads to boot problems as we attempt to map part of this RAM into userspace. Rather than struggle with this, just truncate the mapping. Signed-off-by: Russell King Cc: Wang Nan Signed-off-by: Greg Kroah-Hartman --- arch/arm/kernel/setup.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 3ba573cb7167..eb83bcc70ec8 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -564,6 +564,20 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) } #endif + if (aligned_start < PHYS_OFFSET) { + if (aligned_start + size <= PHYS_OFFSET) { + pr_info("Ignoring memory below PHYS_OFFSET: 0x%08llx-0x%08llx\n", + aligned_start, aligned_start + size); + return -EINVAL; + } + + pr_info("Ignoring memory below PHYS_OFFSET: 0x%08llx-0x%08llx\n", + aligned_start, (u64)PHYS_OFFSET); + + size -= PHYS_OFFSET - aligned_start; + aligned_start = PHYS_OFFSET; + } + bank->start = aligned_start; bank->size = size & ~(phys_addr_t)(PAGE_SIZE - 1); From af737f6739b80e4a9fc471fa0595740caf720714 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 3 Feb 2014 12:53:51 -0800 Subject: [PATCH 150/296] iscsi/iser-target: Use list_del_init for ->i_conn_node commit 5159d763f60af693a3fcec45dce2021f66e528a4 upstream. There are a handful of uses of list_empty() for cmd->i_conn_node within iser-target code that expect to return false once a cmd has been removed from the per connect list. This patch changes all uses of list_del -> list_del_init in order to ensure that list_empty() returns false as expected. 
Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.10+ Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 6 +++--- drivers/target/iscsi/iscsi_target.c | 4 ++-- drivers/target/iscsi/iscsi_target_erl2.c | 16 ++++++++-------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6fc283a041d6..45ac70bfd7e3 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1213,7 +1213,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_SCSI_CMD: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); if (cmd->data_direction == DMA_TO_DEVICE) @@ -1225,7 +1225,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_SCSI_TMFUNC: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); transport_generic_free_cmd(&cmd->se_cmd, 0); @@ -1234,7 +1234,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_NOOP_OUT: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); /* diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 5b07fd156bd7..59a5319ee50c 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3653,7 +3653,7 @@ iscsit_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state break; case ISTATE_REMOVE: spin_lock_bh(&conn->cmd_lock); - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, false); @@ -4099,7 +4099,7 @@ static void iscsit_release_commands_from_conn(struct 
iscsi_conn *conn) spin_lock_bh(&conn->cmd_lock); list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_increment_maxcmdsn(cmd, sess); diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index 45a5afd5ea13..0d2d013076c4 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -140,7 +140,7 @@ void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) list_for_each_entry_safe(cmd, cmd_tmp, &cr->conn_recovery_cmd_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); cmd->conn = NULL; spin_unlock(&cr->conn_recovery_cmd_lock); iscsit_free_cmd(cmd, true); @@ -162,7 +162,7 @@ void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) list_for_each_entry_safe(cmd, cmd_tmp, &cr->conn_recovery_cmd_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); cmd->conn = NULL; spin_unlock(&cr->conn_recovery_cmd_lock); iscsit_free_cmd(cmd, true); @@ -218,7 +218,7 @@ int iscsit_remove_cmd_from_connection_recovery( } cr = cmd->cr; - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); return --cr->cmd_count; } @@ -299,7 +299,7 @@ int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn) if (!(cmd->cmd_flags & ICF_OOO_CMDSN)) continue; - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, true); @@ -337,7 +337,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) /* * Only perform connection recovery on ISCSI_OP_SCSI_CMD or * ISCSI_OP_NOOP_OUT opcodes. For all other opcodes call - * list_del(&cmd->i_conn_node); to release the command to the + * list_del_init(&cmd->i_conn_node); to release the command to the * session pool and remove it from the connection's list. 
* * Also stop the DataOUT timer, which will be restarted after @@ -353,7 +353,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) " CID: %hu\n", cmd->iscsi_opcode, cmd->init_task_tag, cmd->cmd_sn, conn->cid); - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); @@ -373,7 +373,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) */ if (!(cmd->cmd_flags & ICF_OOO_CMDSN) && !cmd->immediate_cmd && iscsi_sna_gte(cmd->cmd_sn, conn->sess->exp_cmd_sn)) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); @@ -395,7 +395,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) cmd->sess = conn->sess; - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_all_datain_reqs(cmd); From d8bd97a03ce979b216df151e3b991023e6b72917 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 3 Feb 2014 12:54:39 -0800 Subject: [PATCH 151/296] iscsi/iser-target: Fix isert_conn->state hung shutdown issues commit defd884845297fd5690594bfe89656b01f16d87e upstream. This patch addresses a couple of different hung shutdown issues related to wait_event() + isert_conn->state. First, it changes isert_conn->conn_wait + isert_conn->conn_wait_comp_err from waitqueues to completions, and sets ISER_CONN_TERMINATING from within isert_disconnect_work(). Second, it splits isert_free_conn() into isert_wait_conn() that is called earlier in iscsit_close_connection() to ensure that all outstanding commands have completed before continuing. Finally, it breaks isert_cq_comp_err() into separate TX / RX related code, and adds logic in isert_cq_rx_comp_err() to wait for outstanding commands to complete before setting ISER_CONN_DOWN and calling complete(&isert_conn->conn_wait_comp_err).
Acked-by: Sagi Grimberg Cc: Or Gerlitz Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 104 +++++++++++------------- drivers/infiniband/ulp/isert/ib_isert.h | 4 +- drivers/target/iscsi/iscsi_target.c | 4 + include/target/iscsi/iscsi_transport.h | 1 + 4 files changed, 53 insertions(+), 60 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 45ac70bfd7e3..fe23f372c765 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -392,8 +392,8 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->state = ISER_CONN_INIT; INIT_LIST_HEAD(&isert_conn->conn_accept_node); init_completion(&isert_conn->conn_login_comp); - init_waitqueue_head(&isert_conn->conn_wait); - init_waitqueue_head(&isert_conn->conn_wait_comp_err); + init_completion(&isert_conn->conn_wait); + init_completion(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); @@ -550,11 +550,11 @@ isert_disconnect_work(struct work_struct *work) pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); mutex_lock(&isert_conn->conn_mutex); - isert_conn->state = ISER_CONN_DOWN; + if (isert_conn->state == ISER_CONN_UP) + isert_conn->state = ISER_CONN_TERMINATING; if (isert_conn->post_recv_buf_count == 0 && atomic_read(&isert_conn->post_send_buf_count) == 0) { - pr_debug("Calling wake_up(&isert_conn->conn_wait);\n"); mutex_unlock(&isert_conn->conn_mutex); goto wake_up; } @@ -574,7 +574,7 @@ isert_disconnect_work(struct work_struct *work) mutex_unlock(&isert_conn->conn_mutex); wake_up: - wake_up(&isert_conn->conn_wait); + complete(&isert_conn->conn_wait); isert_put_conn(isert_conn); } @@ -1348,7 +1348,7 @@ isert_do_control_comp(struct work_struct *work) pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); 
/* * Call atomic_dec(&isert_conn->post_send_buf_count) - * from isert_free_conn() + * from isert_wait_conn() */ isert_conn->logout_posted = true; iscsit_logout_post_handler(cmd, cmd->conn); @@ -1426,31 +1426,38 @@ isert_send_completion(struct iser_tx_desc *tx_desc, } static void -isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) +isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct isert_cmd *isert_cmd = tx_desc->isert_cmd; - if (tx_desc) { - struct isert_cmd *isert_cmd = tx_desc->isert_cmd; + if (!isert_cmd) + isert_unmap_tx_desc(tx_desc, ib_dev); + else + isert_completion_put(tx_desc, isert_cmd, ib_dev); +} - if (!isert_cmd) - isert_unmap_tx_desc(tx_desc, ib_dev); - else - isert_completion_put(tx_desc, isert_cmd, ib_dev); +static void +isert_cq_rx_comp_err(struct isert_conn *isert_conn) +{ + struct iscsi_conn *conn = isert_conn->conn; + + if (isert_conn->post_recv_buf_count) + return; + + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); } - if (isert_conn->post_recv_buf_count == 0 && - atomic_read(&isert_conn->post_send_buf_count) == 0) { - pr_debug("isert_cq_comp_err >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - pr_debug("Calling wake_up from isert_cq_comp_err\n"); + while (atomic_read(&isert_conn->post_send_buf_count)) + msleep(3000); - mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->state != ISER_CONN_DOWN) - isert_conn->state = ISER_CONN_TERMINATING; - mutex_unlock(&isert_conn->conn_mutex); + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_DOWN; + mutex_unlock(&isert_conn->conn_mutex); - wake_up(&isert_conn->conn_wait_comp_err); - } + complete(&isert_conn->conn_wait_comp_err); } static void @@ -1475,7 +1482,7 @@ isert_cq_tx_work(struct work_struct *work) pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n"); pr_debug("TX wc.status: 
0x%08x\n", wc.status); atomic_dec(&isert_conn->post_send_buf_count); - isert_cq_comp_err(tx_desc, isert_conn); + isert_cq_tx_comp_err(tx_desc, isert_conn); } } @@ -1517,7 +1524,7 @@ isert_cq_rx_work(struct work_struct *work) pr_debug("RX wc.status: 0x%08x\n", wc.status); isert_conn->post_recv_buf_count--; - isert_cq_comp_err(NULL, isert_conn); + isert_cq_rx_comp_err(isert_conn); } } @@ -2218,22 +2225,11 @@ isert_free_np(struct iscsi_np *np) kfree(isert_np); } -static int isert_check_state(struct isert_conn *isert_conn, int state) -{ - int ret; - - mutex_lock(&isert_conn->conn_mutex); - ret = (isert_conn->state == state); - mutex_unlock(&isert_conn->conn_mutex); - - return ret; -} - -static void isert_free_conn(struct iscsi_conn *conn) +static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; - pr_debug("isert_free_conn: Starting \n"); + pr_debug("isert_wait_conn: Starting \n"); /* * Decrement post_send_buf_count for special case when called * from isert_do_control_comp() -> iscsit_logout_post_handler() @@ -2243,38 +2239,29 @@ static void isert_free_conn(struct iscsi_conn *conn) atomic_dec(&isert_conn->post_send_buf_count); if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) { - pr_debug("Calling rdma_disconnect from isert_free_conn\n"); + pr_debug("Calling rdma_disconnect from isert_wait_conn\n"); rdma_disconnect(isert_conn->conn_cm_id); } /* * Only wait for conn_wait_comp_err if the isert_conn made it * into full feature phase.. 
*/ - if (isert_conn->state == ISER_CONN_UP) { - pr_debug("isert_free_conn: Before wait_event comp_err %d\n", - isert_conn->state); - mutex_unlock(&isert_conn->conn_mutex); - - wait_event(isert_conn->conn_wait_comp_err, - (isert_check_state(isert_conn, ISER_CONN_TERMINATING))); - - wait_event(isert_conn->conn_wait, - (isert_check_state(isert_conn, ISER_CONN_DOWN))); - - isert_put_conn(isert_conn); - return; - } if (isert_conn->state == ISER_CONN_INIT) { mutex_unlock(&isert_conn->conn_mutex); - isert_put_conn(isert_conn); return; } - pr_debug("isert_free_conn: wait_event conn_wait %d\n", - isert_conn->state); + if (isert_conn->state == ISER_CONN_UP) + isert_conn->state = ISER_CONN_TERMINATING; mutex_unlock(&isert_conn->conn_mutex); - wait_event(isert_conn->conn_wait, - (isert_check_state(isert_conn, ISER_CONN_DOWN))); + wait_for_completion(&isert_conn->conn_wait_comp_err); + + wait_for_completion(&isert_conn->conn_wait); +} + +static void isert_free_conn(struct iscsi_conn *conn) +{ + struct isert_conn *isert_conn = conn->context; isert_put_conn(isert_conn); } @@ -2286,6 +2273,7 @@ static struct iscsit_transport iser_target_transport = { .iscsit_setup_np = isert_setup_np, .iscsit_accept_np = isert_accept_np, .iscsit_free_np = isert_free_np, + .iscsit_wait_conn = isert_wait_conn, .iscsit_free_conn = isert_free_conn, .iscsit_alloc_cmd = isert_alloc_cmd, .iscsit_get_login_rx = isert_get_login_rx, diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 5795c82a2306..b9d6cc6917cf 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -103,8 +103,8 @@ struct isert_conn { struct isert_device *conn_device; struct work_struct conn_logout_work; struct mutex conn_mutex; - wait_queue_head_t conn_wait; - wait_queue_head_t conn_wait_comp_err; + struct completion conn_wait; + struct completion conn_wait_comp_err; struct kref conn_kref; }; diff --git a/drivers/target/iscsi/iscsi_target.c 
b/drivers/target/iscsi/iscsi_target.c index 59a5319ee50c..5232ac7b0745 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4144,6 +4144,10 @@ int iscsit_close_connection( iscsit_stop_timers_for_cmds(conn); iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + iscsit_free_queue_reqs_for_conn(conn); /* diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index c5aade523863..4a5f00e2e6cd 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -11,6 +11,7 @@ struct iscsit_transport { int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *); int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *); void (*iscsit_free_np)(struct iscsi_np *); + void (*iscsit_wait_conn)(struct iscsi_conn *); void (*iscsit_free_conn)(struct iscsi_conn *); struct iscsi_cmd *(*iscsit_alloc_cmd)(struct iscsi_conn *, gfp_t); int (*iscsit_get_login_rx)(struct iscsi_conn *, struct iscsi_login *); From 0bf95498441319bcdba6fc4d8307091cd66bfc5e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 27 Feb 2014 09:05:03 -0800 Subject: [PATCH 152/296] iser-target: Fix post_send_buf_count for RDMA READ/WRITE commit b6b87a1df604678ed1be40158080db012a99ccca upstream. This patch fixes the incorrect setting of ->post_send_buf_count related to RDMA WRITEs + READs where isert_rdma_rw->send_wr_num was not being taken into account. 
This includes incrementing ->post_send_buf_count within isert_put_datain() + isert_get_dataout(), decrementing within __isert_send_completion() + isert_response_completion(), and clearing wr->send_wr_num within isert_completion_rdma_read() This is necessary because even though IB_SEND_SIGNALED is not set for RDMA WRITEs + READs, during a QP failure event the work requests will be returned with exception status from the TX completion queue. Acked-by: Sagi Grimberg Cc: Or Gerlitz Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index fe23f372c765..588a5eca63d8 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1308,6 +1308,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, } cmd->write_data_done = se_cmd->data_length; + wr->send_wr_num = 0; pr_debug("isert_do_rdma_read_comp, calling target_execute_cmd\n"); spin_lock_bh(&cmd->istate_lock); @@ -1367,6 +1368,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev) { struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd; + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || cmd->i_state == ISTATE_SEND_LOGOUTRSP || @@ -1377,7 +1379,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, queue_work(isert_comp_wq, &isert_cmd->comp_work); return; } - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; isert_completion_put(tx_desc, isert_cmd, ib_dev); @@ -1415,7 +1417,7 @@ isert_send_completion(struct iser_tx_desc *tx_desc, case ISER_IB_RDMA_READ: pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n"); - atomic_dec(&isert_conn->post_send_buf_count); + 
atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); isert_completion_rdma_read(tx_desc, isert_cmd); break; default: @@ -1834,12 +1836,12 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); - atomic_inc(&isert_conn->post_send_buf_count); + atomic_add(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); if (rc) { pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); } pr_debug("Posted RDMA_WRITE + Response for iSER Data READ\n"); return 1; @@ -1942,12 +1944,12 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) data_left -= data_len; } - atomic_inc(&isert_conn->post_send_buf_count); + atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count); rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); if (rc) { pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); } pr_debug("Posted RDMA_READ memory for ISER Data WRITE\n"); return 0; From ae59ae911d2c14cf23843b9f12a935610c9c3db1 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 11 Mar 2014 22:40:27 +0800 Subject: [PATCH 153/296] PNP / ACPI: proper handling of ACPI IO/Memory resource parsing failures commit 89935315f192abf7068d0044cefc84f162c3c81f upstream. Before commit b355cee88e3b (ACPI / resources: ignore invalid ACPI device resources), if acpi_dev_resource_memory()/acpi_dev_resource_io() returns false, it means the resource is not a memory/IO resource. But after commit b355cee88e3b, those functions return false if the given memory/IO resource entry is invalid (the length of the resource is zero).
This breaks pnpacpi_allocated_resource(), because it now recognizes the invalid memory/io resources as resources of unknown type. Thus users see confusing warning messages on machines with zero length ACPI memory/IO resources. Fix the problem by rearranging pnpacpi_allocated_resource() so that it calls acpi_dev_resource_memory() for memory type and IO type resources only, respectively. Fixes: b355cee88e3b (ACPI / resources: ignore invalid ACPI device resources) Signed-off-by: Zhang Rui Reported-and-tested-by: Markus Trippelsdorf Reported-and-tested-by: Julian Wollrath Reported-and-tested-by: Paul Bolle [rjw: Changelog] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/pnp/pnpacpi/rsparser.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 9847ab163829..a8b7466196ee 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -183,9 +183,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, struct resource r; int i, flags; - if (acpi_dev_resource_memory(res, &r) - || acpi_dev_resource_io(res, &r) - || acpi_dev_resource_address_space(res, &r) + if (acpi_dev_resource_address_space(res, &r) || acpi_dev_resource_ext_address_space(res, &r)) { pnp_add_resource(dev, &r); return AE_OK; @@ -217,6 +215,17 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, } switch (res->type) { + case ACPI_RESOURCE_TYPE_MEMORY24: + case ACPI_RESOURCE_TYPE_MEMORY32: + case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: + if (acpi_dev_resource_memory(res, &r)) + pnp_add_resource(dev, &r); + break; + case ACPI_RESOURCE_TYPE_IO: + case ACPI_RESOURCE_TYPE_FIXED_IO: + if (acpi_dev_resource_io(res, &r)) + pnp_add_resource(dev, &r); + break; case ACPI_RESOURCE_TYPE_DMA: dma = &res->data.dma; if (dma->channel_count > 0 && dma->channels[0] != (u8) -1) From 10f8245e0d3650144b034142c8f91e5d15c392ab Mon Sep 17 
00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 23 Mar 2014 21:42:03 -0700 Subject: [PATCH 154/296] Linux 3.10.34 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1e602eb906fa..571a1bf14868 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 33 +SUBLEVEL = 34 EXTRAVERSION = NAME = TOSSUG Baby Fish From 765aae26e6e296333c3a5f7a02360f5389dc439a Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 24 Mar 2014 13:47:27 +0000 Subject: [PATCH 155/296] hmp: sched: Clean up hmp_up_threshold checks into a utility fn In anticipation of modifying the up_threshold handling, make all instances use the same utility fn to check if a task is eligible for up-migration. This also removes the previous difference in threshold comparison where up-migration used '!<threshold' and idle pull used '>threshold' to decide up-migration eligibility. Make them both use '!<threshold'. Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 43857fec77be..febf67ef8e42 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6707,6 +6707,14 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { } #endif #ifdef CONFIG_SCHED_HMP +static unsigned int hmp_task_eligible_for_up_migration(struct sched_entity *se) +{ + /* below hmp_up_threshold, never eligible */ + if (se->avg.load_avg_ratio < hmp_up_threshold) + return 0; + return 1; +} + /* Check if task should migrate to a faster cpu */ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_entity *se) { @@ -6722,7 +6730,7 @@ static unsigned int hmp_up_migration(int cpu, int *target_cpu, struct sched_enti if (p->prio >= hmp_up_prio) return 0; #endif - if (se->avg.load_avg_ratio < hmp_up_threshold) + if (!hmp_task_eligible_for_up_migration(se)) return 0; /* Let the task load settle before doing another up migration */ @@ -7210,7
+7218,10 @@ static unsigned int hmp_idle_pull(int this_cpu) } orig = curr; curr = hmp_get_heaviest_task(curr, 1); - if (curr->avg.load_avg_ratio > hmp_up_threshold && + /* check if heaviest eligible task on this + * CPU is heavier than previous task + */ + if (hmp_task_eligible_for_up_migration(curr) && curr->avg.load_avg_ratio > ratio) { p = task_of(curr); target = rq; From 0baa5811bacf15b0e76ee85ce29fedffb5136313 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 24 Mar 2014 13:47:28 +0000 Subject: [PATCH 156/296] sched: hmp: unify active migration code The HMP active migration code is functionally identical to the CFS active migration code apart from one flag check. Share the code and make the flag check optional. Two wrapper functions allow the flag check to be present or not. Thanks to tixy@linaro.org for pointing out the build break and a good solution in an earlier version. Signed-off-by: Chris Redpath Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 198 +++++++++++--------------------------------- 1 file changed, 49 insertions(+), 149 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index febf67ef8e42..1c003e9e1ef2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6184,9 +6184,17 @@ out_one_pinned: out: return ld_moved; } + #ifdef CONFIG_SCHED_HMP static unsigned int hmp_idle_pull(int this_cpu); +static int move_specific_task(struct lb_env *env, struct task_struct *pm); +#else +static int move_specific_task(struct lb_env *env, struct task_struct *pm) +{ + return 0; +} #endif + /* * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. @@ -6246,22 +6254,19 @@ void idle_balance(int this_cpu, struct rq *this_rq) } } -/* - * active_load_balance_cpu_stop is run by cpu stopper. It pushes - * running tasks off the busiest CPU onto idle CPUs. 
It requires at - * least 1 task to be running on each physical CPU where possible, and - * avoids physical / logical imbalances. - */ -static int active_load_balance_cpu_stop(void *data) +static int __do_active_load_balance_cpu_stop(void *data, bool check_sd_lb_flag) { struct rq *busiest_rq = data; int busiest_cpu = cpu_of(busiest_rq); int target_cpu = busiest_rq->push_cpu; struct rq *target_rq = cpu_rq(target_cpu); struct sched_domain *sd; + struct task_struct *p = NULL; raw_spin_lock_irq(&busiest_rq->lock); - +#ifdef CONFIG_SCHED_HMP + p = busiest_rq->migrate_task; +#endif /* make sure the requested cpu hasn't gone down in the meantime */ if (unlikely(busiest_cpu != smp_processor_id() || !busiest_rq->active_balance)) @@ -6271,6 +6276,11 @@ static int active_load_balance_cpu_stop(void *data) if (busiest_rq->nr_running <= 1) goto out_unlock; + if (!check_sd_lb_flag) { + /* Task has migrated meanwhile, abort forced migration */ + if (task_rq(p) != busiest_rq) + goto out_unlock; + } /* * This condition is "impossible", if it occurs * we need to fix it. Originally reported by @@ -6284,12 +6294,14 @@ static int active_load_balance_cpu_stop(void *data) /* Search for an sd spanning us and the target CPU. 
*/ rcu_read_lock(); for_each_domain(target_cpu, sd) { - if ((sd->flags & SD_LOAD_BALANCE) && - cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) + if (((check_sd_lb_flag && sd->flags & SD_LOAD_BALANCE) || + !check_sd_lb_flag) && + cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) break; } if (likely(sd)) { + bool success = false; struct lb_env env = { .sd = sd, .dst_cpu = target_cpu, @@ -6301,7 +6313,14 @@ static int active_load_balance_cpu_stop(void *data) schedstat_inc(sd, alb_count); - if (move_one_task(&env)) + if (check_sd_lb_flag) { + if (move_one_task(&env)) + success = true; + } else { + if (move_specific_task(&env, p)) + success = true; + } + if (success) schedstat_inc(sd, alb_pushed); else schedstat_inc(sd, alb_failed); @@ -6309,11 +6328,24 @@ static int active_load_balance_cpu_stop(void *data) rcu_read_unlock(); double_unlock_balance(busiest_rq, target_rq); out_unlock: + if (!check_sd_lb_flag) + put_task_struct(p); busiest_rq->active_balance = 0; raw_spin_unlock_irq(&busiest_rq->lock); return 0; } +/* + * active_load_balance_cpu_stop is run by cpu stopper. It pushes + * running tasks off the busiest CPU onto idle CPUs. It requires at + * least 1 task to be running on each physical CPU where possible, and + * avoids physical / logical imbalances. + */ +static int active_load_balance_cpu_stop(void *data) +{ + return __do_active_load_balance_cpu_stop(data, true); +} + #ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details @@ -6874,151 +6906,19 @@ static int move_specific_task(struct lb_env *env, struct task_struct *pm) * hmp_active_task_migration_cpu_stop is run by cpu stopper and used to * migrate a specific task from one runqueue to another. * hmp_force_up_migration uses this to push a currently running task - * off a runqueue. - * Based on active_load_balance_stop_cpu and can potentially be merged. + * off a runqueue. hmp_idle_pull uses this to pull a currently + * running task to an idle runqueue. 
+ * Reuses __do_active_load_balance_cpu_stop to actually do the work. */ static int hmp_active_task_migration_cpu_stop(void *data) { - struct rq *busiest_rq = data; - struct task_struct *p = busiest_rq->migrate_task; - int busiest_cpu = cpu_of(busiest_rq); - int target_cpu = busiest_rq->push_cpu; - struct rq *target_rq = cpu_rq(target_cpu); - struct sched_domain *sd; - - raw_spin_lock_irq(&busiest_rq->lock); - /* make sure the requested cpu hasn't gone down in the meantime */ - if (unlikely(busiest_cpu != smp_processor_id() || - !busiest_rq->active_balance)) { - goto out_unlock; - } - /* Is there any task to move? */ - if (busiest_rq->nr_running <= 1) - goto out_unlock; - /* Task has migrated meanwhile, abort forced migration */ - if (task_rq(p) != busiest_rq) - goto out_unlock; - /* - * This condition is "impossible", if it occurs - * we need to fix it. Originally reported by - * Bjorn Helgaas on a 128-cpu setup. - */ - BUG_ON(busiest_rq == target_rq); - - /* move a task from busiest_rq to target_rq */ - double_lock_balance(busiest_rq, target_rq); - - /* Search for an sd spanning us and the target CPU. */ - rcu_read_lock(); - for_each_domain(target_cpu, sd) { - if (cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) - break; - } - - if (likely(sd)) { - struct lb_env env = { - .sd = sd, - .dst_cpu = target_cpu, - .dst_rq = target_rq, - .src_cpu = busiest_rq->cpu, - .src_rq = busiest_rq, - .idle = CPU_IDLE, - }; - - schedstat_inc(sd, alb_count); - - if (move_specific_task(&env, p)) - schedstat_inc(sd, alb_pushed); - else - schedstat_inc(sd, alb_failed); - } - rcu_read_unlock(); - double_unlock_balance(busiest_rq, target_rq); -out_unlock: - put_task_struct(p); - busiest_rq->active_balance = 0; - raw_spin_unlock_irq(&busiest_rq->lock); - return 0; -} - -/* - * hmp_idle_pull_cpu_stop is run by cpu stopper and used to - * migrate a specific task from one runqueue to another. 
- * hmp_idle_pull uses this to push a currently running task - * off a runqueue to a faster CPU. - * Locking is slightly different than usual. - * Based on active_load_balance_stop_cpu and can potentially be merged. - */ -static int hmp_idle_pull_cpu_stop(void *data) -{ - struct rq *busiest_rq = data; - struct task_struct *p = busiest_rq->migrate_task; - int busiest_cpu = cpu_of(busiest_rq); - int target_cpu = busiest_rq->push_cpu; - struct rq *target_rq = cpu_rq(target_cpu); - struct sched_domain *sd; - - raw_spin_lock_irq(&busiest_rq->lock); - - /* make sure the requested cpu hasn't gone down in the meantime */ - if (unlikely(busiest_cpu != smp_processor_id() || - !busiest_rq->active_balance)) - goto out_unlock; - - /* Is there any task to move? */ - if (busiest_rq->nr_running <= 1) - goto out_unlock; - - /* Task has migrated meanwhile, abort forced migration */ - if (task_rq(p) != busiest_rq) - goto out_unlock; - - /* - * This condition is "impossible", if it occurs - * we need to fix it. Originally reported by - * Bjorn Helgaas on a 128-cpu setup. - */ - BUG_ON(busiest_rq == target_rq); - - /* move a task from busiest_rq to target_rq */ - double_lock_balance(busiest_rq, target_rq); - - /* Search for an sd spanning us and the target CPU. 
*/ - rcu_read_lock(); - for_each_domain(target_cpu, sd) { - if (cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) - break; - } - if (likely(sd)) { - struct lb_env env = { - .sd = sd, - .dst_cpu = target_cpu, - .dst_rq = target_rq, - .src_cpu = busiest_rq->cpu, - .src_rq = busiest_rq, - .idle = CPU_IDLE, - }; - - schedstat_inc(sd, alb_count); - - if (move_specific_task(&env, p)) - schedstat_inc(sd, alb_pushed); - else - schedstat_inc(sd, alb_failed); - } - rcu_read_unlock(); - double_unlock_balance(busiest_rq, target_rq); -out_unlock: - put_task_struct(p); - busiest_rq->active_balance = 0; - raw_spin_unlock_irq(&busiest_rq->lock); - return 0; + return __do_active_load_balance_cpu_stop(data, false); } /* * Move task in a runnable state to another CPU. * - * Tailored on 'active_load_balance_stop_cpu' with slight + * Tailored on 'active_load_balance_cpu_stop' with slight * modification to locking and pre-transfer checks. Note * rq->lock must be held before calling. */ @@ -7258,7 +7158,7 @@ static unsigned int hmp_idle_pull(int this_cpu) if (force) { stop_one_cpu_nowait(cpu_of(target), - hmp_idle_pull_cpu_stop, + hmp_active_task_migration_cpu_stop, target, &target->active_balance_work); } done: From aae7721f20f2520d24a149408a74f18e58f56472 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 24 Mar 2014 13:47:29 +0000 Subject: [PATCH 157/296] hmp: Use idle pull to perform forced up-migrations When a normal forced up-migration takes place we stop the task to be migrated while the target CPU becomes available. This delay can range from 80us to 1500us on TC2 if the target CPU is in a deep idle state. Instead, interrupt the target CPU and ask it to pull a task. This lets the current eligible task continue executing on the original CPU while the target CPU wakes. Use a pinned timer to prevent the pulling CPU going back into power-down with pending up-migrations. 
If we trigger for a nohz kick, it doesn't matter about triggering for an idle pull since the idle_pull flag will be set when we execute the softirq and we'll still do the idle pull. If the target CPU is busy, we will not pull any tasks. Signed-off-by: Chris Redpath Signed-off-by: Jon Medhurst --- kernel/sched/core.c | 11 +++- kernel/sched/fair.c | 142 +++++++++++++++++++++++++++++++++++++++---- kernel/sched/sched.h | 1 + 3 files changed, 142 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3e326f9208fe..5f242330ef85 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1407,7 +1407,11 @@ void scheduler_ipi(void) { if (llist_empty(&this_rq()->wake_list) && !tick_nohz_full_cpu(smp_processor_id()) - && !got_nohz_idle_kick()) + && !got_nohz_idle_kick() +#ifdef CONFIG_SCHED_HMP + && !this_rq()->wake_for_idle_pull +#endif + ) return; /* @@ -1434,6 +1438,11 @@ void scheduler_ipi(void) this_rq()->idle_balance = 1; raise_softirq_irqoff(SCHED_SOFTIRQ); } +#ifdef CONFIG_SCHED_HMP + else if (unlikely(this_rq()->wake_for_idle_pull)) + raise_softirq_irqoff(SCHED_SOFTIRQ); +#endif + irq_exit(); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1c003e9e1ef2..128d5723ae4d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -39,6 +39,9 @@ */ #include #endif /* CONFIG_HMP_FREQUENCY_INVARIANT_SCALE */ +#ifdef CONFIG_SCHED_HMP +#include +#endif #include "sched.h" @@ -3514,6 +3517,110 @@ static const int hmp_max_tasks = 5; extern void __init arch_get_hmp_domains(struct list_head *hmp_domains_list); +#ifdef CONFIG_CPU_IDLE +/* + * hmp_idle_pull: + * + * In this version we have stopped using forced up migrations when we + * detect that a task running on a little CPU should be moved to a bigger + * CPU. 
In most cases, the bigger CPU is in a deep sleep state and a forced + * migration means we stop the task immediately but need to wait for the + * target CPU to wake up before we can restart the task which is being + * moved. Instead, we now wake a big CPU with an IPI and ask it to pull + * a task when ready. This allows the task to continue executing on its + * current CPU, reducing the amount of time that the task is stalled for. + * + * keepalive timers: + * + * The keepalive timer is used as a way to keep a CPU engaged in an + * idle pull operation out of idle while waiting for the source + * CPU to stop and move the task. Ideally this would not be necessary + * and we could impose a temporary zero-latency requirement on the + * current CPU, but in the current QoS framework this will result in + * all CPUs in the system being unable to enter idle states which is + * not desirable. The timer does not perform any work when it expires. + */ +struct hmp_keepalive { + bool init; + ktime_t delay; /* if zero, no need for timer */ + struct hrtimer timer; +}; +DEFINE_PER_CPU(struct hmp_keepalive, hmp_cpu_keepalive); + +/* setup per-cpu keepalive timers */ +static enum hrtimer_restart hmp_cpu_keepalive_notify(struct hrtimer *hrtimer) +{ + return HRTIMER_NORESTART; +} + +/* + * Work out if any of the idle states have an exit latency too high for us. + * ns_delay is passed in containing the max we are willing to tolerate. + * If there are none, set ns_delay to zero. + * If there are any, set ns_delay to + * ('target_residency of state with shortest too-big latency' - 1) * 1000. + */ +static void hmp_keepalive_delay(unsigned int *ns_delay) +{ + struct cpuidle_driver *drv; + drv = cpuidle_driver_ref(); + if (drv) { + unsigned int us_delay = UINT_MAX; + unsigned int us_max_delay = *ns_delay / 1000; + int idx; + /* if cpuidle states are guaranteed to be sorted we + * could stop at the first match. 
+ */ + for (idx = 0; idx < drv->state_count; idx++) { + if (drv->states[idx].exit_latency > us_max_delay && + drv->states[idx].target_residency < us_delay) { + us_delay = drv->states[idx].target_residency; + } + } + if (us_delay == UINT_MAX) + *ns_delay = 0; /* no timer required */ + else + *ns_delay = 1000 * (us_delay - 1); + } + cpuidle_driver_unref(); +} + +static void hmp_cpu_keepalive_trigger(void) +{ + int cpu = smp_processor_id(); + struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu); + if (!keepalive->init) { + unsigned int ns_delay = 100000; /* tolerate 100usec delay */ + + hrtimer_init(&keepalive->timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + keepalive->timer.function = hmp_cpu_keepalive_notify; + + hmp_keepalive_delay(&ns_delay); + keepalive->delay = ns_to_ktime(ns_delay); + keepalive->init = true; + } + if (ktime_to_ns(keepalive->delay)) + hrtimer_start(&keepalive->timer, + keepalive->delay, HRTIMER_MODE_REL_PINNED); +} + +static void hmp_cpu_keepalive_cancel(int cpu) +{ + struct hmp_keepalive *keepalive = &per_cpu(hmp_cpu_keepalive, cpu); + if (keepalive->init) + hrtimer_cancel(&keepalive->timer); +} +#else /* !CONFIG_CPU_IDLE */ +static void hmp_cpu_keepalive_trigger(void) +{ +} + +static void hmp_cpu_keepalive_cancel(int cpu) +{ +} +#endif + /* Setup hmp_domains */ static int __init hmp_cpu_mask_setup(void) { @@ -3574,6 +3681,8 @@ static void hmp_offline_cpu(int cpu) if(domain) cpumask_clear_cpu(cpu, &domain->cpus); + + hmp_cpu_keepalive_cancel(cpu); } /* * Needed to determine heaviest tasks etc. 
@@ -7003,7 +7112,7 @@ static void hmp_force_up_migration(int this_cpu) target = cpu_rq(cpu); raw_spin_lock_irqsave(&target->lock, flags); curr = target->cfs.curr; - if (!curr) { + if (!curr || target->active_balance) { raw_spin_unlock_irqrestore(&target->lock, flags); continue; } @@ -7020,16 +7129,13 @@ static void hmp_force_up_migration(int this_cpu) curr = hmp_get_heaviest_task(curr, 1); p = task_of(curr); if (hmp_up_migration(cpu, &target_cpu, curr)) { - if (!target->active_balance) { - get_task_struct(p); - target->push_cpu = target_cpu; - target->migrate_task = p; - got_target = 1; - trace_sched_hmp_migrate(p, target->push_cpu, HMP_MIGRATE_FORCE); - hmp_next_up_delay(&p->se, target->push_cpu); - } + cpu_rq(target_cpu)->wake_for_idle_pull = 1; + raw_spin_unlock_irqrestore(&target->lock, flags); + spin_unlock(&hmp_force_migration); + smp_send_reschedule(target_cpu); + return; } - if (!got_target && !target->active_balance) { + if (!got_target) { /* * For now we just check the currently running task. * Selecting the lightest task for offloading will @@ -7051,7 +7157,7 @@ static void hmp_force_up_migration(int this_cpu) * is not currently running move it, otherwise let the * CPU stopper take care of it. */ - if (got_target && !target->active_balance) { + if (got_target) { if (!task_running(target, p)) { trace_sched_hmp_migrate_force_running(p, 0); hmp_migrate_runnable_task(target); @@ -7157,6 +7263,8 @@ static unsigned int hmp_idle_pull(int this_cpu) raw_spin_unlock_irqrestore(&target->lock, flags); if (force) { + /* start timer to keep us awake */ + hmp_cpu_keepalive_trigger(); stop_one_cpu_nowait(cpu_of(target), hmp_active_task_migration_cpu_stop, target, &target->active_balance_work); @@ -7180,6 +7288,18 @@ static void run_rebalance_domains(struct softirq_action *h) enum cpu_idle_type idle = this_rq->idle_balance ? 
CPU_IDLE : CPU_NOT_IDLE; +#ifdef CONFIG_SCHED_HMP + /* shortcut for hmp idle pull wakeups */ + if (unlikely(this_rq->wake_for_idle_pull)) { + this_rq->wake_for_idle_pull = 0; + if (hmp_idle_pull(this_cpu)) { + /* break out unless running nohz idle as well */ + if (idle != CPU_IDLE) + return; + } + } +#endif + hmp_force_up_migration(this_cpu); rebalance_domains(this_cpu, idle); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 27f51ac86700..fbcda9339fc7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -466,6 +466,7 @@ struct rq { struct cpu_stop_work active_balance_work; #ifdef CONFIG_SCHED_HMP struct task_struct *migrate_task; + int wake_for_idle_pull; #endif /* cpu of this runqueue: */ int cpu; From 5a570cfc01b06906faa8ac67ad7c0c6f278761c4 Mon Sep 17 00:00:00 2001 From: Chris Redpath Date: Mon, 24 Mar 2014 13:47:30 +0000 Subject: [PATCH 158/296] hmp: dont attempt to pull tasks if affinity doesn't allow it When looking for a task to be idle-pulled, don't consider tasks where the affinity does not allow that task to be placed on the target CPU. Also ensure that tasks with restricted affinity do not block selecting other unrestricted busy tasks. Use the knowledge of target CPU more effectively in idle pull by passing to hmp_get_heaviest_task when we know it, otherwise only checking for general affinity matches with any of the CPUs in the bigger HMP domain. We still need to explicitly check affinity is allowed in idle pull since if we find no match in hmp_get_heaviest_task we will return the current one, which may not be affine to the new CPU despite having high enough load. In this case, there is nothing to move. 
Signed-off-by: Chris Redpath Signed-off-by: Jon Medhurst --- kernel/sched/fair.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 128d5723ae4d..1957f2589d9a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3694,30 +3694,31 @@ static inline struct hmp_domain *hmp_faster_domain(int cpu); /* must hold runqueue lock for queue se is currently on */ static struct sched_entity *hmp_get_heaviest_task( - struct sched_entity *se, int migrate_up) + struct sched_entity *se, int target_cpu) { int num_tasks = hmp_max_tasks; struct sched_entity *max_se = se; unsigned long int max_ratio = se->avg.load_avg_ratio; const struct cpumask *hmp_target_mask = NULL; + struct hmp_domain *hmp; - if (migrate_up) { - struct hmp_domain *hmp; - if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq))) - return max_se; + if (hmp_cpu_is_fastest(cpu_of(se->cfs_rq->rq))) + return max_se; - hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq)); - hmp_target_mask = &hmp->cpus; + hmp = hmp_faster_domain(cpu_of(se->cfs_rq->rq)); + hmp_target_mask = &hmp->cpus; + if (target_cpu >= 0) { + BUG_ON(!cpumask_test_cpu(target_cpu, hmp_target_mask)); + hmp_target_mask = cpumask_of(target_cpu); } /* The currently running task is not on the runqueue */ se = __pick_first_entity(cfs_rq_of(se)); while (num_tasks && se) { if (entity_is_task(se) && - (se->avg.load_avg_ratio > max_ratio && - hmp_target_mask && - cpumask_intersects(hmp_target_mask, - tsk_cpus_allowed(task_of(se))))) { + se->avg.load_avg_ratio > max_ratio && + cpumask_intersects(hmp_target_mask, + tsk_cpus_allowed(task_of(se)))) { max_se = se; max_ratio = se->avg.load_avg_ratio; } @@ -7126,7 +7127,7 @@ static void hmp_force_up_migration(int this_cpu) } } orig = curr; - curr = hmp_get_heaviest_task(curr, 1); + curr = hmp_get_heaviest_task(curr, -1); p = task_of(curr); if (hmp_up_migration(cpu, &target_cpu, curr)) { cpu_rq(target_cpu)->wake_for_idle_pull = 1; @@ 
-7223,12 +7224,14 @@ static unsigned int hmp_idle_pull(int this_cpu) } } orig = curr; - curr = hmp_get_heaviest_task(curr, 1); + curr = hmp_get_heaviest_task(curr, this_cpu); /* check if heaviest eligible task on this * CPU is heavier than previous task */ if (hmp_task_eligible_for_up_migration(curr) && - curr->avg.load_avg_ratio > ratio) { + curr->avg.load_avg_ratio > ratio && + cpumask_test_cpu(this_cpu, + tsk_cpus_allowed(task_of(curr)))) { p = task_of(curr); target = rq; ratio = curr->avg.load_avg_ratio; From e42126fcf1525129f25c895d26ddd868e5d4dff9 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Mon, 24 Mar 2014 16:45:43 -0700 Subject: [PATCH 159/296] video: adf: replace fbdev helper's open flag with refcount A device's fb_info is shared between clients. fb_release() is called when each client is released, not just the last one. Since the fbdev helper needs to release its dma-buf when the last client goes away, it must keep its own reference count. fbmem and fbcon hold different locks while calling fb_release(), so explicit locking is needed. 
Change-Id: I42cd659f7633adba7c11f407d4b594bd43305d6a Signed-off-by: Greg Hackmann --- drivers/video/adf/adf_fbdev.c | 36 ++++++++++++++++++++++++----------- include/video/adf_fbdev.h | 4 +++- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/video/adf/adf_fbdev.c b/drivers/video/adf/adf_fbdev.c index cac34d14cbc2..9d3c245850af 100644 --- a/drivers/video/adf/adf_fbdev.c +++ b/drivers/video/adf/adf_fbdev.c @@ -356,18 +356,25 @@ int adf_fbdev_open(struct fb_info *info, int user) struct adf_fbdev *fbdev = info->par; int ret; - if (!fbdev->open) { + mutex_lock(&fbdev->refcount_lock); + + if (unlikely(fbdev->refcount == UINT_MAX)) { + ret = -EMFILE; + goto done; + } + + if (!fbdev->refcount) { struct drm_mode_modeinfo mode; struct fb_videomode fbmode; struct adf_device *dev = adf_interface_parent(fbdev->intf); ret = adf_device_attach(dev, fbdev->eng, fbdev->intf); if (ret < 0 && ret != -EALREADY) - return ret; + goto done; ret = adf_fb_alloc(fbdev); if (ret < 0) - return ret; + goto done; adf_interface_current_mode(fbdev->intf, &mode); adf_modeinfo_to_fb_videomode(&mode, &fbmode); @@ -379,13 +386,15 @@ int adf_fbdev_open(struct fb_info *info, int user) ret = adf_fbdev_post(fbdev); if (ret < 0) { - if (!fbdev->open) + if (!fbdev->refcount) adf_fb_destroy(fbdev); - return ret; + goto done; } - fbdev->open = true; - return 0; + fbdev->refcount++; +done: + mutex_unlock(&fbdev->refcount_lock); + return ret; } EXPORT_SYMBOL(adf_fbdev_open); @@ -395,8 +404,12 @@ EXPORT_SYMBOL(adf_fbdev_open); int adf_fbdev_release(struct fb_info *info, int user) { struct adf_fbdev *fbdev = info->par; - adf_fb_destroy(fbdev); - fbdev->open = false; + mutex_lock(&fbdev->refcount_lock); + BUG_ON(!fbdev->refcount); + fbdev->refcount--; + if (!fbdev->refcount) + adf_fb_destroy(fbdev); + mutex_unlock(&fbdev->refcount_lock); return 0; } EXPORT_SYMBOL(adf_fbdev_release); @@ -601,6 +614,7 @@ int adf_fbdev_init(struct adf_fbdev *fbdev, struct adf_interface *interface, dev_err(dev, 
"allocating framebuffer device failed\n"); return -ENOMEM; } + mutex_init(&fbdev->refcount_lock); fbdev->default_xres_virtual = xres_virtual; fbdev->default_yres_virtual = yres_virtual; fbdev->default_format = format; @@ -644,8 +658,8 @@ EXPORT_SYMBOL(adf_fbdev_init); void adf_fbdev_destroy(struct adf_fbdev *fbdev) { unregister_framebuffer(fbdev->info); - if (WARN_ON(fbdev->open)) - adf_fb_destroy(fbdev); + BUG_ON(fbdev->refcount); + mutex_destroy(&fbdev->refcount_lock); framebuffer_release(fbdev->info); } EXPORT_SYMBOL(adf_fbdev_destroy); diff --git a/include/video/adf_fbdev.h b/include/video/adf_fbdev.h index 9c349144b5cd..e4d6fb7b25df 100644 --- a/include/video/adf_fbdev.h +++ b/include/video/adf_fbdev.h @@ -16,6 +16,7 @@ #define _VIDEO_ADF_FBDEV_H_ #include +#include #include