From 4496aa0262a9cc19aa4f248059e38f07cb511381 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 16 Jun 2014 21:21:11 +0100 Subject: [PATCH 001/298] efi: Move common EFI stub code from x86 arch code to common location No code changes made, just moving functions and #define from x86 arch directory to common location. Code is shared using #include, similar to how decompression code is shared among architectures. Signed-off-by: Roy Franz Acked-by: Mark Salter Reviewed-by: Grant Likely Signed-off-by: Matt Fleming (cherry picked from commit 7721da4c1ebf96ff815987011c1a0edef596b50a) Signed-off-by: Mark Brown Conflicts: arch/x86/boot/compressed/eboot.c arch/x86/boot/compressed/eboot.h --- drivers/firmware/efi/efi-stub-helper.c | 148 +++++++------------------ 1 file changed, 38 insertions(+), 110 deletions(-) diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index b6bffbfd3be7..a0282872d97d 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -16,18 +16,6 @@ struct file_info { u64 size; }; - - - -static void efi_char16_printk(efi_system_table_t *sys_table_arg, - efi_char16_t *str) -{ - struct efi_simple_text_output_protocol *out; - - out = (struct efi_simple_text_output_protocol *)sys_table_arg->con_out; - efi_call_phys2(out->output_string, out, str); -} - static void efi_printk(efi_system_table_t *sys_table_arg, char *str) { char *s8; @@ -65,20 +53,23 @@ again: * allocation which may be in a new descriptor region. */ *map_size += sizeof(*m); - status = efi_call_phys3(sys_table_arg->boottime->allocate_pool, - EFI_LOADER_DATA, *map_size, (void **)&m); + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + *map_size, (void **)&m); if (status != EFI_SUCCESS) goto fail; - status = efi_call_phys5(sys_table_arg->boottime->get_memory_map, - map_size, m, &key, desc_size, &desc_version); + *desc_size = 0; + key = 0; + status = efi_early->call(efi_early->get_memory_map, map_size, m, + &key, desc_size, &desc_version); if (status == EFI_BUFFER_TOO_SMALL) { - efi_call_phys1(sys_table_arg->boottime->free_pool, m); + efi_early->call(efi_early->free_pool, m); goto again; } if (status != EFI_SUCCESS) - efi_call_phys1(sys_table_arg->boottime->free_pool, m); + efi_early->call(efi_early->free_pool, m); + if (key_ptr && status == EFI_SUCCESS) *key_ptr = key; if (desc_ver && status == EFI_SUCCESS) @@ -158,9 +149,9 @@ again: if (!max_addr) status = EFI_NOT_FOUND; else { - status = efi_call_phys4(sys_table_arg->boottime->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &max_addr); + status = efi_early->call(efi_early->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &max_addr); if (status != EFI_SUCCESS) { max = max_addr; max_addr = 0; @@ -170,8 +161,7 @@ again: *addr = max_addr; } - efi_call_phys1(sys_table_arg->boottime->free_pool, map); - + efi_early->call(efi_early->free_pool, map); fail: return status; } @@ -231,9 +221,9 @@ static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, if ((start + size) > end) continue; - status = efi_call_phys4(sys_table_arg->boottime->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &start); + status = efi_early->call(efi_early->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &start); if (status == EFI_SUCCESS) { *addr = start; break; @@ -243,7 +233,7 @@ static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, if (i == map_size / desc_size) status = EFI_NOT_FOUND; - efi_call_phys1(sys_table_arg->boottime->free_pool, map); + efi_early->call(efi_early->free_pool, map); fail: return status; } @@ -257,7 +247,7 @@ static void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, return; nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - efi_call_phys2(sys_table_arg->boottime->free_pages, addr, nr_pages); + efi_early->call(efi_early->free_pages, addr, nr_pages); } @@ -276,9 +266,7 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, { struct file_info *files; unsigned long file_addr; - efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; u64 file_size_total; - efi_file_io_interface_t *io; efi_file_handle_t *fh; efi_status_t status; int nr_files; @@ -319,10 +307,8 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, if (!nr_files) return EFI_SUCCESS; - status = efi_call_phys3(sys_table_arg->boottime->allocate_pool, - EFI_LOADER_DATA, - nr_files * sizeof(*files), - (void **)&files); + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + nr_files * sizeof(*files), (void **)&files); if (status != EFI_SUCCESS) { efi_printk(sys_table_arg, "Failed to alloc mem for file handle list\n"); goto fail; @@ -331,13 +317,8 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, str = cmd_line; for (i = 0; i < nr_files; i++) { struct file_info *file; - efi_file_handle_t *h; - efi_file_info_t *info; efi_char16_t filename_16[256]; - unsigned long info_sz; - efi_guid_t info_guid = EFI_FILE_INFO_ID; efi_char16_t *p; - u64 file_sz; str = strstr(str, option_string); if (!str) @@ -368,71 +349,18 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, /* Only open the volume once. */ if (!i) { - efi_boot_services_t *boottime; - - boottime = sys_table_arg->boottime; - - status = efi_call_phys3(boottime->handle_protocol, - image->device_handle, &fs_proto, - (void **)&io); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to handle fs_proto\n"); + status = efi_open_volume(sys_table_arg, image, + (void **)&fh); + if (status != EFI_SUCCESS) goto free_files; - } - - status = efi_call_phys2(io->open_volume, io, &fh); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to open volume\n"); - goto free_files; - } } - status = efi_call_phys5(fh->open, fh, &h, filename_16, - EFI_FILE_MODE_READ, (u64)0); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to open file: "); - efi_char16_printk(sys_table_arg, filename_16); - efi_printk(sys_table_arg, "\n"); + status = efi_file_size(sys_table_arg, fh, filename_16, + (void **)&file->handle, &file->size); + if (status != EFI_SUCCESS) goto close_handles; - } - file->handle = h; - - info_sz = 0; - status = efi_call_phys4(h->get_info, h, &info_guid, - &info_sz, NULL); - if (status != EFI_BUFFER_TOO_SMALL) { - efi_printk(sys_table_arg, "Failed to get file info size\n"); - goto close_handles; - } - -grow: - status = efi_call_phys3(sys_table_arg->boottime->allocate_pool, - EFI_LOADER_DATA, info_sz, - (void **)&info); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to alloc mem for file info\n"); - goto close_handles; - } - - status = efi_call_phys4(h->get_info, h, &info_guid, - &info_sz, info); - if (status == EFI_BUFFER_TOO_SMALL) { - efi_call_phys1(sys_table_arg->boottime->free_pool, - info); - goto grow; - } - - file_sz = info->file_size; - efi_call_phys1(sys_table_arg->boottime->free_pool, info); - - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to get file info\n"); - goto close_handles; - } - - file->size = file_sz; - file_size_total += file_sz; + file_size_total += file->size; } if (file_size_total) { @@ -468,10 +396,10 @@ grow: chunksize = EFI_READ_CHUNK_SIZE; else chunksize = size; - status = efi_call_phys3(fh->read, - files[j].handle, - &chunksize, - (void *)addr); + + status = efi_file_read(fh, files[j].handle, + &chunksize, + (void *)addr); if (status != EFI_SUCCESS) { efi_printk(sys_table_arg, "Failed to read file\n"); goto free_file_total; @@ -480,12 +408,12 @@ grow: size -= chunksize; } - efi_call_phys1(fh->close, files[j].handle); + efi_file_close(fh, files[j].handle); } } - efi_call_phys1(sys_table_arg->boottime->free_pool, files); + efi_early->call(efi_early->free_pool, files); *load_addr = file_addr; *load_size = file_size_total; @@ -497,9 +425,9 @@ free_file_total: close_handles: for (k = j; k < i; k++) - efi_call_phys1(fh->close, files[k].handle); + efi_file_close(fh, files[k].handle); free_files: - efi_call_phys1(sys_table_arg->boottime->free_pool, files); + efi_early->call(efi_early->free_pool, files); fail: *load_addr = 0; *load_size = 0; @@ -545,9 +473,9 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, * as possible while respecting the required alignment. */ nr_pages = round_up(alloc_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - status = efi_call_phys4(sys_table_arg->boottime->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &efi_addr); + status = efi_early->call(efi_early->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &efi_addr); new_addr = efi_addr; /* * If preferred address allocation failed allocate as low as From 7f6dadd94adc9dd9dde0c520c3e7a06c52884d35 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 22 Mar 2014 10:09:01 +0000 Subject: [PATCH 002/298] x86, efi: Abstract x86 efi_early calls The ARM EFI boot stub doesn't need to care about the efi_early infrastructure that x86 requires in order to do mixed mode thunking. So wrap everything up in an efi_call_early() macro. This allows x86 to do the necessary indirection jumps to call whatever firmware interface is necessary (native or mixed mode), but also allows the ARM folks to mask the fact that they don't support relocation in the boot stub and need to pass 'sys_table_arg' to every function. [ hpa: there are no object code changes from this patch ] Signed-off-by: Matt Fleming Link: http://lkml.kernel.org/r/20140326091011.GB2958@console-pimps.org Cc: Roy Franz Signed-off-by: H. Peter Anvin (cherry picked from commit 204b0a1a4b92612c957a042df1a3be0e9cc79391) Signed-off-by: Mark Brown Conflicts: arch/x86/boot/compressed/eboot.c --- drivers/firmware/efi/efi-stub-helper.c | 44 +++++++++++++------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index a0282872d97d..ff50aeebf0d9 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -53,22 +53,22 @@ again: * allocation which may be in a new descriptor region. */ *map_size += sizeof(*m); - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - *map_size, (void **)&m); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + *map_size, (void **)&m); if (status != EFI_SUCCESS) goto fail; *desc_size = 0; key = 0; - status = efi_early->call(efi_early->get_memory_map, map_size, m, - &key, desc_size, &desc_version); + status = efi_call_early(get_memory_map, map_size, m, + &key, desc_size, &desc_version); if (status == EFI_BUFFER_TOO_SMALL) { - efi_early->call(efi_early->free_pool, m); + efi_call_early(free_pool, m); goto again; } if (status != EFI_SUCCESS) - efi_early->call(efi_early->free_pool, m); + efi_call_early(free_pool, m); if (key_ptr && status == EFI_SUCCESS) *key_ptr = key; @@ -149,9 +149,9 @@ again: if (!max_addr) status = EFI_NOT_FOUND; else { - status = efi_early->call(efi_early->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &max_addr); + status = efi_call_early(allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &max_addr); if (status != EFI_SUCCESS) { max = max_addr; max_addr = 0; @@ -161,7 +161,7 @@ again: *addr = max_addr; } - efi_early->call(efi_early->free_pool, map); + efi_call_early(free_pool, map); fail: return status; } @@ -221,9 +221,9 @@ static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, if ((start + size) > end) continue; - status = efi_early->call(efi_early->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &start); + status = efi_call_early(allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &start); if (status == EFI_SUCCESS) { *addr = start; break; @@ -233,7 +233,7 @@ static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, if (i == map_size / desc_size) status = EFI_NOT_FOUND; - efi_early->call(efi_early->free_pool, map); + efi_call_early(free_pool, map); fail: return status; } @@ -247,7 +247,7 @@ static void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, return; nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - efi_early->call(efi_early->free_pages, addr, nr_pages); + efi_call_early(free_pages, addr, nr_pages); } @@ -307,8 +307,8 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, if (!nr_files) return EFI_SUCCESS; - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - nr_files * sizeof(*files), (void **)&files); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + nr_files * sizeof(*files), (void **)&files); if (status != EFI_SUCCESS) { efi_printk(sys_table_arg, "Failed to alloc mem for file handle list\n"); goto fail; @@ -413,7 +413,7 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, } - efi_early->call(efi_early->free_pool, files); + efi_call_early(free_pool, files); *load_addr = file_addr; *load_size = file_size_total; @@ -427,7 +427,7 @@ close_handles: for (k = j; k < i; k++) efi_file_close(fh, files[k].handle); free_files: - efi_early->call(efi_early->free_pool, files); + efi_call_early(free_pool, files); fail: *load_addr = 0; *load_size = 0; @@ -473,9 +473,9 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, * as possible while respecting the required alignment. */ nr_pages = round_up(alloc_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - status = efi_early->call(efi_early->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &efi_addr); + status = efi_call_early(allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &efi_addr); new_addr = efi_addr; /* * If preferred address allocation failed allocate as low as From 8bf79f50f3bdb6f1dc41a0cae347c0f920bb0acb Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 10 Apr 2014 14:11:45 +0100 Subject: [PATCH 003/298] efi: Pass correct file handle to efi_file_{read,close} We're currently passing the file handle for the root file system to efi_file_read() and efi_file_close(), instead of the file handle for the file we wish to read/close. While this has worked up until now, it seems that it has only been by pure luck. Olivier explains, "The issue is the UEFI Fat driver might return the same function for 'fh->read()' and 'h->read()'. While in our case it does not work with a different implementation of EFI_SIMPLE_FILE_SYSTEM_PROTOCOL. In our case, we return a different pointer when reading a directory and reading a file." Fixing this actually clears up the two functions because we can drop one of the arguments, and instead only pass a file 'handle' argument. Reported-by: Olivier Martin Reviewed-by: Olivier Martin Reviewed-by: Mark Rutland Cc: Leif Lindholm Signed-off-by: Matt Fleming (cherry picked from commit 47514c996fac5e6f13ef3a4c5e23f1c5cffabb7b) Signed-off-by: Mark Brown Conflicts: arch/x86/boot/compressed/eboot.c --- drivers/firmware/efi/efi-stub-helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index ff50aeebf0d9..2c41eaece2c1 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -397,7 +397,7 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, else chunksize = size; - status = efi_file_read(fh, files[j].handle, + status = efi_file_read(files[j].handle, &chunksize, (void *)addr); if (status != EFI_SUCCESS) { @@ -408,7 +408,7 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, size -= chunksize; } - efi_file_close(fh, files[j].handle); + efi_file_close(files[j].handle); } } @@ -425,7 +425,7 @@ free_file_total: close_handles: for (k = j; k < i; k++) - efi_file_close(fh, files[k].handle); + efi_file_close(files[k].handle); free_files: efi_call_early(free_pool, files); fail: From 0a5c5fa3babb06834e59d3fc32c1c62ff8b7962c Mon Sep 17 00:00:00 2001 From: Leif Lindholm Date: Fri, 4 Apr 2014 13:25:46 +0100 Subject: [PATCH 004/298] efi: efi-stub-helper cleanup An #ifdef CONFIG_ARM clause in efi-stub-helper.c got included with some of the generic stub rework by Roy Franz. Drop it here to make subsequent patches less confusing. Also, In handle_cmdline_files(), fh is not initialized, and while the overall logic around this handling appears safe, gcc does not always pick this up. Initialize to NULL to remove the resulting warning. Signed-off-by: Leif Lindholm Signed-off-by: Matt Fleming (cherry picked from commit 9403e462fb5ffa9eeaa9663cb23ded02b7e603a3) Signed-off-by: Mark Brown --- drivers/firmware/efi/efi-stub-helper.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index 2c41eaece2c1..47722003b8fc 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -267,7 +267,7 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, struct file_info *files; unsigned long file_addr; u64 file_size_total; - efi_file_handle_t *fh; + efi_file_handle_t *fh = NULL; efi_status_t status; int nr_files; char *str; @@ -536,18 +536,8 @@ static char *efi_convert_cmdline_to_ascii(efi_system_table_t *sys_table_arg, } options_size++; /* NUL termination */ -#ifdef CONFIG_ARM - /* - * For ARM, allocate at a high address to avoid reserved - * regions at low addresses that we don't know the specfics of - * at the time we are processing the command line. - */ - status = efi_high_alloc(sys_table_arg, options_size, 0, - &cmdline_addr, 0xfffff000); -#else - status = efi_low_alloc(sys_table_arg, options_size, 0, - &cmdline_addr); -#endif + + status = efi_low_alloc(sys_table_arg, options_size, 0, &cmdline_addr); if (status != EFI_SUCCESS) return NULL; From eff2c56aaf751ac4f73d77b20334f4f6422a9da3 Mon Sep 17 00:00:00 2001 From: Roy Franz Date: Fri, 13 Dec 2013 11:04:49 -0800 Subject: [PATCH 005/298] efi: Add shared printk wrapper for consistent prefixing Add a wrapper for printk to standardize the prefix for informational and error messages from the EFI stub. Signed-off-by: Roy Franz Signed-off-by: Leif Lindholm Signed-off-by: Matt Fleming (cherry picked from commit f966ea021f947b20c22b31194d7e3042375c7f24) Signed-off-by: Mark Brown --- drivers/firmware/efi/efi-stub-helper.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index 47722003b8fc..1bf439be9138 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -33,6 +33,9 @@ static void efi_printk(efi_system_table_t *sys_table_arg, char *str) } } +#define pr_efi(sys_table, msg) efi_printk(sys_table, "EFI stub: "msg) +#define pr_efi_err(sys_table, msg) efi_printk(sys_table, "EFI stub: ERROR: "msg) + static efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, efi_memory_desc_t **map, @@ -310,7 +313,7 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, status = efi_call_early(allocate_pool, EFI_LOADER_DATA, nr_files * sizeof(*files), (void **)&files); if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to alloc mem for file handle list\n"); + pr_efi_err(sys_table_arg, "Failed to alloc mem for file handle list\n"); goto fail; } @@ -374,13 +377,13 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, status = efi_high_alloc(sys_table_arg, file_size_total, 0x1000, &file_addr, max_addr); if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to alloc highmem for files\n"); + pr_efi_err(sys_table_arg, "Failed to alloc highmem for files\n"); goto close_handles; } /* We've run out of free low memory. */ if (file_addr > max_addr) { - efi_printk(sys_table_arg, "We've run out of free low memory\n"); + pr_efi_err(sys_table_arg, "We've run out of free low memory\n"); status = EFI_INVALID_PARAMETER; goto free_file_total; } @@ -401,7 +404,7 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, &chunksize, (void *)addr); if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to read file\n"); + pr_efi_err(sys_table_arg, "Failed to read file\n"); goto free_file_total; } addr += chunksize; @@ -486,7 +489,7 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, &new_addr); } if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "ERROR: Failed to allocate usable memory for kernel.\n"); + pr_efi_err(sys_table_arg, "Failed to allocate usable memory for kernel.\n"); return status; } From 8ff22e6d038d5edabd1aea7bc8e84571b8438b72 Mon Sep 17 00:00:00 2001 From: Roy Franz Date: Tue, 28 Jan 2014 10:41:28 -0800 Subject: [PATCH 006/298] efi: Add get_dram_base() helper function Add the get_dram_base() function, shared by arm/arm64. Signed-off-by: Roy Franz Signed-off-by: Leif Lindholm Signed-off-by: Matt Fleming (cherry picked from commit 9bb40191e88d23563fd0467ac195debf5f6daaf9) Signed-off-by: Mark Brown --- drivers/firmware/efi/efi-stub-helper.c | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index 1bf439be9138..a168dd20511f 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -11,6 +11,10 @@ */ #define EFI_READ_CHUNK_SIZE (1024 * 1024) +/* error code which can't be mistaken for valid address */ +#define EFI_ERROR (~0UL) + + struct file_info { efi_file_handle_t *handle; u64 size; @@ -83,6 +87,32 @@ fail: return status; } + +static unsigned long __init get_dram_base(efi_system_table_t *sys_table_arg) +{ + efi_status_t status; + unsigned long map_size; + unsigned long membase = EFI_ERROR; + struct efi_memory_map map; + efi_memory_desc_t *md; + + status = efi_get_memory_map(sys_table_arg, (efi_memory_desc_t **)&map.map, + &map_size, &map.desc_size, NULL, NULL); + if (status != EFI_SUCCESS) + return membase; + + map.map_end = map.map + map_size; + + for_each_efi_memory_desc(&map, md) + if (md->attribute & EFI_MEMORY_WB) + if (membase > md->phys_addr) + membase = md->phys_addr; + + efi_call_early(free_pool, map.map); + + return membase; +} + /* * Allocate at the highest possible address that is not above 'max'. */ From ffb248aa56558bcb1492cb2504e721e9a26a9144 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 20 Sep 2013 09:55:39 -0500 Subject: [PATCH 007/298] efi: x86: Handle arbitrary Unicode characters Instead of truncating UTF-16 assuming all characters is ASCII, properly convert it to UTF-8. Signed-off-by: H. Peter Anvin [ Bug and style fixes. ] Signed-off-by: Roy Franz Signed-off-by: Leif Lindholm Signed-off-by: Matt Fleming (cherry picked from commit c625d1c203941fad755eb4eb729db1f65d6e9836) Signed-off-by: Mark Brown --- arch/x86/boot/compressed/eboot.c | 3 +- drivers/firmware/efi/efi-stub-helper.c | 87 ++++++++++++++++++++------ 2 files changed, 68 insertions(+), 22 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index a7677babf946..feca05fa4536 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -488,8 +488,7 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) hdr->type_of_loader = 0x21; /* Convert unicode cmdline to ascii */ - cmdline_ptr = efi_convert_cmdline_to_ascii(sys_table, image, - &options_size); + cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size); if (!cmdline_ptr) goto fail; hdr->cmd_line_ptr = (unsigned long)cmdline_ptr; diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index a168dd20511f..eb6d4be9e722 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -535,53 +535,100 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, return status; } +/* + * Get the number of UTF-8 bytes corresponding to an UTF-16 character. + * This overestimates for surrogates, but that is okay. + */ +static int efi_utf8_bytes(u16 c) +{ + return 1 + (c >= 0x80) + (c >= 0x800); +} + +/* + * Convert an UTF-16 string, not necessarily null terminated, to UTF-8. + */ +static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n) +{ + unsigned int c; + + while (n--) { + c = *src++; + if (n && c >= 0xd800 && c <= 0xdbff && + *src >= 0xdc00 && *src <= 0xdfff) { + c = 0x10000 + ((c & 0x3ff) << 10) + (*src & 0x3ff); + src++; + n--; + } + if (c >= 0xd800 && c <= 0xdfff) + c = 0xfffd; /* Unmatched surrogate */ + if (c < 0x80) { + *dst++ = c; + continue; + } + if (c < 0x800) { + *dst++ = 0xc0 + (c >> 6); + goto t1; + } + if (c < 0x10000) { + *dst++ = 0xe0 + (c >> 12); + goto t2; + } + *dst++ = 0xf0 + (c >> 18); + *dst++ = 0x80 + ((c >> 12) & 0x3f); + t2: + *dst++ = 0x80 + ((c >> 6) & 0x3f); + t1: + *dst++ = 0x80 + (c & 0x3f); + } + + return dst; +} + /* * Convert the unicode UEFI command line to ASCII to pass to kernel. * Size of memory allocated return in *cmd_line_len. * Returns NULL on error. */ -static char *efi_convert_cmdline_to_ascii(efi_system_table_t *sys_table_arg, - efi_loaded_image_t *image, - int *cmd_line_len) +static char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, + efi_loaded_image_t *image, + int *cmd_line_len) { - u16 *s2; + const u16 *s2; u8 *s1 = NULL; unsigned long cmdline_addr = 0; - int load_options_size = image->load_options_size / 2; /* ASCII */ - void *options = image->load_options; - int options_size = 0; + int load_options_chars = image->load_options_size / 2; /* UTF-16 */ + const u16 *options = image->load_options; + int options_bytes = 0; /* UTF-8 bytes */ + int options_chars = 0; /* UTF-16 chars */ efi_status_t status; - int i; u16 zero = 0; if (options) { s2 = options; - while (*s2 && *s2 != '\n' && options_size < load_options_size) { - s2++; - options_size++; + while (*s2 && *s2 != '\n' + && options_chars < load_options_chars) { + options_bytes += efi_utf8_bytes(*s2++); + options_chars++; } } - if (options_size == 0) { + if (!options_chars) { /* No command line options, so return empty string*/ - options_size = 1; options = &zero; } - options_size++; /* NUL termination */ + options_bytes++; /* NUL termination */ - status = efi_low_alloc(sys_table_arg, options_size, 0, &cmdline_addr); + status = efi_low_alloc(sys_table_arg, options_bytes, 0, &cmdline_addr); if (status != EFI_SUCCESS) return NULL; s1 = (u8 *)cmdline_addr; - s2 = (u16 *)options; - - for (i = 0; i < options_size - 1; i++) - *s1++ = *s2++; + s2 = (const u16 *)options; + s1 = efi_utf16_to_utf8(s1, s2, options_chars); *s1 = '\0'; - *cmd_line_len = options_size; + *cmd_line_len = options_bytes; return (char *)cmdline_addr; } From 5d29ff629d76df88827f7bc3caa82510c2422fa1 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 30 Dec 2013 12:12:12 -0500 Subject: [PATCH 008/298] efi: add helper function to get UEFI params from FDT ARM and ARM64 architectures use the device tree to pass UEFI parameters from stub to kernel. These parameters are things known to the stub but not discoverable by the kernel after the stub calls ExitBootSerives(). There is a helper function in: drivers/firmware/efi/fdt.c which the stub uses to add the UEFI parameters to the device tree. This patch adds a complimentary helper function which UEFI runtime support may use to retrieve the parameters from the device tree. If an architecture wants to use this helper, it should select CONFIG_EFI_PARAMS_FROM_FDT. Signed-off-by: Mark Salter Signed-off-by: Leif Lindholm Acked-by: Catalin Marinas Signed-off-by: Matt Fleming (cherry picked from commit 0302f71c0aa59571ac306f93068fbbfe65ea349b) Signed-off-by: Mark Brown Conflicts: drivers/firmware/efi/Kconfig Conflicts: drivers/firmware/efi/Kconfig --- drivers/firmware/efi/Kconfig | 7 ++++ drivers/firmware/efi/efi.c | 79 ++++++++++++++++++++++++++++++++++++ include/linux/efi.h | 9 ++++ 3 files changed, 95 insertions(+) diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 1e75f48b61f8..d420ae2d3413 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -47,6 +47,13 @@ config EFI_RUNTIME_MAP See also Documentation/ABI/testing/sysfs-firmware-efi-runtime-map. +config EFI_PARAMS_FROM_FDT + bool + help + Select this config option from the architecture Kconfig if + the EFI runtime support gets system table address, memory + map address, and other parameters from the device tree. + endmenu config UEFI_CPER diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4753bac65279..83c2edf5a640 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include struct efi __read_mostly efi = { @@ -315,3 +317,80 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables) early_iounmap(config_tables, efi.systab->nr_tables * sz); return 0; } + +#ifdef CONFIG_EFI_PARAMS_FROM_FDT + +#define UEFI_PARAM(name, prop, field) \ + { \ + { name }, \ + { prop }, \ + offsetof(struct efi_fdt_params, field), \ + FIELD_SIZEOF(struct efi_fdt_params, field) \ + } + +static __initdata struct { + const char name[32]; + const char propname[32]; + int offset; + int size; +} dt_params[] = { + UEFI_PARAM("System Table", "linux,uefi-system-table", system_table), + UEFI_PARAM("MemMap Address", "linux,uefi-mmap-start", mmap), + UEFI_PARAM("MemMap Size", "linux,uefi-mmap-size", mmap_size), + UEFI_PARAM("MemMap Desc. Size", "linux,uefi-mmap-desc-size", desc_size), + UEFI_PARAM("MemMap Desc. Version", "linux,uefi-mmap-desc-ver", desc_ver) +}; + +struct param_info { + int verbose; + void *params; +}; + +static int __init fdt_find_uefi_params(unsigned long node, const char *uname, + int depth, void *data) +{ + struct param_info *info = data; + void *prop, *dest; + unsigned long len; + u64 val; + int i; + + if (depth != 1 || + (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) + return 0; + + pr_info("Getting parameters from FDT:\n"); + + for (i = 0; i < ARRAY_SIZE(dt_params); i++) { + prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len); + if (!prop) { + pr_err("Can't find %s in device tree!\n", + dt_params[i].name); + return 0; + } + dest = info->params + dt_params[i].offset; + + val = of_read_number(prop, len / sizeof(u32)); + + if (dt_params[i].size == sizeof(u32)) + *(u32 *)dest = val; + else + *(u64 *)dest = val; + + if (info->verbose) + pr_info(" %s: 0x%0*llx\n", dt_params[i].name, + dt_params[i].size * 2, val); + } + return 1; +} + +int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose) +{ + struct param_info info; + + info.verbose = verbose; + info.params = params; + + return of_scan_flat_dt(fdt_find_uefi_params, &info); +} +#endif /* CONFIG_EFI_PARAMS_FROM_FDT */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 0a819e7a60c9..11c1ad2c1982 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -483,6 +483,14 @@ struct efi_memory_map { unsigned long desc_size; }; +struct efi_fdt_params { + u64 system_table; + u64 mmap; + u32 mmap_size; + u32 desc_size; + u32 desc_ver; +}; + typedef struct { u32 revision; void *parent_handle; @@ -619,6 +627,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, extern void efi_get_time(struct timespec *now); extern int efi_set_rtc_mmss(const struct timespec *now); extern void efi_reserve_boot_services(void); +extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose); extern struct efi_memory_map memmap; /** From 3dce72e58d545c0e4716089ae34e0dde327aa75a Mon Sep 17 00:00:00 2001 From: Roy Franz Date: Wed, 8 Jan 2014 17:54:19 -0800 Subject: [PATCH 009/298] efi: Add shared FDT related functions for ARM/ARM64 Both ARM and ARM64 stubs will update the device tree that they pass to the kernel. In both cases they primarily need to add the same UEFI related information, so the function can be shared. Create a new FDT related file for this to avoid use of architecture #ifdefs in efi-stub-helper.c. Signed-off-by: Roy Franz [ Fixed memory node deletion code. ] Signed-off-by: Mark Rutland Signed-off-by: Leif Lindholm Acked-by: Grant Likely Acked-by: Catalin Marinas Signed-off-by: Matt Fleming (cherry picked from commit 263b4a30bfdb0d756ae9c70c6ff2eef1eb951770) Signed-off-by: Mark Brown --- drivers/firmware/efi/fdt.c | 285 +++++++++++++++++++++++++++++++++++++ include/linux/efi.h | 3 + 2 files changed, 288 insertions(+) create mode 100644 drivers/firmware/efi/fdt.c diff --git a/drivers/firmware/efi/fdt.c b/drivers/firmware/efi/fdt.c new file mode 100644 index 000000000000..5c6a8e8a9580 --- /dev/null +++ b/drivers/firmware/efi/fdt.c @@ -0,0 +1,285 @@ +/* + * FDT related Helper functions used by the EFI stub on multiple + * architectures. This should be #included by the EFI stub + * implementation files. + * + * Copyright 2013 Linaro Limited; author Roy Franz + * + * This file is part of the Linux kernel, and is made available + * under the terms of the GNU General Public License version 2. + * + */ + +static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, + unsigned long orig_fdt_size, + void *fdt, int new_fdt_size, char *cmdline_ptr, + u64 initrd_addr, u64 initrd_size, + efi_memory_desc_t *memory_map, + unsigned long map_size, unsigned long desc_size, + u32 desc_ver) +{ + int node, prev; + int status; + u32 fdt_val32; + u64 fdt_val64; + + /* + * Copy definition of linux_banner here. Since this code is + * built as part of the decompressor for ARM v7, pulling + * in version.c where linux_banner is defined for the + * kernel brings other kernel dependencies with it. + */ + const char linux_banner[] = + "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; + + /* Do some checks on provided FDT, if it exists*/ + if (orig_fdt) { + if (fdt_check_header(orig_fdt)) { + pr_efi_err(sys_table, "Device Tree header not valid!\n"); + return EFI_LOAD_ERROR; + } + /* + * We don't get the size of the FDT if we get if from a + * configuration table. + */ + if (orig_fdt_size && fdt_totalsize(orig_fdt) > orig_fdt_size) { + pr_efi_err(sys_table, "Truncated device tree! foo!\n"); + return EFI_LOAD_ERROR; + } + } + + if (orig_fdt) + status = fdt_open_into(orig_fdt, fdt, new_fdt_size); + else + status = fdt_create_empty_tree(fdt, new_fdt_size); + + if (status != 0) + goto fdt_set_fail; + + /* + * Delete any memory nodes present. We must delete nodes which + * early_init_dt_scan_memory may try to use. + */ + prev = 0; + for (;;) { + const char *type, *name; + int len; + + node = fdt_next_node(fdt, prev, NULL); + if (node < 0) + break; + + type = fdt_getprop(fdt, node, "device_type", &len); + if (type && strncmp(type, "memory", len) == 0) { + fdt_del_node(fdt, node); + continue; + } + + prev = node; + } + + node = fdt_subnode_offset(fdt, 0, "chosen"); + if (node < 0) { + node = fdt_add_subnode(fdt, 0, "chosen"); + if (node < 0) { + status = node; /* node is error code when negative */ + goto fdt_set_fail; + } + } + + if ((cmdline_ptr != NULL) && (strlen(cmdline_ptr) > 0)) { + status = fdt_setprop(fdt, node, "bootargs", cmdline_ptr, + strlen(cmdline_ptr) + 1); + if (status) + goto fdt_set_fail; + } + + /* Set initrd address/end in device tree, if present */ + if (initrd_size != 0) { + u64 initrd_image_end; + u64 initrd_image_start = cpu_to_fdt64(initrd_addr); + + status = fdt_setprop(fdt, node, "linux,initrd-start", + &initrd_image_start, sizeof(u64)); + if (status) + goto fdt_set_fail; + initrd_image_end = cpu_to_fdt64(initrd_addr + initrd_size); + status = fdt_setprop(fdt, node, "linux,initrd-end", + &initrd_image_end, sizeof(u64)); + if (status) + goto fdt_set_fail; + } + + /* Add FDT entries for EFI runtime services in chosen node. */ + node = fdt_subnode_offset(fdt, 0, "chosen"); + fdt_val64 = cpu_to_fdt64((u64)(unsigned long)sys_table); + status = fdt_setprop(fdt, node, "linux,uefi-system-table", + &fdt_val64, sizeof(fdt_val64)); + if (status) + goto fdt_set_fail; + + fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map); + status = fdt_setprop(fdt, node, "linux,uefi-mmap-start", + &fdt_val64, sizeof(fdt_val64)); + if (status) + goto fdt_set_fail; + + fdt_val32 = cpu_to_fdt32(map_size); + status = fdt_setprop(fdt, node, "linux,uefi-mmap-size", + &fdt_val32, sizeof(fdt_val32)); + if (status) + goto fdt_set_fail; + + fdt_val32 = cpu_to_fdt32(desc_size); + status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-size", + &fdt_val32, sizeof(fdt_val32)); + if (status) + goto fdt_set_fail; + + fdt_val32 = cpu_to_fdt32(desc_ver); + status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-ver", + &fdt_val32, sizeof(fdt_val32)); + if (status) + goto fdt_set_fail; + + /* + * Add kernel version banner so stub/kernel match can be + * verified. + */ + status = fdt_setprop_string(fdt, node, "linux,uefi-stub-kern-ver", + linux_banner); + if (status) + goto fdt_set_fail; + + return EFI_SUCCESS; + +fdt_set_fail: + if (status == -FDT_ERR_NOSPACE) + return EFI_BUFFER_TOO_SMALL; + + return EFI_LOAD_ERROR; +} + +#ifndef EFI_FDT_ALIGN +#define EFI_FDT_ALIGN EFI_PAGE_SIZE +#endif + +/* + * Allocate memory for a new FDT, then add EFI, commandline, and + * initrd related fields to the FDT. This routine increases the + * FDT allocation size until the allocated memory is large + * enough. EFI allocations are in EFI_PAGE_SIZE granules, + * which are fixed at 4K bytes, so in most cases the first + * allocation should succeed. + * EFI boot services are exited at the end of this function. + * There must be no allocations between the get_memory_map() + * call and the exit_boot_services() call, so the exiting of + * boot services is very tightly tied to the creation of the FDT + * with the final memory map in it. + */ + +efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, + void *handle, + unsigned long *new_fdt_addr, + unsigned long max_addr, + u64 initrd_addr, u64 initrd_size, + char *cmdline_ptr, + unsigned long fdt_addr, + unsigned long fdt_size) +{ + unsigned long map_size, desc_size; + u32 desc_ver; + unsigned long mmap_key; + efi_memory_desc_t *memory_map; + unsigned long new_fdt_size; + efi_status_t status; + + /* + * Estimate size of new FDT, and allocate memory for it. We + * will allocate a bigger buffer if this ends up being too + * small, so a rough guess is OK here. + */ + new_fdt_size = fdt_size + EFI_PAGE_SIZE; + while (1) { + status = efi_high_alloc(sys_table, new_fdt_size, EFI_FDT_ALIGN, + new_fdt_addr, max_addr); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Unable to allocate memory for new device tree.\n"); + goto fail; + } + + /* + * Now that we have done our final memory allocation (and free) + * we can get the memory map key needed for + * exit_boot_services(). + */ + status = efi_get_memory_map(sys_table, &memory_map, &map_size, + &desc_size, &desc_ver, &mmap_key); + if (status != EFI_SUCCESS) + goto fail_free_new_fdt; + + status = update_fdt(sys_table, + (void *)fdt_addr, fdt_size, + (void *)*new_fdt_addr, new_fdt_size, + cmdline_ptr, initrd_addr, initrd_size, + memory_map, map_size, desc_size, desc_ver); + + /* Succeeding the first time is the expected case. */ + if (status == EFI_SUCCESS) + break; + + if (status == EFI_BUFFER_TOO_SMALL) { + /* + * We need to allocate more space for the new + * device tree, so free existing buffer that is + * too small. Also free memory map, as we will need + * to get new one that reflects the free/alloc we do + * on the device tree buffer. + */ + efi_free(sys_table, new_fdt_size, *new_fdt_addr); + sys_table->boottime->free_pool(memory_map); + new_fdt_size += EFI_PAGE_SIZE; + } else { + pr_efi_err(sys_table, "Unable to constuct new device tree.\n"); + goto fail_free_mmap; + } + } + + /* Now we are ready to exit_boot_services.*/ + status = sys_table->boottime->exit_boot_services(handle, mmap_key); + + + if (status == EFI_SUCCESS) + return status; + + pr_efi_err(sys_table, "Exit boot services failed.\n"); + +fail_free_mmap: + sys_table->boottime->free_pool(memory_map); + +fail_free_new_fdt: + efi_free(sys_table, new_fdt_size, *new_fdt_addr); + +fail: + return EFI_LOAD_ERROR; +} + +static void *get_fdt(efi_system_table_t *sys_table) +{ + efi_guid_t fdt_guid = DEVICE_TREE_GUID; + efi_config_table_t *tables; + void *fdt; + int i; + + tables = (efi_config_table_t *) sys_table->tables; + fdt = NULL; + + for (i = 0; i < sys_table->nr_tables; i++) + if (efi_guidcmp(tables[i].guid, fdt_guid) == 0) { + fdt = (void *) tables[i].table; + break; + } + + return fdt; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 11c1ad2c1982..dc1a28bb8b8b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -394,6 +394,9 @@ typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long si #define EFI_FILE_SYSTEM_GUID \ EFI_GUID( 0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b ) +#define DEVICE_TREE_GUID \ + EFI_GUID( 0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0 ) + typedef struct { efi_guid_t guid; u64 table; From c6a99bb2c26a0ca26bf3e732172673337ed0bc79 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Fri, 10 Jan 2014 14:26:06 -0500 Subject: [PATCH 010/298] efi: create memory map iteration helper There are a lot of places in the kernel which iterate through an EFI memory map. Most of these places use essentially the same for-loop code. This patch adds a for_each_efi_memory_desc() helper to clean up all of the existing duplicate code and avoid more in the future. Signed-off-by: Mark Salter Signed-off-by: Leif Lindholm Signed-off-by: Matt Fleming (cherry picked from commit e885cd805fc6e65ef5150a211c7bac02f925af04) Signed-off-by: Mark Brown --- include/linux/efi.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/efi.h b/include/linux/efi.h index dc1a28bb8b8b..a50bf34c041b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -633,6 +633,12 @@ extern void efi_reserve_boot_services(void); extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose); extern struct efi_memory_map memmap; +/* Iterate through an efi_memory_map */ +#define for_each_efi_memory_desc(m, md) \ + for ((md) = (m)->map; \ + (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \ + (md) = (void *)(md) + (m)->desc_size) + /** * efi_range_is_wc - check the WC bit on an address range * @start: starting kvirt address From e56a7353e01997bfd2c4ef53a3126767ef22ac15 Mon Sep 17 00:00:00 2001 From: Roy Franz Date: Sun, 22 Sep 2013 15:45:26 -0700 Subject: [PATCH 011/298] efi: Add proper definitions for some EFI function pointers. The x86/AMD64 EFI stubs must use a call wrapper to convert between the Linux and EFI ABIs, so void pointers are sufficient. For ARM, the ABIs are compatible, so we can directly invoke the function pointers. The functions that are used by the ARM stub are updated to match the EFI definitions. Also add some EFI types used by EFI functions. Signed-off-by: Roy Franz Acked-by: Mark Salter Reviewed-by: Grant Likely Signed-off-by: Matt Fleming (cherry picked from commit ed37ddffe201bfad7be3c45bc08bd65b5298adca) Signed-off-by: Mark Brown Conflicts: arch/x86/boot/compressed/eboot.h --- arch/x86/boot/compressed/eboot.h | 1 + include/linux/efi.h | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 81b6b652b46a..02265107cce3 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -10,6 +10,7 @@ #define SEG_GRANULARITY_4KB (1 << 0) #define DESC_TYPE_CODE_DATA (1 << 0) +#define EFI_READ_CHUNK_SIZE (1024 * 1024) #define EFI_CONSOLE_OUT_DEVICE_GUID \ EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \ diff --git a/include/linux/efi.h b/include/linux/efi.h index a50bf34c041b..048ee805dfc5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -834,6 +834,7 @@ struct efi_simple_text_output_protocol { void *test_string; }; + extern struct list_head efivar_sysfs_list; static inline void From 46e7056874b66d01106b6617bef2ae5e4e039fd8 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 15 Jan 2014 13:21:22 +0000 Subject: [PATCH 012/298] efi: Move facility flags to struct efi As we grow support for more EFI architectures they're going to want the ability to query which EFI features are available on the running system. Instead of storing this information in an architecture-specific place, stick it in the global 'struct efi', which is already the central location for EFI state. While we're at it, let's change the return value of efi_enabled() to be bool and replace all references to 'facility' with 'feature', which is the usual word used to describe the attributes of the running system. Signed-off-by: Matt Fleming (cherry picked from commit 3e909599215456928e6b42a04f11c2517881570b) Signed-off-by: Mark Brown Conflicts: arch/x86/include/asm/efi.h arch/x86/platform/efi/efi.c Conflicts: arch/x86/include/asm/efi.h --- arch/x86/kernel/setup.c | 6 +++--- arch/x86/platform/efi/efi.c | 14 +++++++------- include/linux/efi.h | 20 ++++++++++++++------ 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ce72964b2f46..fa511acff7e6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -926,11 +926,11 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, "EL32", 4)) { - set_bit(EFI_BOOT, &x86_efi_facility); + set_bit(EFI_BOOT, &efi.flags); } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, "EL64", 4)) { - set_bit(EFI_BOOT, &x86_efi_facility); - set_bit(EFI_64BIT, &x86_efi_facility); + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); } if (efi_enabled(EFI_BOOT)) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index b97acecf3fd9..a4756ae4dc2e 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -68,8 +68,6 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; -unsigned long x86_efi_facility; - static __initdata efi_config_table_type_t arch_tables[] = { #ifdef CONFIG_X86_UV {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, @@ -88,6 +86,8 @@ int efi_enabled(int facility) } EXPORT_SYMBOL(efi_enabled); +u64 efi_setup; /* efi setup_data physical address */ + static bool __initdata disable_runtime = false; static int __init setup_noefi(char *arg) { @@ -456,7 +456,7 @@ void __init efi_reserve_boot_services(void) void __init efi_unmap_memmap(void) { - clear_bit(EFI_MEMMAP, &x86_efi_facility); + clear_bit(EFI_MEMMAP, &efi.flags); if (memmap.map) { early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); memmap.map = NULL; @@ -723,7 +723,7 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; - set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); + set_bit(EFI_SYSTEM_TABLES, &efi.flags); efi.config_table = (unsigned long)efi.systab->tables; efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; @@ -751,7 +751,7 @@ void __init efi_init(void) if (efi_config_init(arch_tables)) return; - set_bit(EFI_CONFIG_TABLES, &x86_efi_facility); + set_bit(EFI_CONFIG_TABLES, &efi.flags); /* * Note: We currently don't support runtime services on an EFI @@ -763,12 +763,12 @@ void __init efi_init(void) else { if (disable_runtime || efi_runtime_init()) return; - set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); } if (efi_memmap_init()) return; - set_bit(EFI_MEMMAP, &x86_efi_facility); + set_bit(EFI_MEMMAP, &efi.flags); print_efi_memmap(); } diff --git a/include/linux/efi.h b/include/linux/efi.h index 048ee805dfc5..d674acdc436f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -584,6 +584,7 @@ extern struct efi { efi_reset_system_t *reset_system; efi_set_virtual_address_map_t *set_virtual_address_map; struct efi_memory_map *memmap; + unsigned long flags; } efi; static inline int @@ -678,17 +679,24 @@ extern int __init efi_setup_pcdp_console(char *); #ifdef CONFIG_EFI # ifdef CONFIG_X86 -extern int efi_enabled(int facility); -# else -static inline int efi_enabled(int facility) + +/* + * Test whether the above EFI_* bits are enabled. + */ +static inline bool efi_enabled(int feature) { - return 1; + return test_bit(feature, &efi.flags) != 0; +} +# else +static inline bool efi_enabled(int feature) +{ + return true; } # endif #else -static inline int efi_enabled(int facility) +static inline bool efi_enabled(int feature) { - return 0; + return false; } #endif From acfb0b1c85b44a0c4113ddf556ef12b00ad5d952 Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Thu, 13 Feb 2014 17:16:36 +0900 Subject: [PATCH 013/298] efi: Use NULL instead of 0 for pointer Fix following sparse warnings: drivers/firmware/efi/efivars.c:230:66: warning: Using plain integer as NULL pointer drivers/firmware/efi/efi.c:236:27: warning: Using plain integer as NULL pointer Signed-off-by: Daeseok Youn Signed-off-by: Matt Fleming (cherry picked from commit 69e608411473ac56358ef35277563982d0565381) Signed-off-by: Mark Brown --- drivers/firmware/efi/efi.c | 2 +- drivers/firmware/efi/efivars.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 83c2edf5a640..c547de817108 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -235,7 +235,7 @@ static __initdata efi_config_table_type_t common_tables[] = { {SAL_SYSTEM_TABLE_GUID, "SALsystab", &efi.sal_systab}, {SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios}, {UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga}, - {NULL_GUID, NULL, 0}, + {NULL_GUID, NULL, NULL}, }; static __init int match_config_table(efi_guid_t *guid, diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 3dc248239197..50ea412a25e6 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -227,7 +227,7 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) memcpy(&entry->var, new_var, count); err = efivar_entry_set(entry, new_var->Attributes, - new_var->DataSize, new_var->Data, false); + new_var->DataSize, new_var->Data, NULL); if (err) { printk(KERN_WARNING "efivars: set_variable() failed: status=%d\n", err); return -EIO; From 50be1a9b5658cc698dcdd1feac2013fee7dab507 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 16 Mar 2014 12:14:49 +0000 Subject: [PATCH 014/298] efivars: Use local variables instead of a pointer dereference In order to support a compat interface we need to stop passing pointers to structures around, since the type of structure is going to depend on whether the current task is a compat task. Cc: Mike Waychison Signed-off-by: Matt Fleming (cherry picked from commit bafc84d539c0ffa916037840df54428623abc3e6) Signed-off-by: Mark Brown --- drivers/firmware/efi/efivars.c | 48 +++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 50ea412a25e6..06ec6ee1c58a 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -197,37 +197,48 @@ static ssize_t efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) { struct efi_variable *new_var, *var = &entry->var; + efi_char16_t *name; + unsigned long size; + efi_guid_t vendor; + u32 attributes; + u8 *data; int err; if (count != sizeof(struct efi_variable)) return -EINVAL; new_var = (struct efi_variable *)buf; + + attributes = new_var->Attributes; + vendor = new_var->VendorGuid; + name = new_var->VariableName; + size = new_var->DataSize; + data = new_var->Data; + /* * If only updating the variable data, then the name * and guid should remain the same */ - if (memcmp(new_var->VariableName, var->VariableName, sizeof(var->VariableName)) || - efi_guidcmp(new_var->VendorGuid, var->VendorGuid)) { + if (memcmp(name, var->VariableName, sizeof(var->VariableName)) || + efi_guidcmp(vendor, var->VendorGuid)) { printk(KERN_ERR "efivars: Cannot edit the wrong variable!\n"); return -EINVAL; } - if ((new_var->DataSize <= 0) || (new_var->Attributes == 0)){ + if ((size <= 0) || (attributes == 0)){ printk(KERN_ERR "efivars: DataSize & Attributes must be valid!\n"); return -EINVAL; } - if ((new_var->Attributes & ~EFI_VARIABLE_MASK) != 0 || - efivar_validate(new_var, new_var->Data, new_var->DataSize) == false) { + if ((attributes & ~EFI_VARIABLE_MASK) != 0 || + efivar_validate(new_var, data, size) == false) { printk(KERN_ERR "efivars: Malformed variable content\n"); return -EINVAL; } memcpy(&entry->var, new_var, count); - err = efivar_entry_set(entry, new_var->Attributes, - new_var->DataSize, new_var->Data, NULL); + err = efivar_entry_set(entry, attributes, size, data, NULL); if (err) { printk(KERN_WARNING "efivars: set_variable() failed: status=%d\n", err); return -EIO; @@ -328,13 +339,20 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, { struct efi_variable *new_var = (struct efi_variable *)buf; struct efivar_entry *new_entry; + unsigned long size; + u32 attributes; + u8 *data; int err; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if ((new_var->Attributes & ~EFI_VARIABLE_MASK) != 0 || - efivar_validate(new_var, new_var->Data, new_var->DataSize) == false) { + attributes = new_var->Attributes; + size = new_var->DataSize; + data = new_var->Data; + + if ((attributes & ~EFI_VARIABLE_MASK) != 0 || + efivar_validate(new_var, data, size) == false) { printk(KERN_ERR "efivars: Malformed variable content\n"); return -EINVAL; } @@ -345,8 +363,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, memcpy(&new_entry->var, new_var, sizeof(*new_var)); - err = efivar_entry_set(new_entry, new_var->Attributes, new_var->DataSize, - new_var->Data, &efivar_sysfs_list); + err = efivar_entry_set(new_entry, attributes, size, + data, &efivar_sysfs_list); if (err) { if (err == -EEXIST) err = -EINVAL; @@ -370,14 +388,18 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, { struct efi_variable *del_var = (struct efi_variable *)buf; struct efivar_entry *entry; + efi_char16_t *name; + efi_guid_t vendor; int err = 0; if (!capable(CAP_SYS_ADMIN)) return -EACCES; + name = del_var->VariableName; + vendor = del_var->VendorGuid; + efivar_entry_iter_begin(); - entry = efivar_entry_find(del_var->VariableName, del_var->VendorGuid, - &efivar_sysfs_list, true); + entry = efivar_entry_find(name, vendor, &efivar_sysfs_list, true); if (!entry) err = -EINVAL; else if (__efivar_entry_delete(entry)) From 04c1c34efab786ab524af83aaf378a3a799d1bdb Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 17 Mar 2014 09:17:28 +0000 Subject: [PATCH 015/298] efivars: Check size of user object Unbelieavably there are no checks to see whether the data structure passed to 'new_var' and 'del_var' is the size that we expect. Let's add some for better robustness. Cc: Mike Waychison Signed-off-by: Matt Fleming (cherry picked from commit e003bbee2a6a19a4c733335989284caf1b179e0d) Signed-off-by: Mark Brown --- drivers/firmware/efi/efivars.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 06ec6ee1c58a..2c21cccc2572 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -347,6 +347,9 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (count != sizeof(*new_var)) + return -EINVAL; + attributes = new_var->Attributes; size = new_var->DataSize; data = new_var->Data; @@ -395,6 +398,9 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (count != sizeof(*del_var)) + return -EINVAL; + name = del_var->VariableName; vendor = del_var->VendorGuid; From 19a52566e810292700619824de6550bac6f5dec4 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 17 Mar 2014 10:57:00 +0000 Subject: [PATCH 016/298] efivars: Stop passing a struct argument to efivar_validate() In preparation for compat support, we can't assume that user variable object is represented by a 'struct efi_variable'. Convert the validation functions to take the variable name as an argument, which is the only piece of the struct that was ever used anyway. Cc: Mike Waychison Signed-off-by: Matt Fleming (cherry picked from commit a5d92ad32dad94fd8f3f61778561d532bb3a2f77) Signed-off-by: Mark Brown --- drivers/firmware/efi/efivars.c | 6 ++++-- drivers/firmware/efi/vars.c | 30 +++++++++++++++--------------- include/linux/efi.h | 6 ++++-- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 2c21cccc2572..5ee2cfb96698 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -231,7 +231,7 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) } if ((attributes & ~EFI_VARIABLE_MASK) != 0 || - efivar_validate(new_var, data, size) == false) { + efivar_validate(name, data, size) == false) { printk(KERN_ERR "efivars: Malformed variable content\n"); return -EINVAL; } @@ -339,6 +339,7 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, { struct efi_variable *new_var = (struct efi_variable *)buf; struct efivar_entry *new_entry; + efi_char16_t *name; unsigned long size; u32 attributes; u8 *data; @@ -351,11 +352,12 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, return -EINVAL; attributes = new_var->Attributes; + name = new_var->VariableName; size = new_var->DataSize; data = new_var->Data; if ((attributes & ~EFI_VARIABLE_MASK) != 0 || - efivar_validate(new_var, data, size) == false) { + efivar_validate(name, data, size) == false) { printk(KERN_ERR "efivars: Malformed variable content\n"); return -EINVAL; } diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index b22659cccca4..f0a43646a2f3 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -42,7 +42,7 @@ DECLARE_WORK(efivar_work, NULL); EXPORT_SYMBOL_GPL(efivar_work); static bool -validate_device_path(struct efi_variable *var, int match, u8 *buffer, +validate_device_path(efi_char16_t *var_name, int match, u8 *buffer, unsigned long len) { struct efi_generic_dev_path *node; @@ -75,7 +75,7 @@ validate_device_path(struct efi_variable *var, int match, u8 *buffer, } static bool -validate_boot_order(struct efi_variable *var, int match, u8 *buffer, +validate_boot_order(efi_char16_t *var_name, int match, u8 *buffer, unsigned long len) { /* An array of 16-bit integers */ @@ -86,18 +86,18 @@ validate_boot_order(struct efi_variable *var, int match, u8 *buffer, } static bool -validate_load_option(struct efi_variable *var, int match, u8 *buffer, +validate_load_option(efi_char16_t *var_name, int match, u8 *buffer, unsigned long len) { u16 filepathlength; int i, desclength = 0, namelen; - namelen = ucs2_strnlen(var->VariableName, sizeof(var->VariableName)); + namelen = ucs2_strnlen(var_name, EFI_VAR_NAME_LEN); /* Either "Boot" or "Driver" followed by four digits of hex */ for (i = match; i < match+4; i++) { - if (var->VariableName[i] > 127 || - hex_to_bin(var->VariableName[i] & 0xff) < 0) + if (var_name[i] > 127 || + hex_to_bin(var_name[i] & 0xff) < 0) return true; } @@ -132,12 +132,12 @@ validate_load_option(struct efi_variable *var, int match, u8 *buffer, /* * And, finally, check the filepath */ - return validate_device_path(var, match, buffer + desclength + 6, + return validate_device_path(var_name, match, buffer + desclength + 6, filepathlength); } static bool -validate_uint16(struct efi_variable *var, int match, u8 *buffer, +validate_uint16(efi_char16_t *var_name, int match, u8 *buffer, unsigned long len) { /* A single 16-bit integer */ @@ -148,7 +148,7 @@ validate_uint16(struct efi_variable *var, int match, u8 *buffer, } static bool -validate_ascii_string(struct efi_variable *var, int match, u8 *buffer, +validate_ascii_string(efi_char16_t *var_name, int match, u8 *buffer, unsigned long len) { int i; @@ -166,7 +166,7 @@ validate_ascii_string(struct efi_variable *var, int match, u8 *buffer, struct variable_validate { char *name; - bool (*validate)(struct efi_variable *var, int match, u8 *data, + bool (*validate)(efi_char16_t *var_name, int match, u8 *data, unsigned long len); }; @@ -189,10 +189,10 @@ static const struct variable_validate variable_validate[] = { }; bool -efivar_validate(struct efi_variable *var, u8 *data, unsigned long len) +efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len) { int i; - u16 *unicode_name = var->VariableName; + u16 *unicode_name = var_name; for (i = 0; variable_validate[i].validate != NULL; i++) { const char *name = variable_validate[i].name; @@ -208,7 +208,7 @@ efivar_validate(struct efi_variable *var, u8 *data, unsigned long len) /* Wildcard in the matching name means we've matched */ if (c == '*') - return variable_validate[i].validate(var, + return variable_validate[i].validate(var_name, match, data, len); /* Case sensitive match */ @@ -217,7 +217,7 @@ efivar_validate(struct efi_variable *var, u8 *data, unsigned long len) /* Reached the end of the string while matching */ if (!c) - return variable_validate[i].validate(var, + return variable_validate[i].validate(var_name, match, data, len); } } @@ -805,7 +805,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, *set = false; - if (efivar_validate(&entry->var, data, *size) == false) + if (efivar_validate(name, data, *size) == false) return -EINVAL; /* diff --git a/include/linux/efi.h b/include/linux/efi.h index d674acdc436f..3fce870313cf 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -818,8 +818,10 @@ struct efivars { * and we use a page for reading/writing. */ +#define EFI_VAR_NAME_LEN 1024 + struct efi_variable { - efi_char16_t VariableName[1024/sizeof(efi_char16_t)]; + efi_char16_t VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)]; efi_guid_t VendorGuid; unsigned long DataSize; __u8 Data[1024]; @@ -891,7 +893,7 @@ int efivar_entry_iter(int (*func)(struct efivar_entry *, void *), struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, struct list_head *head, bool remove); -bool efivar_validate(struct efi_variable *var, u8 *data, unsigned long len); +bool efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len); extern struct work_struct efivar_work; void efivar_run_worker(void); From cb202d5ec4d8a1950f2756f58ddb45ea1d044c3d Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 17 Mar 2014 15:08:34 +0000 Subject: [PATCH 017/298] efivars: Refactor sanity checking code into separate function Move a large chunk of code that checks the validity of efi_variable into a new function, because we'll also need to use it for the compat code. Cc: Mike Waychison Signed-off-by: Matt Fleming (cherry picked from commit 54d2fbfb0c9d341c891926100ed0e5d4c4b0c987) Signed-off-by: Mark Brown --- drivers/firmware/efi/efivars.c | 52 +++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 5ee2cfb96698..a44310c6a8ba 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -189,6 +189,35 @@ efivar_data_read(struct efivar_entry *entry, char *buf) memcpy(buf, var->Data, var->DataSize); return var->DataSize; } + +static inline int +sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor, + unsigned long size, u32 attributes, u8 *data) +{ + /* + * If only updating the variable data, then the name + * and guid should remain the same + */ + if (memcmp(name, var->VariableName, sizeof(var->VariableName)) || + efi_guidcmp(vendor, var->VendorGuid)) { + printk(KERN_ERR "efivars: Cannot edit the wrong variable!\n"); + return -EINVAL; + } + + if ((size <= 0) || (attributes == 0)){ + printk(KERN_ERR "efivars: DataSize & Attributes must be valid!\n"); + return -EINVAL; + } + + if ((attributes & ~EFI_VARIABLE_MASK) != 0 || + efivar_validate(name, data, size) == false) { + printk(KERN_ERR "efivars: Malformed variable content\n"); + return -EINVAL; + } + + return 0; +} + /* * We allow each variable to be edited via rewriting the * entire efi variable structure. @@ -215,26 +244,9 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) size = new_var->DataSize; data = new_var->Data; - /* - * If only updating the variable data, then the name - * and guid should remain the same - */ - if (memcmp(name, var->VariableName, sizeof(var->VariableName)) || - efi_guidcmp(vendor, var->VendorGuid)) { - printk(KERN_ERR "efivars: Cannot edit the wrong variable!\n"); - return -EINVAL; - } - - if ((size <= 0) || (attributes == 0)){ - printk(KERN_ERR "efivars: DataSize & Attributes must be valid!\n"); - return -EINVAL; - } - - if ((attributes & ~EFI_VARIABLE_MASK) != 0 || - efivar_validate(name, data, size) == false) { - printk(KERN_ERR "efivars: Malformed variable content\n"); - return -EINVAL; - } + err = sanity_check(var, name, vendor, size, attributes, data); + if (err) + return err; memcpy(&entry->var, new_var, count); From 621ab44c97e62e5c6a49d40ecaf83b37de4ad657 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 17 Mar 2014 15:36:37 +0000 Subject: [PATCH 018/298] efivars: Add compatibility code for compat tasks It seems people are using 32-bit efibootmgr on top of 64-bit kernels, which will currently fail horribly when using the efivars interface, which is the traditional efibootmgr backend (the other being efivarfs). Since there is no versioning info in the data structure, figure out when we need to munge the structure data via judicious use of is_compat_task(). Cc: Mike Waychison Signed-off-by: Matt Fleming (cherry picked from commit e33655a386ed3b26ad36fb97a47ebb1c2ca1e928) Signed-off-by: Mark Brown --- drivers/firmware/efi/efivars.c | 140 +++++++++++++++++++++++++++------ 1 file changed, 115 insertions(+), 25 deletions(-) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index a44310c6a8ba..463c56545ae8 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -69,6 +69,7 @@ #include #include #include +#include #define EFIVARS_VERSION "0.08" #define EFIVARS_DATE "2004-May-17" @@ -86,6 +87,15 @@ static struct kset *efivars_kset; static struct bin_attribute *efivars_new_var; static struct bin_attribute *efivars_del_var; +struct compat_efi_variable { + efi_char16_t VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)]; + efi_guid_t VendorGuid; + __u32 DataSize; + __u8 Data[1024]; + __u32 Status; + __u32 Attributes; +} __packed; + struct efivar_attribute { struct attribute attr; ssize_t (*show) (struct efivar_entry *entry, char *buf); @@ -218,6 +228,25 @@ sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor, return 0; } +static inline bool is_compat(void) +{ + if (IS_ENABLED(CONFIG_COMPAT) && is_compat_task()) + return true; + + return false; +} + +static void +copy_out_compat(struct efi_variable *dst, struct compat_efi_variable *src) +{ + memcpy(dst->VariableName, src->VariableName, EFI_VAR_NAME_LEN); + memcpy(dst->Data, src->Data, sizeof(src->Data)); + + dst->VendorGuid = src->VendorGuid; + dst->DataSize = src->DataSize; + dst->Attributes = src->Attributes; +} + /* * We allow each variable to be edited via rewriting the * entire efi variable structure. @@ -233,22 +262,42 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) u8 *data; int err; - if (count != sizeof(struct efi_variable)) - return -EINVAL; + if (is_compat()) { + struct compat_efi_variable *compat; - new_var = (struct efi_variable *)buf; + if (count != sizeof(*compat)) + return -EINVAL; - attributes = new_var->Attributes; - vendor = new_var->VendorGuid; - name = new_var->VariableName; - size = new_var->DataSize; - data = new_var->Data; + compat = (struct compat_efi_variable *)buf; + attributes = compat->Attributes; + vendor = compat->VendorGuid; + name = compat->VariableName; + size = compat->DataSize; + data = compat->Data; - err = sanity_check(var, name, vendor, size, attributes, data); - if (err) - return err; + err = sanity_check(var, name, vendor, size, attributes, data); + if (err) + return err; - memcpy(&entry->var, new_var, count); + copy_out_compat(&entry->var, compat); + } else { + if (count != sizeof(struct efi_variable)) + return -EINVAL; + + new_var = (struct efi_variable *)buf; + + attributes = new_var->Attributes; + vendor = new_var->VendorGuid; + name = new_var->VariableName; + size = new_var->DataSize; + data = new_var->Data; + + err = sanity_check(var, name, vendor, size, attributes, data); + if (err) + return err; + + memcpy(&entry->var, new_var, count); + } err = efivar_entry_set(entry, attributes, size, data, NULL); if (err) { @@ -263,6 +312,8 @@ static ssize_t efivar_show_raw(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + struct compat_efi_variable *compat; + size_t size; if (!entry || !buf) return 0; @@ -272,9 +323,23 @@ efivar_show_raw(struct efivar_entry *entry, char *buf) &entry->var.DataSize, entry->var.Data)) return -EIO; - memcpy(buf, var, sizeof(*var)); + if (is_compat()) { + compat = (struct compat_efi_variable *)buf; - return sizeof(*var); + size = sizeof(*compat); + memcpy(compat->VariableName, var->VariableName, + EFI_VAR_NAME_LEN); + memcpy(compat->Data, var->Data, sizeof(compat->Data)); + + compat->VendorGuid = var->VendorGuid; + compat->DataSize = var->DataSize; + compat->Attributes = var->Attributes; + } else { + size = sizeof(*var); + memcpy(buf, var, size); + } + + return size; } /* @@ -349,8 +414,10 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { + struct compat_efi_variable *compat = (struct compat_efi_variable *)buf; struct efi_variable *new_var = (struct efi_variable *)buf; struct efivar_entry *new_entry; + bool need_compat = is_compat(); efi_char16_t *name; unsigned long size; u32 attributes; @@ -360,13 +427,23 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (count != sizeof(*new_var)) - return -EINVAL; + if (need_compat) { + if (count != sizeof(*compat)) + return -EINVAL; - attributes = new_var->Attributes; - name = new_var->VariableName; - size = new_var->DataSize; - data = new_var->Data; + attributes = compat->Attributes; + name = compat->VariableName; + size = compat->DataSize; + data = compat->Data; + } else { + if (count != sizeof(*new_var)) + return -EINVAL; + + attributes = new_var->Attributes; + name = new_var->VariableName; + size = new_var->DataSize; + data = new_var->Data; + } if ((attributes & ~EFI_VARIABLE_MASK) != 0 || efivar_validate(name, data, size) == false) { @@ -378,7 +455,10 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, if (!new_entry) return -ENOMEM; - memcpy(&new_entry->var, new_var, sizeof(*new_var)); + if (need_compat) + copy_out_compat(&new_entry->var, compat); + else + memcpy(&new_entry->var, new_var, sizeof(*new_var)); err = efivar_entry_set(new_entry, attributes, size, data, &efivar_sysfs_list); @@ -404,6 +484,7 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, char *buf, loff_t pos, size_t count) { struct efi_variable *del_var = (struct efi_variable *)buf; + struct compat_efi_variable *compat; struct efivar_entry *entry; efi_char16_t *name; efi_guid_t vendor; @@ -412,11 +493,20 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (count != sizeof(*del_var)) - return -EINVAL; + if (is_compat()) { + if (count != sizeof(*compat)) + return -EINVAL; - name = del_var->VariableName; - vendor = del_var->VendorGuid; + compat = (struct compat_efi_variable *)buf; + name = compat->VariableName; + vendor = compat->VendorGuid; + } else { + if (count != sizeof(*del_var)) + return -EINVAL; + + name = del_var->VariableName; + vendor = del_var->VendorGuid; + } efivar_entry_iter_begin(); entry = efivar_entry_find(name, vendor, &efivar_sysfs_list, true); From 1e7a3effc791805eb48403035df367e5ba3d9dd0 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 10 Jan 2014 13:47:37 +0000 Subject: [PATCH 019/298] efi: Add separate 32-bit/64-bit definitions The traditional approach of using machine-specific types such as 'unsigned long' does not allow the kernel to interact with firmware running in a different CPU mode, e.g. 64-bit kernel with 32-bit EFI. Add distinct EFI structure definitions for both 32-bit and 64-bit so that we can use them in the 32-bit and 64-bit code paths. Acked-by: Borislav Petkov Signed-off-by: Matt Fleming (cherry picked from commit 677703cef0a148ba07d37ced649ad25b1cda2f78) Signed-off-by: Mark Brown Conflicts: include/linux/efi.h Conflicts: include/linux/efi.h --- arch/x86/boot/compressed/eboot.h | 44 ++++++ include/linux/efi.h | 252 +++++++++++++++++++++++++++++++ 2 files changed, 296 insertions(+) diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 02265107cce3..cc78e2da7af9 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -38,6 +38,24 @@ struct efi_graphics_output_mode_info { u32 pixels_per_scan_line; } __packed; +struct efi_graphics_output_protocol_mode_32 { + u32 max_mode; + u32 mode; + u32 info; + u32 size_of_info; + u64 frame_buffer_base; + u32 frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_mode_64 { + u32 max_mode; + u32 mode; + u64 info; + u64 size_of_info; + u64 frame_buffer_base; + u64 frame_buffer_size; +} __packed; + struct efi_graphics_output_protocol_mode { u32 max_mode; u32 mode; @@ -47,6 +65,20 @@ struct efi_graphics_output_protocol_mode { unsigned long frame_buffer_size; } __packed; +struct efi_graphics_output_protocol_32 { + u32 query_mode; + u32 set_mode; + u32 blt; + u32 mode; +}; + +struct efi_graphics_output_protocol_64 { + u64 query_mode; + u64 set_mode; + u64 blt; + u64 mode; +}; + struct efi_graphics_output_protocol { void *query_mode; unsigned long set_mode; @@ -54,6 +86,18 @@ struct efi_graphics_output_protocol { struct efi_graphics_output_protocol_mode *mode; }; +struct efi_uga_draw_protocol_32 { + u32 get_mode; + u32 set_mode; + u32 blt; +}; + +struct efi_uga_draw_protocol_64 { + u64 get_mode; + u64 set_mode; + u64 blt; +}; + struct efi_uga_draw_protocol { void *get_mode; void *set_mode; diff --git a/include/linux/efi.h b/include/linux/efi.h index 3fce870313cf..103eb87ca7b8 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -153,6 +153,102 @@ typedef struct { u8 sets_to_zero; } efi_time_cap_t; +typedef struct { + efi_table_hdr_t hdr; + u32 raise_tpl; + u32 restore_tpl; + u32 allocate_pages; + u32 free_pages; + u32 get_memory_map; + u32 allocate_pool; + u32 free_pool; + u32 create_event; + u32 set_timer; + u32 wait_for_event; + u32 signal_event; + u32 close_event; + u32 check_event; + u32 install_protocol_interface; + u32 reinstall_protocol_interface; + u32 uninstall_protocol_interface; + u32 handle_protocol; + u32 __reserved; + u32 register_protocol_notify; + u32 locate_handle; + u32 locate_device_path; + u32 install_configuration_table; + u32 load_image; + u32 start_image; + u32 exit; + u32 unload_image; + u32 exit_boot_services; + u32 get_next_monotonic_count; + u32 stall; + u32 set_watchdog_timer; + u32 connect_controller; + u32 disconnect_controller; + u32 open_protocol; + u32 close_protocol; + u32 open_protocol_information; + u32 protocols_per_handle; + u32 locate_handle_buffer; + u32 locate_protocol; + u32 install_multiple_protocol_interfaces; + u32 uninstall_multiple_protocol_interfaces; + u32 calculate_crc32; + u32 copy_mem; + u32 set_mem; + u32 create_event_ex; +} __packed efi_boot_services_32_t; + +typedef struct { + efi_table_hdr_t hdr; + u64 raise_tpl; + u64 restore_tpl; + u64 allocate_pages; + u64 free_pages; + u64 get_memory_map; + u64 allocate_pool; + u64 free_pool; + u64 create_event; + u64 set_timer; + u64 wait_for_event; + u64 signal_event; + u64 close_event; + u64 check_event; + u64 install_protocol_interface; + u64 reinstall_protocol_interface; + u64 uninstall_protocol_interface; + u64 handle_protocol; + u64 __reserved; + u64 register_protocol_notify; + u64 locate_handle; + u64 locate_device_path; + u64 install_configuration_table; + u64 load_image; + u64 start_image; + u64 exit; + u64 unload_image; + u64 exit_boot_services; + u64 get_next_monotonic_count; + u64 stall; + u64 set_watchdog_timer; + u64 connect_controller; + u64 disconnect_controller; + u64 open_protocol; + u64 close_protocol; + u64 open_protocol_information; + u64 protocols_per_handle; + u64 locate_handle_buffer; + u64 locate_protocol; + u64 install_multiple_protocol_interfaces; + u64 uninstall_multiple_protocol_interfaces; + u64 calculate_crc32; + u64 copy_mem; + u64 set_mem; + u64 create_event_ex; +} __packed efi_boot_services_64_t; + /* * EFI Boot Services table */ @@ -231,12 +327,61 @@ typedef enum { EfiPciIoAttributeOperationMaximum } EFI_PCI_IO_PROTOCOL_ATTRIBUTE_OPERATION; +typedef struct { + u32 read; + u32 write; +} efi_pci_io_protocol_access_32_t; + +typedef struct { + u64 read; + u64 write; +} efi_pci_io_protocol_access_64_t; typedef struct { void *read; void *write; } efi_pci_io_protocol_access_t; +typedef struct { + u32 poll_mem; + u32 poll_io; + efi_pci_io_protocol_access_32_t mem; + efi_pci_io_protocol_access_32_t io; + efi_pci_io_protocol_access_32_t pci; + u32 copy_mem; + u32 map; + u32 unmap; + u32 allocate_buffer; + u32 free_buffer; + u32 flush; + u32 get_location; + u32 attributes; + u32 get_bar_attributes; + u32 set_bar_attributes; + uint64_t romsize; + void *romimage; +} efi_pci_io_protocol_32; + +typedef struct { + u64 poll_mem; + u64 poll_io; + efi_pci_io_protocol_access_64_t mem; + efi_pci_io_protocol_access_64_t io; + efi_pci_io_protocol_access_64_t pci; + u64 copy_mem; + u64 map; + u64 unmap; + u64 allocate_buffer; + u64 free_buffer; + u64 flush; + u64 get_location; + u64 attributes; + u64 get_bar_attributes; + u64 set_bar_attributes; + uint64_t romsize; + void *romimage; +} efi_pci_io_protocol_64; + typedef struct { void *poll_mem; void *poll_io; @@ -290,6 +435,42 @@ typedef struct { #define EFI_RUNTIME_SERVICES_SIGNATURE ((u64)0x5652453544e5552ULL) #define EFI_RUNTIME_SERVICES_REVISION 0x00010000 +typedef struct { + efi_table_hdr_t hdr; + u32 get_time; + u32 set_time; + u32 get_wakeup_time; + u32 set_wakeup_time; + u32 set_virtual_address_map; + u32 convert_pointer; + u32 get_variable; + u32 get_next_variable; + u32 set_variable; + u32 get_next_high_mono_count; + u32 reset_system; + u32 update_capsule; + u32 query_capsule_caps; + u32 query_variable_info; +} efi_runtime_services_32_t; + +typedef struct { + efi_table_hdr_t hdr; + u64 get_time; + u64 set_time; + u64 get_wakeup_time; + u64 set_wakeup_time; + u64 set_virtual_address_map; + u64 convert_pointer; + u64 get_variable; + u64 get_next_variable; + u64 set_variable; + u64 get_next_high_mono_count; + u64 reset_system; + u64 update_capsule; + u64 query_capsule_caps; + u64 query_variable_info; +} efi_runtime_services_64_t; + typedef struct { efi_table_hdr_t hdr; void *get_time; @@ -494,6 +675,38 @@ struct efi_fdt_params { u32 desc_ver; }; +typedef struct { + u32 revision; + u32 parent_handle; + u32 system_table; + u32 device_handle; + u32 file_path; + u32 reserved; + u32 load_options_size; + u32 load_options; + u32 image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + unsigned long unload; +} efi_loaded_image_32_t; + +typedef struct { + u32 revision; + u64 parent_handle; + u64 system_table; + u64 device_handle; + u64 file_path; + u64 reserved; + u32 load_options_size; + u64 load_options; + u64 image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + unsigned long unload; +} efi_loaded_image_64_t; + typedef struct { u32 revision; void *parent_handle; @@ -522,6 +735,34 @@ typedef struct { efi_char16_t filename[1]; } efi_file_info_t; +typedef struct { + u64 revision; + u32 open; + u32 close; + u32 delete; + u32 read; + u32 write; + u32 get_position; + u32 set_position; + u32 get_info; + u32 set_info; + u32 flush; +} efi_file_handle_32_t; + +typedef struct { + u64 revision; + u64 open; + u64 close; + u64 delete; + u64 read; + u64 write; + u64 get_position; + u64 set_position; + u64 get_info; + u64 set_info; + u64 flush; +} efi_file_handle_64_t; + typedef struct _efi_file_handle { u64 revision; efi_status_t (*open)(struct _efi_file_handle *, @@ -837,6 +1078,17 @@ struct efivar_entry { bool deleting; }; +struct efi_simple_text_output_protocol_32 { + u32 reset; + u32 output_string; + u32 test_string; +}; + +struct efi_simple_text_output_protocol_64 { + u64 reset; + u64 output_string; + u64 test_string; +}; struct efi_simple_text_output_protocol { void *reset; From fe23789ddbdb7e8c8a3da4a5ff40f1a1940146a5 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 15 Jan 2014 13:49:51 +0000 Subject: [PATCH 020/298] ia64/efi: Implement efi_enabled() There's no good reason to keep efi_enabled() under CONFIG_X86 anymore, since nothing about the implementation is specific to x86. Set EFI feature flags in the ia64 boot path instead of claiming to support all features. The old behaviour was actually buggy since efi.memmap never points to a valid memory map, so we shouldn't be claiming to support EFI_MEMMAP. Fortunately, this bug was never triggered because EFI_MEMMAP isn't used outside of arch/x86 currently, but that may not always be the case. Reviewed-and-tested-by: Tony Luck Signed-off-by: Matt Fleming (cherry picked from commit 092063808c498eccac8e891973bf143e7b60d723) Signed-off-by: Mark Brown --- arch/ia64/kernel/efi.c | 7 +++++++ include/linux/efi.h | 8 -------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index da5b462e6de6..741b99c1a0b1 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -477,6 +477,9 @@ efi_init (void) char *cp, vendor[100] = "unknown"; int i; + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); + /* * It's too early to be able to use the standard kernel command line * support... @@ -529,6 +532,8 @@ efi_init (void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + palo_phys = EFI_INVALID_TABLE_ADDR; if (efi_config_init(arch_tables) != 0) @@ -657,6 +662,8 @@ efi_enter_virtual_mode (void) return; } + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + /* * Now that EFI is in virtual mode, we call the EFI functions more * efficiently: diff --git a/include/linux/efi.h b/include/linux/efi.h index 103eb87ca7b8..2bcd8d23053a 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -919,8 +919,6 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_ARCH_1 6 /* First arch-specific bit */ #ifdef CONFIG_EFI -# ifdef CONFIG_X86 - /* * Test whether the above EFI_* bits are enabled. */ @@ -928,12 +926,6 @@ static inline bool efi_enabled(int feature) { return test_bit(feature, &efi.flags) != 0; } -# else -static inline bool efi_enabled(int feature) -{ - return true; -} -# endif #else static inline bool efi_enabled(int feature) { From b30449dd456694c746ac83d8f210308e6485ff75 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 15 Jan 2014 13:21:22 +0000 Subject: [PATCH 021/298] efi: Move facility flags to struct efi As we grow support for more EFI architectures they're going to want the ability to query which EFI features are available on the running system. Instead of storing this information in an architecture-specific place, stick it in the global 'struct efi', which is already the central location for EFI state. While we're at it, let's change the return value of efi_enabled() to be bool and replace all references to 'facility' with 'feature', which is the usual word used to describe the attributes of the running system. Signed-off-by: Matt Fleming (cherry picked from commit 3e909599215456928e6b42a04f11c2517881570b) Signed-off-by: Mark Brown Conflicts: arch/x86/platform/efi/efi.c Conflicts: arch/x86/platform/efi/efi.c --- arch/x86/platform/efi/efi.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index a4756ae4dc2e..8aee671ad05f 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -77,17 +77,6 @@ static __initdata efi_config_table_type_t arch_tables[] = { u64 efi_setup; /* efi setup_data physical address */ -/* - * Returns 1 if 'facility' is enabled, 0 otherwise. - */ -int efi_enabled(int facility) -{ - return test_bit(facility, &x86_efi_facility) != 0; -} -EXPORT_SYMBOL(efi_enabled); - -u64 efi_setup; /* efi setup_data physical address */ - static bool __initdata disable_runtime = false; static int __init setup_noefi(char *arg) { From 7819d7cf87b77ba3cdd7d5f77f8bce04fe09330e Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 15 Apr 2014 22:47:52 -0400 Subject: [PATCH 022/298] arm64: efi: add EFI stub This patch adds PE/COFF header fields to the start of the kernel Image so that it appears as an EFI application to UEFI firmware. An EFI stub is included to allow direct booting of the kernel Image. Signed-off-by: Mark Salter [Add support in PE/COFF header for signed images] Signed-off-by: Ard Biesheuvel Signed-off-by: Leif Lindholm Acked-by: Catalin Marinas Signed-off-by: Matt Fleming (cherry picked from commit 3c7f255039a2ad6ee1e3890505caf0d029b22e29) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig arch/arm64/kernel/Makefile Conflicts: arch/arm64/kernel/Makefile --- arch/arm64/Kconfig | 14 ++ arch/arm64/kernel/Makefile | 3 + arch/arm64/kernel/efi-entry.S | 109 ++++++++++++++ arch/arm64/kernel/efi-stub.c | 81 +++++++++++ arch/arm64/kernel/head.S | 112 +++++++++++++++ drivers/firmware/efi/arm-stub.c | 247 ++++++++++++++++++++++++++++++++ 6 files changed, 566 insertions(+) create mode 100644 arch/arm64/kernel/efi-entry.S create mode 100644 arch/arm64/kernel/efi-stub.c create mode 100644 drivers/firmware/efi/arm-stub.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 27bbcfc7202a..c7e1e30f6b61 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -258,6 +258,20 @@ config CMDLINE_FORCE This is useful if you cannot or don't want to change the command-line options your boot loader passes to the kernel. +config EFI + bool "UEFI runtime support" + depends on OF && !CPU_BIG_ENDIAN + select LIBFDT + select UCS2_STRING + select EFI_PARAMS_FROM_FDT + default y + help + This option provides support for runtime services provided + by UEFI firmware (such as non-volatile variables, realtime + clock, and platform reset). A UEFI stub is also provided to + allow the kernel to be booted as an EFI application. This + is only useful on systems that have UEFI firmware. + endmenu menu "Userspace binary formats" diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2d4554b13410..57c9ffe9a285 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -4,6 +4,8 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \ + -I$(src)/../../../scripts/dtc/libfdt # Object file lists. arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ @@ -20,6 +22,7 @@ arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o +arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S new file mode 100644 index 000000000000..66716c9b9e5f --- /dev/null +++ b/arch/arm64/kernel/efi-entry.S @@ -0,0 +1,109 @@ +/* + * EFI entry point. + * + * Copyright (C) 2013, 2014 Red Hat, Inc. + * Author: Mark Salter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include + +#include + +#define EFI_LOAD_ERROR 0x8000000000000001 + + __INIT + + /* + * We arrive here from the EFI boot manager with: + * + * * CPU in little-endian mode + * * MMU on with identity-mapped RAM + * * Icache and Dcache on + * + * We will most likely be running from some place other than where + * we want to be. The kernel image wants to be placed at TEXT_OFFSET + * from start of RAM. + */ +ENTRY(efi_stub_entry) + /* + * Create a stack frame to save FP/LR with extra space + * for image_addr variable passed to efi_entry(). + */ + stp x29, x30, [sp, #-32]! + + /* + * Call efi_entry to do the real work. + * x0 and x1 are already set up by firmware. Current runtime + * address of image is calculated and passed via *image_addr. + * + * unsigned long efi_entry(void *handle, + * efi_system_table_t *sys_table, + * unsigned long *image_addr) ; + */ + adrp x8, _text + add x8, x8, #:lo12:_text + add x2, sp, 16 + str x8, [x2] + bl efi_entry + cmn x0, #1 + b.eq efi_load_fail + + /* + * efi_entry() will have relocated the kernel image if necessary + * and we return here with device tree address in x0 and the kernel + * entry point stored at *image_addr. Save those values in registers + * which are callee preserved. + */ + mov x20, x0 // DTB address + ldr x0, [sp, #16] // relocated _text address + mov x21, x0 + + /* + * Flush dcache covering current runtime addresses + * of kernel text/data. Then flush all of icache. + */ + adrp x1, _text + add x1, x1, #:lo12:_text + adrp x2, _edata + add x2, x2, #:lo12:_edata + sub x1, x2, x1 + + bl __flush_dcache_area + ic ialluis + + /* Turn off Dcache and MMU */ + mrs x0, CurrentEL + cmp x0, #PSR_MODE_EL2t + ccmp x0, #PSR_MODE_EL2h, #0x4, ne + b.ne 1f + mrs x0, sctlr_el2 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el2, x0 + isb + b 2f +1: + mrs x0, sctlr_el1 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el1, x0 + isb +2: + /* Jump to kernel entry point */ + mov x0, x20 + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x21 + +efi_load_fail: + mov x0, #EFI_LOAD_ERROR + ldp x29, x30, [sp], #32 + ret + +ENDPROC(efi_stub_entry) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c new file mode 100644 index 000000000000..60e98a639ac5 --- /dev/null +++ b/arch/arm64/kernel/efi-stub.c @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2013, 2014 Linaro Ltd; + * + * This file implements the EFI boot stub for the arm64 kernel. + * Adapted from ARM version by Mark Salter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include +#include + +/* + * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from + * start of kernel and may not cross a 2MiB boundary. We set alignment to + * 2MiB so we know it won't cross a 2MiB boundary. + */ +#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ +#define MAX_FDT_OFFSET SZ_512M + +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) + +static void efi_char16_printk(efi_system_table_t *sys_table_arg, + efi_char16_t *str); + +static efi_status_t efi_open_volume(efi_system_table_t *sys_table, + void *__image, void **__fh); +static efi_status_t efi_file_close(void *handle); + +static efi_status_t +efi_file_read(void *handle, unsigned long *size, void *addr); + +static efi_status_t +efi_file_size(efi_system_table_t *sys_table, void *__fh, + efi_char16_t *filename_16, void **handle, u64 *file_sz); + +/* Include shared EFI stub code */ +#include "../../../drivers/firmware/efi/efi-stub-helper.c" +#include "../../../drivers/firmware/efi/fdt.c" +#include "../../../drivers/firmware/efi/arm-stub.c" + + +static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) +{ + efi_status_t status; + unsigned long kernel_size, kernel_memsize = 0; + + /* Relocate the image, if required. */ + kernel_size = _edata - _text; + if (*image_addr != (dram_base + TEXT_OFFSET)) { + kernel_memsize = kernel_size + (_end - _edata); + status = efi_relocate_kernel(sys_table, image_addr, + kernel_size, kernel_memsize, + dram_base + TEXT_OFFSET, + PAGE_SIZE); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to relocate kernel\n"); + return status; + } + if (*image_addr != (dram_base + TEXT_OFFSET)) { + pr_efi_err(sys_table, "Failed to alloc kernel memory\n"); + efi_free(sys_table, kernel_memsize, *image_addr); + return EFI_ERROR; + } + *image_size = kernel_memsize; + } + + + return EFI_SUCCESS; +} diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0b281fffda51..156f3b4b717a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -107,8 +107,18 @@ /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ +#ifdef CONFIG_EFI +efi_head: + /* + * This add instruction has no meaningful effect except that + * its opcode forms the magic "MZ" signature required by UEFI. + */ + add x13, x18, #0x16 + b stext +#else b stext // branch to kernel start, magic .long 0 // reserved +#endif .quad TEXT_OFFSET // Image load offset from start of RAM .quad 0 // reserved .quad 0 // reserved @@ -119,7 +129,109 @@ .byte 0x52 .byte 0x4d .byte 0x64 +#ifdef CONFIG_EFI + .long pe_header - efi_head // Offset to the PE header. +#else .word 0 // reserved +#endif + +#ifdef CONFIG_EFI + .align 3 +pe_header: + .ascii "PE" + .short 0 +coff_header: + .short 0xaa64 // AArch64 + .short 2 // nr_sections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 1 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short 0x206 // Characteristics. + // IMAGE_FILE_DEBUG_STRIPPED | + // IMAGE_FILE_EXECUTABLE_IMAGE | + // IMAGE_FILE_LINE_NUMS_STRIPPED +optional_header: + .short 0x20b // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long _edata - stext // SizeOfCode + .long 0 // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long efi_stub_entry - efi_head // AddressOfEntryPoint + .long stext - efi_head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long 0x20 // SectionAlignment + .long 0x8 // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _edata - efi_head // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long stext - efi_head // SizeOfHeaders + .long 0 // CheckSum + .short 0xa // Subsystem (EFI application) + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long 0x6 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + + // Section table +section_table: + + /* + * The EFI application loader requires a relocation section + * because EFI applications must be relocatable. This is a + * dummy section as far as we are concerned. + */ + .ascii ".reloc" + .byte 0 + .byte 0 // end of 0 padding of section name + .long 0 + .long 0 + .long 0 // SizeOfRawData + .long 0 // PointerToRawData + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long 0x42100040 // Characteristics (section flags) + + + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 // end of 0 padding of section name + .long _edata - stext // VirtualSize + .long stext - efi_head // VirtualAddress + .long _edata - stext // SizeOfRawData + .long stext - efi_head // PointerToRawData + + .long 0 // PointerToRelocations (0 for executables) + .long 0 // PointerToLineNumbers (0 for executables) + .short 0 // NumberOfRelocations (0 for executables) + .short 0 // NumberOfLineNumbers (0 for executables) + .long 0xe0500020 // Characteristics (section flags) + .align 5 +#endif ENTRY(stext) mov x21, x0 // x21=FDT diff --git a/drivers/firmware/efi/arm-stub.c b/drivers/firmware/efi/arm-stub.c new file mode 100644 index 000000000000..19239a9b7e29 --- /dev/null +++ b/drivers/firmware/efi/arm-stub.c @@ -0,0 +1,247 @@ +/* + * EFI stub implementation that is shared by arm and arm64 architectures. + * This should be #included by the EFI stub implementation files. + * + * Copyright (C) 2013,2014 Linaro Limited + * Roy Franz + * + * This file is part of the Linux kernel, and is made available under the + * terms of the GNU General Public License version 2. + * + */ + +static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, + void *__image, void **__fh) +{ + efi_file_io_interface_t *io; + efi_loaded_image_t *image = __image; + efi_file_handle_t *fh; + efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; + efi_status_t status; + void *handle = (void *)(unsigned long)image->device_handle; + + status = sys_table_arg->boottime->handle_protocol(handle, + &fs_proto, (void **)&io); + if (status != EFI_SUCCESS) { + efi_printk(sys_table_arg, "Failed to handle fs_proto\n"); + return status; + } + + status = io->open_volume(io, &fh); + if (status != EFI_SUCCESS) + efi_printk(sys_table_arg, "Failed to open volume\n"); + + *__fh = fh; + return status; +} +static efi_status_t efi_file_close(void *handle) +{ + efi_file_handle_t *fh = handle; + + return fh->close(handle); +} + +static efi_status_t +efi_file_read(void *handle, unsigned long *size, void *addr) +{ + efi_file_handle_t *fh = handle; + + return fh->read(handle, size, addr); +} + + +static efi_status_t +efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, + efi_char16_t *filename_16, void **handle, u64 *file_sz) +{ + efi_file_handle_t *h, *fh = __fh; + efi_file_info_t *info; + efi_status_t status; + efi_guid_t info_guid = EFI_FILE_INFO_ID; + unsigned long info_sz; + + status = fh->open(fh, &h, filename_16, EFI_FILE_MODE_READ, (u64)0); + if (status != EFI_SUCCESS) { + efi_printk(sys_table_arg, "Failed to open file: "); + efi_char16_printk(sys_table_arg, filename_16); + efi_printk(sys_table_arg, "\n"); + return status; + } + + *handle = h; + + info_sz = 0; + status = h->get_info(h, &info_guid, &info_sz, NULL); + if (status != EFI_BUFFER_TOO_SMALL) { + efi_printk(sys_table_arg, "Failed to get file info size\n"); + return status; + } + +grow: + status = sys_table_arg->boottime->allocate_pool(EFI_LOADER_DATA, + info_sz, (void **)&info); + if (status != EFI_SUCCESS) { + efi_printk(sys_table_arg, "Failed to alloc mem for file info\n"); + return status; + } + + status = h->get_info(h, &info_guid, &info_sz, + info); + if (status == EFI_BUFFER_TOO_SMALL) { + sys_table_arg->boottime->free_pool(info); + goto grow; + } + + *file_sz = info->file_size; + sys_table_arg->boottime->free_pool(info); + + if (status != EFI_SUCCESS) + efi_printk(sys_table_arg, "Failed to get initrd info\n"); + + return status; +} + + + +static void efi_char16_printk(efi_system_table_t *sys_table_arg, + efi_char16_t *str) +{ + struct efi_simple_text_output_protocol *out; + + out = (struct efi_simple_text_output_protocol *)sys_table_arg->con_out; + out->output_string(out, str); +} + + +/* + * This function handles the architcture specific differences between arm and + * arm64 regarding where the kernel image must be loaded and any memory that + * must be reserved. On failure it is required to free all + * all allocations it has made. + */ +static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image); +/* + * EFI entry point for the arm/arm64 EFI stubs. This is the entrypoint + * that is described in the PE/COFF header. Most of the code is the same + * for both archictectures, with the arch-specific code provided in the + * handle_kernel_image() function. + */ +unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table, + unsigned long *image_addr) +{ + efi_loaded_image_t *image; + efi_status_t status; + unsigned long image_size = 0; + unsigned long dram_base; + /* addr/point and size pairs for memory management*/ + unsigned long initrd_addr; + u64 initrd_size = 0; + unsigned long fdt_addr; /* Original DTB */ + u64 fdt_size = 0; /* We don't get size from configuration table */ + char *cmdline_ptr = NULL; + int cmdline_size = 0; + unsigned long new_fdt_addr; + efi_guid_t loaded_image_proto = LOADED_IMAGE_PROTOCOL_GUID; + unsigned long reserve_addr = 0; + unsigned long reserve_size = 0; + + /* Check if we were booted by the EFI firmware */ + if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + goto fail; + + pr_efi(sys_table, "Booting Linux Kernel...\n"); + + /* + * Get a handle to the loaded image protocol. This is used to get + * information about the running image, such as size and the command + * line. + */ + status = sys_table->boottime->handle_protocol(handle, + &loaded_image_proto, (void *)&image); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to get loaded image protocol\n"); + goto fail; + } + + dram_base = get_dram_base(sys_table); + if (dram_base == EFI_ERROR) { + pr_efi_err(sys_table, "Failed to find DRAM base\n"); + goto fail; + } + status = handle_kernel_image(sys_table, image_addr, &image_size, + &reserve_addr, + &reserve_size, + dram_base, image); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to relocate kernel\n"); + goto fail; + } + + /* + * Get the command line from EFI, using the LOADED_IMAGE + * protocol. We are going to copy the command line into the + * device tree, so this can be allocated anywhere. + */ + cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size); + if (!cmdline_ptr) { + pr_efi_err(sys_table, "getting command line via LOADED_IMAGE_PROTOCOL\n"); + goto fail_free_image; + } + + /* Load a device tree from the configuration table, if present. */ + fdt_addr = (uintptr_t)get_fdt(sys_table); + if (!fdt_addr) { + status = handle_cmdline_files(sys_table, image, cmdline_ptr, + "dtb=", + ~0UL, (unsigned long *)&fdt_addr, + (unsigned long *)&fdt_size); + + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to load device tree!\n"); + goto fail_free_cmdline; + } + } + + status = handle_cmdline_files(sys_table, image, cmdline_ptr, + "initrd=", dram_base + SZ_512M, + (unsigned long *)&initrd_addr, + (unsigned long *)&initrd_size); + if (status != EFI_SUCCESS) + pr_efi_err(sys_table, "Failed initrd from command line!\n"); + + new_fdt_addr = fdt_addr; + status = allocate_new_fdt_and_exit_boot(sys_table, handle, + &new_fdt_addr, dram_base + MAX_FDT_OFFSET, + initrd_addr, initrd_size, cmdline_ptr, + fdt_addr, fdt_size); + + /* + * If all went well, we need to return the FDT address to the + * calling function so it can be passed to kernel as part of + * the kernel boot protocol. + */ + if (status == EFI_SUCCESS) + return new_fdt_addr; + + pr_efi_err(sys_table, "Failed to update FDT and exit boot services\n"); + + efi_free(sys_table, initrd_size, initrd_addr); + efi_free(sys_table, fdt_size, fdt_addr); + +fail_free_cmdline: + efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr); + +fail_free_image: + efi_free(sys_table, image_size, *image_addr); + efi_free(sys_table, reserve_size, reserve_addr); +fail: + return EFI_ERROR; +} From 8ea8a3935d29b68511997e1aa6d32cebdb2e9243 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Apr 2014 17:46:58 +0200 Subject: [PATCH 023/298] efi/arm64: ignore dtb= when UEFI SecureBoot is enabled Loading unauthenticated FDT blobs directly from storage is a security hazard, so this should only be allowed when running with UEFI Secure Boot disabled. Signed-off-by: Ard Biesheuvel Signed-off-by: Leif Lindholm Acked-by: Catalin Marinas Signed-off-by: Matt Fleming (cherry picked from commit 345c736edd07b657a8c48190baed2719b85d0938) Signed-off-by: Mark Brown --- drivers/firmware/efi/arm-stub.c | 39 +++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/arm-stub.c b/drivers/firmware/efi/arm-stub.c index 19239a9b7e29..41114ce03b01 100644 --- a/drivers/firmware/efi/arm-stub.c +++ b/drivers/firmware/efi/arm-stub.c @@ -12,6 +12,30 @@ * */ +static int __init efi_secureboot_enabled(efi_system_table_t *sys_table_arg) +{ + static efi_guid_t const var_guid __initconst = EFI_GLOBAL_VARIABLE_GUID; + static efi_char16_t const var_name[] __initconst = { + 'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 }; + + efi_get_variable_t *f_getvar = sys_table_arg->runtime->get_variable; + unsigned long size = sizeof(u8); + efi_status_t status; + u8 val; + + status = f_getvar((efi_char16_t *)var_name, (efi_guid_t *)&var_guid, + NULL, &size, &val); + + switch (status) { + case EFI_SUCCESS: + return val; + case EFI_NOT_FOUND: + return 0; + default: + return 1; + } +} + static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, void **__fh) { @@ -144,7 +168,7 @@ unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table, /* addr/point and size pairs for memory management*/ unsigned long initrd_addr; u64 initrd_size = 0; - unsigned long fdt_addr; /* Original DTB */ + unsigned long fdt_addr = 0; /* Original DTB */ u64 fdt_size = 0; /* We don't get size from configuration table */ char *cmdline_ptr = NULL; int cmdline_size = 0; @@ -196,9 +220,13 @@ unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table, goto fail_free_image; } - /* Load a device tree from the configuration table, if present. */ - fdt_addr = (uintptr_t)get_fdt(sys_table); - if (!fdt_addr) { + /* + * Unauthenticated device tree data is a security hazard, so + * ignore 'dtb=' unless UEFI Secure Boot is disabled. + */ + if (efi_secureboot_enabled(sys_table)) { + pr_efi(sys_table, "UEFI Secure Boot is enabled.\n"); + } else { status = handle_cmdline_files(sys_table, image, cmdline_ptr, "dtb=", ~0UL, (unsigned long *)&fdt_addr, @@ -209,6 +237,9 @@ unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table, goto fail_free_cmdline; } } + if (!fdt_addr) + /* Look for a device tree configuration table entry. */ + fdt_addr = (uintptr_t)get_fdt(sys_table); status = handle_cmdline_files(sys_table, image, cmdline_ptr, "initrd=", dram_base + SZ_512M, From e600b8ae2ffc1496ea48328c4d05c4fe667f1a48 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 2 Jun 2014 11:31:06 +0100 Subject: [PATCH 024/298] efi: Fix compiler warnings (unused, const, type) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a few compiler warning in the efi code for unused variable, discarding const qualifier and wrong pointer type: drivers/firmware/efi/fdt.c|66 col 22| warning: unused variable ‘name’ [-Wunused-variable] drivers/firmware/efi/efi.c|368 col 3| warning: passing argument 3 of ‘of_get_flat_dt_prop’ from incompatible pointer type [enabled by default] drivers/firmware/efi/efi.c|368 col 8| warning: assignment discards ‘const’ qualifier from pointer target type [enabled by default] Signed-off-by: Catalin Marinas Signed-off-by: Matt Fleming (cherry picked from commit 6fb8cc82c096fd5ccf277678639193cae07125a0) Signed-off-by: Mark Brown --- drivers/firmware/efi/efi.c | 6 +++--- drivers/firmware/efi/fdt.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index c547de817108..30ce5dcfabfe 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -350,10 +350,10 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, int depth, void *data) { struct param_info *info = data; - void *prop, *dest; - unsigned long len; + const void *prop; + void *dest; u64 val; - int i; + int i, len; if (depth != 1 || (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) diff --git a/drivers/firmware/efi/fdt.c b/drivers/firmware/efi/fdt.c index 5c6a8e8a9580..82d774161cc9 100644 --- a/drivers/firmware/efi/fdt.c +++ b/drivers/firmware/efi/fdt.c @@ -63,7 +63,7 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, */ prev = 0; for (;;) { - const char *type, *name; + const char *type; int len; node = fdt_next_node(fdt, prev, NULL); From 2228feef662890472ba9357af9a4c93a66dd4239 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 13 Jun 2014 13:11:51 +0200 Subject: [PATCH 025/298] efi/arm64: efistub: remove local copy of linux_banner The shared efistub code for ARM and arm64 contains a local copy of linux_banner, allowing it to be referenced from separate executables such as the ARM decompressor. However, this introduces a dependency on generated header files, causing unnecessary rebuilds of the stub itself and, in case of arm64, vmlinux which contains it. On arm64, the copy is not actually needed since we can reference the original symbol directly, and as it turns out, there may be better ways to deal with this for ARM as well, so let's remove it from the shared code. If it still needs to be reintroduced for ARM later, it should live under arch/arm anyway and not in shared code. Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Signed-off-by: Matt Fleming (cherry picked from commit a55c072dfe520f8fa03cf11b07b9268a8a17820a) Signed-off-by: Mark Brown --- arch/arm64/kernel/efi-stub.c | 2 -- drivers/firmware/efi/fdt.c | 10 ---------- 2 files changed, 12 deletions(-) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c index 60e98a639ac5..e786e6cdc400 100644 --- a/arch/arm64/kernel/efi-stub.c +++ b/arch/arm64/kernel/efi-stub.c @@ -12,8 +12,6 @@ #include #include #include -#include -#include /* * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from diff --git a/drivers/firmware/efi/fdt.c b/drivers/firmware/efi/fdt.c index 82d774161cc9..507a3df46a5d 100644 --- a/drivers/firmware/efi/fdt.c +++ b/drivers/firmware/efi/fdt.c @@ -23,16 +23,6 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, u32 fdt_val32; u64 fdt_val64; - /* - * Copy definition of linux_banner here. Since this code is - * built as part of the decompressor for ARM v7, pulling - * in version.c where linux_banner is defined for the - * kernel brings other kernel dependencies with it. - */ - const char linux_banner[] = - "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" - LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; - /* Do some checks on provided FDT, if it exists*/ if (orig_fdt) { if (fdt_check_header(orig_fdt)) { From 62e8d6186af39dcfab94755fc36f1211e794b93e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 8 Jul 2014 16:54:18 +0100 Subject: [PATCH 026/298] efi: fdt: Do not report an error during boot if UEFI is not available Currently, fdt_find_uefi_params() reports an error if no EFI parameters are found in the DT. This is however a valid case for non-UEFI kernel booting. This patch checks changes the error reporting to a pr_info("UEFI not found") when no EFI parameters are found in the DT. Signed-off-by: Catalin Marinas Acked-by: Leif Lindholm Tested-by: Leif Lindholm Signed-off-by: Matt Fleming (cherry picked from commit 29e2435fd6d71e0136e2c2ff0433b7dbeeaaccfd) Signed-off-by: Mark Brown --- drivers/firmware/efi/efi.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 30ce5dcfabfe..e8d5c00443e2 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -343,6 +343,7 @@ static __initdata struct { struct param_info { int verbose; + int found; void *params; }; @@ -359,16 +360,12 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) return 0; - pr_info("Getting parameters from FDT:\n"); - for (i = 0; i < ARRAY_SIZE(dt_params); i++) { prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len); - if (!prop) { - pr_err("Can't find %s in device tree!\n", - dt_params[i].name); + if (!prop) return 0; - } dest = info->params + dt_params[i].offset; + info->found++; val = of_read_number(prop, len / sizeof(u32)); @@ -387,10 +384,21 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose) { struct param_info info; + int ret; + + pr_info("Getting EFI parameters from FDT:\n"); info.verbose = verbose; + info.found = 0; info.params = params; - return of_scan_flat_dt(fdt_find_uefi_params, &info); + ret = of_scan_flat_dt(fdt_find_uefi_params, &info); + if (!info.found) + pr_info("UEFI not found.\n"); + else if (!ret) + pr_err("Can't find '%s' in device tree!\n", + dt_params[info.found].name); + + return ret; } #endif /* CONFIG_EFI_PARAMS_FROM_FDT */ From cdfa59f5c33f353fad6a4df93434e88535f75da0 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 15 Apr 2014 21:59:30 -0400 Subject: [PATCH 027/298] arm64: add EFI runtime services This patch adds EFI runtime support for arm64. This runtime support allows the kernel to access various EFI runtime services provided by EFI firmware. Things like reboot, real time clock, EFI boot variables, and others. This functionality is supported for little endian kernels only. The UEFI firmware standard specifies that the firmware be little endian. A future patch is expected to add support for big endian kernels running with little endian firmware. Signed-off-by: Mark Salter [ Remove unnecessary cache/tlb maintenance. ] Signed-off-by: Mark Rutland Signed-off-by: Leif Lindholm Signed-off-by: Matt Fleming (cherry picked from commit f84d02755f5a9f3b88e8d15d6384da25ad6dcf5e) Signed-off-by: Mark Brown Conflicts: arch/arm64/Kconfig arch/arm64/kernel/Makefile --- arch/arm64/Kconfig | 2 + arch/arm64/include/asm/efi.h | 14 ++ arch/arm64/kernel/efi.c | 466 +++++++++++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 5 + 4 files changed, 487 insertions(+) create mode 100644 arch/arm64/include/asm/efi.h create mode 100644 arch/arm64/kernel/efi.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c7e1e30f6b61..e44a33805d88 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -321,6 +321,8 @@ source "net/Kconfig" source "drivers/Kconfig" +source "drivers/firmware/Kconfig" + source "fs/Kconfig" source "arch/arm64/kvm/Kconfig" diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h new file mode 100644 index 000000000000..5a46c4e7f539 --- /dev/null +++ b/arch/arm64/include/asm/efi.h @@ -0,0 +1,14 @@ +#ifndef _ASM_EFI_H +#define _ASM_EFI_H + +#include + +#ifdef CONFIG_EFI +extern void efi_init(void); +extern void efi_idmap_init(void); +#else +#define efi_init() +#define efi_idmap_init() +#endif + +#endif /* _ASM_EFI_H */ diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c new file mode 100644 index 000000000000..7bfd65097210 --- /dev/null +++ b/arch/arm64/kernel/efi.c @@ -0,0 +1,466 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 2.4 + * + * Copyright (C) 2013, 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct efi_memory_map memmap; + +static efi_runtime_services_t *runtime; + +static u64 efi_system_table; + +static int uefi_debug __initdata; +static int __init uefi_debug_setup(char *str) +{ + uefi_debug = 1; + + return 0; +} +early_param("uefi_debug", uefi_debug_setup); + +static int __init is_normal_ram(efi_memory_desc_t *md) +{ + if (md->attribute & EFI_MEMORY_WB) + return 1; + return 0; +} + +static void __init efi_setup_idmap(void) +{ + struct memblock_region *r; + efi_memory_desc_t *md; + u64 paddr, npages, size; + + for_each_memblock(memory, r) + create_id_mapping(r->base, r->size, 0); + + /* map runtime io spaces */ + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) + continue; + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + create_id_mapping(paddr, size, 1); + } +} + +static int __init uefi_init(void) +{ + efi_char16_t *c16; + char vendor[100] = "unknown"; + int i, retval; + + efi.systab = early_memremap(efi_system_table, + sizeof(efi_system_table_t)); + if (efi.systab == NULL) { + pr_warn("Unable to map EFI system table.\n"); + return -ENOMEM; + } + + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); + + /* + * Verify the EFI Table + */ + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { + pr_err("System table signature incorrect\n"); + return -EINVAL; + } + if ((efi.systab->hdr.revision >> 16) < 2) + pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); + + /* Show what we know for posterity */ + c16 = early_memremap(efi.systab->fw_vendor, + sizeof(vendor)); + if (c16) { + for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) + vendor[i] = c16[i]; + vendor[i] = '\0'; + } + + pr_info("EFI v%u.%.02u by %s\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + retval = efi_config_init(NULL); + if (retval == 0) + set_bit(EFI_CONFIG_TABLES, &efi.flags); + + early_memunmap(c16, sizeof(vendor)); + early_memunmap(efi.systab, sizeof(efi_system_table_t)); + + return retval; +} + +static __initdata char memory_type_name[][32] = { + {"Reserved"}, + {"Loader Code"}, + {"Loader Data"}, + {"Boot Code"}, + {"Boot Data"}, + {"Runtime Code"}, + {"Runtime Data"}, + {"Conventional Memory"}, + {"Unusable Memory"}, + {"ACPI Reclaim Memory"}, + {"ACPI Memory NVS"}, + {"Memory Mapped I/O"}, + {"MMIO Port Space"}, + {"PAL Code"}, +}; + +/* + * Return true for RAM regions we want to permanently reserve. + */ +static __init int is_reserve_region(efi_memory_desc_t *md) +{ + if (!is_normal_ram(md)) + return 0; + + if (md->attribute & EFI_MEMORY_RUNTIME) + return 1; + + if (md->type == EFI_ACPI_RECLAIM_MEMORY || + md->type == EFI_RESERVED_TYPE) + return 1; + + return 0; +} + +static __init void reserve_regions(void) +{ + efi_memory_desc_t *md; + u64 paddr, npages, size; + + if (uefi_debug) + pr_info("Processing EFI memory map:\n"); + + for_each_efi_memory_desc(&memmap, md) { + paddr = md->phys_addr; + npages = md->num_pages; + + if (uefi_debug) + pr_info(" 0x%012llx-0x%012llx [%s]", + paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, + memory_type_name[md->type]); + + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + early_init_dt_add_memory_arch(paddr, size); + + if (is_reserve_region(md) || + md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) { + memblock_reserve(paddr, size); + if (uefi_debug) + pr_cont("*"); + } + + if (uefi_debug) + pr_cont("\n"); + } +} + + +static u64 __init free_one_region(u64 start, u64 end) +{ + u64 size = end - start; + + if (uefi_debug) + pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); + + free_bootmem_late(start, size); + return size; +} + +static u64 __init free_region(u64 start, u64 end) +{ + u64 map_start, map_end, total = 0; + + if (end <= start) + return total; + + map_start = (u64)memmap.phys_map; + map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); + map_start &= PAGE_MASK; + + if (start < map_end && end > map_start) { + /* region overlaps UEFI memmap */ + if (start < map_start) + total += free_one_region(start, map_start); + + if (map_end < end) + total += free_one_region(map_end, end); + } else + total += free_one_region(start, end); + + return total; +} + +static void __init free_boot_services(void) +{ + u64 total_freed = 0; + u64 keep_end, free_start, free_end; + efi_memory_desc_t *md; + + /* + * If kernel uses larger pages than UEFI, we have to be careful + * not to inadvertantly free memory we want to keep if there is + * overlap at the kernel page size alignment. We do not want to + * free is_reserve_region() memory nor the UEFI memmap itself. + * + * The memory map is sorted, so we keep track of the end of + * any previous region we want to keep, remember any region + * we want to free and defer freeing it until we encounter + * the next region we want to keep. This way, before freeing + * it, we can clip it as needed to avoid freeing memory we + * want to keep for UEFI. + */ + + keep_end = 0; + free_start = 0; + + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + + if (is_reserve_region(md)) { + /* + * We don't want to free any memory from this region. + */ + if (free_start) { + /* adjust free_end then free region */ + if (free_end > md->phys_addr) + free_end -= PAGE_SIZE; + total_freed += free_region(free_start, free_end); + free_start = 0; + } + keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + continue; + } + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) { + /* no need to free this region */ + continue; + } + + /* + * We want to free memory from this region. + */ + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (free_start) { + if (paddr <= free_end) + free_end = paddr + size; + else { + total_freed += free_region(free_start, free_end); + free_start = paddr; + free_end = paddr + size; + } + } else { + free_start = paddr; + free_end = paddr + size; + } + if (free_start < keep_end) { + free_start += PAGE_SIZE; + if (free_start >= free_end) + free_start = 0; + } + } + if (free_start) + total_freed += free_region(free_start, free_end); + + if (total_freed) + pr_info("Freed 0x%llx bytes of EFI boot services memory", + total_freed); +} + +void __init efi_init(void) +{ + struct efi_fdt_params params; + + /* Grab UEFI information placed in FDT by stub */ + if (!efi_get_fdt_params(¶ms, uefi_debug)) + return; + + efi_system_table = params.system_table; + + memblock_reserve(params.mmap & PAGE_MASK, + PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); + memmap.phys_map = (void *)params.mmap; + memmap.map = early_memremap(params.mmap, params.mmap_size); + memmap.map_end = memmap.map + params.mmap_size; + memmap.desc_size = params.desc_size; + memmap.desc_version = params.desc_ver; + + if (uefi_init() < 0) + return; + + reserve_regions(); +} + +void __init efi_idmap_init(void) +{ + /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ + efi_setup_idmap(); +} + +static int __init remap_region(efi_memory_desc_t *md, void **new) +{ + u64 paddr, vaddr, npages, size; + + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + vaddr = (__force u64)ioremap_cache(paddr, size); + else + vaddr = (__force u64)ioremap(paddr, size); + + if (!vaddr) { + pr_err("Unable to remap 0x%llx pages @ %p\n", + npages, (void *)paddr); + return 0; + } + + /* adjust for any rounding when EFI and system pagesize differs */ + md->virt_addr = vaddr + (md->phys_addr - paddr); + + if (uefi_debug) + pr_info(" EFI remap 0x%012llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + memcpy(*new, md, memmap.desc_size); + *new += memmap.desc_size; + + return 1; +} + +/* + * Switch UEFI from an identity map to a kernel virtual map + */ +static int __init arm64_enter_virtual_mode(void) +{ + efi_memory_desc_t *md; + phys_addr_t virtmap_phys; + void *virtmap, *virt_md; + efi_status_t status; + u64 mapsize; + int count = 0; + unsigned long flags; + + if (!efi_enabled(EFI_BOOT)) { + pr_info("EFI services will not be available.\n"); + return -1; + } + + pr_info("Remapping and enabling EFI services.\n"); + + /* replace early memmap mapping with permanent mapping */ + mapsize = memmap.map_end - memmap.map; + early_memunmap(memmap.map, mapsize); + memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, + mapsize); + memmap.map_end = memmap.map + mapsize; + + efi.memmap = &memmap; + + /* Map the runtime regions */ + virtmap = kmalloc(mapsize, GFP_KERNEL); + if (!virtmap) { + pr_err("Failed to allocate EFI virtual memmap\n"); + return -1; + } + virtmap_phys = virt_to_phys(virtmap); + virt_md = virtmap; + + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (remap_region(md, &virt_md)) + ++count; + } + + efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); + if (efi.systab) + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + + local_irq_save(flags); + cpu_switch_mm(idmap_pg_dir, &init_mm); + + /* Call SetVirtualAddressMap with the physical address of the map */ + runtime = efi.systab->runtime; + efi.set_virtual_address_map = runtime->set_virtual_address_map; + + status = efi.set_virtual_address_map(count * memmap.desc_size, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)virtmap_phys); + cpu_set_reserved_ttbr0(); + flush_tlb_all(); + local_irq_restore(flags); + + kfree(virtmap); + + free_boot_services(); + + if (status != EFI_SUCCESS) { + pr_err("Failed to set EFI virtual address map! [%lx]\n", + status); + return -1; + } + + /* Set up runtime services function pointers */ + runtime = efi.systab->runtime; + efi.get_time = runtime->get_time; + efi.set_time = runtime->set_time; + efi.get_wakeup_time = runtime->get_wakeup_time; + efi.set_wakeup_time = runtime->set_wakeup_time; + efi.get_variable = runtime->get_variable; + efi.get_next_variable = runtime->get_next_variable; + efi.set_variable = runtime->set_variable; + efi.query_variable_info = runtime->query_variable_info; + efi.update_capsule = runtime->update_capsule; + efi.query_capsule_caps = runtime->query_capsule_caps; + efi.get_next_high_mono_count = runtime->get_next_high_mono_count; + efi.reset_system = runtime->reset_system; + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + + return 0; +} +early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c8e9effe52e1..c475bde48772 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,7 @@ #include #include #include +#include unsigned int processor_id; EXPORT_SYMBOL(processor_id); @@ -329,11 +331,14 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + efi_init(); arm64_memblock_init(); paging_init(); request_standard_resources(); + efi_idmap_init(); + unflatten_device_tree(); psci_init(); From c7aae4d46ea5c7fb51edd68839588967ef2a7536 Mon Sep 17 00:00:00 2001 From: Leif Lindholm Date: Fri, 23 May 2014 14:16:56 +0100 Subject: [PATCH 028/298] arm64: efi: only attempt efi map setup if booting via EFI Booting a kernel with CONFIG_EFI enabled on a non-EFI system caused an oops with the current UEFI support code. Add the required test to prevent this. Signed-off-by: Leif Lindholm Acked-by: Catalin Marinas Signed-off-by: Matt Fleming (cherry picked from commit 74bcc2499291d38b6253f9dbd6af33a195222208) Signed-off-by: Mark Brown --- arch/arm64/kernel/efi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 7bfd65097210..14db1f6e8d7f 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -333,6 +333,9 @@ void __init efi_init(void) void __init efi_idmap_init(void) { + if (!efi_enabled(EFI_BOOT)) + return; + /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ efi_setup_idmap(); } From d9161136f8f65a83983ce24c651d3ea1381f031d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 23 Jan 2015 12:09:23 +0000 Subject: [PATCH 029/298] ARM: Revert "coresight: adding basic support for Vexpress TC2" This reverts commit 19bdec7dbcccfe2d0aed8cba3e1e8597b7fd8c88, for some reason this is causing boot failures on TC2 when booted using UEFI. Revert pending investigation. Signed-off-by: Mark Brown --- arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 199 --------------------- 1 file changed, 199 deletions(-) diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index f53b737316e3..15f98cbcb75a 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -334,205 +334,6 @@ }; }; - etb@0,20010000 { - compatible = "arm,coresight-etb10", "arm,primecell"; - reg = <0 0x20010000 0 0x1000>; - - coresight-default-sink; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - etb_in_port: endpoint@0 { - slave-mode; - remote-endpoint = <&replicator_out_port0>; - }; - }; - }; - - tpiu@0,20030000 { - compatible = "arm,coresight-tpiu", "arm,primecell"; - reg = <0 0x20030000 0 0x1000>; - - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - tpiu_in_port: endpoint@0 { - slave-mode; - remote-endpoint = <&replicator_out_port1>; - }; - }; - }; - - replicator { - /* non-configurable replicators don't show up on the - * AMBA bus. As such no need to add "arm,primecell". - */ - compatible = "arm,coresight-replicator"; - - ports { - #address-cells = <1>; - #size-cells = <0>; - - /* replicator output ports */ - port@0 { - reg = <0>; - replicator_out_port0: endpoint { - remote-endpoint = <&etb_in_port>; - }; - }; - - port@1 { - reg = <1>; - replicator_out_port1: endpoint { - remote-endpoint = <&tpiu_in_port>; - }; - }; - - /* replicator input port */ - port@2 { - reg = <0>; - replicator_in_port0: endpoint { - slave-mode; - remote-endpoint = <&funnel_out_port0>; - }; - }; - }; - }; - - funnel@0,20040000 { - compatible = "arm,coresight-funnel", "arm,primecell"; - reg = <0 0x20040000 0 0x1000>; - - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - ports { - #address-cells = <1>; - #size-cells = <0>; - - /* funnel output port */ - port@0 { - reg = <0>; - funnel_out_port0: endpoint { - remote-endpoint = - <&replicator_in_port0>; - }; - }; - - /* funnel input ports */ - port@1 { - reg = <0>; - funnel_in_port0: endpoint { - slave-mode; - remote-endpoint = <&ptm0_out_port>; - }; - }; - - port@2 { - reg = <1>; - funnel_in_port1: endpoint { - slave-mode; - remote-endpoint = <&ptm1_out_port>; - }; - }; - - port@3 { - reg = <2>; - funnel_in_port2: endpoint { - slave-mode; - remote-endpoint = <&etm0_out_port>; - }; - }; - - /* Input port #3 is for ITM, not supported here */ - - port@4 { - reg = <4>; - funnel_in_port4: endpoint { - slave-mode; - remote-endpoint = <&etm1_out_port>; - }; - }; - - port@5 { - reg = <5>; - funnel_in_port5: endpoint { - slave-mode; - remote-endpoint = <&etm2_out_port>; - }; - }; - }; - }; - - ptm@0,2201c000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2201c000 0 0x1000>; - - cpu = <&cpu0>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - ptm0_out_port: endpoint { - remote-endpoint = <&funnel_in_port0>; - }; - }; - }; - - ptm@0,2201d000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2201d000 0 0x1000>; - - cpu = <&cpu1>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - ptm1_out_port: endpoint { - remote-endpoint = <&funnel_in_port1>; - }; - }; - }; - - etm@0,2203c000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2203c000 0 0x1000>; - - cpu = <&cpu2>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - etm0_out_port: endpoint { - remote-endpoint = <&funnel_in_port2>; - }; - }; - }; - - etm@0,2203d000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2203d000 0 0x1000>; - - cpu = <&cpu3>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - etm1_out_port: endpoint { - remote-endpoint = <&funnel_in_port4>; - }; - }; - }; - - etm@0,2203e000 { - compatible = "arm,coresight-etm3x", "arm,primecell"; - reg = <0 0x2203e000 0 0x1000>; - - cpu = <&cpu4>; - clocks = <&oscclk6a>; - clock-names = "apb_pclk"; - port { - etm2_out_port: endpoint { - remote-endpoint = <&funnel_in_port5>; - }; - }; - }; - smb { compatible = "simple-bus"; From eaf90cde5482556c52020b4ac0095b3dd0ba2390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Mon, 15 Dec 2014 09:24:13 +0200 Subject: [PATCH 030/298] gre: fix the inner mac header in nbma tunnel xmit path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8a0033a947403569caeca45fa5e6f7ba60d51974 ] The NBMA GRE tunnels temporarily push GRE header that contain the per-packet NBMA destination on the skb via header ops early in xmit path. It is the later pulled before the real GRE header is constructed. The inner mac was thus set differently in nbma case: the GRE header has been pushed by neighbor layer, and mac header points to beginning of the temporary gre header (set by dev_queue_xmit). Now that the offloads expect mac header to point to the gre payload, fix the xmit patch to: - pull first the temporary gre header away - and reset mac header to point to gre payload This fixes tso to work again with nbma tunnels. Fixes: 14051f0452a2 ("gre: Use inner mac length when computing tunnel length") Signed-off-by: Timo Teräs Cc: Tom Herbert Cc: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 94213c891565..b40b90d3bd2b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -250,10 +250,6 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; - skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); - if (IS_ERR(skb)) - goto out; - if (dev->header_ops) { /* Need space for new headers */ if (skb_cow_head(skb, dev->needed_headroom - @@ -266,6 +262,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, * to gre header. */ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + skb_reset_mac_header(skb); } else { if (skb_cow_head(skb, dev->needed_headroom)) goto free_skb; @@ -273,6 +270,10 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, tnl_params = &tunnel->parms.iph; } + skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); + if (IS_ERR(skb)) + goto out; + __gre_xmit(skb, dev, tnl_params, skb->protocol); return NETDEV_TX_OK; From 0c6de524d10a1a3fb367f2d680349c6abd19846e Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 16 Dec 2014 17:58:17 -0500 Subject: [PATCH 031/298] netlink: Always copy on mmap TX. [ Upstream commit 4682a0358639b29cf69437ed909c6221f8c89847 ] Checking the file f_count and the nlk->mapped count is not completely sufficient to prevent the mmap'd area contents from changing from under us during netlink mmap sendmsg() operations. Be careful to sample the header's length field only once, because this could change from under us as well. Fixes: 5fd96123ee19 ("netlink: implement memory mapped sendmsg()") Signed-off-by: David S. Miller Acked-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 52 +++++++++++++--------------------------- 1 file changed, 16 insertions(+), 36 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7c177bc43806..53322c33aca7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -510,14 +510,14 @@ out: return err; } -static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr) +static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len) { #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 struct page *p_start, *p_end; /* First page is flushed through netlink_{get,set}_status */ p_start = pgvec_to_page(hdr + PAGE_SIZE); - p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1); + p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1); while (p_start <= p_end) { flush_dcache_page(p_start); p_start++; @@ -699,24 +699,16 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, struct nl_mmap_hdr *hdr; struct sk_buff *skb; unsigned int maxlen; - bool excl = true; int err = 0, len = 0; - /* Netlink messages are validated by the receiver before processing. - * In order to avoid userspace changing the contents of the message - * after validation, the socket and the ring may only be used by a - * single process, otherwise we fall back to copying. - */ - if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 || - atomic_read(&nlk->mapped) > 1) - excl = false; - mutex_lock(&nlk->pg_vec_lock); ring = &nlk->tx_ring; maxlen = ring->frame_size - NL_MMAP_HDRLEN; do { + unsigned int nm_len; + hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); if (hdr == NULL) { if (!(msg->msg_flags & MSG_DONTWAIT) && @@ -724,35 +716,23 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, schedule(); continue; } - if (hdr->nm_len > maxlen) { + + nm_len = ACCESS_ONCE(hdr->nm_len); + if (nm_len > maxlen) { err = -EINVAL; goto out; } - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, nm_len); - if (likely(dst_portid == 0 && dst_group == 0 && excl)) { - skb = alloc_skb_head(GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - sock_hold(sk); - netlink_ring_setup_skb(skb, sk, ring, hdr); - NETLINK_CB(skb).flags |= NETLINK_SKB_TX; - __skb_put(skb, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); - atomic_inc(&ring->pending); - } else { - skb = alloc_skb(hdr->nm_len, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto out; - } - __skb_put(skb, hdr->nm_len); - memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); - netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); + skb = alloc_skb(nm_len, GFP_KERNEL); + if (skb == NULL) { + err = -ENOBUFS; + goto out; } + __skb_put(skb, nm_len); + memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len); + netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); netlink_increment_head(ring); @@ -798,7 +778,7 @@ static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) hdr->nm_pid = NETLINK_CB(skb).creds.pid; hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); - netlink_frame_flush_dcache(hdr); + netlink_frame_flush_dcache(hdr, hdr->nm_len); netlink_set_status(hdr, NL_MMAP_STATUS_VALID); NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; From ef82260c89cb090b9447d8ed89d9eb3ac3b9247e Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 18 Dec 2014 10:30:26 +0000 Subject: [PATCH 032/298] netlink: Don't reorder loads/stores before marking mmap netlink frame as available [ Upstream commit a18e6a186f53af06937a2c268c72443336f4ab56 ] Each mmap Netlink frame contains a status field which indicates whether the frame is unused, reserved, contains data or needs to be skipped. Both loads and stores may not be reordeded and must complete before the status field is changed and another CPU might pick up the frame for use. Use an smp_mb() to cover needs of both types of callers to netlink_set_status(), callers which have been reading data frame from the frame, and callers which have been filling or releasing and thus writing to the frame. - Example code path requiring a smp_rmb(): memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); - Example code path requiring a smp_wmb(): hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); netlink_frame_flush_dcache(hdr); netlink_set_status(hdr, NL_MMAP_STATUS_VALID); Fixes: f9c228 ("netlink: implement memory mapped recvmsg()") Reported-by: Eric Dumazet Signed-off-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 53322c33aca7..1d52506bda14 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -535,9 +535,9 @@ static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr) static void netlink_set_status(struct nl_mmap_hdr *hdr, enum nl_mmap_status status) { + smp_mb(); hdr->nm_status = status; flush_dcache_page(pgvec_to_page(hdr)); - smp_wmb(); } static struct nl_mmap_hdr * From 1bd686b1ae7c2a95aa8f716eb89f8d3a9120affe Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sat, 20 Dec 2014 12:15:49 -0800 Subject: [PATCH 033/298] in6: fix conflict with glibc [ Upstream commit 6d08acd2d32e3e877579315dc3202d7a5f336d98 ] Resolve conflicts between glibc definition of IPV6 socket options and those defined in Linux headers. Looks like earlier efforts to solve this did not cover all the definitions. It resolves warnings during iproute2 build. Please consider for stable as well. Signed-off-by: Stephen Hemminger Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/in6.h | 3 ++- include/uapi/linux/libc-compat.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index e9a1d2d973b6..4c399ae04677 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -149,7 +149,7 @@ struct in6_flowlabel_req { /* * IPV6 socket options */ - +#if __UAPI_DEF_IPV6_OPTIONS #define IPV6_ADDRFORM 1 #define IPV6_2292PKTINFO 2 #define IPV6_2292HOPOPTS 3 @@ -192,6 +192,7 @@ struct in6_flowlabel_req { #define IPV6_IPSEC_POLICY 34 #define IPV6_XFRM_POLICY 35 +#endif /* * Multicast: diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index c140620dad92..e28807ad17fa 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -69,6 +69,7 @@ #define __UAPI_DEF_SOCKADDR_IN6 0 #define __UAPI_DEF_IPV6_MREQ 0 #define __UAPI_DEF_IPPROTO_V6 0 +#define __UAPI_DEF_IPV6_OPTIONS 0 #else @@ -82,6 +83,7 @@ #define __UAPI_DEF_SOCKADDR_IN6 1 #define __UAPI_DEF_IPV6_MREQ 1 #define __UAPI_DEF_IPPROTO_V6 1 +#define __UAPI_DEF_IPV6_OPTIONS 1 #endif /* _NETINET_IN_H */ @@ -103,6 +105,7 @@ #define __UAPI_DEF_SOCKADDR_IN6 1 #define __UAPI_DEF_IPV6_MREQ 1 #define __UAPI_DEF_IPPROTO_V6 1 +#define __UAPI_DEF_IPV6_OPTIONS 1 /* Definitions for xattr.h */ #define __UAPI_DEF_XATTR 1 From f26c07f96829df378e5d6645b598e85c8387cf0e Mon Sep 17 00:00:00 2001 From: Prashant Sreedharan Date: Sat, 20 Dec 2014 12:16:17 -0800 Subject: [PATCH 034/298] tg3: tg3_disable_ints using uninitialized mailbox value to disable interrupts [ Upstream commit 05b0aa579397b734f127af58e401a30784a1e315 ] During driver load in tg3_init_one, if the driver detects DMA activity before intializing the chip tg3_halt is called. As part of tg3_halt interrupts are disabled using routine tg3_disable_ints. This routine was using mailbox value which was not initialized (default value is 0). As a result driver was writing 0x00000001 to pci config space register 0, which is the vendor id / device id. This driver bug was exposed because of the commit a7877b17a667 (PCI: Check only the Vendor ID to identify Configuration Request Retry). Also this issue is only seen in older generation chipsets like 5722 because config space write to offset 0 from driver is possible. The newer generation chips ignore writes to offset 0. Also without commit a7877b17a667, for these older chips when a GRC reset is issued the Bootcode would reprogram the vendor id/device id, which is the reason this bug was masked earlier. Fixed by initializing the interrupt mailbox registers before calling tg3_halt. Please queue for -stable. Reported-by: Nils Holland Reported-by: Marcelo Ricardo Leitner Signed-off-by: Prashant Sreedharan Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 34 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 086eac5af5c2..82061139b215 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17731,23 +17731,6 @@ static int tg3_init_one(struct pci_dev *pdev, goto err_out_apeunmap; } - /* - * Reset chip in case UNDI or EFI driver did not shutdown - * DMA self test will enable WDMAC and we'll see (spurious) - * pending DMA on the PCI bus at that point. - */ - if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) || - (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { - tw32(MEMARB_MODE, MEMARB_MODE_ENABLE); - tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - } - - err = tg3_test_dma(tp); - if (err) { - dev_err(&pdev->dev, "DMA engine test failed, aborting\n"); - goto err_out_apeunmap; - } - intmbx = MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW; rcvmbx = MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW; sndmbx = MAILBOX_SNDHOST_PROD_IDX_0 + TG3_64BIT_REG_LOW; @@ -17792,6 +17775,23 @@ static int tg3_init_one(struct pci_dev *pdev, sndmbx += 0xc; } + /* + * Reset chip in case UNDI or EFI driver did not shutdown + * DMA self test will enable WDMAC and we'll see (spurious) + * pending DMA on the PCI bus at that point. + */ + if ((tr32(HOSTCC_MODE) & HOSTCC_MODE_ENABLE) || + (tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { + tw32(MEMARB_MODE, MEMARB_MODE_ENABLE); + tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); + } + + err = tg3_test_dma(tp); + if (err) { + dev_err(&pdev->dev, "DMA engine test failed, aborting\n"); + goto err_out_apeunmap; + } + tg3_init_coal(tp); pci_set_drvdata(pdev, dev); From 73ef2a1c62d46a3260b0bceab99ca7b642f670dc Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 20 Dec 2014 13:48:55 +0100 Subject: [PATCH 035/298] batman-adv: Calculate extra tail size based on queued fragments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5b6698b0e4a37053de35cc24ee695b98a7eb712b ] The fragmentation code was replaced in 610bfc6bc99bc83680d190ebc69359a05fc7f605 ("batman-adv: Receive fragmented packets and merge"). The new code provided a mostly unused parameter skb for the merging function. It is used inside the function to calculate the additionally needed skb tailroom. But instead of increasing its own tailroom, it is only increasing the tailroom of the first queued skb. This is not correct in some situations because the first queued entry can be a different one than the parameter. An observed problem was: 1. packet with size 104, total_size 1464, fragno 1 was received - packet is queued 2. packet with size 1400, total_size 1464, fragno 0 was received - packet is queued at the end of the list 3. enough data was received and can be given to the merge function (1464 == (1400 - 20) + (104 - 20)) - merge functions gets 1400 byte large packet as skb argument 4. merge function gets first entry in queue (104 byte) - stored as skb_out 5. merge function calculates the required extra tail as total_size - skb->len - pskb_expand_head tail of skb_out with 64 bytes 6. merge function tries to squeeze the extra 1380 bytes from the second queued skb (1400 byte aka skb parameter) in the 64 extra tail bytes of skb_out Instead calculate the extra required tail bytes for skb_out also using skb_out instead of using the parameter skb. The skb parameter is only used to get the total_size from the last received packet. This is also the total_size used to decide that all fragments were received. Reported-by: Philipp Psurek Signed-off-by: Sven Eckelmann Acked-by: Martin Hundebøll Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/fragmentation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index c46387a46535..0efde3592479 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -251,7 +251,7 @@ batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb) kfree(entry); /* Make room for the rest of the fragments. */ - if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) { + if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) { kfree_skb(skb_out); skb_out = NULL; goto free; From 22afb6820969625df9d8f71c3bfc64e0a1049218 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 20 Dec 2014 13:48:56 +0100 Subject: [PATCH 036/298] batman-adv: Unify fragment size calculation [ Upstream commit 0402e444cd199389b7fe47be68a67b817e09e097 ] The fragmentation code was replaced in 610bfc6bc99bc83680d190ebc69359a05fc7f605 ("batman-adv: Receive fragmented packets and merge") by an implementation which can handle up to 16 fragments of a packet. The packet is prepared for the split in fragments by the function batadv_frag_send_packet and the actual split is done by batadv_frag_create. Both functions calculate the size of a fragment themself. But their calculation differs because batadv_frag_send_packet also subtracts ETH_HLEN. Therefore, the check in batadv_frag_send_packet "can a full fragment can be created?" may return true even when batadv_frag_create cannot create a full fragment. The function batadv_frag_create doesn't check the size of the skb before splitting it and therefore might try to create a larger fragment than the remaining buffer. This creates an integer underflow and an invalid len is given to skb_split. Signed-off-by: Sven Eckelmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/fragmentation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 0efde3592479..e5c5f573c0d4 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -434,7 +434,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb, * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE */ mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE); - max_fragment_size = (mtu - header_size - ETH_HLEN); + max_fragment_size = mtu - header_size; max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; /* Don't even try to fragment, if we need more than 16 fragments */ From b27b36d5d0383133a76661e8e7604ea952ff5cb8 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 20 Dec 2014 13:48:57 +0100 Subject: [PATCH 037/298] batman-adv: avoid NULL dereferences and fix if check [ Upstream commit 0d1644919578db525b9a7b6c8197ce02adbfce26 ] Gateway having bandwidth_down equal to zero are not accepted at all and so never added to the Gateway list. For this reason checking the bandwidth_down member in batadv_gw_out_of_range() is useless. This is probably a copy/paste error and this check was supposed to be "!gw_node" only. Moreover, the way the check is written now may also lead to a NULL dereference. Fix this by rewriting the if-condition properly. Introduced by 414254e342a0d58144de40c3da777521ebaeeb07 ("batman-adv: tvlv - gateway download/upload bandwidth container") Signed-off-by: Antonio Quartulli Reported-by: David Binderman Signed-off-by: Marek Lindner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/batman-adv/gateway_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 36b9ae61f5e8..2393ea72d65f 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -812,7 +812,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, goto out; gw_node = batadv_gw_node_get(bat_priv, orig_dst_node); - if (!gw_node->bandwidth_down == 0) + if (!gw_node) goto out; switch (atomic_read(&bat_priv->gw_mode)) { From 72111ffa1946a7fc2f476265e7237ce2ce654624 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Mon, 22 Dec 2014 19:04:14 +0900 Subject: [PATCH 038/298] net: Fix stacked vlan offload features computation [ Upstream commit 796f2da81bead71ffc91ef70912cd8d1827bf756 ] When vlan tags are stacked, it is very likely that the outer tag is stored in skb->vlan_tci and skb->protocol shows the inner tag's vlan_proto. Currently netif_skb_features() first looks at skb->protocol even if there is the outer tag in vlan_tci, thus it incorrectly retrieves the protocol encapsulated by the inner vlan instead of the inner vlan protocol. This allows GSO packets to be passed to HW and they end up being corrupted. Fixes: 58e998c6d239 ("offloading: Force software GSO for multiple vlan tags.") Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 3ed11a555834..8ee8f9b39355 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2529,11 +2529,14 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb, if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; - if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, dev, features); + if (!vlan_tx_tag_present(skb)) { + if (unlikely(protocol == htons(ETH_P_8021Q) || + protocol == htons(ETH_P_8021AD))) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else { + return harmonize_features(skb, dev, features); + } } features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | From 7d19bd80c0657f83ca668535ee2e2706e76136ce Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 23 Dec 2014 01:13:18 +0100 Subject: [PATCH 039/298] net: Reset secmark when scrubbing packet [ Upstream commit b8fb4e0648a2ab3734140342002f68fb0c7d1602 ] skb_scrub_packet() is called when a packet switches between a context such as between underlay and overlay, between namespaces, or between L3 subnets. While we already scrub the packet mark, connection tracking entry, and cached destination, the security mark/context is left intact. It seems wrong to inherit the security context of a packet when going from overlay to underlay or across forwarding paths. Signed-off-by: Thomas Graf Acked-by: Flavio Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index baf6fc457df9..e2b1bba69882 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3937,6 +3937,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->local_df = 0; skb_dst_drop(skb); skb->mark = 0; + skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); nf_reset_trace(skb); From 3a13604986badac07103028cb726be3f1866871d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 1 Jan 2015 00:39:23 +1100 Subject: [PATCH 040/298] tcp: Do not apply TSO segment limit to non-TSO packets [ Upstream commit 843925f33fcc293d80acf2c5c8a78adf3344d49b ] Thomas Jarosch reported IPsec TCP stalls when a PMTU event occurs. In fact the problem was completely unrelated to IPsec. The bug is also reproducible if you just disable TSO/GSO. The problem is that when the MSS goes down, existing queued packet on the TX queue that have not been transmitted yet all look like TSO packets and get treated as such. This then triggers a bug where tcp_mss_split_point tells us to generate a zero-sized packet on the TX queue. Once that happens we're screwed because the zero-sized packet can never be removed by ACKs. Fixes: 1485348d242 ("tcp: Apply device TSO segment limit earlier") Reported-by: Thomas Jarosch Signed-off-by: Herbert Xu Cheers, Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 91b98e5a17aa..7efa26bb872c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1894,7 +1894,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; - if (tso_segs == 1) { + if (tso_segs == 1 || !sk->sk_gso_max_segs) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)))) @@ -1931,7 +1931,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) + if (tso_segs > 1 && sk->sk_gso_max_segs && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, min_t(unsigned int, cwnd_quota, From 07d187d6fd9fe29d88463d83401ed24619a1aeb8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 11 Jan 2015 10:32:18 -0800 Subject: [PATCH 041/298] alx: fix alx_poll() [ Upstream commit 7a05dc64e2e4c611d89007b125b20c0d2a4d31a5 ] Commit d75b1ade567f ("net: less interrupt masking in NAPI") uncovered wrong alx_poll() behavior. A NAPI poll() handler is supposed to return exactly the budget when/if napi_complete() has not been called. It is also supposed to return number of frames that were received, so that netdev_budget can have a meaning. Also, in case of TX pressure, we still have to dequeue received packets : alx_clean_rx_irq() has to be called even if alx_clean_tx_irq(alx) returns false, otherwise device is half duplex. Signed-off-by: Eric Dumazet Fixes: d75b1ade567f ("net: less interrupt masking in NAPI") Reported-by: Oded Gabbay Bisected-by: Oded Gabbay Tested-by: Oded Gabbay Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/atheros/alx/main.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 380d24922049..3e1d7d29b4ec 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -184,15 +184,16 @@ static void alx_schedule_reset(struct alx_priv *alx) schedule_work(&alx->reset_wk); } -static bool alx_clean_rx_irq(struct alx_priv *alx, int budget) +static int alx_clean_rx_irq(struct alx_priv *alx, int budget) { struct alx_rx_queue *rxq = &alx->rxq; struct alx_rrd *rrd; struct alx_buffer *rxb; struct sk_buff *skb; u16 length, rfd_cleaned = 0; + int work = 0; - while (budget > 0) { + while (work < budget) { rrd = &rxq->rrd[rxq->rrd_read_idx]; if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT))) break; @@ -203,7 +204,7 @@ static bool alx_clean_rx_irq(struct alx_priv *alx, int budget) ALX_GET_FIELD(le32_to_cpu(rrd->word0), RRD_NOR) != 1) { alx_schedule_reset(alx); - return 0; + return work; } rxb = &rxq->bufs[rxq->read_idx]; @@ -243,7 +244,7 @@ static bool alx_clean_rx_irq(struct alx_priv *alx, int budget) } napi_gro_receive(&alx->napi, skb); - budget--; + work++; next_pkt: if (++rxq->read_idx == alx->rx_ringsz) @@ -258,21 +259,22 @@ next_pkt: if (rfd_cleaned) alx_refill_rx_ring(alx, GFP_ATOMIC); - return budget > 0; + return work; } static int alx_poll(struct napi_struct *napi, int budget) { struct alx_priv *alx = container_of(napi, struct alx_priv, napi); struct alx_hw *hw = &alx->hw; - bool complete = true; unsigned long flags; + bool tx_complete; + int work; - complete = alx_clean_tx_irq(alx) && - alx_clean_rx_irq(alx, budget); + tx_complete = alx_clean_tx_irq(alx); + work = alx_clean_rx_irq(alx, budget); - if (!complete) - return 1; + if (!tx_complete || work == budget) + return budget; napi_complete(&alx->napi); @@ -284,7 +286,7 @@ static int alx_poll(struct napi_struct *napi, int budget) alx_post_write(hw); - return 0; + return work; } static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) From 52bf2a12aa7ee1701fa52c3206e3c0188e433aeb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 14 Jan 2015 18:15:30 +0100 Subject: [PATCH 042/298] team: avoid possible underflow of count_pending value for notify_peers and mcast_rejoin [ Upstream commit b0d11b42785b70e19bc6a3122eead3f7969a7589 ] This patch is fixing a race condition that may cause setting count_pending to -1, which results in unwanted big bulk of arp messages (in case of "notify peers"). Consider following scenario: count_pending == 2 CPU0 CPU1 team_notify_peers_work atomic_dec_and_test (dec count_pending to 1) schedule_delayed_work team_notify_peers atomic_add (adding 1 to count_pending) team_notify_peers_work atomic_dec_and_test (dec count_pending to 1) schedule_delayed_work team_notify_peers_work atomic_dec_and_test (dec count_pending to 0) schedule_delayed_work team_notify_peers_work atomic_dec_and_test (dec count_pending to -1) Fix this race by using atomic_dec_if_positive - that will prevent count_pending running under 0. Fixes: fc423ff00df3a1955441 ("team: add peer notification") Fixes: 492b200efdd20b8fcfd ("team: add support for sending multicast rejoins") Signed-off-by: Jiri Pirko Signed-off-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 979fe433278c..32efe8371ff8 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -629,6 +629,7 @@ static int team_change_mode(struct team *team, const char *kind) static void team_notify_peers_work(struct work_struct *work) { struct team *team; + int val; team = container_of(work, struct team, notify_peers.dw.work); @@ -636,9 +637,14 @@ static void team_notify_peers_work(struct work_struct *work) schedule_delayed_work(&team->notify_peers.dw, 0); return; } + val = atomic_dec_if_positive(&team->notify_peers.count_pending); + if (val < 0) { + rtnl_unlock(); + return; + } call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, team->dev); rtnl_unlock(); - if (!atomic_dec_and_test(&team->notify_peers.count_pending)) + if (val) schedule_delayed_work(&team->notify_peers.dw, msecs_to_jiffies(team->notify_peers.interval)); } @@ -669,6 +675,7 @@ static void team_notify_peers_fini(struct team *team) static void team_mcast_rejoin_work(struct work_struct *work) { struct team *team; + int val; team = container_of(work, struct team, mcast_rejoin.dw.work); @@ -676,9 +683,14 @@ static void team_mcast_rejoin_work(struct work_struct *work) schedule_delayed_work(&team->mcast_rejoin.dw, 0); return; } + val = atomic_dec_if_positive(&team->mcast_rejoin.count_pending); + if (val < 0) { + rtnl_unlock(); + return; + } call_netdevice_notifiers(NETDEV_RESEND_IGMP, team->dev); rtnl_unlock(); - if (!atomic_dec_and_test(&team->mcast_rejoin.count_pending)) + if (val) schedule_delayed_work(&team->mcast_rejoin.dw, msecs_to_jiffies(team->mcast_rejoin.interval)); } From 0042721e23933b6e56cf2766557addbd6d16a018 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 18 Dec 2014 15:58:42 +0530 Subject: [PATCH 043/298] enic: fix rx skb checksum [ Upstream commit 17e96834fd35997ca7cdfbf15413bcd5a36ad448 ] Hardware always provides compliment of IP pseudo checksum. Stack expects whole packet checksum without pseudo checksum if CHECKSUM_COMPLETE is set. This causes checksum error in nf & ovs. kernel: qg-19546f09-f2: hw csum failure kernel: CPU: 9 PID: 0 Comm: swapper/9 Tainted: GF O-------------- 3.10.0-123.8.1.el7.x86_64 #1 kernel: Hardware name: Cisco Systems Inc UCSB-B200-M3/UCSB-B200-M3, BIOS B200M3.2.2.3.0.080820141339 08/08/2014 kernel: ffff881218f40000 df68243feb35e3a8 ffff881237a43ab8 ffffffff815e237b kernel: ffff881237a43ad0 ffffffff814cd4ca ffff8829ec71eb00 ffff881237a43af0 kernel: ffffffff814c6232 0000000000000286 ffff8829ec71eb00 ffff881237a43b00 kernel: Call Trace: kernel: [] dump_stack+0x19/0x1b kernel: [] netdev_rx_csum_fault+0x3a/0x40 kernel: [] __skb_checksum_complete_head+0x62/0x70 kernel: [] __skb_checksum_complete+0x11/0x20 kernel: [] nf_ip_checksum+0xcc/0x100 kernel: [] icmp_error+0x1f7/0x35c [nf_conntrack_ipv4] kernel: [] ? netif_rx+0xb9/0x1d0 kernel: [] ? internal_dev_recv+0xdb/0x130 [openvswitch] kernel: [] nf_conntrack_in+0xf0/0xa80 [nf_conntrack] kernel: [] ? inet_del_offload+0x40/0x40 kernel: [] ipv4_conntrack_in+0x22/0x30 [nf_conntrack_ipv4] kernel: [] nf_iterate+0xaa/0xc0 kernel: [] ? inet_del_offload+0x40/0x40 kernel: [] nf_hook_slow+0x84/0x140 kernel: [] ? inet_del_offload+0x40/0x40 kernel: [] ip_rcv+0x344/0x380 Hardware verifies IP & tcp/udp header checksum but does not provide payload checksum, use CHECKSUM_UNNECESSARY. Set it only if its valid IP tcp/udp packet. Cc: Jiri Benc Cc: Stefan Assmann Reported-by: Sunil Choudhary Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Reviewed-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic_main.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index b740bfce72ef..ff9b423805a0 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1044,10 +1044,14 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq, PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); } - if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc) { - skb->csum = htons(checksum); - skb->ip_summed = CHECKSUM_COMPLETE; - } + /* Hardware does not provide whole packet checksum. It only + * provides pseudo checksum. Since hw validates the packet + * checksum but not provide us the checksum value. use + * CHECSUM_UNNECESSARY. + */ + if ((netdev->features & NETIF_F_RXCSUM) && tcp_udp_csum_ok && + ipv4_csum_ok) + skb->ip_summed = CHECKSUM_UNNECESSARY; if (vlan_stripped) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); From a81b0a4f949b912d318926001b72592ab6043a03 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 19 Dec 2014 15:32:00 -0800 Subject: [PATCH 044/298] net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding [ Upstream commit 2c26d34bbcc0b3f30385d5587aa232289e2eed8e ] When using VXLAN tunnels and a sky2 device, I have experienced checksum failures of the following type: [ 4297.761899] eth0: hw csum failure [...] [ 4297.765223] Call Trace: [ 4297.765224] [] dump_stack+0x46/0x58 [ 4297.765235] [] netdev_rx_csum_fault+0x42/0x50 [ 4297.765238] [] ? skb_push+0x40/0x40 [ 4297.765240] [] __skb_checksum_complete+0xbc/0xd0 [ 4297.765243] [] tcp_v4_rcv+0x2e2/0x950 [ 4297.765246] [] ? ip_rcv_finish+0x360/0x360 These are reliably reproduced in a network topology of: container:eth0 == host(OVS VXLAN on VLAN) == bond0 == eth0 (sky2) -> switch When VXLAN encapsulated traffic is received from a similarly configured peer, the above warning is generated in the receive processing of the encapsulated packet. Note that the warning is associated with the container eth0. The skbs from sky2 have ip_summed set to CHECKSUM_COMPLETE, and because the packet is an encapsulated Ethernet frame, the checksum generated by the hardware includes the inner protocol and Ethernet headers. The receive code is careful to update the skb->csum, except in __dev_forward_skb, as called by dev_forward_skb. __dev_forward_skb calls eth_type_trans, which in turn calls skb_pull_inline(skb, ETH_HLEN) to skip over the Ethernet header, but does not update skb->csum when doing so. This patch resolves the problem by adding a call to skb_postpull_rcsum to update the skb->csum after the call to eth_type_trans. Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index 8ee8f9b39355..a507b6646e7f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1709,6 +1709,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) skb_scrub_packet(skb, true); skb->protocol = eth_type_trans(skb, dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); return netif_rx_internal(skb); } From de3afb937b5e554406b2d8fb38876c7cd2006fe7 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 2 Dec 2014 03:36:57 -0800 Subject: [PATCH 045/298] drm/vmwgfx: Fix fence event code commit 89669e7a7f96be3ee8d9a22a071d7c0d3b4428fc upstream. The commit "vmwgfx: Rework fence event action" introduced a number of bugs that are fixed with this commit: a) A forgotten return stateemnt. b) An if statement with identical branches. Reported-by: Rob Clark Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz Reviewed-by: Sinclair Yeh Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 436b013b4231..b65272d7ea56 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1049,6 +1049,8 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv, if (ret != 0) goto out_no_queue; + return 0; + out_no_queue: event->base.destroy(&event->base); out_no_event: @@ -1124,17 +1126,10 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, BUG_ON(fence == NULL); - if (arg->flags & DRM_VMW_FE_FLAG_REQ_TIME) - ret = vmw_event_fence_action_create(file_priv, fence, - arg->flags, - arg->user_data, - true); - else - ret = vmw_event_fence_action_create(file_priv, fence, - arg->flags, - arg->user_data, - true); - + ret = vmw_event_fence_action_create(file_priv, fence, + arg->flags, + arg->user_data, + true); if (unlikely(ret != 0)) { if (ret != -ERESTARTSYS) DRM_ERROR("Failed to attach event to fence.\n"); From 1b4359eaa51ce65739197addd34a0d41fa81ea32 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 13 Nov 2014 22:43:23 +0900 Subject: [PATCH 046/298] drm/ttm: Avoid memory allocation from shrinker functions. commit 881fdaa5e4cb0d68e52acab0ad4e1820e2bfffa4 upstream. Andrew Morton wrote: > On Wed, 12 Nov 2014 13:08:55 +0900 Tetsuo Handa wrote: > > > Andrew Morton wrote: > > > Poor ttm guys - this is a bit of a trap we set for them. > > > > Commit a91576d7916f6cce ("drm/ttm: Pass GFP flags in order to avoid deadlock.") > > changed to use sc->gfp_mask rather than GFP_KERNEL. > > > > - pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), > > - GFP_KERNEL); > > + pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp); > > > > But this bug is caused by sc->gfp_mask containing some flags which are not > > in GFP_KERNEL, right? Then, I think > > > > - pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp); > > + pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp & GFP_KERNEL); > > > > would hide this bug. > > > > But I think we should use GFP_ATOMIC (or drop __GFP_WAIT flag) > > Well no - ttm_page_pool_free() should stop calling kmalloc altogether. > Just do > > struct page *pages_to_free[16]; > > and rework the code to free 16 pages at a time. Easy. Well, ttm code wants to process 512 pages at a time for performance. Memory footprint increased by 512 * sizeof(struct page *) buffer is only 4096 bytes. What about using static buffer like below? ---------- >From d3cb5393c9c8099d6b37e769f78c31af1541fe8c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 13 Nov 2014 22:21:54 +0900 Subject: drm/ttm: Avoid memory allocation from shrinker functions. Commit a91576d7916f6cce ("drm/ttm: Pass GFP flags in order to avoid deadlock.") caused BUG_ON() due to sc->gfp_mask containing flags which are not in GFP_KERNEL. https://bugzilla.kernel.org/show_bug.cgi?id=87891 Changing from sc->gfp_mask to (sc->gfp_mask & GFP_KERNEL) would avoid the BUG_ON(), but avoiding memory allocation from shrinker function is better and reliable fix. Shrinker function is already serialized by global lock, and clean up function is called after shrinker function is unregistered. Thus, we can use static buffer when called from shrinker function and clean up function. Signed-off-by: Tetsuo Handa Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 26 ++++++++++++++---------- drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 25 ++++++++++++++--------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index cf4bad2c1d59..76329d27385b 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -297,11 +297,12 @@ static void ttm_pool_update_free_locked(struct ttm_page_pool *pool, * * @pool: to free the pages from * @free_all: If set to true will free all pages in pool - * @gfp: GFP flags. + * @use_static: Safe to use static buffer **/ static int ttm_page_pool_free(struct ttm_page_pool *pool, unsigned nr_free, - gfp_t gfp) + bool use_static) { + static struct page *static_buf[NUM_PAGES_TO_ALLOC]; unsigned long irq_flags; struct page *p; struct page **pages_to_free; @@ -311,7 +312,11 @@ static int ttm_page_pool_free(struct ttm_page_pool *pool, unsigned nr_free, if (NUM_PAGES_TO_ALLOC < nr_free) npages_to_free = NUM_PAGES_TO_ALLOC; - pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp); + if (use_static) + pages_to_free = static_buf; + else + pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), + GFP_KERNEL); if (!pages_to_free) { pr_err("Failed to allocate memory for pool free operation\n"); return 0; @@ -374,7 +379,8 @@ restart: if (freed_pages) ttm_pages_put(pages_to_free, freed_pages); out: - kfree(pages_to_free); + if (pages_to_free != static_buf) + kfree(pages_to_free); return nr_free; } @@ -383,8 +389,6 @@ out: * * XXX: (dchinner) Deadlock warning! * - * We need to pass sc->gfp_mask to ttm_page_pool_free(). - * * This code is crying out for a shrinker per pool.... */ static unsigned long @@ -407,8 +411,8 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) if (shrink_pages == 0) break; pool = &_manager->pools[(i + pool_offset)%NUM_POOLS]; - shrink_pages = ttm_page_pool_free(pool, nr_free, - sc->gfp_mask); + /* OK to use static buffer since global mutex is held. */ + shrink_pages = ttm_page_pool_free(pool, nr_free, true); freed += nr_free - shrink_pages; } mutex_unlock(&lock); @@ -710,7 +714,7 @@ static void ttm_put_pages(struct page **pages, unsigned npages, int flags, } spin_unlock_irqrestore(&pool->lock, irq_flags); if (npages) - ttm_page_pool_free(pool, npages, GFP_KERNEL); + ttm_page_pool_free(pool, npages, false); } /* @@ -849,9 +853,9 @@ void ttm_page_alloc_fini(void) pr_info("Finalizing pool allocator\n"); ttm_pool_mm_shrink_fini(_manager); + /* OK to use static buffer since global mutex is no longer used. */ for (i = 0; i < NUM_POOLS; ++i) - ttm_page_pool_free(&_manager->pools[i], FREE_ALL_PAGES, - GFP_KERNEL); + ttm_page_pool_free(&_manager->pools[i], FREE_ALL_PAGES, true); kobject_put(&_manager->kobj); _manager = NULL; diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index ca65df144765..3dfa97d04e51 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -411,11 +411,12 @@ static void ttm_dma_page_put(struct dma_pool *pool, struct dma_page *d_page) * * @pool: to free the pages from * @nr_free: If set to true will free all pages in pool - * @gfp: GFP flags. + * @use_static: Safe to use static buffer **/ static unsigned ttm_dma_page_pool_free(struct dma_pool *pool, unsigned nr_free, - gfp_t gfp) + bool use_static) { + static struct page *static_buf[NUM_PAGES_TO_ALLOC]; unsigned long irq_flags; struct dma_page *dma_p, *tmp; struct page **pages_to_free; @@ -432,7 +433,11 @@ static unsigned ttm_dma_page_pool_free(struct dma_pool *pool, unsigned nr_free, npages_to_free, nr_free); } #endif - pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), gfp); + if (use_static) + pages_to_free = static_buf; + else + pages_to_free = kmalloc(npages_to_free * sizeof(struct page *), + GFP_KERNEL); if (!pages_to_free) { pr_err("%s: Failed to allocate memory for pool free operation\n", @@ -502,7 +507,8 @@ restart: if (freed_pages) ttm_dma_pages_put(pool, &d_pages, pages_to_free, freed_pages); out: - kfree(pages_to_free); + if (pages_to_free != static_buf) + kfree(pages_to_free); return nr_free; } @@ -531,7 +537,8 @@ static void ttm_dma_free_pool(struct device *dev, enum pool_type type) if (pool->type != type) continue; /* Takes a spinlock.. */ - ttm_dma_page_pool_free(pool, FREE_ALL_PAGES, GFP_KERNEL); + /* OK to use static buffer since global mutex is held. */ + ttm_dma_page_pool_free(pool, FREE_ALL_PAGES, true); WARN_ON(((pool->npages_in_use + pool->npages_free) != 0)); /* This code path is called after _all_ references to the * struct device has been dropped - so nobody should be @@ -984,7 +991,7 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev) /* shrink pool if necessary (only on !is_cached pools)*/ if (npages) - ttm_dma_page_pool_free(pool, npages, GFP_KERNEL); + ttm_dma_page_pool_free(pool, npages, false); ttm->state = tt_unpopulated; } EXPORT_SYMBOL_GPL(ttm_dma_unpopulate); @@ -994,8 +1001,6 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate); * * XXX: (dchinner) Deadlock warning! * - * We need to pass sc->gfp_mask to ttm_dma_page_pool_free(). - * * I'm getting sadder as I hear more pathetical whimpers about needing per-pool * shrinkers */ @@ -1028,8 +1033,8 @@ ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) if (++idx < pool_offset) continue; nr_free = shrink_pages; - shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free, - sc->gfp_mask); + /* OK to use static buffer since global mutex is held. */ + shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free, true); freed += nr_free - shrink_pages; pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n", From 4c65add00e3f8e7b2db00588db7d8f3cc68853e2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 7 Nov 2014 11:05:04 -0500 Subject: [PATCH 047/298] drm/radeon: fix typo in CI dpm disable commit 129acb7c0b682512e89c4f65c33593d50f2f49a9 upstream. Need to disable DS, not enable it when disabling dpm. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/ci_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 543ba2d4a659..c7c285646857 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -4733,7 +4733,7 @@ void ci_dpm_disable(struct radeon_device *rdev) ci_enable_spread_spectrum(rdev, false); ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, false); ci_stop_dpm(rdev); - ci_enable_ds_master_switch(rdev, true); + ci_enable_ds_master_switch(rdev, false); ci_enable_ulv(rdev, false); ci_clear_vc(rdev); ci_reset_to_default(rdev); From 08295790175190009e76b31f22572e609608ff0c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 17 Nov 2014 15:08:17 -0500 Subject: [PATCH 048/298] drm/radeon: work around a hw bug in MGCG on CIK commit 4bb62c95a7e781a238b2ab374f34b1bf91e01ddc upstream. Always need to set bit 0 of RLC_CGTT_MGCG_OVERRIDE to avoid unreliable doorbell updates in some cases. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/cik.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ddf70d6c0270..8ef67cb4ef1e 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5879,6 +5879,7 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) } orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); + data |= 0x00000001; data &= 0xfffffffd; if (orig != data) WREG32(RLC_CGTT_MGCG_OVERRIDE, data); @@ -5910,7 +5911,7 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) } } else { orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE); - data |= 0x00000002; + data |= 0x00000003; if (orig != data) WREG32(RLC_CGTT_MGCG_OVERRIDE, data); From 39a9d48d8a0fe4554926bbfce07508c30088297a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 3 Dec 2014 00:03:49 -0500 Subject: [PATCH 049/298] drm/radeon: check the right ring in radeon_evict_flags() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5e5c21cac1001089007260c48b0c89ebaace0e71 upstream. Check the that ring we are using for copies is functional rather than the GFX ring. On newer asics we use the DMA ring for bo moves. Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 040a2a10ea17..45a9a03efc06 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -191,7 +191,7 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, rbo = container_of(bo, struct radeon_bo, tbo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: - if (rbo->rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready == false) + if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false) radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU); else radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT); From bec547b5af2fd43e9a77dbf42984d72a329f1440 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 10 Dec 2014 09:42:10 -0500 Subject: [PATCH 050/298] drm/radeon: properly filter DP1.2 4k modes on non-DP1.2 hw commit 410cce2a6b82299b46ff316c6384e789ce275ecb upstream. The check was already in place in the dp mode_valid check, but radeon_dp_get_dp_link_clock() never returned the high clock mode_valid was checking for because that function clipped the clock based on the hw capabilities. Add an explicit check in the mode_valid function. bug: https://bugs.freedesktop.org/show_bug.cgi?id=87172 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/atombios_dp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 5727dbdeda7f..b4dbaded2caf 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -576,6 +576,10 @@ int radeon_dp_mode_valid_helper(struct drm_connector *connector, struct radeon_connector_atom_dig *dig_connector; int dp_clock; + if ((mode->clock > 340000) && + (!radeon_connector_is_dp12_capable(connector))) + return MODE_CLOCK_HIGH; + if (!radeon_connector->con_priv) return MODE_CLOCK_HIGH; dig_connector = radeon_connector->con_priv; From b6460f20c917df41d0f0d80b8f9d84a46f3e5564 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 11 Apr 2014 15:55:17 +0200 Subject: [PATCH 051/298] drm/i915: Don't complain about stolen conflicts on gen3 commit 0b6d24c01932db99fc95304235e751e7f7625c41 upstream. Apparently stuff works that way on those machines. I agree with Chris' concern that this is a bit risky but imo worth a shot in -next just for fun. Afaics all these machines have the pci resources allocated like that by the BIOS, so I suspect that it's all ok. This regression goes back to commit eaba1b8f3379b5d100bd146b9a41d28348bdfd09 Author: Chris Wilson Date: Thu Jul 4 12:28:35 2013 +0100 drm/i915: Verify that our stolen memory doesn't conflict Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76983 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71031 Tested-by: lu hua Signed-off-by: Daniel Vetter Reviewed-by: Jesse Barnes Tested-by: Paul Menzel Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_stolen.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 3c78b2268209..800e06c28018 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -137,7 +137,11 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) r = devm_request_mem_region(dev->dev, base + 1, dev_priv->gtt.stolen_size - 1, "Graphics Stolen Memory"); - if (r == NULL) { + /* + * GEN3 firmware likes to smash pci bridges into the stolen + * range. Apparently this works. + */ + if (r == NULL && !IS_GEN3(dev)) { DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", base, base + (uint32_t)dev_priv->gtt.stolen_size); base = 0; From cac12318e7c4d707926b6df7316f292f14e1daa3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 24 Nov 2014 08:03:12 +0000 Subject: [PATCH 052/298] drm/i915: Only warn the first time we attempt to mmio whilst suspended commit 2b387059817fd100cddc5a97118d63e3f3fade74 upstream. In all likelihood we will do a few hundred errnoneous register operations if we do a single invalid register access whilst the device is suspended. As each instance causes a WARN, this floods the system logs and can make the system unresponsive. The warning was first introduced in commit b2ec142cb0101f298f8e091c7d75b1ec5b809b65 Author: Paulo Zanoni Date: Fri Feb 21 13:52:25 2014 -0300 drm/i915: call assert_device_not_suspended at gen6_force_wake_work and despite the claims the WARN is still encountered in the wild today. Signed-off-by: Chris Wilson Cc: Paulo Zanoni Cc: Imre Deak Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_uncore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index c8796316d242..b6c063cad59b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -451,8 +451,8 @@ hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg) static void assert_device_not_suspended(struct drm_i915_private *dev_priv) { - WARN(HAS_RUNTIME_PM(dev_priv->dev) && dev_priv->pm.suspended, - "Device suspended\n"); + WARN_ONCE(HAS_RUNTIME_PM(dev_priv->dev) && dev_priv->pm.suspended, + "Device suspended\n"); } #define REG_READ_HEADER(x) \ From 3637f9ee1b9be1750aff98951ae04a880e3d92c1 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 16 Dec 2014 13:55:38 -0500 Subject: [PATCH 053/298] drm/nv4c/mc: disable msi commit 4761703bd04bbdf56396d264903cc5a1fdcb3c01 upstream. Several users have, over time, reported issues with MSI on these IGPs. They're old, rarely available, and MSI doesn't provide such huge advantages on them. Just disable. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87361 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74492 Fixes: fa8c9ac72fe ("drm/nv4c/mc: nv4x igp's have a different msi rearm register") Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c index a75c35ccf25c..165401c4045c 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c @@ -24,13 +24,6 @@ #include "nv04.h" -static void -nv4c_mc_msi_rearm(struct nouveau_mc *pmc) -{ - struct nv04_mc_priv *priv = (void *)pmc; - nv_wr08(priv, 0x088050, 0xff); -} - struct nouveau_oclass * nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .base.handle = NV_SUBDEV(MC, 0x4c), @@ -41,5 +34,4 @@ nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .fini = _nouveau_mc_fini, }, .intr = nv04_mc_intr, - .msi_rearm = nv4c_mc_msi_rearm, }.base; From fdb749fc582fb81fb5fd00a05c25c7ac1aea5d9e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 Dec 2014 08:44:31 +0000 Subject: [PATCH 054/298] drm/i915: Invalidate media caches on gen7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 148b83d0815a3778c8949e6a97cb798cbaa0efb3 upstream. In the gen7 pipe control there is an extra bit to flush the media caches, so let's set it during cache invalidation flushes. v2: Rename to MEDIA_STATE_CLEAR to be more inline with spec. Cc: Simon Farnsworth Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0a3b9386eb43..0c83b3dab58c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -320,6 +320,7 @@ #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ #define PIPE_CONTROL_CS_STALL (1<<20) #define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) #define PIPE_CONTROL_WRITE_FLUSH (1<<12) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d488fc71ef49..8278864bcc87 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -334,6 +334,7 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR; /* * TLB invalidate requires a post-sync write. */ From 0a8ef139dfb7f88335c83e167ee07cb754d84132 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 16 Dec 2014 08:44:32 +0000 Subject: [PATCH 055/298] drm/i915: Force the CS stall for invalidate flushes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit add284a3a2481e759d6bec35f6444c32c8ddc383 upstream. In order to act as a full command barrier by itself, we need to tell the pipecontrol to actually stall the command streamer while the flush runs. We require the full command barrier before operations like MI_SET_CONTEXT, which currently rely on a prior invalidate flush. References: https://bugs.freedesktop.org/show_bug.cgi?id=83677 Cc: Simon Farnsworth Cc: Daniel Vetter Cc: Ville Syrjälä Signed-off-by: Chris Wilson Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8278864bcc87..d2af1e138c91 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -341,6 +341,8 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, flags |= PIPE_CONTROL_QW_WRITE; flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + /* Workaround: we must issue a pipe_control with CS-stall bit * set before a pipe_control command that has the state cache * invalidate bit set. */ From 186909739227802293f997c1944151633e6487f8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 1 Oct 2014 14:28:36 +0530 Subject: [PATCH 056/298] ARC: [nsimosci] move peripherals to match model to FPGA commit e8ef060b37c2d3cc5fd0c0edbe4e42ec1cb9768b upstream. This allows the sdplite/Zebu images to run on OSCI simulation platform Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/boot/dts/nsimosci.dts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts index 398064cef746..4c169d825415 100644 --- a/arch/arc/boot/dts/nsimosci.dts +++ b/arch/arc/boot/dts/nsimosci.dts @@ -20,7 +20,7 @@ /* this is for console on PGU */ /* bootargs = "console=tty0 consoleblank=0"; */ /* this is for console on serial */ - bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug"; + bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug"; }; aliases { @@ -46,9 +46,9 @@ #interrupt-cells = <1>; }; - uart0: serial@c0000000 { + uart0: serial@f0000000 { compatible = "ns8250"; - reg = <0xc0000000 0x2000>; + reg = <0xf0000000 0x2000>; interrupts = <11>; clock-frequency = <3686400>; baud = <115200>; @@ -57,21 +57,21 @@ no-loopback-test = <1>; }; - pgu0: pgu@c9000000 { + pgu0: pgu@f9000000 { compatible = "snps,arcpgufb"; - reg = <0xc9000000 0x400>; + reg = <0xf9000000 0x400>; }; - ps2: ps2@c9001000 { + ps2: ps2@f9001000 { compatible = "snps,arc_ps2"; - reg = <0xc9000400 0x14>; + reg = <0xf9000400 0x14>; interrupts = <13>; interrupt-names = "arc_ps2_irq"; }; - eth0: ethernet@c0003000 { + eth0: ethernet@f0003000 { compatible = "snps,oscilan"; - reg = <0xc0003000 0x44>; + reg = <0xf0003000 0x44>; interrupts = <7>, <8>; interrupt-names = "rx", "tx"; }; From b3bae233fe5884b1e1dda4d381ca8082614921e0 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 7 Feb 2014 13:47:43 +0530 Subject: [PATCH 057/298] ARC: switch to generic ENTRY/END assembler annotations commit ec7ac6afd07b2d958aab9dfc0a686300b856922a upstream. With commit 9df62f054406 "arch: use ASM_NL instead of ';'" the generic macros can handle the arch specific newline quirk. Hence we can get rid of ARC asm macros and use the "C" style macros. Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/linkage.h | 14 --------- arch/arc/kernel/ctx_sw_asm.S | 2 +- arch/arc/kernel/entry.S | 52 +++++++++++++++++----------------- arch/arc/lib/memcmp.S | 6 ++-- arch/arc/lib/memcpy-700.S | 6 ++-- arch/arc/lib/memset.S | 10 +++---- arch/arc/lib/strchr-700.S | 6 ++-- arch/arc/lib/strcmp.S | 6 ++-- arch/arc/lib/strcpy-700.S | 6 ++-- arch/arc/lib/strlen.S | 6 ++-- arch/arc/mm/tlbex.S | 10 +++---- 11 files changed, 54 insertions(+), 70 deletions(-) diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index 66ee5527aefc..5faad17118b4 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -13,20 +13,6 @@ #define ASM_NL ` /* use '`' to mark new line in macro */ -/* Can't use the ENTRY macro in linux/linkage.h - * gas considers ';' as comment vs. newline - */ -.macro ARC_ENTRY name - .global \name - .align 4 - \name: -.endm - -.macro ARC_EXIT name -#define ASM_PREV_SYM_ADDR(name) .-##name - .size \ name, ASM_PREV_SYM_ADDR(\name) -.endm - /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm #ifdef CONFIG_ARC_HAS_DCCM diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S index 65690e7fcc8c..2ff0347a2fd7 100644 --- a/arch/arc/kernel/ctx_sw_asm.S +++ b/arch/arc/kernel/ctx_sw_asm.S @@ -62,4 +62,4 @@ __switch_to: ld.ab blink, [sp, 4] j [blink] -ARC_EXIT __switch_to +END(__switch_to) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 6e8f83a32522..29b82adbf0b4 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -141,7 +141,7 @@ VECTOR EV_Extension ; 0x130, Extn Intruction Excp (0x26) VECTOR reserved ; Reserved Exceptions .endr -#include /* ARC_{EXTRY,EXIT} */ +#include /* {EXTRY,EXIT} */ #include /* SAVE_ALL_{INT1,INT2,SYS...} */ #include #include @@ -184,7 +184,7 @@ reserved: ; processor restart ; --------------------------------------------- ; Level 2 ISR: Can interrupt a Level 1 ISR ; --------------------------------------------- -ARC_ENTRY handle_interrupt_level2 +ENTRY(handle_interrupt_level2) ; TODO-vineetg for SMP this wont work ; free up r9 as scratchpad @@ -225,14 +225,14 @@ ARC_ENTRY handle_interrupt_level2 b ret_from_exception -ARC_EXIT handle_interrupt_level2 +END(handle_interrupt_level2) #endif ; --------------------------------------------- ; Level 1 ISR ; --------------------------------------------- -ARC_ENTRY handle_interrupt_level1 +ENTRY(handle_interrupt_level1) /* free up r9 as scratchpad */ #ifdef CONFIG_SMP @@ -265,7 +265,7 @@ ARC_ENTRY handle_interrupt_level1 sr r8, [AUX_IRQ_LV12] ; clear bit in Sticky Status Reg b ret_from_exception -ARC_EXIT handle_interrupt_level1 +END(handle_interrupt_level1) ;################### Non TLB Exception Handling ############################# @@ -273,7 +273,7 @@ ARC_EXIT handle_interrupt_level1 ; Instruction Error Exception Handler ; --------------------------------------------- -ARC_ENTRY instr_service +ENTRY(instr_service) EXCEPTION_PROLOGUE @@ -284,13 +284,13 @@ ARC_ENTRY instr_service bl do_insterror_or_kprobe b ret_from_exception -ARC_EXIT instr_service +END(instr_service) ; --------------------------------------------- ; Memory Error Exception Handler ; --------------------------------------------- -ARC_ENTRY mem_service +ENTRY(mem_service) EXCEPTION_PROLOGUE @@ -301,13 +301,13 @@ ARC_ENTRY mem_service bl do_memory_error b ret_from_exception -ARC_EXIT mem_service +END(mem_service) ; --------------------------------------------- ; Machine Check Exception Handler ; --------------------------------------------- -ARC_ENTRY EV_MachineCheck +ENTRY(EV_MachineCheck) EXCEPTION_PROLOGUE @@ -331,13 +331,13 @@ ARC_ENTRY EV_MachineCheck j do_machine_check_fault -ARC_EXIT EV_MachineCheck +END(EV_MachineCheck) ; --------------------------------------------- ; Protection Violation Exception Handler ; --------------------------------------------- -ARC_ENTRY EV_TLBProtV +ENTRY(EV_TLBProtV) EXCEPTION_PROLOGUE @@ -385,12 +385,12 @@ ARC_ENTRY EV_TLBProtV b ret_from_exception -ARC_EXIT EV_TLBProtV +END(EV_TLBProtV) ; --------------------------------------------- ; Privilege Violation Exception Handler ; --------------------------------------------- -ARC_ENTRY EV_PrivilegeV +ENTRY(EV_PrivilegeV) EXCEPTION_PROLOGUE @@ -401,12 +401,12 @@ ARC_ENTRY EV_PrivilegeV bl do_privilege_fault b ret_from_exception -ARC_EXIT EV_PrivilegeV +END(EV_PrivilegeV) ; --------------------------------------------- ; Extension Instruction Exception Handler ; --------------------------------------------- -ARC_ENTRY EV_Extension +ENTRY(EV_Extension) EXCEPTION_PROLOGUE @@ -417,7 +417,7 @@ ARC_ENTRY EV_Extension bl do_extension_fault b ret_from_exception -ARC_EXIT EV_Extension +END(EV_Extension) ;######################### System Call Tracing ######################### @@ -504,7 +504,7 @@ trap_with_param: ; (2) Break Points ;------------------------------------------------------------------ -ARC_ENTRY EV_Trap +ENTRY(EV_Trap) EXCEPTION_PROLOGUE @@ -534,9 +534,9 @@ ARC_ENTRY EV_Trap jl [r9] ; Entry into Sys Call Handler ; fall through to ret_from_system_call -ARC_EXIT EV_Trap +END(EV_Trap) -ARC_ENTRY ret_from_system_call +ENTRY(ret_from_system_call) st r0, [sp, PT_r0] ; sys call return value in pt_regs @@ -546,7 +546,7 @@ ARC_ENTRY ret_from_system_call ; ; If ret to user mode do we need to handle signals, schedule() et al. -ARC_ENTRY ret_from_exception +ENTRY(ret_from_exception) ; Pre-{IRQ,Trap,Exception} K/U mode from pt_regs->status32 ld r8, [sp, PT_status32] ; returning to User/Kernel Mode @@ -728,9 +728,9 @@ not_level1_interrupt: debug_marker_syscall: rtie -ARC_EXIT ret_from_exception +END(ret_from_exception) -ARC_ENTRY ret_from_fork +ENTRY(ret_from_fork) ; when the forked child comes here from the __switch_to function ; r0 has the last task pointer. ; put last task in scheduler queue @@ -747,11 +747,11 @@ ARC_ENTRY ret_from_fork ; special case of kernel_thread entry point returning back due to ; kernel_execve() - pretend return from syscall to ret to userland b ret_from_exception -ARC_EXIT ret_from_fork +END(ret_from_fork) ;################### Special Sys Call Wrappers ########################## -ARC_ENTRY sys_clone_wrapper +ENTRY(sys_clone_wrapper) SAVE_CALLEE_SAVED_USER bl @sys_clone DISCARD_CALLEE_SAVED_USER @@ -761,7 +761,7 @@ ARC_ENTRY sys_clone_wrapper bnz tracesys_exit b ret_from_system_call -ARC_EXIT sys_clone_wrapper +END(sys_clone_wrapper) #ifdef CONFIG_ARC_DW2_UNWIND ; Workaround for bug 94179 (STAR ): diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S index bc813d55b6c3..978bf8314dfb 100644 --- a/arch/arc/lib/memcmp.S +++ b/arch/arc/lib/memcmp.S @@ -6,7 +6,7 @@ * published by the Free Software Foundation. */ -#include +#include #ifdef __LITTLE_ENDIAN__ #define WORD2 r2 @@ -16,7 +16,7 @@ #define SHIFT r2 #endif -ARC_ENTRY memcmp +ENTRY(memcmp) or r12,r0,r1 asl_s r12,r12,30 sub r3,r2,1 @@ -121,4 +121,4 @@ ARC_ENTRY memcmp .Lnil: j_s.d [blink] mov r0,0 -ARC_EXIT memcmp +END(memcmp) diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S index b64cc10ac918..3222573e50de 100644 --- a/arch/arc/lib/memcpy-700.S +++ b/arch/arc/lib/memcpy-700.S @@ -6,9 +6,9 @@ * published by the Free Software Foundation. */ -#include +#include -ARC_ENTRY memcpy +ENTRY(memcpy) or r3,r0,r1 asl_s r3,r3,30 mov_s r5,r0 @@ -63,4 +63,4 @@ ARC_ENTRY memcpy .Lendbloop: j_s.d [blink] stb r12,[r5,0] -ARC_EXIT memcpy +END(memcpy) diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S index 9b2d88d2e141..d36bd43fc98d 100644 --- a/arch/arc/lib/memset.S +++ b/arch/arc/lib/memset.S @@ -6,11 +6,11 @@ * published by the Free Software Foundation. */ -#include +#include #define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ -ARC_ENTRY memset +ENTRY(memset) mov_s r4,r0 or r12,r0,r2 bmsk.f r12,r12,1 @@ -46,14 +46,14 @@ ARC_ENTRY memset stb.ab r1,[r4,1] .Ltiny_end: j_s [blink] -ARC_EXIT memset +END(memset) ; memzero: @r0 = mem, @r1 = size_t ; memset: @r0 = mem, @r1 = char, @r2 = size_t -ARC_ENTRY memzero +ENTRY(memzero) ; adjust bzero args to memset args mov r2, r1 mov r1, 0 b memset ;tail call so need to tinker with blink -ARC_EXIT memzero +END(memzero) diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S index 9c548c7cf001..b725d5862107 100644 --- a/arch/arc/lib/strchr-700.S +++ b/arch/arc/lib/strchr-700.S @@ -11,9 +11,9 @@ presence of the norm instruction makes it easier to operate on whole words branch-free. */ -#include +#include -ARC_ENTRY strchr +ENTRY(strchr) extb_s r1,r1 asl r5,r1,8 bmsk r2,r0,1 @@ -130,4 +130,4 @@ ARC_ENTRY strchr j_s.d [blink] mov.mi r0,0 #endif /* ENDIAN */ -ARC_EXIT strchr +END(strchr) diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S index 5dc802b45cf3..3544600fefe6 100644 --- a/arch/arc/lib/strcmp.S +++ b/arch/arc/lib/strcmp.S @@ -13,9 +13,9 @@ source 1; however, that would increase the overhead for loop setup / finish, and strcmp might often terminate early. */ -#include +#include -ARC_ENTRY strcmp +ENTRY(strcmp) or r2,r0,r1 bmsk_s r2,r2,1 brne r2,0,.Lcharloop @@ -93,4 +93,4 @@ ARC_ENTRY strcmp .Lcmpend: j_s.d [blink] sub r0,r2,r3 -ARC_EXIT strcmp +END(strcmp) diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S index b7ca4ae81d88..8422f38e1218 100644 --- a/arch/arc/lib/strcpy-700.S +++ b/arch/arc/lib/strcpy-700.S @@ -16,9 +16,9 @@ there, but the it is not likely to be taken often, and it would also be likey to cost an unaligned mispredict at the next call. */ -#include +#include -ARC_ENTRY strcpy +ENTRY(strcpy) or r2,r0,r1 bmsk_s r2,r2,1 brne.d r2,0,charloop @@ -67,4 +67,4 @@ charloop: brne.d r3,0,charloop stb.ab r3,[r10,1] j [blink] -ARC_EXIT strcpy +END(strcpy) diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S index 39759e099696..53cfd5685a5f 100644 --- a/arch/arc/lib/strlen.S +++ b/arch/arc/lib/strlen.S @@ -6,9 +6,9 @@ * published by the Free Software Foundation. */ -#include +#include -ARC_ENTRY strlen +ENTRY(strlen) or r3,r0,7 ld r2,[r3,-7] ld.a r6,[r3,-3] @@ -80,4 +80,4 @@ ARC_ENTRY strlen .Learly_end: b.d .Lend sub_s.ne r1,r1,r1 -ARC_EXIT strlen +END(strlen) diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 3fcfdb38d242..79bfc81358c9 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -260,7 +260,7 @@ ARCFP_CODE ;Fast Path Code, candidate for ICCM ; I-TLB Miss Exception Handler ;----------------------------------------------------------------------------- -ARC_ENTRY EV_TLBMissI +ENTRY(EV_TLBMissI) TLBMISS_FREEUP_REGS @@ -293,13 +293,13 @@ ARC_ENTRY EV_TLBMissI TLBMISS_RESTORE_REGS rtie -ARC_EXIT EV_TLBMissI +END(EV_TLBMissI) ;----------------------------------------------------------------------------- ; D-TLB Miss Exception Handler ;----------------------------------------------------------------------------- -ARC_ENTRY EV_TLBMissD +ENTRY(EV_TLBMissD) TLBMISS_FREEUP_REGS @@ -381,6 +381,4 @@ do_slow_path_pf: bl do_page_fault b ret_from_exception -ARC_EXIT EV_TLBMissD - -ARC_ENTRY EV_TLBMissB ; Bogus entry to measure sz of DTLBMiss hdlr +END(EV_TLBMissD) From 8ddc4f7e414ab53c5038784811e7b8a6dc009200 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 2 Dec 2014 09:53:25 +0200 Subject: [PATCH 058/298] cfg80211: don't WARN about two consecutive Country IE hint commit 70dcec5a488a7b81779190ac8089475fe4b8b962 upstream. This can happen and there is no point in added more detection code lower in the stack. Catching these in one single point (cfg80211) is enough. Stop WARNING about this case. This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=89001 Fixes: 2f1c6c572d7b ("cfg80211: process non country IE conflicting first") Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/reg.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 338794ea44d1..856933635082 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1625,11 +1625,8 @@ __reg_process_hint_country_ie(struct wiphy *wiphy, return REG_REQ_IGNORE; return REG_REQ_ALREADY_SET; } - /* - * Two consecutive Country IE hints on the same wiphy. - * This should be picked up early by the driver/stack - */ - if (WARN_ON(regdom_changes(country_ie_request->alpha2))) + + if (regdom_changes(country_ie_request->alpha2)) return REG_REQ_OK; return REG_REQ_ALREADY_SET; } From 466e935215eb10b289be4242881588d708631768 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Thu, 4 Dec 2014 12:22:16 +0200 Subject: [PATCH 059/298] cfg80211: avoid mem leak on driver hint set commit 34f05f543f02350e920bddb7660ffdd4697aaf60 upstream. In the already-set and intersect case of a driver-hint, the previous wiphy regdomain was not freed before being reset with a copy of the cfg80211 regdomain. Signed-off-by: Arik Nemtsov Acked-by: Luis R. Rodriguez Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/reg.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 856933635082..04d530560ec8 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1547,7 +1547,7 @@ static enum reg_request_treatment reg_process_hint_driver(struct wiphy *wiphy, struct regulatory_request *driver_request) { - const struct ieee80211_regdomain *regd; + const struct ieee80211_regdomain *regd, *tmp; enum reg_request_treatment treatment; treatment = __reg_process_hint_driver(driver_request); @@ -1566,7 +1566,10 @@ reg_process_hint_driver(struct wiphy *wiphy, kfree(driver_request); return REG_REQ_IGNORE; } + + tmp = get_wiphy_regdom(wiphy); rcu_assign_pointer(wiphy->regd, regd); + rcu_free_regdom(tmp); } From 1cc15ed2a028739efd4ed07a53d305ae4415a232 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 11 Dec 2014 23:48:55 +0200 Subject: [PATCH 060/298] cfg80211: Fix 160 MHz channels with 80+80 and 160 MHz drivers commit 08f6f147773b23b765b94633a8eaa82e7defcf4c upstream. The VHT supported channel width field is a two bit integer, not a bitfield. cfg80211_chandef_usable() was interpreting it incorrectly and ended up rejecting 160 MHz channel width if the driver indicated support for both 160 and 80+80 MHz channels. Fixes: 3d9d1d6656a73 ("nl80211/cfg80211: support VHT channel configuration") (however, no real drivers had 160 MHz support it until 3.16) Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/chan.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 78559b5bbd1f..27157a7801e8 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -516,7 +516,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, { struct ieee80211_sta_ht_cap *ht_cap; struct ieee80211_sta_vht_cap *vht_cap; - u32 width, control_freq; + u32 width, control_freq, cap; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return false; @@ -554,7 +554,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, return false; break; case NL80211_CHAN_WIDTH_80P80: - if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)) + cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) return false; case NL80211_CHAN_WIDTH_80: if (!vht_cap->vht_supported) @@ -565,7 +566,9 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, case NL80211_CHAN_WIDTH_160: if (!vht_cap->vht_supported) return false; - if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)) + cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && + cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) return false; prohibited_flags |= IEEE80211_CHAN_NO_160MHZ; width = 160; From 8aae71a78dca94143c0ee3116c3e7337144874c1 Mon Sep 17 00:00:00 2001 From: Dominique Leuenberger Date: Thu, 13 Nov 2014 20:57:37 +0100 Subject: [PATCH 061/298] hp_accel: Add support for HP ZBook 15 commit 6583659e0f92e38079a8dd081e0a1181a0f37747 upstream. HP ZBook 15 laptop needs a non-standard mapping (x_inverted). BugLink: http://bugzilla.opensuse.org/show_bug.cgi?id=905329 Signed-off-by: Dominique Leuenberger Signed-off-by: Takashi Iwai Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp_accel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 3dc934438c28..07fbcb0fb646 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -237,6 +237,7 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("HPB64xx", "HP ProBook 64", xy_swap), AXIS_DMI_MATCH("HPB64xx", "HP EliteBook 84", xy_swap), AXIS_DMI_MATCH("HPB65xx", "HP ProBook 65", x_inverted), + AXIS_DMI_MATCH("HPZBook15", "HP ZBook 15", x_inverted), { NULL, } /* Laptop models without axis info (yet): * "NC6910" "HP Compaq 6910" From 7177a581fe3eeb67b6a5be052912bf217695e23f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 18 Dec 2014 11:51:01 +0100 Subject: [PATCH 062/298] tick/powerclamp: Remove tick_nohz_idle abuse commit a5fd9733a30d18d7ac23f17080e7e07bb3205b69 upstream. commit 4dbd27711cd9 "tick: export nohz tick idle symbols for module use" was merged via the thermal tree without an explicit ack from the relevant maintainers. The exports are abused by the intel powerclamp driver which implements a fake idle state from a sched FIFO task. This causes all kinds of wreckage in the NOHZ core code which rightfully assumes that tick_nohz_idle_enter/exit() are only called from the idle task itself. Recent changes in the NOHZ core lead to a failure of the powerclamp driver and now people try to hack completely broken and backwards workarounds into the NOHZ core code. This is completely unacceptable and just papers over the real problem. There are way more subtle issues lurking around the corner. The real solution is to fix the powerclamp driver by rewriting it with a sane concept, but that's beyond the scope of this. So the only solution for now is to remove the calls into the core NOHZ code from the powerclamp trainwreck along with the exports. Fixes: d6d71ee4a14a "PM: Introduce Intel PowerClamp Driver" Signed-off-by: Thomas Gleixner Cc: Preeti U Murthy Cc: Viresh Kumar Cc: Frederic Weisbecker Cc: Fengguang Wu Cc: Frederic Weisbecker Cc: Pan Jacob jun Cc: LKP Cc: Peter Zijlstra Cc: Zhang Rui Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1412181110110.17382@nanos Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/intel_powerclamp.c | 2 -- kernel/time/tick-sched.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index a084325f1386..6e75177915fa 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -435,7 +435,6 @@ static int clamp_thread(void *arg) * allowed. thus jiffies are updated properly. */ preempt_disable(); - tick_nohz_idle_enter(); /* mwait until target jiffies is reached */ while (time_before(jiffies, target_jiffies)) { unsigned long ecx = 1; @@ -451,7 +450,6 @@ static int clamp_thread(void *arg) start_critical_timings(); atomic_inc(&idle_wakeup_counter); } - tick_nohz_idle_exit(); preempt_enable(); } del_timer_sync(&wakeup_timer); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6558b7ac112d..8c08a6f9cca0 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -807,7 +807,6 @@ void tick_nohz_idle_enter(void) local_irq_enable(); } -EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); /** * tick_nohz_irq_exit - update next tick event from interrupt exit @@ -934,7 +933,6 @@ void tick_nohz_idle_exit(void) local_irq_enable(); } -EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) { From 5666a3de7ab455e889cdcecd2128bc316f842df3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Dec 2014 23:01:41 +0100 Subject: [PATCH 063/298] genirq: Prevent proc race against freeing of irq descriptors commit c291ee622165cb2c8d4e7af63fffd499354a23be upstream. Since the rework of the sparse interrupt code to actually free the unused interrupt descriptors there exists a race between the /proc interfaces to the irq subsystem and the code which frees the interrupt descriptor. CPU0 CPU1 show_interrupts() desc = irq_to_desc(X); free_desc(desc) remove_from_radix_tree(); kfree(desc); raw_spinlock_irq(&desc->lock); /proc/interrupts is the only interface which can actively corrupt kernel memory via the lock access. /proc/stat can only read from freed memory. Extremly hard to trigger, but possible. The interfaces in /proc/irq/N/ are not affected by this because the removal of the proc file is serialized in procfs against concurrent readers/writers. The removal happens before the descriptor is freed. For architectures which have CONFIG_SPARSE_IRQ=n this is a non issue as the descriptor is never freed. It's merely cleared out with the irq descriptor lock held. So any concurrent proc access will either see the old correct value or the cleared out ones. Protect the lookup and access to the irq descriptor in show_interrupts() with the sparse_irq_lock. Provide kstat_irqs_usr() which is protecting the lookup and access with sparse_irq_lock and switch /proc/stat to use it. Document the existing kstat_irqs interfaces so it's clear that the caller needs to take care about protection. The users of these interfaces are either not affected due to SPARSE_IRQ=n or already protected against removal. Fixes: 1f5a5b87f78f "genirq: Implement a sane sparse_irq allocator" Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- fs/proc/stat.c | 2 +- include/linux/kernel_stat.h | 1 + kernel/irq/internals.h | 8 ++++++ kernel/irq/irqdesc.c | 52 +++++++++++++++++++++++++++++++++++++ kernel/irq/proc.c | 22 +++++++++++++++- 5 files changed, 83 insertions(+), 2 deletions(-) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6f599c62f0cc..dbd027235440 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -159,7 +159,7 @@ static int show_stat(struct seq_file *p, void *v) /* sum again ? it could be updated? */ for_each_irq_nr(j) - seq_put_decimal_ull(p, ' ', kstat_irqs(j)); + seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j)); seq_printf(p, "\nctxt %llu\n" diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 51c72be4a7c3..4b2053a232c9 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -74,6 +74,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) * Number of interrupts per specific IRQ source, since bootup */ extern unsigned int kstat_irqs(unsigned int irq); +extern unsigned int kstat_irqs_usr(unsigned int irq); /* * Number of interrupts per cpu, since bootup diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 001fa5bab490..8a160e8a44e8 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -74,6 +74,14 @@ extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu); extern void mask_irq(struct irq_desc *desc); extern void unmask_irq(struct irq_desc *desc); +#ifdef CONFIG_SPARSE_IRQ +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); +#else +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } +#endif + extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 8ab8e9390297..07d45516b540 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -131,6 +131,16 @@ static void free_masks(struct irq_desc *desc) static inline void free_masks(struct irq_desc *desc) { } #endif +void irq_lock_sparse(void) +{ + mutex_lock(&sparse_irq_lock); +} + +void irq_unlock_sparse(void) +{ + mutex_unlock(&sparse_irq_lock); +} + static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) { struct irq_desc *desc; @@ -167,6 +177,12 @@ static void free_desc(unsigned int irq) unregister_irq_proc(irq, desc); + /* + * sparse_irq_lock protects also show_interrupts() and + * kstat_irq_usr(). Once we deleted the descriptor from the + * sparse tree we can free it. Access in proc will fail to + * lookup the descriptor. + */ mutex_lock(&sparse_irq_lock); delete_irq_desc(irq); mutex_unlock(&sparse_irq_lock); @@ -489,6 +505,15 @@ void dynamic_irq_cleanup(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } +/** + * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu + * @irq: The interrupt number + * @cpu: The cpu number + * + * Returns the sum of interrupt counts on @cpu since boot for + * @irq. The caller must ensure that the interrupt is not removed + * concurrently. + */ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); @@ -497,6 +522,14 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } +/** + * kstat_irqs - Get the statistics for an interrupt + * @irq: The interrupt number + * + * Returns the sum of interrupt counts on all cpus since boot for + * @irq. The caller must ensure that the interrupt is not removed + * concurrently. + */ unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -509,3 +542,22 @@ unsigned int kstat_irqs(unsigned int irq) sum += *per_cpu_ptr(desc->kstat_irqs, cpu); return sum; } + +/** + * kstat_irqs_usr - Get the statistics for an interrupt + * @irq: The interrupt number + * + * Returns the sum of interrupt counts on all cpus since boot for + * @irq. Contrary to kstat_irqs() this can be called from any + * preemptible context. It's protected against concurrent removal of + * an interrupt descriptor when sparse irqs are enabled. + */ +unsigned int kstat_irqs_usr(unsigned int irq) +{ + int sum; + + irq_lock_sparse(); + sum = kstat_irqs(irq); + irq_unlock_sparse(); + return sum; +} diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 36f6ee181b0c..095cd7230aef 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -15,6 +15,23 @@ #include "internals.h" +/* + * Access rules: + * + * procfs protects read/write of /proc/irq/N/ files against a + * concurrent free of the interrupt descriptor. remove_proc_entry() + * immediately prevents new read/writes to happen and waits for + * already running read/write functions to complete. + * + * We remove the proc entries first and then delete the interrupt + * descriptor from the radix tree and free it. So it is guaranteed + * that irq_to_desc(N) is valid as long as the read/writes are + * permitted by procfs. + * + * The read from /proc/interrupts is a different problem because there + * is no protection. So the lookup and the access to irqdesc + * information must be protected by sparse_irq_lock. + */ static struct proc_dir_entry *root_irq_dir; #ifdef CONFIG_SMP @@ -437,9 +454,10 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } + irq_lock_sparse(); desc = irq_to_desc(i); if (!desc) - return 0; + goto outsparse; raw_spin_lock_irqsave(&desc->lock, flags); for_each_online_cpu(j) @@ -479,6 +497,8 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); out: raw_spin_unlock_irqrestore(&desc->lock, flags); +outsparse: + irq_unlock_sparse(); return 0; } #endif From 1f95b9f9ee9f3b3346990fa658f7c0fd5d370073 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 20 Nov 2014 20:50:07 -0800 Subject: [PATCH 064/298] iscsi-target: Fail connection on short sendmsg writes commit 6bf6ca7515c1df06f5c03737537f5e0eb191e29e upstream. This patch changes iscsit_do_tx_data() to fail on short writes when kernel_sendmsg() returns a value different than requested transfer length, returning -EPIPE and thus causing a connection reset to occur. This avoids a potential bug in the original code where a short write would result in kernel_sendmsg() being called again with the original iovec base + length. In practice this has not been an issue because iscsit_do_tx_data() is only used for transferring 48 byte headers + 4 byte digests, along with seldom used control payloads from NOPIN + TEXT_RSP + REJECT with less than 32k of data. So following Al's audit of iovec consumers, go ahead and fail the connection on short writes for now, and remove the bogus logic ahead of his proper upstream fix. Reported-by: Al Viro Cc: David S. Miller Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/iscsi/iscsi_target_util.c | 26 ++++++++++-------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index ab77f80ead2b..1e406af4ee47 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -1356,15 +1356,15 @@ static int iscsit_do_tx_data( struct iscsi_conn *conn, struct iscsi_data_count *count) { - int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len; + int ret, iov_len; struct kvec *iov_p; struct msghdr msg; if (!conn || !conn->sock || !conn->conn_ops) return -1; - if (data <= 0) { - pr_err("Data length is: %d\n", data); + if (count->data_length <= 0) { + pr_err("Data length is: %d\n", count->data_length); return -1; } @@ -1373,20 +1373,16 @@ static int iscsit_do_tx_data( iov_p = count->iov; iov_len = count->iov_count; - while (total_tx < data) { - tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, - (data - total_tx)); - if (tx_loop <= 0) { - pr_debug("tx_loop: %d total_tx %d\n", - tx_loop, total_tx); - return tx_loop; - } - total_tx += tx_loop; - pr_debug("tx_loop: %d, total_tx: %d, data: %d\n", - tx_loop, total_tx, data); + ret = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, + count->data_length); + if (ret != count->data_length) { + pr_err("Unexpected ret: %d send data %d\n", + ret, count->data_length); + return -EPIPE; } + pr_debug("ret: %d, sent data: %d\n", ret, count->data_length); - return total_tx; + return ret; } int rx_data( From de00e825cd000dc50ce533befad96d2487ba7686 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 2 Dec 2014 20:58:46 +0530 Subject: [PATCH 065/298] Revert "[SCSI] mpt2sas: Remove phys on topology change." commit 81a89c2d891b78695aa7e4cc0d5a7427785ae078 upstream. This reverts commit 3520f9c779bed098ca76dd3fb6377264301d57ed ("mpt2sas: Remove phys on topology change") Reverting the previous mpt2sas drives patch changes, since we will observe below issue Issue: Drives connected Enclosure/Expander will unregister with SCSI Transport Layer, if any one remove and add expander cable with in DMD (Device Missing Delay) time period or even any one power-off and power-on the Enclosure with in the DMD period. Signed-off-by: Sreekanth Reddy Reviewed-by: Tomas Henzl Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt2sas/mpt2sas_transport.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c index 410f4a3e8888..72f9c55d0e00 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_transport.c +++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c @@ -1006,12 +1006,9 @@ mpt2sas_transport_update_links(struct MPT2SAS_ADAPTER *ioc, &mpt2sas_phy->remote_identify); _transport_add_phy_to_an_existing_port(ioc, sas_node, mpt2sas_phy, mpt2sas_phy->remote_identify.sas_address); - } else { + } else memset(&mpt2sas_phy->remote_identify, 0 , sizeof(struct sas_identify)); - _transport_del_phy_from_an_existing_port(ioc, sas_node, - mpt2sas_phy); - } if (mpt2sas_phy->phy) mpt2sas_phy->phy->negotiated_linkrate = From 90bb50f5f7e6ac9ca1d7f1bc77872a9adbcf2439 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Tue, 2 Dec 2014 20:58:47 +0530 Subject: [PATCH 066/298] Revert "[SCSI] mpt3sas: Remove phys on topology change" commit 2311ce4d9c91ed63a46e18f0378f3e406e7e888e upstream. This reverts commit 963ba22b90a955363644cd397b20226928eab976 ("mpt3sas: Remove phys on topology change") Reverting the previous mpt3sas drives patch changes, since we will observe below issue Issue: Drives connected Enclosure/Expander will unregister with SCSI Transport Layer, if any one remove and add expander cable with in DMD (Device Missing Delay) time period or even any one power-off and power-on the Enclosure with in the DMD period. Signed-off-by: Sreekanth Reddy Reviewed-by: Tomas Henzl Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_transport.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c index 65170cb1a00f..55aa597eb229 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_transport.c +++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c @@ -1003,12 +1003,9 @@ mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc, &mpt3sas_phy->remote_identify); _transport_add_phy_to_an_existing_port(ioc, sas_node, mpt3sas_phy, mpt3sas_phy->remote_identify.sas_address); - } else { + } else memset(&mpt3sas_phy->remote_identify, 0 , sizeof(struct sas_identify)); - _transport_del_phy_from_an_existing_port(ioc, sas_node, - mpt3sas_phy); - } if (mpt3sas_phy->phy) mpt3sas_phy->phy->negotiated_linkrate = From 483353a777bd3a233920eacfa0e08fc990b182aa Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 3 Dec 2014 21:18:10 -0500 Subject: [PATCH 067/298] scsi: blacklist RSOC for Microsoft iSCSI target devices commit 198a956a11b15b564ac06d1411881e215b587408 upstream. The Microsoft iSCSI target does not support REPORT SUPPORTED OPERATION CODES. Blacklist these devices so we don't attempt to send the command. Signed-off-by: Martin K. Petersen Tested-by: Mike Christie Reported-by: jazz@deti74.ru Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index c1d04d4d3c6c..262ab837a704 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -211,6 +211,7 @@ static struct { {"Medion", "Flash XL MMC/SD", "2.6D", BLIST_FORCELUN}, {"MegaRAID", "LD", NULL, BLIST_FORCELUN}, {"MICROP", "4110", NULL, BLIST_NOTQ}, + {"MSFT", "Virtual HD", NULL, BLIST_NO_RSOC}, {"MYLEX", "DACARMRB", "*", BLIST_REPORTLUN2}, {"nCipher", "Fastness Crypto", NULL, BLIST_FORCELUN}, {"NAKAMICH", "MJ-4.8S", NULL, BLIST_FORCELUN | BLIST_SINGLELUN}, From f35509c3a16001ba8425c341ce3b0e8d0909c822 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 26 Nov 2014 15:24:13 +0100 Subject: [PATCH 068/298] clk: samsung: Fix double add of syscore ops after driver rebind commit c31844ffdbd4e73a16c66e9d7df8ec290ab4b159 upstream. During driver unbind the syscore ops were not unregistered which lead to double add on syscore list: $ echo "3810000.audss-clock-controller" > /sys/bus/platform/drivers/exynos-audss-clk/unbind $ echo "3810000.audss-clock-controller" > /sys/bus/platform/drivers/exynos-audss-clk/bind [ 1463.044061] ------------[ cut here ]------------ [ 1463.047255] WARNING: CPU: 0 PID: 1 at lib/list_debug.c:36 __list_add+0x8c/0xc0() [ 1463.054613] list_add double add: new=c06e52c0, prev=c06e52c0, next=c06d5f84. [ 1463.061625] Modules linked in: [ 1463.064623] CPU: 0 PID: 1 Comm: bash Tainted: G W 3.18.0-rc5-next-20141121-00005-ga8fab06eab42-dirty #1022 [ 1463.075338] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 1463.083046] [] (show_stack) from [] (dump_stack+0x70/0xbc) [ 1463.090236] [] (dump_stack) from [] (warn_slowpath_common+0x74/0xb0) [ 1463.098295] [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x30/0x40) [ 1463.106962] [] (warn_slowpath_fmt) from [] (__list_add+0x8c/0xc0) [ 1463.114760] [] (__list_add) from [] (register_syscore_ops+0x30/0x3c) [ 1463.122819] [] (register_syscore_ops) from [] (exynos_audss_clk_probe+0x36c/0x460) [ 1463.132091] [] (exynos_audss_clk_probe) from [] (platform_drv_probe+0x48/0xa4) [ 1463.141013] [] (platform_drv_probe) from [] (driver_probe_device+0x13c/0x37c) [ 1463.149852] [] (driver_probe_device) from [] (bind_store+0x90/0xe0) [ 1463.157822] [] (bind_store) from [] (drv_attr_store+0x20/0x2c) [ 1463.165363] [] (drv_attr_store) from [] (sysfs_kf_write+0x4c/0x50) [ 1463.173252] [] (sysfs_kf_write) from [] (kernfs_fop_write+0xbc/0x198) [ 1463.181395] [] (kernfs_fop_write) from [] (vfs_write+0xa0/0x1a8) [ 1463.189104] [] (vfs_write) from [] (SyS_write+0x40/0x8c) [ 1463.196122] [] (SyS_write) from [] (ret_fast_syscall+0x0/0x48) [ 1463.203655] ---[ end trace 08f6710c9bc8d8f3 ]--- [ 1463.208244] exynos-audss-clk 3810000.audss-clock-controller: setup completed Signed-off-by: Krzysztof Kozlowski Fixes: 1241ef94ccc3 ("clk: samsung: register audio subsystem clocks using common clock framework") Signed-off-by: Sylwester Nawrocki Signed-off-by: Greg Kroah-Hartman --- drivers/clk/samsung/clk-exynos-audss.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/samsung/clk-exynos-audss.c b/drivers/clk/samsung/clk-exynos-audss.c index 884187fbfe00..7f30b94c00a5 100644 --- a/drivers/clk/samsung/clk-exynos-audss.c +++ b/drivers/clk/samsung/clk-exynos-audss.c @@ -210,6 +210,10 @@ static int exynos_audss_clk_remove(struct platform_device *pdev) { int i; +#ifdef CONFIG_PM_SLEEP + unregister_syscore_ops(&exynos_audss_clk_syscore_ops); +#endif + of_clk_del_provider(pdev->dev.of_node); for (i = 0; i < clk_data.clk_num; i++) { From d3bc0f5b485af25d9d314ec072ca0297cf58f6c0 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Tue, 2 Dec 2014 08:54:19 +0100 Subject: [PATCH 069/298] clk: Don't try to use a struct clk* after it could have been freed commit 10cdfe54dab034311c8e2fad9ba2dffbe616caa9 upstream. As __clk_release could call kfree on clk and then we wouldn't have a safe way of getting the module that owns the clock. Signed-off-by: Tomeu Vizoso Fixes: fcb0ee6a3d33 ("clk: Implement clk_unregister") Reviewed-by: Stephen Boyd Signed-off-by: Michael Turquette Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f9c4632d4dd3..7145f6d93567 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2232,14 +2232,17 @@ int __clk_get(struct clk *clk) void __clk_put(struct clk *clk) { + struct module *owner; + if (!clk || WARN_ON_ONCE(IS_ERR(clk))) return; clk_prepare_lock(); + owner = clk->owner; kref_put(&clk->ref, __clk_release); clk_prepare_unlock(); - module_put(clk->owner); + module_put(owner); } /*** clk rate change notifiers ***/ From 96afc728b68221243d0c91062e16908640e21354 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 14 Dec 2014 10:49:11 -0500 Subject: [PATCH 070/298] parisc: fix out-of-register compiler error in ldcw inline assembler function commit 45db07382a5c78b0c43b3b0002b63757fb60e873 upstream. The __ldcw macro has a problem when its argument needs to be reloaded from memory. The output memory operand and the input register operand both need to be reloaded using a register in class R1_REGS when generating 64-bit code. This fails because there's only a single register in the class. Instead, use a memory clobber. This also makes the __ldcw macro a compiler memory barrier. Signed-off-by: John David Anglin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/ldcw.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index d2d11b7055ba..8121aa6db2ff 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -33,11 +33,18 @@ #endif /*!CONFIG_PA20*/ -/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. */ +/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. + We don't explicitly expose that "*a" may be written as reload + fails to find a register in class R1_REGS when "a" needs to be + reloaded when generating 64-bit PIC code. Instead, we clobber + memory to indicate to the compiler that the assembly code reads + or writes to items other than those listed in the input and output + operands. This may pessimize the code somewhat but __ldcw is + usually used within code blocks surrounded by memory barriors. */ #define __ldcw(a) ({ \ unsigned __ret; \ - __asm__ __volatile__(__LDCW " 0(%2),%0" \ - : "=r" (__ret), "+m" (*(a)) : "r" (a)); \ + __asm__ __volatile__(__LDCW " 0(%1),%0" \ + : "=r" (__ret) : "r" (a) : "memory"); \ __ret; \ }) From 255fa650d0767a454859a6ba7a014322857295e2 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 5 Dec 2014 19:38:18 -0800 Subject: [PATCH 071/298] storvsc: ring buffer failures may result in I/O freeze commit e86fb5e8ab95f10ec5f2e9430119d5d35020c951 upstream. When ring buffer returns an error indicating retry, storvsc may not return a proper error code to SCSI when bounce buffer is not used. This has introduced I/O freeze on RAID running atop storvsc devices. This patch fixes it by always returning a proper error code. Signed-off-by: Long Li Reviewed-by: K. Y. Srinivasan Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index ed0f899e8aa5..86b05151fdab 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1690,13 +1690,12 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) if (ret == -EAGAIN) { /* no more space */ - if (cmd_request->bounce_sgl_count) { + if (cmd_request->bounce_sgl_count) destroy_bounce_buffer(cmd_request->bounce_sgl, cmd_request->bounce_sgl_count); - ret = SCSI_MLQUEUE_DEVICE_BUSY; - goto queue_error; - } + ret = SCSI_MLQUEUE_DEVICE_BUSY; + goto queue_error; } return 0; From 92570438f512ad7ef883426aa0a784b0bb6bb382 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 2 Jan 2015 16:15:59 -0600 Subject: [PATCH 072/298] net: ethernet: cpsw: fix hangs with interrupts commit 7ce67a38f799d1fb332f672b117efbadedaa5352 upstream. The CPSW IP implements pulse-signaled interrupts. Due to that we must write a correct, pre-defined value to the CPDMA_MACEOIVECTOR register so the controller generates a pulse on the correct IRQ line to signal the End Of Interrupt. The way the driver is written today, all four IRQ lines are requested using the same IRQ handler and, because of that, we could fall into situations where a TX IRQ fires but we tell the controller that we ended an RX IRQ (or vice-versa). This situation triggers an IRQ storm on the reserved IRQ 127 of INTC which will in turn call ack_bad_irq() which will, then, print a ton of: unexpected IRQ trap at vector 00 In order to fix the problem, we are moving all calls to cpdma_ctlr_eoi() inside the IRQ handler and making sure we *always* write the correct value to the CPDMA_MACEOIVECTOR register. Note that the algorithm assumes that IRQ numbers and value-to-be-written-to-EOI are proportional, meaning that a write of value 0 would trigger an EOI pulse for the RX_THRESHOLD Interrupt and that's the IRQ number sitting in the 0-th index of our irqs_table array. This, however, is safe at least for current implementations of CPSW so we will refrain from making the check smarter (and, as a side-effect, slower) until we actually have a platform where IRQ lines are swapped. This patch has been tested for several days with AM335x- and AM437x-based platforms. AM57x was left out because there are still pending patches to enable ethernet in mainline for that platform. A read of the TRM confirms the statement on previous paragraph. Reported-by: Yegor Yefremov Fixes: 510a1e7 (drivers: net: davinci_cpdma: acknowledge interrupt properly) Signed-off-by: Felipe Balbi Acked-by: Tony Lindgren Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 921b9df2faca..4a2508e22218 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -716,6 +716,14 @@ static void cpsw_rx_handler(void *token, int len, int status) static irqreturn_t cpsw_interrupt(int irq, void *dev_id) { struct cpsw_priv *priv = dev_id; + int value = irq - priv->irqs_table[0]; + + /* NOTICE: Ending IRQ here. The trick with the 'value' variable above + * is to make sure we will always write the correct value to the EOI + * register. Namely 0 for RX_THRESH Interrupt, 1 for RX Interrupt, 2 + * for TX Interrupt and 3 for MISC Interrupt. + */ + cpdma_ctlr_eoi(priv->dma, value); cpsw_intr_disable(priv); if (priv->irq_enabled == true) { @@ -745,8 +753,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget) int num_tx, num_rx; num_tx = cpdma_chan_process(priv->txch, 128); - if (num_tx) - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); num_rx = cpdma_chan_process(priv->rxch, budget); if (num_rx < budget) { @@ -754,7 +760,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget) napi_complete(napi); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); prim_cpsw = cpsw_get_slave_priv(priv, 0); if (prim_cpsw->irq_enabled == false) { prim_cpsw->irq_enabled = true; @@ -1265,8 +1270,6 @@ static int cpsw_ndo_open(struct net_device *ndev) napi_enable(&priv->napi); cpdma_ctlr_start(priv->dma); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); if (priv->data.dual_emac) priv->slaves[priv->emac_port].open_stat = true; @@ -1512,9 +1515,6 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev) cpdma_chan_start(priv->txch); cpdma_ctlr_int_ctrl(priv->dma, true); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); - } static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) @@ -1560,9 +1560,6 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev) cpsw_interrupt(ndev->irq, priv); cpdma_ctlr_int_ctrl(priv->dma, true); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); - } #endif From 982d04c42d0cb7e70a575153b270191bf03506e0 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 18 Dec 2014 13:40:06 +0200 Subject: [PATCH 073/298] video/logo: prevent use of logos after they have been freed commit 92b004d1aa9f367c372511ca0330f58216b25703 upstream. If the probe of an fb driver has been deferred due to missing dependencies, and the probe is later ran when a module is loaded, the fbdev framework will try to find a logo to use. However, the logos are __initdata, and have already been freed. This causes sometimes page faults, if the logo memory is not mapped, sometimes other random crashes as the logo data is invalid, and sometimes nothing, if the fbdev decides to reject the logo (e.g. the random value depicting the logo's height is too big). This patch adds a late_initcall function to mark the logos as freed. In reality the logos are freed later, and fbdev probe may be ran between this late_initcall and the freeing of the logos. In that case we will miss drawing the logo, even if it would be possible. Signed-off-by: Tomi Valkeinen Signed-off-by: Greg Kroah-Hartman --- drivers/video/logo/logo.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c index b670cbda38e3..ffe024b830fc 100644 --- a/drivers/video/logo/logo.c +++ b/drivers/video/logo/logo.c @@ -21,6 +21,21 @@ static bool nologo; module_param(nologo, bool, 0); MODULE_PARM_DESC(nologo, "Disables startup logo"); +/* + * Logos are located in the initdata, and will be freed in kernel_init. + * Use late_init to mark the logos as freed to prevent any further use. + */ + +static bool logos_freed; + +static int __init fb_logo_late_init(void) +{ + logos_freed = true; + return 0; +} + +late_initcall(fb_logo_late_init); + /* logo's are marked __initdata. Use __init_refok to tell * modpost that it is intended that this function uses data * marked __initdata. @@ -29,7 +44,7 @@ const struct linux_logo * __init_refok fb_find_logo(int depth) { const struct linux_logo *logo = NULL; - if (nologo) + if (nologo || logos_freed) return NULL; if (depth >= 1) { From ecbd923080648dbaeeaf8ceeafd268f988eefb58 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 1 Apr 2014 10:22:46 -0300 Subject: [PATCH 074/298] smiapp-pll: Correct clock debug prints commit bc47150ab93988714d1fab7bc82fe5f505a107ad upstream. The PLL flags were not used correctly. Signed-off-by: Sakari Ailus Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/smiapp-pll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/i2c/smiapp-pll.c b/drivers/media/i2c/smiapp-pll.c index 2335529b195c..ab5d9a3adebf 100644 --- a/drivers/media/i2c/smiapp-pll.c +++ b/drivers/media/i2c/smiapp-pll.c @@ -67,7 +67,7 @@ static void print_pll(struct device *dev, struct smiapp_pll *pll) { dev_dbg(dev, "pre_pll_clk_div\t%d\n", pll->pre_pll_clk_div); dev_dbg(dev, "pll_multiplier \t%d\n", pll->pll_multiplier); - if (pll->flags != SMIAPP_PLL_FLAG_NO_OP_CLOCKS) { + if (!(pll->flags & SMIAPP_PLL_FLAG_NO_OP_CLOCKS)) { dev_dbg(dev, "op_sys_clk_div \t%d\n", pll->op_sys_clk_div); dev_dbg(dev, "op_pix_clk_div \t%d\n", pll->op_pix_clk_div); } @@ -77,7 +77,7 @@ static void print_pll(struct device *dev, struct smiapp_pll *pll) dev_dbg(dev, "ext_clk_freq_hz \t%d\n", pll->ext_clk_freq_hz); dev_dbg(dev, "pll_ip_clk_freq_hz \t%d\n", pll->pll_ip_clk_freq_hz); dev_dbg(dev, "pll_op_clk_freq_hz \t%d\n", pll->pll_op_clk_freq_hz); - if (pll->flags & SMIAPP_PLL_FLAG_NO_OP_CLOCKS) { + if (!(pll->flags & SMIAPP_PLL_FLAG_NO_OP_CLOCKS)) { dev_dbg(dev, "op_sys_clk_freq_hz \t%d\n", pll->op_sys_clk_freq_hz); dev_dbg(dev, "op_pix_clk_freq_hz \t%d\n", From 8dd1f3537169d4445d9b55e64eef3feaf1015b14 Mon Sep 17 00:00:00 2001 From: Frank Schaefer Date: Mon, 29 Sep 2014 15:17:35 -0300 Subject: [PATCH 075/298] af9005: fix kernel panic on init if compiled without IR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2279948735609d0d17d7384e776b674619f792ef upstream. This patches fixes an ancient bug in the dvb_usb_af9005 driver, which has been reported at least in the following threads: https://lkml.org/lkml/2009/2/4/350 https://lkml.org/lkml/2014/9/18/558 If the driver is compiled in without any IR support (neither DVB_USB_AF9005_REMOTE nor custom symbols), the symbol_request calls in af9005_usb_module_init() return pointers != NULL although the IR symbols are not available. This leads to the following oops: ... [ 8.529751] usbcore: registered new interface driver dvb_usb_af9005 [ 8.531584] BUG: unable to handle kernel paging request at 02e00000 [ 8.533385] IP: [<7d9d67c6>] af9005_usb_module_init+0x6b/0x9d [ 8.535613] *pde = 00000000 [ 8.536416] Oops: 0000 [#1] PREEMPT PREEMPT DEBUG_PAGEALLOCDEBUG_PAGEALLOC [ 8.537863] CPU: 0 PID: 1 Comm: swapper Not tainted 3.15.0-rc6-00151-ga5c075c #1 [ 8.539827] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 8.541519] task: 89c9a670 ti: 89c9c000 task.ti: 89c9c000 [ 8.541519] EIP: 0060:[<7d9d67c6>] EFLAGS: 00010206 CPU: 0 [ 8.541519] EIP is at af9005_usb_module_init+0x6b/0x9d [ 8.541519] EAX: 02e00000 EBX: 00000000 ECX: 00000006 EDX: 00000000 [ 8.541519] ESI: 00000000 EDI: 7da33ec8 EBP: 89c9df30 ESP: 89c9df2c [ 8.541519] DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 [ 8.541519] CR0: 8005003b CR2: 02e00000 CR3: 05a54000 CR4: 00000690 [ 8.541519] Stack: [ 8.541519] 7d9d675b 89c9df90 7d992a49 7d7d5914 89c9df4c 7be3a800 7d08c58c 8a4c3968 [ 8.541519] 89c9df80 7be3a966 00000192 00000006 00000006 7d7d3ff4 8a4c397a 00000200 [ 8.541519] 7d6b1280 8a4c3979 00000006 000009a6 7da32db8 b13eec81 00000006 000009a6 [ 8.541519] Call Trace: [ 8.541519] [<7d9d675b>] ? ttusb2_driver_init+0x16/0x16 [ 8.541519] [<7d992a49>] do_one_initcall+0x77/0x106 [ 8.541519] [<7be3a800>] ? parameqn+0x2/0x35 [ 8.541519] [<7be3a966>] ? parse_args+0x113/0x25c [ 8.541519] [<7d992bc2>] kernel_init_freeable+0xea/0x167 [ 8.541519] [<7cf01070>] kernel_init+0x8/0xb8 [ 8.541519] [<7cf27ec0>] ret_from_kernel_thread+0x20/0x30 [ 8.541519] [<7cf01068>] ? rest_init+0x10c/0x10c [ 8.541519] Code: 08 c2 c7 05 44 ed f9 7d 00 00 e0 02 c7 05 40 ed f9 7d 00 00 e0 02 c7 05 3c ed f9 7d 00 00 e0 02 75 1f b8 00 00 e0 02 85 c0 74 16 00 00 e0 02 c7 05 54 84 8e 7d 00 00 e0 02 a3 58 84 8e 7d eb [ 8.541519] EIP: [<7d9d67c6>] af9005_usb_module_init+0x6b/0x9d SS:ESP 0068:89c9df2c [ 8.541519] CR2: 0000000002e00000 [ 8.541519] ---[ end trace 768b6faf51370fc7 ]--- The prefered fix would be to convert the whole IR code to use the kernel IR infrastructure (which wasn't available at the time this driver had been created). Until anyone who still has this old hardware steps up an does the conversion, fix it by not calling the symbol_request calls if the driver is compiled in without the default IR symbols (CONFIG_DVB_USB_AF9005_REMOTE). Due to the IR related pointers beeing NULL by default, IR support will then be disabled. The downside of this solution is, that it will no longer be possible to compile custom IR symbols (not using CONFIG_DVB_USB_AF9005_REMOTE) in. Please note that this patch has NOT been tested with all possible cases. I don't have the hardware and could only verify that it fixes the reported bug. Reported-by: Fengguag Wu Signed-off-by: Frank Schäfer Acked-by: Luca Olivetti Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/dvb-usb/af9005.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/usb/dvb-usb/af9005.c b/drivers/media/usb/dvb-usb/af9005.c index af176b6ce738..e6d3561eea47 100644 --- a/drivers/media/usb/dvb-usb/af9005.c +++ b/drivers/media/usb/dvb-usb/af9005.c @@ -1081,9 +1081,12 @@ static int __init af9005_usb_module_init(void) err("usb_register failed. (%d)", result); return result; } +#if IS_MODULE(CONFIG_DVB_USB_AF9005) || defined(CONFIG_DVB_USB_AF9005_REMOTE) + /* FIXME: convert to todays kernel IR infrastructure */ rc_decode = symbol_request(af9005_rc_decode); rc_keys = symbol_request(rc_map_af9005_table); rc_keys_size = symbol_request(rc_map_af9005_table_size); +#endif if (rc_decode == NULL || rc_keys == NULL || rc_keys_size == NULL) { err("af9005_rc_decode function not found, disabling remote"); af9005_properties.rc.legacy.rc_query = NULL; From e572ef381fca0a5c42e6bcddb2eb5199d6e5e410 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 16 Sep 2014 15:57:07 -0300 Subject: [PATCH 076/298] smiapp: Take mutex during PLL update in sensor initialisation commit f85698cd296f08218a7750f321e94607da128600 upstream. The mutex does not serialise anything in this case but avoids a lockdep warning from the control framework. Signed-off-by: Sakari Ailus Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/smiapp/smiapp-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/i2c/smiapp/smiapp-core.c b/drivers/media/i2c/smiapp/smiapp-core.c index 7026ab08ec91..873d0627a75b 100644 --- a/drivers/media/i2c/smiapp/smiapp-core.c +++ b/drivers/media/i2c/smiapp/smiapp-core.c @@ -2624,7 +2624,9 @@ static int smiapp_registered(struct v4l2_subdev *subdev) pll->flags |= SMIAPP_PLL_FLAG_OP_PIX_CLOCK_PER_LANE; pll->scale_n = sensor->limits[SMIAPP_LIMIT_SCALER_N_MIN]; + mutex_lock(&sensor->mutex); rval = smiapp_update_mode(sensor); + mutex_unlock(&sensor->mutex); if (rval) { dev_err(&client->dev, "update mode failed\n"); goto out_nvm_release; From 38722897ccfc15527a252d1e2180dc7f556e74cd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 30 Oct 2014 07:53:04 -0300 Subject: [PATCH 077/298] sound: simplify au0828 quirk table commit 5d1f00a20d2d56ed480e64e938a2391353ee565b upstream. Add a macro to simplify au0828 quirk table. That makes easier to check it against the USB IDs at drivers/media/usb/au0828/au0828-cards.c. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/au0828/au0828-cards.c | 5 + sound/usb/quirks-table.h | 158 +++++------------------- 2 files changed, 36 insertions(+), 127 deletions(-) diff --git a/drivers/media/usb/au0828/au0828-cards.c b/drivers/media/usb/au0828/au0828-cards.c index dd32decb237d..1d4b11038958 100644 --- a/drivers/media/usb/au0828/au0828-cards.c +++ b/drivers/media/usb/au0828/au0828-cards.c @@ -36,6 +36,11 @@ static void hvr950q_cs5340_audio(void *priv, int enable) au0828_clear(dev, REG_000, 0x10); } +/* + * WARNING: There's a quirks table at sound/usb/quirks-table.h + * that should also be updated every time a new device with V4L2 support + * is added here. + */ struct au0828_board au0828_boards[] = { [AU0828_BOARD_UNKNOWN] = { .name = "Unknown board", diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index c657752a420c..8f3e2bf100eb 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2804,133 +2804,37 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, -/* Hauppauge HVR-950Q and HVR-850 */ -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7200), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7210), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7217), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x721b), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x721e), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x721f), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7240), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-850", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x2040, 0x7280), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, -{ - USB_DEVICE_VENDOR_SPEC(0x0fd9, 0x0008), - .match_flags = USB_DEVICE_ID_MATCH_DEVICE | - USB_DEVICE_ID_MATCH_INT_CLASS | - USB_DEVICE_ID_MATCH_INT_SUBCLASS, - .bInterfaceClass = USB_CLASS_AUDIO, - .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, - .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { - .vendor_name = "Hauppauge", - .product_name = "HVR-950Q", - .ifnum = QUIRK_ANY_INTERFACE, - .type = QUIRK_AUDIO_ALIGN_TRANSFER, - } -}, +/* + * Auvitek au0828 devices with audio interface. + * This should be kept in sync with drivers/media/usb/au0828/au0828-cards.c + * Please notice that some drivers are DVB only, and don't need to be + * here. That's the case, for example, of DVICO_FUSIONHDTV7. + */ + +#define AU0828_DEVICE(vid, pid, vname, pname) { \ + USB_DEVICE_VENDOR_SPEC(vid, pid), \ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ + USB_DEVICE_ID_MATCH_INT_CLASS | \ + USB_DEVICE_ID_MATCH_INT_SUBCLASS, \ + .bInterfaceClass = USB_CLASS_AUDIO, \ + .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, \ + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { \ + .vendor_name = vname, \ + .product_name = pname, \ + .ifnum = QUIRK_ANY_INTERFACE, \ + .type = QUIRK_AUDIO_ALIGN_TRANSFER, \ + } \ +} + +AU0828_DEVICE(0x2040, 0x7200, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7210, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7217, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x721b, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x721e, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x721f, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7240, "Hauppauge", "HVR-850"), +AU0828_DEVICE(0x2040, 0x7280, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x0fd9, 0x0008, "Hauppauge", "HVR-950Q"), /* Digidesign Mbox */ { From 5c8bce2b451df55571fcd349820411615d6227f6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 30 Oct 2014 07:53:05 -0300 Subject: [PATCH 078/298] sound: Update au0828 quirks table commit 678fa12fb8e75c6dc1e781a02e3ddbbba7e1a904 upstream. The au0828 quirks table is currently not in sync with the au0828 media driver. Syncronize it and put them on the same order as found at au0828 driver, as all the au0828 devices with analog TV need the same quirks. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks-table.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 8f3e2bf100eb..83bddbdb90e9 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2827,14 +2827,22 @@ YAMAHA_DEVICE(0x7010, "UB99"), } AU0828_DEVICE(0x2040, 0x7200, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7240, "Hauppauge", "HVR-850"), AU0828_DEVICE(0x2040, 0x7210, "Hauppauge", "HVR-950Q"), AU0828_DEVICE(0x2040, 0x7217, "Hauppauge", "HVR-950Q"), AU0828_DEVICE(0x2040, 0x721b, "Hauppauge", "HVR-950Q"), AU0828_DEVICE(0x2040, 0x721e, "Hauppauge", "HVR-950Q"), AU0828_DEVICE(0x2040, 0x721f, "Hauppauge", "HVR-950Q"), -AU0828_DEVICE(0x2040, 0x7240, "Hauppauge", "HVR-850"), AU0828_DEVICE(0x2040, 0x7280, "Hauppauge", "HVR-950Q"), AU0828_DEVICE(0x0fd9, 0x0008, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7201, "Hauppauge", "HVR-950Q-MXL"), +AU0828_DEVICE(0x2040, 0x7211, "Hauppauge", "HVR-950Q-MXL"), +AU0828_DEVICE(0x2040, 0x7281, "Hauppauge", "HVR-950Q-MXL"), +AU0828_DEVICE(0x05e1, 0x0480, "Hauppauge", "Woodbury"), +AU0828_DEVICE(0x2040, 0x8200, "Hauppauge", "Woodbury"), +AU0828_DEVICE(0x2040, 0x7260, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7213, "Hauppauge", "HVR-950Q"), +AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), /* Digidesign Mbox */ { From fd982db71291e4d711360372de7d08554864b375 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Oct 2014 05:10:20 -0300 Subject: [PATCH 079/298] uvcvideo: Fix destruction order in uvc_delete() commit 2228d80dd05a4fc5a410fde847677b8fb3eb23d7 upstream. We've got a bug report at disconnecting a Webcam, where the kernel spews warnings like below: WARNING: CPU: 0 PID: 8385 at ../fs/sysfs/group.c:219 sysfs_remove_group+0x87/0x90() sysfs group c0b2350c not found for kobject 'event3' CPU: 0 PID: 8385 Comm: queue2:src Not tainted 3.16.2-1.gdcee397-default #1 Hardware name: ASUSTeK Computer INC. A7N8X-E/A7N8X-E, BIOS ASUS A7N8X-E Deluxe ACPI BIOS Rev 1013 11/12/2004 c08d0705 ddc75cbc c0718c5b ddc75ccc c024b654 c08c6d44 ddc75ce8 000020c1 c08d0705 000000db c03d1ec7 c03d1ec7 00000009 00000000 c0b2350c d62c9064 ddc75cd4 c024b6a3 00000009 ddc75ccc c08c6d44 ddc75ce8 ddc75cfc c03d1ec7 Call Trace: [] try_stack_unwind+0x156/0x170 [] dump_trace+0x53/0x180 [] show_trace_log_lvl+0x46/0x50 [] show_stack_log_lvl+0x51/0xe0 [] show_stack+0x27/0x50 [] dump_stack+0x3e/0x4e [] warn_slowpath_common+0x84/0xa0 [] warn_slowpath_fmt+0x33/0x40 [] sysfs_remove_group+0x87/0x90 [] device_del+0x34/0x180 [] evdev_disconnect+0x19/0x50 [] __input_unregister_device+0x9a/0x140 [] input_unregister_device+0x45/0x80 [] uvc_delete+0x26/0x110 [uvcvideo] [] v4l2_device_release+0x98/0xc0 [videodev] [] device_release+0x2b/0x90 [] kobject_cleanup+0x6f/0x1a0 [] v4l2_release+0x43/0x70 [videodev] [] __fput+0xb1/0x1b0 [] task_work_run+0x91/0xb0 [] do_exit+0x265/0x910 [] do_group_exit+0x34/0xa0 [] get_signal_to_deliver+0x17f/0x590 [] do_signal+0x3a/0x960 [] do_notify_resume+0x67/0x90 [] work_notifysig+0x30/0x3b [] 0xb7739e5f ---[ end trace b1e56095a485b631 ]--- The cause is that uvc_status_cleanup() is called after usb_put_*() in uvc_delete(). usb_put_*() removes the sysfs parent and eventually removes the children recursively, so the later device_del() can't find its sysfs. The fix is simply rearrange the call orders in uvc_delete() so that the child is removed before the parent. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=897736 Reported-and-tested-by: Martin Pluskal Signed-off-by: Takashi Iwai Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_driver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 753ad4cfc118..45314412b4a3 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1603,12 +1603,12 @@ static void uvc_delete(struct uvc_device *dev) { struct list_head *p, *n; - usb_put_intf(dev->intf); - usb_put_dev(dev->udev); - uvc_status_cleanup(dev); uvc_ctrl_cleanup_device(dev); + usb_put_intf(dev->intf); + usb_put_dev(dev->udev); + if (dev->vdev.dev) v4l2_device_unregister(&dev->vdev); #ifdef CONFIG_MEDIA_CONTROLLER From 847f57d68fbbf01f65b337ac1f68fd5e1ff64e73 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 7 Jan 2015 10:29:11 -0700 Subject: [PATCH 080/298] vfio-pci: Fix the check on pci device type in vfio_pci_probe() commit 7c2e211f3c95b91912a92a8c6736343690042e2e upstream. Current vfio-pci just supports normal pci device, so vfio_pci_probe() will return if the pci device is not a normal device. While current code makes a mistake. PCI_HEADER_TYPE is the offset in configuration space of the device type, but we use this value to mask the type value. This patch fixs this by do the check directly on the pci_dev->hdr_type. Signed-off-by: Wei Yang Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 7ba042498857..75e1d03b8da3 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -810,13 +810,11 @@ static const struct vfio_device_ops vfio_pci_ops = { static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - u8 type; struct vfio_pci_device *vdev; struct iommu_group *group; int ret; - pci_read_config_byte(pdev, PCI_HEADER_TYPE, &type); - if ((type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; group = iommu_group_get(&pdev->dev); From ef879ee1e6b874903ae2668da23b6c0e8c983c7e Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Tue, 13 Jan 2015 17:35:49 +0530 Subject: [PATCH 081/298] drivers: net: cpsw: fix multicast flush in dual emac mode commit 25906052d953d3fbdb7e19480b9de5e6bb949f3f upstream. Since ALE table is a common resource for both the interfaces in Dual EMAC mode and while bringing up the second interface in cpsw_ndo_set_rx_mode() all the multicast entries added by the first interface is flushed out and only second interface multicast addresses are added. Fixing this by flushing multicast addresses based on dual EMAC port vlans which will not affect the other emac port multicast addresses. Fixes: d9ba8f9 (driver: net: ethernet: cpsw: dual emac interface implementation) Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw.c | 11 +++++++++-- drivers/net/ethernet/ti/cpsw_ale.c | 10 +++++++++- drivers/net/ethernet/ti/cpsw_ale.h | 2 +- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4a2508e22218..316650c3b5d7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -596,7 +596,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) /* Clear all mcast from ALE */ cpsw_ale_flush_multicast(ale, ALE_ALL_PORTS << - priv->host_port); + priv->host_port, -1); /* Flood All Unicast Packets to Host port */ cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1); @@ -620,6 +620,12 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) static void cpsw_ndo_set_rx_mode(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + int vid; + + if (priv->data.dual_emac) + vid = priv->slaves[priv->emac_port].port_vlan; + else + vid = priv->data.default_vlan; if (ndev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ @@ -631,7 +637,8 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) } /* Clear all mcast from ALE */ - cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS << priv->host_port); + cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS << priv->host_port, + vid); if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 7f893069c418..4eceb7e42c80 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -236,7 +236,7 @@ static void cpsw_ale_flush_mcast(struct cpsw_ale *ale, u32 *ale_entry, cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); } -int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask) +int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid) { u32 ale_entry[ALE_ENTRY_WORDS]; int ret, idx; @@ -247,6 +247,14 @@ int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask) if (ret != ALE_TYPE_ADDR && ret != ALE_TYPE_VLAN_ADDR) continue; + /* if vid passed is -1 then remove all multicast entry from + * the table irrespective of vlan id, if a valid vlan id is + * passed then remove only multicast added to that vlan id. + * if vlan id doesn't match then move on to next entry. + */ + if (vid != -1 && cpsw_ale_get_vlan_id(ale_entry) != vid) + continue; + if (cpsw_ale_get_mcast(ale_entry)) { u8 addr[6]; diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index de409c33b250..e701358fd00b 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -88,7 +88,7 @@ void cpsw_ale_stop(struct cpsw_ale *ale); int cpsw_ale_set_ageout(struct cpsw_ale *ale, int ageout); int cpsw_ale_flush(struct cpsw_ale *ale, int port_mask); -int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask); +int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask, int vid); int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port, int flags, u16 vid); int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port, From 8676ef6461f42c61dd5fb17ba18eb6ccf16e2307 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 12 Jan 2015 12:12:03 -0500 Subject: [PATCH 082/298] ftrace/jprobes/x86: Fix conflict between jprobes and function graph tracing commit 237d28db036e411f22c03cfd5b0f6dc2aa9bf3bc upstream. If the function graph tracer traces a jprobe callback, the system will crash. This can easily be demonstrated by compiling the jprobe sample module that is in the kernel tree, loading it and running the function graph tracer. # modprobe jprobe_example.ko # echo function_graph > /sys/kernel/debug/tracing/current_tracer # ls The first two commands end up in a nice crash after the first fork. (do_fork has a jprobe attached to it, so "ls" just triggers that fork) The problem is caused by the jprobe_return() that all jprobe callbacks must end with. The way jprobes works is that the function a jprobe is attached to has a breakpoint placed at the start of it (or it uses ftrace if fentry is supported). The breakpoint handler (or ftrace callback) will copy the stack frame and change the ip address to return to the jprobe handler instead of the function. The jprobe handler must end with jprobe_return() which swaps the stack and does an int3 (breakpoint). This breakpoint handler will then put back the saved stack frame, simulate the instruction at the beginning of the function it added a breakpoint to, and then continue on. For function tracing to work, it hijakes the return address from the stack frame, and replaces it with a hook function that will trace the end of the call. This hook function will restore the return address of the function call. If the function tracer traces the jprobe handler, the hook function for that handler will not be called, and its saved return address will be used for the next function. This will result in a kernel crash. To solve this, pause function tracing before the jprobe handler is called and unpause it before it returns back to the function it probed. Some other updates: Used a variable "saved_sp" to hold kcb->jprobe_saved_sp. This makes the code look a bit cleaner and easier to understand (various tries to fix this bug required this change). Note, if fentry is being used, jprobes will change the ip address before the function graph tracer runs and it will not be able to trace the function that the jprobe is probing. Link: http://lkml.kernel.org/r/20150114154329.552437962@goodmis.org Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/kprobes/core.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 79a3f9682871..a1f5b1866cbe 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1017,6 +1017,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->flags &= ~X86_EFLAGS_IF; trace_hardirqs_off(); regs->ip = (unsigned long)(jp->entry); + + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer to get messed up. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); return 1; } @@ -1042,24 +1051,25 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->ip - 1); struct jprobe *jp = container_of(p, struct jprobe, kp); + void *saved_sp = kcb->jprobe_saved_sp; if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (stack_addr(regs) != kcb->jprobe_saved_sp) { + if (stack_addr(regs) != saved_sp) { struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; printk(KERN_ERR "current sp %p does not match saved sp %p\n", - stack_addr(regs), kcb->jprobe_saved_sp); + stack_addr(regs), saved_sp); printk(KERN_ERR "Saved registers for jprobe %p\n", jp); show_regs(saved_regs); printk(KERN_ERR "Current registers\n"); show_regs(regs); BUG(); } + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); *regs = kcb->jprobe_saved_regs; - memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), - kcb->jprobes_stack, - MIN_STACK_SIZE(kcb->jprobe_saved_sp)); + memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp)); preempt_enable_no_resched(); return 1; } From b047a36f1d480bb432cd56ade8ea490cc1b71bb3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 2 Jan 2015 16:25:08 -0500 Subject: [PATCH 083/298] NFSv4.1: Fix client id trunking on Linux commit 1fc0703af3143914a389bfa081c7acb09502ed5d upstream. Currently, our trunking code will check for session trunking, but will fail to detect client id trunking. This is a problem, because it means that the client will fail to recognise that the two connections represent shared state, even if they do not permit a shared session. By removing the check for the server minor id, and only checking the major id, we will end up doing the right thing in both cases: we close down the new nfs_client and fall back to using the existing one. Fixes: 05f4c350ee02e ("NFS: Discover NFSv4 server trunking when mounting") Cc: Chuck Lever Tested-by: Chuck Lever Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4client.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 1abe4f55dea2..037f9572b94c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -565,20 +565,14 @@ static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) } /* - * Returns true if the server owners match + * Returns true if the server major ids match */ static bool -nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b) +nfs4_check_clientid_trunking(struct nfs_client *a, struct nfs_client *b) { struct nfs41_server_owner *o1 = a->cl_serverowner; struct nfs41_server_owner *o2 = b->cl_serverowner; - if (o1->minor_id != o2->minor_id) { - dprintk("NFS: --> %s server owner minor IDs do not match\n", - __func__); - return false; - } - if (o1->major_id_sz != o2->major_id_sz) goto out_major_mismatch; if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) @@ -654,7 +648,12 @@ int nfs41_walk_client_list(struct nfs_client *new, if (!nfs4_match_clientids(pos, new)) continue; - if (!nfs4_match_serverowners(pos, new)) + /* + * Note that session trunking is just a special subcase of + * client id trunking. In either case, we want to fall back + * to using the existing nfs_client. + */ + if (!nfs4_check_clientid_trunking(pos, new)) continue; atomic_inc(&pos->cl_count); From ff80cc960fd67e2fa871eaead6252ad41935703a Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 9 Jan 2015 09:40:43 +0100 Subject: [PATCH 084/298] gpiolib: of: Correct error handling in of_get_named_gpiod_flags commit 7b8792bbdffdff3abda704f89c6a45ea97afdc62 upstream. of_get_named_gpiod_flags fails with -EPROBE_DEFER in cases where the gpio chip is available and the GPIO translation fails. This causes drivers to be re-probed erroneusly, and hides the real problem(i.e. the GPIO number being out of range). Signed-off-by: Hans Holmberg Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib-of.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index e0a98f581f58..74ed17d6cfa1 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -44,8 +44,14 @@ static int of_gpiochip_find_and_xlate(struct gpio_chip *gc, void *data) return false; ret = gc->of_xlate(gc, &gg_data->gpiospec, gg_data->flags); - if (ret < 0) - return false; + if (ret < 0) { + /* We've found the gpio chip, but the translation failed. + * Return true to stop looking and return the translation + * error via out_gpio + */ + gg_data->out_gpio = ERR_PTR(ret); + return true; + } gg_data->out_gpio = gpio_to_desc(ret + gc->base); return true; From 6bea41fbaba9e102a620c1794ae540addad35447 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 12 Jan 2015 17:12:24 +0100 Subject: [PATCH 085/298] gpio: fix memory and reference leaks in gpiochip_add error path commit 5539b3c938d64a60cb1fc442ac3ce9263d52de0c upstream. Memory allocated and references taken by of_gpiochip_add and acpi_gpiochip_add were never released on errors in gpiochip_add (e.g. failure to find free gpio range). Fixes: 391c970c0dd1 ("of/gpio: add default of_xlate function if device has a node pointer") Fixes: 664e3e5ac64c ("gpio / ACPI: register to ACPI events automatically") Signed-off-by: Johan Hovold Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 50c4922fe53a..5b88c83888d1 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1222,6 +1222,9 @@ int gpiochip_add(struct gpio_chip *chip) spin_unlock_irqrestore(&gpio_lock, flags); + if (status) + goto fail; + #ifdef CONFIG_PINCTRL INIT_LIST_HEAD(&chip->pin_ranges); #endif @@ -1229,12 +1232,12 @@ int gpiochip_add(struct gpio_chip *chip) of_gpiochip_add(chip); acpi_gpiochip_add(chip); - if (status) - goto fail; - status = gpiochip_export(chip); - if (status) + if (status) { + acpi_gpiochip_remove(chip); + of_gpiochip_remove(chip); goto fail; + } pr_debug("%s: registered GPIOs %d to %d on device: %s\n", __func__, chip->base, chip->base + chip->ngpio - 1, From 5cb2226deeb4ed98520ae2c5e6746daf62d99bd1 Mon Sep 17 00:00:00 2001 From: Arseny Solokha Date: Sat, 6 Dec 2014 09:54:06 +0700 Subject: [PATCH 086/298] OHCI: add a quirk for ULi M5237 blocking on reset commit 56abcab833fafcfaeb2f5b25e0364c1dec45f53e upstream. Commit 8dccddbc2368 ("OHCI: final fix for NVIDIA problems (I hope)") introduced into 3.1.9 broke boot on e.g. Freescale P2020DS development board. The code path that was previously specific to NVIDIA controllers had then become taken for all chips. However, the M5237 installed on the board wedges solid when accessing its base+OHCI_FMINTERVAL register, making it impossible to boot any kernel newer than 3.1.8 on this particular and apparently other similar machines. Don't readl() and writel() base+OHCI_FMINTERVAL on PCI ID 10b9:5237. The patch is suitable for the -next tree as well as all maintained kernels up to 3.2 inclusive. Signed-off-by: Arseny Solokha Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 2f3acebb577a..f4e6b945136c 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -571,7 +571,8 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev) { void __iomem *base; u32 control; - u32 fminterval; + u32 fminterval = 0; + bool no_fminterval = false; int cnt; if (!mmio_resource_enabled(pdev, 0)) @@ -581,6 +582,13 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev) if (base == NULL) return; + /* + * ULi M5237 OHCI controller locks the whole system when accessing + * the OHCI_FMINTERVAL offset. + */ + if (pdev->vendor == PCI_VENDOR_ID_AL && pdev->device == 0x5237) + no_fminterval = true; + control = readl(base + OHCI_CONTROL); /* On PA-RISC, PDC can leave IR set incorrectly; ignore it there. */ @@ -619,7 +627,9 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev) } /* software reset of the controller, preserving HcFmInterval */ - fminterval = readl(base + OHCI_FMINTERVAL); + if (!no_fminterval) + fminterval = readl(base + OHCI_FMINTERVAL); + writel(OHCI_HCR, base + OHCI_CMDSTATUS); /* reset requires max 10 us delay */ @@ -628,7 +638,9 @@ static void quirk_usb_handoff_ohci(struct pci_dev *pdev) break; udelay(1); } - writel(fminterval, base + OHCI_FMINTERVAL); + + if (!no_fminterval) + writel(fminterval, base + OHCI_FMINTERVAL); /* Now the controller is safely in SUSPEND and nothing can wake it up */ iounmap(base); From 0f1f031c46c984faeeca6d6d0adb7d11c2da3ced Mon Sep 17 00:00:00 2001 From: Amit Virdi Date: Tue, 13 Jan 2015 14:27:20 +0530 Subject: [PATCH 087/298] usb: dwc3: gadget: Fix TRB preparation during SG commit ec512fb8e5611fed1df2895f90317ce6797d6b32 upstream. When scatter gather (SG) is used, multiple TRBs are prepared from one DWC3 request (dwc3_request). So while preparing TRBs, the 'last' flag should be set only when it is the last TRB being prepared from the last dwc3_request entry. The current implementation uses list_is_last to check if the dwc3_request is the last entry from the request_list. However, list_is_last returns false for the last entry too. This is because, while preparing the first TRB from a request, the function dwc3_prepare_one_trb modifies the request's next and prev pointers while moving the URB to req_queued. Hence, list_is_last always returns false no matter what. The correct way is not to access the modified pointers of dwc3_request but to use list_empty macro instead. Fixes: e5ba5ec833aa (usb: dwc3: gadget: fix scatter gather implementation) Signed-off-by: Amit Virdi Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d90c70c23adb..f4ca19f15876 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -887,8 +887,7 @@ static void dwc3_prepare_trbs(struct dwc3_ep *dep, bool starting) if (i == (request->num_mapped_sgs - 1) || sg_is_last(s)) { - if (list_is_last(&req->list, - &dep->request_list)) + if (list_empty(&dep->request_list)) last_one = true; chain = false; } From adcf24128515915ae4a325af7a6d65b12e255709 Mon Sep 17 00:00:00 2001 From: Amit Virdi Date: Tue, 13 Jan 2015 14:27:21 +0530 Subject: [PATCH 088/298] usb: dwc3: gadget: Stop TRB preparation after limit is reached commit 39e60635a01520e8c8ed3946a28c2b98e6a46f79 upstream. DWC3 gadget sets up a pool of 32 TRBs for each EP during initialization. This means, the max TRBs that can be submitted for an EP is fixed to 32. Since the request queue for an EP is a linked list, any number of requests can be queued to it by the gadget layer. However, the dwc3 driver must not submit TRBs more than the pool it has created for. This limit wasn't respected when SG was used resulting in submitting more than the max TRBs, eventually leading to non-transfer of the TRBs submitted over the max limit. Root cause: When SG is used, there are two loops iterating to prepare TRBs: - Outer loop over the request_list - Inner loop over the SG list The code was missing break to get out of the outer loop. Fixes: eeb720fb21d6 (usb: dwc3: gadget: add support for SG lists) Signed-off-by: Amit Virdi Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f4ca19f15876..8f6738d46b14 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -905,6 +905,9 @@ static void dwc3_prepare_trbs(struct dwc3_ep *dep, bool starting) if (last_one) break; } + + if (last_one) + break; } else { dma = req->request.dma; length = req->request.length; From d92ad55b444c1ac63e669385d9b2a7a95aae6f24 Mon Sep 17 00:00:00 2001 From: Preston Fick Date: Sat, 27 Dec 2014 01:32:41 -0600 Subject: [PATCH 089/298] USB: cp210x: fix ID for production CEL MeshConnect USB Stick commit 90441b4dbe90ba0c38111ea89fa093a8c9627801 upstream. Fixing typo for MeshConnect IDs. The original PID (0x8875) is not in production and is not needed. Instead it has been changed to the official production PID (0x8857). Signed-off-by: Preston Fick Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 5741e9405069..74d6587f643a 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -120,7 +120,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ - { USB_DEVICE(0x10C4, 0x8875) }, /* CEL MeshConnect USB Stick */ + { USB_DEVICE(0x10C4, 0x8857) }, /* CEL MeshConnect USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ From a1fec58704087636b24ebdf49d02934c0995248a Mon Sep 17 00:00:00 2001 From: David Peterson Date: Tue, 6 Jan 2015 15:00:52 +0000 Subject: [PATCH 090/298] USB: cp210x: add IDs for CEL USB sticks and MeshWorks devices commit 1ae78a4870989a354028cb17dabf819b595e70e3 upstream. Added virtual com port VID/PID entries for CEL USB sticks and MeshWorks devices. Signed-off-by: David Peterson Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 74d6587f643a..9e8708c5cbfa 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -120,10 +120,12 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ - { USB_DEVICE(0x10C4, 0x8857) }, /* CEL MeshConnect USB Stick */ + { USB_DEVICE(0x10C4, 0x8856) }, /* CEL EM357 ZigBee USB Stick - LR */ + { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ + { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ From a696260650e4f4f2e95d8520519550586766b458 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 22 Dec 2014 18:39:39 +0100 Subject: [PATCH 091/298] USB: keyspan: fix null-deref at probe commit b5122236bba8d7ef62153da5b55cc65d0944c61e upstream. Fix null-pointer dereference during probe if the interface-status completion handler is called before the individual ports have been set up. Fixes: f79b2d0fe81e ("USB: keyspan: fix NULL-pointer dereferences and memory leaks") Reported-by: Richard Tested-by: Richard Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/keyspan.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 49101fe45d38..35297a845a63 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -421,6 +421,8 @@ static void usa26_instat_callback(struct urb *urb) } port = serial->port[msg->port]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -431,7 +433,7 @@ static void usa26_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) @@ -541,6 +543,8 @@ static void usa28_instat_callback(struct urb *urb) } port = serial->port[msg->port]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -551,7 +555,7 @@ static void usa28_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) @@ -624,6 +628,8 @@ static void usa49_instat_callback(struct urb *urb) } port = serial->port[msg->portNumber]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -634,7 +640,7 @@ static void usa49_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) @@ -872,6 +878,8 @@ static void usa90_instat_callback(struct urb *urb) port = serial->port[0]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -882,7 +890,7 @@ static void usa90_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) @@ -943,6 +951,8 @@ static void usa67_instat_callback(struct urb *urb) port = serial->port[msg->port]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + goto resubmit; /* Update handshaking pin state information */ old_dcd_state = p_priv->dcd_state; @@ -951,7 +961,7 @@ static void usa67_instat_callback(struct urb *urb) if (old_dcd_state != p_priv->dcd_state && old_dcd_state) tty_port_tty_hangup(&port->port, true); - +resubmit: /* Resubmit urb so we continue receiving */ err = usb_submit_urb(urb, GFP_ATOMIC); if (err != 0) From 8438d81444fe8338ccf8b43538ce6aacf4cd3c39 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 5 Jan 2015 16:04:12 +0100 Subject: [PATCH 092/298] USB: console: fix uninitialised ldisc semaphore commit d269d4434c72ed0da3a9b1230c30da82c4918c63 upstream. The USB console currently allocates a temporary fake tty which is used to pass terminal settings to the underlying serial driver. The tty struct is not fully initialised, something which can lead to a lockdep warning (or worse) if a serial driver tries to acquire a line-discipline reference: usbserial: USB Serial support registered for pl2303 pl2303 1-2.1:1.0: pl2303 converter detected usb 1-2.1: pl2303 converter now attached to ttyUSB0 INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 68 Comm: udevd Tainted: G W 3.18.0-rc5 #10 [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x24/0x28) [] (dump_stack) from [] (__lock_acquire+0x1e50/0x2004) [] (__lock_acquire) from [] (lock_acquire+0xe4/0x18c) [] (lock_acquire) from [] (ldsem_down_read_trylock+0x78/0x90) [] (ldsem_down_read_trylock) from [] (tty_ldisc_ref+0x24/0x58) [] (tty_ldisc_ref) from [] (usb_serial_handle_dcd_change+0x48/0xe8) [] (usb_serial_handle_dcd_change) from [] (pl2303_read_int_callback+0x210/0x220 [pl2303]) [] (pl2303_read_int_callback [pl2303]) from [] (__usb_hcd_giveback_urb+0x80/0x140) [] (__usb_hcd_giveback_urb) from [] (usb_giveback_urb_bh+0x98/0xd4) [] (usb_giveback_urb_bh) from [] (tasklet_hi_action+0x9c/0x108) [] (tasklet_hi_action) from [] (__do_softirq+0x148/0x42c) [] (__do_softirq) from [] (irq_exit+0xd8/0x114) [] (irq_exit) from [] (__handle_domain_irq+0x84/0xdc) [] (__handle_domain_irq) from [] (omap_intc_handle_irq+0xd8/0xe0) [] (omap_intc_handle_irq) from [] (__irq_svc+0x44/0x7c) Exception stack(0xdf4e7f08 to 0xdf4e7f50) 7f00: debc0b80 df4e7f5c 00000000 00000000 debc0b80 be8da96c 7f20: 00000000 00000128 c000fc84 df4e6000 00000000 df4e7f94 00000004 df4e7f50 7f40: c038ebc0 c038d74c 600f0013 ffffffff [] (__irq_svc) from [] (___sys_sendmsg.part.29+0x0/0x2e0) [] (___sys_sendmsg.part.29) from [] (SyS_sendmsg+0x18/0x1c) [] (SyS_sendmsg) from [] (ret_fast_syscall+0x0/0x48) console [ttyUSB0] enabled Fixes: 36697529b5bb ("tty: Replace ldisc locking with ldisc_sem") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/console.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index 8d7fc48b1f30..e56f394b58d8 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -140,6 +140,7 @@ static int usb_console_setup(struct console *co, char *options) tty_port_tty_set(&port->port, tty); tty->driver = usb_serial_tty_driver; tty->index = co->index; + init_ldsem(&tty->ldisc_sem); if (tty_init_termios(tty)) { retval = -ENOMEM; goto free_tty; From 23c631c3dbe28d5f89e70c4da7b4313177a4dc40 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 5 Jan 2015 16:04:13 +0100 Subject: [PATCH 093/298] USB: console: fix potential use after free commit 32a4bf2e81ec378e5925d4e069e0677a6c86a6ad upstream. Use tty kref to release the fake tty in usb_console_setup to avoid use after free if the underlying serial driver has acquired a reference. Note that using the tty destructor release_one_tty requires some more state to be initialised. Fixes: 4a90f09b20f4 ("tty: usb-serial krefs") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/console.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index e56f394b58d8..29fa1c3d0089 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -46,6 +46,8 @@ static struct console usbcons; * ------------------------------------------------------------ */ +static const struct tty_operations usb_console_fake_tty_ops = { +}; /* * The parsing of the command line works exactly like the @@ -137,14 +139,17 @@ static int usb_console_setup(struct console *co, char *options) goto reset_open_count; } kref_init(&tty->kref); - tty_port_tty_set(&port->port, tty); tty->driver = usb_serial_tty_driver; tty->index = co->index; init_ldsem(&tty->ldisc_sem); + INIT_LIST_HEAD(&tty->tty_files); + kref_get(&tty->driver->kref); + tty->ops = &usb_console_fake_tty_ops; if (tty_init_termios(tty)) { retval = -ENOMEM; - goto free_tty; + goto put_tty; } + tty_port_tty_set(&port->port, tty); } /* only call the device specific open if this @@ -162,7 +167,7 @@ static int usb_console_setup(struct console *co, char *options) serial->type->set_termios(tty, port, &dummy); tty_port_tty_set(&port->port, NULL); - kfree(tty); + tty_kref_put(tty); } set_bit(ASYNCB_INITIALIZED, &port->port.flags); } @@ -178,8 +183,8 @@ static int usb_console_setup(struct console *co, char *options) fail: tty_port_tty_set(&port->port, NULL); - free_tty: - kfree(tty); + put_tty: + tty_kref_put(tty); reset_open_count: port->port.count = 0; usb_autopm_put_interface(serial->interface); From 727cc57106e72d38ad5d3f0aefb374ef0091f144 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 4 Dec 2014 10:21:56 -0500 Subject: [PATCH 094/298] USB: EHCI: fix initialization bug in iso_stream_schedule() commit 6d89252a998a695ecb0348fc2d717dc33d90cae9 upstream. Commit c3ee9b76aa93 (EHCI: improved logic for isochronous scheduling) introduced the idea of using ehci->last_iso_frame as the origin (or base) for the circular calculations involved in modifying the isochronous schedule. However, the new code it added used ehci->last_iso_frame before the value was properly initialized. This patch rectifies the mistake by moving the initialization lines earlier in iso_stream_schedule(). This fixes Bugzilla #72891. Signed-off-by: Alan Stern Fixes: c3ee9b76aa93 Reported-by: Joe Bryant Tested-by: Joe Bryant Tested-by: Martin Long Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index e113fd73aeae..c399606f154e 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -1581,6 +1581,10 @@ iso_stream_schedule ( else next = (now + 2 + 7) & ~0x07; /* full frame cache */ + /* If needed, initialize last_iso_frame so that this URB will be seen */ + if (ehci->isoc_count == 0) + ehci->last_iso_frame = now >> 3; + /* * Use ehci->last_iso_frame as the base. There can't be any * TDs scheduled for earlier than that. @@ -1671,10 +1675,6 @@ iso_stream_schedule ( urb->start_frame = start & (mod - 1); if (!stream->highspeed) urb->start_frame >>= 3; - - /* Make sure scan_isoc() sees these */ - if (ehci->isoc_count == 0) - ehci->last_iso_frame = now >> 3; return status; fail: From 97ab28073bcc3299080bbaba025861d053e70f43 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 11 Dec 2014 18:14:18 +0100 Subject: [PATCH 095/298] usb: musb: stuff leak of struct usb_hcd commit 68693b8ea4e284c46bff919ac62bd9ccdfdbb6ba upstream. since the split of host+gadget mode in commit 74c2e9360058 ("usb: musb: factor out hcd initalization") we leak the usb_hcd struct. We call now musb_host_cleanup() which does basically usb_remove_hcd() and also sets the hcd variable to NULL. Doing so makes the finall call to musb_host_free() basically a nop and the usb_hcd remains around for ever without anowner. This patch drops that NULL assignment for that reason. Fixes: 74c2e9360058 ("usb: musb: factor out hcd initalization") Cc: Daniel Mack Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index abb38c3833ef..6b0fb6af6815 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2640,7 +2640,6 @@ void musb_host_cleanup(struct musb *musb) if (musb->port_mode == MUSB_PORT_MODE_GADGET) return; usb_remove_hcd(musb->hcd); - musb->hcd = NULL; } void musb_host_free(struct musb *musb) From 6ab8d4d1eb6a7db792397e4a6a61a5a0a51a7411 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 5 Jan 2015 12:49:10 -0500 Subject: [PATCH 096/298] can: kvaser_usb: Don't free packets when tight on URBs commit b442723fcec445fb0ae1104888dd22cd285e0a91 upstream. Flooding the Kvaser CAN to USB dongle with multiple reads and writes in high frequency caused seemingly-random panics in the kernel. On further inspection, it seems the driver erroneously freed the to-be-transmitted packet upon getting tight on URBs and returning NETDEV_TX_BUSY, leading to invalid memory writes and double frees at a later point in time. Note: Finding no more URBs/transmit-contexts and returning NETDEV_TX_BUSY is a driver bug in and out of itself: it means that our start/stop queue flow control is broken. This patch only fixes the (buggy) error handling code; the root cause shall be fixed in a later commit. Acked-by: Olivier Sobrie Signed-off-by: Ahmed S. Darwish Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index e77d11049747..16ba387ea5cc 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1285,12 +1285,14 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, if (!urb) { netdev_err(netdev, "No memory left for URBs\n"); stats->tx_dropped++; - goto nourbmem; + dev_kfree_skb(skb); + return NETDEV_TX_OK; } buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC); if (!buf) { stats->tx_dropped++; + dev_kfree_skb(skb); goto nobufmem; } @@ -1325,6 +1327,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, } } + /* This should never happen; it implies a flow control bug */ if (!context) { netdev_warn(netdev, "cannot find free context\n"); ret = NETDEV_TX_BUSY; @@ -1355,9 +1358,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, if (unlikely(err)) { can_free_echo_skb(netdev, context->echo_index); - skb = NULL; /* set to NULL to avoid double free in - * dev_kfree_skb(skb) */ - atomic_dec(&priv->active_tx_urbs); usb_unanchor_urb(urb); @@ -1379,8 +1379,6 @@ releasebuf: kfree(buf); nobufmem: usb_free_urb(urb); -nourbmem: - dev_kfree_skb(skb); return ret; } From 1c287e6f623c4dddeea2e5542cb6eb6dba17456e Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 5 Jan 2015 12:52:06 -0500 Subject: [PATCH 097/298] can: kvaser_usb: Reset all URB tx contexts upon channel close commit 889b77f7fd2bcc922493d73a4c51d8a851505815 upstream. Flooding the Kvaser CAN to USB dongle with multiple reads and writes in very high frequency (*), closing the CAN channel while all the transmissions are on (#), opening the device again (@), then sending a small number of packets would make the driver enter an almost infinite loop of: [....] [15959.853988] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853990] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853991] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853993] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853994] kvaser_usb 4-3:1.0 can0: cannot find free context [15959.853995] kvaser_usb 4-3:1.0 can0: cannot find free context [....] _dragging the whole system down_ in the process due to the excessive logging output. Initially, this has caused random panics in the kernel due to a buggy error recovery path. That got fixed in an earlier commit.(%) This patch aims at solving the root cause. --> 16 tx URBs and contexts are allocated per CAN channel per USB device. Such URBs are protected by: a) A simple atomic counter, up to a value of MAX_TX_URBS (16) b) A flag in each URB context, stating if it's free c) The fact that ndo_start_xmit calls are themselves protected by the networking layers higher above After grabbing one of the tx URBs, if the driver noticed that all of them are now taken, it stops the netif transmission queue. Such queue is worken up again only if an acknowedgment was received from the firmware on one of our earlier-sent frames. Meanwhile, upon channel close (#), the driver sends a CMD_STOP_CHIP to the firmware, effectively closing all further communication. In the high traffic case, the atomic counter remains at MAX_TX_URBS, and all the URB contexts remain marked as active. While opening the channel again (@), it cannot send any further frames since no more free tx URB contexts are available. Reset all tx URB contexts upon CAN channel close. (*) 50 parallel instances of `cangen0 -g 0 -ix` (#) `ifconfig can0 down` (@) `ifconfig can0 up` (%) "can: kvaser_usb: Don't free packets when tight on URBs" Signed-off-by: Ahmed S. Darwish Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 16ba387ea5cc..318d4ae87fad 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1237,6 +1237,9 @@ static int kvaser_usb_close(struct net_device *netdev) if (err) netdev_warn(netdev, "Cannot stop device, error %d\n", err); + /* reset tx contexts */ + kvaser_usb_unlink_tx_urbs(priv); + priv->can.state = CAN_STATE_STOPPED; close_candev(priv->netdev); From 04711556e2348ff38a156b8c4c1f1c1e57a485af Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 5 Jan 2015 12:57:13 -0500 Subject: [PATCH 098/298] can: kvaser_usb: Don't send a RESET_CHIP for non-existing channels commit 5e7e6e0c9b47a45576c38b4a72d67927a5e049f7 upstream. Recent Leaf firmware versions (>= 3.1.557) do not allow to send commands for non-existing channels. If a command is sent for a non-existing channel, the firmware crashes. Reported-by: Christopher Storah Signed-off-by: Olivier Sobrie Signed-off-by: Ahmed S. Darwish Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 318d4ae87fad..4e65b35bebc0 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1493,6 +1493,10 @@ static int kvaser_usb_init_one(struct usb_interface *intf, struct kvaser_usb_net_priv *priv; int i, err; + err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, channel); + if (err) + return err; + netdev = alloc_candev(sizeof(*priv), MAX_TX_URBS); if (!netdev) { dev_err(&intf->dev, "Cannot alloc candev\n"); @@ -1596,9 +1600,6 @@ static int kvaser_usb_probe(struct usb_interface *intf, usb_set_intfdata(intf, dev); - for (i = 0; i < MAX_NET_DEVICES; i++) - kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, i); - err = kvaser_usb_get_software_info(dev); if (err) { dev_err(&intf->dev, From ea27d985dee350921b31381bc80c31612a5582c9 Mon Sep 17 00:00:00 2001 From: Srihari Vijayaraghavan Date: Wed, 7 Jan 2015 16:25:53 -0800 Subject: [PATCH 099/298] Input: i8042 - reset keyboard to fix Elantech touchpad detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 148e9a711e034e06310a8c36b64957934ebe30f2 upstream. On some laptops, keyboard needs to be reset in order to successfully detect touchpad (e.g., some Gigabyte laptop models with Elantech touchpads). Without resettin keyboard touchpad pretends to be completely dead. Based on the original patch by Mateusz JoÅ„czyk this version has been expanded to include DMI based detection & application of the fix automatically on the affected models of laptops. This has been confirmed to fix problem by three users already on three different models of laptops. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=81331 Signed-off-by: Srihari Vijayaraghavan Acked-by: Mateusz JoÅ„czyk Tested-by: Srihari Vijayaraghavan Tested by: Zakariya Dehlawi Tested-by: Guillaum Bouchard Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- Documentation/kernel-parameters.txt | 1 + drivers/input/serio/i8042-x86ia64io.h | 32 +++++++++++++++++++++++++++ drivers/input/serio/i8042.c | 14 ++++++++++++ 3 files changed, 47 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7116fda7077f..5d91ba1606bb 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1172,6 +1172,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. i8042.notimeout [HW] Ignore timeout condition signalled by controller i8042.reset [HW] Reset the controller during init and cleanup i8042.unlock [HW] Unlock (ignore) the keylock + i8042.kbdreset [HW] Reset device connected to KBD port i810= [HW,DRM] diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 8fca488fdc15..2141cf06601d 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -721,6 +721,35 @@ static const struct dmi_system_id __initconst i8042_dmi_dritek_table[] = { { } }; +/* + * Some laptops need keyboard reset before probing for the trackpad to get + * it detected, initialised & finally work. + */ +static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { + { + /* Gigabyte P35 v2 - Elantech touchpad */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "P35V2"), + }, + }, + { + /* Aorus branded Gigabyte X3 Plus - Elantech touchpad */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "X3"), + }, + }, + { + /* Gigabyte P34 - Elantech touchpad */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GIGABYTE"), + DMI_MATCH(DMI_PRODUCT_NAME, "P34"), + }, + }, + { } +}; + #endif /* CONFIG_X86 */ #ifdef CONFIG_PNP @@ -1016,6 +1045,9 @@ static int __init i8042_platform_init(void) if (dmi_check_system(i8042_dmi_dritek_table)) i8042_dritek = true; + if (dmi_check_system(i8042_dmi_kbdreset_table)) + i8042_kbdreset = true; + /* * A20 was already enabled during early kernel init. But some buggy * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 3807c3e971cc..eb796fff9e62 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -67,6 +67,10 @@ static bool i8042_notimeout; module_param_named(notimeout, i8042_notimeout, bool, 0); MODULE_PARM_DESC(notimeout, "Ignore timeouts signalled by i8042"); +static bool i8042_kbdreset; +module_param_named(kbdreset, i8042_kbdreset, bool, 0); +MODULE_PARM_DESC(kbdreset, "Reset device connected to KBD port"); + #ifdef CONFIG_X86 static bool i8042_dritek; module_param_named(dritek, i8042_dritek, bool, 0); @@ -789,6 +793,16 @@ static int __init i8042_check_aux(void) if (i8042_toggle_aux(true)) return -1; +/* + * Reset keyboard (needed on some laptops to successfully detect + * touchpad, e.g., some Gigabyte laptop models with Elantech + * touchpads). + */ + if (i8042_kbdreset) { + pr_warn("Attempting to reset device connected to KBD port\n"); + i8042_kbd_write(NULL, (unsigned char) 0xff); + } + /* * Test AUX IRQ delivery to make sure BIOS did not grab the IRQ and * used it for a PCI card or somethig else. From b5a2f391506c47b878d6ea517efb09c44f2b7e38 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 8 Jan 2015 14:53:23 -0800 Subject: [PATCH 100/298] Input: I8042 - add Acer Aspire 7738 to the nomux list commit 9333caeaeae4f831054e0e127a6ed3948b604d3e upstream. When KBC is in active multiplexing mode the touchpad on this laptop does not work. Reported-by: Bilal Koc Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 2141cf06601d..c43c46f7dcd0 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -407,6 +407,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1360"), }, }, + { + /* Acer Aspire 7738 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7738"), + }, + }, { /* Gericom Bellagio */ .matches = { From d98a28f7b90a94944298528910d3726713b42ab5 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 5 Dec 2014 16:16:07 -0200 Subject: [PATCH 101/298] ARM: dts: imx25: Fix the SPI1 clocks commit 7a87e9cbc3a2f0ff0955815335e08c9862359130 upstream. From Documentation/devicetree/bindings/clock/imx25-clock.txt: cspi1_ipg 78 cspi2_ipg 79 cspi3_ipg 80 , so fix the SPI1 clocks accordingly to avoid a kernel hang when trying to access SPI1. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx25.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index de1611966d8b..6a26e79f0ef4 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -158,7 +158,7 @@ #size-cells = <0>; compatible = "fsl,imx25-cspi", "fsl,imx35-cspi"; reg = <0x43fa4000 0x4000>; - clocks = <&clks 62>, <&clks 62>; + clocks = <&clks 78>, <&clks 78>; clock-names = "ipg", "per"; interrupts = <14>; status = "disabled"; From 9180709ed68160f928a7e90565325715079a9b75 Mon Sep 17 00:00:00 2001 From: Dmitry Voytik Date: Thu, 6 Nov 2014 22:46:20 +0400 Subject: [PATCH 102/298] ARM: imx6q: drop unnecessary semicolon commit d2a10a1727b3948019128e83162f22c65859f1fd upstream. Drop unnecessary semicolon after closing curly bracket. Signed-off-by: Dmitry Voytik Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/clk-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 4d677f442539..0f28abcab13f 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -162,7 +162,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) post_div_table[2].div = 1; video_div_table[1].div = 1; video_div_table[2].div = 1; - }; + } /* type name parent_name base div_mask */ clk[pll1_sys] = imx_clk_pllv3(IMX_PLLV3_SYS, "pll1_sys", "osc", base, 0x7f); From f9f738ab8241a14042aa21f0299b42321d42d689 Mon Sep 17 00:00:00 2001 From: Gary Bisson Date: Wed, 3 Dec 2014 15:03:51 -0800 Subject: [PATCH 103/298] ARM: clk-imx6q: fix video divider for rev T0 1.0 commit 81ef447950bf0955aca46f4a7617d8ce435cf0ce upstream. The post dividers do not work on i.MX6Q rev T0 1.0 so they must be fixed to 1. As the table index was wrong, a divider a of 4 could still be requested which implied the clock not to be set properly. This is the root cause of the HDMI not working at high resolution on rev T0 1.0 of the SoC. Signed-off-by: Gary Bisson Cc: Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/clk-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 0f28abcab13f..01a5765a8b26 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -161,7 +161,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) post_div_table[1].div = 1; post_div_table[2].div = 1; video_div_table[1].div = 1; - video_div_table[2].div = 1; + video_div_table[3].div = 1; } /* type name parent_name base div_mask */ From 1ac32ed7cfb1b37d5101b463412e528107297700 Mon Sep 17 00:00:00 2001 From: Lennart Sorensen Date: Mon, 5 Jan 2015 15:45:45 -0800 Subject: [PATCH 104/298] ARM: omap5/dra7xx: Fix frequency typos commit 572b24e6d85d98cdc552f07e9fb9870d9460d81b upstream. The switch statement of the possible list of SYSCLK1 frequencies is missing a 0 in 4 out of the 7 frequencies. Fixes: fa6d79d27614 ("ARM: OMAP: Add initialisation for the real-time counter") Signed-off-by: Len Sorensen Reviewed-by: Lokesh Vutla Acked-by: Nishanth Menon Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/timer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 74044aaf438b..73d80b8576c3 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -513,11 +513,11 @@ static void __init realtime_counter_init(void) rate = clk_get_rate(sys_clk); /* Numerator/denumerator values refer TRM Realtime Counter section */ switch (rate) { - case 1200000: + case 12000000: num = 64; den = 125; break; - case 1300000: + case 13000000: num = 768; den = 1625; break; @@ -529,11 +529,11 @@ static void __init realtime_counter_init(void) num = 192; den = 625; break; - case 2600000: + case 26000000: num = 384; den = 1625; break; - case 2700000: + case 27000000: num = 256; den = 1125; break; From afd419f511d753e398aca1be3abec30578c9f594 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 6 Jan 2015 14:39:10 +0100 Subject: [PATCH 105/298] ARM: shmobile: sh73a0 legacy: Set .control_parent for all irqpin instances commit b0ddb319db3d7a1943445f0de0a45c07a7f3457a upstream. The sh73a0 INTC can't mask interrupts properly most likely due to a hardware bug. Set the .control_parent flag to delegate masking to the parent interrupt controller, like was already done for irqpin1. Without this, accessing the three-axis digital accelerometer ADXL345 on kzm9g through /dev/input/event1 causes an interrupt storm, which requires a power-cycle to recover from. This was inspired by a patch for arch/arm/boot/dts/sh73a0.dtsi from Laurent Pinchart . Signed-off-by: Geert Uytterhoeven Fixes: 341eb5465f67437a ("ARM: shmobile: INTC External IRQ pin driver on sh73a0") Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-shmobile/setup-sh73a0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-shmobile/setup-sh73a0.c b/arch/arm/mach-shmobile/setup-sh73a0.c index f74ab530c71d..2b73c8a0c244 100644 --- a/arch/arm/mach-shmobile/setup-sh73a0.c +++ b/arch/arm/mach-shmobile/setup-sh73a0.c @@ -617,6 +617,7 @@ static struct platform_device ipmmu_device = { static struct renesas_intc_irqpin_config irqpin0_platform_data = { .irq_base = irq_pin(0), /* IRQ0 -> IRQ7 */ + .control_parent = true, }; static struct resource irqpin0_resources[] = { @@ -678,6 +679,7 @@ static struct platform_device irqpin1_device = { static struct renesas_intc_irqpin_config irqpin2_platform_data = { .irq_base = irq_pin(16), /* IRQ16 -> IRQ23 */ + .control_parent = true, }; static struct resource irqpin2_resources[] = { @@ -708,6 +710,7 @@ static struct platform_device irqpin2_device = { static struct renesas_intc_irqpin_config irqpin3_platform_data = { .irq_base = irq_pin(24), /* IRQ24 -> IRQ31 */ + .control_parent = true, }; static struct resource irqpin3_resources[] = { From d896e223d601ce7abec4783aec3fb563ec9ad4e3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Dec 2014 16:58:05 -0800 Subject: [PATCH 106/298] decompress_bunzip2: off by one in get_next_block() commit b5c8afe5be51078a979d86ae5ae78c4ac948063d upstream. "origPtr" is used as an offset into the bd->dbuf[] array. That array is allocated in start_bunzip() and has "bd->dbufSize" number of elements so the test here should be >= instead of >. Later we check "origPtr" again before using it as an offset so I don't know if this bug can be triggered in real life. Fixes: bc22c17e12c1 ('bzip2/lzma: library support for gzip, bzip2 and lzma decompression') Signed-off-by: Dan Carpenter Cc: Alain Knaff Cc: Yinghai Lu Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/decompress_bunzip2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 31c5f7675fbf..f504027d66a8 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -184,7 +184,7 @@ static int INIT get_next_block(struct bunzip_data *bd) if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; origPtr = get_bits(bd, 24); - if (origPtr > dbufSize) + if (origPtr >= dbufSize) return RETVAL_DATA_ERROR; /* mapping table: if some byte values are never used (encoding things like ascii text), the compression code removes the gaps to have fewer From b388e0d11005381b79a8d296e53a376fbf17a577 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 10 Dec 2014 13:53:51 +0100 Subject: [PATCH 107/298] um: Skip futex_atomic_cmpxchg_inatomic() test commit f911d731054ab3d82ee72a16b889e17ca3a2332a upstream. futex_atomic_cmpxchg_inatomic() does not work on UML because it triggers a copy_from_user() in kernel context. On UML copy_from_user() can only be used if the kernel was called by a real user space process such that UML can use ptrace() to fetch the value. Reported-by: Miklos Szeredi Suggested-by: Geert Uytterhoeven Signed-off-by: Richard Weinberger Tested-by: Daniel Walter Signed-off-by: Greg Kroah-Hartman --- arch/um/Kconfig.common | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index 21ca44c4f6d5..1f0ea5537e8a 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -2,6 +2,7 @@ config UML bool default y select HAVE_UID16 + select HAVE_FUTEX_CMPXCHG if FUTEX select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_IO From 8e044b92a85b2445c725ce855cde351f828efc1e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 3 Jan 2015 13:11:10 +0100 Subject: [PATCH 108/298] x86, um: actually mark system call tables readonly commit b485342bd79af363c77ef1a421c4a0aef2de9812 upstream. Commit a074335a370e ("x86, um: Mark system call tables readonly") was supposed to mark the sys_call_table in UML as RO by adding the const, but it doesn't have the desired effect as it's nevertheless being placed into the data section since __cacheline_aligned enforces sys_call_table being placed into .data..cacheline_aligned instead. We need to use the ____cacheline_aligned version instead to fix this issue. Before: $ nm -v arch/x86/um/sys_call_table_64.o | grep -1 "sys_call_table" U sys_writev 0000000000000000 D sys_call_table 0000000000000000 D syscall_table_size After: $ nm -v arch/x86/um/sys_call_table_64.o | grep -1 "sys_call_table" U sys_writev 0000000000000000 R sys_call_table 0000000000000000 D syscall_table_size Fixes: a074335a370e ("x86, um: Mark system call tables readonly") Cc: H. Peter Anvin Cc: Andrew Morton Signed-off-by: Daniel Borkmann Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/x86/um/sys_call_table_32.c | 2 +- arch/x86/um/sys_call_table_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 531d4269e2e3..bd16d6c370ec 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -34,7 +34,7 @@ typedef asmlinkage void (*sys_call_ptr_t)(void); extern asmlinkage void sys_ni_syscall(void); -const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { +const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index f2f0723070ca..95783087f0d3 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -46,7 +46,7 @@ typedef void (*sys_call_ptr_t)(void); extern void sys_ni_syscall(void); -const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { +const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. From 7055233b96b0337e1d918ad093af402fd32fe4ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 2 Jan 2015 15:05:25 -0500 Subject: [PATCH 109/298] LOCKD: Fix a race when initialising nlmsvc_timeout commit 06bed7d18c2c07b3e3eeadf4bd357f6e806618cc upstream. This commit fixes a race whereby nlmclnt_init() first starts the lockd daemon, and then calls nlm_bind_host() with the expectation that nlmsvc_timeout has already been initialised. Unfortunately, there is no no synchronisation between lockd() and lockd_up() to guarantee that this is the case. Fix is to move the initialisation of nlmsvc_timeout into lockd_create_svc Fixes: 9a1b6bf818e74 ("LOCKD: Don't call utsname()->nodename...") Cc: Bruce Fields Cc: stable@vger.kernel.org # 3.10.x Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 223e1cb14345..59a53f664005 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -137,10 +137,6 @@ lockd(void *vrqstp) dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); - if (!nlm_timeout) - nlm_timeout = LOCKD_DFLT_TIMEO; - nlmsvc_timeout = nlm_timeout * HZ; - /* * The main request loop. We don't terminate until the last * NFS mount or NFS daemon has gone away. @@ -346,6 +342,10 @@ static struct svc_serv *lockd_create_svc(void) printk(KERN_WARNING "lockd_up: no pid, %d users??\n", nlmsvc_users); + if (!nlm_timeout) + nlm_timeout = LOCKD_DFLT_TIMEO; + nlmsvc_timeout = nlm_timeout * HZ; + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); From 386241c97f30fa473b106beb57863cf05d220ea2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 2 Oct 2014 09:30:55 +0200 Subject: [PATCH 110/298] tcm_loop: Fixup tag handling commit 6375f8908255ea7438b60bb5998e6b3e1628500d upstream. The SCSI command tag is set to the tag assigned from the block layer, not the SCSI-II tag message. So we need to convert it into the correct SCSI-II tag message based on the device flags, not the tag value itself. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/loopback/tcm_loop.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index fadad7c5f635..67e76ffc728a 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -153,18 +153,11 @@ static int tcm_loop_change_queue_type(struct scsi_device *sdev, int tag) /* * Locate the SAM Task Attr from struct scsi_cmnd * */ -static int tcm_loop_sam_attr(struct scsi_cmnd *sc) +static int tcm_loop_sam_attr(struct scsi_cmnd *sc, int tag) { - if (sc->device->tagged_supported) { - switch (sc->tag) { - case HEAD_OF_QUEUE_TAG: - return MSG_HEAD_TAG; - case ORDERED_QUEUE_TAG: - return MSG_ORDERED_TAG; - default: - break; - } - } + if (sc->device->tagged_supported && + sc->device->ordered_tags && tag >= 0) + return MSG_ORDERED_TAG; return MSG_SIMPLE_TAG; } @@ -214,7 +207,7 @@ static void tcm_loop_submission_work(struct work_struct *work) } rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd, &tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun, - scsi_bufflen(sc), tcm_loop_sam_attr(sc), + scsi_bufflen(sc), tcm_loop_sam_attr(sc, tl_cmd->sc_cmd_tag), sc->sc_data_direction, 0, scsi_sglist(sc), scsi_sg_count(sc), sgl_bidi, sgl_bidi_count, @@ -252,7 +245,7 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) } tl_cmd->sc = sc; - tl_cmd->sc_cmd_tag = sc->tag; + tl_cmd->sc_cmd_tag = sc->request->tag; INIT_WORK(&tl_cmd->work, tcm_loop_submission_work); queue_work(tcm_loop_workqueue, &tl_cmd->work); return 0; @@ -356,7 +349,7 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc) */ tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, - sc->tag, TMR_ABORT_TASK); + sc->request->tag, TMR_ABORT_TASK); return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; } From eb7c55639c00e5f0bf05f1dcf126fb49931a3e3f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 26 Nov 2014 14:58:57 +0100 Subject: [PATCH 111/298] tcm_loop: Fix wrong I_T nexus association commit 506787a2c7daed45f0a213674ca706cbc83a9089 upstream. tcm_loop has the I_T nexus associated with the HBA. This causes commands to become misdirected if the HBA has more than one target portal group; any command is then being sent to the first target portal group instead of the correct one. The nexus needs to be associated with the target portal group instead. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/loopback/tcm_loop.c | 66 +++++++++++------------------- drivers/target/loopback/tcm_loop.h | 7 +--- 2 files changed, 24 insertions(+), 49 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 67e76ffc728a..67c802c93ef3 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -190,7 +190,7 @@ static void tcm_loop_submission_work(struct work_struct *work) set_host_byte(sc, DID_TRANSPORT_DISRUPTED); goto out_done; } - tl_nexus = tl_hba->tl_nexus; + tl_nexus = tl_tpg->tl_nexus; if (!tl_nexus) { scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus" " does not exist\n"); @@ -256,16 +256,26 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) * to struct scsi_device */ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, - struct tcm_loop_nexus *tl_nexus, int lun, int task, enum tcm_tmreq_table tmr) { struct se_cmd *se_cmd = NULL; struct se_session *se_sess; struct se_portal_group *se_tpg; + struct tcm_loop_nexus *tl_nexus; struct tcm_loop_cmd *tl_cmd = NULL; struct tcm_loop_tmr *tl_tmr = NULL; int ret = TMR_FUNCTION_FAILED, rc; + /* + * Locate the tl_nexus and se_sess pointers + */ + tl_nexus = tl_tpg->tl_nexus; + if (!tl_nexus) { + pr_err("Unable to perform device reset without" + " active I_T Nexus\n"); + return ret; + } + tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_KERNEL); if (!tl_cmd) { pr_err("Unable to allocate memory for tl_cmd\n"); @@ -281,7 +291,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, se_cmd = &tl_cmd->tl_se_cmd; se_tpg = &tl_tpg->tl_se_tpg; - se_sess = tl_nexus->se_sess; + se_sess = tl_tpg->tl_nexus->se_sess; /* * Initialize struct se_cmd descriptor from target_core_mod infrastructure */ @@ -326,7 +336,6 @@ release: static int tcm_loop_abort_task(struct scsi_cmnd *sc) { struct tcm_loop_hba *tl_hba; - struct tcm_loop_nexus *tl_nexus; struct tcm_loop_tpg *tl_tpg; int ret = FAILED; @@ -334,21 +343,8 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc) * Locate the tcm_loop_hba_t pointer */ tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); - /* - * Locate the tl_nexus and se_sess pointers - */ - tl_nexus = tl_hba->tl_nexus; - if (!tl_nexus) { - pr_err("Unable to perform device reset without" - " active I_T Nexus\n"); - return FAILED; - } - - /* - * Locate the tl_tpg pointer from TargetID in sc->device->id - */ tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; - ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, + ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, sc->request->tag, TMR_ABORT_TASK); return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; } @@ -360,7 +356,6 @@ static int tcm_loop_abort_task(struct scsi_cmnd *sc) static int tcm_loop_device_reset(struct scsi_cmnd *sc) { struct tcm_loop_hba *tl_hba; - struct tcm_loop_nexus *tl_nexus; struct tcm_loop_tpg *tl_tpg; int ret = FAILED; @@ -368,20 +363,9 @@ static int tcm_loop_device_reset(struct scsi_cmnd *sc) * Locate the tcm_loop_hba_t pointer */ tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); - /* - * Locate the tl_nexus and se_sess pointers - */ - tl_nexus = tl_hba->tl_nexus; - if (!tl_nexus) { - pr_err("Unable to perform device reset without" - " active I_T Nexus\n"); - return FAILED; - } - /* - * Locate the tl_tpg pointer from TargetID in sc->device->id - */ tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id]; - ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun, + + ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun, 0, TMR_LUN_RESET); return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED; } @@ -988,8 +972,8 @@ static int tcm_loop_make_nexus( struct tcm_loop_nexus *tl_nexus; int ret = -ENOMEM; - if (tl_tpg->tl_hba->tl_nexus) { - pr_debug("tl_tpg->tl_hba->tl_nexus already exists\n"); + if (tl_tpg->tl_nexus) { + pr_debug("tl_tpg->tl_nexus already exists\n"); return -EEXIST; } se_tpg = &tl_tpg->tl_se_tpg; @@ -1024,7 +1008,7 @@ static int tcm_loop_make_nexus( */ __transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl, tl_nexus->se_sess, tl_nexus); - tl_tpg->tl_hba->tl_nexus = tl_nexus; + tl_tpg->tl_nexus = tl_nexus; pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated" " %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba), name); @@ -1040,12 +1024,8 @@ static int tcm_loop_drop_nexus( { struct se_session *se_sess; struct tcm_loop_nexus *tl_nexus; - struct tcm_loop_hba *tl_hba = tpg->tl_hba; - if (!tl_hba) - return -ENODEV; - - tl_nexus = tl_hba->tl_nexus; + tl_nexus = tpg->tl_nexus; if (!tl_nexus) return -ENODEV; @@ -1061,13 +1041,13 @@ static int tcm_loop_drop_nexus( } pr_debug("TCM_Loop_ConfigFS: Removing I_T Nexus to emulated" - " %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba), + " %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tpg->tl_hba), tl_nexus->se_sess->se_node_acl->initiatorname); /* * Release the SCSI I_T Nexus to the emulated SAS Target Port */ transport_deregister_session(tl_nexus->se_sess); - tpg->tl_hba->tl_nexus = NULL; + tpg->tl_nexus = NULL; kfree(tl_nexus); return 0; } @@ -1083,7 +1063,7 @@ static ssize_t tcm_loop_tpg_show_nexus( struct tcm_loop_nexus *tl_nexus; ssize_t ret; - tl_nexus = tl_tpg->tl_hba->tl_nexus; + tl_nexus = tl_tpg->tl_nexus; if (!tl_nexus) return -ENODEV; diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h index 54c59d0b6608..6ae49f272ba6 100644 --- a/drivers/target/loopback/tcm_loop.h +++ b/drivers/target/loopback/tcm_loop.h @@ -27,11 +27,6 @@ struct tcm_loop_tmr { }; struct tcm_loop_nexus { - int it_nexus_active; - /* - * Pointer to Linux/SCSI HBA from linux/include/scsi_host.h - */ - struct scsi_host *sh; /* * Pointer to TCM session for I_T Nexus */ @@ -51,6 +46,7 @@ struct tcm_loop_tpg { atomic_t tl_tpg_port_count; struct se_portal_group tl_se_tpg; struct tcm_loop_hba *tl_hba; + struct tcm_loop_nexus *tl_nexus; }; struct tcm_loop_hba { @@ -59,7 +55,6 @@ struct tcm_loop_hba { struct se_hba_s *se_hba; struct se_lun *tl_hba_lun; struct se_port *tl_hba_lun_sep; - struct tcm_loop_nexus *tl_nexus; struct device dev; struct Scsi_Host *sh; struct tcm_loop_tpg tl_hba_tpgs[TL_TPGS_PER_HBA]; From 24d2683ee34ce75c48969b9b0fdce138e43e6d01 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 21 Dec 2014 10:42:08 -0800 Subject: [PATCH 112/298] vhost-scsi: Add missing virtio-scsi -> TCM attribute conversion commit 46243860806bdc2756f3ce8ac86b4d7c616bcd6c upstream. While looking at hch's recent conversion to drop the MSG_*_TAG definitions, I noticed a long standing bug in vhost-scsi where the VIRTIO_SCSI_S_* attribute definitions where incorrectly being passed directly into target_submit_cmd_map_sgls(). This patch adds the missing virtio-scsi to TCM/SAM task attribute conversion. Cc: Christoph Hellwig Cc: Michael S. Tsirkin Cc: Paolo Bonzini Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/scsi.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 5d0b7b846440..486d710a5293 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -861,6 +861,23 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd, return 0; } +static int vhost_scsi_to_tcm_attr(int attr) +{ + switch (attr) { + case VIRTIO_SCSI_S_SIMPLE: + return MSG_SIMPLE_TAG; + case VIRTIO_SCSI_S_ORDERED: + return MSG_ORDERED_TAG; + case VIRTIO_SCSI_S_HEAD: + return MSG_HEAD_TAG; + case VIRTIO_SCSI_S_ACA: + return MSG_ACA_TAG; + default: + break; + } + return MSG_SIMPLE_TAG; +} + static void tcm_vhost_submission_work(struct work_struct *work) { struct tcm_vhost_cmd *cmd = @@ -887,9 +904,10 @@ static void tcm_vhost_submission_work(struct work_struct *work) rc = target_submit_cmd_map_sgls(se_cmd, tv_nexus->tvn_se_sess, cmd->tvc_cdb, &cmd->tvc_sense_buf[0], cmd->tvc_lun, cmd->tvc_exp_data_len, - cmd->tvc_task_attr, cmd->tvc_data_direction, - TARGET_SCF_ACK_KREF, sg_ptr, cmd->tvc_sgl_count, - sg_bidi_ptr, sg_no_bidi, NULL, 0); + vhost_scsi_to_tcm_attr(cmd->tvc_task_attr), + cmd->tvc_data_direction, TARGET_SCF_ACK_KREF, + sg_ptr, cmd->tvc_sgl_count, sg_bidi_ptr, sg_no_bidi, + NULL, 0); if (rc < 0) { transport_send_check_condition_and_sense(se_cmd, TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0); From 1ef1fb791dfb83cdc1baaab5db4c06cb3e63ffbb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 2 Dec 2014 16:57:17 +0200 Subject: [PATCH 113/298] iscsi,iser-target: Initiate termination only once commit 954f23722b5753305be490330cf2680b7a25f4a3 upstream. Since commit 0fc4ea701fcf ("Target/iser: Don't put isert_conn inside disconnected handler") we put the conn kref in isert_wait_conn, so we need .wait_conn to be invoked also in the error path. Introduce call to isert_conn_terminate (called under lock) which transitions the connection state to TERMINATING and calls rdma_disconnect. If the state is already teminating, just bail out back (temination started). Also, make sure to destroy the connection when getting a connect error event if didn't get to connected (state UP). Same for the handling of REJECTED and UNREACHABLE cma events. Squashed: iscsi-target: Add call to wait_conn in establishment error flow Reported-by: Slava Shwartsman Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 84 ++++++++++++++--------- drivers/infiniband/ulp/isert/ib_isert.h | 1 - drivers/target/iscsi/iscsi_target_login.c | 3 + 3 files changed, 54 insertions(+), 34 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a96cfc31372e..7c486b43c76f 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -692,6 +692,33 @@ isert_put_conn(struct isert_conn *isert_conn) kref_put(&isert_conn->conn_kref, isert_release_conn_kref); } +/** + * isert_conn_terminate() - Initiate connection termination + * @isert_conn: isert connection struct + * + * Notes: + * In case the connection state is UP, move state + * to TEMINATING and start teardown sequence (rdma_disconnect). + * + * This routine must be called with conn_mutex held. Thus it is + * safe to call multiple times. + */ +static void +isert_conn_terminate(struct isert_conn *isert_conn) +{ + int err; + + if (isert_conn->state == ISER_CONN_UP) { + isert_conn->state = ISER_CONN_TERMINATING; + pr_info("Terminating conn %p state %d\n", + isert_conn, isert_conn->state); + err = rdma_disconnect(isert_conn->conn_cm_id); + if (err) + pr_warn("Failed rdma_disconnect isert_conn %p\n", + isert_conn); + } +} + static void isert_disconnect_work(struct work_struct *work) { @@ -700,33 +727,15 @@ isert_disconnect_work(struct work_struct *work) pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->state == ISER_CONN_UP) - isert_conn->state = ISER_CONN_TERMINATING; - - if (isert_conn->post_recv_buf_count == 0 && - atomic_read(&isert_conn->post_send_buf_count) == 0) { - mutex_unlock(&isert_conn->conn_mutex); - goto wake_up; - } - if (!isert_conn->conn_cm_id) { - mutex_unlock(&isert_conn->conn_mutex); - isert_put_conn(isert_conn); - return; - } - - if (isert_conn->disconnect) { - /* Send DREQ/DREP towards our initiator */ - rdma_disconnect(isert_conn->conn_cm_id); - } - + isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->conn_mutex); -wake_up: + pr_info("conn %p completing conn_wait\n", isert_conn); complete(&isert_conn->conn_wait); } static int -isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect) +isert_disconnected_handler(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn; @@ -739,18 +748,24 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect) isert_conn = (struct isert_conn *)cma_id->context; - isert_conn->disconnect = disconnect; INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work); schedule_work(&isert_conn->conn_logout_work); return 0; } +static void +isert_connect_error(struct rdma_cm_id *cma_id) +{ + struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context; + + isert_put_conn(isert_conn); +} + static int isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret = 0; - bool disconnect = false; pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n", event->event, event->status, cma_id->context, cma_id); @@ -768,11 +783,14 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */ case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ - disconnect = true; case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ - ret = isert_disconnected_handler(cma_id, disconnect); + ret = isert_disconnected_handler(cma_id); break; + case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ + case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: + isert_connect_error(cma_id); + break; default: pr_err("Unhandled RDMA CMA event: %d\n", event->event); break; @@ -1799,7 +1817,7 @@ isert_cq_rx_comp_err(struct isert_conn *isert_conn) msleep(3000); mutex_lock(&isert_conn->conn_mutex); - isert_conn->state = ISER_CONN_DOWN; + isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->conn_mutex); iscsit_cause_connection_reinstatement(isert_conn->conn, 0); @@ -2798,10 +2816,6 @@ static void isert_wait_conn(struct iscsi_conn *conn) pr_debug("isert_wait_conn: Starting \n"); mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->conn_cm_id) { - pr_debug("Calling rdma_disconnect from isert_wait_conn\n"); - rdma_disconnect(isert_conn->conn_cm_id); - } /* * Only wait for conn_wait_comp_err if the isert_conn made it * into full feature phase.. @@ -2810,13 +2824,17 @@ static void isert_wait_conn(struct iscsi_conn *conn) mutex_unlock(&isert_conn->conn_mutex); return; } - if (isert_conn->state == ISER_CONN_UP) - isert_conn->state = ISER_CONN_TERMINATING; + isert_conn_terminate(isert_conn); mutex_unlock(&isert_conn->conn_mutex); wait_for_completion(&isert_conn->conn_wait_comp_err); - wait_for_completion(&isert_conn->conn_wait); + + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_DOWN; + mutex_unlock(&isert_conn->conn_mutex); + + pr_info("Destroying conn %p\n", isert_conn); isert_put_conn(isert_conn); } diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index cbecaabe90b9..452cbf392901 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -127,7 +127,6 @@ struct isert_conn { #define ISERT_COMP_BATCH_COUNT 8 int conn_comp_batch; struct llist_head conn_comp_llist; - bool disconnect; }; #define ISERT_MAX_CQ 64 diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index d509aa74cfa1..c5d3811a7b8c 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1186,6 +1186,9 @@ old_sess_out: conn->sock = NULL; } + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + if (conn->conn_transport->iscsit_free_conn) conn->conn_transport->iscsit_free_conn(conn); From 3703a33ed0564e3ef703859c06af54fc3581d8c6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 2 Dec 2014 16:57:20 +0200 Subject: [PATCH 114/298] iser-target: Fix flush + disconnect completion handling commit 128e9cc84566a84146baea2335b3824288eed817 upstream. ISER_CONN_UP state is not sufficient to know if we should wait for completion of flush errors and disconnected_handler event. Instead, split it to 2 states: - ISER_CONN_UP: Got to CM connected phase, This state indicates that we need to wait for a CM disconnect event before going to teardown. - ISER_CONN_FULL_FEATURE: Got to full feature phase after we posted login response, This state indicates that we posted recv buffers and we need to wait for flush completions before going to teardown. Also avoid deffering disconnected handler to a work, and handle it within disconnected handler. More work here is needed to handle DEVICE_REMOVAL event correctly (cleanup all resources). Squashed: iser-target: Don't deffer disconnected handler to a work Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 52 ++++++++++++++----------- drivers/infiniband/ulp/isert/ib_isert.h | 2 +- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 7c486b43c76f..34f59bb918d7 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -671,6 +671,9 @@ isert_connected_handler(struct rdma_cm_id *cma_id) { struct isert_conn *isert_conn = cma_id->context; + pr_info("conn %p\n", isert_conn); + + isert_conn->state = ISER_CONN_UP; kref_get(&isert_conn->conn_kref); } @@ -697,8 +700,9 @@ isert_put_conn(struct isert_conn *isert_conn) * @isert_conn: isert connection struct * * Notes: - * In case the connection state is UP, move state + * In case the connection state is FULL_FEATURE, move state * to TEMINATING and start teardown sequence (rdma_disconnect). + * In case the connection state is UP, complete flush as well. * * This routine must be called with conn_mutex held. Thus it is * safe to call multiple times. @@ -708,32 +712,31 @@ isert_conn_terminate(struct isert_conn *isert_conn) { int err; - if (isert_conn->state == ISER_CONN_UP) { - isert_conn->state = ISER_CONN_TERMINATING; + switch (isert_conn->state) { + case ISER_CONN_TERMINATING: + break; + case ISER_CONN_UP: + /* + * No flush completions will occur as we didn't + * get to ISER_CONN_FULL_FEATURE yet, complete + * to allow teardown progress. + */ + complete(&isert_conn->conn_wait_comp_err); + case ISER_CONN_FULL_FEATURE: /* FALLTHRU */ pr_info("Terminating conn %p state %d\n", isert_conn, isert_conn->state); + isert_conn->state = ISER_CONN_TERMINATING; err = rdma_disconnect(isert_conn->conn_cm_id); if (err) pr_warn("Failed rdma_disconnect isert_conn %p\n", isert_conn); + break; + default: + pr_warn("conn %p teminating in state %d\n", + isert_conn, isert_conn->state); } } -static void -isert_disconnect_work(struct work_struct *work) -{ - struct isert_conn *isert_conn = container_of(work, - struct isert_conn, conn_logout_work); - - pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - mutex_lock(&isert_conn->conn_mutex); - isert_conn_terminate(isert_conn); - mutex_unlock(&isert_conn->conn_mutex); - - pr_info("conn %p completing conn_wait\n", isert_conn); - complete(&isert_conn->conn_wait); -} - static int isert_disconnected_handler(struct rdma_cm_id *cma_id) { @@ -748,8 +751,12 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id) isert_conn = (struct isert_conn *)cma_id->context; - INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work); - schedule_work(&isert_conn->conn_logout_work); + mutex_lock(&isert_conn->conn_mutex); + isert_conn_terminate(isert_conn); + mutex_unlock(&isert_conn->conn_mutex); + + pr_info("conn %p completing conn_wait\n", isert_conn); + complete(&isert_conn->conn_wait); return 0; } @@ -924,7 +931,7 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls. */ mutex_lock(&isert_conn->conn_mutex); - if (coalesce && isert_conn->state == ISER_CONN_UP && + if (coalesce && isert_conn->state == ISER_CONN_FULL_FEATURE && ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) { tx_desc->llnode_active = true; llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist); @@ -1021,7 +1028,8 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, if (ret) return ret; - isert_conn->state = ISER_CONN_UP; + /* Now we are in FULL_FEATURE phase */ + isert_conn->state = ISER_CONN_FULL_FEATURE; goto post_send; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 452cbf392901..55ff7fd020eb 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -23,6 +23,7 @@ enum iser_ib_op_code { enum iser_conn_state { ISER_CONN_INIT, ISER_CONN_UP, + ISER_CONN_FULL_FEATURE, ISER_CONN_TERMINATING, ISER_CONN_DOWN, }; @@ -115,7 +116,6 @@ struct isert_conn { struct ib_mr *conn_mr; struct ib_qp *conn_qp; struct isert_device *conn_device; - struct work_struct conn_logout_work; struct mutex conn_mutex; struct completion conn_wait; struct completion conn_wait_comp_err; From c4f8768db2c2e097713a13aa07aa26ce9b18e2df Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 2 Dec 2014 16:57:21 +0200 Subject: [PATCH 115/298] iser-target: Parallelize CM connection establishment commit 2371e5da8cfe91443339b54444dec6254fdd6dfc upstream. There is no point in accepting a new CM request only when we are completely done with the last iscsi login. Instead we accept immediately, this will also cause the CM connection to reach connected state and the initiator is allowed to send the first login. We mark that we got the initial login and let iscsi layer pick it up when it gets there. This reduces the parallel login sequence by a factor of more then 4 (and more for multi-login) and also prevents the initiator (who does all logins in parallel) from giving up on login timeout expiration. In order to support multiple login requests sequence (CHAP) we call isert_rx_login_req from isert_rx_completion insead of letting isert_get_login_rx call it. Squashed: iser-target: Use kref_get_unless_zero in connected_handler iser-target: Acquire conn_mutex when changing connection state iser-target: Reject connect request in failure path Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 84 ++++++++++++++++++------- drivers/infiniband/ulp/isert/ib_isert.h | 2 + 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 34f59bb918d7..ffe26b344e6b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -52,6 +52,10 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); static int isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_rdma_wr *wr); +static int +isert_rdma_post_recvl(struct isert_conn *isert_conn); +static int +isert_rdma_accept(struct isert_conn *isert_conn); static void isert_qp_event_callback(struct ib_event *e, void *context) @@ -515,6 +519,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->state = ISER_CONN_INIT; INIT_LIST_HEAD(&isert_conn->conn_accept_node); init_completion(&isert_conn->conn_login_comp); + init_completion(&isert_conn->login_req_comp); init_completion(&isert_conn->conn_wait); init_completion(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); @@ -596,6 +601,14 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) if (ret) goto out_conn_dev; + ret = isert_rdma_post_recvl(isert_conn); + if (ret) + goto out_conn_dev; + + ret = isert_rdma_accept(isert_conn); + if (ret) + goto out_conn_dev; + mutex_lock(&isert_np->np_accept_mutex); list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list); mutex_unlock(&isert_np->np_accept_mutex); @@ -620,6 +633,7 @@ out_login_buf: kfree(isert_conn->login_buf); out: kfree(isert_conn); + rdma_reject(cma_id, NULL, 0); return ret; } @@ -673,8 +687,15 @@ isert_connected_handler(struct rdma_cm_id *cma_id) pr_info("conn %p\n", isert_conn); - isert_conn->state = ISER_CONN_UP; - kref_get(&isert_conn->conn_kref); + if (!kref_get_unless_zero(&isert_conn->conn_kref)) { + pr_warn("conn %p connect_release is running\n", isert_conn); + return; + } + + mutex_lock(&isert_conn->conn_mutex); + if (isert_conn->state != ISER_CONN_FULL_FEATURE) + isert_conn->state = ISER_CONN_UP; + mutex_unlock(&isert_conn->conn_mutex); } static void @@ -1029,7 +1050,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, return ret; /* Now we are in FULL_FEATURE phase */ + mutex_lock(&isert_conn->conn_mutex); isert_conn->state = ISER_CONN_FULL_FEATURE; + mutex_unlock(&isert_conn->conn_mutex); goto post_send; } @@ -1046,18 +1069,17 @@ post_send: } static void -isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen, - struct isert_conn *isert_conn) +isert_rx_login_req(struct isert_conn *isert_conn) { + struct iser_rx_desc *rx_desc = (void *)isert_conn->login_req_buf; + int rx_buflen = isert_conn->login_req_len; struct iscsi_conn *conn = isert_conn->conn; struct iscsi_login *login = conn->conn_login; int size; - if (!login) { - pr_err("conn->conn_login is NULL\n"); - dump_stack(); - return; - } + pr_info("conn %p\n", isert_conn); + + WARN_ON_ONCE(!login); if (login->first_request) { struct iscsi_login_req *login_req = @@ -1420,11 +1442,20 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn, hdr->opcode, hdr->itt, hdr->flags, (int)(xfer_len - ISER_HEADERS_LEN)); - if ((char *)desc == isert_conn->login_req_buf) - isert_rx_login_req(desc, xfer_len - ISER_HEADERS_LEN, - isert_conn); - else + if ((char *)desc == isert_conn->login_req_buf) { + isert_conn->login_req_len = xfer_len - ISER_HEADERS_LEN; + if (isert_conn->conn) { + struct iscsi_login *login = isert_conn->conn->conn_login; + + if (login && !login->first_request) + isert_rx_login_req(isert_conn); + } + mutex_lock(&isert_conn->conn_mutex); + complete(&isert_conn->login_req_comp); + mutex_unlock(&isert_conn->conn_mutex); + } else { isert_rx_do_work(desc, isert_conn); + } ib_dma_sync_single_for_device(ib_dev, rx_dma, rx_buflen, DMA_FROM_DEVICE); @@ -2699,7 +2730,15 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) struct isert_conn *isert_conn = (struct isert_conn *)conn->context; int ret; - pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn); + pr_info("before login_req comp conn: %p\n", isert_conn); + ret = wait_for_completion_interruptible(&isert_conn->login_req_comp); + if (ret) { + pr_err("isert_conn %p interrupted before got login req\n", + isert_conn); + return ret; + } + reinit_completion(&isert_conn->login_req_comp); + /* * For login requests after the first PDU, isert_rx_login_req() will * kick schedule_delayed_work(&conn->login_work) as the packet is @@ -2709,11 +2748,15 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login) if (!login->first_request) return 0; + isert_rx_login_req(isert_conn); + + pr_info("before conn_login_comp conn: %p\n", conn); ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp); if (ret) return ret; - pr_debug("isert_get_login_rx processing login->req: %p\n", login->req); + pr_info("processing login->req: %p\n", login->req); + return 0; } @@ -2791,17 +2834,10 @@ accept_wait: isert_conn->conn = conn; max_accept = 0; - ret = isert_rdma_post_recvl(isert_conn); - if (ret) - return ret; - - ret = isert_rdma_accept(isert_conn); - if (ret) - return ret; - isert_set_conn_info(np, conn, isert_conn); - pr_debug("Processing isert_accept_np: isert_conn: %p\n", isert_conn); + pr_debug("Processing isert_conn: %p\n", isert_conn); + return 0; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 55ff7fd020eb..2da5d815acf4 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -103,6 +103,7 @@ struct isert_conn { char *login_req_buf; char *login_rsp_buf; u64 login_req_dma; + int login_req_len; u64 login_rsp_dma; unsigned int conn_rx_desc_head; struct iser_rx_desc *conn_rx_descs; @@ -110,6 +111,7 @@ struct isert_conn { struct iscsi_conn *conn; struct list_head conn_accept_node; struct completion conn_login_comp; + struct completion login_req_comp; struct iser_tx_desc conn_login_tx_desc; struct rdma_cm_id *conn_cm_id; struct ib_pd *conn_pd; From ef0dddf5f9ff1ac674d08b97189d3123277e17c9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 2 Dec 2014 16:57:26 +0200 Subject: [PATCH 116/298] iser-target: Fix connected_handler + teardown flow race commit 19e2090fb246ca21b3e569ead51a6a7a1748eadd upstream. Take isert_conn pointer from cm_id->qp->qp_context. This will allow us to know that the cm_id context is always the network portal. This will make the cm_id event check (connection or network portal) more reliable. In order to avoid a NULL dereference in cma_id->qp->qp_context we destroy the qp after we destroy the cm_id (and make the dereference safe). session stablishment/teardown sequences can happen in parallel, we should take into account that connected_handler might race with connection teardown flow. Also, protect isert_conn->conn_device->active_qps decrement within the error patch during QP creation failure and the normal teardown path in isert_connect_release(). Squashed: iser-target: Decrement completion context active_qps in error flow Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 31 +++++++++++++++---------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ffe26b344e6b..2b6508ff7d58 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -136,12 +136,18 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) ret = rdma_create_qp(cma_id, isert_conn->conn_pd, &attr); if (ret) { pr_err("rdma_create_qp failed for cma_id %d\n", ret); - return ret; + goto err; } isert_conn->conn_qp = cma_id->qp; pr_debug("rdma_create_qp() returned success >>>>>>>>>>>>>>>>>>>>>>>>>.\n"); return 0; +err: + mutex_lock(&device_list_mutex); + device->cq_active_qps[min_index]--; + mutex_unlock(&device_list_mutex); + + return ret; } static void @@ -527,7 +533,6 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) spin_lock_init(&isert_conn->conn_lock); INIT_LIST_HEAD(&isert_conn->conn_fr_pool); - cma_id->context = isert_conn; isert_conn->conn_cm_id = cma_id; isert_conn->responder_resources = event->param.conn.responder_resources; isert_conn->initiator_depth = event->param.conn.initiator_depth; @@ -649,18 +654,20 @@ isert_connect_release(struct isert_conn *isert_conn) if (device && device->use_fastreg) isert_conn_free_fastreg_pool(isert_conn); + isert_free_rx_descriptors(isert_conn); + rdma_destroy_id(isert_conn->conn_cm_id); + if (isert_conn->conn_qp) { cq_index = ((struct isert_cq_desc *) isert_conn->conn_qp->recv_cq->cq_context)->cq_index; pr_debug("isert_connect_release: cq_index: %d\n", cq_index); + mutex_lock(&device_list_mutex); isert_conn->conn_device->cq_active_qps[cq_index]--; + mutex_unlock(&device_list_mutex); - rdma_destroy_qp(isert_conn->conn_cm_id); + ib_destroy_qp(isert_conn->conn_qp); } - isert_free_rx_descriptors(isert_conn); - rdma_destroy_id(isert_conn->conn_cm_id); - ib_dereg_mr(isert_conn->conn_mr); ib_dealloc_pd(isert_conn->conn_pd); @@ -683,7 +690,7 @@ isert_connect_release(struct isert_conn *isert_conn) static void isert_connected_handler(struct rdma_cm_id *cma_id) { - struct isert_conn *isert_conn = cma_id->context; + struct isert_conn *isert_conn = cma_id->qp->qp_context; pr_info("conn %p\n", isert_conn); @@ -761,16 +768,16 @@ isert_conn_terminate(struct isert_conn *isert_conn) static int isert_disconnected_handler(struct rdma_cm_id *cma_id) { + struct iscsi_np *np = cma_id->context; + struct isert_np *isert_np = np->np_context; struct isert_conn *isert_conn; - if (!cma_id->qp) { - struct isert_np *isert_np = cma_id->context; - + if (isert_np->np_cm_id == cma_id) { isert_np->np_cm_id = NULL; return -1; } - isert_conn = (struct isert_conn *)cma_id->context; + isert_conn = cma_id->qp->qp_context; mutex_lock(&isert_conn->conn_mutex); isert_conn_terminate(isert_conn); @@ -785,7 +792,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id) static void isert_connect_error(struct rdma_cm_id *cma_id) { - struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context; + struct isert_conn *isert_conn = cma_id->qp->qp_context; isert_put_conn(isert_conn); } From 1280a221de7b688876549f682ee34dfd8798b800 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 2 Dec 2014 16:57:27 +0200 Subject: [PATCH 117/298] iser-target: Handle ADDR_CHANGE event for listener cm_id commit ca6c1d82d12d8013fb75ce015900d62b9754623c upstream. The np listener cm_id will also get ADDR_CHANGE event upcall (in case it is bound to a specific IP). Handle it correctly by creating a new cm_id and implicitly destroy the old one. Since this is the second event a listener np cm_id may encounter, we move the np cm_id event handling to a routine. Squashed: iser-target: Move cma_id setup to a function Reported-by: Slava Shwartsman Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 109 +++++++++++++++++------- drivers/infiniband/ulp/isert/ib_isert.h | 1 + 2 files changed, 78 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 2b6508ff7d58..425b5b2fb343 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -56,6 +56,7 @@ static int isert_rdma_post_recvl(struct isert_conn *isert_conn); static int isert_rdma_accept(struct isert_conn *isert_conn); +struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np); static void isert_qp_event_callback(struct ib_event *e, void *context) @@ -499,8 +500,8 @@ err: static int isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { - struct iscsi_np *np = cma_id->context; - struct isert_np *isert_np = np->np_context; + struct isert_np *isert_np = cma_id->context; + struct iscsi_np *np = isert_np->np; struct isert_conn *isert_conn; struct isert_device *device; struct ib_device *ib_dev = cma_id->device; @@ -766,16 +767,40 @@ isert_conn_terminate(struct isert_conn *isert_conn) } static int -isert_disconnected_handler(struct rdma_cm_id *cma_id) +isert_np_cma_handler(struct isert_np *isert_np, + enum rdma_cm_event_type event) { - struct iscsi_np *np = cma_id->context; - struct isert_np *isert_np = np->np_context; + pr_debug("isert np %p, handling event %d\n", isert_np, event); + + switch (event) { + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_np->np_cm_id = NULL; + break; + case RDMA_CM_EVENT_ADDR_CHANGE: + isert_np->np_cm_id = isert_setup_id(isert_np); + if (IS_ERR(isert_np->np_cm_id)) { + pr_err("isert np %p setup id failed: %ld\n", + isert_np, PTR_ERR(isert_np->np_cm_id)); + isert_np->np_cm_id = NULL; + } + break; + default: + pr_err("isert np %p Unexpected event %d\n", + isert_np, event); + } + + return -1; +} + +static int +isert_disconnected_handler(struct rdma_cm_id *cma_id, + enum rdma_cm_event_type event) +{ + struct isert_np *isert_np = cma_id->context; struct isert_conn *isert_conn; - if (isert_np->np_cm_id == cma_id) { - isert_np->np_cm_id = NULL; - return -1; - } + if (isert_np->np_cm_id == cma_id) + return isert_np_cma_handler(cma_id->context, event); isert_conn = cma_id->qp->qp_context; @@ -819,7 +844,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ - ret = isert_disconnected_handler(cma_id); + ret = isert_disconnected_handler(cma_id, event->event); break; case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ @@ -2643,13 +2668,51 @@ isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state) return ret; } +struct rdma_cm_id * +isert_setup_id(struct isert_np *isert_np) +{ + struct iscsi_np *np = isert_np->np; + struct rdma_cm_id *id; + struct sockaddr *sa; + int ret; + + sa = (struct sockaddr *)&np->np_sockaddr; + pr_debug("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa); + + id = rdma_create_id(isert_cma_handler, isert_np, + RDMA_PS_TCP, IB_QPT_RC); + if (IS_ERR(id)) { + pr_err("rdma_create_id() failed: %ld\n", PTR_ERR(id)); + ret = PTR_ERR(id); + goto out; + } + pr_debug("id %p context %p\n", id, id->context); + + ret = rdma_bind_addr(id, sa); + if (ret) { + pr_err("rdma_bind_addr() failed: %d\n", ret); + goto out_id; + } + + ret = rdma_listen(id, ISERT_RDMA_LISTEN_BACKLOG); + if (ret) { + pr_err("rdma_listen() failed: %d\n", ret); + goto out_id; + } + + return id; +out_id: + rdma_destroy_id(id); +out: + return ERR_PTR(ret); +} + static int isert_setup_np(struct iscsi_np *np, struct __kernel_sockaddr_storage *ksockaddr) { struct isert_np *isert_np; struct rdma_cm_id *isert_lid; - struct sockaddr *sa; int ret; isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL); @@ -2661,9 +2724,8 @@ isert_setup_np(struct iscsi_np *np, mutex_init(&isert_np->np_accept_mutex); INIT_LIST_HEAD(&isert_np->np_accept_list); init_completion(&isert_np->np_login_comp); + isert_np->np = np; - sa = (struct sockaddr *)ksockaddr; - pr_debug("ksockaddr: %p, sa: %p\n", ksockaddr, sa); /* * Setup the np->np_sockaddr from the passed sockaddr setup * in iscsi_target_configfs.c code.. @@ -2671,37 +2733,20 @@ isert_setup_np(struct iscsi_np *np, memcpy(&np->np_sockaddr, ksockaddr, sizeof(struct __kernel_sockaddr_storage)); - isert_lid = rdma_create_id(isert_cma_handler, np, RDMA_PS_TCP, - IB_QPT_RC); + isert_lid = isert_setup_id(isert_np); if (IS_ERR(isert_lid)) { - pr_err("rdma_create_id() for isert_listen_handler failed: %ld\n", - PTR_ERR(isert_lid)); ret = PTR_ERR(isert_lid); goto out; } - ret = rdma_bind_addr(isert_lid, sa); - if (ret) { - pr_err("rdma_bind_addr() for isert_lid failed: %d\n", ret); - goto out_lid; - } - - ret = rdma_listen(isert_lid, ISERT_RDMA_LISTEN_BACKLOG); - if (ret) { - pr_err("rdma_listen() for isert_lid failed: %d\n", ret); - goto out_lid; - } - isert_np->np_cm_id = isert_lid; np->np_context = isert_np; - pr_debug("Setup isert_lid->context: %p\n", isert_lid->context); return 0; -out_lid: - rdma_destroy_id(isert_lid); out: kfree(isert_np); + return ret; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 2da5d815acf4..8f5eeb2b5679 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -159,6 +159,7 @@ struct isert_device { }; struct isert_np { + struct iscsi_np *np; struct semaphore np_sem; struct rdma_cm_id *np_cm_id; struct mutex np_accept_mutex; From 0350f8a1be79a42653f1cd2ecfc71123c57d577d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 2 Dec 2014 16:57:29 +0200 Subject: [PATCH 118/298] iser-target: Fix implicit termination of connections commit b02efbfc9a051b41e71fe8f94ddf967260e024a6 upstream. In situations such as bond failover, The new session establishment implicitly invokes the termination of the old connection. So, we don't want to wait for the old connection wait_conn to completely terminate before we accept the new connection and post a login response. The solution is to deffer the comp_wait completion and the conn_put to a work so wait_conn will effectively be non-blocking (flush errors are assumed to come very fast). We allocate isert_release_wq with WQ_UNBOUND and WQ_UNBOUND_MAX_ACTIVE to spread the concurrency of release works. Reported-by: Slava Shwartsman Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/ulp/isert/ib_isert.c | 42 ++++++++++++++++++++----- drivers/infiniband/ulp/isert/ib_isert.h | 1 + 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 425b5b2fb343..60142274fe4b 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -41,6 +41,7 @@ static DEFINE_MUTEX(device_list_mutex); static LIST_HEAD(device_list); static struct workqueue_struct *isert_rx_wq; static struct workqueue_struct *isert_comp_wq; +static struct workqueue_struct *isert_release_wq; static void isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); @@ -2905,6 +2906,24 @@ isert_free_np(struct iscsi_np *np) kfree(isert_np); } +static void isert_release_work(struct work_struct *work) +{ + struct isert_conn *isert_conn = container_of(work, + struct isert_conn, + release_work); + + pr_info("Starting release conn %p\n", isert_conn); + + wait_for_completion(&isert_conn->conn_wait); + + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_DOWN; + mutex_unlock(&isert_conn->conn_mutex); + + pr_info("Destroying conn %p\n", isert_conn); + isert_put_conn(isert_conn); +} + static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; @@ -2924,14 +2943,9 @@ static void isert_wait_conn(struct iscsi_conn *conn) mutex_unlock(&isert_conn->conn_mutex); wait_for_completion(&isert_conn->conn_wait_comp_err); - wait_for_completion(&isert_conn->conn_wait); - mutex_lock(&isert_conn->conn_mutex); - isert_conn->state = ISER_CONN_DOWN; - mutex_unlock(&isert_conn->conn_mutex); - - pr_info("Destroying conn %p\n", isert_conn); - isert_put_conn(isert_conn); + INIT_WORK(&isert_conn->release_work, isert_release_work); + queue_work(isert_release_wq, &isert_conn->release_work); } static void isert_free_conn(struct iscsi_conn *conn) @@ -2977,10 +2991,21 @@ static int __init isert_init(void) goto destroy_rx_wq; } + isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!isert_release_wq) { + pr_err("Unable to allocate isert_release_wq\n"); + ret = -ENOMEM; + goto destroy_comp_wq; + } + iscsit_register_transport(&iser_target_transport); - pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n"); + pr_info("iSER_TARGET[0] - Loaded iser_target_transport\n"); + return 0; +destroy_comp_wq: + destroy_workqueue(isert_comp_wq); destroy_rx_wq: destroy_workqueue(isert_rx_wq); return ret; @@ -2989,6 +3014,7 @@ destroy_rx_wq: static void __exit isert_exit(void) { flush_scheduled_work(); + destroy_workqueue(isert_release_wq); destroy_workqueue(isert_comp_wq); destroy_workqueue(isert_rx_wq); iscsit_unregister_transport(&iser_target_transport); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 8f5eeb2b5679..1178c5b6800c 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -126,6 +126,7 @@ struct isert_conn { int conn_fr_pool_size; /* lock to protect fastreg pool */ spinlock_t conn_lock; + struct work_struct release_work; #define ISERT_COMP_BATCH_COUNT 8 int conn_comp_batch; struct llist_head conn_comp_llist; From dbb49267de1c0ce0874b232335851011fc1fb310 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 19 May 2014 08:57:55 -0700 Subject: [PATCH 119/298] bcache: Make sure to pass GFP_WAIT to mempool_alloc() commit bcf090e0040e30f8409e6a535a01e6473afb096f upstream. this was very wrong - mempool_alloc() only guarantees success with GFP_WAIT. bcache uses GFP_NOWAIT in various other places where we have a fallback, circuits must've gotten crossed when writing this code or something. Signed-off-by: Kent Overstreet Cc: Gabriel de Perthuis Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 5f9c2a665ca5..fbcb6225f794 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -199,7 +199,7 @@ void bch_btree_node_read_done(struct btree *b) struct bset *i = btree_bset_first(b); struct btree_iter *iter; - iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); + iter = mempool_alloc(b->c->fill_iter, GFP_NOIO); iter->size = b->c->sb.bucket_size / b->c->sb.block_size; iter->used = 0; From b48f65688dc020cc1a7eb3718be910a3e5681685 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Sat, 6 Dec 2014 20:32:16 +0530 Subject: [PATCH 120/298] KVM: nVMX: Disable unrestricted mode if ept=0 commit 78051e3b7e35722ad3f31dd611f1b34770bddab8 upstream. If L0 has disabled EPT, don't advertise unrestricted mode at all since it depends on EPT to run real mode code. Fixes: 92fbc7b195b824e201d9f06f2b93105f72384d65 Reviewed-by: Jan Kiszka Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0c90f4b3f835..de426887b359 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2320,12 +2320,12 @@ static __init void nested_vmx_setup_ctls_msrs(void) nested_vmx_secondary_ctls_low = 0; nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | - SECONDARY_EXEC_UNRESTRICTED_GUEST | SECONDARY_EXEC_WBINVD_EXITING; if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ - nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; + nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT | + SECONDARY_EXEC_UNRESTRICTED_GUEST; nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_INVEPT_BIT; From e8480fd4697b9ac7f0fe96cde4c123f1d6e2892d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Nov 2014 17:11:21 +0100 Subject: [PATCH 121/298] netfilter: ipset: small potential read beyond the end of buffer commit 2196937e12b1b4ba139806d132647e1651d655df upstream. We could be reading 8 bytes into a 4 byte buffer here. It seems harmless but adding a check is the right thing to do and it silences a static checker warning. Signed-off-by: Dan Carpenter Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipset/ip_set_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index cf9937743abb..53ea1644a297 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1839,6 +1839,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ struct ip_set_req_version *req_version = data; + + if (*len < sizeof(struct ip_set_req_version)) { + ret = -EINVAL; + goto done; + } + if (req_version->version != IPSET_PROTOCOL) { ret = -EPROTO; goto done; From 565318483e521ee292e9e1c74dba169481b631bf Mon Sep 17 00:00:00 2001 From: "Alexander Y. Fomichev" Date: Mon, 25 Aug 2014 16:26:45 +0400 Subject: [PATCH 122/298] net: prevent of emerging cross-namespace symlinks commit 4c75431ac3520631f1d9e74aa88407e6374dbbc4 upstream. Code manipulating sysfs symlinks on adjacent net_devices(s) currently doesn't take into account that devices potentially belong to different namespaces. This patch trying to fix an issue as follows: - check for net_ns before creating / deleting symlink. for now only netdev_adjacent_rename_links and __netdev_adjacent_dev_remove are affected, afaics __netdev_adjacent_dev_insert implies both net_devs belong to the same namespace. - Drop all existing symlinks to / from all adj_devs before switching namespace and recreate them just after. Signed-off-by: Alexander Y. Fomichev Signed-off-by: David S. Miller Cc: Miquel van Smoorenburg Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index a507b6646e7f..3f5bc63eabfc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4791,7 +4791,8 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); - if (netdev_adjacent_is_neigh_list(dev, dev_list)) + if (netdev_adjacent_is_neigh_list(dev, dev_list) && + net_eq(dev_net(dev),dev_net(adj_dev))) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); @@ -5061,11 +5062,65 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); +void netdev_adjacent_add_links(struct net_device *dev) +{ + struct netdev_adjacent *iter; + + struct net *net = dev_net(dev); + + list_for_each_entry(iter, &dev->adj_list.upper, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.lower); + netdev_adjacent_sysfs_add(dev, iter->dev, + &dev->adj_list.upper); + } + + list_for_each_entry(iter, &dev->adj_list.lower, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_add(iter->dev, dev, + &iter->dev->adj_list.upper); + netdev_adjacent_sysfs_add(dev, iter->dev, + &dev->adj_list.lower); + } +} + +void netdev_adjacent_del_links(struct net_device *dev) +{ + struct netdev_adjacent *iter; + + struct net *net = dev_net(dev); + + list_for_each_entry(iter, &dev->adj_list.upper, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_del(iter->dev, dev->name, + &iter->dev->adj_list.lower); + netdev_adjacent_sysfs_del(dev, iter->dev->name, + &dev->adj_list.upper); + } + + list_for_each_entry(iter, &dev->adj_list.lower, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; + netdev_adjacent_sysfs_del(iter->dev, dev->name, + &iter->dev->adj_list.upper); + netdev_adjacent_sysfs_del(dev, iter->dev->name, + &dev->adj_list.lower); + } +} + void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) { struct netdev_adjacent *iter; + struct net *net = dev_net(dev); + list_for_each_entry(iter, &dev->adj_list.upper, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; netdev_adjacent_sysfs_del(iter->dev, oldname, &iter->dev->adj_list.lower); netdev_adjacent_sysfs_add(iter->dev, dev, @@ -5073,6 +5128,8 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname) } list_for_each_entry(iter, &dev->adj_list.lower, list) { + if (!net_eq(net,dev_net(iter->dev))) + continue; netdev_adjacent_sysfs_del(iter->dev, oldname, &iter->dev->adj_list.upper); netdev_adjacent_sysfs_add(iter->dev, dev, @@ -6679,6 +6736,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Send a netdev-removed uevent to the old namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); + netdev_adjacent_del_links(dev); /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -6693,6 +6751,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* Send a netdev-add uevent to the new namespace */ kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + netdev_adjacent_add_links(dev); /* Fixup kobjects */ err = device_rename(&dev->dev, dev->name); From 7cc4382590a1b2fca9433d84967f0373405fc307 Mon Sep 17 00:00:00 2001 From: "Alexander Y. Fomichev" Date: Mon, 15 Sep 2014 14:22:35 +0400 Subject: [PATCH 123/298] net: fix creation adjacent device symlinks commit 7ce64c79c4decdeb1afe0bf2f6ef834b382871d1 upstream. __netdev_adjacent_dev_insert may add adjust device of different net namespace, without proper check it leads to emergence of broken sysfs links from/to devices in another namespace. Fix: rewrite netdev_adjacent_is_neigh_list macro as a function, move net_eq check into netdev_adjacent_is_neigh_list. (thanks David) related to: 4c75431ac3520631f1d9e74aa88407e6374dbbc4 Signed-off-by: Alexander Fomichev Signed-off-by: David S. Miller Cc: Miquel van Smoorenburg Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 3f5bc63eabfc..86bb9cc81f02 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4705,9 +4705,14 @@ static void netdev_adjacent_sysfs_del(struct net_device *dev, sysfs_remove_link(&(dev->dev.kobj), linkname); } -#define netdev_adjacent_is_neigh_list(dev, dev_list) \ - (dev_list == &dev->adj_list.upper || \ - dev_list == &dev->adj_list.lower) +static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *dev_list) +{ + return (dev_list == &dev->adj_list.upper || + dev_list == &dev->adj_list.lower) && + net_eq(dev_net(dev), dev_net(adj_dev)); +} static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, @@ -4737,7 +4742,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, pr_debug("dev_hold for %s, because of link added from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); - if (netdev_adjacent_is_neigh_list(dev, dev_list)) { + if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) { ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list); if (ret) goto free_adj; @@ -4758,7 +4763,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, return 0; remove_symlinks: - if (netdev_adjacent_is_neigh_list(dev, dev_list)) + if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); free_adj: kfree(adj); @@ -4791,8 +4796,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, if (adj->master) sysfs_remove_link(&(dev->dev.kobj), "master"); - if (netdev_adjacent_is_neigh_list(dev, dev_list) && - net_eq(dev_net(dev),dev_net(adj_dev))) + if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); From 00a3ce6d494476faadbe799814568c81a600e3fe Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Wed, 29 Oct 2014 14:50:22 -0700 Subject: [PATCH 124/298] fsnotify: next_i is freed during fsnotify_unmount_inodes. commit 6424babfd68dd8a83d9c60a5242d27038856599f upstream. During file system stress testing on 3.10 and 3.12 based kernels, the umount command occasionally hung in fsnotify_unmount_inodes in the section of code: spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { spin_unlock(&inode->i_lock); continue; } As this section of code holds the global inode_sb_list_lock, eventually the system hangs trying to acquire the lock. Multiple crash dumps showed: The inode->i_state == 0x60 and i_count == 0 and i_sb_list would point back at itself. As this is not the value of list upon entry to the function, the kernel never exits the loop. To help narrow down problem, the call to list_del_init in inode_sb_list_del was changed to list_del. This poisons the pointers in the i_sb_list and causes a kernel to panic if it transverse a freed inode. Subsequent stress testing paniced in fsnotify_unmount_inodes at the bottom of the list_for_each_entry_safe loop showing next_i had become free. We believe the root cause of the problem is that next_i is being freed during the window of time that the list_for_each_entry_safe loop temporarily releases inode_sb_list_lock to call fsnotify and fsnotify_inode_delete. The code in fsnotify_unmount_inodes attempts to prevent the freeing of inode and next_i by calling __iget. However, the code doesn't do the __iget call on next_i if i_count == 0 or if i_state & (I_FREEING | I_WILL_FREE) The patch addresses this issue by advancing next_i in the above two cases until we either find a next_i which we can __iget or we reach the end of the list. This makes the handling of next_i more closely match the handling of the variable "inode." The time to reproduce the hang is highly variable (from hours to days.) We ran the stress test on a 3.10 kernel with the proposed patch for a week without failure. During list_for_each_entry_safe, next_i is becoming free causing the loop to never terminate. Advance next_i in those cases where __iget is not done. Signed-off-by: Jerry Hoemann Cc: Jeff Kirsher Cc: Ken Helias Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/notify/inode_mark.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 74825be65b7b..fbb9dfb7b1d2 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -288,20 +288,25 @@ void fsnotify_unmount_inodes(struct list_head *list) spin_unlock(&inode->i_lock); /* In case the dropping of a reference would nuke next_i. */ - if ((&next_i->i_sb_list != list) && - atomic_read(&next_i->i_count)) { + while (&next_i->i_sb_list != list) { spin_lock(&next_i->i_lock); - if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { + if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && + atomic_read(&next_i->i_count)) { __iget(next_i); need_iput = next_i; + spin_unlock(&next_i->i_lock); + break; } spin_unlock(&next_i->i_lock); + next_i = list_entry(next_i->i_sb_list.next, + struct inode, i_sb_list); } /* - * We can safely drop inode_sb_list_lock here because we hold - * references on both inode and next_i. Also no new inodes - * will be added since the umount has begun. + * We can safely drop inode_sb_list_lock here because either + * we actually hold references on both inode and next_i or + * end of list. Also no new inodes will be added since the + * umount has begun. */ spin_unlock(&inode_sb_list_lock); From 38b1b3c12638b929e6b6e276d85ef76ad4698e05 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 15 Jul 2014 17:53:12 +0200 Subject: [PATCH 125/298] s390/3215: fix hanging console issue commit 26d766c60f4ea08cd14f0f3435a6db3d6cc2ae96 upstream. The ccw_device_start in raw3215_start_io can fail. raw3215_try_io does not check if the request could be started and removes any pending timer. This can leave the system in a hanging state. Check for pending request after raw3215_start_io and start a timer if necessary. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/char/con3215.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index bb86494e2b7b..9a408f6e95db 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -288,12 +288,16 @@ static void raw3215_timeout(unsigned long __data) unsigned long flags; spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags); - if (raw->flags & RAW3215_TIMER_RUNS) { - del_timer(&raw->timer); - raw->flags &= ~RAW3215_TIMER_RUNS; - if (!(raw->port.flags & ASYNC_SUSPENDED)) { - raw3215_mk_write_req(raw); - raw3215_start_io(raw); + raw->flags &= ~RAW3215_TIMER_RUNS; + if (!(raw->port.flags & ASYNC_SUSPENDED)) { + raw3215_mk_write_req(raw); + raw3215_start_io(raw); + if ((raw->queued_read || raw->queued_write) && + !(raw->flags & RAW3215_WORKING) && + !(raw->flags & RAW3215_TIMER_RUNS)) { + raw->timer.expires = RAW3215_TIMEOUT + jiffies; + add_timer(&raw->timer); + raw->flags |= RAW3215_TIMER_RUNS; } } spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags); @@ -317,17 +321,15 @@ static inline void raw3215_try_io(struct raw3215_info *raw) (raw->flags & RAW3215_FLUSHING)) { /* execute write requests bigger than minimum size */ raw3215_start_io(raw); - if (raw->flags & RAW3215_TIMER_RUNS) { - del_timer(&raw->timer); - raw->flags &= ~RAW3215_TIMER_RUNS; - } - } else if (!(raw->flags & RAW3215_TIMER_RUNS)) { - /* delay small writes */ - raw->timer.expires = RAW3215_TIMEOUT + jiffies; - add_timer(&raw->timer); - raw->flags |= RAW3215_TIMER_RUNS; } } + if ((raw->queued_read || raw->queued_write) && + !(raw->flags & RAW3215_WORKING) && + !(raw->flags & RAW3215_TIMER_RUNS)) { + raw->timer.expires = RAW3215_TIMEOUT + jiffies; + add_timer(&raw->timer); + raw->flags |= RAW3215_TIMER_RUNS; + } } /* From 72ff8bbada6c20dddad1eb5db10051a68ea38c6f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 13 Aug 2014 12:01:30 +0200 Subject: [PATCH 126/298] s390/3215: fix tty output containing tabs commit e512d56c799517f33b301d81e9a5e0ebf30c2d1e upstream. git commit 37f81fa1f63ad38e16125526bb2769ae0ea8d332 "n_tty: do O_ONLCR translation as a single write" surfaced a bug in the 3215 device driver. In combination this broke tab expansion for tty ouput. The cause is an asymmetry in the behaviour of tty3215_ops->write vs tty3215_ops->put_char. The put_char function scans for '\t' but the write function does not. As the driver has logic for the '\t' expansion remove XTABS from c_oflag of the initial termios as well. Reported-by: Stephen Powell Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/char/con3215.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 9a408f6e95db..19915c5b256f 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -1029,12 +1029,26 @@ static int tty3215_write(struct tty_struct * tty, const unsigned char *buf, int count) { struct raw3215_info *raw; + int i, written; if (!tty) return 0; raw = (struct raw3215_info *) tty->driver_data; - raw3215_write(raw, buf, count); - return count; + written = count; + while (count > 0) { + for (i = 0; i < count; i++) + if (buf[i] == '\t' || buf[i] == '\n') + break; + raw3215_write(raw, buf, i); + count -= i; + buf += i; + if (count > 0) { + raw3215_putchar(raw, *buf); + count--; + buf++; + } + } + return written; } /* @@ -1182,7 +1196,7 @@ static int __init tty3215_init(void) driver->subtype = SYSTEM_TYPE_TTY; driver->init_termios = tty_std_termios; driver->init_termios.c_iflag = IGNBRK | IGNPAR; - driver->init_termios.c_oflag = ONLCR | XTABS; + driver->init_termios.c_oflag = ONLCR; driver->init_termios.c_lflag = ISIG; driver->flags = TTY_DRIVER_REAL_RAW; tty_set_operations(driver, &tty3215_ops); From 4d7313cd7cbe56ccb511eca23ef5bba7f10ffcb8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 27 Jan 2015 08:19:48 -0800 Subject: [PATCH 127/298] Linux 3.14.30 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7aff64ee4fb6..5b94752a85e3 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 14 -SUBLEVEL = 29 +SUBLEVEL = 30 EXTRAVERSION = NAME = Remembering Coco From 8345de68658e052faf769e5d33ea3ca2db43a49b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 13 Jan 2015 13:00:04 +0100 Subject: [PATCH 128/298] gpio: sysfs: fix gpio-chip device-attribute leak commit 121b6a79955a3a3fd7bbb9b8cb88d5b9dad6283d upstream. The gpio-chip device attributes were never destroyed when the device was removed. Fix by using device_create_with_groups() to create the device attributes of the chip class device. Note that this also fixes the attribute-creation race with userspace. Fixes: d8f388d8dc8d ("gpio: sysfs interface") Signed-off-by: Johan Hovold Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 5b88c83888d1..7a1e95184ac4 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -680,16 +680,13 @@ static ssize_t chip_ngpio_show(struct device *dev, } static DEVICE_ATTR(ngpio, 0444, chip_ngpio_show, NULL); -static const struct attribute *gpiochip_attrs[] = { +static struct attribute *gpiochip_attrs[] = { &dev_attr_base.attr, &dev_attr_label.attr, &dev_attr_ngpio.attr, NULL, }; - -static const struct attribute_group gpiochip_attr_group = { - .attrs = (struct attribute **) gpiochip_attrs, -}; +ATTRIBUTE_GROUPS(gpiochip); /* * /sys/class/gpio/export ... write-only @@ -1030,13 +1027,13 @@ static int gpiochip_export(struct gpio_chip *chip) /* use chip->base for the ID; it's already known to be unique */ mutex_lock(&sysfs_lock); - dev = device_create(&gpio_class, chip->dev, MKDEV(0, 0), chip, - "gpiochip%d", chip->base); - if (!IS_ERR(dev)) { - status = sysfs_create_group(&dev->kobj, - &gpiochip_attr_group); - } else + dev = device_create_with_groups(&gpio_class, chip->dev, MKDEV(0, 0), + chip, gpiochip_groups, + "gpiochip%d", chip->base); + if (IS_ERR(dev)) status = PTR_ERR(dev); + else + status = 0; chip->exported = (status == 0); mutex_unlock(&sysfs_lock); From ff82fa873154bc596616600f53dcb960996a48e6 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 13 Jan 2015 13:00:05 +0100 Subject: [PATCH 129/298] gpio: sysfs: fix gpio device-attribute leak commit 0915e6feb38de8d3601819992a5bd050201a56fa upstream. The gpio device attributes were never destroyed when the gpio was unexported (or on export failures). Use device_create_with_groups() to create the default device attributes of the gpio class device. Note that this also fixes the attribute-creation race with userspace for these attributes. Remove contingent attributes in export error path and on unexport. Fixes: d8f388d8dc8d ("gpio: sysfs interface") Signed-off-by: Johan Hovold Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpiolib.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7a1e95184ac4..ccbffd0d7a02 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -408,7 +408,7 @@ static ssize_t gpio_value_store(struct device *dev, return status; } -static const DEVICE_ATTR(value, 0644, +static DEVICE_ATTR(value, 0644, gpio_value_show, gpio_value_store); static irqreturn_t gpio_sysfs_irq(int irq, void *priv) @@ -633,18 +633,16 @@ static ssize_t gpio_active_low_store(struct device *dev, return status ? : size; } -static const DEVICE_ATTR(active_low, 0644, +static DEVICE_ATTR(active_low, 0644, gpio_active_low_show, gpio_active_low_store); -static const struct attribute *gpio_attrs[] = { +static struct attribute *gpio_attrs[] = { &dev_attr_value.attr, &dev_attr_active_low.attr, NULL, }; -static const struct attribute_group gpio_attr_group = { - .attrs = (struct attribute **) gpio_attrs, -}; +ATTRIBUTE_GROUPS(gpio); /* * /sys/class/gpio/gpiochipN/ @@ -841,18 +839,15 @@ int gpiod_export(struct gpio_desc *desc, bool direction_may_change) if (desc->chip->names && desc->chip->names[offset]) ioname = desc->chip->names[offset]; - dev = device_create(&gpio_class, desc->chip->dev, MKDEV(0, 0), - desc, ioname ? ioname : "gpio%u", - desc_to_gpio(desc)); + dev = device_create_with_groups(&gpio_class, desc->chip->dev, + MKDEV(0, 0), desc, gpio_groups, + ioname ? ioname : "gpio%u", + desc_to_gpio(desc)); if (IS_ERR(dev)) { status = PTR_ERR(dev); goto fail_unlock; } - status = sysfs_create_group(&dev->kobj, &gpio_attr_group); - if (status) - goto fail_unregister_device; - if (direction_may_change) { status = device_create_file(dev, &dev_attr_direction); if (status) @@ -863,13 +858,15 @@ int gpiod_export(struct gpio_desc *desc, bool direction_may_change) !test_bit(FLAG_IS_OUT, &desc->flags))) { status = device_create_file(dev, &dev_attr_edge); if (status) - goto fail_unregister_device; + goto fail_remove_attr_direction; } set_bit(FLAG_EXPORT, &desc->flags); mutex_unlock(&sysfs_lock); return 0; +fail_remove_attr_direction: + device_remove_file(dev, &dev_attr_direction); fail_unregister_device: device_unregister(dev); fail_unlock: @@ -1003,6 +1000,8 @@ void gpiod_unexport(struct gpio_desc *desc) mutex_unlock(&sysfs_lock); if (dev) { + device_remove_file(dev, &dev_attr_edge); + device_remove_file(dev, &dev_attr_direction); device_unregister(dev); put_device(dev); } From 2f5a82124eaeb9d55f1a27a9c1747d86d67a29bc Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Thu, 8 Jan 2015 20:25:05 +0800 Subject: [PATCH 130/298] pinctrl: Fix two deadlocks commit db93facfb0ef542aa5d8079e47580b3e669a4d82 upstream. This patch is to fix two deadlock cases. Deadlock 1: CPU #1 pinctrl_register-> pinctrl_get -> create_pinctrl (Holding lock pinctrl_maps_mutex) -> get_pinctrl_dev_from_devname (Trying to acquire lock pinctrldev_list_mutex) CPU #0 pinctrl_unregister (Holding lock pinctrldev_list_mutex) -> pinctrl_put ->> pinctrl_free -> pinctrl_dt_free_maps -> pinctrl_unregister_map (Trying to acquire lock pinctrl_maps_mutex) Simply to say CPU#1 is holding lock A and trying to acquire lock B, CPU#0 is holding lock B and trying to acquire lock A. Deadlock 2: CPU #3 pinctrl_register-> pinctrl_get -> create_pinctrl (Holding lock pinctrl_maps_mutex) -> get_pinctrl_dev_from_devname (Trying to acquire lock pinctrldev_list_mutex) CPU #2 pinctrl_unregister (Holding lock pctldev->mutex) -> pinctrl_put ->> pinctrl_free -> pinctrl_dt_free_maps -> pinctrl_unregister_map (Trying to acquire lock pinctrl_maps_mutex) CPU #0 tegra_gpio_request (Holding lock pinctrldev_list_mutex) -> pinctrl_get_device_gpio_range (Trying to acquire lock pctldev->mutex) Simply to say CPU#3 is holding lock A and trying to acquire lock D, CPU#2 is holding lock B and trying to acquire lock A, CPU#0 is holding lock D and trying to acquire lock B. Signed-off-by: Jim Lin Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index c0fe6091566a..988f5e18763a 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1812,14 +1812,15 @@ void pinctrl_unregister(struct pinctrl_dev *pctldev) if (pctldev == NULL) return; - mutex_lock(&pinctrldev_list_mutex); mutex_lock(&pctldev->mutex); - pinctrl_remove_device_debugfs(pctldev); + mutex_unlock(&pctldev->mutex); if (!IS_ERR(pctldev->p)) pinctrl_put(pctldev->p); + mutex_lock(&pinctrldev_list_mutex); + mutex_lock(&pctldev->mutex); /* TODO: check that no pinmuxes are still active? */ list_del(&pctldev->node); /* Destroy descriptor tree */ From 3142f62b91e4f8932809b18f8413141fd9b5cba0 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Mon, 19 Jan 2015 13:03:25 -0600 Subject: [PATCH 131/298] libata: prevent HSM state change race between ISR and PIO commit ce7514526742c0898b837d4395f515b79dfb5a12 upstream. It is possible for ata_sff_flush_pio_task() to set ap->hsm_task_state to HSM_ST_IDLE in between the time __ata_sff_port_intr() checks for HSM_ST_IDLE and before it calls ata_sff_hsm_move() causing ata_sff_hsm_move() to BUG(). This problem is hard to reproduce making this patch hard to verify, but this fix will prevent the race. I have not been able to reproduce the problem, but here is a crash dump from a 2.6.32 kernel. On examining the ata port's state, its hsm_task_state field has a value of HSM_ST_IDLE: crash> struct ata_port.hsm_task_state ffff881c1121c000 hsm_task_state = 0 Normally, this should not be possible as ata_sff_hsm_move() was called from ata_sff_host_intr(), which checks hsm_task_state and won't call ata_sff_hsm_move() if it has a HSM_ST_IDLE value. PID: 11053 TASK: ffff8816e846cae0 CPU: 0 COMMAND: "sshd" #0 [ffff88008ba03960] machine_kexec at ffffffff81038f3b #1 [ffff88008ba039c0] crash_kexec at ffffffff810c5d92 #2 [ffff88008ba03a90] oops_end at ffffffff8152b510 #3 [ffff88008ba03ac0] die at ffffffff81010e0b #4 [ffff88008ba03af0] do_trap at ffffffff8152ad74 #5 [ffff88008ba03b50] do_invalid_op at ffffffff8100cf95 #6 [ffff88008ba03bf0] invalid_op at ffffffff8100bf9b [exception RIP: ata_sff_hsm_move+317] RIP: ffffffff813a77ad RSP: ffff88008ba03ca0 RFLAGS: 00010097 RAX: 0000000000000000 RBX: ffff881c1121dc60 RCX: 0000000000000000 RDX: ffff881c1121dd10 RSI: ffff881c1121dc60 RDI: ffff881c1121c000 RBP: ffff88008ba03d00 R8: 0000000000000000 R9: 000000000000002e R10: 000000000001003f R11: 000000000000009b R12: ffff881c1121c000 R13: 0000000000000000 R14: 0000000000000050 R15: ffff881c1121dd78 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffff88008ba03d08] ata_sff_host_intr at ffffffff813a7fbd #8 [ffff88008ba03d38] ata_sff_interrupt at ffffffff813a821e #9 [ffff88008ba03d78] handle_IRQ_event at ffffffff810e6ec0 --- drivers/ata/libata-sff.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 37acda6fa7e4..136803c47cdb 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1333,7 +1333,19 @@ void ata_sff_flush_pio_task(struct ata_port *ap) DPRINTK("ENTER\n"); cancel_delayed_work_sync(&ap->sff_pio_task); + + /* + * We wanna reset the HSM state to IDLE. If we do so without + * grabbing the port lock, critical sections protected by it which + * expect the HSM state to stay stable may get surprised. For + * example, we may set IDLE in between the time + * __ata_sff_port_intr() checks for HSM_ST_IDLE and before it calls + * ata_sff_hsm_move() causing ata_sff_hsm_move() to BUG(). + */ + spin_lock_irq(ap->lock); ap->hsm_task_state = HSM_ST_IDLE; + spin_unlock_irq(ap->lock); + ap->sff_pio_task_link = NULL; if (ata_msg_ctl(ap)) From 01680bc55fcfc8269b0afa62d4f812ee870af5c6 Mon Sep 17 00:00:00 2001 From: Jason Lee Cragg Date: Sat, 17 Jan 2015 12:28:29 -0500 Subject: [PATCH 132/298] ALSA: usb-audio: Add mic volume fix quirk for Logitech Webcam C210 commit 6455931186bff407493135e74c5f32efd30860e2 upstream. Signed-off-by: Jason Lee Cragg Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 1bed780e21d9..2d37b3fc3a21 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -886,6 +886,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, case USB_ID(0x046d, 0x0807): /* Logitech Webcam C500 */ case USB_ID(0x046d, 0x0808): case USB_ID(0x046d, 0x0809): + case USB_ID(0x046d, 0x0819): /* Logitech Webcam C210 */ case USB_ID(0x046d, 0x081b): /* HD Webcam c310 */ case USB_ID(0x046d, 0x081d): /* HD Webcam c510 */ case USB_ID(0x046d, 0x0825): /* HD Webcam c270 */ From eb2a7a2c3c611967c9a6dec19d8a3f91204db079 Mon Sep 17 00:00:00 2001 From: Michael Karcher Date: Sun, 18 Jan 2015 00:36:15 +0100 Subject: [PATCH 133/298] scripts/recordmcount.pl: There is no -m32 gcc option on Super-H anymore commit 1caf6aaaa47471831d77c75f094d4e00ad1ec808 upstream. Compiling SH with gcc-4.8 fails due to the -m32 option not being supported. From http://buildd.debian-ports.org/status/fetch.php?pkg=linux&arch=sh4&ver=3.16.7-ckt4-1&stamp=1421425783 CC init/main.o gcc-4.8: error: unrecognized command line option '-m32' ld: cannot find init/.tmp_mc_main.o: No such file or directory objcopy: 'init/.tmp_mx_main.o': No such file rm: cannot remove 'init/.tmp_mx_main.o': No such file or directory rm: cannot remove 'init/.tmp_mc_main.o': No such file or directory Link: http://lkml.kernel.org/r/1421537778-29001-1-git-send-email-kernel@mkarcher.dialup.fu-berlin.de Link: http://lkml.kernel.org/r/54BCBDD4.10102@physik.fu-berlin.de Cc: Matt Fleming Reported-by: John Paul Adrian Glaubitz Signed-off-by: Michael Karcher Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- scripts/recordmcount.pl | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 91280b82da08..513f7bd85cb7 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -262,7 +262,6 @@ if ($arch eq "x86_64") { # force flags for this arch $ld .= " -m shlelf_linux"; $objcopy .= " -O elf32-sh-linux"; - $cc .= " -m32"; } elsif ($arch eq "powerpc") { $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)"; From 20ad9d37cfbfdfe594d1d1da55f8c2a367f410d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Jan 2015 09:47:10 +0000 Subject: [PATCH 134/298] drm/i915: Fix mutex->owner inspection race under DEBUG_MUTEXES commit 226e5ae9e5f9108beb0bde4ac69f68fe6210fed9 upstream. If CONFIG_DEBUG_MUTEXES is set, the mutex->owner field is only cleared if the mutex debugging is enabled which introduces a race in our mutex_is_locked_by() - i.e. we may inspect the old owner value before it is acquired by the new task. This is the root cause of this error: # diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c # index 5cf6731..3ef3736 100644 # --- a/kernel/locking/mutex-debug.c # +++ b/kernel/locking/mutex-debug.c # @@ -80,13 +80,13 @@ void debug_mutex_unlock(struct mutex *lock) # DEBUG_LOCKS_WARN_ON(lock->owner != current); # # DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); # - mutex_clear_owner(lock); # } # # /* # * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug # * mutexes so that we can do it here after we've verified state. # */ # + mutex_clear_owner(lock); # atomic_set(&lock->count, 1); # } Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87955 Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7410a507eacc..3153eabde39b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4978,7 +4978,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) +#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ From 563f9a6244ab90aaa31e12cab6c958bd3d406b0b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 8 Jan 2015 10:46:33 -0500 Subject: [PATCH 135/298] drm/radeon: add a dpm quirk list commit 4369a69ec6ab86821352bd753c68af5880f87956 upstream. Disable dpm on certain problematic boards rather than disabling dpm for the entire chip family since most boards work fine. https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1386534 https://bugzilla.kernel.org/show_bug.cgi?id=83731 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_pm.c | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index cfb513f933d5..0095ee7fce34 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1260,8 +1260,39 @@ dpm_failed: return ret; } +struct radeon_dpm_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; +}; + +/* cards with dpm stability problems */ +static struct radeon_dpm_quirk radeon_dpm_quirk_list[] = { + /* TURKS - https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1386534 */ + { PCI_VENDOR_ID_ATI, 0x6759, 0x1682, 0x3195 }, + /* TURKS - https://bugzilla.kernel.org/show_bug.cgi?id=83731 */ + { PCI_VENDOR_ID_ATI, 0x6840, 0x1179, 0xfb81 }, + { 0, 0, 0, 0 }, +}; + int radeon_pm_init(struct radeon_device *rdev) { + struct radeon_dpm_quirk *p = radeon_dpm_quirk_list; + bool disable_dpm = false; + + /* Apply dpm quirks */ + while (p && p->chip_device != 0) { + if (rdev->pdev->vendor == p->chip_vendor && + rdev->pdev->device == p->chip_device && + rdev->pdev->subsystem_vendor == p->subsys_vendor && + rdev->pdev->subsystem_device == p->subsys_device) { + disable_dpm = true; + break; + } + ++p; + } + /* enable dpm on rv6xx+ */ switch (rdev->family) { case CHIP_RV610: @@ -1316,6 +1347,8 @@ int radeon_pm_init(struct radeon_device *rdev) (!(rdev->flags & RADEON_IS_IGP)) && (!rdev->smc_fw)) rdev->pm.pm_method = PM_METHOD_PROFILE; + else if (disable_dpm && (radeon_dpm == -1)) + rdev->pm.pm_method = PM_METHOD_PROFILE; else if (radeon_dpm == 0) rdev->pm.pm_method = PM_METHOD_PROFILE; else From 4caa38ee4ea1193820385016ae1c0c81f23583ee Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 12 Jan 2015 17:15:12 -0500 Subject: [PATCH 136/298] drm/radeon: add si dpm quirk list commit 5615f890bc6babdc2998dec62f3552326d06eb7b upstream. This adds a quirks list to fix stability problems with certain SI boards. bug: https://bugs.freedesktop.org/show_bug.cgi?id=76490 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/si_dpm.c | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 879e62844b2b..35bf2bba69bf 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2900,6 +2900,22 @@ static int si_init_smc_spll_table(struct radeon_device *rdev) return ret; } +struct si_dpm_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; + u32 max_sclk; + u32 max_mclk; +}; + +/* cards with dpm stability problems */ +static struct si_dpm_quirk si_dpm_quirk_list[] = { + /* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */ + { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, + { 0, 0, 0, 0 }, +}; + static void si_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_ps *rps) { @@ -2910,7 +2926,22 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, u32 mclk, sclk; u16 vddc, vddci; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; + u32 max_sclk = 0, max_mclk = 0; int i; + struct si_dpm_quirk *p = si_dpm_quirk_list; + + /* Apply dpm quirks */ + while (p && p->chip_device != 0) { + if (rdev->pdev->vendor == p->chip_vendor && + rdev->pdev->device == p->chip_device && + rdev->pdev->subsystem_vendor == p->subsys_vendor && + rdev->pdev->subsystem_device == p->subsys_device) { + max_sclk = p->max_sclk; + max_mclk = p->max_mclk; + break; + } + ++p; + } if ((rdev->pm.dpm.new_active_crtc_count > 1) || ni_dpm_vblank_too_short(rdev)) @@ -2964,6 +2995,14 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (ps->performance_levels[i].mclk > max_mclk_vddc) ps->performance_levels[i].mclk = max_mclk_vddc; } + if (max_mclk) { + if (ps->performance_levels[i].mclk > max_mclk) + ps->performance_levels[i].mclk = max_mclk; + } + if (max_sclk) { + if (ps->performance_levels[i].sclk > max_sclk) + ps->performance_levels[i].sclk = max_sclk; + } } /* XXX validate the min clocks required for display */ From fdb1698eadff17e6b2f48281084955ec08b25e1c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 15 Jan 2015 10:52:33 -0500 Subject: [PATCH 137/298] drm/radeon: use rv515_ring_start on r5xx commit d8a74e186949e1a2c2f1309212478b0659bf9225 upstream. This was accidently lost in 76a0df859def. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_asic.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 5600d4c5f981..64d6cfba9952 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -335,6 +335,20 @@ static struct radeon_asic_ring r300_gfx_ring = { .set_wptr = &r100_gfx_set_wptr, }; +static struct radeon_asic_ring rv515_gfx_ring = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + .cs_parse = &r300_cs_parse, + .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, + .get_rptr = &r100_gfx_get_rptr, + .get_wptr = &r100_gfx_get_wptr, + .set_wptr = &r100_gfx_set_wptr, +}; + static struct radeon_asic r300_asic = { .init = &r300_init, .fini = &r300_fini, @@ -756,7 +770,7 @@ static struct radeon_asic rv515_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring + [RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring }, .irq = { .set = &rs600_irq_set, @@ -823,7 +837,7 @@ static struct radeon_asic r520_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring + [RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring }, .irq = { .set = &rs600_irq_set, From 408f4508ec06f940450603a6782f370df1a048a2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 15 Jan 2015 18:16:04 -0600 Subject: [PATCH 138/298] PCI: Add flag for devices where we can't use bus reset commit f331a859e0ee5a898c1f47596eddad4c4f02d657 upstream. Enable a mechanism for devices to quirk that they do not behave when doing a PCI bus reset. We require a modest level of spec compliant behavior in order to do a reset, for instance the device should come out of reset without throwing errors and PCI config space should be accessible after reset. This is too much to ask for some devices. Link: http://lkml.kernel.org/r/20140923210318.498dacbd@dualc.maya.org Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 40 ++++++++++++++++++++++++++++++++++++---- include/linux/pci.h | 2 ++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index dae70d216762..78c65d327e33 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3187,7 +3187,8 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe) { struct pci_dev *pdev; - if (pci_is_root_bus(dev->bus) || dev->subordinate || !dev->bus->self) + if (pci_is_root_bus(dev->bus) || dev->subordinate || + !dev->bus->self || dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET) return -ENOTTY; list_for_each_entry(pdev, &dev->bus->devices, bus_list) @@ -3221,7 +3222,8 @@ static int pci_dev_reset_slot_function(struct pci_dev *dev, int probe) { struct pci_dev *pdev; - if (dev->subordinate || !dev->slot) + if (dev->subordinate || !dev->slot || + dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET) return -ENOTTY; list_for_each_entry(pdev, &dev->bus->devices, bus_list) @@ -3452,6 +3454,20 @@ int pci_try_reset_function(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_try_reset_function); +/* Do any devices on or below this bus prevent a bus reset? */ +static bool pci_bus_resetable(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || + (dev->subordinate && !pci_bus_resetable(dev->subordinate))) + return false; + } + + return true; +} + /* Lock devices from the top of the tree down */ static void pci_bus_lock(struct pci_bus *bus) { @@ -3502,6 +3518,22 @@ unlock: return 0; } +/* Do any devices on or below this slot prevent a bus reset? */ +static bool pci_slot_resetable(struct pci_slot *slot) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &slot->bus->devices, bus_list) { + if (!dev->slot || dev->slot != slot) + continue; + if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || + (dev->subordinate && !pci_bus_resetable(dev->subordinate))) + return false; + } + + return true; +} + /* Lock devices from the top of the tree down */ static void pci_slot_lock(struct pci_slot *slot) { @@ -3623,7 +3655,7 @@ static int pci_slot_reset(struct pci_slot *slot, int probe) { int rc; - if (!slot) + if (!slot || !pci_slot_resetable(slot)) return -ENOTTY; if (!probe) @@ -3715,7 +3747,7 @@ EXPORT_SYMBOL_GPL(pci_try_reset_slot); static int pci_bus_reset(struct pci_bus *bus, int probe) { - if (!bus->self) + if (!bus->self || !pci_bus_resetable(bus)) return -ENOTTY; if (probe) diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e5e16c6f7f1..d662546f77d8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -170,6 +170,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, /* Provide indication device is assigned by a Virtual Machine Manager */ PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, + /* Do not use bus resets for device */ + PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 6), }; enum pci_irq_reroute_variant { From 4de16a49e7fbdec6f54387a34b21b531ee41dd0f Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 15 Jan 2015 18:17:12 -0600 Subject: [PATCH 139/298] PCI: Mark Atheros AR93xx to avoid bus reset commit c3e59ee4e76686b0c84ca8faa1011d10cd4ca1b8 upstream. Reports against the TL-WDN4800 card indicate that PCI bus reset of this Atheros device cause system lock-ups and resets. I've also been able to confirm this behavior on multiple systems. The device never returns from reset and attempts to access config space of the device after reset result in hangs. Blacklist bus reset for the device to avoid this issue. [bhelgaas: This regression appeared in v3.14. Andreas bisected it to 425c1b223dac ("PCI: Add Virtual Channel to save/restore support"), but we don't understand the mechanism by which that commit affects the reset path.] [bhelgaas: changelog, references] Link: http://lkml.kernel.org/r/20140923210318.498dacbd@dualc.maya.org Reported-by: Andreas Hartmann Tested-by: Andreas Hartmann Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6e8776b59a2c..27abeb40dfab 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3008,6 +3008,20 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CHELSIO, 0x0030, DECLARE_PCI_FIXUP_HEADER(0x1814, 0x0601, /* Ralink RT2800 802.11n PCI */ quirk_broken_intx_masking); +static void quirk_no_bus_reset(struct pci_dev *dev) +{ + dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; +} + +/* + * Atheros AR93xx chips do not behave after a bus reset. The device will + * throw a Link Down error on AER-capable systems and regardless of AER, + * config space of the device is never accessible again and typically + * causes the system to hang or reset when access is attempted. + * http://www.spinics.net/lists/linux-pci/msg34797.html + */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0030, quirk_no_bus_reset); + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { From e614e0c6e088c4066b3118a8c1d228b54fff33fb Mon Sep 17 00:00:00 2001 From: Brian King Date: Thu, 30 Oct 2014 17:27:10 -0500 Subject: [PATCH 140/298] ipr: wait for aborted command responses commit 6cdb08172bc89f0a39e1643c5e7eab362692fd1b upstream. Fixes a race condition in abort handling that was injected when multiple interrupt support was added. When only a single interrupt is present, the adapter guarantees it will send responses for aborted commands prior to the response for the abort command itself. With multiple interrupts, these responses generally come back on different interrupts, so we need to ensure the abort thread waits until the aborted command is complete so we don't perform a double completion. This race condition was being hit frequently in environments which were triggering command timeouts, which was resulting in a double completion causing a kernel oops. Signed-off-by: Brian King Reviewed-by: Wendy Xiong Tested-by: Wendy Xiong Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/ipr.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/scsi/ipr.h | 1 + 2 files changed, 93 insertions(+) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 3f5b56a99892..b4ddb7310e36 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -683,6 +683,7 @@ static void ipr_init_ipr_cmnd(struct ipr_cmnd *ipr_cmd, ipr_reinit_ipr_cmnd(ipr_cmd); ipr_cmd->u.scratch = 0; ipr_cmd->sibling = NULL; + ipr_cmd->eh_comp = NULL; ipr_cmd->fast_done = fast_done; init_timer(&ipr_cmd->timer); } @@ -848,6 +849,8 @@ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) scsi_dma_unmap(ipr_cmd->scsi_cmd); scsi_cmd->scsi_done(scsi_cmd); + if (ipr_cmd->eh_comp) + complete(ipr_cmd->eh_comp); list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } @@ -4805,6 +4808,84 @@ static int ipr_slave_alloc(struct scsi_device *sdev) return rc; } +/** + * ipr_match_lun - Match function for specified LUN + * @ipr_cmd: ipr command struct + * @device: device to match (sdev) + * + * Returns: + * 1 if command matches sdev / 0 if command does not match sdev + **/ +static int ipr_match_lun(struct ipr_cmnd *ipr_cmd, void *device) +{ + if (ipr_cmd->scsi_cmd && ipr_cmd->scsi_cmd->device == device) + return 1; + return 0; +} + +/** + * ipr_wait_for_ops - Wait for matching commands to complete + * @ipr_cmd: ipr command struct + * @device: device to match (sdev) + * @match: match function to use + * + * Returns: + * SUCCESS / FAILED + **/ +static int ipr_wait_for_ops(struct ipr_ioa_cfg *ioa_cfg, void *device, + int (*match)(struct ipr_cmnd *, void *)) +{ + struct ipr_cmnd *ipr_cmd; + int wait; + unsigned long flags; + struct ipr_hrr_queue *hrrq; + signed long timeout = IPR_ABORT_TASK_TIMEOUT; + DECLARE_COMPLETION_ONSTACK(comp); + + ENTER; + do { + wait = 0; + + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock_irqsave(hrrq->lock, flags); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (match(ipr_cmd, device)) { + ipr_cmd->eh_comp = ∁ + wait++; + } + } + spin_unlock_irqrestore(hrrq->lock, flags); + } + + if (wait) { + timeout = wait_for_completion_timeout(&comp, timeout); + + if (!timeout) { + wait = 0; + + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock_irqsave(hrrq->lock, flags); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (match(ipr_cmd, device)) { + ipr_cmd->eh_comp = NULL; + wait++; + } + } + spin_unlock_irqrestore(hrrq->lock, flags); + } + + if (wait) + dev_err(&ioa_cfg->pdev->dev, "Timed out waiting for aborted commands\n"); + LEAVE; + return wait ? FAILED : SUCCESS; + } + } + } while (wait); + + LEAVE; + return SUCCESS; +} + static int ipr_eh_host_reset(struct scsi_cmnd *cmd) { struct ipr_ioa_cfg *ioa_cfg; @@ -5023,11 +5104,17 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) static int ipr_eh_dev_reset(struct scsi_cmnd *cmd) { int rc; + struct ipr_ioa_cfg *ioa_cfg; + + ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata; spin_lock_irq(cmd->device->host->host_lock); rc = __ipr_eh_dev_reset(cmd); spin_unlock_irq(cmd->device->host->host_lock); + if (rc == SUCCESS) + rc = ipr_wait_for_ops(ioa_cfg, cmd->device, ipr_match_lun); + return rc; } @@ -5205,13 +5292,18 @@ static int ipr_eh_abort(struct scsi_cmnd *scsi_cmd) { unsigned long flags; int rc; + struct ipr_ioa_cfg *ioa_cfg; ENTER; + ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; + spin_lock_irqsave(scsi_cmd->device->host->host_lock, flags); rc = ipr_cancel_op(scsi_cmd); spin_unlock_irqrestore(scsi_cmd->device->host->host_lock, flags); + if (rc == SUCCESS) + rc = ipr_wait_for_ops(ioa_cfg, scsi_cmd->device, ipr_match_lun); LEAVE; return rc; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index 9ce38a22647e..0801f3df4b27 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1585,6 +1585,7 @@ struct ipr_cmnd { struct scsi_device *sdev; } u; + struct completion *eh_comp; struct ipr_hrr_queue *hrrq; struct ipr_ioa_cfg *ioa_cfg; }; From 90e6c435b7f08718f7c5394bd18c42355ac70ec2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 23 Jan 2015 10:00:07 +0000 Subject: [PATCH 141/298] dm cache: share cache-metadata object across inactive and active DM tables commit 9b1cc9f251affdd27f29fe46d0989ba76c33faf6 upstream. If a DM table is reloaded with an inactive table when the device is not suspended (normal procedure for LVM2), then there will be two dm-bufio objects that can diverge. This can lead to a situation where the inactive table uses bufio to read metadata at the same time the active table writes metadata -- resulting in the inactive table having stale metadata buffers once it is promoted to the active table slot. Fix this by using reference counting and a global list of cache metadata objects to ensure there is only one metadata object per metadata device. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 101 +++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index a87d3fab0271..d290e8396116 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -94,6 +94,9 @@ struct cache_disk_superblock { } __packed; struct dm_cache_metadata { + atomic_t ref_count; + struct list_head list; + struct block_device *bdev; struct dm_block_manager *bm; struct dm_space_map *metadata_sm; @@ -669,10 +672,10 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags) /*----------------------------------------------------------------*/ -struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, - sector_t data_block_size, - bool may_format_device, - size_t policy_hint_size) +static struct dm_cache_metadata *metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) { int r; struct dm_cache_metadata *cmd; @@ -683,6 +686,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, return NULL; } + atomic_set(&cmd->ref_count, 1); init_rwsem(&cmd->root_lock); cmd->bdev = bdev; cmd->data_block_size = data_block_size; @@ -705,10 +709,95 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, return cmd; } +/* + * We keep a little list of ref counted metadata objects to prevent two + * different target instances creating separate bufio instances. This is + * an issue if a table is reloaded before the suspend. + */ +static DEFINE_MUTEX(table_lock); +static LIST_HEAD(table); + +static struct dm_cache_metadata *lookup(struct block_device *bdev) +{ + struct dm_cache_metadata *cmd; + + list_for_each_entry(cmd, &table, list) + if (cmd->bdev == bdev) { + atomic_inc(&cmd->ref_count); + return cmd; + } + + return NULL; +} + +static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) +{ + struct dm_cache_metadata *cmd, *cmd2; + + mutex_lock(&table_lock); + cmd = lookup(bdev); + mutex_unlock(&table_lock); + + if (cmd) + return cmd; + + cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size); + if (cmd) { + mutex_lock(&table_lock); + cmd2 = lookup(bdev); + if (cmd2) { + mutex_unlock(&table_lock); + __destroy_persistent_data_objects(cmd); + kfree(cmd); + return cmd2; + } + list_add(&cmd->list, &table); + mutex_unlock(&table_lock); + } + + return cmd; +} + +static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size) +{ + if (cmd->data_block_size != data_block_size) { + DMERR("data_block_size (%llu) different from that in metadata (%llu)\n", + (unsigned long long) data_block_size, + (unsigned long long) cmd->data_block_size); + return false; + } + + return true; +} + +struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) +{ + struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, + may_format_device, policy_hint_size); + if (cmd && !same_params(cmd, data_block_size)) { + dm_cache_metadata_close(cmd); + return NULL; + } + + return cmd; +} + void dm_cache_metadata_close(struct dm_cache_metadata *cmd) { - __destroy_persistent_data_objects(cmd); - kfree(cmd); + if (atomic_dec_and_test(&cmd->ref_count)) { + mutex_lock(&table_lock); + list_del(&cmd->list); + mutex_unlock(&table_lock); + + __destroy_persistent_data_objects(cmd); + kfree(cmd); + } } /* From ba42a61e500b1619c4f29cd2b6ac693c0ecc9645 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 23 Jan 2015 10:16:16 +0000 Subject: [PATCH 142/298] dm cache: fix problematic dual use of a single migration count variable commit a59db67656021fa212e9b95a583f13c34eb67cd9 upstream. Introduce a new variable to count the number of allocated migration structures. The existing variable cache->nr_migrations became overloaded. It was used to: i) track of the number of migrations in flight for the purposes of quiescing during suspend. ii) to estimate the amount of background IO occuring. Recent discard changes meant that REQ_DISCARD bios are processed with a migration. Discards are not background IO so nr_migrations was not incremented. However this could cause quiescing to complete early. (i) is now handled with a new variable cache->nr_allocated_migrations. cache->nr_migrations has been renamed cache->nr_io_migrations. cleanup_migration() is now called free_io_migration(), since it decrements that variable. Also, remove the unused cache->next_migration variable that got replaced with with prealloc_structs a while ago. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-target.c | 89 ++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index ff284b7a17bd..c10dec0f6e9d 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -222,7 +222,13 @@ struct cache { struct list_head need_commit_migrations; sector_t migration_threshold; wait_queue_head_t migration_wait; - atomic_t nr_migrations; + atomic_t nr_allocated_migrations; + + /* + * The number of in flight migrations that are performing + * background io. eg, promotion, writeback. + */ + atomic_t nr_io_migrations; wait_queue_head_t quiescing_wait; atomic_t quiescing; @@ -259,7 +265,6 @@ struct cache { struct dm_deferred_set *all_io_ds; mempool_t *migration_pool; - struct dm_cache_migration *next_migration; struct dm_cache_policy *policy; unsigned policy_nr_args; @@ -350,10 +355,31 @@ static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cel dm_bio_prison_free_cell(cache->prison, cell); } +static struct dm_cache_migration *alloc_migration(struct cache *cache) +{ + struct dm_cache_migration *mg; + + mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); + if (mg) { + mg->cache = cache; + atomic_inc(&mg->cache->nr_allocated_migrations); + } + + return mg; +} + +static void free_migration(struct dm_cache_migration *mg) +{ + if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations)) + wake_up(&mg->cache->migration_wait); + + mempool_free(mg, mg->cache->migration_pool); +} + static int prealloc_data_structs(struct cache *cache, struct prealloc *p) { if (!p->mg) { - p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); + p->mg = alloc_migration(cache); if (!p->mg) return -ENOMEM; } @@ -382,7 +408,7 @@ static void prealloc_free_structs(struct cache *cache, struct prealloc *p) free_prison_cell(cache, p->cell1); if (p->mg) - mempool_free(p->mg, cache->migration_pool); + free_migration(p->mg); } static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) @@ -812,24 +838,14 @@ static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, * Migration covers moving data from the origin device to the cache, or * vice versa. *--------------------------------------------------------------*/ -static void free_migration(struct dm_cache_migration *mg) +static void inc_io_migrations(struct cache *cache) { - mempool_free(mg, mg->cache->migration_pool); + atomic_inc(&cache->nr_io_migrations); } -static void inc_nr_migrations(struct cache *cache) +static void dec_io_migrations(struct cache *cache) { - atomic_inc(&cache->nr_migrations); -} - -static void dec_nr_migrations(struct cache *cache) -{ - atomic_dec(&cache->nr_migrations); - - /* - * Wake the worker in case we're suspending the target. - */ - wake_up(&cache->migration_wait); + atomic_dec(&cache->nr_io_migrations); } static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, @@ -852,11 +868,10 @@ static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, wake_worker(cache); } -static void cleanup_migration(struct dm_cache_migration *mg) +static void free_io_migration(struct dm_cache_migration *mg) { - struct cache *cache = mg->cache; + dec_io_migrations(mg->cache); free_migration(mg); - dec_nr_migrations(cache); } static void migration_failure(struct dm_cache_migration *mg) @@ -881,7 +896,7 @@ static void migration_failure(struct dm_cache_migration *mg) cell_defer(cache, mg->new_ocell, true); } - cleanup_migration(mg); + free_io_migration(mg); } static void migration_success_pre_commit(struct dm_cache_migration *mg) @@ -892,7 +907,7 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) if (mg->writeback) { clear_dirty(cache, mg->old_oblock, mg->cblock); cell_defer(cache, mg->old_ocell, false); - cleanup_migration(mg); + free_io_migration(mg); return; } else if (mg->demote) { @@ -902,14 +917,14 @@ static void migration_success_pre_commit(struct dm_cache_migration *mg) mg->old_oblock); if (mg->promote) cell_defer(cache, mg->new_ocell, true); - cleanup_migration(mg); + free_io_migration(mg); return; } } else { if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); policy_remove_mapping(cache->policy, mg->new_oblock); - cleanup_migration(mg); + free_io_migration(mg); return; } } @@ -942,7 +957,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) } else { if (mg->invalidate) policy_remove_mapping(cache->policy, mg->old_oblock); - cleanup_migration(mg); + free_io_migration(mg); } } else { @@ -957,7 +972,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) bio_endio(mg->new_ocell->holder, 0); cell_defer(cache, mg->new_ocell, false); } - cleanup_migration(mg); + free_io_migration(mg); } } @@ -1169,7 +1184,7 @@ static void promote(struct cache *cache, struct prealloc *structs, mg->new_ocell = cell; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1192,7 +1207,7 @@ static void writeback(struct cache *cache, struct prealloc *structs, mg->new_ocell = NULL; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1218,7 +1233,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, mg->new_ocell = new_ocell; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1245,7 +1260,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs, mg->new_ocell = NULL; mg->start_jiffies = jiffies; - inc_nr_migrations(cache); + inc_io_migrations(cache); quiesce_migration(mg); } @@ -1306,7 +1321,7 @@ static void process_discard_bio(struct cache *cache, struct bio *bio) static bool spare_migration_bandwidth(struct cache *cache) { - sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) * + sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * cache->sectors_per_block; return current_volume < cache->migration_threshold; } @@ -1661,7 +1676,7 @@ static void stop_quiescing(struct cache *cache) static void wait_for_migrations(struct cache *cache) { - wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); + wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations)); } static void stop_worker(struct cache *cache) @@ -1772,9 +1787,6 @@ static void destroy(struct cache *cache) { unsigned i; - if (cache->next_migration) - mempool_free(cache->next_migration, cache->migration_pool); - if (cache->migration_pool) mempool_destroy(cache->migration_pool); @@ -2282,7 +2294,8 @@ static int cache_create(struct cache_args *ca, struct cache **result) INIT_LIST_HEAD(&cache->quiesced_migrations); INIT_LIST_HEAD(&cache->completed_migrations); INIT_LIST_HEAD(&cache->need_commit_migrations); - atomic_set(&cache->nr_migrations, 0); + atomic_set(&cache->nr_allocated_migrations, 0); + atomic_set(&cache->nr_io_migrations, 0); init_waitqueue_head(&cache->migration_wait); init_waitqueue_head(&cache->quiescing_wait); @@ -2342,8 +2355,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) goto bad; } - cache->next_migration = NULL; - cache->need_tick_bio = true; cache->sized = false; cache->invalidate = false; From 5fea291eddb2b2721874c860783e0516df3ad699 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 3 Dec 2014 19:22:48 -0500 Subject: [PATCH 143/298] time: settimeofday: Validate the values of tv from user commit 6ada1fc0e1c4775de0e043e1bd3ae9d065491aa5 upstream. An unvalidated user input is multiplied by a constant, which can result in an undefined behaviour for large values. While this is validated later, we should avoid triggering undefined behaviour. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Sasha Levin [jstultz: include trivial milisecond->microsecond correction noticed by Andy] Signed-off-by: John Stultz Signed-off-by: Greg Kroah-Hartman --- include/linux/time.h | 13 +++++++++++++ kernel/time.c | 4 ++++ 2 files changed, 17 insertions(+) diff --git a/include/linux/time.h b/include/linux/time.h index d5d229b2e5af..7d532a32ff3a 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -173,6 +173,19 @@ extern void getboottime(struct timespec *ts); extern void monotonic_to_bootbased(struct timespec *ts); extern void get_monotonic_boottime(struct timespec *ts); +static inline bool timeval_valid(const struct timeval *tv) +{ + /* Dates before 1970 are bogus */ + if (tv->tv_sec < 0) + return false; + + /* Can't have more microseconds then a second */ + if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) + return false; + + return true; +} + extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); diff --git a/kernel/time.c b/kernel/time.c index 3c49ab45f822..3eb322e518a3 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -195,6 +195,10 @@ SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, if (tv) { if (copy_from_user(&user_tv, tv, sizeof(*tv))) return -EFAULT; + + if (!timeval_valid(&user_tv)) + return -EINVAL; + new_ts.tv_sec = user_tv.tv_sec; new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC; } From 09ee0cd0c2072c353bc3c096b36267bfeb0a8a9f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 3 Dec 2014 19:25:05 -0500 Subject: [PATCH 144/298] time: adjtimex: Validate the ADJ_FREQUENCY values commit 5e5aeb4367b450a28f447f6d5ab57d8f2ab16a5f upstream. Verify that the frequency value from userspace is valid and makes sense. Unverified values can cause overflows later on. Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Sasha Levin [jstultz: Fix up bug for negative values and drop redunent cap check] Signed-off-by: John Stultz Signed-off-by: Greg Kroah-Hartman --- kernel/time/ntp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index af8d1d4f3d55..28db9bedc857 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -631,6 +631,13 @@ int ntp_validate_timex(struct timex *txc) if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME))) return -EPERM; + if (txc->modes & ADJ_FREQUENCY) { + if (LONG_MIN / PPM_SCALE > txc->freq) + return -EINVAL; + if (LONG_MAX / PPM_SCALE < txc->freq) + return -EINVAL; + } + return 0; } From e3143069ec2bd188b6c2f958cb74eeb6a22f341f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 14 Jan 2015 11:11:03 -0200 Subject: [PATCH 145/298] ARM: dts: imx25: Fix PWM "per" clocks commit 7ecd0bde5bfea524a843ad8fa8cb66ccbce68779 upstream. Currently PWM functionality is broken on mx25 due to the wrong assignment of the PWM "per" clock. According to Documentation/devicetree/bindings/clock/imx25-clock.txt: pwm_ipg_per 52 ,so update the pwm "per" to use 'pwm_ipg_per' instead of 'per10' clock. With this change PWM can work fine on mx25. Reported-by: Carlos Soto Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx25.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index 6a26e79f0ef4..cf3300a3071d 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -352,7 +352,7 @@ compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; #pwm-cells = <2>; reg = <0x53fa0000 0x4000>; - clocks = <&clks 106>, <&clks 36>; + clocks = <&clks 106>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <36>; }; @@ -371,7 +371,7 @@ compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; #pwm-cells = <2>; reg = <0x53fa8000 0x4000>; - clocks = <&clks 107>, <&clks 36>; + clocks = <&clks 107>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <41>; }; @@ -412,7 +412,7 @@ pwm4: pwm@53fc8000 { compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; reg = <0x53fc8000 0x4000>; - clocks = <&clks 108>, <&clks 36>; + clocks = <&clks 108>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <42>; }; @@ -458,7 +458,7 @@ compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; #pwm-cells = <2>; reg = <0x53fe0000 0x4000>; - clocks = <&clks 105>, <&clks 36>; + clocks = <&clks 105>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <26>; }; From edecb42f3408f1f95bfa7ed96f6a69859ba12f4c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 18 Jan 2015 09:46:10 -0600 Subject: [PATCH 146/298] bus: mvebu-mbus: fix support of MBus window 13 commit 38bdf45f4aa5cb6186d50a29e6cbbd9d486a1519 upstream. On Armada XP, 375 and 38x the MBus window 13 has the remap capability, like windows 0 to 7. However, the mvebu-mbus driver isn't currently taking into account this special case, which means that when window 13 is actually used, the remap registers are left to 0, making the device using this MBus window unavailable. As a minimal fix for stable, don't use window 13. A full fix will follow later. Fixes: fddddb52a6c ("bus: introduce an Marvell EBU MBus driver") Reviewed-by: Thomas Petazzoni Signed-off-by: Andrew Lunn Signed-off-by: Greg Kroah-Hartman --- drivers/bus/mvebu-mbus.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index 372ae72cce34..e990deed2d33 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -181,12 +181,25 @@ static void mvebu_mbus_disable_window(struct mvebu_mbus_state *mbus, } /* Checks whether the given window number is available */ + +/* On Armada XP, 375 and 38x the MBus window 13 has the remap + * capability, like windows 0 to 7. However, the mvebu-mbus driver + * isn't currently taking into account this special case, which means + * that when window 13 is actually used, the remap registers are left + * to 0, making the device using this MBus window unavailable. The + * quick fix for stable is to not use window 13. A follow up patch + * will correctly handle this window. +*/ static int mvebu_mbus_window_is_free(struct mvebu_mbus_state *mbus, const int win) { void __iomem *addr = mbus->mbuswins_base + mbus->soc->win_cfg_offset(win); u32 ctrl = readl(addr + WIN_CTRL_OFF); + + if (win == 13) + return false; + return !(ctrl & WIN_CTRL_ENABLE); } From ba7883e486bebfbba41608b1ef2da6a6da672e1a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 18 Jan 2015 23:37:32 -0500 Subject: [PATCH 147/298] fix deadlock in cifs_ioctl_clone() commit 378ff1a53b5724f3ac97b0aba3c9ecac072f6fcd upstream. It really needs to check that src is non-directory *and* use {un,}lock_two_nodirectories(). As it is, it's trivial to cause double-lock (ioctl(fd, CIFS_IOC_COPYCHUNK_FILE, fd)) and if the last argument is an fd of directory, we are asking for trouble by violating the locking order - all directories go before all non-directories. If the last argument is an fd of parent directory, it has 50% odds of locking child before parent, which will cause AB-BA deadlock if we race with unlink(). Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/cifs/ioctl.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 77492301cc2b..dfc95646b88c 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -86,21 +86,16 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, } src_inode = src_file.file->f_dentry->d_inode; + rc = -EINVAL; + if (S_ISDIR(src_inode->i_mode)) + goto out_fput; /* * Note: cifs case is easier than btrfs since server responsible for * checks for proper open modes and file type and if it wants * server could even support copy of range where source = target */ - - /* so we do not deadlock racing two ioctls on same files */ - if (target_inode < src_inode) { - mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_CHILD); - } + lock_two_nondirectories(target_inode, src_inode); /* determine range to clone */ rc = -EINVAL; @@ -124,13 +119,7 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, out_unlock: /* although unlocking in the reverse order from locking is not strictly necessary here it is a little cleaner to be consistent */ - if (target_inode < src_inode) { - mutex_unlock(&src_inode->i_mutex); - mutex_unlock(&target_inode->i_mutex); - } else { - mutex_unlock(&target_inode->i_mutex); - mutex_unlock(&src_inode->i_mutex); - } + unlock_two_nondirectories(src_inode, target_inode); out_fput: fdput(src_file); out_drop_write: From 8d01cc87a9a660ed7de390350d414515f5429a50 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 5 Jan 2015 18:40:15 +0100 Subject: [PATCH 148/298] can: dev: fix crtlmode_supported check commit 9b1087aa5e86448fe6ad40a58964e35f3ba423d5 upstream. When changing flags in the CAN drivers ctrlmode the provided new content has to be checked whether the bits are allowed to be changed. The bits that are to be changed are given as a bitfield in cm->mask. Therefore checking against cm->flags is wrong as the content can hold any kind of values. The iproute2 tool sets the bits in cm->mask and cm->flags depending on the detected command line options. To be robust against bogus user space applications additionally sanitize the provided flags with the provided mask. Cc: Wolfgang Grandegger Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index cc11f7f5e91d..1468c4658804 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -664,10 +664,14 @@ static int can_changelink(struct net_device *dev, if (dev->flags & IFF_UP) return -EBUSY; cm = nla_data(data[IFLA_CAN_CTRLMODE]); - if (cm->flags & ~priv->ctrlmode_supported) + + /* check whether changed bits are allowed to be modified */ + if (cm->mask & ~priv->ctrlmode_supported) return -EOPNOTSUPP; + + /* clear bits to be modified and copy the flag values */ priv->ctrlmode &= ~cm->mask; - priv->ctrlmode |= cm->flags; + priv->ctrlmode |= (cm->flags & cm->mask); } if (data[IFLA_CAN_RESTART_MS]) { From 0cbcd35fe591013cac84bcff3a740b1aa0a30008 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Wed, 22 Oct 2014 03:37:08 +0200 Subject: [PATCH 149/298] clocksource: exynos_mct: Fix bitmask regression for exynos4_mct_write commit 8c38d28ba8da98f7102c31d35359b4dbe9d1f329 upstream. EXYNOS4_MCT_L_MASK is defined as 0xffffff00, so applying this bitmask produces a number outside the range 0x00 to 0xff, which always results in execution of the default switch statement. Obviously this is wrong and git history shows that the bitmask inversion was incorrectly set during a refactoring of the MCT code. Fix this by putting the inversion at the correct position again. Acked-by: Kukjin Kim Reported-by: GP Orcullo Reviewed-by: Doug Anderson Signed-off-by: Tobias Jakobi Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/exynos_mct.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index e252939b9ee1..831b48287a22 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -98,8 +98,8 @@ static void exynos4_mct_write(unsigned int value, unsigned long offset) __raw_writel(value, reg_base + offset); if (likely(offset >= EXYNOS4_MCT_L_BASE(0))) { - stat_addr = (offset & ~EXYNOS4_MCT_L_MASK) + MCT_L_WSTAT_OFFSET; - switch (offset & EXYNOS4_MCT_L_MASK) { + stat_addr = (offset & EXYNOS4_MCT_L_MASK) + MCT_L_WSTAT_OFFSET; + switch (offset & ~EXYNOS4_MCT_L_MASK) { case MCT_L_TCON_OFFSET: mask = 1 << 3; /* L_TCON write status */ break; From 10ee480d03d54dbdf38a82711ab58baee9df3e80 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 12 Jan 2015 16:26:02 -0800 Subject: [PATCH 150/298] x86, hyperv: Mark the Hyper-V clocksource as being continuous commit 32c6590d126836a062b3140ed52d898507987017 upstream. The Hyper-V clocksource is continuous; mark it accordingly. Signed-off-by: K. Y. Srinivasan Acked-by: jasowang@redhat.com Cc: gregkh@linuxfoundation.org Cc: devel@linuxdriverproject.org Cc: olaf@aepfle.de Cc: apw@canonical.com Link: http://lkml.kernel.org/r/1421108762-3331-1-git-send-email-kys@microsoft.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mshyperv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 832d05a914ba..317c81172c18 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -67,6 +67,7 @@ static struct clocksource hyperv_cs = { .rating = 400, /* use this when running on Hyperv*/ .read = read_hv_clock, .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static void __init ms_hyperv_init_platform(void) From 16107331facdb20497adbdb99088ebd0bfc9176e Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Tue, 9 Dec 2014 01:27:50 -0500 Subject: [PATCH 151/298] x86/tsc: Change Fast TSC calibration failed from error to info commit 520452172e6b318f3a8bd9d4fe1e25066393de25 upstream. Many users see this message when booting without knowning that it is of no importance and that TSC calibration may have succeeded by another way. As explained by Paul Bolle in http://lkml.kernel.org/r/1348488259.1436.22.camel@x61.thuisdomein "Fast TSC calibration failed" should not be considered as an error since other calibration methods are being tried afterward. At most, those send a warning if they fail (not an error). So let's change the message from error to warning. [ tglx: Make if pr_info. It's really not important at all ] Fixes: c767a54ba065 x86/debug: Add KERN_ to bare printks, convert printks to pr_ Signed-off-by: Alexandre Demers Link: http://lkml.kernel.org/r/1418106470-6906-1-git-send-email-alexandre.f.demers@gmail.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index de0290605903..b20bced0090f 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -618,7 +618,7 @@ static unsigned long quick_pit_calibrate(void) goto success; } } - pr_err("Fast TSC calibration failed\n"); + pr_info("Fast TSC calibration failed\n"); return 0; success: From 0f5e12ba20bf7f95d7f1b65b44a3689223230acb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 15 Jan 2015 16:51:46 -0800 Subject: [PATCH 152/298] x86, boot: Skip relocs when load address unchanged commit f285f4a21c3253887caceed493089ece17579d59 upstream. On 64-bit, relocation is not required unless the load address gets changed. Without this, relocations do unexpected things when the kernel is above 4G. Reported-by: Baoquan He Signed-off-by: Kees Cook Tested-by: Thomas D. Cc: Vivek Goyal Cc: Jan Beulich Cc: Junjie Mao Cc: Andi Kleen Link: http://lkml.kernel.org/r/20150116005146.GA4212@www.outflux.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/misc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index eb25ca1eb6da..8f45c855f84c 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -396,6 +396,8 @@ asmlinkage void *decompress_kernel(void *rmode, memptr heap, unsigned long output_len, unsigned long run_size) { + unsigned char *output_orig = output; + real_mode = rmode; sanitize_boot_params(real_mode); @@ -444,7 +446,12 @@ asmlinkage void *decompress_kernel(void *rmode, memptr heap, debug_putstr("\nDecompressing Linux... "); decompress(input_data, input_len, NULL, NULL, output, NULL, error); parse_elf(output); - handle_relocations(output, output_len); + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if kASLR has chosen a different load address. + */ + if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) + handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); return output; } From 1fe84f910afa365b64a8c210f3a2dd68a001c7d3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Jan 2015 11:27:58 -0800 Subject: [PATCH 153/298] x86, tls, ldt: Stop checking lm in LDT_empty commit e30ab185c490e9a9381385529e0fd32f0a399495 upstream. 32-bit programs don't have an lm bit in their ABI, so they can't reliably cause LDT_empty to return true without resorting to memset. They shouldn't need to do this. This should fix a longstanding, if minor, issue in all 64-bit kernels as well as a potential regression in the TLS hardening code. Fixes: 41bdc78544b8 x86/tls: Validate TLS entries to protect espfix Signed-off-by: Andy Lutomirski Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/72a059de55e86ad5e2935c80aa91880ddf19d07c.1421954363.git.luto@amacapital.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/desc.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 50d033a8947d..fc237fd0259a 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -251,7 +251,8 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; } -#define _LDT_empty(info) \ +/* This intentionally ignores lm, since 32-bit apps don't have that field. */ +#define LDT_empty(info) \ ((info)->base_addr == 0 && \ (info)->limit == 0 && \ (info)->contents == 0 && \ @@ -261,12 +262,6 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) (info)->seg_not_present == 1 && \ (info)->useable == 0) -#ifdef CONFIG_X86_64 -#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) -#else -#define LDT_empty(info) (_LDT_empty(info)) -#endif - static inline void clear_LDT(void) { set_ldt(NULL, 0); From 83df3d59f5b53a66adf67569d0689aeb5706bf13 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Jan 2015 11:27:59 -0800 Subject: [PATCH 154/298] x86, tls: Interpret an all-zero struct user_desc as "no segment" commit 3669ef9fa7d35f573ec9c0e0341b29251c2734a7 upstream. The Witcher 2 did something like this to allocate a TLS segment index: struct user_desc u_info; bzero(&u_info, sizeof(u_info)); u_info.entry_number = (uint32_t)-1; syscall(SYS_set_thread_area, &u_info); Strictly speaking, this code was never correct. It should have set read_exec_only and seg_not_present to 1 to indicate that it wanted to find a free slot without putting anything there, or it should have put something sensible in the TLS slot if it wanted to allocate a TLS entry for real. The actual effect of this code was to allocate a bogus segment that could be used to exploit espfix. The set_thread_area hardening patches changed the behavior, causing set_thread_area to return -EINVAL and crashing the game. This changes set_thread_area to interpret this as a request to find a free slot and to leave it empty, which isn't *quite* what the game expects but should be close enough to keep it working. In particular, using the code above to allocate two segments will allocate the same segment both times. According to FrostbittenKing on Github, this fixes The Witcher 2. If this somehow still causes problems, we could instead allocate a limit==0 32-bit data segment, but that seems rather ugly to me. Fixes: 41bdc78544b8 x86/tls: Validate TLS entries to protect espfix Signed-off-by: Andy Lutomirski Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/0cb251abe1ff0958b8e468a9a9a905b80ae3a746.1421954363.git.luto@amacapital.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/desc.h | 13 +++++++++++++ arch/x86/kernel/tls.c | 25 +++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index fc237fd0259a..a94b82e8f156 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -262,6 +262,19 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) (info)->seg_not_present == 1 && \ (info)->useable == 0) +/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */ +static inline bool LDT_zero(const struct user_desc *info) +{ + return (info->base_addr == 0 && + info->limit == 0 && + info->contents == 0 && + info->read_exec_only == 0 && + info->seg_32bit == 0 && + info->limit_in_pages == 0 && + info->seg_not_present == 0 && + info->useable == 0); +} + static inline void clear_LDT(void) { set_ldt(NULL, 0); diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index 4e942f31b1a7..7fc5e843f247 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -29,7 +29,28 @@ static int get_free_idx(void) static bool tls_desc_okay(const struct user_desc *info) { - if (LDT_empty(info)) + /* + * For historical reasons (i.e. no one ever documented how any + * of the segmentation APIs work), user programs can and do + * assume that a struct user_desc that's all zeros except for + * entry_number means "no segment at all". This never actually + * worked. In fact, up to Linux 3.19, a struct user_desc like + * this would create a 16-bit read-write segment with base and + * limit both equal to zero. + * + * That was close enough to "no segment at all" until we + * hardened this function to disallow 16-bit TLS segments. Fix + * it up by interpreting these zeroed segments the way that they + * were almost certainly intended to be interpreted. + * + * The correct way to ask for "no segment at all" is to specify + * a user_desc that satisfies LDT_empty. To keep everything + * working, we accept both. + * + * Note that there's a similar kludge in modify_ldt -- look at + * the distinction between modes 1 and 0x11. + */ + if (LDT_empty(info) || LDT_zero(info)) return true; /* @@ -71,7 +92,7 @@ static void set_tls_desc(struct task_struct *p, int idx, cpu = get_cpu(); while (n-- > 0) { - if (LDT_empty(info)) + if (LDT_empty(info) || LDT_zero(info)) desc->a = desc->b = 0; else fill_ldt(desc, info); From 252f71a8d49ff96ba626376306a6eb18b57b2f65 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Thu, 22 Jan 2015 22:58:49 +0000 Subject: [PATCH 155/298] x86/apic: Re-enable PCI_MSI support for non-SMP X86_32 commit 38a1dfda8e77d7ba74c94d06d8bc41ba98a4bc8c upstream. Commit 0dbc6078c06bc0 ('x86, build, pci: Fix PCI_MSI build on !SMP') introduced the dependency that X86_UP_APIC is only available when PCI_MSI is false. This effectively prevents PCI_MSI support on 32bit UP systems because it disables both APIC and IO-APIC. But APIC support is architecturally required for PCI_MSI. The intention of the patch was to enforce APIC support when PCI_MSI is enabled, but failed to do so. Remove the !PCI_MSI dependency from X86_UP_APIC and enforce X86_UP_APIC when PCI_MSI support is enabled on 32bit UP systems. [ tglx: Massaged changelog ] Fixes 0dbc6078c06bc0 'x86, build, pci: Fix PCI_MSI build on !SMP' Signed-off-by: Bryan O'Donoghue Suggested-by: Thomas Gleixner Reviewed-by: Andy Shevchenko Cc: Thomas Petazzoni Link: http://lkml.kernel.org/r/1421967529-9037-1-git-send-email-pure.logic@nexus-software.ie Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 98aa930230ec..2f645c90e4d8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -854,7 +854,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI + depends on X86_32 && !SMP && !X86_32_NON_STANDARD ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -865,6 +865,10 @@ config X86_UP_APIC performance counters), and the NMI watchdog which detects hard lockups. +config X86_UP_APIC_MSI + def_bool y + select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI + config X86_UP_IOAPIC bool "IO-APIC support on uniprocessors" depends on X86_UP_APIC From aaef7ece8199e21a21ed4f5d065044cd1ce37f81 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 24 Nov 2014 17:39:06 -0800 Subject: [PATCH 156/298] x86/asm/traps: Disable tracing and kprobes in fixup_bad_iret and sync_regs commit 7ddc6a2199f1da405a2fb68c40db8899b1a8cd87 upstream. These functions can be executed on the int3 stack, so kprobes are dangerous. Tracing is probably a bad idea, too. Fixes: b645af2d5905 ("x86_64, traps: Rework bad_iret") Signed-off-by: Andy Lutomirski Cc: Linus Torvalds Cc: Steven Rostedt Link: http://lkml.kernel.org/r/50e33d26adca60816f3ba968875801652507d0c4.1416870125.git.luto@amacapital.net Signed-off-by: Ingo Molnar [bwh: Backported to 3.10: - Use __kprobes instead of NOKPROBE_SYMBOL() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f9d976e0ae67..b1d9002af7db 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -365,7 +365,7 @@ exit: * for scheduling or signal handling. The actual stack switch is done in * entry.S */ -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +asmlinkage notrace __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; /* Did already sync */ @@ -390,7 +390,7 @@ struct bad_iret_stack { struct pt_regs regs; }; -asmlinkage __visible +asmlinkage __visible notrace __kprobes struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) { /* From 065566a604b11aa93d7bae59e8c053d2cf1a91f5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 7 Jan 2015 15:24:19 +0200 Subject: [PATCH 157/298] sata_dwc_460ex: fix resource leak on error path commit 4aaa71873ddb9faf4b0c4826579e2f6d18ff9ab4 upstream. DMA mapped IO should be unmapped on the error path in probe() and unconditionally on remove(). Fixes: 62936009f35a ([libata] Add 460EX on-chip SATA driver, sata_dwc_460ex) Signed-off-by: Andy Shevchenko Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/sata_dwc_460ex.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 523524b68022..f71e09d6cfe6 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -799,7 +799,7 @@ static int dma_dwc_init(struct sata_dwc_device *hsdev, int irq) if (err) { dev_err(host_pvt.dwc_dev, "%s: dma_request_interrupts returns" " %d\n", __func__, err); - goto error_out; + return err; } /* Enabe DMA */ @@ -810,11 +810,6 @@ static int dma_dwc_init(struct sata_dwc_device *hsdev, int irq) sata_dma_regs); return 0; - -error_out: - dma_dwc_exit(hsdev); - - return err; } static int sata_dwc_scr_read(struct ata_link *link, unsigned int scr, u32 *val) @@ -1664,7 +1659,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) char *ver = (char *)&versionr; u8 *base = NULL; int err = 0; - int irq, rc; + int irq; struct ata_host *host; struct ata_port_info pi = sata_dwc_port_info[0]; const struct ata_port_info *ppi[] = { &pi, NULL }; @@ -1727,7 +1722,7 @@ static int sata_dwc_probe(struct platform_device *ofdev) if (irq == NO_IRQ) { dev_err(&ofdev->dev, "no SATA DMA irq\n"); err = -ENODEV; - goto error_out; + goto error_iomap; } /* Get physical SATA DMA register base address */ @@ -1736,14 +1731,16 @@ static int sata_dwc_probe(struct platform_device *ofdev) dev_err(&ofdev->dev, "ioremap failed for AHBDMA register" " address\n"); err = -ENODEV; - goto error_out; + goto error_iomap; } /* Save dev for later use in dev_xxx() routines */ host_pvt.dwc_dev = &ofdev->dev; /* Initialize AHB DMAC */ - dma_dwc_init(hsdev, irq); + err = dma_dwc_init(hsdev, irq); + if (err) + goto error_dma_iomap; /* Enable SATA Interrupts */ sata_dwc_enable_interrupts(hsdev); @@ -1761,9 +1758,8 @@ static int sata_dwc_probe(struct platform_device *ofdev) * device discovery process, invoking our port_start() handler & * error_handler() to execute a dummy Softreset EH session */ - rc = ata_host_activate(host, irq, sata_dwc_isr, 0, &sata_dwc_sht); - - if (rc != 0) + err = ata_host_activate(host, irq, sata_dwc_isr, 0, &sata_dwc_sht); + if (err) dev_err(&ofdev->dev, "failed to activate host"); dev_set_drvdata(&ofdev->dev, host); @@ -1772,7 +1768,8 @@ static int sata_dwc_probe(struct platform_device *ofdev) error_out: /* Free SATA DMA resources */ dma_dwc_exit(hsdev); - +error_dma_iomap: + iounmap((void __iomem *)host_pvt.sata_dma_regs); error_iomap: iounmap(base); error_kmalloc: @@ -1793,6 +1790,7 @@ static int sata_dwc_remove(struct platform_device *ofdev) /* Free SATA DMA resources */ dma_dwc_exit(hsdev); + iounmap((void __iomem *)host_pvt.sata_dma_regs); iounmap(hsdev->reg_base); kfree(hsdev); kfree(host); From cf69173f59163182c12e0ecbda52721397468763 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 29 Dec 2014 09:39:01 -0500 Subject: [PATCH 158/298] KEYS: close race between key lookup and freeing commit a3a8784454692dd72e5d5d34dcdab17b4420e74c upstream. When a key is being garbage collected, it's key->user would get put before the ->destroy() callback is called, where the key is removed from it's respective tracking structures. This leaves a key hanging in a semi-invalid state which leaves a window open for a different task to try an access key->user. An example is find_keyring_by_name() which would dereference key->user for a key that is in the process of being garbage collected (where key->user was freed but ->destroy() wasn't called yet - so it's still present in the linked list). This would cause either a panic, or corrupt memory. Fixes CVE-2014-9529. Signed-off-by: Sasha Levin Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- security/keys/gc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/keys/gc.c b/security/keys/gc.c index d3222b6d7d59..009d9370c8fd 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -157,12 +157,12 @@ static noinline void key_gc_unused_keys(struct list_head *keys) if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) atomic_dec(&key->user->nikeys); - key_user_put(key->user); - /* now throw away the key memory */ if (key->type->destroy) key->type->destroy(key); + key_user_put(key->user); + kfree(key->description); #ifdef KEY_DEBUGGING From 15d25f97e23d65b62c4c99ea4769940fced9250d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 4 Jan 2015 15:20:29 +0100 Subject: [PATCH 159/298] netfilter: nfnetlink: validate nfnetlink header from batch commit 9ea2aa8b7dba9e99544c4187cc298face254569f upstream. Make sure there is enough room for the nfnetlink header in the netlink messages that are part of the batch. There is a similar check in netlink_rcv_skb(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index bf8a108b46e2..6cf2f077e09c 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -265,7 +265,8 @@ replay: nlh = nlmsg_hdr(skb); err = 0; - if (nlh->nlmsg_len < NLMSG_HDRLEN) { + if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) || + skb->len < nlh->nlmsg_len) { err = -EINVAL; goto ack; } From 0838c7fba01ddf0114d0a0b9ddd5f921c2370da9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 6 Dec 2014 16:49:24 +0300 Subject: [PATCH 160/298] ipvs: uninitialized data with IP_VS_IPV6 commit 3b05ac3824ed9648c0d9c02d51d9b54e4e7e874f upstream. The app_tcp_pkt_out() function expects "*diff" to be set and ends up using uninitialized data if CONFIG_IP_VS_IPV6 is turned on. The same issue is there in app_tcp_pkt_in(). Thanks to Julian Anastasov for noticing that. Signed-off-by: Dan Carpenter Acked-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipvs/ip_vs_ftp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 77c173282f38..4a662f15eaee 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -183,6 +183,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct nf_conn *ct; struct net *net; + *diff = 0; + #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, * so turn this into a no-op for IPv6 packets @@ -191,8 +193,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; #endif - *diff = 0; - /* Only useful for established sessions */ if (cp->state != IP_VS_TCP_S_ESTABLISHED) return 1; @@ -321,6 +321,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct ip_vs_conn *n_cp; struct net *net; + /* no diff required for incoming packets */ + *diff = 0; + #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, * so turn this into a no-op for IPv6 packets @@ -329,9 +332,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; #endif - /* no diff required for incoming packets */ - *diff = 0; - /* Only useful for established sessions */ if (cp->state != IP_VS_TCP_S_ESTABLISHED) return 1; From 98047ad0dcb515a19dec0d9179ef3f7c0c0f3cee Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 10 Dec 2014 14:48:43 +0000 Subject: [PATCH 161/298] Revert "swiotlb-xen: pass dev_addr to swiotlb_tbl_unmap_single" commit dbdd74763f1faf799fbb9ed30423182e92919378 upstream. This reverts commit 2c3fc8d26dd09b9d7069687eead849ee81c78e46. This commit broke on x86 PV because entries in the generic SWIOTLB are indexed using (pseudo-)physical address not DMA address and these are not the same in a x86 PV guest. Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini Signed-off-by: Greg Kroah-Hartman --- drivers/xen/swiotlb-xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 9df5d6ec7eec..f3a9d831d0f9 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -449,7 +449,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr, /* NOTE: We use dev_addr here, not paddr! */ if (is_xen_swiotlb_buffer(dev_addr)) { - swiotlb_tbl_unmap_single(hwdev, dev_addr, size, dir); + swiotlb_tbl_unmap_single(hwdev, paddr, size, dir); return; } From ebdb44da666e70bbf8ab45b9d4d750bacab72a78 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 10 Nov 2014 17:21:13 +0100 Subject: [PATCH 162/298] drbd: merge_bvec_fn: properly remap bvm->bi_bdev commit 3b9d35d744bb5139f9fed57f38c019bb8c7d351c upstream. This was not noticed for many years. Affects operation if md raid is used a backing device for DRBD. CC: stable@kernel.org # v3.2+ Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/drbd/drbd_req.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 104a040f24de..6efdbeafa33c 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1310,6 +1310,7 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; if (b->merge_bvec_fn) { + bvm->bi_bdev = mdev->ldev->backing_bdev; backing_limit = b->merge_bvec_fn(b, bvm, bvec); limit = min(limit, backing_limit); } From 31c8446aacd8595a9de61610b2625bcf84e1b7aa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 20 Nov 2014 17:05:53 -0800 Subject: [PATCH 163/298] crypto: prefix module autoloading with "crypto-" commit 5d26a105b5a73e5635eae0629b42fa0a90e07b7b upstream. This prefixes all crypto module loading with "crypto-" so we never run the risk of exposing module auto-loading to userspace via a crypto API, as demonstrated by Mathias Krause: https://lkml.org/lkml/2013/3/4/70 Signed-off-by: Kees Cook Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm/crypto/aes_glue.c | 4 ++-- arch/arm/crypto/sha1_glue.c | 2 +- arch/powerpc/crypto/sha1.c | 2 +- arch/s390/crypto/aes_s390.c | 2 +- arch/s390/crypto/des_s390.c | 4 ++-- arch/s390/crypto/ghash_s390.c | 2 +- arch/s390/crypto/sha1_s390.c | 2 +- arch/s390/crypto/sha256_s390.c | 4 ++-- arch/s390/crypto/sha512_s390.c | 4 ++-- arch/sparc/crypto/aes_glue.c | 2 +- arch/sparc/crypto/camellia_glue.c | 2 +- arch/sparc/crypto/crc32c_glue.c | 2 +- arch/sparc/crypto/des_glue.c | 2 +- arch/sparc/crypto/md5_glue.c | 2 +- arch/sparc/crypto/sha1_glue.c | 2 +- arch/sparc/crypto/sha256_glue.c | 4 ++-- arch/sparc/crypto/sha512_glue.c | 4 ++-- arch/x86/crypto/aes_glue.c | 4 ++-- arch/x86/crypto/aesni-intel_glue.c | 2 +- arch/x86/crypto/blowfish_glue.c | 4 ++-- arch/x86/crypto/camellia_aesni_avx2_glue.c | 4 ++-- arch/x86/crypto/camellia_aesni_avx_glue.c | 4 ++-- arch/x86/crypto/camellia_glue.c | 4 ++-- arch/x86/crypto/cast5_avx_glue.c | 2 +- arch/x86/crypto/cast6_avx_glue.c | 2 +- arch/x86/crypto/crc32-pclmul_glue.c | 4 ++-- arch/x86/crypto/crc32c-intel_glue.c | 4 ++-- arch/x86/crypto/crct10dif-pclmul_glue.c | 4 ++-- arch/x86/crypto/ghash-clmulni-intel_glue.c | 2 +- arch/x86/crypto/salsa20_glue.c | 4 ++-- arch/x86/crypto/serpent_avx2_glue.c | 4 ++-- arch/x86/crypto/serpent_avx_glue.c | 2 +- arch/x86/crypto/serpent_sse2_glue.c | 2 +- arch/x86/crypto/sha1_ssse3_glue.c | 2 +- arch/x86/crypto/sha256_ssse3_glue.c | 4 ++-- arch/x86/crypto/sha512_ssse3_glue.c | 4 ++-- arch/x86/crypto/twofish_avx_glue.c | 2 +- arch/x86/crypto/twofish_glue.c | 4 ++-- arch/x86/crypto/twofish_glue_3way.c | 4 ++-- crypto/842.c | 1 + crypto/aes_generic.c | 2 +- crypto/ansi_cprng.c | 2 +- crypto/anubis.c | 1 + crypto/api.c | 4 ++-- crypto/arc4.c | 1 + crypto/blowfish_generic.c | 2 +- crypto/camellia_generic.c | 2 +- crypto/cast5_generic.c | 2 +- crypto/cast6_generic.c | 2 +- crypto/ccm.c | 4 ++-- crypto/crc32.c | 1 + crypto/crct10dif_generic.c | 2 +- crypto/crypto_null.c | 6 +++--- crypto/ctr.c | 2 +- crypto/deflate.c | 2 +- crypto/des_generic.c | 4 ++-- crypto/fcrypt.c | 1 + crypto/gcm.c | 6 +++--- crypto/ghash-generic.c | 2 +- crypto/khazad.c | 1 + crypto/krng.c | 2 +- crypto/lz4.c | 1 + crypto/lz4hc.c | 1 + crypto/lzo.c | 1 + crypto/md4.c | 2 +- crypto/md5.c | 1 + crypto/michael_mic.c | 1 + crypto/rmd128.c | 1 + crypto/rmd160.c | 1 + crypto/rmd256.c | 1 + crypto/rmd320.c | 1 + crypto/salsa20_generic.c | 2 +- crypto/seed.c | 1 + crypto/serpent_generic.c | 4 ++-- crypto/sha1_generic.c | 2 +- crypto/sha256_generic.c | 4 ++-- crypto/sha512_generic.c | 4 ++-- crypto/tea.c | 4 ++-- crypto/tgr192.c | 4 ++-- crypto/twofish_generic.c | 2 +- crypto/wp512.c | 4 ++-- crypto/zlib.c | 1 + drivers/crypto/padlock-aes.c | 2 +- drivers/crypto/padlock-sha.c | 8 ++++---- drivers/crypto/ux500/cryp/cryp_core.c | 4 ++-- drivers/crypto/ux500/hash/hash_core.c | 8 ++++---- drivers/s390/crypto/ap_bus.c | 3 ++- include/linux/crypto.h | 13 +++++++++++++ 88 files changed, 141 insertions(+), 110 deletions(-) diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c index 3003fa1f6fb4..0409b8f89782 100644 --- a/arch/arm/crypto/aes_glue.c +++ b/arch/arm/crypto/aes_glue.c @@ -93,6 +93,6 @@ module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); -MODULE_ALIAS("aes-asm"); +MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("aes-asm"); MODULE_AUTHOR("David McCullough "); diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index 76cd976230bc..ace4cd67464c 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -175,5 +175,5 @@ module_exit(sha1_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); MODULE_AUTHOR("David McCullough "); diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index f9e8b9491efc..0f88c7b41119 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -154,4 +154,4 @@ module_exit(sha1_powerpc_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); -MODULE_ALIAS("sha1-powerpc"); +MODULE_ALIAS_CRYPTO("sha1-powerpc"); diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 23223cd63e54..1f272b24fc0b 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -979,7 +979,7 @@ static void __exit aes_s390_fini(void) module_init(aes_s390_init); module_exit(aes_s390_fini); -MODULE_ALIAS("aes-all"); +MODULE_ALIAS_CRYPTO("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 7acb77f7ef1a..9e05cc453a40 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -619,8 +619,8 @@ static void __exit des_s390_exit(void) module_init(des_s390_init); module_exit(des_s390_exit); -MODULE_ALIAS("des"); -MODULE_ALIAS("des3_ede"); +MODULE_ALIAS_CRYPTO("des"); +MODULE_ALIAS_CRYPTO("des3_ede"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index d43485d142e9..7940dc90e80b 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -160,7 +160,7 @@ static void __exit ghash_mod_exit(void) module_init(ghash_mod_init); module_exit(ghash_mod_exit); -MODULE_ALIAS("ghash"); +MODULE_ALIAS_CRYPTO("ghash"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation"); diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index a1b3a9dc9d8a..5b2bee323694 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -103,6 +103,6 @@ static void __exit sha1_s390_fini(void) module_init(sha1_s390_init); module_exit(sha1_s390_fini); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 9b853809a492..b74ff158108c 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -143,7 +143,7 @@ static void __exit sha256_s390_fini(void) module_init(sha256_s390_init); module_exit(sha256_s390_fini); -MODULE_ALIAS("sha256"); -MODULE_ALIAS("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 32a81383b69c..0c36989ba182 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -86,7 +86,7 @@ static struct shash_alg sha512_alg = { } }; -MODULE_ALIAS("sha512"); +MODULE_ALIAS_CRYPTO("sha512"); static int sha384_init(struct shash_desc *desc) { @@ -126,7 +126,7 @@ static struct shash_alg sha384_alg = { } }; -MODULE_ALIAS("sha384"); +MODULE_ALIAS_CRYPTO("sha384"); static int __init init(void) { diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 503e6d96ad4e..ded4cee35318 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -499,6 +499,6 @@ module_exit(aes_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 888f6260b4ec..641f55cb61c3 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -322,6 +322,6 @@ module_exit(camellia_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index 5162fad912ce..d1064e46efe8 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -176,6 +176,6 @@ module_exit(crc32c_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); -MODULE_ALIAS("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 3065bc61f9d3..d11500972994 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -532,6 +532,6 @@ module_exit(des_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); -MODULE_ALIAS("des"); +MODULE_ALIAS_CRYPTO("des"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index 09a9ea1dfb69..64c7ff5f72a9 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -185,6 +185,6 @@ module_exit(md5_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); -MODULE_ALIAS("md5"); +MODULE_ALIAS_CRYPTO("md5"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c index 6cd5f29e1e0d..1b3e47accc74 100644 --- a/arch/sparc/crypto/sha1_glue.c +++ b/arch/sparc/crypto/sha1_glue.c @@ -180,6 +180,6 @@ module_exit(sha1_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c index 04f555ab2680..41f27cca2a22 100644 --- a/arch/sparc/crypto/sha256_glue.c +++ b/arch/sparc/crypto/sha256_glue.c @@ -237,7 +237,7 @@ module_exit(sha256_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated"); -MODULE_ALIAS("sha224"); -MODULE_ALIAS("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c index f04d1994d19a..9fff88541b8c 100644 --- a/arch/sparc/crypto/sha512_glue.c +++ b/arch/sparc/crypto/sha512_glue.c @@ -222,7 +222,7 @@ module_exit(sha512_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated"); -MODULE_ALIAS("sha384"); -MODULE_ALIAS("sha512"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); #include "crop_devid.c" diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index aafe8ce0d65d..e26984f7ab8d 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -66,5 +66,5 @@ module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); -MODULE_ALIAS("aes-asm"); +MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("aes-asm"); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 948ad0e77741..6dfb7d0b139a 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1514,4 +1514,4 @@ module_exit(aesni_exit); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 50ec333b70e6..1477cfcdbf6b 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -481,5 +481,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); -MODULE_ALIAS("blowfish"); -MODULE_ALIAS("blowfish-asm"); +MODULE_ALIAS_CRYPTO("blowfish"); +MODULE_ALIAS_CRYPTO("blowfish-asm"); diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 4209a76fcdaa..9a07fafe3831 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -582,5 +582,5 @@ module_exit(camellia_aesni_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 87a041a10f4a..ed38d959add6 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -574,5 +574,5 @@ module_exit(camellia_aesni_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index c171dcbf192d..5c8b6266a394 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -1725,5 +1725,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index e6a3700489b9..f62e9db5a462 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -494,4 +494,4 @@ module_exit(cast5_exit); MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("cast5"); +MODULE_ALIAS_CRYPTO("cast5"); diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 09f3677393e4..0160f68a57ff 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -611,4 +611,4 @@ module_exit(cast6_exit); MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("cast6"); +MODULE_ALIAS_CRYPTO("cast6"); diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 9d014a74ef96..1937fc1d8763 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -197,5 +197,5 @@ module_exit(crc32_pclmul_mod_fini); MODULE_AUTHOR("Alexander Boyko "); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crc32"); -MODULE_ALIAS("crc32-pclmul"); +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32-pclmul"); diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 6812ad98355c..28640c3d6af7 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -280,5 +280,5 @@ MODULE_AUTHOR("Austin Zhang , Kent Liu "); MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crct10dif"); -MODULE_ALIAS("crct10dif-pclmul"); +MODULE_ALIAS_CRYPTO("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif-pclmul"); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index d785cf2c529c..a8d6f69f92a3 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -341,4 +341,4 @@ module_exit(ghash_pclmulqdqni_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " "acclerated by PCLMULQDQ-NI"); -MODULE_ALIAS("ghash"); +MODULE_ALIAS_CRYPTO("ghash"); diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index 5e8e67739bb5..399a29d067d6 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -119,5 +119,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); -MODULE_ALIAS("salsa20"); -MODULE_ALIAS("salsa20-asm"); +MODULE_ALIAS_CRYPTO("salsa20"); +MODULE_ALIAS_CRYPTO("salsa20-asm"); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 2fae489b1524..437e47a4d302 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -558,5 +558,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized"); -MODULE_ALIAS("serpent"); -MODULE_ALIAS("serpent-asm"); +MODULE_ALIAS_CRYPTO("serpent"); +MODULE_ALIAS_CRYPTO("serpent-asm"); diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index ff4870870972..7e217398b4eb 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -617,4 +617,4 @@ module_exit(serpent_exit); MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("serpent"); +MODULE_ALIAS_CRYPTO("serpent"); diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 8c95f8637306..bf025adaea01 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -618,4 +618,4 @@ module_exit(serpent_sse2_exit); MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("serpent"); +MODULE_ALIAS_CRYPTO("serpent"); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 4a11a9d72451..29e1060e9001 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -237,4 +237,4 @@ module_exit(sha1_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index f248546da1ca..4dc100d82902 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -318,5 +318,5 @@ module_exit(sha256_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); -MODULE_ALIAS("sha256"); -MODULE_ALIAS("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 8626b03e83b7..26a5898a6f26 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -326,5 +326,5 @@ module_exit(sha512_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); -MODULE_ALIAS("sha512"); -MODULE_ALIAS("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha384"); diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 4e3c665be129..1ac531ea9bcc 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -579,4 +579,4 @@ module_exit(twofish_exit); MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("twofish"); +MODULE_ALIAS_CRYPTO("twofish"); diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index 0a5202303501..77e06c2da83d 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -96,5 +96,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm"); +MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-asm"); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 13e63b3e1dfb..56d8a08ee479 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -495,5 +495,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm"); +MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-asm"); diff --git a/crypto/842.c b/crypto/842.c index 65c7a89cfa09..b48f4f108c47 100644 --- a/crypto/842.c +++ b/crypto/842.c @@ -180,3 +180,4 @@ module_exit(nx842_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("842 Compression Algorithm"); +MODULE_ALIAS_CRYPTO("842"); diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index fd0d6b454975..9b3c54c1cbe8 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -1474,4 +1474,4 @@ module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index 666f1962a160..b4485a108389 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -476,4 +476,4 @@ module_param(dbg, int, 0); MODULE_PARM_DESC(dbg, "Boolean to enable debugging (0/1 == off/on)"); module_init(prng_mod_init); module_exit(prng_mod_fini); -MODULE_ALIAS("stdrng"); +MODULE_ALIAS_CRYPTO("stdrng"); diff --git a/crypto/anubis.c b/crypto/anubis.c index 008c8a4fb67c..4bb187c2a902 100644 --- a/crypto/anubis.c +++ b/crypto/anubis.c @@ -704,3 +704,4 @@ module_exit(anubis_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Anubis Cryptographic Algorithm"); +MODULE_ALIAS_CRYPTO("anubis"); diff --git a/crypto/api.c b/crypto/api.c index a2b39c5f3649..2a81e98a0021 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -216,11 +216,11 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) alg = crypto_alg_lookup(name, type, mask); if (!alg) { - request_module("%s", name); + request_module("crypto-%s", name); if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask & CRYPTO_ALG_NEED_FALLBACK)) - request_module("%s-all", name); + request_module("crypto-%s-all", name); alg = crypto_alg_lookup(name, type, mask); } diff --git a/crypto/arc4.c b/crypto/arc4.c index 5a772c3657d5..f1a81925558f 100644 --- a/crypto/arc4.c +++ b/crypto/arc4.c @@ -166,3 +166,4 @@ module_exit(arc4_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ARC4 Cipher Algorithm"); MODULE_AUTHOR("Jon Oberheide "); +MODULE_ALIAS_CRYPTO("arc4"); diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c index 8baf5447d35b..7bd71f02d0dd 100644 --- a/crypto/blowfish_generic.c +++ b/crypto/blowfish_generic.c @@ -138,4 +138,4 @@ module_exit(blowfish_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher Algorithm"); -MODULE_ALIAS("blowfish"); +MODULE_ALIAS_CRYPTO("blowfish"); diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c index 26bcd7a2d6b4..1b74c5a3e891 100644 --- a/crypto/camellia_generic.c +++ b/crypto/camellia_generic.c @@ -1098,4 +1098,4 @@ module_exit(camellia_fini); MODULE_DESCRIPTION("Camellia Cipher Algorithm"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("camellia"); +MODULE_ALIAS_CRYPTO("camellia"); diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c index 5558f630a0eb..84c86db67ec7 100644 --- a/crypto/cast5_generic.c +++ b/crypto/cast5_generic.c @@ -549,4 +549,4 @@ module_exit(cast5_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cast5 Cipher Algorithm"); -MODULE_ALIAS("cast5"); +MODULE_ALIAS_CRYPTO("cast5"); diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c index de732528a430..f408f0bd8de2 100644 --- a/crypto/cast6_generic.c +++ b/crypto/cast6_generic.c @@ -291,4 +291,4 @@ module_exit(cast6_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cast6 Cipher Algorithm"); -MODULE_ALIAS("cast6"); +MODULE_ALIAS_CRYPTO("cast6"); diff --git a/crypto/ccm.c b/crypto/ccm.c index 1df84217f7c9..647575b41281 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -879,5 +879,5 @@ module_exit(crypto_ccm_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Counter with CBC MAC"); -MODULE_ALIAS("ccm_base"); -MODULE_ALIAS("rfc4309"); +MODULE_ALIAS_CRYPTO("ccm_base"); +MODULE_ALIAS_CRYPTO("rfc4309"); diff --git a/crypto/crc32.c b/crypto/crc32.c index 9d1c41569898..187ded28cb0b 100644 --- a/crypto/crc32.c +++ b/crypto/crc32.c @@ -156,3 +156,4 @@ module_exit(crc32_mod_fini); MODULE_AUTHOR("Alexander Boyko "); MODULE_DESCRIPTION("CRC32 calculations wrapper for lib/crc32"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_CRYPTO("crc32"); diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c index 877e7114ec5c..08bb4f504520 100644 --- a/crypto/crct10dif_generic.c +++ b/crypto/crct10dif_generic.c @@ -124,4 +124,4 @@ module_exit(crct10dif_mod_fini); MODULE_AUTHOR("Tim Chen "); MODULE_DESCRIPTION("T10 DIF CRC calculation."); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif"); diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index fee7265cd35d..7b39fa3deac2 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -149,9 +149,9 @@ static struct crypto_alg null_algs[3] = { { .coa_decompress = null_compress } } } }; -MODULE_ALIAS("compress_null"); -MODULE_ALIAS("digest_null"); -MODULE_ALIAS("cipher_null"); +MODULE_ALIAS_CRYPTO("compress_null"); +MODULE_ALIAS_CRYPTO("digest_null"); +MODULE_ALIAS_CRYPTO("cipher_null"); static int __init crypto_null_mod_init(void) { diff --git a/crypto/ctr.c b/crypto/ctr.c index f2b94f27bb2c..3d81ff7e6b48 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -466,4 +466,4 @@ module_exit(crypto_ctr_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CTR Counter block mode"); -MODULE_ALIAS("rfc3686"); +MODULE_ALIAS_CRYPTO("rfc3686"); diff --git a/crypto/deflate.c b/crypto/deflate.c index b57d70eb156b..95d8d37c5021 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -222,4 +222,4 @@ module_exit(deflate_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Deflate Compression Algorithm for IPCOMP"); MODULE_AUTHOR("James Morris "); - +MODULE_ALIAS_CRYPTO("deflate"); diff --git a/crypto/des_generic.c b/crypto/des_generic.c index f6cf63f88468..5ec5ed544149 100644 --- a/crypto/des_generic.c +++ b/crypto/des_generic.c @@ -971,7 +971,7 @@ static struct crypto_alg des_algs[2] = { { .cia_decrypt = des3_ede_decrypt } } } }; -MODULE_ALIAS("des3_ede"); +MODULE_ALIAS_CRYPTO("des3_ede"); static int __init des_generic_mod_init(void) { @@ -989,4 +989,4 @@ module_exit(des_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); MODULE_AUTHOR("Dag Arne Osvik "); -MODULE_ALIAS("des"); +MODULE_ALIAS_CRYPTO("des"); diff --git a/crypto/fcrypt.c b/crypto/fcrypt.c index 021d7fec6bc8..77286ea28865 100644 --- a/crypto/fcrypt.c +++ b/crypto/fcrypt.c @@ -420,3 +420,4 @@ module_exit(fcrypt_mod_fini); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("FCrypt Cipher Algorithm"); MODULE_AUTHOR("David Howells "); +MODULE_ALIAS_CRYPTO("fcrypt"); diff --git a/crypto/gcm.c b/crypto/gcm.c index b4f017939004..0099296a6934 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -1441,6 +1441,6 @@ module_exit(crypto_gcm_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Galois/Counter Mode"); MODULE_AUTHOR("Mikko Herranen "); -MODULE_ALIAS("gcm_base"); -MODULE_ALIAS("rfc4106"); -MODULE_ALIAS("rfc4543"); +MODULE_ALIAS_CRYPTO("gcm_base"); +MODULE_ALIAS_CRYPTO("rfc4106"); +MODULE_ALIAS_CRYPTO("rfc4543"); diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c index 9d3f0c69a86f..4e97fae9666f 100644 --- a/crypto/ghash-generic.c +++ b/crypto/ghash-generic.c @@ -172,4 +172,4 @@ module_exit(ghash_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm"); -MODULE_ALIAS("ghash"); +MODULE_ALIAS_CRYPTO("ghash"); diff --git a/crypto/khazad.c b/crypto/khazad.c index 60e7cd66facc..873eb5ded6d7 100644 --- a/crypto/khazad.c +++ b/crypto/khazad.c @@ -880,3 +880,4 @@ module_exit(khazad_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Khazad Cryptographic Algorithm"); +MODULE_ALIAS_CRYPTO("khazad"); diff --git a/crypto/krng.c b/crypto/krng.c index a2d2b72fc135..67c88b331210 100644 --- a/crypto/krng.c +++ b/crypto/krng.c @@ -62,4 +62,4 @@ module_exit(krng_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Kernel Random Number Generator"); -MODULE_ALIAS("stdrng"); +MODULE_ALIAS_CRYPTO("stdrng"); diff --git a/crypto/lz4.c b/crypto/lz4.c index 4586dd15b0d8..53279ab8c3a6 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -104,3 +104,4 @@ module_exit(lz4_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZ4 Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lz4"); diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c index 151ba31d34e3..eaec5fa3debf 100644 --- a/crypto/lz4hc.c +++ b/crypto/lz4hc.c @@ -104,3 +104,4 @@ module_exit(lz4hc_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZ4HC Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lz4hc"); diff --git a/crypto/lzo.c b/crypto/lzo.c index 1c2aa69c54b8..d1ff69404353 100644 --- a/crypto/lzo.c +++ b/crypto/lzo.c @@ -103,3 +103,4 @@ module_exit(lzo_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lzo"); diff --git a/crypto/md4.c b/crypto/md4.c index 0477a6a01d58..3515af425cc9 100644 --- a/crypto/md4.c +++ b/crypto/md4.c @@ -255,4 +255,4 @@ module_exit(md4_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD4 Message Digest Algorithm"); - +MODULE_ALIAS_CRYPTO("md4"); diff --git a/crypto/md5.c b/crypto/md5.c index 7febeaab923b..36f5e5b103f3 100644 --- a/crypto/md5.c +++ b/crypto/md5.c @@ -168,3 +168,4 @@ module_exit(md5_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD5 Message Digest Algorithm"); +MODULE_ALIAS_CRYPTO("md5"); diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c index 079b761bc70d..46195e0d0f4d 100644 --- a/crypto/michael_mic.c +++ b/crypto/michael_mic.c @@ -184,3 +184,4 @@ module_exit(michael_mic_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Michael MIC"); MODULE_AUTHOR("Jouni Malinen "); +MODULE_ALIAS_CRYPTO("michael_mic"); diff --git a/crypto/rmd128.c b/crypto/rmd128.c index 8a0f68b7f257..049486ede938 100644 --- a/crypto/rmd128.c +++ b/crypto/rmd128.c @@ -327,3 +327,4 @@ module_exit(rmd128_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger "); MODULE_DESCRIPTION("RIPEMD-128 Message Digest"); +MODULE_ALIAS_CRYPTO("rmd128"); diff --git a/crypto/rmd160.c b/crypto/rmd160.c index 525d7bb752cf..de585e51d455 100644 --- a/crypto/rmd160.c +++ b/crypto/rmd160.c @@ -371,3 +371,4 @@ module_exit(rmd160_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger "); MODULE_DESCRIPTION("RIPEMD-160 Message Digest"); +MODULE_ALIAS_CRYPTO("rmd160"); diff --git a/crypto/rmd256.c b/crypto/rmd256.c index 69293d9b56e0..4ec02a754e09 100644 --- a/crypto/rmd256.c +++ b/crypto/rmd256.c @@ -346,3 +346,4 @@ module_exit(rmd256_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger "); MODULE_DESCRIPTION("RIPEMD-256 Message Digest"); +MODULE_ALIAS_CRYPTO("rmd256"); diff --git a/crypto/rmd320.c b/crypto/rmd320.c index 09f97dfdfbba..770f2cb369f8 100644 --- a/crypto/rmd320.c +++ b/crypto/rmd320.c @@ -395,3 +395,4 @@ module_exit(rmd320_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger "); MODULE_DESCRIPTION("RIPEMD-320 Message Digest"); +MODULE_ALIAS_CRYPTO("rmd320"); diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index 9a4770c02284..3d0f9df30ac9 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -248,4 +248,4 @@ module_exit(salsa20_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm"); -MODULE_ALIAS("salsa20"); +MODULE_ALIAS_CRYPTO("salsa20"); diff --git a/crypto/seed.c b/crypto/seed.c index 9c904d6d2151..c6ba8438be43 100644 --- a/crypto/seed.c +++ b/crypto/seed.c @@ -476,3 +476,4 @@ module_exit(seed_fini); MODULE_DESCRIPTION("SEED Cipher Algorithm"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Hye-Shik Chang , Kim Hyun "); +MODULE_ALIAS_CRYPTO("seed"); diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c index 7ddbd7e88859..a53b5e2af335 100644 --- a/crypto/serpent_generic.c +++ b/crypto/serpent_generic.c @@ -665,5 +665,5 @@ module_exit(serpent_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Serpent and tnepres (kerneli compatible serpent reversed) Cipher Algorithm"); MODULE_AUTHOR("Dag Arne Osvik "); -MODULE_ALIAS("tnepres"); -MODULE_ALIAS("serpent"); +MODULE_ALIAS_CRYPTO("tnepres"); +MODULE_ALIAS_CRYPTO("serpent"); diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c index 42794803c480..76d300fe968f 100644 --- a/crypto/sha1_generic.c +++ b/crypto/sha1_generic.c @@ -153,4 +153,4 @@ module_exit(sha1_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index 543366779524..8d7811a0031c 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -384,5 +384,5 @@ module_exit(sha256_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm"); -MODULE_ALIAS("sha224"); -MODULE_ALIAS("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 6ed124f3ea0f..5479f302b757 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -287,5 +287,5 @@ module_exit(sha512_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms"); -MODULE_ALIAS("sha384"); -MODULE_ALIAS("sha512"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); diff --git a/crypto/tea.c b/crypto/tea.c index 0a572323ee4a..495be2d0077d 100644 --- a/crypto/tea.c +++ b/crypto/tea.c @@ -270,8 +270,8 @@ static void __exit tea_mod_fini(void) crypto_unregister_algs(tea_algs, ARRAY_SIZE(tea_algs)); } -MODULE_ALIAS("xtea"); -MODULE_ALIAS("xeta"); +MODULE_ALIAS_CRYPTO("xtea"); +MODULE_ALIAS_CRYPTO("xeta"); module_init(tea_mod_init); module_exit(tea_mod_fini); diff --git a/crypto/tgr192.c b/crypto/tgr192.c index 87403556fd0b..5a5333f166ea 100644 --- a/crypto/tgr192.c +++ b/crypto/tgr192.c @@ -676,8 +676,8 @@ static void __exit tgr192_mod_fini(void) crypto_unregister_shashes(tgr_algs, ARRAY_SIZE(tgr_algs)); } -MODULE_ALIAS("tgr160"); -MODULE_ALIAS("tgr128"); +MODULE_ALIAS_CRYPTO("tgr160"); +MODULE_ALIAS_CRYPTO("tgr128"); module_init(tgr192_mod_init); module_exit(tgr192_mod_fini); diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c index 2d5000552d0f..523ad8c4e359 100644 --- a/crypto/twofish_generic.c +++ b/crypto/twofish_generic.c @@ -211,4 +211,4 @@ module_exit(twofish_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Twofish Cipher Algorithm"); -MODULE_ALIAS("twofish"); +MODULE_ALIAS_CRYPTO("twofish"); diff --git a/crypto/wp512.c b/crypto/wp512.c index 180f1d6e03f4..8d4076417e87 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -1167,8 +1167,8 @@ static void __exit wp512_mod_fini(void) crypto_unregister_shashes(wp_algs, ARRAY_SIZE(wp_algs)); } -MODULE_ALIAS("wp384"); -MODULE_ALIAS("wp256"); +MODULE_ALIAS_CRYPTO("wp384"); +MODULE_ALIAS_CRYPTO("wp256"); module_init(wp512_mod_init); module_exit(wp512_mod_fini); diff --git a/crypto/zlib.c b/crypto/zlib.c index 06b62e5cdcc7..d98078835281 100644 --- a/crypto/zlib.c +++ b/crypto/zlib.c @@ -378,3 +378,4 @@ module_exit(zlib_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Zlib Compression Algorithm"); MODULE_AUTHOR("Sony Corporation"); +MODULE_ALIAS_CRYPTO("zlib"); diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 633ba945e153..c178ed8c3908 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -563,4 +563,4 @@ MODULE_DESCRIPTION("VIA PadLock AES algorithm support"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 9266c0e25492..93d7753ab38a 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -593,7 +593,7 @@ MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support."); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("sha1-all"); -MODULE_ALIAS("sha256-all"); -MODULE_ALIAS("sha1-padlock"); -MODULE_ALIAS("sha256-padlock"); +MODULE_ALIAS_CRYPTO("sha1-all"); +MODULE_ALIAS_CRYPTO("sha256-all"); +MODULE_ALIAS_CRYPTO("sha1-padlock"); +MODULE_ALIAS_CRYPTO("sha256-padlock"); diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c index 92105f3dc8e0..e4cea7c45142 100644 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ b/drivers/crypto/ux500/cryp/cryp_core.c @@ -1810,7 +1810,7 @@ module_exit(ux500_cryp_mod_fini); module_param(cryp_mode, int, 0); MODULE_DESCRIPTION("Driver for ST-Ericsson UX500 CRYP crypto engine."); -MODULE_ALIAS("aes-all"); -MODULE_ALIAS("des-all"); +MODULE_ALIAS_CRYPTO("aes-all"); +MODULE_ALIAS_CRYPTO("des-all"); MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c index 1c73f4fbc252..8e5e0187506f 100644 --- a/drivers/crypto/ux500/hash/hash_core.c +++ b/drivers/crypto/ux500/hash/hash_core.c @@ -1995,7 +1995,7 @@ module_exit(ux500_hash_mod_fini); MODULE_DESCRIPTION("Driver for ST-Ericsson UX500 HASH engine."); MODULE_LICENSE("GPL"); -MODULE_ALIAS("sha1-all"); -MODULE_ALIAS("sha256-all"); -MODULE_ALIAS("hmac-sha1-all"); -MODULE_ALIAS("hmac-sha256-all"); +MODULE_ALIAS_CRYPTO("sha1-all"); +MODULE_ALIAS_CRYPTO("sha256-all"); +MODULE_ALIAS_CRYPTO("hmac-sha1-all"); +MODULE_ALIAS_CRYPTO("hmac-sha256-all"); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index ab3baa7f9508..86ade85481bd 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "ap_bus.h" @@ -71,7 +72,7 @@ MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("Adjunct Processor Bus driver, " \ "Copyright IBM Corp. 2006, 2012"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("z90crypt"); +MODULE_ALIAS_CRYPTO("z90crypt"); /* * Module parameter diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b92eadf92d72..2b00d92a6e6f 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -25,6 +25,19 @@ #include #include +/* + * Autoloaded crypto modules should only use a prefixed name to avoid allowing + * arbitrary modules to be loaded. Loading from userspace may still need the + * unprefixed names, so retains those aliases as well. + * This uses __MODULE_INFO directly instead of MODULE_ALIAS because pre-4.3 + * gcc (e.g. avr32 toolchain) uses __LINE__ for uniqueness, and this macro + * expands twice on the same line. Instead, use a separate base name for the + * alias. + */ +#define MODULE_ALIAS_CRYPTO(name) \ + __MODULE_INFO(alias, alias_userspace, name); \ + __MODULE_INFO(alias, alias_crypto, "crypto-" name) + /* * Algorithm masks and types. */ From f6e8b55035cb1cc1b6ac054fbfed633177524362 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 24 Nov 2014 16:32:38 -0800 Subject: [PATCH 164/298] crypto: include crypto- module prefix in template commit 4943ba16bbc2db05115707b3ff7b4874e9e3c560 upstream. This adds the module loading prefix "crypto-" to the template lookup as well. For example, attempting to load 'vfat(blowfish)' via AF_ALG now correctly includes the "crypto-" prefix at every level, correctly rejecting "vfat": net-pf-38 algif-hash crypto-vfat(blowfish) crypto-vfat(blowfish)-all crypto-vfat Reported-by: Mathias Krause Signed-off-by: Kees Cook Acked-by: Mathias Krause Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/x86/crypto/fpu.c | 3 +++ crypto/algapi.c | 4 ++-- crypto/authenc.c | 1 + crypto/authencesn.c | 1 + crypto/cbc.c | 1 + crypto/ccm.c | 1 + crypto/chainiv.c | 1 + crypto/cmac.c | 1 + crypto/cryptd.c | 1 + crypto/ctr.c | 1 + crypto/cts.c | 1 + crypto/ecb.c | 1 + crypto/eseqiv.c | 1 + crypto/gcm.c | 1 + crypto/hmac.c | 1 + crypto/lrw.c | 1 + crypto/pcbc.c | 1 + crypto/pcrypt.c | 1 + crypto/seqiv.c | 1 + crypto/vmac.c | 1 + crypto/xcbc.c | 1 + crypto/xts.c | 1 + 22 files changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index 98d7a188f46b..f368ba261739 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -17,6 +17,7 @@ #include #include #include +#include #include struct crypto_fpu_ctx { @@ -159,3 +160,5 @@ void __exit crypto_fpu_exit(void) { crypto_unregister_template(&crypto_fpu_tmpl); } + +MODULE_ALIAS_CRYPTO("fpu"); diff --git a/crypto/algapi.c b/crypto/algapi.c index 7a1ae87f1683..00d8d939733b 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -495,8 +495,8 @@ static struct crypto_template *__crypto_lookup_template(const char *name) struct crypto_template *crypto_lookup_template(const char *name) { - return try_then_request_module(__crypto_lookup_template(name), "%s", - name); + return try_then_request_module(__crypto_lookup_template(name), + "crypto-%s", name); } EXPORT_SYMBOL_GPL(crypto_lookup_template); diff --git a/crypto/authenc.c b/crypto/authenc.c index e1223559d5df..78fb16cab13f 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -721,3 +721,4 @@ module_exit(crypto_authenc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Simple AEAD wrapper for IPsec"); +MODULE_ALIAS_CRYPTO("authenc"); diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 4be0dd4373a9..024bff2344fc 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -814,3 +814,4 @@ module_exit(crypto_authenc_esn_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert "); MODULE_DESCRIPTION("AEAD wrapper for IPsec with extended sequence numbers"); +MODULE_ALIAS_CRYPTO("authencesn"); diff --git a/crypto/cbc.c b/crypto/cbc.c index 61ac42e1e32b..780ee27b2d43 100644 --- a/crypto/cbc.c +++ b/crypto/cbc.c @@ -289,3 +289,4 @@ module_exit(crypto_cbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CBC block cipher algorithm"); +MODULE_ALIAS_CRYPTO("cbc"); diff --git a/crypto/ccm.c b/crypto/ccm.c index 647575b41281..003bbbd21a2b 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -881,3 +881,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Counter with CBC MAC"); MODULE_ALIAS_CRYPTO("ccm_base"); MODULE_ALIAS_CRYPTO("rfc4309"); +MODULE_ALIAS_CRYPTO("ccm"); diff --git a/crypto/chainiv.c b/crypto/chainiv.c index 834d8dd3d4fc..22b7e55b0e1b 100644 --- a/crypto/chainiv.c +++ b/crypto/chainiv.c @@ -359,3 +359,4 @@ module_exit(chainiv_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Chain IV Generator"); +MODULE_ALIAS_CRYPTO("chainiv"); diff --git a/crypto/cmac.c b/crypto/cmac.c index 50880cf17fad..7a8bfbd548f6 100644 --- a/crypto/cmac.c +++ b/crypto/cmac.c @@ -313,3 +313,4 @@ module_exit(crypto_cmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CMAC keyed hash algorithm"); +MODULE_ALIAS_CRYPTO("cmac"); diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 7bdd61b867c8..75c415d37086 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -955,3 +955,4 @@ module_exit(cryptd_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Software async crypto daemon"); +MODULE_ALIAS_CRYPTO("cryptd"); diff --git a/crypto/ctr.c b/crypto/ctr.c index 3d81ff7e6b48..2386f7313952 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -467,3 +467,4 @@ module_exit(crypto_ctr_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CTR Counter block mode"); MODULE_ALIAS_CRYPTO("rfc3686"); +MODULE_ALIAS_CRYPTO("ctr"); diff --git a/crypto/cts.c b/crypto/cts.c index 042223f8e733..60b9da3fa7c1 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -350,3 +350,4 @@ module_exit(crypto_cts_module_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("CTS-CBC CipherText Stealing for CBC"); +MODULE_ALIAS_CRYPTO("cts"); diff --git a/crypto/ecb.c b/crypto/ecb.c index 935cfef4aa84..12011aff0971 100644 --- a/crypto/ecb.c +++ b/crypto/ecb.c @@ -185,3 +185,4 @@ module_exit(crypto_ecb_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ECB block cipher algorithm"); +MODULE_ALIAS_CRYPTO("ecb"); diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c index 42ce9f570aec..388f582ab0b9 100644 --- a/crypto/eseqiv.c +++ b/crypto/eseqiv.c @@ -267,3 +267,4 @@ module_exit(eseqiv_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Encrypted Sequence Number IV Generator"); +MODULE_ALIAS_CRYPTO("eseqiv"); diff --git a/crypto/gcm.c b/crypto/gcm.c index 0099296a6934..9cea4d0b6904 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -1444,3 +1444,4 @@ MODULE_AUTHOR("Mikko Herranen "); MODULE_ALIAS_CRYPTO("gcm_base"); MODULE_ALIAS_CRYPTO("rfc4106"); MODULE_ALIAS_CRYPTO("rfc4543"); +MODULE_ALIAS_CRYPTO("gcm"); diff --git a/crypto/hmac.c b/crypto/hmac.c index 8d9544cf8169..ade790b454e9 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -271,3 +271,4 @@ module_exit(hmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("HMAC hash algorithm"); +MODULE_ALIAS_CRYPTO("hmac"); diff --git a/crypto/lrw.c b/crypto/lrw.c index ba42acc4deba..6f9908a7ebcb 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -400,3 +400,4 @@ module_exit(crypto_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LRW block cipher mode"); +MODULE_ALIAS_CRYPTO("lrw"); diff --git a/crypto/pcbc.c b/crypto/pcbc.c index d1b8bdfb5855..f654965f0933 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -295,3 +295,4 @@ module_exit(crypto_pcbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("PCBC block cipher algorithm"); +MODULE_ALIAS_CRYPTO("pcbc"); diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 309d345ead95..c305d4112735 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -565,3 +565,4 @@ module_exit(pcrypt_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert "); MODULE_DESCRIPTION("Parallel crypto wrapper"); +MODULE_ALIAS_CRYPTO("pcrypt"); diff --git a/crypto/seqiv.c b/crypto/seqiv.c index f2cba4ed6f25..49a4069ff453 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -362,3 +362,4 @@ module_exit(seqiv_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Sequence Number IV Generator"); +MODULE_ALIAS_CRYPTO("seqiv"); diff --git a/crypto/vmac.c b/crypto/vmac.c index 2eb11a30c29c..bf2d3a89845f 100644 --- a/crypto/vmac.c +++ b/crypto/vmac.c @@ -713,3 +713,4 @@ module_exit(vmac_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("VMAC hash algorithm"); +MODULE_ALIAS_CRYPTO("vmac"); diff --git a/crypto/xcbc.c b/crypto/xcbc.c index a5fbdf3738cf..df90b332554c 100644 --- a/crypto/xcbc.c +++ b/crypto/xcbc.c @@ -286,3 +286,4 @@ module_exit(crypto_xcbc_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XCBC keyed hash algorithm"); +MODULE_ALIAS_CRYPTO("xcbc"); diff --git a/crypto/xts.c b/crypto/xts.c index ca1608f44cb5..f6fd43f100c8 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -362,3 +362,4 @@ module_exit(crypto_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("XTS block cipher mode"); +MODULE_ALIAS_CRYPTO("xts"); From 85976492d2ae1659f9bf9c810c304822970a0c95 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 11 Jan 2015 18:17:42 +0100 Subject: [PATCH 165/298] crypto: add missing crypto module aliases commit 3e14dcf7cb80b34a1f38b55bc96f02d23fdaaaaf upstream. Commit 5d26a105b5a7 ("crypto: prefix module autoloading with "crypto-"") changed the automatic module loading when requesting crypto algorithms to prefix all module requests with "crypto-". This requires all crypto modules to have a crypto specific module alias even if their file name would otherwise match the requested crypto algorithm. Even though commit 5d26a105b5a7 added those aliases for a vast amount of modules, it was missing a few. Add the required MODULE_ALIAS_CRYPTO annotations to those files to make them get loaded automatically, again. This fixes, e.g., requesting 'ecb(blowfish-generic)', which used to work with kernels v3.18 and below. Also change MODULE_ALIAS() lines to MODULE_ALIAS_CRYPTO(). The former won't work for crypto modules any more. Fixes: 5d26a105b5a7 ("crypto: prefix module autoloading with "crypto-"") Cc: Kees Cook Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/crypto/sha1.c | 1 + crypto/aes_generic.c | 1 + crypto/ansi_cprng.c | 1 + crypto/blowfish_generic.c | 1 + crypto/camellia_generic.c | 1 + crypto/cast5_generic.c | 1 + crypto/cast6_generic.c | 1 + crypto/crct10dif_generic.c | 1 + crypto/des_generic.c | 5 +++-- crypto/ghash-generic.c | 1 + crypto/krng.c | 1 + crypto/salsa20_generic.c | 1 + crypto/serpent_generic.c | 1 + crypto/sha1_generic.c | 1 + crypto/sha256_generic.c | 2 ++ crypto/sha512_generic.c | 2 ++ crypto/tea.c | 1 + crypto/tgr192.c | 1 + crypto/twofish_generic.c | 1 + crypto/wp512.c | 1 + 20 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index 0f88c7b41119..b51da9132744 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -154,4 +154,5 @@ module_exit(sha1_powerpc_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); +MODULE_ALIAS_CRYPTO("sha1"); MODULE_ALIAS_CRYPTO("sha1-powerpc"); diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index 9b3c54c1cbe8..3dd101144a58 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -1475,3 +1475,4 @@ module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("aes-generic"); diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index b4485a108389..6f5bebc9bf01 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -477,3 +477,4 @@ MODULE_PARM_DESC(dbg, "Boolean to enable debugging (0/1 == off/on)"); module_init(prng_mod_init); module_exit(prng_mod_fini); MODULE_ALIAS_CRYPTO("stdrng"); +MODULE_ALIAS_CRYPTO("ansi_cprng"); diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c index 7bd71f02d0dd..87b392a77a93 100644 --- a/crypto/blowfish_generic.c +++ b/crypto/blowfish_generic.c @@ -139,3 +139,4 @@ module_exit(blowfish_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher Algorithm"); MODULE_ALIAS_CRYPTO("blowfish"); +MODULE_ALIAS_CRYPTO("blowfish-generic"); diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c index 1b74c5a3e891..a02286bf319e 100644 --- a/crypto/camellia_generic.c +++ b/crypto/camellia_generic.c @@ -1099,3 +1099,4 @@ module_exit(camellia_fini); MODULE_DESCRIPTION("Camellia Cipher Algorithm"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-generic"); diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c index 84c86db67ec7..df5c72629383 100644 --- a/crypto/cast5_generic.c +++ b/crypto/cast5_generic.c @@ -550,3 +550,4 @@ module_exit(cast5_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cast5 Cipher Algorithm"); MODULE_ALIAS_CRYPTO("cast5"); +MODULE_ALIAS_CRYPTO("cast5-generic"); diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c index f408f0bd8de2..058c8d755d03 100644 --- a/crypto/cast6_generic.c +++ b/crypto/cast6_generic.c @@ -292,3 +292,4 @@ module_exit(cast6_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cast6 Cipher Algorithm"); MODULE_ALIAS_CRYPTO("cast6"); +MODULE_ALIAS_CRYPTO("cast6-generic"); diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c index 08bb4f504520..c1229614c7e3 100644 --- a/crypto/crct10dif_generic.c +++ b/crypto/crct10dif_generic.c @@ -125,3 +125,4 @@ MODULE_AUTHOR("Tim Chen "); MODULE_DESCRIPTION("T10 DIF CRC calculation."); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif-generic"); diff --git a/crypto/des_generic.c b/crypto/des_generic.c index 5ec5ed544149..3ec6071309d9 100644 --- a/crypto/des_generic.c +++ b/crypto/des_generic.c @@ -971,8 +971,6 @@ static struct crypto_alg des_algs[2] = { { .cia_decrypt = des3_ede_decrypt } } } }; -MODULE_ALIAS_CRYPTO("des3_ede"); - static int __init des_generic_mod_init(void) { return crypto_register_algs(des_algs, ARRAY_SIZE(des_algs)); @@ -990,3 +988,6 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); MODULE_AUTHOR("Dag Arne Osvik "); MODULE_ALIAS_CRYPTO("des"); +MODULE_ALIAS_CRYPTO("des-generic"); +MODULE_ALIAS_CRYPTO("des3_ede"); +MODULE_ALIAS_CRYPTO("des3_ede-generic"); diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c index 4e97fae9666f..bac70995e064 100644 --- a/crypto/ghash-generic.c +++ b/crypto/ghash-generic.c @@ -173,3 +173,4 @@ module_exit(ghash_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm"); MODULE_ALIAS_CRYPTO("ghash"); +MODULE_ALIAS_CRYPTO("ghash-generic"); diff --git a/crypto/krng.c b/crypto/krng.c index 67c88b331210..0224841b6579 100644 --- a/crypto/krng.c +++ b/crypto/krng.c @@ -63,3 +63,4 @@ module_exit(krng_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Kernel Random Number Generator"); MODULE_ALIAS_CRYPTO("stdrng"); +MODULE_ALIAS_CRYPTO("krng"); diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c index 3d0f9df30ac9..f550b5d94630 100644 --- a/crypto/salsa20_generic.c +++ b/crypto/salsa20_generic.c @@ -249,3 +249,4 @@ module_exit(salsa20_generic_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm"); MODULE_ALIAS_CRYPTO("salsa20"); +MODULE_ALIAS_CRYPTO("salsa20-generic"); diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c index a53b5e2af335..94970a794975 100644 --- a/crypto/serpent_generic.c +++ b/crypto/serpent_generic.c @@ -667,3 +667,4 @@ MODULE_DESCRIPTION("Serpent and tnepres (kerneli compatible serpent reversed) Ci MODULE_AUTHOR("Dag Arne Osvik "); MODULE_ALIAS_CRYPTO("tnepres"); MODULE_ALIAS_CRYPTO("serpent"); +MODULE_ALIAS_CRYPTO("serpent-generic"); diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c index 76d300fe968f..fdf7c00de4b0 100644 --- a/crypto/sha1_generic.c +++ b/crypto/sha1_generic.c @@ -154,3 +154,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); MODULE_ALIAS_CRYPTO("sha1"); +MODULE_ALIAS_CRYPTO("sha1-generic"); diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index 8d7811a0031c..136381bdd48d 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -385,4 +385,6 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm"); MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha224-generic"); MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha256-generic"); diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 5479f302b757..6c6d901a7cc1 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -288,4 +288,6 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms"); MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha384-generic"); MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha512-generic"); diff --git a/crypto/tea.c b/crypto/tea.c index 495be2d0077d..b70b441c7d1e 100644 --- a/crypto/tea.c +++ b/crypto/tea.c @@ -270,6 +270,7 @@ static void __exit tea_mod_fini(void) crypto_unregister_algs(tea_algs, ARRAY_SIZE(tea_algs)); } +MODULE_ALIAS_CRYPTO("tea"); MODULE_ALIAS_CRYPTO("xtea"); MODULE_ALIAS_CRYPTO("xeta"); diff --git a/crypto/tgr192.c b/crypto/tgr192.c index 5a5333f166ea..f7ed2fba396c 100644 --- a/crypto/tgr192.c +++ b/crypto/tgr192.c @@ -676,6 +676,7 @@ static void __exit tgr192_mod_fini(void) crypto_unregister_shashes(tgr_algs, ARRAY_SIZE(tgr_algs)); } +MODULE_ALIAS_CRYPTO("tgr192"); MODULE_ALIAS_CRYPTO("tgr160"); MODULE_ALIAS_CRYPTO("tgr128"); diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c index 523ad8c4e359..ebf7a3efb572 100644 --- a/crypto/twofish_generic.c +++ b/crypto/twofish_generic.c @@ -212,3 +212,4 @@ module_exit(twofish_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Twofish Cipher Algorithm"); MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-generic"); diff --git a/crypto/wp512.c b/crypto/wp512.c index 8d4076417e87..253db94b5479 100644 --- a/crypto/wp512.c +++ b/crypto/wp512.c @@ -1167,6 +1167,7 @@ static void __exit wp512_mod_fini(void) crypto_unregister_shashes(wp_algs, ARRAY_SIZE(wp_algs)); } +MODULE_ALIAS_CRYPTO("wp512"); MODULE_ALIAS_CRYPTO("wp384"); MODULE_ALIAS_CRYPTO("wp256"); From c6aee0d0280bd4ff974a8d093010e8cb885f0f32 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 18 Apr 2014 13:38:34 +0530 Subject: [PATCH 166/298] ARC: Delete stale barrier.h commit 64ee9f32c33cbd53545284742e73c17fedf9d429 upstream. Commit 93ea02bb8435 ("arch: Clean up asm/barrier.h implementations") wired generic barrier.h for ARC, but failed to delete the existing file. In 3.15, due to rcupdate.h updates, this causes a build breakage on ARC: CC arch/arc/kernel/asm-offsets.s In file included from include/linux/sched.h:45:0, from arch/arc/kernel/asm-offsets.c:9: include/linux/rculist.h: In function __list_add_rcu: include/linux/rculist.h:54:2: error: implicit declaration of function smp_store_release [-Werror=implicit-function-declaration] rcu_assign_pointer(list_next_rcu(prev), new); ^ Cc: Peter Zijlstra Signed-off-by: Vineet Gupta Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/barrier.h | 37 ---------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 arch/arc/include/asm/barrier.h diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h deleted file mode 100644 index c32245c3d1e9..000000000000 --- a/arch/arc/include/asm/barrier.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __ASM_BARRIER_H -#define __ASM_BARRIER_H - -#ifndef __ASSEMBLY__ - -/* TODO-vineetg: Need to see what this does, don't we need sync anywhere */ -#define mb() __asm__ __volatile__ ("" : : : "memory") -#define rmb() mb() -#define wmb() mb() -#define set_mb(var, value) do { var = value; mb(); } while (0) -#define set_wmb(var, value) do { var = value; wmb(); } while (0) -#define read_barrier_depends() mb() - -/* TODO-vineetg verify the correctness of macros here */ -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#endif - -#define smp_read_barrier_depends() do { } while (0) - -#endif - -#endif From 7a079c1aabcf1e2cf3c5f2ffb493e26d2204450e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 24 Jun 2014 19:33:39 +0530 Subject: [PATCH 167/298] ARC: Fix build breakage for !CONFIG_ARC_DW2_UNWIND commit ba25915fb2cd18152cb14b144dbe8bf2f2bd8e45 upstream. Fixes: ec7ac6afd07b (ARC: switch to generic ENTRY/END assembler annotations) Reported-by: Anton Kolesov Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/ctx_sw_asm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S index 2ff0347a2fd7..e248594097e7 100644 --- a/arch/arc/kernel/ctx_sw_asm.S +++ b/arch/arc/kernel/ctx_sw_asm.S @@ -10,9 +10,9 @@ * -This is the more "natural" hand written assembler */ +#include #include /* For the SAVE_* macros */ #include -#include #define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) From bb58e89d00af0644a87cd7c0f1392584238dc3e4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 6 Oct 2014 10:55:49 -0700 Subject: [PATCH 168/298] Input: evdev - fix EVIOCG{type} ioctl commit 7c4f56070fde2367766fa1fb04852599b5e1ad35 upstream. The 'max' size passed into the function is measured in number of bits (KEY_MAX, LED_MAX, etc) so we need to convert it accordingly before trying to copy the data out, otherwise we will try copying too much and end up with up with a page fault. Reported-by: Pavel Machek Reviewed-by: Pavel Machek Reviewed-by: David Herrmann Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/evdev.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index ce953d895f5b..fb787c3e88d9 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -757,20 +757,23 @@ static int evdev_handle_set_keycode_v2(struct input_dev *dev, void __user *p) */ static int evdev_handle_get_val(struct evdev_client *client, struct input_dev *dev, unsigned int type, - unsigned long *bits, unsigned int max, - unsigned int size, void __user *p, int compat) + unsigned long *bits, unsigned int maxbit, + unsigned int maxlen, void __user *p, + int compat) { int ret; unsigned long *mem; + size_t len; - mem = kmalloc(sizeof(unsigned long) * max, GFP_KERNEL); + len = BITS_TO_LONGS(maxbit) * sizeof(unsigned long); + mem = kmalloc(len, GFP_KERNEL); if (!mem) return -ENOMEM; spin_lock_irq(&dev->event_lock); spin_lock(&client->buffer_lock); - memcpy(mem, bits, sizeof(unsigned long) * max); + memcpy(mem, bits, len); spin_unlock(&dev->event_lock); @@ -778,7 +781,7 @@ static int evdev_handle_get_val(struct evdev_client *client, spin_unlock_irq(&client->buffer_lock); - ret = bits_to_user(mem, max, size, p, compat); + ret = bits_to_user(mem, maxbit, maxlen, p, compat); if (ret < 0) evdev_queue_syn_dropped(client); From 0e03c6a8708bce57b06326208e494712f5543491 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Fri, 10 Oct 2014 13:09:53 -0700 Subject: [PATCH 169/298] tty: Fix pty master poll() after slave closes v2 commit c4dc304677e8d566572c4738d95c48be150c6606 upstream. Commit f95499c3030f ("n_tty: Don't wait for buffer work in read() loop") introduces a race window where a pty master can be signalled that the pty slave was closed before all the data that the slave wrote is delivered. Commit f8747d4a466a ("tty: Fix pty master read() after slave closes") fixed the problem in case of n_tty_read, but the problem still exists for n_tty_poll. This can be seen by running 'for ((i=0; i<100;i++));do ./test.py ;done' where test.py is: import os, select, pty (pid, pty_fd) = pty.fork() if pid == 0: os.write(1, 'This string should be received by parent') else: poller = select.epoll() poller.register( pty_fd, select.EPOLLIN ) ready = poller.poll( 1 * 1000 ) for fd, events in ready: if not events & select.EPOLLIN: print 'missed POLLIN event' else: print os.read(fd, 100) poller.close() The string from the slave is missed several times. This patch takes the same approach as the fix for read and special cases this condition for poll. Tested on 3.16. Signed-off-by: Francesco Ruggeri Cc: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index d46b4ccec8cd..850e232d086e 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2417,12 +2417,17 @@ static unsigned int n_tty_poll(struct tty_struct *tty, struct file *file, poll_wait(file, &tty->read_wait, wait); poll_wait(file, &tty->write_wait, wait); - if (input_available_p(tty, 1)) - mask |= POLLIN | POLLRDNORM; - if (tty->packet && tty->link->ctrl_status) - mask |= POLLPRI | POLLIN | POLLRDNORM; if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= POLLHUP; + if (input_available_p(tty, 1)) + mask |= POLLIN | POLLRDNORM; + else if (mask & POLLHUP) { + tty_flush_to_ldisc(tty); + if (input_available_p(tty, 1)) + mask |= POLLIN | POLLRDNORM; + } + if (tty->packet && tty->link->ctrl_status) + mask |= POLLPRI | POLLIN | POLLRDNORM; if (tty_hung_up_p(file)) mask |= POLLHUP; if (!(mask & (POLLHUP | POLLIN | POLLRDNORM))) { From 94732f734786f4388aecf47e238b933fa779d61d Mon Sep 17 00:00:00 2001 From: Sjoerd Simons Date: Mon, 19 Jan 2015 23:07:09 +0100 Subject: [PATCH 170/298] mmc: sdhci: Don't signal the sdio irq if it's not setup [Not needed in newer kernels due to refactoring fixing this issue.] With 3.14.29 (and older kernels) some of my I.mx6 Sabrelite boards were crashing with the following oops: sdhci: Secure Digital Host Controller Interface driver sdhci: Copyright(c) Pierre Ossman sdhci-pltfm: SDHCI platform and OF driver helper sdhci-esdhc-imx 2198000.usdhc: could not get ultra high speed state, work on normal mode mmc0: no vqmmc regulator found mmc0: SDHCI controller on 2198000.usdhc [2198000.usdhc] using ADMA Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.14.29 #1 task: c08a7120 ti: c089c000 task.ti: c089c000 PC is at wake_up_process+0x8/0x40 LR is at sdhci_irq+0x748/0x9c4 Full boot log can be found at: http://storage.kernelci.org/stable/v3.14.29/arm-multi_v7_defconfig/lab-collabora/boot-imx6q-sabrelite.html This happens if the sdhci interrupt status contains SDHCI_INT_CARD_INT, while the sdio irq was never setup. This patch fixes that in a minimal way by checking if the sdio irq was setup. In more recent kernels this bug went away due to refactoring done by Russel King. So an alternative (potentially better?) fix for this patch is to cherrypick the following patches from a recent kernel: 18258f7239a6 - genirq: Provide synchronize_hardirq() bf3b5ec66bd0 - mmc: sdio_irq: rework sdio irq handling 41005003bcaf - mmc: sdhci: clean up interrupt handling 781e989cf593 - mmc: sdhci: convert to new SDIO IRQ handling Signed-off-by: Sjoerd Simons Cc: Russell King Cc: Tyler Baker Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 7e0176321aff..881bf89acfcc 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2537,7 +2537,7 @@ out: /* * We have to delay this as it calls back into the driver. */ - if (cardint) + if (cardint && host->mmc->sdio_irqs) mmc_signal_sdio_irq(host->mmc); return result; From 83a1ac8aeb09007988ea2dc858e1827d0523ad2e Mon Sep 17 00:00:00 2001 From: Jianyu Zhan Date: Wed, 4 Jun 2014 16:07:31 -0700 Subject: [PATCH 171/298] mm/swap.c: clean up *lru_cache_add* functions commit 2329d3751b082b4fd354f334a88662d72abac52d upstream. In mm/swap.c, __lru_cache_add() is exported, but actually there are no users outside this file. This patch unexports __lru_cache_add(), and makes it static. It also exports lru_cache_add_file(), as it is use by cifs and fuse, which can loaded as modules. Signed-off-by: Jianyu Zhan Cc: Minchan Kim Cc: Johannes Weiner Cc: Shaohua Li Cc: Bob Liu Cc: Seth Jennings Cc: Joonsoo Kim Cc: Rafael Aquini Cc: Mel Gorman Acked-by: Rik van Riel Cc: Andrea Arcangeli Cc: Khalid Aziz Cc: Christoph Hellwig Reviewed-by: Zhang Yanfei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/swap.h | 19 ++----------------- mm/swap.c | 31 +++++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 789324976801..13eb44cfebb7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -268,8 +268,9 @@ extern unsigned long nr_free_pagecache_pages(void); /* linux/mm/swap.c */ -extern void __lru_cache_add(struct page *); extern void lru_cache_add(struct page *); +extern void lru_cache_add_anon(struct page *page); +extern void lru_cache_add_file(struct page *page); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void activate_page(struct page *); @@ -283,22 +284,6 @@ extern void swap_setup(void); extern void add_page_to_unevictable_list(struct page *page); -/** - * lru_cache_add: add a page to the page lists - * @page: the page to add - */ -static inline void lru_cache_add_anon(struct page *page) -{ - ClearPageActive(page); - __lru_cache_add(page); -} - -static inline void lru_cache_add_file(struct page *page) -{ - ClearPageActive(page); - __lru_cache_add(page); -} - /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); diff --git a/mm/swap.c b/mm/swap.c index c8048d71c642..49dfd717726f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -580,13 +580,7 @@ void mark_page_accessed(struct page *page) } EXPORT_SYMBOL(mark_page_accessed); -/* - * Queue the page for addition to the LRU via pagevec. The decision on whether - * to add the page to the [in]active [file|anon] list is deferred until the - * pagevec is drained. This gives a chance for the caller of __lru_cache_add() - * have the page added to the active list using mark_page_accessed(). - */ -void __lru_cache_add(struct page *page) +static void __lru_cache_add(struct page *page) { struct pagevec *pvec = &get_cpu_var(lru_add_pvec); @@ -596,11 +590,32 @@ void __lru_cache_add(struct page *page) pagevec_add(pvec, page); put_cpu_var(lru_add_pvec); } -EXPORT_SYMBOL(__lru_cache_add); + +/** + * lru_cache_add: add a page to the page lists + * @page: the page to add + */ +void lru_cache_add_anon(struct page *page) +{ + ClearPageActive(page); + __lru_cache_add(page); +} + +void lru_cache_add_file(struct page *page) +{ + ClearPageActive(page); + __lru_cache_add(page); +} +EXPORT_SYMBOL(lru_cache_add_file); /** * lru_cache_add - add a page to a page list * @page: the page to be added to the LRU. + * + * Queue the page for addition to the LRU via pagevec. The decision on whether + * to add the page to the [in]active [file|anon] list is deferred until the + * pagevec is drained. This gives a chance for the caller of lru_cache_add() + * have the page added to the active list using mark_page_accessed(). */ void lru_cache_add(struct page *page) { From c9c1e5d1d2b69554b7c922c88a3b5a7e8c2207c4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:05 -0700 Subject: [PATCH 172/298] mm: page_alloc: do not update zlc unless the zlc is active commit 65bb371984d6a2c909244eb749e482bb40b72e36 upstream. The zlc is used on NUMA machines to quickly skip over zones that are full. However it is always updated, even for the first zone scanned when the zlc might not even be active. As it's a write to a bitmap that potentially bounces cache line it's deceptively expensive and most machines will not care. Only update the zlc if it was active. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4b258297cc7c..c7e27b8c7200 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2059,7 +2059,7 @@ try_this_zone: if (page) break; this_zone_full: - if (IS_ENABLED(CONFIG_NUMA)) + if (IS_ENABLED(CONFIG_NUMA) && zlc_active) zlc_mark_zone_full(zonelist, z); } From e937b98c04ea762d011f7d4a38ef500bba6875a5 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:06 -0700 Subject: [PATCH 173/298] mm: page_alloc: do not treat a zone that cannot be used for dirty pages as "full" commit 800a1e750c7b04c2aa2459afca77e936e01c0029 upstream. If a zone cannot be used for a dirty page then it gets marked "full" which is cached in the zlc and later potentially skipped by allocation requests that have nothing to do with dirty zones. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c7e27b8c7200..481f6868d80a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1991,7 +1991,7 @@ zonelist_scan: */ if ((alloc_flags & ALLOC_WMARK_LOW) && (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone)) - goto this_zone_full; + continue; mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; if (!zone_watermark_ok(zone, order, mark, From 7a0b5ca9ba11c70b7b960c7b3eedbbdd746d12a0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:07 -0700 Subject: [PATCH 174/298] include/linux/jump_label.h: expose the reference count commit ea5e9539abf1258f23e725cb9cb25aa74efa29eb upstream. This patch exposes the jump_label reference count in preparation for the next patch. cpusets cares about both the jump_label being enabled and how many users of the cpusets there currently are. Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/jump_label.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 5c1dfb2a9e73..784304b222b3 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -69,6 +69,10 @@ struct static_key { # include # define HAVE_JUMP_LABEL +#else +struct static_key { + atomic_t enabled; +}; #endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ enum jump_label_type { @@ -79,6 +83,12 @@ enum jump_label_type { struct module; #include + +static inline int static_key_count(struct static_key *key) +{ + return atomic_read(&key->enabled); +} + #ifdef HAVE_JUMP_LABEL #define JUMP_LABEL_TYPE_FALSE_BRANCH 0UL @@ -134,10 +144,6 @@ extern void jump_label_apply_nops(struct module *mod); #else /* !HAVE_JUMP_LABEL */ -struct static_key { - atomic_t enabled; -}; - static __always_inline void jump_label_init(void) { static_key_initialized = true; @@ -145,14 +151,14 @@ static __always_inline void jump_label_init(void) static __always_inline bool static_key_false(struct static_key *key) { - if (unlikely(atomic_read(&key->enabled) > 0)) + if (unlikely(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { - if (likely(atomic_read(&key->enabled) > 0)) + if (likely(static_key_count(key) > 0)) return true; return false; } @@ -194,7 +200,7 @@ static inline int jump_label_apply_nops(struct module *mod) static inline bool static_key_enabled(struct static_key *key) { - return (atomic_read(&key->enabled) > 0); + return static_key_count(key) > 0; } #endif /* _LINUX_JUMP_LABEL_H */ From ab9f54b2d1589e20d6722ec5ecb359debf9f1fc7 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:08 -0700 Subject: [PATCH 175/298] mm: page_alloc: use jump labels to avoid checking number_of_cpusets commit 664eeddeef6539247691197c1ac124d4aa872ab6 upstream. If cpusets are not in use then we still check a global variable on every page allocation. Use jump labels to avoid the overhead. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/cpuset.h | 29 ++++++++++++++++++++++++++--- kernel/cpuset.c | 14 ++++---------- mm/page_alloc.c | 3 ++- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index b19d3dc2e651..ade2390ffe92 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -12,10 +12,31 @@ #include #include #include +#include #ifdef CONFIG_CPUSETS -extern int number_of_cpusets; /* How many cpusets are defined in system? */ +extern struct static_key cpusets_enabled_key; +static inline bool cpusets_enabled(void) +{ + return static_key_false(&cpusets_enabled_key); +} + +static inline int nr_cpusets(void) +{ + /* jump label reference count + the top-level cpuset */ + return static_key_count(&cpusets_enabled_key) + 1; +} + +static inline void cpuset_inc(void) +{ + static_key_slow_inc(&cpusets_enabled_key); +} + +static inline void cpuset_dec(void) +{ + static_key_slow_dec(&cpusets_enabled_key); +} extern int cpuset_init(void); extern void cpuset_init_smp(void); @@ -32,13 +53,13 @@ extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask); static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) { - return number_of_cpusets <= 1 || + return nr_cpusets() <= 1 || __cpuset_node_allowed_softwall(node, gfp_mask); } static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) { - return number_of_cpusets <= 1 || + return nr_cpusets() <= 1 || __cpuset_node_allowed_hardwall(node, gfp_mask); } @@ -124,6 +145,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) #else /* !CONFIG_CPUSETS */ +static inline bool cpusets_enabled(void) { return false; } + static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 15b3ea693225..2fb2877e6961 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -61,12 +61,7 @@ #include #include -/* - * Tracks how many cpusets are currently defined in system. - * When there is only one cpuset (the root cpuset) we can - * short circuit some hooks. - */ -int number_of_cpusets __read_mostly; +struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE; /* See "Frequency meter" comments, below. */ @@ -611,7 +606,7 @@ static int generate_sched_domains(cpumask_var_t **domains, goto done; } - csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL); + csa = kmalloc(nr_cpusets() * sizeof(cp), GFP_KERNEL); if (!csa) goto done; csn = 0; @@ -1961,7 +1956,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); - number_of_cpusets++; + cpuset_inc(); if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; @@ -2012,7 +2007,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); - number_of_cpusets--; + cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); mutex_unlock(&cpuset_mutex); @@ -2067,7 +2062,6 @@ int __init cpuset_init(void) if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)) BUG(); - number_of_cpusets = 1; return 0; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 481f6868d80a..584e4578ba00 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1945,7 +1945,8 @@ zonelist_scan: if (IS_ENABLED(CONFIG_NUMA) && zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; - if ((alloc_flags & ALLOC_CPUSET) && + if (cpusets_enabled() && + (alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed_softwall(zone, gfp_mask)) continue; BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); From a5667128a33f8fffb738f3dc4b02ac80e3bafd21 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:33 -0700 Subject: [PATCH 176/298] mm: page_alloc: calculate classzone_idx once from the zonelist ref commit d8846374a85f4290a473a4e2a64c1ba046c4a0e1 upstream. There is no need to calculate zone_idx(preferred_zone) multiple times or use the pgdat to figure it out. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Acked-by: David Rientjes Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 60 ++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 584e4578ba00..9c825035229c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1922,17 +1922,15 @@ static inline void init_zone_allows_reclaim(int nid) static struct page * get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, struct zonelist *zonelist, int high_zoneidx, int alloc_flags, - struct zone *preferred_zone, int migratetype) + struct zone *preferred_zone, int classzone_idx, int migratetype) { struct zoneref *z; struct page *page = NULL; - int classzone_idx; struct zone *zone; nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ int zlc_active = 0; /* set if using zonelist_cache */ int did_zlc_setup = 0; /* just call zlc_setup() one time */ - classzone_idx = zone_idx(preferred_zone); zonelist_scan: /* * Scan zonelist, looking for a zone with enough free. @@ -2189,7 +2187,7 @@ static inline struct page * __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, struct zone *preferred_zone, - int migratetype) + int classzone_idx, int migratetype) { struct page *page; @@ -2215,7 +2213,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (page) goto out; @@ -2250,7 +2248,7 @@ static struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, enum migrate_mode mode, + int classzone_idx, int migratetype, enum migrate_mode mode, bool *contended_compaction, bool *deferred_compaction, unsigned long *did_some_progress) { @@ -2278,7 +2276,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (page) { preferred_zone->compact_blockskip_flush = false; compaction_defer_reset(preferred_zone, order, true); @@ -2310,7 +2308,8 @@ static inline struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, enum migrate_mode mode, bool *contended_compaction, + int classzone_idx, int migratetype, + enum migrate_mode mode, bool *contended_compaction, bool *deferred_compaction, unsigned long *did_some_progress) { return NULL; @@ -2350,7 +2349,7 @@ static inline struct page * __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress) + int classzone_idx, int migratetype, unsigned long *did_some_progress) { struct page *page = NULL; bool drained = false; @@ -2368,7 +2367,8 @@ retry: page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, - preferred_zone, migratetype); + preferred_zone, classzone_idx, + migratetype); /* * If an allocation failed after direct reclaim, it could be because @@ -2391,14 +2391,14 @@ static inline struct page * __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, struct zone *preferred_zone, - int migratetype) + int classzone_idx, int migratetype) { struct page *page; do { page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, ALLOC_NO_WATERMARKS, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (!page && gfp_mask & __GFP_NOFAIL) wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); @@ -2499,7 +2499,7 @@ static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, struct zone *preferred_zone, - int migratetype) + int classzone_idx, int migratetype) { const gfp_t wait = gfp_mask & __GFP_WAIT; struct page *page = NULL; @@ -2548,15 +2548,19 @@ restart: * Find the true preferred zone if the allocation is unconstrained by * cpusets. */ - if (!(alloc_flags & ALLOC_CPUSET) && !nodemask) - first_zones_zonelist(zonelist, high_zoneidx, NULL, - &preferred_zone); + if (!(alloc_flags & ALLOC_CPUSET) && !nodemask) { + struct zoneref *preferred_zoneref; + preferred_zoneref = first_zones_zonelist(zonelist, high_zoneidx, + NULL, + &preferred_zone); + classzone_idx = zonelist_zone_idx(preferred_zoneref); + } rebalance: /* This is the last chance, in general, before the goto nopage. */ page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (page) goto got_pg; @@ -2571,7 +2575,7 @@ rebalance: page = __alloc_pages_high_priority(gfp_mask, order, zonelist, high_zoneidx, nodemask, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (page) { goto got_pg; } @@ -2602,7 +2606,8 @@ rebalance: */ page = __alloc_pages_direct_compact(gfp_mask, order, zonelist, high_zoneidx, nodemask, alloc_flags, - preferred_zone, migratetype, + preferred_zone, + classzone_idx, migratetype, migration_mode, &contended_compaction, &deferred_compaction, &did_some_progress); @@ -2625,7 +2630,8 @@ rebalance: zonelist, high_zoneidx, nodemask, alloc_flags, preferred_zone, - migratetype, &did_some_progress); + classzone_idx, migratetype, + &did_some_progress); if (page) goto got_pg; @@ -2644,7 +2650,7 @@ rebalance: page = __alloc_pages_may_oom(gfp_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, - migratetype); + classzone_idx, migratetype); if (page) goto got_pg; @@ -2685,7 +2691,8 @@ rebalance: */ page = __alloc_pages_direct_compact(gfp_mask, order, zonelist, high_zoneidx, nodemask, alloc_flags, - preferred_zone, migratetype, + preferred_zone, + classzone_idx, migratetype, migration_mode, &contended_compaction, &deferred_compaction, &did_some_progress); @@ -2712,11 +2719,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, { enum zone_type high_zoneidx = gfp_zone(gfp_mask); struct zone *preferred_zone; + struct zoneref *preferred_zoneref; struct page *page = NULL; int migratetype = allocflags_to_migratetype(gfp_mask); unsigned int cpuset_mems_cookie; int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR; struct mem_cgroup *memcg = NULL; + int classzone_idx; gfp_mask &= gfp_allowed_mask; @@ -2746,11 +2755,12 @@ retry_cpuset: cpuset_mems_cookie = read_mems_allowed_begin(); /* The preferred zone is used for statistics later */ - first_zones_zonelist(zonelist, high_zoneidx, + preferred_zoneref = first_zones_zonelist(zonelist, high_zoneidx, nodemask ? : &cpuset_current_mems_allowed, &preferred_zone); if (!preferred_zone) goto out; + classzone_idx = zonelist_zone_idx(preferred_zoneref); #ifdef CONFIG_CMA if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) @@ -2760,7 +2770,7 @@ retry: /* First allocation attempt */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, alloc_flags, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); if (unlikely(!page)) { /* * The first pass makes sure allocations are spread @@ -2786,7 +2796,7 @@ retry: gfp_mask = memalloc_noio_flags(gfp_mask); page = __alloc_pages_slowpath(gfp_mask, order, zonelist, high_zoneidx, nodemask, - preferred_zone, migratetype); + preferred_zone, classzone_idx, migratetype); } trace_mm_page_alloc(page, order, gfp_mask, migratetype); From 993afbb8f012326a5b7b881202a7611de37a6c57 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:10 -0700 Subject: [PATCH 177/298] mm: page_alloc: only check the zone id check if pages are buddies commit d34c5fa06fade08a689fc171bf756fba2858ae73 upstream. A node/zone index is used to check if pages are compatible for merging but this happens unconditionally even if the buddy page is not free. Defer the calculation as long as possible. Ideally we would check the zone boundary but nodes can overlap. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Acked-by: Rik van Riel Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9c825035229c..9278bc2b80e6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -508,16 +508,26 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, if (!pfn_valid_within(page_to_pfn(buddy))) return 0; - if (page_zone_id(page) != page_zone_id(buddy)) - return 0; - if (page_is_guard(buddy) && page_order(buddy) == order) { VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); + + if (page_zone_id(page) != page_zone_id(buddy)) + return 0; + return 1; } if (PageBuddy(buddy) && page_order(buddy) == order) { VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); + + /* + * zone check is done late to avoid uselessly + * calculating zone/node ids for pages that could + * never merge. + */ + if (page_zone_id(page) != page_zone_id(buddy)) + return 0; + return 1; } return 0; From e1ca6f43c34e234f4d156a04ac340c28bb41b7db Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:12 -0700 Subject: [PATCH 178/298] mm: page_alloc: only check the alloc flags and gfp_mask for dirty once commit a6e21b14f22041382e832d30deda6f26f37b1097 upstream. Currently it's calculated once per zone in the zonelist. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Rik van Riel Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9278bc2b80e6..c3f2a9b3333f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1940,6 +1940,8 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ int zlc_active = 0; /* set if using zonelist_cache */ int did_zlc_setup = 0; /* just call zlc_setup() one time */ + bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) && + (gfp_mask & __GFP_WRITE); zonelist_scan: /* @@ -1998,8 +2000,7 @@ zonelist_scan: * will require awareness of zones in the * dirty-throttling and the flusher threads. */ - if ((alloc_flags & ALLOC_WMARK_LOW) && - (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone)) + if (consider_zone_dirty && !zone_dirty_ok(zone)) continue; mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; From 6a6d151242e4ae6b4988ff3fd10d808363f78784 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:14 -0700 Subject: [PATCH 179/298] mm: page_alloc: take the ALLOC_NO_WATERMARK check out of the fast path commit 5dab29113ca56335c78be3f98bf5ddf2ef8eb6a6 upstream. ALLOC_NO_WATERMARK is set in a few cases. Always by kswapd, always for __GFP_MEMALLOC, sometimes for swap-over-nfs, tasks etc. Each of these cases are relatively rare events but the ALLOC_NO_WATERMARK check is an unlikely branch in the fast path. This patch moves the check out of the fast path and after it has been determined that the watermarks have not been met. This helps the common fast path at the cost of making the slow path slower and hitting kswapd with a performance cost. It's a reasonable tradeoff. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Rik van Riel Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c3f2a9b3333f..a3edb34e6466 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1959,9 +1959,6 @@ zonelist_scan: (alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed_softwall(zone, gfp_mask)) continue; - BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); - if (unlikely(alloc_flags & ALLOC_NO_WATERMARKS)) - goto try_this_zone; /* * Distribute pages in proportion to the individual * zone size to ensure fair page aging. The zone a @@ -2008,6 +2005,11 @@ zonelist_scan: classzone_idx, alloc_flags)) { int ret; + /* Checked here to keep the fast path fast */ + BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); + if (alloc_flags & ALLOC_NO_WATERMARKS) + goto try_this_zone; + if (IS_ENABLED(CONFIG_NUMA) && !did_zlc_setup && nr_online_nodes > 1) { /* From 89712aa0964866059f13be8f771fbd7df1766776 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:21 -0700 Subject: [PATCH 180/298] mm: page_alloc: use unsigned int for order in more places commit 7aeb09f9104b760fc53c98cb7d20d06640baf9e6 upstream. X86 prefers the use of unsigned types for iterators and there is a tendency to mix whether a signed or unsigned type if used for page order. This converts a number of sites in mm/page_alloc.c to use unsigned int for order where possible. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 8 ++++---- mm/page_alloc.c | 43 ++++++++++++++++++++++-------------------- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 18843532a0c9..3e02545e03a6 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -807,10 +807,10 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat) extern struct mutex zonelists_mutex; void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); -bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags); +bool zone_watermark_ok(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags); +bool zone_watermark_ok_safe(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags); enum memmap_context { MEMMAP_EARLY, MEMMAP_HOTPLUG, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a3edb34e6466..4cfdc2fd7b09 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -408,7 +408,8 @@ static int destroy_compound_page(struct page *page, unsigned long order) return bad; } -static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) +static inline void prep_zero_page(struct page *page, unsigned int order, + gfp_t gfp_flags) { int i; @@ -452,7 +453,7 @@ static inline void set_page_guard_flag(struct page *page) { } static inline void clear_page_guard_flag(struct page *page) { } #endif -static inline void set_page_order(struct page *page, int order) +static inline void set_page_order(struct page *page, unsigned int order) { set_page_private(page, order); __SetPageBuddy(page); @@ -503,7 +504,7 @@ __find_buddy_index(unsigned long page_idx, unsigned int order) * For recording page's order, we use page_private(page). */ static inline int page_is_buddy(struct page *page, struct page *buddy, - int order) + unsigned int order) { if (!pfn_valid_within(page_to_pfn(buddy))) return 0; @@ -722,7 +723,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, spin_unlock(&zone->lock); } -static void free_one_page(struct zone *zone, struct page *page, int order, +static void free_one_page(struct zone *zone, struct page *page, unsigned int order, int migratetype) { spin_lock(&zone->lock); @@ -904,7 +905,7 @@ static inline int check_new_page(struct page *page) return 0; } -static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) +static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags) { int i; @@ -1115,16 +1116,17 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page, /* Remove an element from the buddy allocator from the fallback list */ static inline struct page * -__rmqueue_fallback(struct zone *zone, int order, int start_migratetype) +__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) { struct free_area *area; - int current_order; + unsigned int current_order; struct page *page; int migratetype, new_type, i; /* Find the largest possible block of pages in the other list */ - for (current_order = MAX_ORDER-1; current_order >= order; - --current_order) { + for (current_order = MAX_ORDER-1; + current_order >= order && current_order <= MAX_ORDER-1; + --current_order) { for (i = 0;; i++) { migratetype = fallbacks[start_migratetype][i]; @@ -1352,7 +1354,7 @@ void mark_free_pages(struct zone *zone) { unsigned long pfn, max_zone_pfn; unsigned long flags; - int order, t; + unsigned int order, t; struct list_head *curr; if (zone_is_empty(zone)) @@ -1547,8 +1549,8 @@ int split_free_page(struct page *page) */ static inline struct page *buffered_rmqueue(struct zone *preferred_zone, - struct zone *zone, int order, gfp_t gfp_flags, - int migratetype) + struct zone *zone, unsigned int order, + gfp_t gfp_flags, int migratetype) { unsigned long flags; struct page *page; @@ -1697,8 +1699,9 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) * Return true if free pages are above 'mark'. This takes into account the order * of the allocation. */ -static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags, long free_pages) +static bool __zone_watermark_ok(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags, + long free_pages) { /* free_pages my go negative - that's OK */ long min = mark; @@ -1732,15 +1735,15 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark, return true; } -bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, +bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, int alloc_flags) { return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags, zone_page_state(z, NR_FREE_PAGES)); } -bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags) +bool zone_watermark_ok_safe(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags) { long free_pages = zone_page_state(z, NR_FREE_PAGES); @@ -4137,7 +4140,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, static void __meminit zone_init_free_lists(struct zone *zone) { - int order, t; + unsigned int order, t; for_each_migratetype_order(order, t) { INIT_LIST_HEAD(&zone->free_area[order].free_list[t]); zone->free_area[order].nr_free = 0; @@ -6477,7 +6480,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) { struct page *page; struct zone *zone; - int order, i; + unsigned int order, i; unsigned long pfn; unsigned long flags; /* find the first valid pfn */ @@ -6529,7 +6532,7 @@ bool is_free_buddy_page(struct page *page) struct zone *zone = page_zone(page); unsigned long pfn = page_to_pfn(page); unsigned long flags; - int order; + unsigned int order; spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { From 722191a52ad6b1c89ab89f98114f5d88c5578bfe Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:17 -0700 Subject: [PATCH 181/298] mm: page_alloc: reduce number of times page_to_pfn is called commit dc4b0caff24d9b2918e9f27bc65499ee63187eba upstream. In the free path we calculate page_to_pfn multiple times. Reduce that. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Acked-by: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 9 +++++++-- include/linux/pageblock-flags.h | 33 +++++++++++++------------------- mm/page_alloc.c | 34 ++++++++++++++++++--------------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3e02545e03a6..1cc970e6324e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -78,10 +78,15 @@ extern int page_group_by_mobility_disabled; #define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1) #define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1) -static inline int get_pageblock_migratetype(struct page *page) +#define get_pageblock_migratetype(page) \ + get_pfnblock_flags_mask(page, page_to_pfn(page), \ + PB_migrate_end, MIGRATETYPE_MASK) + +static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn) { BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2); - return get_pageblock_flags_mask(page, PB_migrate_end, MIGRATETYPE_MASK); + return get_pfnblock_flags_mask(page, pfn, PB_migrate_end, + MIGRATETYPE_MASK); } struct free_area { diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index c08730c10c7a..2baeee12f48e 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -65,33 +65,26 @@ extern int pageblock_order; /* Forward declaration */ struct page; -unsigned long get_pageblock_flags_mask(struct page *page, +unsigned long get_pfnblock_flags_mask(struct page *page, + unsigned long pfn, unsigned long end_bitidx, unsigned long mask); -void set_pageblock_flags_mask(struct page *page, + +void set_pfnblock_flags_mask(struct page *page, unsigned long flags, + unsigned long pfn, unsigned long end_bitidx, unsigned long mask); /* Declarations for getting and setting flags. See mm/page_alloc.c */ -static inline unsigned long get_pageblock_flags_group(struct page *page, - int start_bitidx, int end_bitidx) -{ - unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1; - unsigned long mask = (1 << nr_flag_bits) - 1; - - return get_pageblock_flags_mask(page, end_bitidx, mask); -} - -static inline void set_pageblock_flags_group(struct page *page, - unsigned long flags, - int start_bitidx, int end_bitidx) -{ - unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1; - unsigned long mask = (1 << nr_flag_bits) - 1; - - set_pageblock_flags_mask(page, flags, end_bitidx, mask); -} +#define get_pageblock_flags_group(page, start_bitidx, end_bitidx) \ + get_pfnblock_flags_mask(page, page_to_pfn(page), \ + end_bitidx, \ + (1 << (end_bitidx - start_bitidx + 1)) - 1) +#define set_pageblock_flags_group(page, flags, start_bitidx, end_bitidx) \ + set_pfnblock_flags_mask(page, flags, page_to_pfn(page), \ + end_bitidx, \ + (1 << (end_bitidx - start_bitidx + 1)) - 1) #ifdef CONFIG_COMPACTION #define get_pageblock_skip(page) \ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4cfdc2fd7b09..9c28480aa07e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -560,6 +560,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, */ static inline void __free_one_page(struct page *page, + unsigned long pfn, struct zone *zone, unsigned int order, int migratetype) { @@ -576,7 +577,7 @@ static inline void __free_one_page(struct page *page, VM_BUG_ON(migratetype == -1); - page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); + page_idx = pfn & ((1 << MAX_ORDER) - 1); VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); VM_BUG_ON_PAGE(bad_range(zone, page), page); @@ -711,7 +712,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, list_del(&page->lru); mt = get_freepage_migratetype(page); /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ - __free_one_page(page, zone, 0, mt); + __free_one_page(page, page_to_pfn(page), zone, 0, mt); trace_mm_page_pcpu_drain(page, 0, mt); if (likely(!is_migrate_isolate_page(page))) { __mod_zone_page_state(zone, NR_FREE_PAGES, 1); @@ -723,13 +724,15 @@ static void free_pcppages_bulk(struct zone *zone, int count, spin_unlock(&zone->lock); } -static void free_one_page(struct zone *zone, struct page *page, unsigned int order, +static void free_one_page(struct zone *zone, + struct page *page, unsigned long pfn, + unsigned int order, int migratetype) { spin_lock(&zone->lock); zone->pages_scanned = 0; - __free_one_page(page, zone, order, migratetype); + __free_one_page(page, pfn, zone, order, migratetype); if (unlikely(!is_migrate_isolate(migratetype))) __mod_zone_freepage_state(zone, 1 << order, migratetype); spin_unlock(&zone->lock); @@ -766,15 +769,16 @@ static void __free_pages_ok(struct page *page, unsigned int order) { unsigned long flags; int migratetype; + unsigned long pfn = page_to_pfn(page); if (!free_pages_prepare(page, order)) return; local_irq_save(flags); __count_vm_events(PGFREE, 1 << order); - migratetype = get_pageblock_migratetype(page); + migratetype = get_pfnblock_migratetype(page, pfn); set_freepage_migratetype(page, migratetype); - free_one_page(page_zone(page), page, order, migratetype); + free_one_page(page_zone(page), page, pfn, order, migratetype); local_irq_restore(flags); } @@ -1393,12 +1397,13 @@ void free_hot_cold_page(struct page *page, int cold) struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; unsigned long flags; + unsigned long pfn = page_to_pfn(page); int migratetype; if (!free_pages_prepare(page, 0)) return; - migratetype = get_pageblock_migratetype(page); + migratetype = get_pfnblock_migratetype(page, pfn); set_freepage_migratetype(page, migratetype); local_irq_save(flags); __count_vm_event(PGFREE); @@ -1412,7 +1417,7 @@ void free_hot_cold_page(struct page *page, int cold) */ if (migratetype >= MIGRATE_PCPTYPES) { if (unlikely(is_migrate_isolate(migratetype))) { - free_one_page(zone, page, 0, migratetype); + free_one_page(zone, page, pfn, 0, migratetype); goto out; } migratetype = MIGRATE_MOVABLE; @@ -6068,17 +6073,16 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn) * @end_bitidx: The last bit of interest * returns pageblock_bits flags */ -unsigned long get_pageblock_flags_mask(struct page *page, +unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long end_bitidx, unsigned long mask) { struct zone *zone; unsigned long *bitmap; - unsigned long pfn, bitidx, word_bitidx; + unsigned long bitidx, word_bitidx; unsigned long word; zone = page_zone(page); - pfn = page_to_pfn(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); word_bitidx = bitidx / BITS_PER_LONG; @@ -6090,25 +6094,25 @@ unsigned long get_pageblock_flags_mask(struct page *page, } /** - * set_pageblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages + * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages * @page: The page within the block of interest * @start_bitidx: The first bit of interest * @end_bitidx: The last bit of interest * @flags: The flags to set */ -void set_pageblock_flags_mask(struct page *page, unsigned long flags, +void set_pfnblock_flags_mask(struct page *page, unsigned long flags, + unsigned long pfn, unsigned long end_bitidx, unsigned long mask) { struct zone *zone; unsigned long *bitmap; - unsigned long pfn, bitidx, word_bitidx; + unsigned long bitidx, word_bitidx; unsigned long old_word, word; BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4); zone = page_zone(page); - pfn = page_to_pfn(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); word_bitidx = bitidx / BITS_PER_LONG; From 5cfde3e59e6cbc3e5f894906559529efd04d869d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:22 -0700 Subject: [PATCH 182/298] mm: page_alloc: convert hot/cold parameter and immediate callers to bool commit b745bc85f21ea707e4ea1a91948055fa3e72c77b upstream. cold is a bool, make it one. Make the likely case the "if" part of the block instead of the else as according to the optimisation manual this is preferred. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- arch/tile/mm/homecache.c | 2 +- fs/fuse/dev.c | 2 +- include/linux/gfp.h | 4 ++-- include/linux/pagemap.h | 2 +- include/linux/swap.h | 2 +- mm/page_alloc.c | 20 ++++++++++---------- mm/swap.c | 4 ++-- mm/swap_state.c | 2 +- mm/vmscan.c | 6 +++--- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 004ba568d93f..33294fdc402e 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -417,7 +417,7 @@ void __homecache_free_pages(struct page *page, unsigned int order) if (put_page_testzero(page)) { homecache_change_page_home(page, order, PAGE_HOME_HASH); if (order == 0) { - free_hot_cold_page(page, 0); + free_hot_cold_page(page, false); } else { init_page_count(page); __free_pages(page, order); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 0a648bb455ae..6eb13c621a14 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1614,7 +1614,7 @@ out_finish: static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) { - release_pages(req->pages, req->num_pages, 0); + release_pages(req->pages, req->num_pages, false); } static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 39b81dc7d01a..3824ac62f395 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -369,8 +369,8 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -extern void free_hot_cold_page(struct page *page, int cold); -extern void free_hot_cold_page_list(struct list_head *list, int cold); +extern void free_hot_cold_page(struct page *page, bool cold); +extern void free_hot_cold_page_list(struct list_head *list, bool cold); extern void __free_memcg_kmem_pages(struct page *page, unsigned int order); extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 09c1b03867d9..0ab500011377 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -99,7 +99,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) #define page_cache_get(page) get_page(page) #define page_cache_release(page) put_page(page) -void release_pages(struct page **pages, int nr, int cold); +void release_pages(struct page **pages, int nr, bool cold); /* * speculatively take a reference to a page. diff --git a/include/linux/swap.h b/include/linux/swap.h index 13eb44cfebb7..c8beed191dd1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -441,7 +441,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) #define free_page_and_swap_cache(page) \ page_cache_release(page) #define free_pages_and_swap_cache(pages, nr) \ - release_pages((pages), (nr), 0); + release_pages((pages), (nr), false); static inline void show_swap_cache_info(void) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9c28480aa07e..13566037167e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1210,7 +1210,7 @@ retry_reserve: */ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, - int migratetype, int cold) + int migratetype, bool cold) { int i; @@ -1229,7 +1229,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * merge IO requests if the physical pages are ordered * properly. */ - if (likely(cold == 0)) + if (likely(!cold)) list_add(&page->lru, list); else list_add_tail(&page->lru, list); @@ -1390,9 +1390,9 @@ void mark_free_pages(struct zone *zone) /* * Free a 0-order page - * cold == 1 ? free a cold page : free a hot page + * cold == true ? free a cold page : free a hot page */ -void free_hot_cold_page(struct page *page, int cold) +void free_hot_cold_page(struct page *page, bool cold) { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; @@ -1424,10 +1424,10 @@ void free_hot_cold_page(struct page *page, int cold) } pcp = &this_cpu_ptr(zone->pageset)->pcp; - if (cold) - list_add_tail(&page->lru, &pcp->lists[migratetype]); - else + if (!cold) list_add(&page->lru, &pcp->lists[migratetype]); + else + list_add_tail(&page->lru, &pcp->lists[migratetype]); pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = ACCESS_ONCE(pcp->batch); @@ -1442,7 +1442,7 @@ out: /* * Free a list of 0-order pages */ -void free_hot_cold_page_list(struct list_head *list, int cold) +void free_hot_cold_page_list(struct list_head *list, bool cold) { struct page *page, *next; @@ -1559,7 +1559,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, { unsigned long flags; struct page *page; - int cold = !!(gfp_flags & __GFP_COLD); + bool cold = ((gfp_flags & __GFP_COLD) != 0); again: if (likely(order == 0)) { @@ -2868,7 +2868,7 @@ void __free_pages(struct page *page, unsigned int order) { if (put_page_testzero(page)) { if (order == 0) - free_hot_cold_page(page, 0); + free_hot_cold_page(page, false); else __free_pages_ok(page, order); } diff --git a/mm/swap.c b/mm/swap.c index 49dfd717726f..d2445819fa58 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -67,7 +67,7 @@ static void __page_cache_release(struct page *page) static void __put_single_page(struct page *page) { __page_cache_release(page); - free_hot_cold_page(page, 0); + free_hot_cold_page(page, false); } static void __put_compound_page(struct page *page) @@ -826,7 +826,7 @@ void lru_add_drain_all(void) * grabbed the page via the LRU. If it did, give up: shrink_inactive_list() * will free it. */ -void release_pages(struct page **pages, int nr, int cold) +void release_pages(struct page **pages, int nr, bool cold) { int i; LIST_HEAD(pages_to_free); diff --git a/mm/swap_state.c b/mm/swap_state.c index e76ace30d436..2972eee184a4 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -270,7 +270,7 @@ void free_pages_and_swap_cache(struct page **pages, int nr) for (i = 0; i < todo; i++) free_swap_cache(pagep[i]); - release_pages(pagep, todo, 0); + release_pages(pagep, todo, false); pagep += todo; nr -= todo; } diff --git a/mm/vmscan.c b/mm/vmscan.c index be6a689a71a6..e3204efed9ef 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1107,7 +1107,7 @@ keep: VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); } - free_hot_cold_page_list(&free_pages, 1); + free_hot_cold_page_list(&free_pages, true); list_splice(&ret_pages, page_list); count_vm_events(PGACTIVATE, pgactivate); @@ -1505,7 +1505,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, spin_unlock_irq(&zone->lru_lock); - free_hot_cold_page_list(&page_list, 1); + free_hot_cold_page_list(&page_list, true); /* * If reclaim is isolating dirty pages under writeback, it implies @@ -1725,7 +1725,7 @@ static void shrink_active_list(unsigned long nr_to_scan, __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); - free_hot_cold_page_list(&l_hold, 1); + free_hot_cold_page_list(&l_hold, true); } #ifdef CONFIG_SWAP From ddfef57105382a49c9c38199b434dbced40797fa Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:19 -0700 Subject: [PATCH 183/298] mm: page_alloc: lookup pageblock migratetype with IRQs enabled during free commit cfc47a2803db42140167b92d991ef04018e162c7 upstream. get_pageblock_migratetype() is called during free with IRQs disabled. This is unnecessary and disables IRQs for longer than necessary. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Acked-by: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 13566037167e..f57654e204b7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -774,9 +774,9 @@ static void __free_pages_ok(struct page *page, unsigned int order) if (!free_pages_prepare(page, order)) return; + migratetype = get_pfnblock_migratetype(page, pfn); local_irq_save(flags); __count_vm_events(PGFREE, 1 << order); - migratetype = get_pfnblock_migratetype(page, pfn); set_freepage_migratetype(page, migratetype); free_one_page(page_zone(page), page, pfn, order, migratetype); local_irq_restore(flags); From 1183f6ad43ab12f33b01d39bd9e64655e9773e38 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:24 -0700 Subject: [PATCH 184/298] mm: shmem: avoid atomic operation during shmem_getpage_gfp commit 07a427884348d38a6fd56fa4d78249c407196650 upstream. shmem_getpage_gfp uses an atomic operation to set the SwapBacked field before it's even added to the LRU or visible. This is unnecessary as what could it possible race against? Use an unlocked variant. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Acked-by: Rik van Riel Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/page-flags.h | 1 + mm/shmem.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ca71a1d347a0..cffeaa9aea21 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -208,6 +208,7 @@ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ PAGEFLAG(SavePinned, savepinned); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) + __SETPAGEFLAG(SwapBacked, swapbacked) __PAGEFLAG(SlobFree, slob_free) diff --git a/mm/shmem.c b/mm/shmem.c index 0f1447563f17..706331e9ee0a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1140,7 +1140,7 @@ repeat: goto decused; } - SetPageSwapBacked(page); + __SetPageSwapBacked(page); __set_page_locked(page); error = mem_cgroup_cache_charge(page, current->mm, gfp & GFP_RECLAIM_MASK); From d065df14f73b74f173e2540591185a69e0a55749 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:26 -0700 Subject: [PATCH 185/298] mm: do not use atomic operations when releasing pages commit e3741b506c5088fa8c911bb5884c430f770fb49d upstream. There should be no references to it any more and a parallel mark should not be reordered against us. Use non-locked varient to clear page active. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/swap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swap.c b/mm/swap.c index d2445819fa58..c370e59245f9 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -867,7 +867,7 @@ void release_pages(struct page **pages, int nr, bool cold) } /* Clear Active bit in case of parallel mark_page_accessed */ - ClearPageActive(page); + __ClearPageActive(page); list_add(&page->lru, &pages_to_free); } From b09d8d5662e0f74339a10be3fb960e879828b925 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:28 -0700 Subject: [PATCH 186/298] mm: do not use unnecessary atomic operations when adding pages to the LRU commit 6fb81a17d21f2a138b8f424af4cf379f2b694060 upstream. When adding pages to the LRU we clear the active bit unconditionally. As the page could be reachable from other paths we cannot use unlocked operations without risk of corruption such as a parallel mark_page_accessed. This patch tests if is necessary to clear the active flag before using an atomic operation. This potentially opens a tiny race when PageActive is checked as mark_page_accessed could be called after PageActive was checked. The race already exists but this patch changes it slightly. The consequence is that that the page may be promoted to the active list that might have been left on the inactive list before the patch. It's too tiny a race and too marginal a consequence to always use atomic operations for. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Rik van Riel Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/swap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/swap.c b/mm/swap.c index c370e59245f9..9649d99db5f9 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -597,13 +597,15 @@ static void __lru_cache_add(struct page *page) */ void lru_cache_add_anon(struct page *page) { - ClearPageActive(page); + if (PageActive(page)) + ClearPageActive(page); __lru_cache_add(page); } void lru_cache_add_file(struct page *page) { - ClearPageActive(page); + if (PageActive(page)) + ClearPageActive(page); __lru_cache_add(page); } EXPORT_SYMBOL(lru_cache_add_file); From 45f8d6eb309ebf1cb9d7ac30fb48d7aba7e8b15e Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:29 -0700 Subject: [PATCH 187/298] fs: buffer: do not use unnecessary atomic operations when discarding buffers commit e7470ee89f003634a88e7b5e5a7b65b3025987de upstream. Discarding buffers uses a bunch of atomic operations when discarding buffers because ...... I can't think of a reason. Use a cmpxchg loop to clear all the necessary flags. In most (all?) cases this will be a single atomic operations. [akpm@linux-foundation.org: move BUFFER_FLAGS_DISCARD into the .c file] Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Rik van Riel Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- fs/buffer.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 4d06a573d199..9c16cc70dd5d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1485,16 +1485,27 @@ EXPORT_SYMBOL(set_bh_page); /* * Called when truncating a buffer on a page completely. */ + +/* Bits that are cleared during an invalidate */ +#define BUFFER_FLAGS_DISCARD \ + (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \ + 1 << BH_Delay | 1 << BH_Unwritten) + static void discard_buffer(struct buffer_head * bh) { + unsigned long b_state, b_state_old; + lock_buffer(bh); clear_buffer_dirty(bh); bh->b_bdev = NULL; - clear_buffer_mapped(bh); - clear_buffer_req(bh); - clear_buffer_new(bh); - clear_buffer_delay(bh); - clear_buffer_unwritten(bh); + b_state = bh->b_state; + for (;;) { + b_state_old = cmpxchg(&bh->b_state, b_state, + (b_state & ~BUFFER_FLAGS_DISCARD)); + if (b_state_old == b_state) + break; + b_state = b_state_old; + } unlock_buffer(bh); } From 35dbe179fe2af754b0ac92c629435c39bd95681c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:31 -0700 Subject: [PATCH 188/298] mm: non-atomically mark page accessed during page cache allocation where possible commit 2457aec63745e235bcafb7ef312b182d8682f0fc upstream. aops->write_begin may allocate a new page and make it visible only to have mark_page_accessed called almost immediately after. Once the page is visible the atomic operations are necessary which is noticable overhead when writing to an in-memory filesystem like tmpfs but should also be noticable with fast storage. The objective of the patch is to initialse the accessed information with non-atomic operations before the page is visible. The bulk of filesystems directly or indirectly use grab_cache_page_write_begin or find_or_create_page for the initial allocation of a page cache page. This patch adds an init_page_accessed() helper which behaves like the first call to mark_page_accessed() but may called before the page is visible and can be done non-atomically. The primary APIs of concern in this care are the following and are used by most filesystems. find_get_page find_lock_page find_or_create_page grab_cache_page_nowait grab_cache_page_write_begin All of them are very similar in detail to the patch creates a core helper pagecache_get_page() which takes a flags parameter that affects its behavior such as whether the page should be marked accessed or not. Then old API is preserved but is basically a thin wrapper around this core function. Each of the filesystems are then updated to avoid calling mark_page_accessed when it is known that the VM interfaces have already done the job. There is a slight snag in that the timing of the mark_page_accessed() has now changed so in rare cases it's possible a page gets to the end of the LRU as PageReferenced where as previously it might have been repromoted. This is expected to be rare but it's worth the filesystem people thinking about it in case they see a problem with the timing change. It is also the case that some filesystems may be marking pages accessed that previously did not but it makes sense that filesystems have consistent behaviour in this regard. The test case used to evaulate this is a simple dd of a large file done multiple times with the file deleted on each iterations. The size of the file is 1/10th physical memory to avoid dirty page balancing. In the async case it will be possible that the workload completes without even hitting the disk and will have variable results but highlight the impact of mark_page_accessed for async IO. The sync results are expected to be more stable. The exception is tmpfs where the normal case is for the "IO" to not hit the disk. The test machine was single socket and UMA to avoid any scheduling or NUMA artifacts. Throughput and wall times are presented for sync IO, only wall times are shown for async as the granularity reported by dd and the variability is unsuitable for comparison. As async results were variable do to writback timings, I'm only reporting the maximum figures. The sync results were stable enough to make the mean and stddev uninteresting. The performance results are reported based on a run with no profiling. Profile data is based on a separate run with oprofile running. async dd 3.15.0-rc3 3.15.0-rc3 vanilla accessed-v2 ext3 Max elapsed 13.9900 ( 0.00%) 11.5900 ( 17.16%) tmpfs Max elapsed 0.5100 ( 0.00%) 0.4900 ( 3.92%) btrfs Max elapsed 12.8100 ( 0.00%) 12.7800 ( 0.23%) ext4 Max elapsed 18.6000 ( 0.00%) 13.3400 ( 28.28%) xfs Max elapsed 12.5600 ( 0.00%) 2.0900 ( 83.36%) The XFS figure is a bit strange as it managed to avoid a worst case by sheer luck but the average figures looked reasonable. samples percentage ext3 86107 0.9783 vmlinux-3.15.0-rc4-vanilla mark_page_accessed ext3 23833 0.2710 vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed ext3 5036 0.0573 vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed ext4 64566 0.8961 vmlinux-3.15.0-rc4-vanilla mark_page_accessed ext4 5322 0.0713 vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed ext4 2869 0.0384 vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed xfs 62126 1.7675 vmlinux-3.15.0-rc4-vanilla mark_page_accessed xfs 1904 0.0554 vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed xfs 103 0.0030 vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed btrfs 10655 0.1338 vmlinux-3.15.0-rc4-vanilla mark_page_accessed btrfs 2020 0.0273 vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed btrfs 587 0.0079 vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed tmpfs 59562 3.2628 vmlinux-3.15.0-rc4-vanilla mark_page_accessed tmpfs 1210 0.0696 vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed tmpfs 94 0.0054 vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed [akpm@linux-foundation.org: don't run init_page_accessed() against an uninitialised pointer] Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Rik van Riel Cc: Peter Zijlstra Tested-by: Prabhakar Lad Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 11 +- fs/btrfs/file.c | 5 +- fs/buffer.c | 7 +- fs/ext4/mballoc.c | 14 +-- fs/f2fs/checkpoint.c | 1 - fs/f2fs/node.c | 2 - fs/fuse/file.c | 2 - fs/gfs2/aops.c | 1 - fs/gfs2/meta_io.c | 4 +- fs/ntfs/attrib.c | 1 - fs/ntfs/file.c | 1 - include/linux/page-flags.h | 1 + include/linux/pagemap.h | 107 +++++++++++++++++-- include/linux/swap.h | 1 + mm/filemap.c | 208 ++++++++++++++----------------------- mm/shmem.c | 6 +- mm/swap.c | 11 ++ 17 files changed, 220 insertions(+), 163 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1a858947006e..fa9f90049099 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4507,7 +4507,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) spin_unlock(&eb->refs_lock); } -static void mark_extent_buffer_accessed(struct extent_buffer *eb) +static void mark_extent_buffer_accessed(struct extent_buffer *eb, + struct page *accessed) { unsigned long num_pages, i; @@ -4516,7 +4517,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb) num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); - mark_page_accessed(p); + if (p != accessed) + mark_page_accessed(p); } } @@ -4530,7 +4532,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, start >> PAGE_CACHE_SHIFT); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); - mark_extent_buffer_accessed(eb); + mark_extent_buffer_accessed(eb, NULL); return eb; } rcu_read_unlock(); @@ -4578,7 +4580,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, spin_unlock(&mapping->private_lock); unlock_page(p); page_cache_release(p); - mark_extent_buffer_accessed(exists); + mark_extent_buffer_accessed(exists, p); goto free_eb; } @@ -4593,7 +4595,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, attach_extent_buffer_page(eb, p); spin_unlock(&mapping->private_lock); WARN_ON(PageDirty(p)); - mark_page_accessed(p); eb->pages[i] = p; if (!PageUptodate(p)) uptodate = 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f6d00df99a8c..279b06ef5522 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -470,11 +470,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) for (i = 0; i < num_pages; i++) { /* page checked is some magic around finding pages that * have been modified without going through btrfs_set_page_dirty - * clear it here + * clear it here. There should be no need to mark the pages + * accessed as prepare_pages should have marked them accessed + * in prepare_pages via find_or_create_page() */ ClearPageChecked(pages[i]); unlock_page(pages[i]); - mark_page_accessed(pages[i]); page_cache_release(pages[i]); } } diff --git a/fs/buffer.c b/fs/buffer.c index 9c16cc70dd5d..eef21c69f2d7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -227,7 +227,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) int all_mapped = 1; index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits); - page = find_get_page(bd_mapping, index); + page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED); if (!page) goto out; @@ -1368,12 +1368,13 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) struct buffer_head *bh = lookup_bh_lru(bdev, block, size); if (bh == NULL) { + /* __find_get_block_slow will mark the page accessed */ bh = __find_get_block_slow(bdev, block); if (bh) bh_lru_install(bh); - } - if (bh) + } else touch_buffer(bh); + return bh; } EXPORT_SYMBOL(__find_get_block); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 242226a87be7..7620133f78bf 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1044,6 +1044,8 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) * allocating. If we are looking at the buddy cache we would * have taken a reference using ext4_mb_load_buddy and that * would have pinned buddy page to page cache. + * The call to ext4_mb_get_buddy_page_lock will mark the + * page accessed. */ ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b); if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) { @@ -1062,7 +1064,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) ret = -EIO; goto err; } - mark_page_accessed(page); if (e4b.bd_buddy_page == NULL) { /* @@ -1082,7 +1083,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) ret = -EIO; goto err; } - mark_page_accessed(page); err: ext4_mb_put_buddy_page_lock(&e4b); return ret; @@ -1141,7 +1141,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, /* we could use find_or_create_page(), but it locks page * what we'd like to avoid in fast path ... */ - page = find_get_page(inode->i_mapping, pnum); + page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) /* @@ -1172,15 +1172,16 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ret = -EIO; goto err; } + + /* Pages marked accessed already */ e4b->bd_bitmap_page = page; e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); block++; pnum = block / blocks_per_page; poff = block % blocks_per_page; - page = find_get_page(inode->i_mapping, pnum); + page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); if (page == NULL || !PageUptodate(page)) { if (page) page_cache_release(page); @@ -1201,9 +1202,10 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, ret = -EIO; goto err; } + + /* Pages marked accessed already */ e4b->bd_buddy_page = page; e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); - mark_page_accessed(page); BUG_ON(e4b->bd_bitmap_page == NULL); BUG_ON(e4b->bd_buddy_page == NULL); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 293d0486a40f..5c6fe278fb63 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -71,7 +71,6 @@ repeat: goto repeat; } out: - mark_page_accessed(page); return page; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b0649b76eb4f..bb6478acb369 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -969,7 +969,6 @@ repeat: } got_it: f2fs_bug_on(nid != nid_of_node(page)); - mark_page_accessed(page); return page; } @@ -1024,7 +1023,6 @@ page_hit: f2fs_put_page(page, 1); return ERR_PTR(-EIO); } - mark_page_accessed(page); return page; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a91d3b4d32f3..d8a60270581c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1006,8 +1006,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); flush_dcache_page(page); - mark_page_accessed(page); - if (!tmp) { unlock_page(page); page_cache_release(page); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 49436fa7cd4f..4ccb60d943bb 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -517,7 +517,6 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, p = kmap_atomic(page); memcpy(buf + copied, p + offset, amt); kunmap_atomic(p); - mark_page_accessed(page); page_cache_release(page); copied += amt; index++; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index b82a9c99e18b..e7b149614f5e 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -136,7 +136,8 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) yield(); } } else { - page = find_lock_page(mapping, index); + page = find_get_page_flags(mapping, index, + FGP_LOCK|FGP_ACCESSED); if (!page) return NULL; } @@ -153,7 +154,6 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) map_bh(bh, sdp->sd_vfs, blkno); unlock_page(page); - mark_page_accessed(page); page_cache_release(page); return bh; diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index a27e3fecefaf..250ed5b20c8f 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1748,7 +1748,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size) if (page) { set_page_dirty(page); unlock_page(page); - mark_page_accessed(page); page_cache_release(page); } ntfs_debug("Done."); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index db9bd8a31725..86ddab916b66 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2060,7 +2060,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb, } do { unlock_page(pages[--do_pages]); - mark_page_accessed(pages[do_pages]); page_cache_release(pages[do_pages]); } while (do_pages); if (unlikely(status)) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index cffeaa9aea21..3c545b48aeab 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -198,6 +198,7 @@ struct page; /* forward declaration */ TESTPAGEFLAG(Locked, locked) PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error) PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) + __SETPAGEFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0ab500011377..ec054b4917df 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -248,12 +248,109 @@ pgoff_t page_cache_next_hole(struct address_space *mapping, pgoff_t page_cache_prev_hole(struct address_space *mapping, pgoff_t index, unsigned long max_scan); +#define FGP_ACCESSED 0x00000001 +#define FGP_LOCK 0x00000002 +#define FGP_CREAT 0x00000004 +#define FGP_WRITE 0x00000008 +#define FGP_NOFS 0x00000010 +#define FGP_NOWAIT 0x00000020 + +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, + int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask); + +/** + * find_get_page - find and get a page reference + * @mapping: the address_space to search + * @offset: the page index + * + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned with an increased refcount. + * + * Otherwise, %NULL is returned. + */ +static inline struct page *find_get_page(struct address_space *mapping, + pgoff_t offset) +{ + return pagecache_get_page(mapping, offset, 0, 0, 0); +} + +static inline struct page *find_get_page_flags(struct address_space *mapping, + pgoff_t offset, int fgp_flags) +{ + return pagecache_get_page(mapping, offset, fgp_flags, 0, 0); +} + +/** + * find_lock_page - locate, pin and lock a pagecache page + * pagecache_get_page - find and get a page reference + * @mapping: the address_space to search + * @offset: the page index + * + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned locked and with an increased + * refcount. + * + * Otherwise, %NULL is returned. + * + * find_lock_page() may sleep. + */ +static inline struct page *find_lock_page(struct address_space *mapping, + pgoff_t offset) +{ + return pagecache_get_page(mapping, offset, FGP_LOCK, 0, 0); +} + +/** + * find_or_create_page - locate or add a pagecache page + * @mapping: the page's address_space + * @index: the page's index into the mapping + * @gfp_mask: page allocation mode + * + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned locked and with an increased + * refcount. + * + * If the page is not present, a new page is allocated using @gfp_mask + * and added to the page cache and the VM's LRU list. The page is + * returned locked and with an increased refcount. + * + * On memory exhaustion, %NULL is returned. + * + * find_or_create_page() may sleep, even if @gfp_flags specifies an + * atomic allocation! + */ +static inline struct page *find_or_create_page(struct address_space *mapping, + pgoff_t offset, gfp_t gfp_mask) +{ + return pagecache_get_page(mapping, offset, + FGP_LOCK|FGP_ACCESSED|FGP_CREAT, + gfp_mask, gfp_mask & GFP_RECLAIM_MASK); +} + +/** + * grab_cache_page_nowait - returns locked page at given index in given cache + * @mapping: target address_space + * @index: the page index + * + * Same as grab_cache_page(), but do not wait if the page is unavailable. + * This is intended for speculative data generators, where the data can + * be regenerated if the page couldn't be grabbed. This routine should + * be safe to call while holding the lock for another page. + * + * Clear __GFP_FS when allocating the page to avoid recursion into the fs + * and deadlock against the caller's locked page. + */ +static inline struct page *grab_cache_page_nowait(struct address_space *mapping, + pgoff_t index) +{ + return pagecache_get_page(mapping, index, + FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, + mapping_gfp_mask(mapping), + GFP_NOFS); +} + struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); -struct page *find_get_page(struct address_space *mapping, pgoff_t offset); struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); -struct page *find_lock_page(struct address_space *mapping, pgoff_t offset); -struct page *find_or_create_page(struct address_space *mapping, pgoff_t index, - gfp_t gfp_mask); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices); @@ -276,8 +373,6 @@ static inline struct page *grab_cache_page(struct address_space *mapping, return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); } -extern struct page * grab_cache_page_nowait(struct address_space *mapping, - pgoff_t index); extern struct page * read_cache_page(struct address_space *mapping, pgoff_t index, filler_t *filler, void *data); extern struct page * read_cache_page_gfp(struct address_space *mapping, diff --git a/include/linux/swap.h b/include/linux/swap.h index c8beed191dd1..241bf0922770 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -275,6 +275,7 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); +extern void init_page_accessed(struct page *page); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); diff --git a/mm/filemap.c b/mm/filemap.c index bdaa21555abe..b170a68fad35 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -847,26 +847,6 @@ out: } EXPORT_SYMBOL(find_get_entry); -/** - * find_get_page - find and get a page reference - * @mapping: the address_space to search - * @offset: the page index - * - * Looks up the page cache slot at @mapping & @offset. If there is a - * page cache page, it is returned with an increased refcount. - * - * Otherwise, %NULL is returned. - */ -struct page *find_get_page(struct address_space *mapping, pgoff_t offset) -{ - struct page *page = find_get_entry(mapping, offset); - - if (radix_tree_exceptional_entry(page)) - page = NULL; - return page; -} -EXPORT_SYMBOL(find_get_page); - /** * find_lock_entry - locate, pin and lock a page cache entry * @mapping: the address_space to search @@ -904,66 +884,84 @@ repeat: EXPORT_SYMBOL(find_lock_entry); /** - * find_lock_page - locate, pin and lock a pagecache page + * pagecache_get_page - find and get a page reference * @mapping: the address_space to search * @offset: the page index + * @fgp_flags: PCG flags + * @gfp_mask: gfp mask to use if a page is to be allocated * - * Looks up the page cache slot at @mapping & @offset. If there is a - * page cache page, it is returned locked and with an increased - * refcount. + * Looks up the page cache slot at @mapping & @offset. * - * Otherwise, %NULL is returned. + * PCG flags modify how the page is returned * - * find_lock_page() may sleep. + * FGP_ACCESSED: the page will be marked accessed + * FGP_LOCK: Page is return locked + * FGP_CREAT: If page is not present then a new page is allocated using + * @gfp_mask and added to the page cache and the VM's LRU + * list. The page is returned locked and with an increased + * refcount. Otherwise, %NULL is returned. + * + * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even + * if the GFP flags specified for FGP_CREAT are atomic. + * + * If there is a page cache page, it is returned with an increased refcount. */ -struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) -{ - struct page *page = find_lock_entry(mapping, offset); - - if (radix_tree_exceptional_entry(page)) - page = NULL; - return page; -} -EXPORT_SYMBOL(find_lock_page); - -/** - * find_or_create_page - locate or add a pagecache page - * @mapping: the page's address_space - * @index: the page's index into the mapping - * @gfp_mask: page allocation mode - * - * Looks up the page cache slot at @mapping & @offset. If there is a - * page cache page, it is returned locked and with an increased - * refcount. - * - * If the page is not present, a new page is allocated using @gfp_mask - * and added to the page cache and the VM's LRU list. The page is - * returned locked and with an increased refcount. - * - * On memory exhaustion, %NULL is returned. - * - * find_or_create_page() may sleep, even if @gfp_flags specifies an - * atomic allocation! - */ -struct page *find_or_create_page(struct address_space *mapping, - pgoff_t index, gfp_t gfp_mask) +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, + int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask) { struct page *page; - int err; + repeat: - page = find_lock_page(mapping, index); - if (!page) { - page = __page_cache_alloc(gfp_mask); + page = find_get_entry(mapping, offset); + if (radix_tree_exceptional_entry(page)) + page = NULL; + if (!page) + goto no_page; + + if (fgp_flags & FGP_LOCK) { + if (fgp_flags & FGP_NOWAIT) { + if (!trylock_page(page)) { + page_cache_release(page); + return NULL; + } + } else { + lock_page(page); + } + + /* Has the page been truncated? */ + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + page_cache_release(page); + goto repeat; + } + VM_BUG_ON(page->index != offset); + } + + if (page && (fgp_flags & FGP_ACCESSED)) + mark_page_accessed(page); + +no_page: + if (!page && (fgp_flags & FGP_CREAT)) { + int err; + if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping)) + cache_gfp_mask |= __GFP_WRITE; + if (fgp_flags & FGP_NOFS) { + cache_gfp_mask &= ~__GFP_FS; + radix_gfp_mask &= ~__GFP_FS; + } + + page = __page_cache_alloc(cache_gfp_mask); if (!page) return NULL; - /* - * We want a regular kernel memory (not highmem or DMA etc) - * allocation for the radix tree nodes, but we need to honour - * the context-specific requirements the caller has asked for. - * GFP_RECLAIM_MASK collects those requirements. - */ - err = add_to_page_cache_lru(page, mapping, index, - (gfp_mask & GFP_RECLAIM_MASK)); + + if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK))) + fgp_flags |= FGP_LOCK; + + /* Init accessed so avoit atomic mark_page_accessed later */ + if (fgp_flags & FGP_ACCESSED) + init_page_accessed(page); + + err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask); if (unlikely(err)) { page_cache_release(page); page = NULL; @@ -971,9 +969,10 @@ repeat: goto repeat; } } + return page; } -EXPORT_SYMBOL(find_or_create_page); +EXPORT_SYMBOL(pagecache_get_page); /** * find_get_entries - gang pagecache lookup @@ -1263,39 +1262,6 @@ repeat: } EXPORT_SYMBOL(find_get_pages_tag); -/** - * grab_cache_page_nowait - returns locked page at given index in given cache - * @mapping: target address_space - * @index: the page index - * - * Same as grab_cache_page(), but do not wait if the page is unavailable. - * This is intended for speculative data generators, where the data can - * be regenerated if the page couldn't be grabbed. This routine should - * be safe to call while holding the lock for another page. - * - * Clear __GFP_FS when allocating the page to avoid recursion into the fs - * and deadlock against the caller's locked page. - */ -struct page * -grab_cache_page_nowait(struct address_space *mapping, pgoff_t index) -{ - struct page *page = find_get_page(mapping, index); - - if (page) { - if (trylock_page(page)) - return page; - page_cache_release(page); - return NULL; - } - page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS); - if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) { - page_cache_release(page); - page = NULL; - } - return page; -} -EXPORT_SYMBOL(grab_cache_page_nowait); - /* * CD/DVDs are error prone. When a medium error occurs, the driver may fail * a _large_ part of the i/o request. Imagine the worst scenario: @@ -2397,7 +2363,6 @@ int pagecache_write_end(struct file *file, struct address_space *mapping, { const struct address_space_operations *aops = mapping->a_ops; - mark_page_accessed(page); return aops->write_end(file, mapping, pos, len, copied, page, fsdata); } EXPORT_SYMBOL(pagecache_write_end); @@ -2479,34 +2444,18 @@ EXPORT_SYMBOL(generic_file_direct_write); struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index, unsigned flags) { - int status; - gfp_t gfp_mask; struct page *page; - gfp_t gfp_notmask = 0; + int fgp_flags = FGP_LOCK|FGP_ACCESSED|FGP_WRITE|FGP_CREAT; - gfp_mask = mapping_gfp_mask(mapping); - if (mapping_cap_account_dirty(mapping)) - gfp_mask |= __GFP_WRITE; if (flags & AOP_FLAG_NOFS) - gfp_notmask = __GFP_FS; -repeat: - page = find_lock_page(mapping, index); - if (page) - goto found; + fgp_flags |= FGP_NOFS; + + page = pagecache_get_page(mapping, index, fgp_flags, + mapping_gfp_mask(mapping), + GFP_KERNEL); + if (page) + wait_for_stable_page(page); - page = __page_cache_alloc(gfp_mask & ~gfp_notmask); - if (!page) - return NULL; - status = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL & ~gfp_notmask); - if (unlikely(status)) { - page_cache_release(page); - if (status == -EEXIST) - goto repeat; - return NULL; - } -found: - wait_for_stable_page(page); return page; } EXPORT_SYMBOL(grab_cache_page_write_begin); @@ -2555,7 +2504,7 @@ again: status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata); - if (unlikely(status)) + if (unlikely(status < 0)) break; if (mapping_writably_mapped(mapping)) @@ -2564,7 +2513,6 @@ again: copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); flush_dcache_page(page); - mark_page_accessed(page); status = a_ops->write_end(file, mapping, pos, bytes, copied, page, fsdata); if (unlikely(status < 0)) diff --git a/mm/shmem.c b/mm/shmem.c index 706331e9ee0a..fe1c488012bc 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1438,9 +1438,13 @@ shmem_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; struct inode *inode = mapping->host; pgoff_t index = pos >> PAGE_CACHE_SHIFT; - return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); + ret = shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); + if (ret == 0 && *pagep) + init_page_accessed(*pagep); + return ret; } static int diff --git a/mm/swap.c b/mm/swap.c index 9649d99db5f9..674af557f4f6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -580,6 +580,17 @@ void mark_page_accessed(struct page *page) } EXPORT_SYMBOL(mark_page_accessed); +/* + * Used to mark_page_accessed(page) that is not visible yet and when it is + * still safe to use non-atomic ops + */ +void init_page_accessed(struct page *page) +{ + if (!PageReferenced(page)) + __SetPageReferenced(page); +} +EXPORT_SYMBOL(init_page_accessed); + static void __lru_cache_add(struct page *page) { struct pagevec *pvec = &get_cpu_var(lru_add_pvec); From 07e97c1e9bb85706d84395aa46d47c8ae5ff3e0f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:34 -0700 Subject: [PATCH 189/298] mm: avoid unnecessary atomic operations during end_page_writeback() commit 888cf2db475a256fb0cda042140f73d7881f81fe upstream. If a page is marked for immediate reclaim then it is moved to the tail of the LRU list. This occurs when the system is under enough memory pressure for pages under writeback to reach the end of the LRU but we test for this using atomic operations on every writeback. This patch uses an optimistic non-atomic test first. It'll miss some pages in rare cases but the consequences are not severe enough to warrant such a penalty. While the function does not dominate profiles during a simple dd test the cost of it is reduced. 73048 0.7428 vmlinux-3.15.0-rc5-mmotm-20140513 end_page_writeback 23740 0.2409 vmlinux-3.15.0-rc5-lessatomic end_page_writeback Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index b170a68fad35..8aa817460cc7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -644,8 +644,17 @@ EXPORT_SYMBOL(unlock_page); */ void end_page_writeback(struct page *page) { - if (TestClearPageReclaim(page)) + /* + * TestClearPageReclaim could be used here but it is an atomic + * operation and overkill in this particular case. Failing to + * shuffle a page marked for immediate reclaim is too mild to + * justify taking an atomic operation penalty at the end of + * ever page writeback. + */ + if (PageReclaim(page)) { + ClearPageReclaim(page); rotate_reclaimable_page(page); + } if (!test_clear_page_writeback(page)) BUG(); From 15c145c39719fbcb3818fbc0df843f7263bbbe89 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 2 Jul 2014 15:22:38 -0700 Subject: [PATCH 190/298] shmem: fix init_page_accessed use to stop !PageLRU bug commit 66d2f4d28cd030220e7ea2a628993fcabcb956d1 upstream. Under shmem swapping load, I sometimes hit the VM_BUG_ON_PAGE(!PageLRU) in isolate_lru_pages() at mm/vmscan.c:1281! Commit 2457aec63745 ("mm: non-atomically mark page accessed during page cache allocation where possible") looks like interrupted work-in-progress. mm/filemap.c's call to init_page_accessed() is fine, but not mm/shmem.c's - shmem_write_begin() is clearly wrong to use it after shmem_getpage(), when the page is always visible in radix_tree, and often already on LRU. Revert change to shmem_write_begin(), and use init_page_accessed() or mark_page_accessed() appropriately for SGP_WRITE in shmem_getpage_gfp(). SGP_WRITE also covers shmem_symlink(), which did not mark_page_accessed() before; but since many other filesystems use [__]page_symlink(), which did and does mark the page accessed, consider this as rectifying an oversight. Signed-off-by: Hugh Dickins Acked-by: Mel Gorman Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Michal Hocko Cc: Dave Hansen Cc: Prabhakar Lad Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index fe1c488012bc..85d8a1a3626c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1035,6 +1035,9 @@ repeat: goto failed; } + if (page && sgp == SGP_WRITE) + mark_page_accessed(page); + /* fallocated page? */ if (page && !PageUptodate(page)) { if (sgp != SGP_READ) @@ -1116,6 +1119,9 @@ repeat: shmem_recalc_inode(inode); spin_unlock(&info->lock); + if (sgp == SGP_WRITE) + mark_page_accessed(page); + delete_from_swap_cache(page); set_page_dirty(page); swap_free(swap); @@ -1142,6 +1148,9 @@ repeat: __SetPageSwapBacked(page); __set_page_locked(page); + if (sgp == SGP_WRITE) + init_page_accessed(page); + error = mem_cgroup_cache_charge(page, current->mm, gfp & GFP_RECLAIM_MASK); if (error) @@ -1438,13 +1447,9 @@ shmem_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - int ret; struct inode *inode = mapping->host; pgoff_t index = pos >> PAGE_CACHE_SHIFT; - ret = shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); - if (ret == 0 && *pagep) - init_page_accessed(*pagep); - return ret; + return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); } static int From 2eeaa64a14ceda1f4b08697548c16601e3942c20 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 6 Aug 2014 16:05:08 -0700 Subject: [PATCH 191/298] mm/memory.c: use entry = ACCESS_ONCE(*pte) in handle_pte_fault() commit c0d73261f5c1355a35b8b40e871d31578ce0c044 upstream. Use ACCESS_ONCE() in handle_pte_fault() when getting the entry or orig_pte upon which all subsequent decisions and pte_same() tests will be made. I have no evidence that its lack is responsible for the mm/filemap.c:202 BUG_ON(page_mapped(page)) in __delete_from_page_cache() found by trinity, and I am not optimistic that it will fix it. But I have found no other explanation, and ACCESS_ONCE() here will surely not hurt. If gcc does re-access the pte before passing it down, then that would be disastrous for correct page fault handling, and certainly could explain the page_mapped() BUGs seen (concurrent fault causing page to be mapped in a second time on top of itself: mapcount 2 for a single pte). Signed-off-by: Hugh Dickins Cc: Sasha Levin Cc: Linus Torvalds Cc: "Kirill A. Shutemov" Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 924429e5ef4d..728eba39d4fc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3646,7 +3646,7 @@ static int handle_pte_fault(struct mm_struct *mm, pte_t entry; spinlock_t *ptl; - entry = *pte; + entry = ACCESS_ONCE(*pte); if (!pte_present(entry)) { if (pte_none(entry)) { if (vma->vm_ops) { From 06ed94e91856817cc64e81378c0f1d7fd605d169 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 6 Aug 2014 16:07:29 -0700 Subject: [PATCH 192/298] mm, thp: only collapse hugepages to nodes with affinity for zone_reclaim_mode commit 14a4e2141e24304fff2c697be6382ffb83888185 upstream. Commit 9f1b868a13ac ("mm: thp: khugepaged: add policy for finding target node") improved the previous khugepaged logic which allocated a transparent hugepages from the node of the first page being collapsed. However, it is still possible to collapse pages to remote memory which may suffer from additional access latency. With the current policy, it is possible that 255 pages (with PAGE_SHIFT == 12) will be collapsed remotely if the majority are allocated from that node. When zone_reclaim_mode is enabled, it means the VM should make every attempt to allocate locally to prevent NUMA performance degradation. In this case, we do not want to collapse hugepages to remote nodes that would suffer from increased access latency. Thus, when zone_reclaim_mode is enabled, only allow collapsing to nodes with RECLAIM_DISTANCE or less. There is no functional change for systems that disable zone_reclaim_mode. Signed-off-by: David Rientjes Cc: Dave Hansen Cc: Andrea Arcangeli Acked-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Rik van Riel Cc: "Kirill A. Shutemov" Cc: Bob Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 331faa5c0d5e..adce656d2e9c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2273,6 +2273,30 @@ static void khugepaged_alloc_sleep(void) static int khugepaged_node_load[MAX_NUMNODES]; +static bool khugepaged_scan_abort(int nid) +{ + int i; + + /* + * If zone_reclaim_mode is disabled, then no extra effort is made to + * allocate memory locally. + */ + if (!zone_reclaim_mode) + return false; + + /* If there is a count for this node already, it must be acceptable */ + if (khugepaged_node_load[nid]) + return false; + + for (i = 0; i < MAX_NUMNODES; i++) { + if (!khugepaged_node_load[i]) + continue; + if (node_distance(nid, i) > RECLAIM_DISTANCE) + return true; + } + return false; +} + #ifdef CONFIG_NUMA static int khugepaged_find_target_node(void) { @@ -2589,6 +2613,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, * hit record. */ node = page_to_nid(page); + if (khugepaged_scan_abort(node)) + goto out_unmap; khugepaged_node_load[node]++; VM_BUG_ON_PAGE(PageCompound(page), page); if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) From 63aefc47b630e845bd46e5303cb7a3a3f44c8e6f Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 6 Aug 2014 16:06:56 -0700 Subject: [PATCH 193/298] mm: make copy_pte_range static again commit 21bda264f4243f61dfcc485174055f12ad0530b4 upstream. Commit 71e3aac0724f ("thp: transparent hugepage core") adds copy_pte_range prototype to huge_mm.h. I'm not sure why (or if) this function have been used outside of memory.c, but it currently isn't. This patch makes copy_pte_range() static again. Signed-off-by: Jerome Marchand Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/huge_mm.h | 4 ---- mm/memory.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b826239bdce0..63579cb8d3dc 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -93,10 +93,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); #endif /* CONFIG_DEBUG_VM */ extern unsigned long transparent_hugepage_flags; -extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pmd_t *dst_pmd, pmd_t *src_pmd, - struct vm_area_struct *vma, - unsigned long addr, unsigned long end); extern int split_huge_page_to_list(struct page *page, struct list_head *list); static inline int split_huge_page(struct page *page) { diff --git a/mm/memory.c b/mm/memory.c index 728eba39d4fc..7f30beaba74f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -878,7 +878,7 @@ out_set_pte: return 0; } -int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, +static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { From cbc42af9f6d80d270ce02c6f5daf21afed3fcb71 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:06 -0700 Subject: [PATCH 194/298] vmalloc: use rcu list iterator to reduce vmap_area_lock contention commit 474750aba88817c53f39424e5567b8e4acc4b39b upstream. Richard Yao reported a month ago that his system have a trouble with vmap_area_lock contention during performance analysis by /proc/meminfo. Andrew asked why his analysis checks /proc/meminfo stressfully, but he didn't answer it. https://lkml.org/lkml/2014/4/10/416 Although I'm not sure that this is right usage or not, there is a solution reducing vmap_area_lock contention with no side-effect. That is just to use rcu list iterator in get_vmalloc_info(). rcu can be used in this function because all RCU protocol is already respected by writers, since Nick Piggin commit db64fe02258f1 ("mm: rewrite vmap layer") back in linux-2.6.28 Specifically : insertions use list_add_rcu(), deletions use list_del_rcu() and kfree_rcu(). Note the rb tree is not used from rcu reader (it would not be safe), only the vmap_area_list has full RCU protection. Note that __purge_vmap_area_lazy() already uses this rcu protection. rcu_read_lock(); list_for_each_entry_rcu(va, &vmap_area_list, list) { if (va->flags & VM_LAZY_FREE) { if (va->va_start < *start) *start = va->va_start; if (va->va_end > *end) *end = va->va_end; nr += (va->va_end - va->va_start) >> PAGE_SHIFT; list_add_tail(&va->purge_list, &valist); va->flags |= VM_LAZY_FREEING; va->flags &= ~VM_LAZY_FREE; } } rcu_read_unlock(); Peter: : While rcu list traversal over the vmap_area_list is safe, this may : arrive at different results than the spinlocked version. The rcu list : traversal version will not be a 'snapshot' of a single, valid instant : of the entire vmap_area_list, but rather a potential amalgam of : different list states. Joonsoo: : Yes, you are right, but I don't think that we should be strict here. : Meminfo is already not a 'snapshot' at specific time. While we try to get : certain stats, the other stats can change. And, although we may arrive at : different results than the spinlocked version, the difference would not be : large and would not make serious side-effect. [edumazet@google.com: add more commit description] Signed-off-by: Joonsoo Kim Reported-by: Richard Yao Acked-by: Eric Dumazet Cc: Peter Hurley Cc: Zhang Yanfei Cc: Johannes Weiner Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/vmalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0fdf96803c5b..aa3891e8e388 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2681,14 +2681,14 @@ void get_vmalloc_info(struct vmalloc_info *vmi) prev_end = VMALLOC_START; - spin_lock(&vmap_area_lock); + rcu_read_lock(); if (list_empty(&vmap_area_list)) { vmi->largest_chunk = VMALLOC_TOTAL; goto out; } - list_for_each_entry(va, &vmap_area_list, list) { + list_for_each_entry_rcu(va, &vmap_area_list, list) { unsigned long addr = va->va_start; /* @@ -2715,7 +2715,7 @@ void get_vmalloc_info(struct vmalloc_info *vmi) vmi->largest_chunk = VMALLOC_END - prev_end; out: - spin_unlock(&vmap_area_lock); + rcu_read_unlock(); } #endif From 299040331834e81a720acb85be2acfdf85a32b56 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Wed, 6 Aug 2014 16:08:03 -0700 Subject: [PATCH 195/298] memcg, vmscan: Fix forced scan of anonymous pages commit 2ab051e11bfa3cbb7b24177f3d6aaed10a0d743e upstream. When memory cgoups are enabled, the code that decides to force to scan anonymous pages in get_scan_count() compares global values (free, high_watermark) to a value that is restricted to a memory cgroup (file). It make the code over-eager to force anon scan. For instance, it will force anon scan when scanning a memcg that is mainly populated by anonymous page, even when there is plenty of file pages to get rid of in others memcgs, even when swappiness == 0. It breaks user's expectation about swappiness and hurts performance. This patch makes sure that forced anon scan only happens when there not enough file pages for the all zone, not just in one random memcg. [hannes@cmpxchg.org: cleanups] Signed-off-by: Jerome Marchand Acked-by: Michal Hocko Acked-by: Johannes Weiner Reviewed-by: Rik van Riel Cc: Mel Gorman Signed-off-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/vmscan.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index e3204efed9ef..771dad70a08f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1847,7 +1847,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, struct zone *zone = lruvec_zone(lruvec); unsigned long anon_prio, file_prio; enum scan_balance scan_balance; - unsigned long anon, file, free; + unsigned long anon, file; bool force_scan = false; unsigned long ap, fp; enum lru_list lru; @@ -1895,11 +1895,6 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } - anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + - get_lru_size(lruvec, LRU_INACTIVE_ANON); - file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + - get_lru_size(lruvec, LRU_INACTIVE_FILE); - /* * If it's foreseeable that reclaiming the file cache won't be * enough to get the zone back into a desirable shape, we have @@ -1907,8 +1902,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, * thrashing - remaining file pages alone. */ if (global_reclaim(sc)) { - free = zone_page_state(zone, NR_FREE_PAGES); - if (unlikely(file + free <= high_wmark_pages(zone))) { + unsigned long zonefile; + unsigned long zonefree; + + zonefree = zone_page_state(zone, NR_FREE_PAGES); + zonefile = zone_page_state(zone, NR_ACTIVE_FILE) + + zone_page_state(zone, NR_INACTIVE_FILE); + + if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) { scan_balance = SCAN_ANON; goto out; } @@ -1943,6 +1944,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, * * anon in [0], file in [1] */ + + anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + + get_lru_size(lruvec, LRU_INACTIVE_ANON); + file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + + get_lru_size(lruvec, LRU_INACTIVE_FILE); + spin_lock_irq(&zone->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { reclaim_stat->recent_scanned[0] /= 2; From 84532d77391d45a2a204c039664fa6dbcc121a01 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:11 -0700 Subject: [PATCH 196/298] mm: pagemap: avoid unnecessary overhead when tracepoints are deactivated commit 24b7e5819ad5cbef2b7c7376510862aa8319d240 upstream. This was formerly the series "Improve sequential read throughput" which noted some major differences in performance of tiobench since 3.0. While there are a number of factors, two that dominated were the introduction of the fair zone allocation policy and changes to CFQ. The behaviour of fair zone allocation policy makes more sense than tiobench as a benchmark and CFQ defaults were not changed due to insufficient benchmarking. This series is what's left. It's one functional fix to the fair zone allocation policy when used on NUMA machines and a reduction of overhead in general. tiobench was used for the comparison despite its flaws as an IO benchmark as in this case we are primarily interested in the overhead of page allocator and page reclaim activity. On UMA, it makes little difference to overhead 3.16.0-rc3 3.16.0-rc3 vanilla lowercost-v5 User 383.61 386.77 System 403.83 401.74 Elapsed 5411.50 5413.11 On a 4-socket NUMA machine it's a bit more noticable 3.16.0-rc3 3.16.0-rc3 vanilla lowercost-v5 User 746.94 802.00 System 65336.22 40852.33 Elapsed 27553.52 27368.46 This patch (of 6): The LRU insertion and activate tracepoints take PFN as a parameter forcing the overhead to the caller. Move the overhead to the tracepoint fast-assign method to ensure the cost is only incurred when the tracepoint is active. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/trace/events/pagemap.h | 16 +++++++--------- mm/swap.c | 4 ++-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 1c9fabde69e4..ce0803b8d05f 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -28,12 +28,10 @@ TRACE_EVENT(mm_lru_insertion, TP_PROTO( struct page *page, - unsigned long pfn, - int lru, - unsigned long flags + int lru ), - TP_ARGS(page, pfn, lru, flags), + TP_ARGS(page, lru), TP_STRUCT__entry( __field(struct page *, page ) @@ -44,9 +42,9 @@ TRACE_EVENT(mm_lru_insertion, TP_fast_assign( __entry->page = page; - __entry->pfn = pfn; + __entry->pfn = page_to_pfn(page); __entry->lru = lru; - __entry->flags = flags; + __entry->flags = trace_pagemap_flags(page); ), /* Flag format is based on page-types.c formatting for pagemap */ @@ -64,9 +62,9 @@ TRACE_EVENT(mm_lru_insertion, TRACE_EVENT(mm_lru_activate, - TP_PROTO(struct page *page, unsigned long pfn), + TP_PROTO(struct page *page), - TP_ARGS(page, pfn), + TP_ARGS(page), TP_STRUCT__entry( __field(struct page *, page ) @@ -75,7 +73,7 @@ TRACE_EVENT(mm_lru_activate, TP_fast_assign( __entry->page = page; - __entry->pfn = pfn; + __entry->pfn = page_to_pfn(page); ), /* Flag format is based on page-types.c formatting for pagemap */ diff --git a/mm/swap.c b/mm/swap.c index 674af557f4f6..d2ceddf70d42 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -469,7 +469,7 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, SetPageActive(page); lru += LRU_ACTIVE; add_page_to_lru_list(page, lruvec, lru); - trace_mm_lru_activate(page, page_to_pfn(page)); + trace_mm_lru_activate(page); __count_vm_event(PGACTIVATE); update_page_reclaim_stat(lruvec, file, 1); @@ -962,7 +962,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, SetPageLRU(page); add_page_to_lru_list(page, lruvec, lru); update_page_reclaim_stat(lruvec, file, active); - trace_mm_lru_insertion(page, page_to_pfn(page), lru, trace_pagemap_flags(page)); + trace_mm_lru_insertion(page, lru); } /* From 36806371a2a9c34c6e3cde683cb36732142f0627 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:14 -0700 Subject: [PATCH 197/298] mm: rearrange zone fields into read-only, page alloc, statistics and page reclaim lines commit 3484b2de9499df23c4604a513b36f96326ae81ad upstream. The arrangement of struct zone has changed over time and now it has reached the point where there is some inappropriate sharing going on. On x86-64 for example o The zone->node field is shared with the zone lock and zone->node is accessed frequently from the page allocator due to the fair zone allocation policy. o span_seqlock is almost never used by shares a line with free_area o Some zone statistics share a cache line with the LRU lock so reclaim-intensive and allocator-intensive workloads can bounce the cache line on a stat update This patch rearranges struct zone to put read-only and read-mostly fields together and then splits the page allocator intensive fields, the zone statistics and the page reclaim intensive fields into their own cache lines. Note that the type of lowmem_reserve changes due to the watermark calculations being signed and avoiding a signed/unsigned conversion there. On the test configuration I used the overall size of struct zone shrunk by one cache line. On smaller machines, this is not likely to be noticable. However, on a 4-node NUMA machine running tiobench the system CPU overhead is reduced by this patch. 3.16.0-rc3 3.16.0-rc3 vanillarearrange-v5r9 User 746.94 759.78 System 65336.22 58350.98 Elapsed 27553.52 27282.02 Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 209 +++++++++++++++++++++-------------------- mm/page_alloc.c | 7 +- mm/vmstat.c | 4 +- 3 files changed, 112 insertions(+), 108 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1cc970e6324e..672048f4acb2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -321,18 +321,11 @@ enum zone_type { #ifndef __GENERATING_BOUNDS_H struct zone { - /* Fields commonly accessed by the page allocator */ + /* Read-mostly fields */ /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; - /* - * When free pages are below this point, additional steps are taken - * when reading the number of free pages to avoid per-cpu counter - * drift allowing watermarks to be breached - */ - unsigned long percpu_drift_mark; - /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several @@ -341,7 +334,20 @@ struct zone { * on the higher zones). This array is recalculated at runtime if the * sysctl_lowmem_reserve_ratio sysctl changes. */ - unsigned long lowmem_reserve[MAX_NR_ZONES]; + long lowmem_reserve[MAX_NR_ZONES]; + +#ifdef CONFIG_NUMA + int node; +#endif + + /* + * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on + * this zone's LRU. Maintained by the pageout code. + */ + unsigned int inactive_ratio; + + struct pglist_data *zone_pgdat; + struct per_cpu_pageset __percpu *pageset; /* * This is a per-zone reserve of pages that should not be @@ -349,34 +355,6 @@ struct zone { */ unsigned long dirty_balance_reserve; -#ifdef CONFIG_NUMA - int node; - /* - * zone reclaim becomes active if more unmapped pages exist. - */ - unsigned long min_unmapped_pages; - unsigned long min_slab_pages; -#endif - struct per_cpu_pageset __percpu *pageset; - /* - * free areas of different sizes - */ - spinlock_t lock; -#if defined CONFIG_COMPACTION || defined CONFIG_CMA - /* Set to true when the PG_migrate_skip bits should be cleared */ - bool compact_blockskip_flush; - - /* pfn where compaction free scanner should start */ - unsigned long compact_cached_free_pfn; - /* pfn where async and sync compaction migration scanner should start */ - unsigned long compact_cached_migrate_pfn[2]; -#endif -#ifdef CONFIG_MEMORY_HOTPLUG - /* see spanned/present_pages for more description */ - seqlock_t span_seqlock; -#endif - struct free_area free_area[MAX_ORDER]; - #ifndef CONFIG_SPARSEMEM /* * Flags for a pageblock_nr_pages block. See pageblock-flags.h. @@ -385,71 +363,14 @@ struct zone { unsigned long *pageblock_flags; #endif /* CONFIG_SPARSEMEM */ -#ifdef CONFIG_COMPACTION +#ifdef CONFIG_NUMA /* - * On compaction failure, 1<> PAGE_SHIFT */ unsigned long zone_start_pfn; @@ -495,9 +416,11 @@ struct zone { * adjust_managed_page_count() should be used instead of directly * touching zone->managed_pages and totalram_pages. */ + unsigned long managed_pages; unsigned long spanned_pages; unsigned long present_pages; - unsigned long managed_pages; + + const char *name; /* * Number of MIGRATE_RESEVE page block. To maintain for just @@ -505,10 +428,92 @@ struct zone { */ int nr_migrate_reserve_block; +#ifdef CONFIG_MEMORY_HOTPLUG + /* see spanned/present_pages for more description */ + seqlock_t span_seqlock; +#endif + /* - * rarely used fields: + * wait_table -- the array holding the hash table + * wait_table_hash_nr_entries -- the size of the hash table array + * wait_table_bits -- wait_table_size == (1 << wait_table_bits) + * + * The purpose of all these is to keep track of the people + * waiting for a page to become available and make them + * runnable again when possible. The trouble is that this + * consumes a lot of space, especially when so few things + * wait on pages at a given time. So instead of using + * per-page waitqueues, we use a waitqueue hash table. + * + * The bucket discipline is to sleep on the same queue when + * colliding and wake all in that wait queue when removing. + * When something wakes, it must check to be sure its page is + * truly available, a la thundering herd. The cost of a + * collision is great, but given the expected load of the + * table, they should be so rare as to be outweighed by the + * benefits from the saved space. + * + * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the + * primary users of these fields, and in mm/page_alloc.c + * free_area_init_core() performs the initialization of them. */ - const char *name; + wait_queue_head_t *wait_table; + unsigned long wait_table_hash_nr_entries; + unsigned long wait_table_bits; + + ZONE_PADDING(_pad1_) + + /* Write-intensive fields used from the page allocator */ + spinlock_t lock; + + /* free areas of different sizes */ + struct free_area free_area[MAX_ORDER]; + + /* zone flags, see below */ + unsigned long flags; + + ZONE_PADDING(_pad2_) + + /* Write-intensive fields used by page reclaim */ + + /* Fields commonly accessed by the page reclaim scanner */ + spinlock_t lru_lock; + unsigned long pages_scanned; /* since last reclaim */ + struct lruvec lruvec; + + /* + * When free pages are below this point, additional steps are taken + * when reading the number of free pages to avoid per-cpu counter + * drift allowing watermarks to be breached + */ + unsigned long percpu_drift_mark; + +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + /* pfn where compaction free scanner should start */ + unsigned long compact_cached_free_pfn; + /* pfn where async and sync compaction migration scanner should start */ + unsigned long compact_cached_migrate_pfn[2]; +#endif + +#ifdef CONFIG_COMPACTION + /* + * On compaction failure, 1<lowmem_reserve[classzone_idx]; int o; long free_cma = 0; @@ -1725,7 +1724,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order, free_cma = zone_page_state(z, NR_FREE_CMA_PAGES); #endif - if (free_pages - free_cma <= min + lowmem_reserve) + if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx]) return false; for (o = 0; o < order; o++) { /* At the next order, this order's pages become unavailable */ @@ -3257,7 +3256,7 @@ void show_free_areas(unsigned int filter) ); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) - printk(" %lu", zone->lowmem_reserve[i]); + printk(" %ld", zone->lowmem_reserve[i]); printk("\n"); } @@ -5585,7 +5584,7 @@ static void calculate_totalreserve_pages(void) for_each_online_pgdat(pgdat) { for (i = 0; i < MAX_NR_ZONES; i++) { struct zone *zone = pgdat->node_zones + i; - unsigned long max = 0; + long max = 0; /* Find valid and maximum lowmem_reserve in the zone */ for (j = i; j < MAX_NR_ZONES; j++) { diff --git a/mm/vmstat.c b/mm/vmstat.c index def5dd2fbe61..600aa56499cc 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1065,10 +1065,10 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, zone_page_state(zone, i)); seq_printf(m, - "\n protection: (%lu", + "\n protection: (%ld", zone->lowmem_reserve[0]); for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) - seq_printf(m, ", %lu", zone->lowmem_reserve[i]); + seq_printf(m, ", %ld", zone->lowmem_reserve[i]); seq_printf(m, ")" "\n pagesets"); From e341f2a89ab31ede47254feac4f7abc3203ee56c Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:16 -0700 Subject: [PATCH 198/298] mm: move zone->pages_scanned into a vmstat counter commit 0d5d823ab4e608ec7b52ac4410de4cb74bbe0edd upstream. zone->pages_scanned is a write-intensive cache line during page reclaim and it's also updated during page free. Move the counter into vmstat to take advantage of the per-cpu updates and do not update it in the free paths unless necessary. On a small UMA machine running tiobench the difference is marginal. On a 4-node machine the overhead is more noticable. Note that automatic NUMA balancing was disabled for this test as otherwise the system CPU overhead is unpredictable. 3.16.0-rc3 3.16.0-rc3 3.16.0-rc3 vanillarearrange-v5 vmstat-v5 User 746.94 759.78 774.56 System 65336.22 58350.98 32847.27 Elapsed 27553.52 27282.02 27415.04 Note that the overhead reduction will vary depending on where exactly pages are allocated and freed. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 2 +- mm/page_alloc.c | 12 +++++++++--- mm/vmscan.c | 7 ++++--- mm/vmstat.c | 3 ++- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 672048f4acb2..11671d14409e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -143,6 +143,7 @@ enum zone_stat_item { NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ + NR_PAGES_SCANNED, /* pages scanned since last reclaim */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ @@ -478,7 +479,6 @@ struct zone { /* Fields commonly accessed by the page reclaim scanner */ spinlock_t lru_lock; - unsigned long pages_scanned; /* since last reclaim */ struct lruvec lruvec; /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a0957400b842..fbf1f547dd83 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -678,9 +678,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, int migratetype = 0; int batch_free = 0; int to_free = count; + unsigned long nr_scanned; spin_lock(&zone->lock); - zone->pages_scanned = 0; + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); + if (nr_scanned) + __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); while (to_free) { struct page *page; @@ -729,8 +732,11 @@ static void free_one_page(struct zone *zone, unsigned int order, int migratetype) { + unsigned long nr_scanned; spin_lock(&zone->lock); - zone->pages_scanned = 0; + nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); + if (nr_scanned) + __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); __free_one_page(page, pfn, zone, order, migratetype); if (unlikely(!is_migrate_isolate(migratetype))) @@ -3251,7 +3257,7 @@ void show_free_areas(unsigned int filter) K(zone_page_state(zone, NR_BOUNCE)), K(zone_page_state(zone, NR_FREE_CMA_PAGES)), K(zone_page_state(zone, NR_WRITEBACK_TEMP)), - zone->pages_scanned, + K(zone_page_state(zone, NR_PAGES_SCANNED)), (!zone_reclaimable(zone) ? "yes" : "no") ); printk("lowmem_reserve[]:"); diff --git a/mm/vmscan.c b/mm/vmscan.c index 771dad70a08f..b850ced69ed6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -163,7 +163,8 @@ static unsigned long zone_reclaimable_pages(struct zone *zone) bool zone_reclaimable(struct zone *zone) { - return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; + return zone_page_state(zone, NR_PAGES_SCANNED) < + zone_reclaimable_pages(zone) * 6; } static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) @@ -1470,7 +1471,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); if (global_reclaim(sc)) { - zone->pages_scanned += nr_scanned; + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); if (current_is_kswapd()) __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); else @@ -1659,7 +1660,7 @@ static void shrink_active_list(unsigned long nr_to_scan, nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, &nr_scanned, sc, isolate_mode, lru); if (global_reclaim(sc)) - zone->pages_scanned += nr_scanned; + __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); reclaim_stat->recent_scanned[file] += nr_taken; diff --git a/mm/vmstat.c b/mm/vmstat.c index 600aa56499cc..3034638eb465 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -761,6 +761,7 @@ const char * const vmstat_text[] = { "nr_shmem", "nr_dirtied", "nr_written", + "nr_pages_scanned", #ifdef CONFIG_NUMA "numa_hit", @@ -1055,7 +1056,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), - zone->pages_scanned, + zone_page_state(zone, NR_PAGES_SCANNED), zone->spanned_pages, zone->present_pages, zone->managed_pages); From 442ae03a52461e81eb95a776c9a3009fd4f7daca Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 16:07:18 -0700 Subject: [PATCH 199/298] mm: vmscan: only update per-cpu thresholds for online CPU commit bb0b6dffa2ccfbd9747ad0cc87c7459622896e60 upstream. When kswapd is awake reclaiming, the per-cpu stat thresholds are lowered to get more accurate counts to avoid breaching watermarks. This threshold update iterates over all possible CPUs which is unnecessary. Only online CPUs need to be updated. If a new CPU is onlined, refresh_zone_stat_thresholds() will set the thresholds correctly. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/vmstat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 3034638eb465..eded1909a690 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -200,7 +200,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat, continue; threshold = (*calculate_pressure)(zone); - for_each_possible_cpu(cpu) + for_each_online_cpu(cpu) per_cpu_ptr(zone->pageset, cpu)->stat_threshold = threshold; } From a54cf1b3210610ebd9285e7bd79cb7613f54f0cd Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 28 Aug 2014 19:35:44 +0100 Subject: [PATCH 200/298] mm: page_alloc: abort fair zone allocation policy when remotes nodes are encountered commit f7b5d647946aae1647bf5cd26c16b3a793c1ac49 upstream. The purpose of numa_zonelist_order=zone is to preserve lower zones for use with 32-bit devices. If locality is preferred then the numa_zonelist_order=node policy should be used. Unfortunately, the fair zone allocation policy overrides this by skipping zones on remote nodes until the lower one is found. While this makes sense from a page aging and performance perspective, it breaks the expected zonelist policy. This patch restores the expected behaviour for zone-list ordering. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fbf1f547dd83..3eb5adf1b2dd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1980,7 +1980,7 @@ zonelist_scan: */ if (alloc_flags & ALLOC_FAIR) { if (!zone_local(preferred_zone, zone)) - continue; + break; if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0) continue; } From a0d134d3e45980551e98d9b8ff11713e81622e02 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 28 Aug 2014 19:35:45 +0100 Subject: [PATCH 201/298] mm: page_alloc: reduce cost of the fair zone allocation policy commit 4ffeaf3560a52b4a69cc7909873d08c0ef5909d4 upstream. The fair zone allocation policy round-robins allocations between zones within a node to avoid age inversion problems during reclaim. If the first allocation fails, the batch counts are reset and a second attempt made before entering the slow path. One assumption made with this scheme is that batches expire at roughly the same time and the resets each time are justified. This assumption does not hold when zones reach their low watermark as the batches will be consumed at uneven rates. Allocation failure due to watermark depletion result in additional zonelist scans for the reset and another watermark check before hitting the slowpath. On UMA, the benefit is negligible -- around 0.25%. On 4-socket NUMA machine it's variable due to the variability of measuring overhead with the vmstat changes. The system CPU overhead comparison looks like 3.16.0-rc3 3.16.0-rc3 3.16.0-rc3 vanilla vmstat-v5 lowercost-v5 User 746.94 774.56 802.00 System 65336.22 32847.27 40852.33 Elapsed 27553.52 27415.04 27368.46 However it is worth noting that the overall benchmark still completed faster and intuitively it makes sense to take as few passes as possible through the zonelists. Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mel Gorman Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 6 +++ mm/page_alloc.c | 101 +++++++++++++++++++++-------------------- 2 files changed, 59 insertions(+), 48 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 11671d14409e..ac819bf9522c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -529,6 +529,7 @@ typedef enum { ZONE_WRITEBACK, /* reclaim scanning has recently found * many pages under writeback */ + ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */ } zone_flags_t; static inline void zone_set_flag(struct zone *zone, zone_flags_t flag) @@ -566,6 +567,11 @@ static inline int zone_is_reclaim_locked(const struct zone *zone) return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags); } +static inline int zone_is_fair_depleted(const struct zone *zone) +{ + return test_bit(ZONE_FAIR_DEPLETED, &zone->flags); +} + static inline int zone_is_oom_locked(const struct zone *zone) { return test_bit(ZONE_OOM_LOCKED, &zone->flags); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3eb5adf1b2dd..ea419137f845 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1614,6 +1614,9 @@ again: } __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); + if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 && + !zone_is_fair_depleted(zone)) + zone_set_flag(zone, ZONE_FAIR_DEPLETED); __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone, gfp_flags); @@ -1938,6 +1941,18 @@ static inline void init_zone_allows_reclaim(int nid) } #endif /* CONFIG_NUMA */ +static void reset_alloc_batches(struct zone *preferred_zone) +{ + struct zone *zone = preferred_zone->zone_pgdat->node_zones; + + do { + mod_zone_page_state(zone, NR_ALLOC_BATCH, + high_wmark_pages(zone) - low_wmark_pages(zone) - + atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); + zone_clear_flag(zone, ZONE_FAIR_DEPLETED); + } while (zone++ != preferred_zone); +} + /* * get_page_from_freelist goes through the zonelist trying to allocate * a page. @@ -1955,8 +1970,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, int did_zlc_setup = 0; /* just call zlc_setup() one time */ bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) && (gfp_mask & __GFP_WRITE); + int nr_fair_skipped = 0; + bool zonelist_rescan; zonelist_scan: + zonelist_rescan = false; + /* * Scan zonelist, looking for a zone with enough free. * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c. @@ -1981,8 +2000,10 @@ zonelist_scan: if (alloc_flags & ALLOC_FAIR) { if (!zone_local(preferred_zone, zone)) break; - if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0) + if (zone_is_fair_depleted(zone)) { + nr_fair_skipped++; continue; + } } /* * When allocating a page cache page for writing, we @@ -2088,13 +2109,7 @@ this_zone_full: zlc_mark_zone_full(zonelist, z); } - if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) { - /* Disable zlc cache for second zonelist scan */ - zlc_active = 0; - goto zonelist_scan; - } - - if (page) + if (page) { /* * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was * necessary to allocate the page. The expectation is @@ -2103,8 +2118,37 @@ this_zone_full: * for !PFMEMALLOC purposes. */ page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); + return page; + } - return page; + /* + * The first pass makes sure allocations are spread fairly within the + * local node. However, the local node might have free pages left + * after the fairness batches are exhausted, and remote zones haven't + * even been considered yet. Try once more without fairness, and + * include remote zones now, before entering the slowpath and waking + * kswapd: prefer spilling to a remote zone over swapping locally. + */ + if (alloc_flags & ALLOC_FAIR) { + alloc_flags &= ~ALLOC_FAIR; + if (nr_fair_skipped) { + zonelist_rescan = true; + reset_alloc_batches(preferred_zone); + } + if (nr_online_nodes > 1) + zonelist_rescan = true; + } + + if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) { + /* Disable zlc cache for second zonelist scan */ + zlc_active = 0; + zonelist_rescan = true; + } + + if (zonelist_rescan) + goto zonelist_scan; + + return NULL; } /* @@ -2433,28 +2477,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, return page; } -static void reset_alloc_batches(struct zonelist *zonelist, - enum zone_type high_zoneidx, - struct zone *preferred_zone) -{ - struct zoneref *z; - struct zone *zone; - - for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { - /* - * Only reset the batches of zones that were actually - * considered in the fairness pass, we don't want to - * trash fairness information for zones that are not - * actually part of this zonelist's round-robin cycle. - */ - if (!zone_local(preferred_zone, zone)) - continue; - mod_zone_page_state(zone, NR_ALLOC_BATCH, - high_wmark_pages(zone) - low_wmark_pages(zone) - - atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH])); - } -} - static void wake_all_kswapds(unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, @@ -2792,28 +2814,11 @@ retry_cpuset: if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) alloc_flags |= ALLOC_CMA; #endif -retry: /* First allocation attempt */ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, zonelist, high_zoneidx, alloc_flags, preferred_zone, classzone_idx, migratetype); if (unlikely(!page)) { - /* - * The first pass makes sure allocations are spread - * fairly within the local node. However, the local - * node might have free pages left after the fairness - * batches are exhausted, and remote zones haven't - * even been considered yet. Try once more without - * fairness, and include remote zones now, before - * entering the slowpath and waking kswapd: prefer - * spilling to a remote zone over swapping locally. - */ - if (alloc_flags & ALLOC_FAIR) { - reset_alloc_batches(zonelist, high_zoneidx, - preferred_zone); - alloc_flags &= ~ALLOC_FAIR; - goto retry; - } /* * Runtime PM, block IO and its error handling path * can deadlock because I/O on the device might not From 525f2d01368852ed91d220e0106bbfcefe5f1450 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 29 Dec 2014 20:30:35 +0100 Subject: [PATCH 202/298] mm: get rid of radix tree gfp mask for pagecache_get_page commit 45f87de57f8fad59302fd263dd81ffa4843b5b24 upstream. Commit 2457aec63745 ("mm: non-atomically mark page accessed during page cache allocation where possible") has added a separate parameter for specifying gfp mask for radix tree allocations. Not only this is less than optimal from the API point of view because it is error prone, it is also buggy currently because grab_cache_page_write_begin is using GFP_KERNEL for radix tree and if fgp_flags doesn't contain FGP_NOFS (mostly controlled by fs by AOP_FLAG_NOFS flag) but the mapping_gfp_mask has __GFP_FS cleared then the radix tree allocation wouldn't obey the restriction and might recurse into filesystem and cause deadlocks. This is the case for most filesystems unfortunately because only ext4 and gfs2 are using AOP_FLAG_NOFS. Let's simply remove radix_gfp_mask parameter because the allocation context is same for both page cache and for the radix tree. Just make sure that the radix tree gets only the sane subset of the mask (e.g. do not pass __GFP_WRITE). Long term it is more preferable to convert remaining users of AOP_FLAG_NOFS to use mapping_gfp_mask instead and simplify this interface even further. Reported-by: Dave Chinner Signed-off-by: Michal Hocko Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/pagemap.h | 13 ++++++------- mm/filemap.c | 20 +++++++++----------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ec054b4917df..fcebdda3651c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -256,7 +256,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping, #define FGP_NOWAIT 0x00000020 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, - int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask); + int fgp_flags, gfp_t cache_gfp_mask); /** * find_get_page - find and get a page reference @@ -271,13 +271,13 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, static inline struct page *find_get_page(struct address_space *mapping, pgoff_t offset) { - return pagecache_get_page(mapping, offset, 0, 0, 0); + return pagecache_get_page(mapping, offset, 0, 0); } static inline struct page *find_get_page_flags(struct address_space *mapping, pgoff_t offset, int fgp_flags) { - return pagecache_get_page(mapping, offset, fgp_flags, 0, 0); + return pagecache_get_page(mapping, offset, fgp_flags, 0); } /** @@ -297,7 +297,7 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, static inline struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) { - return pagecache_get_page(mapping, offset, FGP_LOCK, 0, 0); + return pagecache_get_page(mapping, offset, FGP_LOCK, 0); } /** @@ -324,7 +324,7 @@ static inline struct page *find_or_create_page(struct address_space *mapping, { return pagecache_get_page(mapping, offset, FGP_LOCK|FGP_ACCESSED|FGP_CREAT, - gfp_mask, gfp_mask & GFP_RECLAIM_MASK); + gfp_mask); } /** @@ -345,8 +345,7 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, { return pagecache_get_page(mapping, index, FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, - mapping_gfp_mask(mapping), - GFP_NOFS); + mapping_gfp_mask(mapping)); } struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); diff --git a/mm/filemap.c b/mm/filemap.c index 8aa817460cc7..217cfd3b3264 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -897,7 +897,7 @@ EXPORT_SYMBOL(find_lock_entry); * @mapping: the address_space to search * @offset: the page index * @fgp_flags: PCG flags - * @gfp_mask: gfp mask to use if a page is to be allocated + * @gfp_mask: gfp mask to use for the page cache data page allocation * * Looks up the page cache slot at @mapping & @offset. * @@ -916,7 +916,7 @@ EXPORT_SYMBOL(find_lock_entry); * If there is a page cache page, it is returned with an increased refcount. */ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, - int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask) + int fgp_flags, gfp_t gfp_mask) { struct page *page; @@ -953,13 +953,11 @@ no_page: if (!page && (fgp_flags & FGP_CREAT)) { int err; if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping)) - cache_gfp_mask |= __GFP_WRITE; - if (fgp_flags & FGP_NOFS) { - cache_gfp_mask &= ~__GFP_FS; - radix_gfp_mask &= ~__GFP_FS; - } + gfp_mask |= __GFP_WRITE; + if (fgp_flags & FGP_NOFS) + gfp_mask &= ~__GFP_FS; - page = __page_cache_alloc(cache_gfp_mask); + page = __page_cache_alloc(gfp_mask); if (!page) return NULL; @@ -970,7 +968,8 @@ no_page: if (fgp_flags & FGP_ACCESSED) init_page_accessed(page); - err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask); + err = add_to_page_cache_lru(page, mapping, offset, + gfp_mask & GFP_RECLAIM_MASK); if (unlikely(err)) { page_cache_release(page); page = NULL; @@ -2460,8 +2459,7 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, fgp_flags |= FGP_NOFS; page = pagecache_get_page(mapping, index, fgp_flags, - mapping_gfp_mask(mapping), - GFP_KERNEL); + mapping_gfp_mask(mapping)); if (page) wait_for_stable_page(page); From 50606f9a4cdba0499ae76a3111b3a7acfdb4a9d0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 3 Dec 2014 16:07:58 +1100 Subject: [PATCH 203/298] md/raid5: fetch_block must fetch all the blocks handle_stripe_dirtying wants. commit 108cef3aa41669610e1836fe638812dd067d72de upstream. It is critical that fetch_block() and handle_stripe_dirtying() are consistent in their analysis of what needs to be loaded. Otherwise raid5 can wait forever for a block that won't be loaded. Currently when writing to a RAID5 that is resyncing, to a location beyond the resync offset, handle_stripe_dirtying chooses a reconstruct-write cycle, but fetch_block() assumes a read-modify-write, and a lockup can happen. So treat that case just like RAID6, just as we do in handle_stripe_dirtying. RAID6 always does reconstruct-write. This bug was introduced when the behaviour of handle_stripe_dirtying was changed in 3.7, so the patch is suitable for any kernel since, though it will need careful merging for some versions. Cc: stable@vger.kernel.org (v3.7+) Fixes: a7854487cd7128a30a7f4f5259de9f67d5efb95f Reported-by: Henry Cai Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4913c0690872..175584ad643f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2896,7 +2896,8 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, (s->failed >= 2 && fdev[1]->toread) || (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || - (sh->raid_conf->level == 6 && s->failed && s->to_write))) { + ((sh->raid_conf->level == 6 || sh->sector >= sh->raid_conf->mddev->recovery_cp) + && s->failed && s->to_write))) { /* we would like to get this block, possibly by computing it, * otherwise read it if the backing disk is insync */ From 016ea480c9ca2344006d3ef592a0154ba03b507a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 29 Jan 2015 17:41:44 -0800 Subject: [PATCH 204/298] Linux 3.14.31 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5b94752a85e3..5abf670c6651 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 14 -SUBLEVEL = 30 +SUBLEVEL = 31 EXTRAVERSION = NAME = Remembering Coco From 0595b9318ab26bd884d731b00e81708ce942f70d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 26 Jan 2015 12:58:35 -0800 Subject: [PATCH 205/298] x86, build: replace Perl script with Shell script commit d69911a68c865b152a067feaa45e98e6bb0f655b upstream. Commit e6023367d779 ("x86, kaslr: Prevent .bss from overlaping initrd") added Perl to the required build environment. This reimplements in shell the Perl script used to find the size of the kernel with bss and brk added. Signed-off-by: Kees Cook Reported-by: Rob Landley Acked-by: Rob Landley Cc: Anca Emanuel Cc: Fengguang Wu Cc: Junjie Mao Cc: Kees Cook Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/tools/calc_run_size.pl | 39 ---------------------------- arch/x86/tools/calc_run_size.sh | 42 +++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 40 deletions(-) delete mode 100644 arch/x86/tools/calc_run_size.pl create mode 100644 arch/x86/tools/calc_run_size.sh diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index b5bb49866bcc..67e9f5cc91ed 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -76,7 +76,7 @@ suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_LZ4) := lz4 RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \ - perl $(srctree)/arch/x86/tools/calc_run_size.pl) + $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh) quiet_cmd_mkpiggy = MKPIGGY $@ cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false ) diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl deleted file mode 100644 index 23210baade2d..000000000000 --- a/arch/x86/tools/calc_run_size.pl +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/perl -# -# Calculate the amount of space needed to run the kernel, including room for -# the .bss and .brk sections. -# -# Usage: -# objdump -h a.out | perl calc_run_size.pl -use strict; - -my $mem_size = 0; -my $file_offset = 0; - -my $sections=" *[0-9]+ \.(?:bss|brk) +"; -while (<>) { - if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) { - my $size = hex($1); - my $offset = hex($2); - $mem_size += $size; - if ($file_offset == 0) { - $file_offset = $offset; - } elsif ($file_offset != $offset) { - # BFD linker shows the same file offset in ELF. - # Gold linker shows them as consecutive. - next if ($file_offset + $mem_size == $offset + $size); - - printf STDERR "file_offset: 0x%lx\n", $file_offset; - printf STDERR "mem_size: 0x%lx\n", $mem_size; - printf STDERR "offset: 0x%lx\n", $offset; - printf STDERR "size: 0x%lx\n", $size; - - die ".bss and .brk are non-contiguous\n"; - } - } -} - -if ($file_offset == 0) { - die "Never found .bss or .brk file offset\n"; -} -printf("%d\n", $mem_size + $file_offset); diff --git a/arch/x86/tools/calc_run_size.sh b/arch/x86/tools/calc_run_size.sh new file mode 100644 index 000000000000..1a4c17bb3910 --- /dev/null +++ b/arch/x86/tools/calc_run_size.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# +# Calculate the amount of space needed to run the kernel, including room for +# the .bss and .brk sections. +# +# Usage: +# objdump -h a.out | sh calc_run_size.sh + +NUM='\([0-9a-fA-F]*[ \t]*\)' +OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p') +if [ -z "$OUT" ] ; then + echo "Never found .bss or .brk file offset" >&2 + exit 1 +fi + +OUT=$(echo ${OUT# }) +sizeA=$(printf "%d" 0x${OUT%% *}) +OUT=${OUT#* } +offsetA=$(printf "%d" 0x${OUT%% *}) +OUT=${OUT#* } +sizeB=$(printf "%d" 0x${OUT%% *}) +OUT=${OUT#* } +offsetB=$(printf "%d" 0x${OUT%% *}) + +run_size=$(( $offsetA + $sizeA + $sizeB )) + +# BFD linker shows the same file offset in ELF. +if [ "$offsetA" -ne "$offsetB" ] ; then + # Gold linker shows them as consecutive. + endB=$(( $offsetB + $sizeB )) + if [ "$endB" != "$run_size" ] ; then + printf "sizeA: 0x%x\n" $sizeA >&2 + printf "offsetA: 0x%x\n" $offsetA >&2 + printf "sizeB: 0x%x\n" $sizeB >&2 + printf "offsetB: 0x%x\n" $offsetB >&2 + echo ".bss and .brk are non-contiguous" >&2 + exit 1 + fi +fi + +printf "%d\n" $run_size +exit 0 From 8d1ac9b68cab13300424179059db0a7056ca456f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 2 Jan 2015 17:48:51 +0200 Subject: [PATCH 206/298] spi: dw-mid: fix FIFO size commit 67bf9cda4b498b8cea4a40be67a470afe57d2e88 upstream. The FIFO size is 40 accordingly to the specifications, but this means 0x40, i.e. 64 bytes. This patch fixes the typo and enables FIFO size autodetection for Intel MID devices. Fixes: 7063c0d942a1 (spi/dw_spi: add DMA support) Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-dw-mid.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c index a4c45ea8f688..996e16d12cd4 100644 --- a/drivers/spi/spi-dw-mid.c +++ b/drivers/spi/spi-dw-mid.c @@ -222,7 +222,6 @@ int dw_spi_mid_init(struct dw_spi *dws) iounmap(clk_reg); dws->num_cs = 16; - dws->fifo_len = 40; /* FIFO has 40 words buffer */ #ifdef CONFIG_SPI_DW_MID_DMA dws->dma_priv = kzalloc(sizeof(struct mid_dma), GFP_KERNEL); From 6ed72cce43a5599a3df4d7c658a887795c46f5e8 Mon Sep 17 00:00:00 2001 From: Zidan Wang Date: Wed, 31 Dec 2014 11:39:14 +0800 Subject: [PATCH 207/298] ASoC: wm8960: Fix capture sample rate from 11250 to 11025 commit 22ee76daddb87f88d2336d1b4737ef27c4f307ac upstream. wm8960 codec can't support sample rate 11250, it must be 11025. Signed-off-by: Zidan Wang Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/wm8960.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index f156010e52bc..942ef8427347 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -555,7 +555,7 @@ static struct { { 22050, 2 }, { 24000, 2 }, { 16000, 3 }, - { 11250, 4 }, + { 11025, 4 }, { 12000, 4 }, { 8000, 5 }, }; From f1251f53c8da53443b33eefce82772176be9452a Mon Sep 17 00:00:00 2001 From: Aurelien BOUIN Date: Mon, 29 Dec 2014 16:13:51 -0800 Subject: [PATCH 208/298] ASoC: fsl_esai: Fix incorrect xDC field width of xCCR registers commit adc60298c80efef4c2d7a7860b91b450931a7cf8 upstream. The xDC field should have 5 bit width according to Reference Manual. Thus this patch fixes it. Signed-off-by: Aurelien BOUIN Signed-off-by: Nicolin Chen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/fsl/fsl_esai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_esai.h b/sound/soc/fsl/fsl_esai.h index 75e14033e8d8..dfdbaa014561 100644 --- a/sound/soc/fsl/fsl_esai.h +++ b/sound/soc/fsl/fsl_esai.h @@ -302,7 +302,7 @@ #define ESAI_xCCR_xFP_MASK (((1 << ESAI_xCCR_xFP_WIDTH) - 1) << ESAI_xCCR_xFP_SHIFT) #define ESAI_xCCR_xFP(v) ((((v) - 1) << ESAI_xCCR_xFP_SHIFT) & ESAI_xCCR_xFP_MASK) #define ESAI_xCCR_xDC_SHIFT 9 -#define ESAI_xCCR_xDC_WIDTH 4 +#define ESAI_xCCR_xDC_WIDTH 5 #define ESAI_xCCR_xDC_MASK (((1 << ESAI_xCCR_xDC_WIDTH) - 1) << ESAI_xCCR_xDC_SHIFT) #define ESAI_xCCR_xDC(v) ((((v) - 1) << ESAI_xCCR_xDC_SHIFT) & ESAI_xCCR_xDC_MASK) #define ESAI_xCCR_xPSR_SHIFT 8 From f48df470bc8d9f106d1833f36a3f8944d3588346 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Wed, 14 Jan 2015 08:47:29 +0000 Subject: [PATCH 209/298] ASoC: soc-compress.c: fix NULL dereference commit d3268a40d4b19ff7bee23f52eabbc4e96bb685e8 upstream. In soc_new_compress() when rtd->dai_link->dynamic is set, we create the pcm substreams with this call: ret = snd_pcm_new_internal(rtd->card->snd_card, new_name, num, 1, 0, &be_pcm); which passes 0 as capture_count leading to be_pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream being NULL, hence when trying to set rtd a few lines below we get an oops. Fix by using rtd->dai_link->dpcm_playback and rtd->dai_link->dpcm_capture as playback_count and capture_count to snd_pcm_new_internal(). Signed-off-by: Qais Yousef Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-compress.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index 5e9690c85d8f..4f98ff14cf12 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -696,7 +696,8 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) rtd->dai_link->stream_name); ret = snd_pcm_new_internal(rtd->card->snd_card, new_name, num, - 1, 0, &be_pcm); + rtd->dai_link->dpcm_playback, + rtd->dai_link->dpcm_capture, &be_pcm); if (ret < 0) { dev_err(rtd->card->dev, "ASoC: can't create compressed for %s\n", rtd->dai_link->name); @@ -705,8 +706,10 @@ int soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num) rtd->pcm = be_pcm; rtd->fe_compr = 1; - be_pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream->private_data = rtd; - be_pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream->private_data = rtd; + if (rtd->dai_link->dpcm_playback) + be_pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream->private_data = rtd; + else if (rtd->dai_link->dpcm_capture) + be_pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream->private_data = rtd; memcpy(compr->ops, &soc_compr_dyn_ops, sizeof(soc_compr_dyn_ops)); } else memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops)); From 247675d55549989df70d494c60c2e1b1b5b7dcd0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 16 Jan 2015 11:20:25 +0200 Subject: [PATCH 210/298] ASoC: omap-mcbsp: Correct CBM_CFS dai format configuration commit 20602e34cd33dd452bc1836fa7c9b59978f75db0 upstream. We should select FSR also to be driven by McBSP, not only FSX. Signed-off-by: Peter Ujfalusi Acked-by: Jarkko Nikula Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/omap/omap-mcbsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index 6c19bba23570..6a339fb55479 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -436,7 +436,7 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai, case SND_SOC_DAIFMT_CBM_CFS: /* McBSP slave. FS clock as output */ regs->srgr2 |= FSGM; - regs->pcr0 |= FSXM; + regs->pcr0 |= FSXM | FSRM; break; case SND_SOC_DAIFMT_CBM_CFM: /* McBSP slave */ From 020c2f9f79e798d77ef9002a412c92659fff4996 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 26 Jan 2015 07:20:39 +0200 Subject: [PATCH 211/298] can: kvaser_usb: Do not sleep in atomic context commit ded5006667318c06df875609535176bd33f243a1 upstream. Upon receiving a hardware event with the BUS_RESET flag set, the driver kills all of its anchored URBs and resets all of its transmit URB contexts. Unfortunately it does so under the context of URB completion handler `kvaser_usb_read_bulk_callback()', which is often called in an atomic context. While the device is flooded with many received error packets, usb_kill_urb() typically sleeps/reschedules till the transfer request of each killed URB in question completes, leading to the sleep in atomic bug. [3] In v2 submission of the original driver patch [1], it was stated that the URBs kill and tx contexts reset was needed since we don't receive any tx acknowledgments later and thus such resources will be locked down forever. Fortunately this is no longer needed since an earlier bugfix in this patch series is now applied: all tx URB contexts are reset upon CAN channel close. [2] Moreover, a BUS_RESET is now treated _exactly_ like a BUS_OFF event, which is the recommended handling method advised by the device manufacturer. [1] http://article.gmane.org/gmane.linux.network/239442 http://www.webcitation.org/6Vr2yagAQ [2] can: kvaser_usb: Reset all URB tx contexts upon channel close 889b77f7fd2bcc922493d73a4c51d8a851505815 [3] Stacktrace: [] dump_stack+0x45/0x57 [] __schedule_bug+0x41/0x4f [] __schedule+0x5f1/0x700 [] ? _raw_spin_unlock_irqrestore+0xa/0x10 [] schedule+0x24/0x70 [] usb_kill_urb+0x65/0xa0 [] ? prepare_to_wait_event+0x110/0x110 [] usb_kill_anchored_urbs+0x48/0x80 [] kvaser_usb_unlink_tx_urbs+0x18/0x50 [kvaser_usb] [] kvaser_usb_rx_error+0xc0/0x400 [kvaser_usb] [] ? vprintk_default+0x1a/0x20 [] kvaser_usb_read_bulk_callback+0x4c1/0x5f0 [kvaser_usb] [] __usb_hcd_giveback_urb+0x5e/0xc0 [] usb_hcd_giveback_urb+0x41/0x110 [] finish_urb+0x98/0x180 [ohci_hcd] [] ? acct_account_cputime+0x17/0x20 [] ? local_clock+0x15/0x30 [] ohci_work+0x1fb/0x5a0 [ohci_hcd] [] ? process_backlog+0xb1/0x130 [] ohci_irq+0xeb/0x270 [ohci_hcd] [] usb_hcd_irq+0x21/0x30 [] handle_irq_event_percpu+0x43/0x120 [] handle_irq_event+0x3d/0x60 [] handle_fasteoi_irq+0x74/0x110 [] handle_irq+0x1d/0x30 [] do_IRQ+0x57/0x100 [] common_interrupt+0x6a/0x6a Signed-off-by: Ahmed S. Darwish Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 4e65b35bebc0..81385b2c7adc 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -653,11 +653,6 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, priv = dev->nets[channel]; stats = &priv->netdev->stats; - if (status & M16C_STATE_BUS_RESET) { - kvaser_usb_unlink_tx_urbs(priv); - return; - } - skb = alloc_can_err_skb(priv->netdev, &cf); if (!skb) { stats->rx_dropped++; @@ -668,7 +663,7 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, netdev_dbg(priv->netdev, "Error status: 0x%02x\n", status); - if (status & M16C_STATE_BUS_OFF) { + if (status & (M16C_STATE_BUS_OFF | M16C_STATE_BUS_RESET)) { cf->can_id |= CAN_ERR_BUSOFF; priv->can.can_stats.bus_off++; From 4a2c136417e2e16fedc8e426cc0a739396bad132 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 26 Jan 2015 07:22:54 +0200 Subject: [PATCH 212/298] can: kvaser_usb: Send correct context to URB completion commit 3803fa6977f1de15fda4e8646c8fec97c8045cae upstream. Send expected argument to the URB completion hander: a CAN netdevice instead of the network interface private context `kvaser_usb_net_priv'. This was discovered by having some garbage in the kernel log in place of the netdevice names: can0 and can1. Signed-off-by: Ahmed S. Darwish Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 81385b2c7adc..1a20892f155a 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -578,7 +578,7 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, usb_sndbulkpipe(dev->udev, dev->bulk_out->bEndpointAddress), buf, msg->len, - kvaser_usb_simple_msg_callback, priv); + kvaser_usb_simple_msg_callback, netdev); usb_anchor_urb(urb, &priv->tx_submitted); err = usb_submit_urb(urb, GFP_ATOMIC); From 163113183b105d61446bb6395e14ae156db3ecd5 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 26 Jan 2015 07:24:06 +0200 Subject: [PATCH 213/298] can: kvaser_usb: Retry the first bulk transfer on -ETIMEDOUT commit 14c10c2a1dd8eb8e00b750b521753260befa2789 upstream. On some x86 laptops, plugging a Kvaser device again after an unplug makes the firmware always ignore the very first command. For such a case, provide some room for retries instead of completely exiting the driver init code. Signed-off-by: Ahmed S. Darwish Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 1a20892f155a..f3b8e4742d8b 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1577,7 +1577,7 @@ static int kvaser_usb_probe(struct usb_interface *intf, { struct kvaser_usb *dev; int err = -ENOMEM; - int i; + int i, retry = 3; dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL); if (!dev) @@ -1595,7 +1595,15 @@ static int kvaser_usb_probe(struct usb_interface *intf, usb_set_intfdata(intf, dev); - err = kvaser_usb_get_software_info(dev); + /* On some x86 laptops, plugging a Kvaser device again after + * an unplug makes the firmware always ignore the very first + * command. For such a case, provide some room for retries + * instead of completely exiting the driver. + */ + do { + err = kvaser_usb_get_software_info(dev); + } while (--retry && err == -ETIMEDOUT); + if (err) { dev_err(&intf->dev, "Cannot get software infos, error %d\n", err); From 91ea34506c9cd98424a689e855a347445457b032 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 26 Jan 2015 07:25:43 +0200 Subject: [PATCH 214/298] can: kvaser_usb: Fix state handling upon BUS_ERROR events commit e638642b08c170d2021b706f0b1c4f4ae93d8cbd upstream. While being in an ERROR_WARNING state, and receiving further bus error events with error counters still in the ERROR_WARNING range of 97-127 inclusive, the state handling code erroneously reverts back to ERROR_ACTIVE. Per the CAN standard, only revert to ERROR_ACTIVE when the error counters are less than 96. Moreover, in certain Kvaser models, the BUS_ERROR flag is always set along with undefined bits in the M16C status register. Thus use bitwise operators instead of full equality for checking that register against bus errors. Signed-off-by: Ahmed S. Darwish Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/kvaser_usb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index f3b8e4742d8b..3d198679510b 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -689,9 +689,7 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, } new_state = CAN_STATE_ERROR_PASSIVE; - } - - if (status == M16C_STATE_BUS_ERROR) { + } else if (status & M16C_STATE_BUS_ERROR) { if ((priv->can.state < CAN_STATE_ERROR_WARNING) && ((txerr >= 96) || (rxerr >= 96))) { cf->can_id |= CAN_ERR_CRTL; @@ -701,7 +699,8 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, priv->can.can_stats.error_warning++; new_state = CAN_STATE_ERROR_WARNING; - } else if (priv->can.state > CAN_STATE_ERROR_ACTIVE) { + } else if ((priv->can.state > CAN_STATE_ERROR_ACTIVE) && + ((txerr < 96) && (rxerr < 96))) { cf->can_id |= CAN_ERR_PROT; cf->data[2] = CAN_ERR_PROT_ACTIVE; From d403171d23bf990c18a40fc3af8df09cfd0dfa93 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 15 Jan 2015 18:23:47 +0100 Subject: [PATCH 215/298] powerpc/xmon: Fix another endiannes issue in RTAS call from xmon commit e6eb2eba494d6f99e69ca3c3748cd37a2544ab38 upstream. The commit 3b8a3c010969 ("powerpc/pseries: Fix endiannes issue in RTAS call from xmon") was fixing an endianness issue in the call made from xmon to RTAS. However, as Michael Ellerman noticed, this fix was not complete, the token value was not byte swapped. This lead to call an unexpected and most of the time unexisting RTAS function, which is silently ignored by RTAS. This fix addresses this hole. Reported-by: Michael Ellerman Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/xmon/xmon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index bc5fbc201bcb..f89389f634d7 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -288,6 +288,7 @@ static inline void disable_surveillance(void) args.token = rtas_token("set-indicator"); if (args.token == RTAS_UNKNOWN_SERVICE) return; + args.token = cpu_to_be32(args.token); args.nargs = cpu_to_be32(3); args.nret = cpu_to_be32(1); args.rets = &args.args[3]; From fe0b03adb9bce09149de2a1f4b419c762d9a2f66 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 25 Jan 2015 14:34:29 +0100 Subject: [PATCH 216/298] ALSA: seq-dummy: remove deadlock-causing events on close commit 0767e95bb96d7fdddcd590fb809e6975d93aebc5 upstream. When the last subscriber to a "Through" port has been removed, the subscribed destination ports might still be active, so it would be wrong to send "all sounds off" and "reset controller" events to them. The proper place for such a shutdown would be the closing of the actual MIDI port (and close_substream() in rawmidi.c already can do this). This also fixes a deadlock when dummy_unuse() tries to send events to its own port that is already locked because it is being freed. Reported-by: Peter Billam Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/seq_dummy.c | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/sound/core/seq/seq_dummy.c b/sound/core/seq/seq_dummy.c index dbc550716790..f60d81497f28 100644 --- a/sound/core/seq/seq_dummy.c +++ b/sound/core/seq/seq_dummy.c @@ -81,36 +81,6 @@ struct snd_seq_dummy_port { static int my_client = -1; -/* - * unuse callback - send ALL_SOUNDS_OFF and RESET_CONTROLLERS events - * to subscribers. - * Note: this callback is called only after all subscribers are removed. - */ -static int -dummy_unuse(void *private_data, struct snd_seq_port_subscribe *info) -{ - struct snd_seq_dummy_port *p; - int i; - struct snd_seq_event ev; - - p = private_data; - memset(&ev, 0, sizeof(ev)); - if (p->duplex) - ev.source.port = p->connect; - else - ev.source.port = p->port; - ev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; - ev.type = SNDRV_SEQ_EVENT_CONTROLLER; - for (i = 0; i < 16; i++) { - ev.data.control.channel = i; - ev.data.control.param = MIDI_CTL_ALL_SOUNDS_OFF; - snd_seq_kernel_client_dispatch(p->client, &ev, 0, 0); - ev.data.control.param = MIDI_CTL_RESET_CONTROLLERS; - snd_seq_kernel_client_dispatch(p->client, &ev, 0, 0); - } - return 0; -} - /* * event input callback - just redirect events to subscribers */ @@ -175,7 +145,6 @@ create_port(int idx, int type) | SNDRV_SEQ_PORT_TYPE_PORT; memset(&pcb, 0, sizeof(pcb)); pcb.owner = THIS_MODULE; - pcb.unuse = dummy_unuse; pcb.event_input = dummy_input; pcb.private_free = dummy_free; pcb.private_data = rec; From 1027dfa58f3655ee5a2309b44ce5be637fd621fd Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 19 Jan 2015 12:06:14 +0300 Subject: [PATCH 217/298] rbd: drop parent_ref in rbd_dev_unprobe() unconditionally commit e69b8d414f948c242ad9f3eb2b7e24fba783dbbd upstream. This effectively reverts the last hunk of 392a9dad7e77 ("rbd: detect when clone image is flattened"). The problem with parent_overlap != 0 condition is that it's possible and completely valid to have an image with parent_overlap == 0 whose parent state needs to be cleaned up on unmap. The next commit, which drops the "clone image now standalone" logic, opens up another window of opportunity to hit this, but even without it # cat parent-ref.sh #!/bin/bash rbd create --image-format 2 --size 1 foo rbd snap create foo@snap rbd snap protect foo@snap rbd clone foo@snap bar rbd resize --allow-shrink --size 0 bar rbd resize --size 1 bar DEV=$(rbd map bar) rbd unmap $DEV leaves rbd_device/rbd_spec/etc and rbd_client along with ceph_client hanging around. My thinking behind calling rbd_dev_parent_put() unconditionally is that there shouldn't be any requests in flight at that point in time as we are deep into unmap sequence. Hence, even if rbd_dev_unparent() caused by flatten is delayed by in-flight requests, it will have finished by the time we reach rbd_dev_unprobe() caused by unmap, thus turning unconditional rbd_dev_parent_put() into a no-op. Fixes: http://tracker.ceph.com/issues/10352 Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 255ca232ecc7..784cb0fd21a2 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4764,10 +4764,7 @@ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) { struct rbd_image_header *header; - /* Drop parent reference unless it's already been done (or none) */ - - if (rbd_dev->parent_overlap) - rbd_dev_parent_put(rbd_dev); + rbd_dev_parent_put(rbd_dev); /* Free dynamic fields from the header, then zero it out */ From ca7591a3c8fa06453a8a402dca3c82c0f70a670c Mon Sep 17 00:00:00 2001 From: Paul Osmialowski Date: Mon, 19 Jan 2015 17:03:33 +0100 Subject: [PATCH 218/298] i2c: s3c2410: fix ABBA deadlock by keeping clock prepared commit 34e81ad5f0b60007c95995eb7803da7e00c6c611 upstream. This patch solves deadlock between clock prepare mutex and regmap mutex reported by Tomasz Figa in [1] by implementing solution from [2]: "always leave the clock of the i2c controller in a prepared state". [1] https://lkml.org/lkml/2014/7/2/171 [2] https://lkml.org/lkml/2014/7/2/207 On each i2c transfer handled by s3c24xx_i2c_xfer(), clk_prepare_enable() was called, which calls clk_prepare() then clk_enable(). clk_prepare() takes prepare_lock mutex before proceeding. Note that i2c transfer functions are invoked from many places in kernel, typically with some other additional lock held. It may happen that function on CPU1 (e.g. regmap_update_bits()) has taken a mutex (i.e. regmap lock mutex) then it attempts i2c communication in order to proceed (so it needs to obtain clock related prepare_lock mutex during transfer preparation stage due to clk_prepare() call). At the same time other task on CPU0 wants to operate on clock (e.g. to (un)prepare clock for some other reason) so it has taken prepare_lock mutex. CPU0: CPU1: clk_disable_unused() regulator_disable() clk_prepare_lock() map->lock(map->lock_arg) regmap_read() s3c24xx_i2c_xfer() map->lock(map->lock_arg) clk_prepare_lock() Implemented solution from [2] leaves i2c clock prepared. Preparation is done in s3c24xx_i2c_probe() function. Without this patch, it is immediately unprepared by clk_disable_unprepare() call. I've replaced this call with clk_disable() and I've added clk_unprepare() call in s3c24xx_i2c_remove(). The s3c24xx_i2c_xfer() function now uses clk_enable() instead of clk_prepare_enable() (and clk_disable() instead of clk_unprepare_disable()). Signed-off-by: Paul Osmialowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-s3c2410.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 93ec37649346..79a211782766 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -753,14 +753,16 @@ static int s3c24xx_i2c_xfer(struct i2c_adapter *adap, int ret; pm_runtime_get_sync(&adap->dev); - clk_prepare_enable(i2c->clk); + ret = clk_enable(i2c->clk); + if (ret) + return ret; for (retry = 0; retry < adap->retries; retry++) { ret = s3c24xx_i2c_doxfer(i2c, msgs, num); if (ret != -EAGAIN) { - clk_disable_unprepare(i2c->clk); + clk_disable(i2c->clk); pm_runtime_put(&adap->dev); return ret; } @@ -770,7 +772,7 @@ static int s3c24xx_i2c_xfer(struct i2c_adapter *adap, udelay(100); } - clk_disable_unprepare(i2c->clk); + clk_disable(i2c->clk); pm_runtime_put(&adap->dev); return -EREMOTEIO; } @@ -1153,7 +1155,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) clk_prepare_enable(i2c->clk); ret = s3c24xx_i2c_init(i2c); - clk_disable_unprepare(i2c->clk); + clk_disable(i2c->clk); if (ret != 0) { dev_err(&pdev->dev, "I2C controller init failed\n"); return ret; @@ -1166,6 +1168,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) i2c->irq = ret = platform_get_irq(pdev, 0); if (ret <= 0) { dev_err(&pdev->dev, "cannot find IRQ\n"); + clk_unprepare(i2c->clk); return ret; } @@ -1174,6 +1177,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) if (ret != 0) { dev_err(&pdev->dev, "cannot claim IRQ %d\n", i2c->irq); + clk_unprepare(i2c->clk); return ret; } } @@ -1181,6 +1185,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) ret = s3c24xx_i2c_register_cpufreq(i2c); if (ret < 0) { dev_err(&pdev->dev, "failed to register cpufreq notifier\n"); + clk_unprepare(i2c->clk); return ret; } @@ -1197,6 +1202,7 @@ static int s3c24xx_i2c_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "failed to add bus to i2c core\n"); s3c24xx_i2c_deregister_cpufreq(i2c); + clk_unprepare(i2c->clk); return ret; } @@ -1218,6 +1224,8 @@ static int s3c24xx_i2c_remove(struct platform_device *pdev) { struct s3c24xx_i2c *i2c = platform_get_drvdata(pdev); + clk_unprepare(i2c->clk); + pm_runtime_disable(&i2c->adap.dev); pm_runtime_disable(&pdev->dev); @@ -1246,10 +1254,13 @@ static int s3c24xx_i2c_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct s3c24xx_i2c *i2c = platform_get_drvdata(pdev); + int ret; - clk_prepare_enable(i2c->clk); + ret = clk_enable(i2c->clk); + if (ret) + return ret; s3c24xx_i2c_init(i2c); - clk_disable_unprepare(i2c->clk); + clk_disable(i2c->clk); i2c->suspended = 0; return 0; From d02122d00448833a8798d121a8e790e2ff2b339b Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Mon, 19 Jan 2015 16:29:25 -0800 Subject: [PATCH 219/298] Input: synaptics - adjust min/max for Lenovo ThinkPad X1 Carbon 2nd commit 8543cf1c247909ce85850ca6e2714adba351d6aa upstream. LEN0037 found in the Lenovo ThinkPad X1 Carbon 2nd (2014 model) Reported-and-tested-by: Bjoern Olausson Signed-off-by: Peter Hutterer Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index a3769cf84381..b00e282ef166 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -132,8 +132,9 @@ static const struct min_max_quirk min_max_pnpid_table[] = { 1232, 5710, 1156, 4696 }, { - (const char * const []){"LEN0034", "LEN0036", "LEN0039", - "LEN2002", "LEN2004", NULL}, + (const char * const []){"LEN0034", "LEN0036", "LEN0037", + "LEN0039", "LEN2002", "LEN2004", + NULL}, 1024, 5112, 2024, 4832 }, { @@ -162,7 +163,7 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0034", /* T431s, L440, L540, T540, W540, X1 Carbon 2nd */ "LEN0035", /* X240 */ "LEN0036", /* T440 */ - "LEN0037", + "LEN0037", /* X1 Carbon 2nd */ "LEN0038", "LEN0039", /* T440s */ "LEN0041", From 907fe767f37d75968acdab964a8ec98484e60243 Mon Sep 17 00:00:00 2001 From: Jochen Hein Date: Thu, 22 Jan 2015 12:03:15 -0800 Subject: [PATCH 220/298] Input: i8042 - add noloop quirk for Medion Akoya E7225 (MD98857) commit 1d90d6d5522befa8efa1a7ea406be65cf865ded4 upstream. Without this the aux port does not get detected, and consequently the touchpad will not work. With this patch the touchpad is detected: $ dmesg | grep -E "(SYN|i8042|serio)" pnp 00:03: Plug and Play ACPI device, IDs SYN1d22 PNP0f13 (active) i8042: PNP: PS/2 Controller [PNP0303:PS2K,PNP0f13:PS2M] at 0x60,0x64 irq 1,12 serio: i8042 KBD port at 0x60,0x64 irq 1 serio: i8042 AUX port at 0x60,0x64 irq 12 input: AT Translated Set 2 keyboard as /devices/platform/i8042/serio0/input/input4 psmouse serio1: synaptics: Touchpad model: 1, fw: 8.1, id: 0x1e2b1, caps: 0xd00123/0x840300/0x126800, board id: 2863, fw id: 1473085 input: SynPS/2 Synaptics TouchPad as /devices/platform/i8042/serio1/input/input6 dmidecode excerpt for this laptop is: Handle 0x0001, DMI type 1, 27 bytes System Information Manufacturer: Medion Product Name: Akoya E7225 Version: 1.0 Signed-off-by: Jochen Hein Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-x86ia64io.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index c43c46f7dcd0..dd6d14d2337f 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -151,6 +151,14 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "5a"), }, }, + { + /* Medion Akoya E7225 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Medion"), + DMI_MATCH(DMI_PRODUCT_NAME, "Akoya E7225"), + DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), + }, + }, { /* Blue FB5601 */ .matches = { From 1066a175c7ef86259ea07f796c9a66dd26f28f5f Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 20 Jan 2015 07:44:29 +0800 Subject: [PATCH 221/298] nfs: fix dio deadlock when O_DIRECT flag is flipped commit ee8a1a8b160a87dc3a9c81a86796aa4db85ea815 upstream. We only support swap file calling nfs_direct_IO. However, application might be able to get to nfs_direct_IO if it toggles O_DIRECT flag during IO and it can deadlock because we grab inode->i_mutex in nfs_file_direct_write(). So return 0 for such case. Then the generic layer will fall back to buffer IO. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/direct.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index de2543d3c283..7ececa1c6c4f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -123,6 +123,12 @@ static inline int put_dreq(struct nfs_direct_req *dreq) */ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) { + struct inode *inode = iocb->ki_filp->f_mapping->host; + + /* we only support swap file calling nfs_direct_IO */ + if (!IS_SWAPFILE(inode)) + return 0; + #ifndef CONFIG_NFS_SWAP dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", iocb->ki_filp, (long long) pos, nr_segs); From 6c90be66183adf9497a9e5f1e6810f3a4a9ab934 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Jan 2015 14:37:44 -0500 Subject: [PATCH 222/298] NFSv4.1: Fix an Oops in nfs41_walk_client_list commit 3175e1dcec40fab1a444c010087f2068b6b04732 upstream. If we start state recovery on a client that failed to initialise correctly, then we are very likely to Oops. Reported-by: "Mkrtchyan, Tigran" Link: http://lkml.kernel.org/r/130621862.279655.1421851650684.JavaMail.zimbra@desy.de Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 037f9572b94c..d3f606255b99 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -633,7 +633,7 @@ int nfs41_walk_client_list(struct nfs_client *new, prev = pos; status = nfs_wait_client_init_complete(pos); - if (status == 0) { + if (pos->cl_cons_state == NFS_CS_SESSION_INITING) { nfs4_schedule_lease_recovery(pos); status = nfs4_wait_clnt_recover(pos); } From a30f0cb99fbc11053934edaf91233f0d0ebb1b5f Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 20 Jan 2015 15:05:08 +0100 Subject: [PATCH 223/298] mac80211: properly set CCK flag in radiotap commit 3a5c5e81d8128a9e43abc52b75dd21d3da7a0cfc upstream. Fix a regression introduced by commit a5e70697d0c4 ("mac80211: add radiotap flag and handling for 5/10 MHz") where the IEEE80211_CHAN_CCK channel type flag was incorrectly replaced by the IEEE80211_CHAN_OFDM flag. This commit fixes that by using the CCK flag again. Fixes: a5e70697d0c4 ("mac80211: add radiotap flag and handling for 5/10 MHz") Signed-off-by: Mathy Vanhoef Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1e4dc4ed5e0d..815ca56b39e7 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -261,7 +261,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, else if (rate && rate->flags & IEEE80211_RATE_ERP_G) channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; else if (rate) - channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ; + channel_flags |= IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ; else channel_flags |= IEEE80211_CHAN_2GHZ; put_unaligned_le16(channel_flags, pos); From e98f093f513ee393d55232217e0ac6fbaa50bb7a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Jan 2015 11:10:12 +0100 Subject: [PATCH 224/298] nl80211: fix per-station group key get/del and memory leak commit 0fa7b39131576dd1baa6ca17fca53c65d7f62249 upstream. In case userspace attempts to obtain key information for or delete a unicast key, this is currently erroneously rejected unless the driver sets the WIPHY_FLAG_IBSS_RSN flag. Apparently enough drivers do so it was never noticed. Fix that, and while at it fix a potential memory leak: the error path in the get_key() function was placed after allocating a message but didn't free it - move it to a better place. Luckily admin permissions are needed to call this operation. Fixes: e31b82136d1ad ("cfg80211/mac80211: allow per-station GTKs") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index df33156ecd2d..18d73df72531 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2697,6 +2697,9 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->get_key) return -EOPNOTSUPP; + if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) + return -ENOENT; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -2716,10 +2719,6 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) goto nla_put_failure; - if (pairwise && mac_addr && - !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) - return -ENOENT; - err = rdev_get_key(rdev, dev, key_idx, pairwise, mac_addr, &cookie, get_key_callback); @@ -2890,7 +2889,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) wdev_lock(dev->ieee80211_ptr); err = nl80211_key_allowed(dev->ieee80211_ptr); - if (key.type == NL80211_KEYTYPE_PAIRWISE && mac_addr && + if (key.type == NL80211_KEYTYPE_GROUP && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; From e6f2661f9e1b7d5454f5f2d6e55c7d41460bc36c Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 26 Jan 2015 11:38:21 +0000 Subject: [PATCH 225/298] dm thin: don't allow messages to be sent to a pool target in READ_ONLY or FAIL mode commit 2a7eaea02b99b6e267b1e89c79acc6e9a51cee3b upstream. You can't modify the metadata in these modes. It's better to fail these messages immediately than let the block-manager deny write locks on metadata blocks. Otherwise these failed metadata changes will trigger 'needs_check' to get set in the metadata superblock -- requiring repair using the thin_check utility. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index f7e052c7ab5f..c1120eb96d86 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2744,6 +2744,12 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) struct pool_c *pt = ti->private; struct pool *pool = pt->pool; + if (get_pool_mode(pool) >= PM_READ_ONLY) { + DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode", + dm_device_name(pool->pool_md)); + return -EINVAL; + } + if (!strcasecmp(argv[0], "create_thin")) r = process_create_thin_mesg(argc, argv, pool); From ac5d430c72e5104a82bdb2c8d86aaa1797cd185b Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 28 Jan 2015 12:07:46 +0000 Subject: [PATCH 226/298] dm cache: fix missing ERR_PTR returns and handling commit 766a78882ddf79b162243649d7dfdbac1fb6fb88 upstream. Commit 9b1cc9f251 ("dm cache: share cache-metadata object across inactive and active DM tables") mistakenly ignored the use of ERR_PTR returns. Restore missing IS_ERR checks and ERR_PTR returns where appropriate. Reported-by: Dan Carpenter Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-cache-metadata.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index d290e8396116..b950a803bae8 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -683,7 +683,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev, cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (!cmd) { DMERR("could not allocate metadata struct"); - return NULL; + return ERR_PTR(-ENOMEM); } atomic_set(&cmd->ref_count, 1); @@ -745,7 +745,7 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev, return cmd; cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size); - if (cmd) { + if (!IS_ERR(cmd)) { mutex_lock(&table_lock); cmd2 = lookup(bdev); if (cmd2) { @@ -780,9 +780,10 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, { struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, may_format_device, policy_hint_size); - if (cmd && !same_params(cmd, data_block_size)) { + + if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) { dm_cache_metadata_close(cmd); - return NULL; + return ERR_PTR(-EINVAL); } return cmd; From 03c5a83d8c13cf8d182290cb8771dab3a943cc35 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 29 Dec 2014 10:33:36 +0200 Subject: [PATCH 227/298] spi/pxa2xx: Clear cur_chip pointer before starting next message commit c957e8f084e0d21febcd6b8a0ea9631eccc92f36 upstream. Once the current message is finished, the driver notifies SPI core about this by calling spi_finalize_current_message(). This function queues next message to be transferred. If there are more messages in the queue, it is possible that the driver is asked to transfer the next message at this point. When spi_finalize_current_message() returns the driver clears the drv_data->cur_chip pointer to NULL. The problem is that if the driver already started the next message clearing drv_data->cur_chip will cause NULL pointer dereference which crashes the kernel like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 IP: [] cs_deassert+0x18/0x70 [spi_pxa2xx_platform] PGD 78bb8067 PUD 37712067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 1 PID: 11 Comm: ksoftirqd/1 Tainted: G O 3.18.0-rc4-mjo #5 Hardware name: Intel Corp. VALLEYVIEW B3 PLATFORM/NOTEBOOK, BIOS MNW2CRB1.X64.0071.R30.1408131301 08/13/2014 task: ffff880077f9f290 ti: ffff88007a820000 task.ti: ffff88007a820000 RIP: 0010:[] [] cs_deassert+0x18/0x70 [spi_pxa2xx_platform] RSP: 0018:ffff88007a823d08 EFLAGS: 00010202 RAX: 0000000000000008 RBX: ffff8800379a4430 RCX: 0000000000000026 RDX: 0000000000000000 RSI: 0000000000000246 RDI: ffff8800379a4430 RBP: ffff88007a823d18 R08: 00000000ffffffff R09: 000000007a9bc65a R10: 000000000000028f R11: 0000000000000005 R12: ffff880070123e98 R13: ffff880070123de8 R14: 0000000000000100 R15: ffffc90004888000 FS: 0000000000000000(0000) GS:ffff880079a80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000048 CR3: 000000007029b000 CR4: 00000000001007e0 Stack: ffff88007a823d58 ffff8800379a4430 ffff88007a823d48 ffffffffa0022c89 0000000000000000 ffff8800379a4430 0000000000000000 0000000000000006 ffff88007a823da8 ffffffffa0023be0 ffff88007a823dd8 ffffffff81076204 Call Trace: [] giveback+0x69/0xa0 [spi_pxa2xx_platform] [] pump_transfers+0x710/0x740 [spi_pxa2xx_platform] [] ? pick_next_task_fair+0x744/0x830 [] tasklet_action+0xa9/0xe0 [] __do_softirq+0xee/0x280 [] run_ksoftirqd+0x20/0x40 [] smpboot_thread_fn+0xff/0x1b0 [] ? SyS_setgroups+0x150/0x150 [] kthread+0xcd/0xf0 [] ? kthread_create_on_node+0x180/0x180 [] ret_from_fork+0x7c/0xb0 Fix this by clearing drv_data->cur_chip before we call spi_finalize_current_message(). Reported-by: Martin Oldfield Signed-off-by: Mika Westerberg Acked-by: Robert Jarzmik Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-pxa2xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 7ab3ccb592eb..458a1480dc07 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -400,8 +400,8 @@ static void giveback(struct driver_data *drv_data) cs_deassert(drv_data); } - spi_finalize_current_message(drv_data->master); drv_data->cur_chip = NULL; + spi_finalize_current_message(drv_data->master); } static void reset_sccr1(struct driver_data *drv_data) From b17ab0146dc0842d1014304ee73914662993cfc9 Mon Sep 17 00:00:00 2001 From: Ashay Jaiswal Date: Thu, 8 Jan 2015 18:54:25 +0530 Subject: [PATCH 228/298] regulator: core: fix race condition in regulator_put() commit 83b0302d347a49f951e904184afe57ac3723476e upstream. The regulator framework maintains a list of consumer regulators for a regulator device and protects it from concurrent access using the regulator device's mutex lock. In the case of regulator_put() the consumer is removed and regulator device's parameters are updated without holding the regulator device's mutex. This would lead to a race condition between the regulator_put() and any function which traverses the consumer list or modifies regulator device's parameters. Fix this race condition by holding the regulator device's mutex in case of regulator_put. Signed-off-by: Ashay Jaiswal Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index afca1bc24f26..b7984044232d 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1479,7 +1479,7 @@ struct regulator *regulator_get_optional(struct device *dev, const char *id) } EXPORT_SYMBOL_GPL(regulator_get_optional); -/* Locks held by regulator_put() */ +/* regulator_list_mutex lock held by regulator_put() */ static void _regulator_put(struct regulator *regulator) { struct regulator_dev *rdev; @@ -1494,12 +1494,14 @@ static void _regulator_put(struct regulator *regulator) /* remove any sysfs entries */ if (regulator->dev) sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name); + mutex_lock(&rdev->mutex); kfree(regulator->supply_name); list_del(®ulator->list); kfree(regulator); rdev->open_count--; rdev->exclusive = 0; + mutex_unlock(&rdev->mutex); module_put(rdev->owner); } From f95875d828b0193b61508fe93bdeb13958221fdd Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Thu, 22 Jan 2015 15:19:22 +0530 Subject: [PATCH 229/298] drivers: net: cpsw: discard dual emac default vlan configuration commit 02a54164c52ed6eca3089a0d402170fbf34d6cf5 upstream. In Dual EMAC, the default VLANs are used to segregate Rx packets between the ports, so adding the same default VLAN to the switch will affect the normal packet transfers. So returning error on addition of dual EMAC default VLANs. Even if EMAC 0 default port VLAN is added to EMAC 1, it will lead to break dual EMAC port separations. Fixes: d9ba8f9e6298 (driver: net: ethernet: cpsw: dual emac interface implementation) Reported-by: Felipe Balbi Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ti/cpsw.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 316650c3b5d7..4eb091da7fb6 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1610,6 +1610,19 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, if (vid == priv->data.default_vlan) return 0; + if (priv->data.dual_emac) { + /* In dual EMAC, reserved VLAN id should not be used for + * creating VLAN interfaces as this can break the dual + * EMAC port separation + */ + int i; + + for (i = 0; i < priv->data.slaves; i++) { + if (vid == priv->slaves[i].port_vlan) + return -EINVAL; + } + } + dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid); return cpsw_add_vlan_ale_entry(priv, vid); } @@ -1623,6 +1636,15 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, if (vid == priv->data.default_vlan) return 0; + if (priv->data.dual_emac) { + int i; + + for (i = 0; i < priv->data.slaves; i++) { + if (vid == priv->slaves[i].port_vlan) + return -EINVAL; + } + } + dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid); ret = cpsw_ale_del_vlan(priv->ale, vid, 0); if (ret != 0) From 5e45ff13211e09d5d271a31008d9ccb536c4606b Mon Sep 17 00:00:00 2001 From: Bob Paauwe Date: Thu, 18 Dec 2014 09:51:26 -0800 Subject: [PATCH 230/298] drm/i915: Only fence tiled region of object. commit af1a7301c7cf8912dca03065d448c4437c5c239f upstream. When creating a fence for a tiled object, only fence the area that makes up the actual tiles. The object may be larger than the tiled area and if we allow those extra addresses to be fenced, they'll get converted to addresses beyond where the object is mapped. This opens up the possiblity of writes beyond the end of object. To prevent this, we adjust the size of the fence to only encompass the area that makes up the actual tiles. The extra space is considered un-tiled and now behaves as if it was a linear object. Testcase: igt/gem_tiled_fence_overflow Reported-by: Dan Hettena Signed-off-by: Bob Paauwe Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3153eabde39b..de5ab4876a89 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2893,6 +2893,13 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg, u32 size = i915_gem_obj_ggtt_size(obj); uint64_t val; + /* Adjust fence size to match tiled area */ + if (obj->tiling_mode != I915_TILING_NONE) { + uint32_t row_size = obj->stride * + (obj->tiling_mode == I915_TILING_Y ? 32 : 8); + size = (size / row_size) * row_size; + } + val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & 0xfffff000) << 32; val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; From 73e8cfe5a403444d87a46df56007bd676196356d Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 17 Jul 2014 12:17:45 +0100 Subject: [PATCH 231/298] ARM: DMA: ensure that old section mappings are flushed from the TLB commit 6b076991dca9817e75c37e2f0db6d52611ea42fa upstream. When setting up the CMA region, we must ensure that the old section mappings are flushed from the TLB before replacing them with page tables, otherwise we can suffer from mismatched aliases if the CPU speculatively prefetches from these mappings at an inopportune time. A mismatched alias can occur when the TLB contains a section mapping, but a subsequent prefetch causes it to load a page table mapping, resulting in the possibility of the TLB containing two matching mappings for the same virtual address region. Acked-by: Will Deacon Signed-off-by: Russell King Cc: Hou Pengyang Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/dma-mapping.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 11b3914660d2..42f2fb8c5a00 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -464,12 +464,21 @@ void __init dma_contiguous_remap(void) map.type = MT_MEMORY_DMA_READY; /* - * Clear previous low-memory mapping + * Clear previous low-memory mapping to ensure that the + * TLB does not see any conflicting entries, then flush + * the TLB of the old entries before creating new mappings. + * + * This ensures that any speculatively loaded TLB entries + * (even though they may be rare) can not cause any problems, + * and ensures that this code is architecturally compliant. */ for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); + flush_tlb_kernel_range(__phys_to_virt(start), + __phys_to_virt(end)); + iotable_init(&map, 1); } } From 90ebdaa3d0ff2668a59dfebea2efee220d227e61 Mon Sep 17 00:00:00 2001 From: Liu ShuoX Date: Mon, 17 Mar 2014 11:24:49 +1100 Subject: [PATCH 232/298] pstore: clarify clearing of _read_cnt in ramoops_context commit 57fd835385a043577457a385f28c08be693991bf upstream. *_read_cnt in ramoops_context need to be cleared during pstore ->open to support mutli times getting the records. The patch added missed ftrace_read_cnt clearing and removed duplicate clearing in ramoops_probe. Signed-off-by: Liu ShuoX Cc: "Zhang, Yanmin" Cc: Colin Cross Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Tony Luck Cc: HuKeping Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index e7d95f959333..36ffe55b579d 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -92,6 +92,7 @@ struct ramoops_context { struct persistent_ram_ecc_info ecc_info; unsigned int max_dump_cnt; unsigned int dump_write_cnt; + /* _read_cnt need clear on ramoops_pstore_open */ unsigned int dump_read_cnt; unsigned int console_read_cnt; unsigned int ftrace_read_cnt; @@ -107,6 +108,7 @@ static int ramoops_pstore_open(struct pstore_info *psi) cxt->dump_read_cnt = 0; cxt->console_read_cnt = 0; + cxt->ftrace_read_cnt = 0; return 0; } @@ -435,7 +437,6 @@ static int ramoops_probe(struct platform_device *pdev) if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); - cxt->dump_read_cnt = 0; cxt->size = pdata->mem_size; cxt->phys_addr = pdata->mem_address; cxt->memtype = pdata->mem_type; From b310479c591d123811c18f19f55896cda0fc4f5e Mon Sep 17 00:00:00 2001 From: Liu ShuoX Date: Mon, 17 Mar 2014 11:24:49 +1100 Subject: [PATCH 233/298] pstore: skip zero size persistent ram buffer in traverse commit aa9a4a1edfbd3d223af01db833da2f07850bc655 upstream. In ramoops_pstore_read, a valid prz pointer with zero size buffer will break traverse of all persistent ram buffers. The latter buffer might be lost. Signed-off-by: Liu ShuoX Cc: "Zhang, Yanmin" Cc: Colin Cross Reviewed-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Tony Luck Cc: HuKeping Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 36ffe55b579d..808435a9e2e1 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -126,12 +126,12 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, prz = przs[i]; - if (update) { - /* Update old/shadowed buffer. */ + /* Update old/shadowed buffer. */ + if (update) persistent_ram_save_old(prz); - if (!persistent_ram_old_size(prz)) - return NULL; - } + + if (!persistent_ram_old_size(prz)) + return NULL; *typep = type; *id = i; From c1f4213810462d58baef282eb34c1c28c1edf4cf Mon Sep 17 00:00:00 2001 From: Liu ShuoX Date: Mon, 17 Mar 2014 13:57:49 -0700 Subject: [PATCH 234/298] pstore: Fix NULL pointer fault if get NULL prz in ramoops_get_next_prz commit b0aa931fb84431394d995472d0af2a6c2b61064d upstream. ramoops_get_next_prz get the prz according the paramters. If it get a uninitialized prz, access its members by following persistent_ram_old_size(prz) will cause a NULL pointer crash. Ex: if ftrace_size is 0, fprz will be NULL. Fix it by return NULL in advance. Signed-off-by: Liu ShuoX Acked-by: Kees Cook Signed-off-by: Tony Luck Cc: HuKeping Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 808435a9e2e1..fe68d8ac4d3d 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -125,6 +125,8 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, return NULL; prz = przs[i]; + if (!prz) + return NULL; /* Update old/shadowed buffer. */ if (update) From b49e4a878abbfdb9c12e4170a61a9a01dbdd6328 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 19 Jan 2015 18:13:43 +0300 Subject: [PATCH 235/298] rbd: fix rbd_dev_parent_get() when parent_overlap == 0 commit ae43e9d05eb4bd324155292f889fbd001c4faea8 upstream. The comment for rbd_dev_parent_get() said * We must get the reference before checking for the overlap to * coordinate properly with zeroing the parent overlap in * rbd_dev_v2_parent_info() when an image gets flattened. We * drop it again if there is no overlap. but the "drop it again if there is no overlap" part was missing from the implementation. This lead to absurd parent_ref values for images with parent_overlap == 0, as parent_ref was incremented for each img_request and virtually never decremented. Fix this by leveraging the fact that refresh path calls rbd_dev_v2_parent_info() under header_rwsem and use it for read in rbd_dev_parent_get(), instead of messing around with atomics. Get rid of barriers in rbd_dev_v2_parent_info() while at it - I don't see what they'd pair with now and I suspect we are in a pretty miserable situation as far as proper locking goes regardless. Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin Reviewed-by: Alex Elder [idryomov@redhat.com: backport to 3.14: context] Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 784cb0fd21a2..275a7dc2b06c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1926,32 +1926,26 @@ static void rbd_dev_parent_put(struct rbd_device *rbd_dev) * If an image has a non-zero parent overlap, get a reference to its * parent. * - * We must get the reference before checking for the overlap to - * coordinate properly with zeroing the parent overlap in - * rbd_dev_v2_parent_info() when an image gets flattened. We - * drop it again if there is no overlap. - * * Returns true if the rbd device has a parent with a non-zero * overlap and a reference for it was successfully taken, or * false otherwise. */ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) { - int counter; + int counter = 0; if (!rbd_dev->parent_spec) return false; - counter = atomic_inc_return_safe(&rbd_dev->parent_ref); - if (counter > 0 && rbd_dev->parent_overlap) - return true; - - /* Image was flattened, but parent is not yet torn down */ + down_read(&rbd_dev->header_rwsem); + if (rbd_dev->parent_overlap) + counter = atomic_inc_return_safe(&rbd_dev->parent_ref); + up_read(&rbd_dev->header_rwsem); if (counter < 0) rbd_warn(rbd_dev, "parent reference overflow\n"); - return false; + return counter > 0; } /* @@ -3904,7 +3898,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) */ if (rbd_dev->parent_overlap) { rbd_dev->parent_overlap = 0; - smp_mb(); rbd_dev_parent_put(rbd_dev); pr_info("%s: clone image has been flattened\n", rbd_dev->disk->disk_name); @@ -3948,7 +3941,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) * treat it specially. */ rbd_dev->parent_overlap = overlap; - smp_mb(); if (!overlap) { /* A null parent_spec indicates it's the initial probe */ From 336d55193a33b2922f2cd3d70ca4e81a0e76c30f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Jan 2015 14:21:16 -0500 Subject: [PATCH 236/298] workqueue: fix subtle pool management issue which can stall whole worker_pool commit 29187a9eeaf362d8422e62e17a22a6e115277a49 upstream. A worker_pool's forward progress is guaranteed by the fact that the last idle worker assumes the manager role to create more workers and summon the rescuers if creating workers doesn't succeed in timely manner before proceeding to execute work items. This manager role is implemented in manage_workers(), which indicates whether the worker may proceed to work item execution with its return value. This is necessary because multiple workers may contend for the manager role, and, if there already is a manager, others should proceed to work item execution. Unfortunately, the function also indicates that the worker may proceed to work item execution if need_to_create_worker() is false at the head of the function. need_to_create_worker() tests the following conditions. pending work items && !nr_running && !nr_idle The first and third conditions are protected by pool->lock and thus won't change while holding pool->lock; however, nr_running can change asynchronously as other workers block and resume and while it's likely to be zero, as someone woke this worker up in the first place, some other workers could have become runnable inbetween making it non-zero. If this happens, manage_worker() could return false even with zero nr_idle making the worker, the last idle one, proceed to execute work items. If then all workers of the pool end up blocking on a resource which can only be released by a work item which is pending on that pool, the whole pool can deadlock as there's no one to create more workers or summon the rescuers. This patch fixes the problem by removing the early exit condition from maybe_create_worker() and making manage_workers() return false iff there's already another manager, which ensures that the last worker doesn't start executing work items. We can leave the early exit condition alone and just ignore the return value but the only reason it was put there is because the manage_workers() used to perform both creations and destructions of workers and thus the function may be invoked while the pool is trying to reduce the number of workers. Now that manage_workers() is called only when more workers are needed, the only case this early exit condition is triggered is rare race conditions rendering it pointless. Tested with simulated workload and modified workqueue code which trigger the pool deadlock reliably without this patch. tj: Updated to v3.14 where manage_workers() is responsible not only for creating more workers but also destroying surplus ones. maybe_create_worker() needs to keep its early exit condition to avoid creating a new worker when manage_workers() is called to destroy surplus ones. Other than that, the adaptabion is straight-forward. Both maybe_{create|destroy}_worker() functions are converted to return void and manage_workers() returns %false iff it lost manager arbitration. Signed-off-by: Tejun Heo Reported-by: Eric Sandeen Link: http://lkml.kernel.org/g/54B019F4.8030009@sandeen.net Cc: Dave Chinner Cc: Lai Jiangshan Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 42 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b4defdecec8a..f6f31d823e8e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1962,17 +1962,13 @@ static void pool_mayday_timeout(unsigned long __pool) * spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Does GFP_KERNEL allocations. Called only from * manager. - * - * Return: - * %false if no action was taken and pool->lock stayed locked, %true - * otherwise. */ -static bool maybe_create_worker(struct worker_pool *pool) +static void maybe_create_worker(struct worker_pool *pool) __releases(&pool->lock) __acquires(&pool->lock) { if (!need_to_create_worker(pool)) - return false; + return; restart: spin_unlock_irq(&pool->lock); @@ -1989,7 +1985,7 @@ restart: start_worker(worker); if (WARN_ON_ONCE(need_to_create_worker(pool))) goto restart; - return true; + return; } if (!need_to_create_worker(pool)) @@ -2006,7 +2002,7 @@ restart: spin_lock_irq(&pool->lock); if (need_to_create_worker(pool)) goto restart; - return true; + return; } /** @@ -2019,15 +2015,9 @@ restart: * LOCKING: * spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Called only from manager. - * - * Return: - * %false if no action was taken and pool->lock stayed locked, %true - * otherwise. */ -static bool maybe_destroy_workers(struct worker_pool *pool) +static void maybe_destroy_workers(struct worker_pool *pool) { - bool ret = false; - while (too_many_workers(pool)) { struct worker *worker; unsigned long expires; @@ -2041,10 +2031,7 @@ static bool maybe_destroy_workers(struct worker_pool *pool) } destroy_worker(worker); - ret = true; } - - return ret; } /** @@ -2064,16 +2051,14 @@ static bool maybe_destroy_workers(struct worker_pool *pool) * multiple times. Does GFP_KERNEL allocations. * * Return: - * %false if the pool don't need management and the caller can safely start - * processing works, %true indicates that the function released pool->lock - * and reacquired it to perform some management function and that the - * conditions that the caller verified while holding the lock before - * calling the function might no longer be true. + * %false if the pool doesn't need management and the caller can safely + * start processing works, %true if management function was performed and + * the conditions that the caller verified before calling the function may + * no longer be true. */ static bool manage_workers(struct worker *worker) { struct worker_pool *pool = worker->pool; - bool ret = false; /* * Managership is governed by two mutexes - manager_arb and @@ -2097,7 +2082,7 @@ static bool manage_workers(struct worker *worker) * manager_mutex. */ if (!mutex_trylock(&pool->manager_arb)) - return ret; + return false; /* * With manager arbitration won, manager_mutex would be free in @@ -2107,7 +2092,6 @@ static bool manage_workers(struct worker *worker) spin_unlock_irq(&pool->lock); mutex_lock(&pool->manager_mutex); spin_lock_irq(&pool->lock); - ret = true; } pool->flags &= ~POOL_MANAGE_WORKERS; @@ -2116,12 +2100,12 @@ static bool manage_workers(struct worker *worker) * Destroy and then create so that may_start_working() is true * on return. */ - ret |= maybe_destroy_workers(pool); - ret |= maybe_create_worker(pool); + maybe_destroy_workers(pool); + maybe_create_worker(pool); mutex_unlock(&pool->manager_mutex); mutex_unlock(&pool->manager_arb); - return ret; + return true; } /** From 4a8860d867e204adb05b0fad16ba0a6f09ba8e59 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 6 Jan 2015 16:10:37 -0800 Subject: [PATCH 237/298] target: Drop arbitrary maximum I/O size limit commit 046ba64285a4389ae5e9a7dfa253c6bff3d7c341 upstream. This patch drops the arbitrary maximum I/O size limit in sbc_parse_cdb(), which currently for fabric_max_sectors is hardcoded to 8192 (4 MB for 512 byte sector devices), and for hw_max_sectors is a backend driver dependent value. This limit is problematic because Linux initiators have only recently started to honor block limits MAXIMUM TRANSFER LENGTH, and other non-Linux based initiators (eg: MSFT Fibre Channel) can also generate I/Os larger than 4 MB in size. Currently when this happens, the following message will appear on the target resulting in I/Os being returned with non recoverable status: SCSI OP 28h with too big sectors 16384 exceeds fabric_max_sectors: 8192 Instead, drop both [fabric,hw]_max_sector checks in sbc_parse_cdb(), and convert the existing hw_max_sectors into a purely informational attribute used to represent the granuality that backend driver and/or subsystem code is splitting I/Os upon. Also, update FILEIO with an explicit FD_MAX_BYTES check in fd_execute_rw() to deal with the one special iovec limitiation case. v2 changes: - Drop hw_max_sectors check in sbc_parse_cdb() Reported-by: Lance Gropper Reported-by: Stefan Priebe Cc: Christoph Hellwig Cc: Martin K. Petersen Cc: Roland Dreier Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_device.c | 8 ++++---- drivers/target/target_core_file.c | 11 ++++++++++- drivers/target/target_core_iblock.c | 2 +- drivers/target/target_core_sbc.c | 15 --------------- drivers/target/target_core_spc.c | 5 +---- 5 files changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 38b4be24d13f..26ae6886ac59 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1153,10 +1153,10 @@ int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors) " changed for TCM/pSCSI\n", dev); return -EINVAL; } - if (optimal_sectors > dev->dev_attrib.fabric_max_sectors) { + if (optimal_sectors > dev->dev_attrib.hw_max_sectors) { pr_err("dev[%p]: Passed optimal_sectors %u cannot be" - " greater than fabric_max_sectors: %u\n", dev, - optimal_sectors, dev->dev_attrib.fabric_max_sectors); + " greater than hw_max_sectors: %u\n", dev, + optimal_sectors, dev->dev_attrib.hw_max_sectors); return -EINVAL; } @@ -1565,7 +1565,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) DA_UNMAP_GRANULARITY_ALIGNMENT_DEFAULT; dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN; dev->dev_attrib.fabric_max_sectors = DA_FABRIC_MAX_SECTORS; - dev->dev_attrib.optimal_sectors = DA_FABRIC_MAX_SECTORS; xcopy_lun = &dev->xcopy_lun; xcopy_lun->lun_se_dev = dev; @@ -1606,6 +1605,7 @@ int target_configure_device(struct se_device *dev) dev->dev_attrib.hw_max_sectors = se_dev_align_max_sectors(dev->dev_attrib.hw_max_sectors, dev->dev_attrib.hw_block_size); + dev->dev_attrib.optimal_sectors = dev->dev_attrib.hw_max_sectors; dev->dev_index = scsi_get_new_index(SCSI_DEVICE_INDEX); dev->creation_time = get_jiffies_64(); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index cf991a91a8a9..41eff7d64cb8 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -620,7 +620,16 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, struct fd_prot fd_prot; sense_reason_t rc; int ret = 0; - + /* + * We are currently limited by the number of iovecs (2048) per + * single vfs_[writev,readv] call. + */ + if (cmd->data_length > FD_MAX_BYTES) { + pr_err("FILEIO: Not able to process I/O of %u bytes due to" + "FD_MAX_BYTES: %u iovec count limitiation\n", + cmd->data_length, FD_MAX_BYTES); + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } /* * Call vectorized fileio functions to map struct scatterlist * physical memory addresses to struct iovec virtual memory. diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 9e0232cca92e..feefe24a88f7 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -123,7 +123,7 @@ static int iblock_configure_device(struct se_device *dev) q = bdev_get_queue(bd); dev->dev_attrib.hw_block_size = bdev_logical_block_size(bd); - dev->dev_attrib.hw_max_sectors = UINT_MAX; + dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q); dev->dev_attrib.hw_queue_depth = q->nr_requests; /* diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 379033f5903b..5216acd68b4b 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -910,21 +910,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) { unsigned long long end_lba; - if (sectors > dev->dev_attrib.fabric_max_sectors) { - printk_ratelimited(KERN_ERR "SCSI OP %02xh with too" - " big sectors %u exceeds fabric_max_sectors:" - " %u\n", cdb[0], sectors, - dev->dev_attrib.fabric_max_sectors); - return TCM_INVALID_CDB_FIELD; - } - if (sectors > dev->dev_attrib.hw_max_sectors) { - printk_ratelimited(KERN_ERR "SCSI OP %02xh with too" - " big sectors %u exceeds backend hw_max_sectors:" - " %u\n", cdb[0], sectors, - dev->dev_attrib.hw_max_sectors); - return TCM_INVALID_CDB_FIELD; - } - end_lba = dev->transport->get_blocks(dev) + 1; if (cmd->t_task_lba + sectors > end_lba) { pr_err("cmd exceeds last lba %llu " diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index fcdf98fc947c..12a74f646d38 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -503,7 +503,6 @@ static sense_reason_t spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) { struct se_device *dev = cmd->se_dev; - u32 max_sectors; int have_tp = 0; int opt, min; @@ -537,9 +536,7 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* * Set MAXIMUM TRANSFER LENGTH */ - max_sectors = min(dev->dev_attrib.fabric_max_sectors, - dev->dev_attrib.hw_max_sectors); - put_unaligned_be32(max_sectors, &buf[8]); + put_unaligned_be32(dev->dev_attrib.hw_max_sectors, &buf[8]); /* * Set OPTIMAL TRANSFER LENGTH From 4ccf212fb84d79b75fc66a2e26ac6bdbab0aedbf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 6 Feb 2015 06:53:31 -0800 Subject: [PATCH 238/298] Linux 3.14.32 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5abf670c6651..00fffa3f2310 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 14 -SUBLEVEL = 31 +SUBLEVEL = 32 EXTRAVERSION = NAME = Remembering Coco From a387b75638351a7071c7b95e118648bfe72d5da1 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 20 Feb 2014 18:02:11 +0000 Subject: [PATCH 239/298] of: Make device nodes kobjects so they show up in sysfs Device tree nodes are already treated as objects, and we already want to expose them to userspace which is done using the /proc filesystem today. Right now the kernel has to do a lot of work to keep the /proc view in sync with the in-kernel representation. If device_nodes are switched to be kobjects then the device tree code can be a whole lot simpler. It also turns out that switching to using /sysfs from /proc results in smaller code and data size, and the userspace ABI won't change if /proc/device-tree symlinks to /sys/firmware/devicetree/base. v7: Add missing sysfs_bin_attr_init() v6: Add __of_add_property() early init fixes from Pantelis v5: Rename firmware/ofw to firmware/devicetree Fix updating property values in sysfs v4: Fixed build error on Powerpc Fixed handling of dynamic nodes on powerpc v3: Fixed handling of duplicate attribute and child node names v2: switch to using sysfs bin_attributes which solve the problem of reporting incorrect property size. Signed-off-by: Grant Likely Tested-by: Sascha Hauer Cc: Rob Herring Cc: Benjamin Herrenschmidt Cc: David S. Miller Cc: Nathan Fontenot Cc: Pantelis Antoniou (cherry picked from commit 75b57ecf9d1d1e17d099ab13b8f48e6e038676be) Signed-off-by: Mark Brown --- Documentation/ABI/testing/sysfs-firmware-ofw | 28 +++ arch/powerpc/platforms/pseries/dlpar.c | 2 - arch/powerpc/platforms/pseries/reconfig.c | 2 - arch/powerpc/sysdev/msi_bitmap.c | 2 +- drivers/of/base.c | 172 ++++++++++++++++++- drivers/of/fdt.c | 3 +- drivers/of/pdt.c | 4 +- drivers/of/testcase-data/tests-phandle.dtsi | 3 + include/linux/of.h | 9 +- 9 files changed, 208 insertions(+), 17 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-firmware-ofw diff --git a/Documentation/ABI/testing/sysfs-firmware-ofw b/Documentation/ABI/testing/sysfs-firmware-ofw new file mode 100644 index 000000000000..f562b188e71d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-ofw @@ -0,0 +1,28 @@ +What: /sys/firmware/devicetree/* +Date: November 2013 +Contact: Grant Likely +Description: + When using OpenFirmware or a Flattened Device Tree to enumerate + hardware, the device tree structure will be exposed in this + directory. + + It is possible for multiple device-tree directories to exist. + Some device drivers use a separate detached device tree which + have no attachment to the system tree and will appear in a + different subdirectory under /sys/firmware/devicetree. + + Userspace must not use the /sys/firmware/devicetree/base + path directly, but instead should follow /proc/device-tree + symlink. It is possible that the absolute path will change + in the future, but the symlink is the stable ABI. + + The /proc/device-tree symlink replaces the devicetree /proc + filesystem support, and has largely the same semantics and + should be compatible with existing userspace. + + The contents of /sys/firmware/devicetree/ is a + hierarchy of directories, one per device tree node. The + directory name is the resolved path component name (node + name plus address). Properties are represented as files + in the directory. The contents of each file is the exact + binary data from the device tree. diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index a8fe5aa3d34f..022b38e6a80b 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -11,7 +11,6 @@ */ #include -#include #include #include #include @@ -87,7 +86,6 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa, } of_node_set_flag(dn, OF_DYNAMIC); - kref_init(&dn->kref); return dn; } diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index f93cdf55628c..0435bb65d0aa 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -12,7 +12,6 @@ */ #include -#include #include #include #include @@ -70,7 +69,6 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist np->properties = proplist; of_node_set_flag(np, OF_DYNAMIC); - kref_init(&np->kref); np->parent = derive_parent(path); if (IS_ERR(np->parent)) { diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 0968b66b4cf9..8ba60424be95 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -202,7 +202,7 @@ void __init test_of_node(void) /* There should really be a struct device_node allocator */ memset(&of_node, 0, sizeof(of_node)); - kref_init(&of_node.kref); + kref_init(&of_node.kobj.kref); of_node.full_name = node_name; check(0 == msi_bitmap_alloc(&bmp, size, &of_node)); diff --git a/drivers/of/base.c b/drivers/of/base.c index 89e888a78899..028bbaf62e52 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "of_private.h" @@ -35,6 +36,12 @@ struct device_node *of_chosen; struct device_node *of_aliases; static struct device_node *of_stdout; +static struct kset *of_kset; + +/* + * Used to protect the of_aliases; but also overloaded to hold off addition of + * nodes to sysfs + */ DEFINE_MUTEX(of_aliases_mutex); /* use when traversing tree through the allnext, child, sibling, @@ -92,14 +99,14 @@ int __weak of_node_to_nid(struct device_node *np) struct device_node *of_node_get(struct device_node *node) { if (node) - kref_get(&node->kref); + kobject_get(&node->kobj); return node; } EXPORT_SYMBOL(of_node_get); -static inline struct device_node *kref_to_device_node(struct kref *kref) +static inline struct device_node *kobj_to_device_node(struct kobject *kobj) { - return container_of(kref, struct device_node, kref); + return container_of(kobj, struct device_node, kobj); } /** @@ -109,16 +116,15 @@ static inline struct device_node *kref_to_device_node(struct kref *kref) * In of_node_put() this function is passed to kref_put() * as the destructor. */ -static void of_node_release(struct kref *kref) +static void of_node_release(struct kobject *kobj) { - struct device_node *node = kref_to_device_node(kref); + struct device_node *node = kobj_to_device_node(kobj); struct property *prop = node->properties; /* We should never be releasing nodes that haven't been detached. */ if (!of_node_check_flag(node, OF_DETACHED)) { pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name); dump_stack(); - kref_init(&node->kref); return; } @@ -151,11 +157,140 @@ static void of_node_release(struct kref *kref) void of_node_put(struct device_node *node) { if (node) - kref_put(&node->kref, of_node_release); + kobject_put(&node->kobj); } EXPORT_SYMBOL(of_node_put); +#else +static void of_node_release(struct kobject *kobj) +{ + /* Without CONFIG_OF_DYNAMIC, no nodes gets freed */ +} #endif /* CONFIG_OF_DYNAMIC */ +struct kobj_type of_node_ktype = { + .release = of_node_release, +}; + +static ssize_t of_node_property_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t offset, size_t count) +{ + struct property *pp = container_of(bin_attr, struct property, attr); + return memory_read_from_buffer(buf, count, &offset, pp->value, pp->length); +} + +static const char *safe_name(struct kobject *kobj, const char *orig_name) +{ + const char *name = orig_name; + struct kernfs_node *kn; + int i = 0; + + /* don't be a hero. After 16 tries give up */ + while (i < 16 && (kn = sysfs_get_dirent(kobj->sd, name))) { + sysfs_put(kn); + if (name != orig_name) + kfree(name); + name = kasprintf(GFP_KERNEL, "%s#%i", orig_name, ++i); + } + + if (name != orig_name) + pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n", + kobject_name(kobj), name); + return name; +} + +static int __of_add_property_sysfs(struct device_node *np, struct property *pp) +{ + int rc; + + /* Important: Don't leak passwords */ + bool secure = strncmp(pp->name, "security-", 9) == 0; + + sysfs_bin_attr_init(&pp->attr); + pp->attr.attr.name = safe_name(&np->kobj, pp->name); + pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO; + pp->attr.size = secure ? 0 : pp->length; + pp->attr.read = of_node_property_read; + + rc = sysfs_create_bin_file(&np->kobj, &pp->attr); + WARN(rc, "error adding attribute %s to node %s\n", pp->name, np->full_name); + return rc; +} + +static int __of_node_add(struct device_node *np) +{ + const char *name; + struct property *pp; + int rc; + + np->kobj.kset = of_kset; + if (!np->parent) { + /* Nodes without parents are new top level trees */ + rc = kobject_add(&np->kobj, NULL, safe_name(&of_kset->kobj, "base")); + } else { + name = safe_name(&np->parent->kobj, kbasename(np->full_name)); + if (!name || !name[0]) + return -EINVAL; + + rc = kobject_add(&np->kobj, &np->parent->kobj, "%s", name); + } + if (rc) + return rc; + + for_each_property_of_node(np, pp) + __of_add_property_sysfs(np, pp); + + return 0; +} + +int of_node_add(struct device_node *np) +{ + int rc = 0; + kobject_init(&np->kobj, &of_node_ktype); + mutex_lock(&of_aliases_mutex); + if (of_kset) + rc = __of_node_add(np); + mutex_unlock(&of_aliases_mutex); + return rc; +} + +#if defined(CONFIG_OF_DYNAMIC) +static void of_node_remove(struct device_node *np) +{ + struct property *pp; + + for_each_property_of_node(np, pp) + sysfs_remove_bin_file(&np->kobj, &pp->attr); + + kobject_del(&np->kobj); +} +#endif + +static int __init of_init(void) +{ + struct device_node *np; + + /* Create the kset, and register existing nodes */ + mutex_lock(&of_aliases_mutex); + of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj); + if (!of_kset) { + mutex_unlock(&of_aliases_mutex); + return -ENOMEM; + } + for_each_of_allnodes(np) + __of_node_add(np); + mutex_unlock(&of_aliases_mutex); + +#if !defined(CONFIG_PROC_DEVICETREE) + /* Symlink to the new tree when PROC_DEVICETREE is disabled */ + if (of_allnodes) + proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base"); +#endif /* CONFIG_PROC_DEVICETREE */ + + return 0; +} +core_initcall(of_init); + static struct property *__of_find_property(const struct device_node *np, const char *name, int *lenp) { @@ -1536,6 +1671,14 @@ int of_add_property(struct device_node *np, struct property *prop) } *next = prop; raw_spin_unlock_irqrestore(&devtree_lock, flags); + if (rc) + return rc; + + /* at early boot, bail hear and defer setup to of_init() */ + if (!of_kset) + return 0; + + __of_add_property_sysfs(np, prop); #ifdef CONFIG_PROC_DEVICETREE /* try to add to proc as well if it was initialized */ @@ -1583,6 +1726,12 @@ int of_remove_property(struct device_node *np, struct property *prop) if (!found) return -ENODEV; + /* at early boot, bail hear and defer setup to of_init() */ + if (!of_kset) + return 0; + + sysfs_remove_bin_file(&np->kobj, &prop->attr); + #ifdef CONFIG_PROC_DEVICETREE /* try to remove the proc node as well */ if (np->pde) @@ -1633,6 +1782,13 @@ int of_update_property(struct device_node *np, struct property *newprop) next = &(*next)->next; } raw_spin_unlock_irqrestore(&devtree_lock, flags); + if (rc) + return rc; + + /* Update the sysfs attribute */ + if (oldprop) + sysfs_remove_bin_file(&np->kobj, &oldprop->attr); + __of_add_property_sysfs(np, newprop); if (!found) return -ENODEV; @@ -1712,6 +1868,7 @@ int of_attach_node(struct device_node *np) of_allnodes = np; raw_spin_unlock_irqrestore(&devtree_lock, flags); + of_node_add(np); of_add_proc_dt_entry(np); return 0; } @@ -1784,6 +1941,7 @@ int of_detach_node(struct device_node *np) raw_spin_unlock_irqrestore(&devtree_lock, flags); of_remove_proc_dt_entry(np); + of_node_remove(np); return rc; } #endif /* defined(CONFIG_OF_DYNAMIC) */ diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 758b4f8b30b7..96ad1ab7f9d6 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -232,7 +232,6 @@ static void * unflatten_dt_node(struct boot_param_header *blob, dad->next->sibling = np; dad->next = np; } - kref_init(&np->kref); } /* process properties */ while (1) { @@ -327,6 +326,8 @@ static void * unflatten_dt_node(struct boot_param_header *blob, np->name = ""; if (!np->type) np->type = ""; + + of_node_add(np); } while (tag == OF_DT_BEGIN_NODE || tag == OF_DT_NOP) { if (tag == OF_DT_NOP) diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c index 7b666736c168..e64fa3d3da5f 100644 --- a/drivers/of/pdt.c +++ b/drivers/of/pdt.c @@ -179,8 +179,6 @@ static struct device_node * __init of_pdt_create_node(phandle node, of_pdt_incr_unique_id(dp); dp->parent = parent; - kref_init(&dp->kref); - dp->name = of_pdt_get_one_property(node, "name"); dp->type = of_pdt_get_one_property(node, "device_type"); dp->phandle = node; @@ -215,6 +213,7 @@ static struct device_node * __init of_pdt_build_tree(struct device_node *parent, *nextp = &dp->allnext; dp->full_name = of_pdt_build_full_name(dp); + of_node_add(dp); dp->child = of_pdt_build_tree(dp, of_pdt_prom_ops->getchild(node), nextp); @@ -245,6 +244,7 @@ void __init of_pdt_build_devicetree(phandle root_node, struct of_pdt_ops *ops) of_allnodes->path_component_name = ""; #endif of_allnodes->full_name = "/"; + of_node_add(of_allnodes); nextp = &of_allnodes->allnext; of_allnodes->child = of_pdt_build_tree(of_allnodes, diff --git a/drivers/of/testcase-data/tests-phandle.dtsi b/drivers/of/testcase-data/tests-phandle.dtsi index 0007d3cd7dc2..788a4c24b8f5 100644 --- a/drivers/of/testcase-data/tests-phandle.dtsi +++ b/drivers/of/testcase-data/tests-phandle.dtsi @@ -1,6 +1,9 @@ / { testcase-data { + security-password = "password"; + duplicate-name = "duplicate"; + duplicate-name { }; phandle-tests { provider0: provider0 { #phandle-cells = <0>; diff --git a/include/linux/of.h b/include/linux/of.h index 435cb995904d..f370e312f539 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -37,6 +37,7 @@ struct property { struct property *next; unsigned long _flags; unsigned int unique_id; + struct bin_attribute attr; }; #if defined(CONFIG_SPARC) @@ -57,7 +58,7 @@ struct device_node { struct device_node *next; /* next device of same type */ struct device_node *allnext; /* next in list of all nodes */ struct proc_dir_entry *pde; /* this node's proc directory */ - struct kref kref; + struct kobject kobj; unsigned long _flags; void *data; #if defined(CONFIG_SPARC) @@ -74,6 +75,8 @@ struct of_phandle_args { uint32_t args[MAX_PHANDLE_ARGS]; }; +extern int of_node_add(struct device_node *node); + #ifdef CONFIG_OF_DYNAMIC extern struct device_node *of_node_get(struct device_node *node); extern void of_node_put(struct device_node *node); @@ -167,6 +170,8 @@ static inline const char *of_node_full_name(const struct device_node *np) return np ? np->full_name : ""; } +#define for_each_of_allnodes(dn) \ + for (dn = of_allnodes; dn; dn = dn->allnext) extern struct device_node *of_find_node_by_name(struct device_node *from, const char *name); extern struct device_node *of_find_node_by_type(struct device_node *from, From 64b50663ba8b2773a18c6ce17c5be441579d0ce8 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 12 Feb 2014 01:00:34 +0100 Subject: [PATCH 240/298] of: add functions to count number of elements in a property The need to know the number of array elements in a property is a common pattern. To prevent duplication of open-coded implementations add a helper static function that also centralises strict sanity checking and DTB format details, as well as a set of wrapper functions for u8, u16, u32 and u64. Suggested-by: Mark Rutland Signed-off-by: Heiko Stuebner Acked-by: Rob Herring Acked-by: Grant Likely Acked-by: Mark Rutland Signed-off-by: Mark Brown (cherry picked from commit ad54a0cfbeb4bd4033d09017557ccbc423f9d5ff) Signed-off-by: Mark Brown --- drivers/of/base.c | 32 +++++++++++++++++++ include/linux/of.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index 028bbaf62e52..aadea0babe16 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1038,6 +1038,38 @@ struct device_node *of_find_node_by_phandle(phandle handle) } EXPORT_SYMBOL(of_find_node_by_phandle); +/** + * of_property_count_elems_of_size - Count the number of elements in a property + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @elem_size: size of the individual element + * + * Search for a property in a device node and count the number of elements of + * size elem_size in it. Returns number of elements on sucess, -EINVAL if the + * property does not exist or its length does not match a multiple of elem_size + * and -ENODATA if the property does not have a value. + */ +int of_property_count_elems_of_size(const struct device_node *np, + const char *propname, int elem_size) +{ + struct property *prop = of_find_property(np, propname, NULL); + + if (!prop) + return -EINVAL; + if (!prop->value) + return -ENODATA; + + if (prop->length % elem_size != 0) { + pr_err("size of %s in node %s is not a multiple of %d\n", + propname, np->full_name, elem_size); + return -EINVAL; + } + + return prop->length / elem_size; +} +EXPORT_SYMBOL_GPL(of_property_count_elems_of_size); + /** * of_find_property_value_of_size * diff --git a/include/linux/of.h b/include/linux/of.h index f370e312f539..5b1e02714901 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -203,6 +203,8 @@ extern struct device_node *of_find_node_with_property( extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); +extern int of_property_count_elems_of_size(const struct device_node *np, + const char *propname, int elem_size); extern int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value); @@ -395,6 +397,12 @@ static inline struct device_node *of_find_compatible_node( return NULL; } +static inline int of_property_count_elems_of_size(const struct device_node *np, + const char *propname, int elem_size) +{ + return -ENOSYS; +} + static inline int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { @@ -540,6 +548,74 @@ static inline struct device_node *of_find_matching_node( return of_find_matching_node_and_match(from, matches, NULL); } +/** + * of_property_count_u8_elems - Count the number of u8 elements in a property + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * + * Search for a property in a device node and count the number of u8 elements + * in it. Returns number of elements on sucess, -EINVAL if the property does + * not exist or its length does not match a multiple of u8 and -ENODATA if the + * property does not have a value. + */ +static inline int of_property_count_u8_elems(const struct device_node *np, + const char *propname) +{ + return of_property_count_elems_of_size(np, propname, sizeof(u8)); +} + +/** + * of_property_count_u16_elems - Count the number of u16 elements in a property + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * + * Search for a property in a device node and count the number of u16 elements + * in it. Returns number of elements on sucess, -EINVAL if the property does + * not exist or its length does not match a multiple of u16 and -ENODATA if the + * property does not have a value. + */ +static inline int of_property_count_u16_elems(const struct device_node *np, + const char *propname) +{ + return of_property_count_elems_of_size(np, propname, sizeof(u16)); +} + +/** + * of_property_count_u32_elems - Count the number of u32 elements in a property + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * + * Search for a property in a device node and count the number of u32 elements + * in it. Returns number of elements on sucess, -EINVAL if the property does + * not exist or its length does not match a multiple of u32 and -ENODATA if the + * property does not have a value. + */ +static inline int of_property_count_u32_elems(const struct device_node *np, + const char *propname) +{ + return of_property_count_elems_of_size(np, propname, sizeof(u32)); +} + +/** + * of_property_count_u64_elems - Count the number of u64 elements in a property + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * + * Search for a property in a device node and count the number of u64 elements + * in it. Returns number of elements on sucess, -EINVAL if the property does + * not exist or its length does not match a multiple of u64 and -ENODATA if the + * property does not have a value. + */ +static inline int of_property_count_u64_elems(const struct device_node *np, + const char *propname) +{ + return of_property_count_elems_of_size(np, propname, sizeof(u64)); +} + /** * of_property_read_bool - Findfrom a property * @np: device node from which the property value is to be read. From c869816054f07ad2c54311ab3bdae3d1e0248b2b Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 6 Nov 2012 21:03:27 +0000 Subject: [PATCH 241/298] of: remove /proc/device-tree The same data is now available in sysfs, so we can remove the code that exports it in /proc and replace it with a symlink to the sysfs version. Tested on versatile qemu model and mpc5200 eval board. More testing would be appreciated. v5: Fixed up conflicts with mainline changes Signed-off-by: Grant Likely Cc: Rob Herring Cc: Benjamin Herrenschmidt Cc: David S. Miller Cc: Nathan Fontenot Cc: Pantelis Antoniou (cherry picked from commit 8357041a69b368991d1b04d9f1d297f8d71e1314) Signed-off-by: Mark Brown Conflicts: drivers/of/base.c --- drivers/of/Kconfig | 8 -- drivers/of/base.c | 52 +-------- fs/proc/Makefile | 1 - fs/proc/internal.h | 7 -- fs/proc/proc_devtree.c | 241 ----------------------------------------- fs/proc/root.c | 3 - include/linux/of.h | 11 -- 7 files changed, 1 insertion(+), 322 deletions(-) delete mode 100644 fs/proc/proc_devtree.c diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index c6973f101a3e..a71c12d4bad7 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -7,14 +7,6 @@ config OF menu "Device Tree and Open Firmware support" depends on OF -config PROC_DEVICETREE - bool "Support for device tree in /proc" - depends on PROC_FS && !SPARC - help - This option adds a device-tree directory under /proc which contains - an image of the device tree that the kernel copies from Open - Firmware or other boot firmware. If unsure, say Y here. - config OF_SELFTEST bool "Device Tree Runtime self tests" depends on OF_IRQ diff --git a/drivers/of/base.c b/drivers/of/base.c index aadea0babe16..620740c71cd9 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -281,11 +281,9 @@ static int __init of_init(void) __of_node_add(np); mutex_unlock(&of_aliases_mutex); -#if !defined(CONFIG_PROC_DEVICETREE) - /* Symlink to the new tree when PROC_DEVICETREE is disabled */ + /* Symlink in /proc as required by userspace ABI */ if (of_allnodes) proc_symlink("device-tree", NULL, "/sys/firmware/devicetree/base"); -#endif /* CONFIG_PROC_DEVICETREE */ return 0; } @@ -1712,12 +1710,6 @@ int of_add_property(struct device_node *np, struct property *prop) __of_add_property_sysfs(np, prop); -#ifdef CONFIG_PROC_DEVICETREE - /* try to add to proc as well if it was initialized */ - if (np->pde) - proc_device_tree_add_prop(np->pde, prop); -#endif /* CONFIG_PROC_DEVICETREE */ - return 0; } @@ -1764,12 +1756,6 @@ int of_remove_property(struct device_node *np, struct property *prop) sysfs_remove_bin_file(&np->kobj, &prop->attr); -#ifdef CONFIG_PROC_DEVICETREE - /* try to remove the proc node as well */ - if (np->pde) - proc_device_tree_remove_prop(np->pde, prop); -#endif /* CONFIG_PROC_DEVICETREE */ - return 0; } @@ -1825,12 +1811,6 @@ int of_update_property(struct device_node *np, struct property *newprop) if (!found) return -ENODEV; -#ifdef CONFIG_PROC_DEVICETREE - /* try to add to proc as well if it was initialized */ - if (np->pde) - proc_device_tree_update_prop(np->pde, newprop, oldprop); -#endif /* CONFIG_PROC_DEVICETREE */ - return 0; } @@ -1865,22 +1845,6 @@ int of_reconfig_notify(unsigned long action, void *p) return notifier_to_errno(rc); } -#ifdef CONFIG_PROC_DEVICETREE -static void of_add_proc_dt_entry(struct device_node *dn) -{ - struct proc_dir_entry *ent; - - ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde); - if (ent) - proc_device_tree_add_node(dn, ent); -} -#else -static void of_add_proc_dt_entry(struct device_node *dn) -{ - return; -} -#endif - /** * of_attach_node - Plug a device node into the tree and global list. */ @@ -1901,22 +1865,9 @@ int of_attach_node(struct device_node *np) raw_spin_unlock_irqrestore(&devtree_lock, flags); of_node_add(np); - of_add_proc_dt_entry(np); return 0; } -#ifdef CONFIG_PROC_DEVICETREE -static void of_remove_proc_dt_entry(struct device_node *dn) -{ - proc_remove(dn->pde); -} -#else -static void of_remove_proc_dt_entry(struct device_node *dn) -{ - return; -} -#endif - /** * of_detach_node - "Unplug" a node from the device tree. * @@ -1972,7 +1923,6 @@ int of_detach_node(struct device_node *np) of_node_set_flag(np, OF_DETACHED); raw_spin_unlock_irqrestore(&devtree_lock, flags); - of_remove_proc_dt_entry(np); of_node_remove(np); return rc; } diff --git a/fs/proc/Makefile b/fs/proc/Makefile index ab30716584f5..239493ec718e 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -27,6 +27,5 @@ proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o -proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o proc-$(CONFIG_PRINTK) += kmsg.o proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 651d09a11dde..3ab6d14e71c5 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -210,13 +210,6 @@ extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry extern int proc_fill_super(struct super_block *); extern void proc_entry_rundown(struct proc_dir_entry *); -/* - * proc_devtree.c - */ -#ifdef CONFIG_PROC_DEVICETREE -extern void proc_device_tree_init(void); -#endif - /* * proc_namespaces.c */ diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c deleted file mode 100644 index c82dd5147845..000000000000 --- a/fs/proc/proc_devtree.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * proc_devtree.c - handles /proc/device-tree - * - * Copyright 1997 Paul Mackerras - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "internal.h" - -static inline void set_node_proc_entry(struct device_node *np, - struct proc_dir_entry *de) -{ - np->pde = de; -} - -static struct proc_dir_entry *proc_device_tree; - -/* - * Supply data on a read from /proc/device-tree/node/property. - */ -static int property_proc_show(struct seq_file *m, void *v) -{ - struct property *pp = m->private; - - seq_write(m, pp->value, pp->length); - return 0; -} - -static int property_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, property_proc_show, __PDE_DATA(inode)); -} - -static const struct file_operations property_proc_fops = { - .owner = THIS_MODULE, - .open = property_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -/* - * For a node with a name like "gc@10", we make symlinks called "gc" - * and "@10" to it. - */ - -/* - * Add a property to a node - */ -static struct proc_dir_entry * -__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp, - const char *name) -{ - struct proc_dir_entry *ent; - - /* - * Unfortunately proc_register puts each new entry - * at the beginning of the list. So we rearrange them. - */ - ent = proc_create_data(name, - strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR, - de, &property_proc_fops, pp); - if (ent == NULL) - return NULL; - - if (!strncmp(name, "security-", 9)) - proc_set_size(ent, 0); /* don't leak number of password chars */ - else - proc_set_size(ent, pp->length); - - return ent; -} - - -void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop) -{ - __proc_device_tree_add_prop(pde, prop, prop->name); -} - -void proc_device_tree_remove_prop(struct proc_dir_entry *pde, - struct property *prop) -{ - remove_proc_entry(prop->name, pde); -} - -void proc_device_tree_update_prop(struct proc_dir_entry *pde, - struct property *newprop, - struct property *oldprop) -{ - struct proc_dir_entry *ent; - - if (!oldprop) { - proc_device_tree_add_prop(pde, newprop); - return; - } - - for (ent = pde->subdir; ent != NULL; ent = ent->next) - if (ent->data == oldprop) - break; - if (ent == NULL) { - pr_warn("device-tree: property \"%s\" does not exist\n", - oldprop->name); - } else { - ent->data = newprop; - ent->size = newprop->length; - } -} - -/* - * Various dodgy firmware might give us nodes and/or properties with - * conflicting names. That's generally ok, except for exporting via /proc, - * so munge names here to ensure they're unique. - */ - -static int duplicate_name(struct proc_dir_entry *de, const char *name) -{ - struct proc_dir_entry *ent; - int found = 0; - - spin_lock(&proc_subdir_lock); - - for (ent = de->subdir; ent != NULL; ent = ent->next) { - if (strcmp(ent->name, name) == 0) { - found = 1; - break; - } - } - - spin_unlock(&proc_subdir_lock); - - return found; -} - -static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de, - const char *name) -{ - char *fixed_name; - int fixup_len = strlen(name) + 2 + 1; /* name + #x + \0 */ - int i = 1, size; - -realloc: - fixed_name = kmalloc(fixup_len, GFP_KERNEL); - if (fixed_name == NULL) { - pr_err("device-tree: Out of memory trying to fixup " - "name \"%s\"\n", name); - return name; - } - -retry: - size = snprintf(fixed_name, fixup_len, "%s#%d", name, i); - size++; /* account for NULL */ - - if (size > fixup_len) { - /* We ran out of space, free and reallocate. */ - kfree(fixed_name); - fixup_len = size; - goto realloc; - } - - if (duplicate_name(de, fixed_name)) { - /* Multiple duplicates. Retry with a different offset. */ - i++; - goto retry; - } - - pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n", - np->full_name, fixed_name); - - return fixed_name; -} - -/* - * Process a node, adding entries for its children and its properties. - */ -void proc_device_tree_add_node(struct device_node *np, - struct proc_dir_entry *de) -{ - struct property *pp; - struct proc_dir_entry *ent; - struct device_node *child; - const char *p; - - set_node_proc_entry(np, de); - for (child = NULL; (child = of_get_next_child(np, child));) { - /* Use everything after the last slash, or the full name */ - p = kbasename(child->full_name); - - if (duplicate_name(de, p)) - p = fixup_name(np, de, p); - - ent = proc_mkdir(p, de); - if (ent == NULL) - break; - proc_device_tree_add_node(child, ent); - } - of_node_put(child); - - for (pp = np->properties; pp != NULL; pp = pp->next) { - p = pp->name; - - if (strchr(p, '/')) - continue; - - if (duplicate_name(de, p)) - p = fixup_name(np, de, p); - - ent = __proc_device_tree_add_prop(de, pp, p); - if (ent == NULL) - break; - } -} - -/* - * Called on initialization to set up the /proc/device-tree subtree - */ -void __init proc_device_tree_init(void) -{ - struct device_node *root; - - proc_device_tree = proc_mkdir("device-tree", NULL); - if (proc_device_tree == NULL) - return; - root = of_find_node_by_path("/"); - if (root == NULL) { - remove_proc_entry("device-tree", NULL); - pr_debug("/proc/device-tree: can't find root\n"); - return; - } - proc_device_tree_add_node(root, proc_device_tree); - of_node_put(root); -} diff --git a/fs/proc/root.c b/fs/proc/root.c index 87dbcbef7fe4..7bbeb5257af1 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -183,9 +183,6 @@ void __init proc_root_init(void) proc_mkdir("openprom", NULL); #endif proc_tty_init(); -#ifdef CONFIG_PROC_DEVICETREE - proc_device_tree_init(); -#endif proc_mkdir("bus", NULL); proc_sys_init(); } diff --git a/include/linux/of.h b/include/linux/of.h index 5b1e02714901..3ac5d6839b75 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -57,7 +57,6 @@ struct device_node { struct device_node *sibling; struct device_node *next; /* next device of same type */ struct device_node *allnext; /* next in list of all nodes */ - struct proc_dir_entry *pde; /* this node's proc directory */ struct kobject kobj; unsigned long _flags; void *data; @@ -714,14 +713,4 @@ static inline int of_get_available_child_count(const struct device_node *np) return num; } -#if defined(CONFIG_PROC_FS) && defined(CONFIG_PROC_DEVICETREE) -extern void proc_device_tree_add_node(struct device_node *, struct proc_dir_entry *); -extern void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop); -extern void proc_device_tree_remove_prop(struct proc_dir_entry *pde, - struct property *prop); -extern void proc_device_tree_update_prop(struct proc_dir_entry *pde, - struct property *newprop, - struct property *oldprop); -#endif - #endif /* _LINUX_OF_H */ From 093b06de283951dfb1e1aadf5e27c9ac77879c18 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 13 Dec 2013 20:08:59 +0200 Subject: [PATCH 242/298] of: device_node kobject lifecycle fixes After the move to having device nodes be proper kobjects the lifecycle of the node needs to be controlled better. At first convert of_add_node() in the unflattened functions to of_init_node() which initializes the kobject so that of_node_get/put work correctly even before of_init is called. Afterwards introduce of_node_is_initialized & of_node_is_attached that query the underlying kobject about the state (attached means kobj is visible in sysfs) Using that make sure the lifecycle of the tree is correct at all times. Signed-off-by: Pantelis Antoniou [grant.likely: moved of_node_init() calls, fixed up locking, and dropped __of_populate() hunks] Signed-off-by: Grant Likely (cherry picked from commit 0829f6d1f69e4f2fae4062987ae6531a9af1a2e3) Signed-off-by: Mark Brown --- drivers/of/base.c | 35 ++++++++++++++++++++++++++--------- drivers/of/fdt.c | 3 +-- drivers/of/pdt.c | 3 +-- include/linux/of.h | 19 +++++++++++++++++++ 4 files changed, 47 insertions(+), 13 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 620740c71cd9..442f42d1817b 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -246,10 +246,19 @@ static int __of_node_add(struct device_node *np) int of_node_add(struct device_node *np) { int rc = 0; - kobject_init(&np->kobj, &of_node_ktype); + + BUG_ON(!of_node_is_initialized(np)); + + /* + * Grab the mutex here so that in a race condition between of_init() and + * of_node_add(), node addition will still be consistent. + */ mutex_lock(&of_aliases_mutex); if (of_kset) rc = __of_node_add(np); + else + /* This scenario may be perfectly valid, but report it anyway */ + pr_info("of_node_add(%s) before of_init()\n", np->full_name); mutex_unlock(&of_aliases_mutex); return rc; } @@ -259,10 +268,17 @@ static void of_node_remove(struct device_node *np) { struct property *pp; - for_each_property_of_node(np, pp) - sysfs_remove_bin_file(&np->kobj, &pp->attr); + BUG_ON(!of_node_is_initialized(np)); - kobject_del(&np->kobj); + /* only remove properties if on sysfs */ + if (of_node_is_attached(np)) { + for_each_property_of_node(np, pp) + sysfs_remove_bin_file(&np->kobj, &pp->attr); + kobject_del(&np->kobj); + } + + /* finally remove the kobj_init ref */ + of_node_put(np); } #endif @@ -1663,6 +1679,10 @@ static int of_property_notify(int action, struct device_node *np, { struct of_prop_reconfig pr; + /* only call notifiers if the node is attached */ + if (!of_node_is_attached(np)) + return 0; + pr.dn = np; pr.prop = prop; return of_reconfig_notify(action, &pr); @@ -1704,11 +1724,8 @@ int of_add_property(struct device_node *np, struct property *prop) if (rc) return rc; - /* at early boot, bail hear and defer setup to of_init() */ - if (!of_kset) - return 0; - - __of_add_property_sysfs(np, prop); + if (of_node_is_attached(np)) + __of_add_property_sysfs(np, prop); return 0; } diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 96ad1ab7f9d6..70ccc36513e7 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -202,6 +202,7 @@ static void * unflatten_dt_node(struct boot_param_header *blob, __alignof__(struct device_node)); if (allnextpp) { char *fn; + of_node_init(np); np->full_name = fn = ((char *)np) + sizeof(*np); if (new_format) { /* rebuild full path for new format */ @@ -326,8 +327,6 @@ static void * unflatten_dt_node(struct boot_param_header *blob, np->name = ""; if (!np->type) np->type = ""; - - of_node_add(np); } while (tag == OF_DT_BEGIN_NODE || tag == OF_DT_NOP) { if (tag == OF_DT_NOP) diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c index e64fa3d3da5f..36b4035881b0 100644 --- a/drivers/of/pdt.c +++ b/drivers/of/pdt.c @@ -176,6 +176,7 @@ static struct device_node * __init of_pdt_create_node(phandle node, return NULL; dp = prom_early_alloc(sizeof(*dp)); + of_node_init(dp); of_pdt_incr_unique_id(dp); dp->parent = parent; @@ -213,7 +214,6 @@ static struct device_node * __init of_pdt_build_tree(struct device_node *parent, *nextp = &dp->allnext; dp->full_name = of_pdt_build_full_name(dp); - of_node_add(dp); dp->child = of_pdt_build_tree(dp, of_pdt_prom_ops->getchild(node), nextp); @@ -244,7 +244,6 @@ void __init of_pdt_build_devicetree(phandle root_node, struct of_pdt_ops *ops) of_allnodes->path_component_name = ""; #endif of_allnodes->full_name = "/"; - of_node_add(of_allnodes); nextp = &of_allnodes->allnext; of_allnodes->child = of_pdt_build_tree(of_allnodes, diff --git a/include/linux/of.h b/include/linux/of.h index 3ac5d6839b75..7d036aed1328 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -76,6 +76,25 @@ struct of_phandle_args { extern int of_node_add(struct device_node *node); +/* initialize a node */ +extern struct kobj_type of_node_ktype; +static inline void of_node_init(struct device_node *node) +{ + kobject_init(&node->kobj, &of_node_ktype); +} + +/* true when node is initialized */ +static inline int of_node_is_initialized(struct device_node *node) +{ + return node && node->kobj.state_initialized; +} + +/* true when node is attached (i.e. present on sysfs) */ +static inline int of_node_is_attached(struct device_node *node) +{ + return node && node->kobj.state_in_sysfs; +} + #ifdef CONFIG_OF_DYNAMIC extern struct device_node *of_node_get(struct device_node *node); extern void of_node_put(struct device_node *node); From 8c8d0df5942936e46f1ee4870a3848d93f2f724f Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 8 Nov 2013 17:03:56 +0200 Subject: [PATCH 243/298] of: Introduce device tree node flag helpers. Helper functions for working with device node flags. Signed-off-by: Pantelis Antoniou Signed-off-by: Grant Likely (cherry picked from commit 588453c69dace39351129a038dd2796608f74bb3) Signed-off-by: Mark Brown --- include/linux/of.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/include/linux/of.h b/include/linux/of.h index 7d036aed1328..745a43d8e3fb 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -135,6 +135,26 @@ static inline void of_node_set_flag(struct device_node *n, unsigned long flag) set_bit(flag, &n->_flags); } +static inline void of_node_clear_flag(struct device_node *n, unsigned long flag) +{ + clear_bit(flag, &n->_flags); +} + +static inline int of_property_check_flag(struct property *p, unsigned long flag) +{ + return test_bit(flag, &p->_flags); +} + +static inline void of_property_set_flag(struct property *p, unsigned long flag) +{ + set_bit(flag, &p->_flags); +} + +static inline void of_property_clear_flag(struct property *p, unsigned long flag) +{ + clear_bit(flag, &p->_flags); +} + extern struct device_node *of_find_all_nodes(struct device_node *prev); /* From bb83c1926c971c8bc42dd728a79e12d125e4324a Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 26 Jun 2014 15:40:48 +0100 Subject: [PATCH 244/298] of: Move CONFIG_OF_DYNAMIC code into a separate file Split the dynamic device tree code into a separate file to make it really clear what features CONFIF_OF_DYNAMIC add to the kernel. Without CONFIG_OF_DYNAMIC only properties can be changed, and notifiers do not get sent. Enabling it turns on reference counting, notifiers and the ability to add and remove nodes. v2: Moved of_node_release() into dynamic.c Signed-off-by: Grant Likely Signed-off-by: Pantelis Antoniou Cc: Rob Herring (cherry picked from commit 6afc0dc381573559251de9a8259404f49e6aed14) Signed-off-by: Mark Brown Conflicts: drivers/of/base.c --- drivers/of/Makefile | 1 + drivers/of/base.c | 229 +--------------------------------------- drivers/of/dynamic.c | 216 +++++++++++++++++++++++++++++++++++++ drivers/of/of_private.h | 18 ++++ 4 files changed, 236 insertions(+), 228 deletions(-) create mode 100644 drivers/of/dynamic.c diff --git a/drivers/of/Makefile b/drivers/of/Makefile index efd05102c405..35b9d1f9865e 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -1,4 +1,5 @@ obj-y = base.o device.o platform.o +obj-$(CONFIG_OF_DYNAMIC) += dynamic.o obj-$(CONFIG_OF_FLATTREE) += fdt.o obj-$(CONFIG_OF_PROMTREE) += pdt.o obj-$(CONFIG_OF_ADDRESS) += address.o diff --git a/drivers/of/base.c b/drivers/of/base.c index 442f42d1817b..cde7f57be96c 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -88,79 +88,7 @@ int __weak of_node_to_nid(struct device_node *np) } #endif -#if defined(CONFIG_OF_DYNAMIC) -/** - * of_node_get - Increment refcount of a node - * @node: Node to inc refcount, NULL is supported to - * simplify writing of callers - * - * Returns node. - */ -struct device_node *of_node_get(struct device_node *node) -{ - if (node) - kobject_get(&node->kobj); - return node; -} -EXPORT_SYMBOL(of_node_get); - -static inline struct device_node *kobj_to_device_node(struct kobject *kobj) -{ - return container_of(kobj, struct device_node, kobj); -} - -/** - * of_node_release - release a dynamically allocated node - * @kref: kref element of the node to be released - * - * In of_node_put() this function is passed to kref_put() - * as the destructor. - */ -static void of_node_release(struct kobject *kobj) -{ - struct device_node *node = kobj_to_device_node(kobj); - struct property *prop = node->properties; - - /* We should never be releasing nodes that haven't been detached. */ - if (!of_node_check_flag(node, OF_DETACHED)) { - pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name); - dump_stack(); - return; - } - - if (!of_node_check_flag(node, OF_DYNAMIC)) - return; - - while (prop) { - struct property *next = prop->next; - kfree(prop->name); - kfree(prop->value); - kfree(prop); - prop = next; - - if (!prop) { - prop = node->deadprops; - node->deadprops = NULL; - } - } - kfree(node->full_name); - kfree(node->data); - kfree(node); -} - -/** - * of_node_put - Decrement refcount of a node - * @node: Node to dec refcount, NULL is supported to - * simplify writing of callers - * - */ -void of_node_put(struct device_node *node) -{ - if (node) - kobject_put(&node->kobj); -} -EXPORT_SYMBOL(of_node_put); -#else +#ifndef CONFIG_OF_DYNAMIC static void of_node_release(struct kobject *kobj) { /* Without CONFIG_OF_DYNAMIC, no nodes gets freed */ @@ -263,25 +191,6 @@ int of_node_add(struct device_node *np) return rc; } -#if defined(CONFIG_OF_DYNAMIC) -static void of_node_remove(struct device_node *np) -{ - struct property *pp; - - BUG_ON(!of_node_is_initialized(np)); - - /* only remove properties if on sysfs */ - if (of_node_is_attached(np)) { - for_each_property_of_node(np, pp) - sysfs_remove_bin_file(&np->kobj, &pp->attr); - kobject_del(&np->kobj); - } - - /* finally remove the kobj_init ref */ - of_node_put(np); -} -#endif - static int __init of_init(void) { struct device_node *np; @@ -1673,28 +1582,6 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na } EXPORT_SYMBOL(of_count_phandle_with_args); -#if defined(CONFIG_OF_DYNAMIC) -static int of_property_notify(int action, struct device_node *np, - struct property *prop) -{ - struct of_prop_reconfig pr; - - /* only call notifiers if the node is attached */ - if (!of_node_is_attached(np)) - return 0; - - pr.dn = np; - pr.prop = prop; - return of_reconfig_notify(action, &pr); -} -#else -static int of_property_notify(int action, struct device_node *np, - struct property *prop) -{ - return 0; -} -#endif - /** * of_add_property - Add a property to a node */ @@ -1831,120 +1718,6 @@ int of_update_property(struct device_node *np, struct property *newprop) return 0; } -#if defined(CONFIG_OF_DYNAMIC) -/* - * Support for dynamic device trees. - * - * On some platforms, the device tree can be manipulated at runtime. - * The routines in this section support adding, removing and changing - * device tree nodes. - */ - -static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain); - -int of_reconfig_notifier_register(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&of_reconfig_chain, nb); -} -EXPORT_SYMBOL_GPL(of_reconfig_notifier_register); - -int of_reconfig_notifier_unregister(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&of_reconfig_chain, nb); -} -EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister); - -int of_reconfig_notify(unsigned long action, void *p) -{ - int rc; - - rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p); - return notifier_to_errno(rc); -} - -/** - * of_attach_node - Plug a device node into the tree and global list. - */ -int of_attach_node(struct device_node *np) -{ - unsigned long flags; - int rc; - - rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); - if (rc) - return rc; - - raw_spin_lock_irqsave(&devtree_lock, flags); - np->sibling = np->parent->child; - np->allnext = of_allnodes; - np->parent->child = np; - of_allnodes = np; - raw_spin_unlock_irqrestore(&devtree_lock, flags); - - of_node_add(np); - return 0; -} - -/** - * of_detach_node - "Unplug" a node from the device tree. - * - * The caller must hold a reference to the node. The memory associated with - * the node is not freed until its refcount goes to zero. - */ -int of_detach_node(struct device_node *np) -{ - struct device_node *parent; - unsigned long flags; - int rc = 0; - - rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); - if (rc) - return rc; - - raw_spin_lock_irqsave(&devtree_lock, flags); - - if (of_node_check_flag(np, OF_DETACHED)) { - /* someone already detached it */ - raw_spin_unlock_irqrestore(&devtree_lock, flags); - return rc; - } - - parent = np->parent; - if (!parent) { - raw_spin_unlock_irqrestore(&devtree_lock, flags); - return rc; - } - - if (of_allnodes == np) - of_allnodes = np->allnext; - else { - struct device_node *prev; - for (prev = of_allnodes; - prev->allnext != np; - prev = prev->allnext) - ; - prev->allnext = np->allnext; - } - - if (parent->child == np) - parent->child = np->sibling; - else { - struct device_node *prevsib; - for (prevsib = np->parent->child; - prevsib->sibling != np; - prevsib = prevsib->sibling) - ; - prevsib->sibling = np->sibling; - } - - of_node_set_flag(np, OF_DETACHED); - raw_spin_unlock_irqrestore(&devtree_lock, flags); - - of_node_remove(np); - return rc; -} -#endif /* defined(CONFIG_OF_DYNAMIC) */ - static void of_alias_add(struct alias_prop *ap, struct device_node *np, int id, const char *stem, int stem_len) { diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c new file mode 100644 index 000000000000..125994330437 --- /dev/null +++ b/drivers/of/dynamic.c @@ -0,0 +1,216 @@ +/* + * Support for dynamic device trees. + * + * On some platforms, the device tree can be manipulated at runtime. + * The routines in this section support adding, removing and changing + * device tree nodes. + */ + +#include +#include +#include +#include +#include + +#include "of_private.h" + +/** + * of_node_get() - Increment refcount of a node + * @node: Node to inc refcount, NULL is supported to simplify writing of + * callers + * + * Returns node. + */ +struct device_node *of_node_get(struct device_node *node) +{ + if (node) + kobject_get(&node->kobj); + return node; +} +EXPORT_SYMBOL(of_node_get); + +/** + * of_node_put() - Decrement refcount of a node + * @node: Node to dec refcount, NULL is supported to simplify writing of + * callers + */ +void of_node_put(struct device_node *node) +{ + if (node) + kobject_put(&node->kobj); +} +EXPORT_SYMBOL(of_node_put); + +static void of_node_remove(struct device_node *np) +{ + struct property *pp; + + BUG_ON(!of_node_is_initialized(np)); + + /* only remove properties if on sysfs */ + if (of_node_is_attached(np)) { + for_each_property_of_node(np, pp) + sysfs_remove_bin_file(&np->kobj, &pp->attr); + kobject_del(&np->kobj); + } + + /* finally remove the kobj_init ref */ + of_node_put(np); +} + +static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain); + +int of_reconfig_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&of_reconfig_chain, nb); +} +EXPORT_SYMBOL_GPL(of_reconfig_notifier_register); + +int of_reconfig_notifier_unregister(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&of_reconfig_chain, nb); +} +EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister); + +int of_reconfig_notify(unsigned long action, void *p) +{ + int rc; + + rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p); + return notifier_to_errno(rc); +} + +int of_property_notify(int action, struct device_node *np, + struct property *prop) +{ + struct of_prop_reconfig pr; + + /* only call notifiers if the node is attached */ + if (!of_node_is_attached(np)) + return 0; + + pr.dn = np; + pr.prop = prop; + return of_reconfig_notify(action, &pr); +} + +/** + * of_attach_node() - Plug a device node into the tree and global list. + */ +int of_attach_node(struct device_node *np) +{ + unsigned long flags; + int rc; + + rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); + if (rc) + return rc; + + raw_spin_lock_irqsave(&devtree_lock, flags); + np->sibling = np->parent->child; + np->allnext = np->parent->allnext; + np->parent->allnext = np; + np->parent->child = np; + of_node_clear_flag(np, OF_DETACHED); + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + of_node_add(np); + return 0; +} + +/** + * of_detach_node() - "Unplug" a node from the device tree. + * + * The caller must hold a reference to the node. The memory associated with + * the node is not freed until its refcount goes to zero. + */ +int of_detach_node(struct device_node *np) +{ + struct device_node *parent; + unsigned long flags; + int rc = 0; + + rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); + if (rc) + return rc; + + raw_spin_lock_irqsave(&devtree_lock, flags); + + if (of_node_check_flag(np, OF_DETACHED)) { + /* someone already detached it */ + raw_spin_unlock_irqrestore(&devtree_lock, flags); + return rc; + } + + parent = np->parent; + if (!parent) { + raw_spin_unlock_irqrestore(&devtree_lock, flags); + return rc; + } + + if (of_allnodes == np) + of_allnodes = np->allnext; + else { + struct device_node *prev; + for (prev = of_allnodes; + prev->allnext != np; + prev = prev->allnext) + ; + prev->allnext = np->allnext; + } + + if (parent->child == np) + parent->child = np->sibling; + else { + struct device_node *prevsib; + for (prevsib = np->parent->child; + prevsib->sibling != np; + prevsib = prevsib->sibling) + ; + prevsib->sibling = np->sibling; + } + + of_node_set_flag(np, OF_DETACHED); + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + of_node_remove(np); + return rc; +} + +/** + * of_node_release() - release a dynamically allocated node + * @kref: kref element of the node to be released + * + * In of_node_put() this function is passed to kref_put() as the destructor. + */ +void of_node_release(struct kobject *kobj) +{ + struct device_node *node = kobj_to_device_node(kobj); + struct property *prop = node->properties; + + /* We should never be releasing nodes that haven't been detached. */ + if (!of_node_check_flag(node, OF_DETACHED)) { + pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name); + dump_stack(); + return; + } + + if (!of_node_check_flag(node, OF_DYNAMIC)) + return; + + while (prop) { + struct property *next = prop->next; + kfree(prop->name); + kfree(prop->value); + kfree(prop); + prop = next; + + if (!prop) { + prop = node->deadprops; + node->deadprops = NULL; + } + } + kfree(node->full_name); + kfree(node->data); + kfree(node); +} diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index ff350c8fa7ac..aade823aa1cc 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -33,4 +33,22 @@ struct alias_prop { extern struct mutex of_aliases_mutex; extern struct list_head aliases_lookup; + +static inline struct device_node *kobj_to_device_node(struct kobject *kobj) +{ + return container_of(kobj, struct device_node, kobj); +} + +#if defined(CONFIG_OF_DYNAMIC) +extern int of_property_notify(int action, struct device_node *np, + struct property *prop); +extern void of_node_release(struct kobject *kobj); +#else /* CONFIG_OF_DYNAMIC */ +static inline int of_property_notify(int action, struct device_node *np, + struct property *prop) +{ + return 0; +} +#endif /* CONFIG_OF_DYNAMIC */ + #endif /* _LINUX_OF_PRIVATE_H */ From ffefdf1ee69f78759a9c0bdeca1cd020143996e5 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 22 Jan 2014 13:57:39 +0800 Subject: [PATCH 245/298] of: add __of_add_property() without lock operations There two places will use the same code for adding one new property to the DT node. Adding __of_add_property() and prepare for fixing of_update_property()'s bug. Signed-off-by: Xiubo Li Signed-off-by: Grant Likely (cherry picked from commit 62664f67775fad840cf6f68d6b5f428817bef6c5) Signed-off-by: Mark Brown Conflicts: drivers/of/base.c --- drivers/of/base.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index cde7f57be96c..e653e658e76e 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1582,12 +1582,32 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na } EXPORT_SYMBOL(of_count_phandle_with_args); +/** + * __of_add_property - Add a property to a node without lock operations + */ +static int __of_add_property(struct device_node *np, struct property *prop) +{ + struct property **next; + + prop->next = NULL; + next = &np->properties; + while (*next) { + if (strcmp(prop->name, (*next)->name) == 0) + /* duplicate ! don't insert it */ + return -EEXIST; + + next = &(*next)->next; + } + *next = prop; + + return 0; +} + /** * of_add_property - Add a property to a node */ int of_add_property(struct device_node *np, struct property *prop) { - struct property **next; unsigned long flags; int rc; @@ -1595,26 +1615,13 @@ int of_add_property(struct device_node *np, struct property *prop) if (rc) return rc; - prop->next = NULL; raw_spin_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (strcmp(prop->name, (*next)->name) == 0) { - /* duplicate ! don't insert it */ - raw_spin_unlock_irqrestore(&devtree_lock, flags); - return -1; - } - next = &(*next)->next; - } - *next = prop; + rc = __of_add_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; - - if (of_node_is_attached(np)) + if (!rc) __of_add_property_sysfs(np, prop); - return 0; + return rc; } /** From 967758472ad2cb976d8317e141f3395c38213a0e Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 4 Jul 2014 19:58:46 +0300 Subject: [PATCH 246/298] of: Create unlocked versions of node and property add/remove functions The DT overlay code will need to manipulate nodes and properties while already holding the devicetree lock, or on nodes that are not yet attached to the tree, but the current helper functions don't allow that. Extract the core behaviour from the accessors and create the following unlocked variants. The unlocked variants require either the lock to already be held or for the nodes to be detached from the tree. Changes to live nodes will not get updated in sysfs, so the caller must arrange for housekeeping to take place after dropping the lock. The new functions are: __of_add_property(), __of_remove_property(), __of_update_property(), __of_attach_node() and __of_detach_node(). Signed-off-by: Pantelis Antoniou [Remove unnecessary diff hunks and rewrite commit text] Signed-off-by: Grant Likely (cherry picked from commit d8c50088417ebf61ad8b132caad20d10f7736034) Signed-off-by: Mark Brown Conflicts: drivers/of/base.c drivers/of/of_private.h --- drivers/of/base.c | 92 ++++++++++++++++++++++++----------------- drivers/of/dynamic.c | 63 +++++++++++++++------------- drivers/of/of_private.h | 18 ++++++++ 3 files changed, 106 insertions(+), 67 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index e653e658e76e..a2f4be16f100 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1585,7 +1585,7 @@ EXPORT_SYMBOL(of_count_phandle_with_args); /** * __of_add_property - Add a property to a node without lock operations */ -static int __of_add_property(struct device_node *np, struct property *prop) +int __of_add_property(struct device_node *np, struct property *prop) { struct property **next; @@ -1624,6 +1624,25 @@ int of_add_property(struct device_node *np, struct property *prop) return rc; } +int __of_remove_property(struct device_node *np, struct property *prop) +{ + struct property **next; + + for (next = &np->properties; *next; next = &(*next)->next) { + if (*next == prop) + break; + } + if (*next == NULL) + return -ENODEV; + + /* found the node */ + *next = prop->next; + prop->next = np->deadprops; + np->deadprops = prop; + + return 0; +} + /** * of_remove_property - Remove a property from a node. * @@ -1634,9 +1653,7 @@ int of_add_property(struct device_node *np, struct property *prop) */ int of_remove_property(struct device_node *np, struct property *prop) { - struct property **next; unsigned long flags; - int found = 0; int rc; rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop); @@ -1644,22 +1661,11 @@ int of_remove_property(struct device_node *np, struct property *prop) return rc; raw_spin_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (*next == prop) { - /* found the node */ - *next = prop->next; - prop->next = np->deadprops; - np->deadprops = prop; - found = 1; - break; - } - next = &(*next)->next; - } + rc = __of_remove_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (!found) - return -ENODEV; + if (rc) + return rc; /* at early boot, bail hear and defer setup to of_init() */ if (!of_kset) @@ -1670,6 +1676,32 @@ int of_remove_property(struct device_node *np, struct property *prop) return 0; } +int __of_update_property(struct device_node *np, struct property *newprop, + struct property **oldpropp) +{ + struct property **next, *oldprop; + + for (next = &np->properties; *next; next = &(*next)->next) { + if (of_prop_cmp((*next)->name, newprop->name) == 0) + break; + } + *oldpropp = oldprop = *next; + + if (oldprop) { + /* replace the node */ + newprop->next = oldprop->next; + *next = newprop; + oldprop->next = np->deadprops; + np->deadprops = oldprop; + } else { + /* new node */ + newprop->next = NULL; + *next = newprop; + } + + return 0; +} + /* * of_update_property - Update a property in a node, if the property does * not exist, add it. @@ -1681,35 +1713,19 @@ int of_remove_property(struct device_node *np, struct property *prop) */ int of_update_property(struct device_node *np, struct property *newprop) { - struct property **next, *oldprop; + struct property *oldprop; unsigned long flags; int rc, found = 0; + if (!newprop->name) + return -EINVAL; + rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop); if (rc) return rc; - if (!newprop->name) - return -EINVAL; - - oldprop = of_find_property(np, newprop->name, NULL); - if (!oldprop) - return of_add_property(np, newprop); - raw_spin_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (*next == oldprop) { - /* found the node */ - newprop->next = oldprop->next; - *next = newprop; - oldprop->next = np->deadprops; - np->deadprops = oldprop; - found = 1; - break; - } - next = &(*next)->next; - } + rc = __of_update_property(np, newprop, &oldprop); raw_spin_unlock_irqrestore(&devtree_lock, flags); if (rc) return rc; diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 125994330437..2f1c84eb3dba 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -94,6 +94,15 @@ int of_property_notify(int action, struct device_node *np, return of_reconfig_notify(action, &pr); } +void __of_attach_node(struct device_node *np) +{ + np->sibling = np->parent->child; + np->allnext = np->parent->allnext; + np->parent->allnext = np; + np->parent->child = np; + of_node_clear_flag(np, OF_DETACHED); +} + /** * of_attach_node() - Plug a device node into the tree and global list. */ @@ -107,46 +116,23 @@ int of_attach_node(struct device_node *np) return rc; raw_spin_lock_irqsave(&devtree_lock, flags); - np->sibling = np->parent->child; - np->allnext = np->parent->allnext; - np->parent->allnext = np; - np->parent->child = np; - of_node_clear_flag(np, OF_DETACHED); + __of_attach_node(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); of_node_add(np); return 0; } -/** - * of_detach_node() - "Unplug" a node from the device tree. - * - * The caller must hold a reference to the node. The memory associated with - * the node is not freed until its refcount goes to zero. - */ -int of_detach_node(struct device_node *np) +void __of_detach_node(struct device_node *np) { struct device_node *parent; - unsigned long flags; - int rc = 0; - rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); - if (rc) - return rc; - - raw_spin_lock_irqsave(&devtree_lock, flags); - - if (of_node_check_flag(np, OF_DETACHED)) { - /* someone already detached it */ - raw_spin_unlock_irqrestore(&devtree_lock, flags); - return rc; - } + if (WARN_ON(of_node_check_flag(np, OF_DETACHED))) + return; parent = np->parent; - if (!parent) { - raw_spin_unlock_irqrestore(&devtree_lock, flags); - return rc; - } + if (WARN_ON(!parent)) + return; if (of_allnodes == np) of_allnodes = np->allnext; @@ -171,6 +157,25 @@ int of_detach_node(struct device_node *np) } of_node_set_flag(np, OF_DETACHED); +} + +/** + * of_detach_node() - "Unplug" a node from the device tree. + * + * The caller must hold a reference to the node. The memory associated with + * the node is not freed until its refcount goes to zero. + */ +int of_detach_node(struct device_node *np) +{ + unsigned long flags; + int rc = 0; + + rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); + if (rc) + return rc; + + raw_spin_lock_irqsave(&devtree_lock, flags); + __of_detach_node(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); of_node_remove(np); diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index aade823aa1cc..0384f097bcc8 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -51,4 +51,22 @@ static inline int of_property_notify(int action, struct device_node *np, } #endif /* CONFIG_OF_DYNAMIC */ +/** + * General utilities for working with live trees. + * + * All functions with two leading underscores operate + * without taking node references, so you either have to + * own the devtree lock or work on detached trees only. + */ +struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags); +struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags); + +extern int __of_add_property(struct device_node *np, struct property *prop); +extern int __of_remove_property(struct device_node *np, struct property *prop); +extern int __of_update_property(struct device_node *np, + struct property *newprop, struct property **oldprop); + +extern void __of_attach_node(struct device_node *np); +extern void __of_detach_node(struct device_node *np); + #endif /* _LINUX_OF_PRIVATE_H */ From e8f2e82d79926019ee15ff3b3c4252e3bcab45ef Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 4 Jul 2014 19:58:03 +0300 Subject: [PATCH 247/298] of: rename of_aliases_mutex to just of_mutex We're overloading usage of of_aliases_mutex for sysfs changes, so rename to something that is more generic. Signed-off-by: Pantelis Antoniou Signed-off-by: Grant Likely (cherry picked from commit c05aba2bd5f9dd3f363611ff844513de1341ac60) Signed-off-by: Mark Brown --- drivers/of/base.c | 19 +++++++++---------- drivers/of/device.c | 4 ++-- drivers/of/of_private.h | 2 +- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index a2f4be16f100..36574ee6a8c5 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -39,10 +39,9 @@ static struct device_node *of_stdout; static struct kset *of_kset; /* - * Used to protect the of_aliases; but also overloaded to hold off addition of - * nodes to sysfs + * Used to protect the of_aliases, to hold off addition of nodes to sysfs */ -DEFINE_MUTEX(of_aliases_mutex); +DEFINE_MUTEX(of_mutex); /* use when traversing tree through the allnext, child, sibling, * or parent members of struct device_node. @@ -181,13 +180,13 @@ int of_node_add(struct device_node *np) * Grab the mutex here so that in a race condition between of_init() and * of_node_add(), node addition will still be consistent. */ - mutex_lock(&of_aliases_mutex); + mutex_lock(&of_mutex); if (of_kset) rc = __of_node_add(np); else /* This scenario may be perfectly valid, but report it anyway */ pr_info("of_node_add(%s) before of_init()\n", np->full_name); - mutex_unlock(&of_aliases_mutex); + mutex_unlock(&of_mutex); return rc; } @@ -196,15 +195,15 @@ static int __init of_init(void) struct device_node *np; /* Create the kset, and register existing nodes */ - mutex_lock(&of_aliases_mutex); + mutex_lock(&of_mutex); of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj); if (!of_kset) { - mutex_unlock(&of_aliases_mutex); + mutex_unlock(&of_mutex); return -ENOMEM; } for_each_of_allnodes(np) __of_node_add(np); - mutex_unlock(&of_aliases_mutex); + mutex_unlock(&of_mutex); /* Symlink in /proc as required by userspace ABI */ if (of_allnodes) @@ -1832,7 +1831,7 @@ int of_alias_get_id(struct device_node *np, const char *stem) struct alias_prop *app; int id = -ENODEV; - mutex_lock(&of_aliases_mutex); + mutex_lock(&of_mutex); list_for_each_entry(app, &aliases_lookup, link) { if (strcmp(app->stem, stem) != 0) continue; @@ -1842,7 +1841,7 @@ int of_alias_get_id(struct device_node *np, const char *stem) break; } } - mutex_unlock(&of_aliases_mutex); + mutex_unlock(&of_mutex); return id; } diff --git a/drivers/of/device.c b/drivers/of/device.c index dafb9736ab9b..46d6c75c1404 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -160,7 +160,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env) add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen); seen = 0; - mutex_lock(&of_aliases_mutex); + mutex_lock(&of_mutex); list_for_each_entry(app, &aliases_lookup, link) { if (dev->of_node == app->np) { add_uevent_var(env, "OF_ALIAS_%d=%s", seen, @@ -168,7 +168,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env) seen++; } } - mutex_unlock(&of_aliases_mutex); + mutex_unlock(&of_mutex); } int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 0384f097bcc8..0f6089722af9 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -31,7 +31,7 @@ struct alias_prop { char stem[0]; }; -extern struct mutex of_aliases_mutex; +extern struct mutex of_mutex; extern struct list_head aliases_lookup; static inline struct device_node *kobj_to_device_node(struct kobject *kobj) From 8685f844d2ec342529a948b202e96135e81b64e1 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 23 Jul 2014 17:05:06 -0600 Subject: [PATCH 248/298] of: Make devicetree sysfs update functions consistent. All of the DT modification functions are split into two parts, the first part manipulates the DT data structure, and the second part updates sysfs, but the code isn't very consistent about how the second half is called. They don't all enforce the same rules about when it is valid to update sysfs, and there isn't any clarity on locking. The transactional DT modification feature that is coming also needs access to these functions so that it can perform all the structure changes together, and then all the sysfs updates as a second stage instead of doing each one at a time. Fix up the second have by creating a separate __of_*_sysfs() function for each of the helpers. The new functions have consistent naming (ie. of_node_add() becomes __of_attach_node_sysfs()) and all of them now defer if of_init hasn't been called yet. Callers of the new functions must hold the of_mutex to ensure there are no race conditions with of_init(). The mutex ensures that there will only ever be one writer to the tree at any given time. There can still be any number of readers and the raw_spin_lock is still used to make sure access to the data structure is still consistent. Finally, put the function prototypes into of_private.h so they are accessible to the transaction code. Signed-off-by: Pantelis Antoniou [grant.likely: Changed suffix from _post to _sysfs to match existing code] [grant.likely: Reorganized to eliminate trivial wrappers] Signed-off-by: Grant Likely (cherry picked from commit 8a2b22a2595bf89d4396530edf8388159fad9d83) Signed-off-by: Mark Brown Conflicts: drivers/of/base.c --- drivers/of/base.c | 94 ++++++++++++++++++++++------------------- drivers/of/dynamic.c | 12 ++++-- drivers/of/of_private.h | 10 +++++ include/linux/of.h | 2 - 4 files changed, 70 insertions(+), 48 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 36574ee6a8c5..6ed7eb1a8b48 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -36,10 +36,13 @@ struct device_node *of_chosen; struct device_node *of_aliases; static struct device_node *of_stdout; -static struct kset *of_kset; +struct kset *of_kset; /* - * Used to protect the of_aliases, to hold off addition of nodes to sysfs + * Used to protect the of_aliases, to hold off addition of nodes to sysfs. + * This mutex must be held whenever modifications are being made to the + * device tree. The of_{attach,detach}_node() and + * of_{add,remove,update}_property() helpers make sure this happens. */ DEFINE_MUTEX(of_mutex); @@ -126,13 +129,16 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name) return name; } -static int __of_add_property_sysfs(struct device_node *np, struct property *pp) +int __of_add_property_sysfs(struct device_node *np, struct property *pp) { int rc; /* Important: Don't leak passwords */ bool secure = strncmp(pp->name, "security-", 9) == 0; + if (!of_kset || !of_node_is_attached(np)) + return 0; + sysfs_bin_attr_init(&pp->attr); pp->attr.attr.name = safe_name(&np->kobj, pp->name); pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO; @@ -144,12 +150,15 @@ static int __of_add_property_sysfs(struct device_node *np, struct property *pp) return rc; } -static int __of_node_add(struct device_node *np) +int __of_attach_node_sysfs(struct device_node *np) { const char *name; struct property *pp; int rc; + if (!of_kset) + return 0; + np->kobj.kset = of_kset; if (!np->parent) { /* Nodes without parents are new top level trees */ @@ -170,26 +179,6 @@ static int __of_node_add(struct device_node *np) return 0; } -int of_node_add(struct device_node *np) -{ - int rc = 0; - - BUG_ON(!of_node_is_initialized(np)); - - /* - * Grab the mutex here so that in a race condition between of_init() and - * of_node_add(), node addition will still be consistent. - */ - mutex_lock(&of_mutex); - if (of_kset) - rc = __of_node_add(np); - else - /* This scenario may be perfectly valid, but report it anyway */ - pr_info("of_node_add(%s) before of_init()\n", np->full_name); - mutex_unlock(&of_mutex); - return rc; -} - static int __init of_init(void) { struct device_node *np; @@ -202,7 +191,7 @@ static int __init of_init(void) return -ENOMEM; } for_each_of_allnodes(np) - __of_node_add(np); + __of_attach_node_sysfs(np); mutex_unlock(&of_mutex); /* Symlink in /proc as required by userspace ABI */ @@ -1614,12 +1603,17 @@ int of_add_property(struct device_node *np, struct property *prop) if (rc) return rc; + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_add_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); + if (!rc) __of_add_property_sysfs(np, prop); + mutex_unlock(&of_mutex); + return rc; } @@ -1642,6 +1636,13 @@ int __of_remove_property(struct device_node *np, struct property *prop) return 0; } +void __of_remove_property_sysfs(struct device_node *np, struct property *prop) +{ + /* at early boot, bail here and defer setup to of_init() */ + if (of_kset && of_node_is_attached(np)) + sysfs_remove_bin_file(&np->kobj, &prop->attr); +} + /** * of_remove_property - Remove a property from a node. * @@ -1659,20 +1660,18 @@ int of_remove_property(struct device_node *np, struct property *prop) if (rc) return rc; + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_remove_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; + if (!rc) + __of_remove_property_sysfs(np, prop); - /* at early boot, bail hear and defer setup to of_init() */ - if (!of_kset) - return 0; + mutex_unlock(&of_mutex); - sysfs_remove_bin_file(&np->kobj, &prop->attr); - - return 0; + return rc; } int __of_update_property(struct device_node *np, struct property *newprop, @@ -1701,6 +1700,18 @@ int __of_update_property(struct device_node *np, struct property *newprop, return 0; } +void __of_update_property_sysfs(struct device_node *np, struct property *newprop, + struct property *oldprop) +{ + /* At early boot, bail out and defer setup to of_init() */ + if (!of_kset) + return; + + if (oldprop) + sysfs_remove_bin_file(&np->kobj, &oldprop->attr); + __of_add_property_sysfs(np, newprop); +} + /* * of_update_property - Update a property in a node, if the property does * not exist, add it. @@ -1714,7 +1725,7 @@ int of_update_property(struct device_node *np, struct property *newprop) { struct property *oldprop; unsigned long flags; - int rc, found = 0; + int rc; if (!newprop->name) return -EINVAL; @@ -1723,21 +1734,18 @@ int of_update_property(struct device_node *np, struct property *newprop) if (rc) return rc; + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_update_property(np, newprop, &oldprop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; - /* Update the sysfs attribute */ - if (oldprop) - sysfs_remove_bin_file(&np->kobj, &oldprop->attr); - __of_add_property_sysfs(np, newprop); + if (!rc) + __of_update_property_sysfs(np, newprop, oldprop); - if (!found) - return -ENODEV; + mutex_unlock(&of_mutex); - return 0; + return rc; } static void of_alias_add(struct alias_prop *ap, struct device_node *np, diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 2f1c84eb3dba..935e67f9dbd2 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -41,11 +41,13 @@ void of_node_put(struct device_node *node) } EXPORT_SYMBOL(of_node_put); -static void of_node_remove(struct device_node *np) +void __of_detach_node_sysfs(struct device_node *np) { struct property *pp; BUG_ON(!of_node_is_initialized(np)); + if (!of_kset) + return; /* only remove properties if on sysfs */ if (of_node_is_attached(np)) { @@ -115,11 +117,13 @@ int of_attach_node(struct device_node *np) if (rc) return rc; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_attach_node(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); - of_node_add(np); + __of_attach_node_sysfs(np); + mutex_unlock(&of_mutex); return 0; } @@ -174,11 +178,13 @@ int of_detach_node(struct device_node *np) if (rc) return rc; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_detach_node(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); - of_node_remove(np); + __of_detach_node_sysfs(np); + mutex_unlock(&of_mutex); return rc; } diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 0f6089722af9..0d99ba8caeed 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -33,6 +33,8 @@ struct alias_prop { extern struct mutex of_mutex; extern struct list_head aliases_lookup; +extern struct kset *of_kset; + static inline struct device_node *kobj_to_device_node(struct kobject *kobj) { @@ -62,11 +64,19 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags); struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags); extern int __of_add_property(struct device_node *np, struct property *prop); +extern int __of_add_property_sysfs(struct device_node *np, + struct property *prop); extern int __of_remove_property(struct device_node *np, struct property *prop); +extern void __of_remove_property_sysfs(struct device_node *np, + struct property *prop); extern int __of_update_property(struct device_node *np, struct property *newprop, struct property **oldprop); +extern void __of_update_property_sysfs(struct device_node *np, + struct property *newprop, struct property *oldprop); extern void __of_attach_node(struct device_node *np); +extern int __of_attach_node_sysfs(struct device_node *np); extern void __of_detach_node(struct device_node *np); +extern void __of_detach_node_sysfs(struct device_node *np); #endif /* _LINUX_OF_PRIVATE_H */ diff --git a/include/linux/of.h b/include/linux/of.h index 745a43d8e3fb..c9cabc4ffbc9 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -74,8 +74,6 @@ struct of_phandle_args { uint32_t args[MAX_PHANDLE_ARGS]; }; -extern int of_node_add(struct device_node *node); - /* initialize a node */ extern struct kobj_type of_node_ktype; static inline void of_node_init(struct device_node *node) From f0122c35d9071b50a79833bd227b9b0537c0ee07 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 16 Jul 2014 08:48:46 -0600 Subject: [PATCH 249/298] of: Make sure attached nodes don't carry along extra children The child pointer does not get cleared when attaching new nodes which could cause the tree to be inconsistent. Clear the child pointer in __of_attach_node() to be absolutely sure that the structure remains in a consistent layout. Signed-off-by: Grant Likely (cherry picked from commit 6162dbe49a451f96431a23b4821f05e3bd925bc1) Signed-off-by: Mark Brown --- drivers/of/dynamic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 935e67f9dbd2..3f2eb9fa7fb3 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -98,6 +98,7 @@ int of_property_notify(int action, struct device_node *np, void __of_attach_node(struct device_node *np) { + np->child = NULL; np->sibling = np->parent->child; np->allnext = np->parent->allnext; np->parent->allnext = np; From 915ec64e24a02a4562c855376ce7b4abb8bf558d Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 15 Jul 2014 23:25:43 -0600 Subject: [PATCH 250/298] of: Move dynamic node fixups out of powerpc and into common code PowerPC does an odd thing with dynamic nodes. It uses a notifier to catch new node additions and set some of the values like name and type. This makes no sense since that same code can be put directly into of_attach_node(). Besides, all dynamic node users need this, not just powerpc. Fix this problem by moving the logic out of arch/powerpc and into drivers/of/dynamic.c. It is also important to remove this notifier because we want to move the firing of notifiers from before the tree is modified to after so that the receiver gets a consistent view of the tree, but that is incompatible with notifiers that modify the node. Signed-off-by: Grant Likely Cc: Nathan Fontenot Cc: Benjamin Herrenschmidt (cherry picked from commit a25095d451ece23b1fef34474f3230100db7aa05) Signed-off-by: Mark Brown --- arch/powerpc/kernel/prom.c | 70 -------------------------------------- drivers/of/base.c | 4 +-- drivers/of/dynamic.c | 13 +++++++ drivers/of/of_private.h | 2 ++ 4 files changed, 17 insertions(+), 72 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f58c0d3aaeb4..60e808d32cff 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -836,76 +836,6 @@ int cpu_to_chip_id(int cpu) } EXPORT_SYMBOL(cpu_to_chip_id); -#ifdef CONFIG_PPC_PSERIES -/* - * Fix up the uninitialized fields in a new device node: - * name, type and pci-specific fields - */ - -static int of_finish_dynamic_node(struct device_node *node) -{ - struct device_node *parent = of_get_parent(node); - int err = 0; - const phandle *ibm_phandle; - - node->name = of_get_property(node, "name", NULL); - node->type = of_get_property(node, "device_type", NULL); - - if (!node->name) - node->name = ""; - if (!node->type) - node->type = ""; - - if (!parent) { - err = -ENODEV; - goto out; - } - - /* We don't support that function on PowerMac, at least - * not yet - */ - if (machine_is(powermac)) - return -ENODEV; - - /* fix up new node's phandle field */ - if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL))) - node->phandle = *ibm_phandle; - -out: - of_node_put(parent); - return err; -} - -static int prom_reconfig_notifier(struct notifier_block *nb, - unsigned long action, void *node) -{ - int err; - - switch (action) { - case OF_RECONFIG_ATTACH_NODE: - err = of_finish_dynamic_node(node); - if (err < 0) - printk(KERN_ERR "finish_node returned %d\n", err); - break; - default: - err = 0; - break; - } - return notifier_from_errno(err); -} - -static struct notifier_block prom_reconfig_nb = { - .notifier_call = prom_reconfig_notifier, - .priority = 10, /* This one needs to run first */ -}; - -static int __init prom_reconfig_setup(void) -{ - return of_reconfig_notifier_register(&prom_reconfig_nb); -} -__initcall(prom_reconfig_setup); -#endif - bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return (int)phys_id == get_hard_smp_processor_id(cpu); diff --git a/drivers/of/base.c b/drivers/of/base.c index 6ed7eb1a8b48..ae220dfebadd 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -264,8 +264,8 @@ EXPORT_SYMBOL(of_find_all_nodes); * Find a property with a given name for a given node * and return the value. */ -static const void *__of_get_property(const struct device_node *np, - const char *name, int *lenp) +const void *__of_get_property(const struct device_node *np, + const char *name, int *lenp) { struct property *pp = __of_find_property(np, name, lenp); diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 3f2eb9fa7fb3..2f50de87ae08 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -98,6 +98,19 @@ int of_property_notify(int action, struct device_node *np, void __of_attach_node(struct device_node *np) { + const __be32 *phandle; + int sz; + + np->name = __of_get_property(np, "name", NULL) ? : ""; + np->type = __of_get_property(np, "device_type", NULL) ? : ""; + + phandle = __of_get_property(np, "phandle", &sz); + if (!phandle) + phandle = __of_get_property(np, "linux,phandle", &sz); + if (IS_ENABLED(PPC_PSERIES) && !phandle) + phandle = __of_get_property(np, "ibm,phandle", &sz); + np->phandle = (phandle && (sz >= 4)) ? be32_to_cpup(phandle) : 0; + np->child = NULL; np->sibling = np->parent->child; np->allnext = np->parent->allnext; diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 0d99ba8caeed..8129c0e58d70 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -63,6 +63,8 @@ static inline int of_property_notify(int action, struct device_node *np, struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags); struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags); +extern const void *__of_get_property(const struct device_node *np, + const char *name, int *lenp); extern int __of_add_property(struct device_node *np, struct property *prop); extern int __of_add_property_sysfs(struct device_node *np, struct property *prop); From 9b989b5586997cff26c1a1644a6d4dcdbe78ecf5 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 16 Jul 2014 12:48:23 -0600 Subject: [PATCH 251/298] of: Reorder device tree changes and notifiers Currently, devicetree reconfig notifiers get emitted before the change is applied to the tree, but that behaviour is problematic if the receiver wants the determine the new state of the tree. The current users don't care, but the changeset code to follow will be making multiple changes at once. Reorder notifiers to get emitted after the change has been applied to the tree so that callbacks see the new tree state. At the same time, fixup the existing callbacks to expect the new order. There are a few callbacks that compare the old and new values of a changed property. Put both property pointers into the of_prop_reconfig structure. The current notifiers also allow the notifier callback to fail and cancel the change to the tree, but that feature isn't actually used. It really isn't valid to ignore a tree modification provided by firmware anyway, so remove the ability to cancel a change to the tree. Signed-off-by: Grant Likely Cc: Nathan Fontenot (cherry picked from commit 259092a35c7e11f1d4616b0f5b3ba7b851fe4fa6) Signed-off-by: Mark Brown --- .../platforms/pseries/hotplug-memory.c | 2 +- drivers/crypto/nx/nx-842.c | 30 +++++-------------- drivers/of/base.c | 21 ++++++------- drivers/of/dynamic.c | 18 +++++------ drivers/of/of_private.h | 4 +-- include/linux/of.h | 1 + 6 files changed, 29 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9590dbb756f2..daae52670a0a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -212,7 +212,7 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) if (!memblock_size) return -EINVAL; - p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL); + p = (u32 *) pr->old_prop->value; if (!p) return -EINVAL; diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 1e5481d88a26..a86359765fda 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -936,28 +936,14 @@ static int nx842_OF_upd(struct property *new_prop) goto error_out; } - /* Set ptr to new property if provided */ - if (new_prop) { - /* Single property */ - if (!strncmp(new_prop->name, "status", new_prop->length)) { - status = new_prop; - - } else if (!strncmp(new_prop->name, "ibm,max-sg-len", - new_prop->length)) { - maxsglen = new_prop; - - } else if (!strncmp(new_prop->name, "ibm,max-sync-cop", - new_prop->length)) { - maxsyncop = new_prop; - - } else { - /* - * Skip the update, the property being updated - * has no impact. - */ - goto out; - } - } + /* + * If this is a property update, there are only certain properties that + * we care about. Bail if it isn't in the below list + */ + if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) + goto out; /* Perform property updates */ ret = nx842_OF_upd_status(new_devdata, status); diff --git a/drivers/of/base.c b/drivers/of/base.c index ae220dfebadd..8059f149d171 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1599,10 +1599,6 @@ int of_add_property(struct device_node *np, struct property *prop) unsigned long flags; int rc; - rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1614,6 +1610,9 @@ int of_add_property(struct device_node *np, struct property *prop) mutex_unlock(&of_mutex); + if (!rc) + of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL); + return rc; } @@ -1656,10 +1655,6 @@ int of_remove_property(struct device_node *np, struct property *prop) unsigned long flags; int rc; - rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1671,6 +1666,9 @@ int of_remove_property(struct device_node *np, struct property *prop) mutex_unlock(&of_mutex); + if (!rc) + of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL); + return rc; } @@ -1730,10 +1728,6 @@ int of_update_property(struct device_node *np, struct property *newprop) if (!newprop->name) return -EINVAL; - rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -1745,6 +1739,9 @@ int of_update_property(struct device_node *np, struct property *newprop) mutex_unlock(&of_mutex); + if (!rc) + of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop); + return rc; } diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 2f50de87ae08..f298dc55368f 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -83,7 +83,7 @@ int of_reconfig_notify(unsigned long action, void *p) } int of_property_notify(int action, struct device_node *np, - struct property *prop) + struct property *prop, struct property *oldprop) { struct of_prop_reconfig pr; @@ -93,6 +93,7 @@ int of_property_notify(int action, struct device_node *np, pr.dn = np; pr.prop = prop; + pr.old_prop = oldprop; return of_reconfig_notify(action, &pr); } @@ -125,11 +126,6 @@ void __of_attach_node(struct device_node *np) int of_attach_node(struct device_node *np) { unsigned long flags; - int rc; - - rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); - if (rc) - return rc; mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); @@ -138,6 +134,9 @@ int of_attach_node(struct device_node *np) __of_attach_node_sysfs(np); mutex_unlock(&of_mutex); + + of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); + return 0; } @@ -188,10 +187,6 @@ int of_detach_node(struct device_node *np) unsigned long flags; int rc = 0; - rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); - if (rc) - return rc; - mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_detach_node(np); @@ -199,6 +194,9 @@ int of_detach_node(struct device_node *np) __of_detach_node_sysfs(np); mutex_unlock(&of_mutex); + + of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); + return rc; } diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 8129c0e58d70..f69ccb1fa308 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -43,11 +43,11 @@ static inline struct device_node *kobj_to_device_node(struct kobject *kobj) #if defined(CONFIG_OF_DYNAMIC) extern int of_property_notify(int action, struct device_node *np, - struct property *prop); + struct property *prop, struct property *old_prop); extern void of_node_release(struct kobject *kobj); #else /* CONFIG_OF_DYNAMIC */ static inline int of_property_notify(int action, struct device_node *np, - struct property *prop) + struct property *prop, struct property *old_prop) { return 0; } diff --git a/include/linux/of.h b/include/linux/of.h index c9cabc4ffbc9..689e26274cf0 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -313,6 +313,7 @@ extern int of_update_property(struct device_node *np, struct property *newprop); struct of_prop_reconfig { struct device_node *dn; struct property *prop; + struct property *old_prop; }; extern int of_reconfig_notifier_register(struct notifier_block *); From 3f96d9be7b0bc4f1af632331e67c328290b920b7 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 15 Nov 2013 17:19:09 +0000 Subject: [PATCH 252/298] of/selftest: Add self tests for manipulation of properties Adds a few simple test cases to ensure that addition, update and removal of device tree node properties works correctly. Signed-off-by: Grant Likely Cc: Rob Herring Cc: Benjamin Herrenschmidt Cc: David S. Miller Cc: Nathan Fontenot Cc: Pantelis Antoniou (cherry picked from commit 7e66c5c74f7348a96d5a3671f8cda4a478242679) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 62 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 6643d1920985..ae4450070503 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -30,6 +30,67 @@ static struct selftest_results { } \ } +static void __init of_selftest_dynamic(void) +{ + struct device_node *np; + struct property *prop; + + np = of_find_node_by_path("/testcase-data"); + if (!np) { + pr_err("missing testcase data\n"); + return; + } + + /* Array of 4 properties for the purpose of testing */ + prop = kzalloc(sizeof(*prop) * 4, GFP_KERNEL); + if (!prop) { + selftest(0, "kzalloc() failed\n"); + return; + } + + /* Add a new property - should pass*/ + prop->name = "new-property"; + prop->value = "new-property-data"; + prop->length = strlen(prop->value); + selftest(of_add_property(np, prop) == 0, "Adding a new property failed\n"); + + /* Try to add an existing property - should fail */ + prop++; + prop->name = "new-property"; + prop->value = "new-property-data-should-fail"; + prop->length = strlen(prop->value); + selftest(of_add_property(np, prop) != 0, + "Adding an existing property should have failed\n"); + + /* Try to modify an existing property - should pass */ + prop->value = "modify-property-data-should-pass"; + prop->length = strlen(prop->value); + selftest(of_update_property(np, prop) == 0, + "Updating an existing property should have passed\n"); + + /* Try to modify non-existent property - should pass*/ + prop++; + prop->name = "modify-property"; + prop->value = "modify-missing-property-data-should-pass"; + prop->length = strlen(prop->value); + selftest(of_update_property(np, prop) == 0, + "Updating a missing property should have passed\n"); + + /* Remove property - should pass */ + selftest(of_remove_property(np, prop) == 0, + "Removing a property should have passed\n"); + + /* Adding very large property - should pass */ + prop++; + prop->name = "large-property-PAGE_SIZEx8"; + prop->length = PAGE_SIZE * 8; + prop->value = kzalloc(prop->length, GFP_KERNEL); + selftest(prop->value != NULL, "Unable to allocate large buffer\n"); + if (prop->value) + selftest(of_add_property(np, prop) == 0, + "Adding a large property should have passed\n"); +} + static void __init of_selftest_parse_phandle_with_args(void) { struct device_node *np; @@ -378,6 +439,7 @@ static int __init of_selftest(void) of_node_put(np); pr_info("start of selftest - you will see error messages\n"); + of_selftest_dynamic(); of_selftest_parse_phandle_with_args(); of_selftest_property_match_string(); of_selftest_parse_interrupts(); From b8524c272b6fef2df4dbb0b6306188946ab4f75c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 23 Apr 2014 17:57:40 -0500 Subject: [PATCH 253/298] of: selftest: add deferred probe interrupt test Signed-off-by: Rob Herring [grant.likely: fixed failure when root node specifies the interrupt parent] Signed-off-by: Grant Likely (cherry picked from commit 82c0f5897a8708dbf7abe08e49efd9a4a8d8cd3a) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 32 +++++++++++++++++++ .../of/testcase-data/tests-interrupts.dtsi | 13 ++++++++ 2 files changed, 45 insertions(+) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index ae4450070503..fe70b86bcffb 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -427,6 +428,36 @@ static void __init of_selftest_match_node(void) } } +static void __init of_selftest_platform_populate(void) +{ + int irq; + struct device_node *np; + struct platform_device *pdev; + + np = of_find_node_by_path("/testcase-data"); + of_platform_populate(np, of_default_bus_match_table, NULL, NULL); + + /* Test that a missing irq domain returns -EPROBE_DEFER */ + np = of_find_node_by_path("/testcase-data/testcase-device1"); + pdev = of_find_device_by_node(np); + if (!pdev) + selftest(0, "device 1 creation failed\n"); + irq = platform_get_irq(pdev, 0); + if (irq != -EPROBE_DEFER) + selftest(0, "device deferred probe failed - %d\n", irq); + + /* Test that a parsing failure does not return -EPROBE_DEFER */ + np = of_find_node_by_path("/testcase-data/testcase-device2"); + pdev = of_find_device_by_node(np); + if (!pdev) + selftest(0, "device 2 creation failed\n"); + irq = platform_get_irq(pdev, 0); + if (irq >= 0 || irq == -EPROBE_DEFER) + selftest(0, "device parsing error failed - %d\n", irq); + + selftest(1, "passed"); +} + static int __init of_selftest(void) { struct device_node *np; @@ -445,6 +476,7 @@ static int __init of_selftest(void) of_selftest_parse_interrupts(); of_selftest_parse_interrupts_extended(); of_selftest_match_node(); + of_selftest_platform_populate(); pr_info("end of selftest - %i passed, %i failed\n", selftest_results.passed, selftest_results.failed); return 0; diff --git a/drivers/of/testcase-data/tests-interrupts.dtsi b/drivers/of/testcase-data/tests-interrupts.dtsi index c843720bd3e5..da4695f60351 100644 --- a/drivers/of/testcase-data/tests-interrupts.dtsi +++ b/drivers/of/testcase-data/tests-interrupts.dtsi @@ -54,5 +54,18 @@ <&test_intmap1 1 2>; }; }; + + testcase-device1 { + compatible = "testcase-device"; + interrupt-parent = <&test_intc0>; + interrupts = <1>; + }; + + testcase-device2 { + compatible = "testcase-device"; + interrupt-parent = <&test_intc2>; + interrupts = <1>; /* invalid specifier - too short */ + }; }; + }; From b0c0283496dc772b005dda5e38d956917bbd1f16 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 May 2014 10:07:29 -0500 Subject: [PATCH 254/298] of/selftest: clean-up of_selftest_platform_populate pass/fail handling Move the pass/fail checks into selftest() calls instead of a separate if condition. Unconditionally calling pass was wrong. Signed-off-by: Rob Herring Cc: Grant Likely (cherry picked from commit 7d1cdc89c54d2cb8157cf1f36fc65e8583d26484) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index fe70b86bcffb..c1d7d38009f1 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -440,22 +440,18 @@ static void __init of_selftest_platform_populate(void) /* Test that a missing irq domain returns -EPROBE_DEFER */ np = of_find_node_by_path("/testcase-data/testcase-device1"); pdev = of_find_device_by_node(np); - if (!pdev) - selftest(0, "device 1 creation failed\n"); + selftest(pdev, "device 1 creation failed\n"); + irq = platform_get_irq(pdev, 0); - if (irq != -EPROBE_DEFER) - selftest(0, "device deferred probe failed - %d\n", irq); + selftest(irq == -EPROBE_DEFER, "device deferred probe failed - %d\n", irq); /* Test that a parsing failure does not return -EPROBE_DEFER */ np = of_find_node_by_path("/testcase-data/testcase-device2"); pdev = of_find_device_by_node(np); - if (!pdev) - selftest(0, "device 2 creation failed\n"); + selftest(pdev, "device 2 creation failed\n"); irq = platform_get_irq(pdev, 0); - if (irq >= 0 || irq == -EPROBE_DEFER) - selftest(0, "device parsing error failed - %d\n", irq); + selftest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq); - selftest(1, "passed"); } static int __init of_selftest(void) From 4191d2cc394d52ab21b708a358f99623d6dfa160 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 13 May 2014 10:07:54 -0500 Subject: [PATCH 255/298] of/selftest: add testcase for nodes with same name and address Add a test case for nodes which have the same name and same non-translatable unit address. Signed-off-by: Rob Herring Reviewed-by: Grant Likely Reviewed-by: Frank Rowand (cherry picked from commit fb2caa50fbacd21719a90dd66b617ce3cb4fd6d7) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 20 ++++++++++- drivers/of/testcase-data/testcases.dtsi | 1 + drivers/of/testcase-data/tests-platform.dtsi | 35 ++++++++++++++++++++ 3 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 drivers/of/testcase-data/tests-platform.dtsi diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index c1d7d38009f1..2588faaaa305 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -431,8 +431,12 @@ static void __init of_selftest_match_node(void) static void __init of_selftest_platform_populate(void) { int irq; - struct device_node *np; + struct device_node *np, *child; struct platform_device *pdev; + struct of_device_id match[] = { + { .compatible = "test-device", }, + {} + }; np = of_find_node_by_path("/testcase-data"); of_platform_populate(np, of_default_bus_match_table, NULL, NULL); @@ -452,6 +456,20 @@ static void __init of_selftest_platform_populate(void) irq = platform_get_irq(pdev, 0); selftest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq); + np = of_find_node_by_path("/testcase-data/platform-tests"); + if (!np) { + pr_err("No testcase data in device tree\n"); + return; + } + + for_each_child_of_node(np, child) { + struct device_node *grandchild; + of_platform_populate(child, match, NULL, NULL); + for_each_child_of_node(child, grandchild) + selftest(of_find_device_by_node(grandchild), + "Could not create device for node '%s'\n", + grandchild->name); + } } static int __init of_selftest(void) diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi index 3a5b75a8e4d7..6d8d980ac858 100644 --- a/drivers/of/testcase-data/testcases.dtsi +++ b/drivers/of/testcase-data/testcases.dtsi @@ -1,3 +1,4 @@ #include "tests-phandle.dtsi" #include "tests-interrupts.dtsi" #include "tests-match.dtsi" +#include "tests-platform.dtsi" diff --git a/drivers/of/testcase-data/tests-platform.dtsi b/drivers/of/testcase-data/tests-platform.dtsi new file mode 100644 index 000000000000..eb20eeb2b062 --- /dev/null +++ b/drivers/of/testcase-data/tests-platform.dtsi @@ -0,0 +1,35 @@ + +/ { + testcase-data { + platform-tests { + #address-cells = <1>; + #size-cells = <0>; + + test-device@0 { + compatible = "test-device"; + reg = <0x0>; + + #address-cells = <1>; + #size-cells = <0>; + + dev@100 { + compatible = "test-sub-device"; + reg = <0x100>; + }; + }; + + test-device@1 { + compatible = "test-device"; + reg = <0x1>; + + #address-cells = <1>; + #size-cells = <0>; + + dev@100 { + compatible = "test-sub-device"; + reg = <0x100>; + }; + }; + }; + }; +}; From d5ba7b3ca35a9cb01680fac0e249ad6e12b56389 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 14 Mar 2014 13:53:10 +0000 Subject: [PATCH 256/298] of: Add a testcase for of_find_node_by_path() Add a testcase for the find_node_by_path() function to make sure it handles all the valid scenarios. Signed-off-by: Grant Likely (cherry picked from commit ae91ff72e9132abe47f726424d9420fce004ca04) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 46 +++++++++++++++++++++ drivers/of/testcase-data/tests-phandle.dtsi | 6 ++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 2588faaaa305..077314eebb95 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -31,6 +31,51 @@ static struct selftest_results { } \ } +static void __init of_selftest_find_node_by_name(void) +{ + struct device_node *np; + + np = of_find_node_by_path("/testcase-data"); + selftest(np && !strcmp("/testcase-data", np->full_name), + "find /testcase-data failed\n"); + of_node_put(np); + + /* Test if trailing '/' works */ + np = of_find_node_by_path("/testcase-data/"); + selftest(!np, "trailing '/' on /testcase-data/ should fail\n"); + + np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a"); + selftest(np && !strcmp("/testcase-data/phandle-tests/consumer-a", np->full_name), + "find /testcase-data/phandle-tests/consumer-a failed\n"); + of_node_put(np); + + np = of_find_node_by_path("testcase-alias"); + selftest(np && !strcmp("/testcase-data", np->full_name), + "find testcase-alias failed\n"); + of_node_put(np); + + /* Test if trailing '/' works on aliases */ + np = of_find_node_by_path("testcase-alias/"); + selftest(!np, "trailing '/' on testcase-alias/ should fail\n"); + + np = of_find_node_by_path("testcase-alias/phandle-tests/consumer-a"); + selftest(np && !strcmp("/testcase-data/phandle-tests/consumer-a", np->full_name), + "find testcase-alias/phandle-tests/consumer-a failed\n"); + of_node_put(np); + + np = of_find_node_by_path("/testcase-data/missing-path"); + selftest(!np, "non-existent path returned node %s\n", np->full_name); + of_node_put(np); + + np = of_find_node_by_path("missing-alias"); + selftest(!np, "non-existent alias returned node %s\n", np->full_name); + of_node_put(np); + + np = of_find_node_by_path("testcase-alias/missing-path"); + selftest(!np, "non-existent alias with relative path returned node %s\n", np->full_name); + of_node_put(np); +} + static void __init of_selftest_dynamic(void) { struct device_node *np; @@ -484,6 +529,7 @@ static int __init of_selftest(void) of_node_put(np); pr_info("start of selftest - you will see error messages\n"); + of_selftest_find_node_by_name(); of_selftest_dynamic(); of_selftest_parse_phandle_with_args(); of_selftest_property_match_string(); diff --git a/drivers/of/testcase-data/tests-phandle.dtsi b/drivers/of/testcase-data/tests-phandle.dtsi index 788a4c24b8f5..ce0fe083d406 100644 --- a/drivers/of/testcase-data/tests-phandle.dtsi +++ b/drivers/of/testcase-data/tests-phandle.dtsi @@ -1,6 +1,10 @@ / { - testcase-data { + aliases { + testcase-alias = &testcase; + }; + + testcase: testcase-data { security-password = "password"; duplicate-name = "duplicate"; duplicate-name { }; From a39492cbb90cebc0c1cb55d95e913848f6d68109 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 4 Jul 2014 19:58:47 +0300 Subject: [PATCH 257/298] OF: Utility helper functions for dynamic nodes Introduce helper functions for working with the live DT tree, all of them related to dynamically adding/removing nodes and properties. __of_prop_dup() copies a property dynamically __of_node_alloc() creates an empty node Bug fix about prop->len == 0 by Ionut Nicu Signed-off-by: Pantelis Antoniou [glikely: Added unittest for of_copy_property and dropped fine-grained allocations] [glikely: removed name, type and phandle arguments from __of_node_alloc] Signed-off-by: Grant Likely (cherry picked from commit 698433963b98d6de7b102c242805c99fda4fa1fb) Signed-off-by: Mark Brown Conflicts: drivers/of/of_private.h --- drivers/of/dynamic.c | 77 +++++++++++++++++++++++++++++++++++++++++++ drivers/of/selftest.c | 28 ++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index f298dc55368f..7bd5501736a6 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -237,3 +237,80 @@ void of_node_release(struct kobject *kobj) kfree(node->data); kfree(node); } + +/** + * __of_prop_dup - Copy a property dynamically. + * @prop: Property to copy + * @allocflags: Allocation flags (typically pass GFP_KERNEL) + * + * Copy a property by dynamically allocating the memory of both the + * property stucture and the property name & contents. The property's + * flags have the OF_DYNAMIC bit set so that we can differentiate between + * dynamically allocated properties and not. + * Returns the newly allocated property or NULL on out of memory error. + */ +struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags) +{ + struct property *new; + + new = kzalloc(sizeof(*new), allocflags); + if (!new) + return NULL; + + /* + * NOTE: There is no check for zero length value. + * In case of a boolean property This will allocate a value + * of zero bytes. We do this to work around the use + * of of_get_property() calls on boolean values. + */ + new->name = kstrdup(prop->name, allocflags); + new->value = kmemdup(prop->value, prop->length, allocflags); + new->length = prop->length; + if (!new->name || !new->value) + goto err_free; + + /* mark the property as dynamic */ + of_property_set_flag(new, OF_DYNAMIC); + + return new; + + err_free: + kfree(new->name); + kfree(new->value); + kfree(new); + return NULL; +} + +/** + * __of_node_alloc() - Create an empty device node dynamically. + * @full_name: Full name of the new device node + * @allocflags: Allocation flags (typically pass GFP_KERNEL) + * + * Create an empty device tree node, suitable for further modification. + * The node data are dynamically allocated and all the node flags + * have the OF_DYNAMIC & OF_DETACHED bits set. + * Returns the newly allocated node or NULL on out of memory error. + */ +struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags) +{ + struct device_node *node; + + node = kzalloc(sizeof(*node), allocflags); + if (!node) + return NULL; + + node->full_name = kstrdup(full_name, allocflags); + of_node_set_flag(node, OF_DYNAMIC); + of_node_set_flag(node, OF_DETACHED); + if (!node->full_name) + goto err_free; + + of_node_init(node); + + return node; + + err_free: + kfree(node->full_name); + kfree(node); + return NULL; +} diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 077314eebb95..ee2166f0f36a 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -16,6 +16,8 @@ #include #include +#include "of_private.h" + static struct selftest_results { int passed; int failed; @@ -266,6 +268,31 @@ static void __init of_selftest_property_match_string(void) selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc); } +#define propcmp(p1, p2) (((p1)->length == (p2)->length) && \ + (p1)->value && (p2)->value && \ + !memcmp((p1)->value, (p2)->value, (p1)->length) && \ + !strcmp((p1)->name, (p2)->name)) +static void __init of_selftest_property_copy(void) +{ +#ifdef CONFIG_OF_DYNAMIC + struct property p1 = { .name = "p1", .length = 0, .value = "" }; + struct property p2 = { .name = "p2", .length = 5, .value = "abcd" }; + struct property *new; + + new = __of_prop_dup(&p1, GFP_KERNEL); + selftest(new && propcmp(&p1, new), "empty property didn't copy correctly\n"); + kfree(new->value); + kfree(new->name); + kfree(new); + + new = __of_prop_dup(&p2, GFP_KERNEL); + selftest(new && propcmp(&p2, new), "non-empty property didn't copy correctly\n"); + kfree(new->value); + kfree(new->name); + kfree(new); +#endif +} + static void __init of_selftest_parse_interrupts(void) { struct device_node *np; @@ -533,6 +560,7 @@ static int __init of_selftest(void) of_selftest_dynamic(); of_selftest_parse_phandle_with_args(); of_selftest_property_match_string(); + of_selftest_property_copy(); of_selftest_parse_interrupts(); of_selftest_parse_interrupts_extended(); of_selftest_match_node(); From 33ef56d1b95d9b628f9ed78c34d40b746668855e Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 4 Jul 2014 19:58:49 +0300 Subject: [PATCH 258/298] of: Transactional DT support. Introducing DT transactional support. A DT transaction is a method which allows one to apply changes in the live tree, in such a way that either the full set of changes take effect, or the state of the tree can be rolled-back to the state it was before it was attempted. An applied transaction can be rolled-back at any time. Documentation is in Documentation/devicetree/changesets.txt Signed-off-by: Pantelis Antoniou [glikely: Removed device notifiers and reworked to be more consistent] Signed-off-by: Grant Likely (cherry picked from commit 201c910bd6898d81d4ac6685d0f421b7e10f3c5d) Signed-off-by: Mark Brown Conflicts: include/linux/of.h --- Documentation/devicetree/changesets.txt | 40 +++ drivers/of/dynamic.c | 344 ++++++++++++++++++++++++ drivers/of/of_private.h | 9 + drivers/of/selftest.c | 51 ++++ drivers/of/testcase-data/testcases.dtsi | 10 + include/linux/of.h | 98 +++++++ 6 files changed, 552 insertions(+) create mode 100644 Documentation/devicetree/changesets.txt diff --git a/Documentation/devicetree/changesets.txt b/Documentation/devicetree/changesets.txt new file mode 100644 index 000000000000..935ba5acc34e --- /dev/null +++ b/Documentation/devicetree/changesets.txt @@ -0,0 +1,40 @@ +A DT changeset is a method which allows one to apply changes +in the live tree in such a way that either the full set of changes +will be applied, or none of them will be. If an error occurs partway +through applying the changeset, then the tree will be rolled back to the +previous state. A changeset can also be removed after it has been +applied. + +When a changeset is applied, all of the changes get applied to the tree +at once before emitting OF_RECONFIG notifiers. This is so that the +receiver sees a complete and consistent state of the tree when it +receives the notifier. + +The sequence of a changeset is as follows. + +1. of_changeset_init() - initializes a changeset + +2. A number of DT tree change calls, of_changeset_attach_node(), +of_changeset_detach_node(), of_changeset_add_property(), +of_changeset_remove_property, of_changeset_update_property() to prepare +a set of changes. No changes to the active tree are made at this point. +All the change operations are recorded in the of_changeset 'entries' +list. + +3. mutex_lock(of_mutex) - starts a changeset; The global of_mutex +ensures there can only be one editor at a time. + +4. of_changeset_apply() - Apply the changes to the tree. Either the +entire changeset will get applied, or if there is an error the tree will +be restored to the previous state + +5. mutex_unlock(of_mutex) - All operations complete, release the mutex + +If a successfully applied changeset needs to be removed, it can be done +with the following sequence. + +1. mutex_lock(of_mutex) + +2. of_changeset_revert() + +3. mutex_unlock(of_mutex) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 7bd5501736a6..c1002b7be786 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -314,3 +314,347 @@ struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags) kfree(node); return NULL; } + +static void __of_changeset_entry_destroy(struct of_changeset_entry *ce) +{ + of_node_put(ce->np); + list_del(&ce->node); + kfree(ce); +} + +#ifdef DEBUG +static void __of_changeset_entry_dump(struct of_changeset_entry *ce) +{ + switch (ce->action) { + case OF_RECONFIG_ADD_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "ADD_PROPERTY ", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_REMOVE_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "REMOVE_PROPERTY", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "UPDATE_PROPERTY", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_ATTACH_NODE: + pr_debug("%p: %s %s\n", + ce, "ATTACH_NODE ", ce->np->full_name); + break; + case OF_RECONFIG_DETACH_NODE: + pr_debug("%p: %s %s\n", + ce, "DETACH_NODE ", ce->np->full_name); + break; + } +} +#else +static inline void __of_changeset_entry_dump(struct of_changeset_entry *ce) +{ + /* empty */ +} +#endif + +static void __of_changeset_entry_invert(struct of_changeset_entry *ce, + struct of_changeset_entry *rce) +{ + memcpy(rce, ce, sizeof(*rce)); + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + rce->action = OF_RECONFIG_DETACH_NODE; + break; + case OF_RECONFIG_DETACH_NODE: + rce->action = OF_RECONFIG_ATTACH_NODE; + break; + case OF_RECONFIG_ADD_PROPERTY: + rce->action = OF_RECONFIG_REMOVE_PROPERTY; + break; + case OF_RECONFIG_REMOVE_PROPERTY: + rce->action = OF_RECONFIG_ADD_PROPERTY; + break; + case OF_RECONFIG_UPDATE_PROPERTY: + rce->old_prop = ce->prop; + rce->prop = ce->old_prop; + break; + } +} + +static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert) +{ + struct of_changeset_entry ce_inverted; + int ret; + + if (revert) { + __of_changeset_entry_invert(ce, &ce_inverted); + ce = &ce_inverted; + } + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + case OF_RECONFIG_DETACH_NODE: + ret = of_reconfig_notify(ce->action, ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + case OF_RECONFIG_REMOVE_PROPERTY: + case OF_RECONFIG_UPDATE_PROPERTY: + ret = of_property_notify(ce->action, ce->np, ce->prop, ce->old_prop); + break; + default: + pr_err("%s: invalid devicetree changeset action: %i\n", __func__, + (int)ce->action); + return; + } + + if (ret) + pr_err("%s: notifier error @%s\n", __func__, ce->np->full_name); +} + +static int __of_changeset_entry_apply(struct of_changeset_entry *ce) +{ + struct property *old_prop, **propp; + unsigned long flags; + int ret = 0; + + __of_changeset_entry_dump(ce); + + raw_spin_lock_irqsave(&devtree_lock, flags); + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + __of_attach_node(ce->np); + break; + case OF_RECONFIG_DETACH_NODE: + __of_detach_node(ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + /* If the property is in deadprops then it must be removed */ + for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) { + if (*propp == ce->prop) { + *propp = ce->prop->next; + ce->prop->next = NULL; + break; + } + } + + ret = __of_add_property(ce->np, ce->prop); + if (ret) { + pr_err("%s: add_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + case OF_RECONFIG_REMOVE_PROPERTY: + ret = __of_remove_property(ce->np, ce->prop); + if (ret) { + pr_err("%s: remove_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + + case OF_RECONFIG_UPDATE_PROPERTY: + /* If the property is in deadprops then it must be removed */ + for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) { + if (*propp == ce->prop) { + *propp = ce->prop->next; + ce->prop->next = NULL; + break; + } + } + + ret = __of_update_property(ce->np, ce->prop, &old_prop); + if (ret) { + pr_err("%s: update_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + default: + ret = -EINVAL; + } + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + if (ret) + return ret; + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + __of_attach_node_sysfs(ce->np); + break; + case OF_RECONFIG_DETACH_NODE: + __of_detach_node_sysfs(ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + /* ignore duplicate names */ + __of_add_property_sysfs(ce->np, ce->prop); + break; + case OF_RECONFIG_REMOVE_PROPERTY: + __of_remove_property_sysfs(ce->np, ce->prop); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + __of_update_property_sysfs(ce->np, ce->prop, ce->old_prop); + break; + } + + return 0; +} + +static inline int __of_changeset_entry_revert(struct of_changeset_entry *ce) +{ + struct of_changeset_entry ce_inverted; + + __of_changeset_entry_invert(ce, &ce_inverted); + return __of_changeset_entry_apply(&ce_inverted); +} + +/** + * of_changeset_init - Initialize a changeset for use + * + * @ocs: changeset pointer + * + * Initialize a changeset structure + */ +void of_changeset_init(struct of_changeset *ocs) +{ + memset(ocs, 0, sizeof(*ocs)); + INIT_LIST_HEAD(&ocs->entries); +} + +/** + * of_changeset_destroy - Destroy a changeset + * + * @ocs: changeset pointer + * + * Destroys a changeset. Note that if a changeset is applied, + * its changes to the tree cannot be reverted. + */ +void of_changeset_destroy(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce, *cen; + + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) + __of_changeset_entry_destroy(ce); +} + +/** + * of_changeset_apply - Applies a changeset + * + * @ocs: changeset pointer + * + * Applies a changeset to the live tree. + * Any side-effects of live tree state changes are applied here on + * sucess, like creation/destruction of devices and side-effects + * like creation of sysfs properties and directories. + * Returns 0 on success, a negative error value in case of an error. + * On error the partially applied effects are reverted. + */ +int of_changeset_apply(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce; + int ret; + + /* perform the rest of the work */ + pr_debug("of_changeset: applying...\n"); + list_for_each_entry(ce, &ocs->entries, node) { + ret = __of_changeset_entry_apply(ce); + if (ret) { + pr_err("%s: Error applying changeset (%d)\n", __func__, ret); + list_for_each_entry_continue_reverse(ce, &ocs->entries, node) + __of_changeset_entry_revert(ce); + return ret; + } + } + pr_debug("of_changeset: applied, emitting notifiers.\n"); + + /* drop the global lock while emitting notifiers */ + mutex_unlock(&of_mutex); + list_for_each_entry(ce, &ocs->entries, node) + __of_changeset_entry_notify(ce, 0); + mutex_lock(&of_mutex); + pr_debug("of_changeset: notifiers sent.\n"); + + return 0; +} + +/** + * of_changeset_revert - Reverts an applied changeset + * + * @ocs: changeset pointer + * + * Reverts a changeset returning the state of the tree to what it + * was before the application. + * Any side-effects like creation/destruction of devices and + * removal of sysfs properties and directories are applied. + * Returns 0 on success, a negative error value in case of an error. + */ +int of_changeset_revert(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce; + int ret; + + pr_debug("of_changeset: reverting...\n"); + list_for_each_entry_reverse(ce, &ocs->entries, node) { + ret = __of_changeset_entry_revert(ce); + if (ret) { + pr_err("%s: Error reverting changeset (%d)\n", __func__, ret); + list_for_each_entry_continue(ce, &ocs->entries, node) + __of_changeset_entry_apply(ce); + return ret; + } + } + pr_debug("of_changeset: reverted, emitting notifiers.\n"); + + /* drop the global lock while emitting notifiers */ + mutex_unlock(&of_mutex); + list_for_each_entry_reverse(ce, &ocs->entries, node) + __of_changeset_entry_notify(ce, 1); + mutex_lock(&of_mutex); + pr_debug("of_changeset: notifiers sent.\n"); + + return 0; +} + +/** + * of_changeset_action - Perform a changeset action + * + * @ocs: changeset pointer + * @action: action to perform + * @np: Pointer to device node + * @prop: Pointer to property + * + * On action being one of: + * + OF_RECONFIG_ATTACH_NODE + * + OF_RECONFIG_DETACH_NODE, + * + OF_RECONFIG_ADD_PROPERTY + * + OF_RECONFIG_REMOVE_PROPERTY, + * + OF_RECONFIG_UPDATE_PROPERTY + * Returns 0 on success, a negative error value in case of an error. + */ +int of_changeset_action(struct of_changeset *ocs, unsigned long action, + struct device_node *np, struct property *prop) +{ + struct of_changeset_entry *ce; + + ce = kzalloc(sizeof(*ce), GFP_KERNEL); + if (!ce) { + pr_err("%s: Failed to allocate\n", __func__); + return -ENOMEM; + } + /* get a reference to the node */ + ce->action = action; + ce->np = of_node_get(np); + ce->prop = prop; + + if (action == OF_RECONFIG_UPDATE_PROPERTY && prop) + ce->old_prop = of_find_property(np, prop->name, NULL); + + /* add it to the list */ + list_add_tail(&ce->node, &ocs->entries); + return 0; +} diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index f69ccb1fa308..858e0a5d9a11 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -81,4 +81,13 @@ extern int __of_attach_node_sysfs(struct device_node *np); extern void __of_detach_node(struct device_node *np); extern void __of_detach_node_sysfs(struct device_node *np); +/* iterators for transactions, used for overlays */ +/* forward iterator */ +#define for_each_transaction_entry(_oft, _te) \ + list_for_each_entry(_te, &(_oft)->te_list, node) + +/* reverse iterator */ +#define for_each_transaction_entry_reverse(_oft, _te) \ + list_for_each_entry_reverse(_te, &(_oft)->te_list, node) + #endif /* _LINUX_OF_PRIVATE_H */ diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index ee2166f0f36a..04e39a183e53 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -293,6 +293,56 @@ static void __init of_selftest_property_copy(void) #endif } +static void __init of_selftest_changeset(void) +{ +#ifdef CONFIG_OF_DYNAMIC + struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" }; + struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" }; + struct property *ppremove; + struct device_node *n1, *n2, *n21, *nremove, *parent; + struct of_changeset chgset; + + of_changeset_init(&chgset); + n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL); + selftest(n1, "testcase setup failure\n"); + n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL); + selftest(n2, "testcase setup failure\n"); + n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL); + selftest(n21, "testcase setup failure %p\n", n21); + nremove = of_find_node_by_path("/testcase-data/changeset/node-remove"); + selftest(nremove, "testcase setup failure\n"); + ppadd = __of_prop_dup(&padd, GFP_KERNEL); + selftest(ppadd, "testcase setup failure\n"); + ppupdate = __of_prop_dup(&pupdate, GFP_KERNEL); + selftest(ppupdate, "testcase setup failure\n"); + parent = nremove->parent; + n1->parent = parent; + n2->parent = parent; + n21->parent = n2; + n2->child = n21; + ppremove = of_find_property(parent, "prop-remove", NULL); + selftest(ppremove, "failed to find removal prop"); + + of_changeset_init(&chgset); + selftest(!of_changeset_attach_node(&chgset, n1), "fail attach n1\n"); + selftest(!of_changeset_attach_node(&chgset, n2), "fail attach n2\n"); + selftest(!of_changeset_detach_node(&chgset, nremove), "fail remove node\n"); + selftest(!of_changeset_attach_node(&chgset, n21), "fail attach n21\n"); + selftest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop\n"); + selftest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n"); + selftest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n"); + mutex_lock(&of_mutex); + selftest(!of_changeset_apply(&chgset), "apply failed\n"); + mutex_unlock(&of_mutex); + + mutex_lock(&of_mutex); + selftest(!of_changeset_revert(&chgset), "revert failed\n"); + mutex_unlock(&of_mutex); + + of_changeset_destroy(&chgset); +#endif +} + static void __init of_selftest_parse_interrupts(void) { struct device_node *np; @@ -561,6 +611,7 @@ static int __init of_selftest(void) of_selftest_parse_phandle_with_args(); of_selftest_property_match_string(); of_selftest_property_copy(); + of_selftest_changeset(); of_selftest_parse_interrupts(); of_selftest_parse_interrupts_extended(); of_selftest_match_node(); diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi index 6d8d980ac858..669bb07df142 100644 --- a/drivers/of/testcase-data/testcases.dtsi +++ b/drivers/of/testcase-data/testcases.dtsi @@ -1,3 +1,13 @@ +/ { + testcase-data { + changeset { + prop-update = "hello"; + prop-remove = "world"; + node-remove { + }; + }; + }; +}; #include "tests-phandle.dtsi" #include "tests-interrupts.dtsi" #include "tests-match.dtsi" diff --git a/include/linux/of.h b/include/linux/of.h index 689e26274cf0..ae091484bf74 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -751,4 +751,102 @@ static inline int of_get_available_child_count(const struct device_node *np) return num; } +#ifdef CONFIG_OF +#define _OF_DECLARE(table, name, compat, fn, fn_type) \ + static const struct of_device_id __of_table_##name \ + __used __section(__##table##_of_table) \ + = { .compatible = compat, \ + .data = (fn == (fn_type)NULL) ? fn : fn } +#else +#define _OF_DECLARE(table, name, compat, fn, fn_type) \ + static const struct of_device_id __of_table_##name \ + __attribute__((unused)) \ + = { .compatible = compat, \ + .data = (fn == (fn_type)NULL) ? fn : fn } +#endif + +typedef int (*of_init_fn_2)(struct device_node *, struct device_node *); +typedef void (*of_init_fn_1)(struct device_node *); + +#define OF_DECLARE_1(table, name, compat, fn) \ + _OF_DECLARE(table, name, compat, fn, of_init_fn_1) +#define OF_DECLARE_2(table, name, compat, fn) \ + _OF_DECLARE(table, name, compat, fn, of_init_fn_2) + +/** + * struct of_changeset_entry - Holds a changeset entry + * + * @node: list_head for the log list + * @action: notifier action + * @np: pointer to the device node affected + * @prop: pointer to the property affected + * @old_prop: hold a pointer to the original property + * + * Every modification of the device tree during a changeset + * is held in a list of of_changeset_entry structures. + * That way we can recover from a partial application, or we can + * revert the changeset + */ +struct of_changeset_entry { + struct list_head node; + unsigned long action; + struct device_node *np; + struct property *prop; + struct property *old_prop; +}; + +/** + * struct of_changeset - changeset tracker structure + * + * @entries: list_head for the changeset entries + * + * changesets are a convenient way to apply bulk changes to the + * live tree. In case of an error, changes are rolled-back. + * changesets live on after initial application, and if not + * destroyed after use, they can be reverted in one single call. + */ +struct of_changeset { + struct list_head entries; +}; + +#ifdef CONFIG_OF_DYNAMIC +extern void of_changeset_init(struct of_changeset *ocs); +extern void of_changeset_destroy(struct of_changeset *ocs); +extern int of_changeset_apply(struct of_changeset *ocs); +extern int of_changeset_revert(struct of_changeset *ocs); +extern int of_changeset_action(struct of_changeset *ocs, + unsigned long action, struct device_node *np, + struct property *prop); + +static inline int of_changeset_attach_node(struct of_changeset *ocs, + struct device_node *np) +{ + return of_changeset_action(ocs, OF_RECONFIG_ATTACH_NODE, np, NULL); +} + +static inline int of_changeset_detach_node(struct of_changeset *ocs, + struct device_node *np) +{ + return of_changeset_action(ocs, OF_RECONFIG_DETACH_NODE, np, NULL); +} + +static inline int of_changeset_add_property(struct of_changeset *ocs, + struct device_node *np, struct property *prop) +{ + return of_changeset_action(ocs, OF_RECONFIG_ADD_PROPERTY, np, prop); +} + +static inline int of_changeset_remove_property(struct of_changeset *ocs, + struct device_node *np, struct property *prop) +{ + return of_changeset_action(ocs, OF_RECONFIG_REMOVE_PROPERTY, np, prop); +} + +static inline int of_changeset_update_property(struct of_changeset *ocs, + struct device_node *np, struct property *prop) +{ + return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop); +} +#endif + #endif /* _LINUX_OF_H */ From cf77cbddf4794311ca069c5e67428434a86a8021 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 4 Jul 2014 19:59:20 +0300 Subject: [PATCH 259/298] of: Introduce Device Tree resolve support. Introduce support for dynamic device tree resolution. Using it, it is possible to prepare a device tree that's been loaded on runtime to be modified and inserted at the kernel live tree. Export of of_resolve and bug fix of double free by Guenter Roeck Signed-off-by: Pantelis Antoniou [grant.likely: Don't need to select CONFIG_OF_DYNAMIC and CONFIG_OF_DEVICE] [grant.likely: Don't need to depend on OF or !SPARC] [grant.likely: Factor out duplicate code blocks into single function] Signed-off-by: Grant Likely (cherry picked from commit 7941b27b16e3282f6ec8817e36492f1deec753a7) Signed-off-by: Mark Brown Conflicts: drivers/of/Kconfig drivers/of/Makefile --- .../devicetree/dynamic-resolution-notes.txt | 25 ++ drivers/of/Kconfig | 3 + drivers/of/Makefile | 1 + drivers/of/resolver.c | 336 ++++++++++++++++++ include/linux/of.h | 3 + 5 files changed, 368 insertions(+) create mode 100644 Documentation/devicetree/dynamic-resolution-notes.txt create mode 100644 drivers/of/resolver.c diff --git a/Documentation/devicetree/dynamic-resolution-notes.txt b/Documentation/devicetree/dynamic-resolution-notes.txt new file mode 100644 index 000000000000..083d23262abe --- /dev/null +++ b/Documentation/devicetree/dynamic-resolution-notes.txt @@ -0,0 +1,25 @@ +Device Tree Dynamic Resolver Notes +---------------------------------- + +This document describes the implementation of the in-kernel +Device Tree resolver, residing in drivers/of/resolver.c and is a +companion document to Documentation/devicetree/dt-object-internal.txt[1] + +How the resolver works +---------------------- + +The resolver is given as an input an arbitrary tree compiled with the +proper dtc option and having a /plugin/ tag. This generates the +appropriate __fixups__ & __local_fixups__ nodes as described in [1]. + +In sequence the resolver works by the following steps: + +1. Get the maximum device tree phandle value from the live tree + 1. +2. Adjust all the local phandles of the tree to resolve by that amount. +3. Using the __local__fixups__ node information adjust all local references + by the same amount. +4. For each property in the __fixups__ node locate the node it references + in the live tree. This is the label used to tag the node. +5. Retrieve the phandle of the target of the fixup. +6. For each fixup in the property locate the node:property:offset location + and replace it with the phandle value. diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index a71c12d4bad7..b3184422fd23 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -67,4 +67,7 @@ config OF_MTD depends on MTD def_bool y +config OF_RESOLVE + bool + endmenu # OF diff --git a/drivers/of/Makefile b/drivers/of/Makefile index 35b9d1f9865e..abe533cb0072 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_OF_MDIO) += of_mdio.o obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o obj-$(CONFIG_OF_MTD) += of_mtd.o +obj-$(CONFIG_OF_RESOLVE) += resolver.o diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c new file mode 100644 index 000000000000..aed7959f800d --- /dev/null +++ b/drivers/of/resolver.c @@ -0,0 +1,336 @@ +/* + * Functions for dealing with DT resolution + * + * Copyright (C) 2012 Pantelis Antoniou + * Copyright (C) 2012 Texas Instruments Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* illegal phandle value (set when unresolved) */ +#define OF_PHANDLE_ILLEGAL 0xdeadbeef + +/** + * Find a node with the give full name by recursively following any of + * the child node links. + */ +static struct device_node *__of_find_node_by_full_name(struct device_node *node, + const char *full_name) +{ + struct device_node *child, *found; + + if (node == NULL) + return NULL; + + /* check */ + if (of_node_cmp(node->full_name, full_name) == 0) + return node; + + for_each_child_of_node(node, child) { + found = __of_find_node_by_full_name(child, full_name); + if (found != NULL) + return found; + } + + return NULL; +} + +/* + * Find live tree's maximum phandle value. + */ +static phandle of_get_tree_max_phandle(void) +{ + struct device_node *node; + phandle phandle; + unsigned long flags; + + /* now search recursively */ + raw_spin_lock_irqsave(&devtree_lock, flags); + phandle = 0; + for_each_of_allnodes(node) { + if (node->phandle != OF_PHANDLE_ILLEGAL && + node->phandle > phandle) + phandle = node->phandle; + } + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + return phandle; +} + +/* + * Adjust a subtree's phandle values by a given delta. + * Makes sure not to just adjust the device node's phandle value, + * but modify the phandle properties values as well. + */ +static void __of_adjust_tree_phandles(struct device_node *node, + int phandle_delta) +{ + struct device_node *child; + struct property *prop; + phandle phandle; + + /* first adjust the node's phandle direct value */ + if (node->phandle != 0 && node->phandle != OF_PHANDLE_ILLEGAL) + node->phandle += phandle_delta; + + /* now adjust phandle & linux,phandle values */ + for_each_property_of_node(node, prop) { + + /* only look for these two */ + if (of_prop_cmp(prop->name, "phandle") != 0 && + of_prop_cmp(prop->name, "linux,phandle") != 0) + continue; + + /* must be big enough */ + if (prop->length < 4) + continue; + + /* read phandle value */ + phandle = be32_to_cpup(prop->value); + if (phandle == OF_PHANDLE_ILLEGAL) /* unresolved */ + continue; + + /* adjust */ + *(uint32_t *)prop->value = cpu_to_be32(node->phandle); + } + + /* now do the children recursively */ + for_each_child_of_node(node, child) + __of_adjust_tree_phandles(child, phandle_delta); +} + +static int __of_adjust_phandle_ref(struct device_node *node, struct property *rprop, int value, bool is_delta) +{ + phandle phandle; + struct device_node *refnode; + struct property *sprop; + char *propval, *propcur, *propend, *nodestr, *propstr, *s; + int offset, propcurlen; + int err = 0; + + /* make a copy */ + propval = kmalloc(rprop->length, GFP_KERNEL); + if (!propval) { + pr_err("%s: Could not copy value of '%s'\n", + __func__, rprop->name); + return -ENOMEM; + } + memcpy(propval, rprop->value, rprop->length); + + propend = propval + rprop->length; + for (propcur = propval; propcur < propend; propcur += propcurlen + 1) { + propcurlen = strlen(propcur); + + nodestr = propcur; + s = strchr(propcur, ':'); + if (!s) { + pr_err("%s: Illegal symbol entry '%s' (1)\n", + __func__, propcur); + err = -EINVAL; + goto err_fail; + } + *s++ = '\0'; + + propstr = s; + s = strchr(s, ':'); + if (!s) { + pr_err("%s: Illegal symbol entry '%s' (2)\n", + __func__, (char *)rprop->value); + err = -EINVAL; + goto err_fail; + } + + *s++ = '\0'; + err = kstrtoint(s, 10, &offset); + if (err != 0) { + pr_err("%s: Could get offset '%s'\n", + __func__, (char *)rprop->value); + goto err_fail; + } + + /* look into the resolve node for the full path */ + refnode = __of_find_node_by_full_name(node, nodestr); + if (!refnode) { + pr_warn("%s: Could not find refnode '%s'\n", + __func__, (char *)rprop->value); + continue; + } + + /* now find the property */ + for_each_property_of_node(refnode, sprop) { + if (of_prop_cmp(sprop->name, propstr) == 0) + break; + } + + if (!sprop) { + pr_err("%s: Could not find property '%s'\n", + __func__, (char *)rprop->value); + err = -ENOENT; + goto err_fail; + } + + phandle = is_delta ? be32_to_cpup(sprop->value + offset) + value : value; + *(__be32 *)(sprop->value + offset) = cpu_to_be32(phandle); + } + +err_fail: + kfree(propval); + return err; +} + +/* + * Adjust the local phandle references by the given phandle delta. + * Assumes the existances of a __local_fixups__ node at the root + * of the tree. Does not take any devtree locks so make sure you + * call this on a tree which is at the detached state. + */ +static int __of_adjust_tree_phandle_references(struct device_node *node, + int phandle_delta) +{ + struct device_node *child; + struct property *rprop; + int err; + + /* locate the symbols & fixups nodes on resolve */ + for_each_child_of_node(node, child) + if (of_node_cmp(child->name, "__local_fixups__") == 0) + break; + + /* no local fixups */ + if (!child) + return 0; + + /* find the local fixups property */ + for_each_property_of_node(child, rprop) { + /* skip properties added automatically */ + if (of_prop_cmp(rprop->name, "name") == 0) + continue; + + err = __of_adjust_phandle_ref(node, rprop, phandle_delta, true); + if (err) + return err; + } + + return 0; +} + +/** + * of_resolve - Resolve the given node against the live tree. + * + * @resolve: Node to resolve + * + * Perform dynamic Device Tree resolution against the live tree + * to the given node to resolve. This depends on the live tree + * having a __symbols__ node, and the resolve node the __fixups__ & + * __local_fixups__ nodes (if needed). + * The result of the operation is a resolve node that it's contents + * are fit to be inserted or operate upon the live tree. + * Returns 0 on success or a negative error value on error. + */ +int of_resolve_phandles(struct device_node *resolve) +{ + struct device_node *child, *refnode; + struct device_node *root_sym, *resolve_sym, *resolve_fix; + struct property *rprop; + const char *refpath; + phandle phandle, phandle_delta; + int err; + + /* the resolve node must exist, and be detached */ + if (!resolve || !of_node_check_flag(resolve, OF_DETACHED)) + return -EINVAL; + + /* first we need to adjust the phandles */ + phandle_delta = of_get_tree_max_phandle() + 1; + __of_adjust_tree_phandles(resolve, phandle_delta); + err = __of_adjust_tree_phandle_references(resolve, phandle_delta); + if (err != 0) + return err; + + root_sym = NULL; + resolve_sym = NULL; + resolve_fix = NULL; + + /* this may fail (if no fixups are required) */ + root_sym = of_find_node_by_path("/__symbols__"); + + /* locate the symbols & fixups nodes on resolve */ + for_each_child_of_node(resolve, child) { + + if (!resolve_sym && + of_node_cmp(child->name, "__symbols__") == 0) + resolve_sym = child; + + if (!resolve_fix && + of_node_cmp(child->name, "__fixups__") == 0) + resolve_fix = child; + + /* both found, don't bother anymore */ + if (resolve_sym && resolve_fix) + break; + } + + /* we do allow for the case where no fixups are needed */ + if (!resolve_fix) { + err = 0; /* no error */ + goto out; + } + + /* we need to fixup, but no root symbols... */ + if (!root_sym) { + err = -EINVAL; + goto out; + } + + for_each_property_of_node(resolve_fix, rprop) { + + /* skip properties added automatically */ + if (of_prop_cmp(rprop->name, "name") == 0) + continue; + + err = of_property_read_string(root_sym, + rprop->name, &refpath); + if (err != 0) { + pr_err("%s: Could not find symbol '%s'\n", + __func__, rprop->name); + goto out; + } + + refnode = of_find_node_by_path(refpath); + if (!refnode) { + pr_err("%s: Could not find node by path '%s'\n", + __func__, refpath); + err = -ENOENT; + goto out; + } + + phandle = refnode->phandle; + of_node_put(refnode); + + pr_debug("%s: %s phandle is 0x%08x\n", + __func__, rprop->name, phandle); + + err = __of_adjust_phandle_ref(resolve, rprop, phandle, false); + if (err) + break; + } + +out: + /* NULL is handled by of_node_put as NOP */ + of_node_put(root_sym); + + return err; +} +EXPORT_SYMBOL_GPL(of_resolve_phandles); diff --git a/include/linux/of.h b/include/linux/of.h index ae091484bf74..4896e7c3347f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -849,4 +849,7 @@ static inline int of_changeset_update_property(struct of_changeset *ocs, } #endif +/* CONFIG_OF_RESOLVE api */ +extern int of_resolve_phandles(struct device_node *tree); + #endif /* _LINUX_OF_H */ From 9ae47ebcf425e1bfedfb32f55983439c9ee63076 Mon Sep 17 00:00:00 2001 From: Gaurav Minocha Date: Wed, 16 Jul 2014 23:09:39 -0700 Subject: [PATCH 260/298] Adding selftest testdata dynamically into live tree This patch attaches selftest's device tree data (required by /drivers/of/selftest.c) dynamically into live device tree. First, it links selftest device tree data into the kernel image and then iterates over all the nodes and attaches them into the live tree. Once the testcases are complete, it removes the data attached. This patch will remove the manual process of addition and removal of selftest device tree data into the machine's dts file. Tested successfully with current selftest's testcases. Signed-off-by: Gaurav Minocha [glikely: Removed ability to build as a module and fixed no-devicetree bug] Signed-off-by: Grant Likely (cherry picked from commit ae9304c9d3111759fed5946272be3b13ba41f7e3) Signed-off-by: Mark Brown Conflicts: drivers/of/testcase-data/testcases.dts --- arch/arm/boot/dts/versatile-pb.dts | 2 - drivers/of/Kconfig | 1 + drivers/of/Makefile | 3 +- drivers/of/selftest.c | 156 ++++++++++++++++++ .../{testcases.dtsi => testcases.dts} | 11 +- 5 files changed, 160 insertions(+), 13 deletions(-) rename drivers/of/testcase-data/{testcases.dtsi => testcases.dts} (50%) diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts index 65f657711323..8d39677b7d4c 100644 --- a/arch/arm/boot/dts/versatile-pb.dts +++ b/arch/arm/boot/dts/versatile-pb.dts @@ -46,5 +46,3 @@ }; }; }; - -#include diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index b3184422fd23..17942557ecf6 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -10,6 +10,7 @@ menu "Device Tree and Open Firmware support" config OF_SELFTEST bool "Device Tree Runtime self tests" depends on OF_IRQ + select OF_DYNAMIC help This option builds in test cases for the device tree infrastructure that are executed once at boot time, and the results dumped to the diff --git a/drivers/of/Makefile b/drivers/of/Makefile index abe533cb0072..7e65dd45f0cf 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_OF_PROMTREE) += pdt.o obj-$(CONFIG_OF_ADDRESS) += address.o obj-$(CONFIG_OF_IRQ) += irq.o obj-$(CONFIG_OF_NET) += of_net.o -obj-$(CONFIG_OF_SELFTEST) += selftest.o +obj-$(CONFIG_OF_SELFTEST) += of_selftest.o +of_selftest-objs := selftest.o testcase-data/testcases.dtb.o obj-$(CONFIG_OF_MDIO) += of_mdio.o obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 04e39a183e53..d41002667833 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,10 @@ static struct selftest_results { int failed; } selftest_results; +#define NO_OF_NODES 2 +static struct device_node *nodes[NO_OF_NODES]; +static int last_node_index; + #define selftest(result, fmt, ...) { \ if (!(result)) { \ selftest_results.failed++; \ @@ -594,9 +599,156 @@ static void __init of_selftest_platform_populate(void) } } +/** + * update_node_properties - adds the properties + * of np into dup node (present in live tree) and + * updates parent of children of np to dup. + * + * @np: node already present in live tree + * @dup: node present in live tree to be updated + */ +static void update_node_properties(struct device_node *np, + struct device_node *dup) +{ + struct property *prop; + struct device_node *child; + + for_each_property_of_node(np, prop) + of_add_property(dup, prop); + + for_each_child_of_node(np, child) + child->parent = dup; +} + +/** + * attach_node_and_children - attaches nodes + * and its children to live tree + * + * @np: Node to attach to live tree + */ +static int attach_node_and_children(struct device_node *np) +{ + struct device_node *next, *root = np, *dup; + + if (!np) { + pr_warn("%s: No tree to attach; not running tests\n", + __func__); + return -ENODATA; + } + + + /* skip root node */ + np = np->child; + /* storing a copy in temporary node */ + dup = np; + + while (dup) { + nodes[last_node_index++] = dup; + dup = dup->sibling; + } + dup = NULL; + + while (np) { + next = np->allnext; + dup = of_find_node_by_path(np->full_name); + if (dup) + update_node_properties(np, dup); + else { + np->child = NULL; + if (np->parent == root) + np->parent = of_allnodes; + of_attach_node(np); + } + np = next; + } + + return 0; +} + +/** + * selftest_data_add - Reads, copies data from + * linked tree and attaches it to the live tree + */ +static int __init selftest_data_add(void) +{ + void *selftest_data; + struct device_node *selftest_data_node; + extern uint8_t __dtb_testcases_begin[]; + extern uint8_t __dtb_testcases_end[]; + const int size = __dtb_testcases_end - __dtb_testcases_begin; + + if (!size || !of_allnodes) { + pr_warn("%s: No testcase data to attach; not running tests\n", + __func__); + return -ENODATA; + } + + /* creating copy */ + selftest_data = kmemdup(__dtb_testcases_begin, size, GFP_KERNEL); + + if (!selftest_data) { + pr_warn("%s: Failed to allocate memory for selftest_data; " + "not running tests\n", __func__); + return -ENOMEM; + } + of_fdt_unflatten_tree(selftest_data, &selftest_data_node); + + /* attach the sub-tree to live tree */ + return attach_node_and_children(selftest_data_node); +} + +/** + * detach_node_and_children - detaches node + * and its children from live tree + * + * @np: Node to detach from live tree + */ +static void detach_node_and_children(struct device_node *np) +{ + while (np->child) + detach_node_and_children(np->child); + + while (np->sibling) + detach_node_and_children(np->sibling); + + of_detach_node(np); +} + +/** + * selftest_data_remove - removes the selftest data + * nodes from the live tree + */ +static void selftest_data_remove(void) +{ + struct device_node *np; + struct property *prop; + + while (last_node_index >= 0) { + if (nodes[last_node_index]) { + np = of_find_node_by_path(nodes[last_node_index]->full_name); + if (strcmp(np->full_name, "/aliases") != 0) { + detach_node_and_children(np->child); + of_detach_node(np); + } else { + for_each_property_of_node(np, prop) { + if (strcmp(prop->name, "testcase-alias") == 0) + of_remove_property(np, prop); + } + } + } + last_node_index--; + } +} + static int __init of_selftest(void) { struct device_node *np; + int res; + + /* adding data for selftest */ + res = selftest_data_add(); + if (res) + return res; np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a"); if (!np) { @@ -618,6 +770,10 @@ static int __init of_selftest(void) of_selftest_platform_populate(); pr_info("end of selftest - %i passed, %i failed\n", selftest_results.passed, selftest_results.failed); + + /* removing selftest data from live tree */ + selftest_data_remove(); + return 0; } late_initcall(of_selftest); diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dts similarity index 50% rename from drivers/of/testcase-data/testcases.dtsi rename to drivers/of/testcase-data/testcases.dts index 669bb07df142..8e7568ee3175 100644 --- a/drivers/of/testcase-data/testcases.dtsi +++ b/drivers/of/testcase-data/testcases.dts @@ -1,13 +1,4 @@ -/ { - testcase-data { - changeset { - prop-update = "hello"; - prop-remove = "world"; - node-remove { - }; - }; - }; -}; +/dts-v1/; #include "tests-phandle.dtsi" #include "tests-interrupts.dtsi" #include "tests-match.dtsi" From 442e9dac8e05ad5586a2116ecb65f5326823d931 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Sat, 26 Jul 2014 10:58:43 -0600 Subject: [PATCH 261/298] of: typo fix in __of_prop_dup() Fix a trivial typo in a comment block. Signed-off-by: Grant Likely (cherry picked from commit b6ae5dc54b0a5c542d06d46b9083ceb70bf7e083) Signed-off-by: Mark Brown --- drivers/of/dynamic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index c1002b7be786..54fecc49a1fe 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -259,7 +259,7 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags) /* * NOTE: There is no check for zero length value. - * In case of a boolean property This will allocate a value + * In case of a boolean property, this will allocate a value * of zero bytes. We do this to work around the use * of of_get_property() calls on boolean values. */ From dacf48b307aaa5d2cfc9d0a5b5b52ed4765b9d2a Mon Sep 17 00:00:00 2001 From: Gaurav Minocha Date: Fri, 25 Jul 2014 19:57:38 -0700 Subject: [PATCH 262/298] of: Fixing OF Selftest build error This patch is to fix following error while compiling OF selftests. "drivers/of/selftest.c:617:2: error: implicit declaration of function 'of_fdt_unflatten_tree'" Now, CONFIG_OF_SELFTEST depends on CONFIG_OF_EARLY_FLATTREE so that the broken configuration cannot be selected. Ultimately it would be a good idea to allow CONFIG_OF_SELFTEST to select CONFIG_OF_EARLY_FLATTREE, but there is a dependency problem on i386 and x86_64 that causes dtc to not get built and causes the build to fail. That problem needs to be fixed first. Signed-off-by: Gaurav Minocha Signed-off-by: Grant Likely (cherry picked from commit 32147e9a8ea54f2ec9914edeede8a9d9dc02e886) Signed-off-by: Mark Brown --- drivers/of/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 17942557ecf6..78787b128d32 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -9,7 +9,7 @@ menu "Device Tree and Open Firmware support" config OF_SELFTEST bool "Device Tree Runtime self tests" - depends on OF_IRQ + depends on OF_IRQ && OF_EARLY_FLATTREE select OF_DYNAMIC help This option builds in test cases for the device tree infrastructure From 220f5ea9df1e8ded213958f635b5310de44305d2 Mon Sep 17 00:00:00 2001 From: Gaurav Minocha Date: Sat, 26 Jul 2014 12:48:50 -0700 Subject: [PATCH 263/298] Enabling OF selftest to run without machine's devicetree If there is no devicetree present, this patch adds the selftest data as a live devicetree. It also removes the same after the testcase execution is complete. Tested with and without machine's devicetree. Signed-off-by: Gaurav Minocha Signed-off-by: Grant Likely (cherry picked from commit b951f9dc7f25fc1e39aafda5edb4b47b38285d9f) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index d41002667833..a737cb5974de 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -27,6 +27,7 @@ static struct selftest_results { #define NO_OF_NODES 2 static struct device_node *nodes[NO_OF_NODES]; static int last_node_index; +static bool selftest_live_tree; #define selftest(result, fmt, ...) { \ if (!(result)) { \ @@ -630,13 +631,6 @@ static int attach_node_and_children(struct device_node *np) { struct device_node *next, *root = np, *dup; - if (!np) { - pr_warn("%s: No tree to attach; not running tests\n", - __func__); - return -ENODATA; - } - - /* skip root node */ np = np->child; /* storing a copy in temporary node */ @@ -672,12 +666,12 @@ static int attach_node_and_children(struct device_node *np) static int __init selftest_data_add(void) { void *selftest_data; - struct device_node *selftest_data_node; + struct device_node *selftest_data_node, *np; extern uint8_t __dtb_testcases_begin[]; extern uint8_t __dtb_testcases_end[]; const int size = __dtb_testcases_end - __dtb_testcases_begin; - if (!size || !of_allnodes) { + if (!size) { pr_warn("%s: No testcase data to attach; not running tests\n", __func__); return -ENODATA; @@ -692,6 +686,22 @@ static int __init selftest_data_add(void) return -ENOMEM; } of_fdt_unflatten_tree(selftest_data, &selftest_data_node); + if (!selftest_data_node) { + pr_warn("%s: No tree to attach; not running tests\n", __func__); + return -ENODATA; + } + + if (!of_allnodes) { + /* enabling flag for removing nodes */ + selftest_live_tree = true; + of_allnodes = selftest_data_node; + + for_each_of_allnodes(np) + __of_attach_node_sysfs(np); + of_aliases = of_find_node_by_path("/aliases"); + of_chosen = of_find_node_by_path("/chosen"); + return 0; + } /* attach the sub-tree to live tree */ return attach_node_and_children(selftest_data_node); @@ -723,6 +733,18 @@ static void selftest_data_remove(void) struct device_node *np; struct property *prop; + if (selftest_live_tree) { + of_node_put(of_aliases); + of_node_put(of_chosen); + of_aliases = NULL; + of_chosen = NULL; + for_each_child_of_node(of_allnodes, np) + detach_node_and_children(np); + __of_detach_node_sysfs(of_allnodes); + of_allnodes = NULL; + return; + } + while (last_node_index >= 0) { if (nodes[last_node_index]) { np = of_find_node_by_path(nodes[last_node_index]->full_name); From ed22824fc77e158ae6075b06c3047a12731553c3 Mon Sep 17 00:00:00 2001 From: Gaurav Minocha Date: Fri, 5 Sep 2014 09:56:13 -0700 Subject: [PATCH 264/298] of: Disabling OF functions that use sysfs if CONFIG_SYSFS disabled This patch is to the fix the recent runtime bug in kernel reported by . The bug was exposed by commit b951f9dc, "Enabling OF selftest to run without machine's devicetree" and is exposed when CONFIG_OF_SELFTEST is enabled and CONFIG_SYSFS is disabled. Mail Subject: [OF test] BUG: unable to handle kernel NULL pointer dereference at 00000038 Tested on x86 and arm architecture Signed-off-by: Gaurav Minocha Signed-off-by: Grant Likely (cherry picked from commit ef69d74035befbddd7c2b7e86120e49191107acc) Signed-off-by: Mark Brown --- drivers/of/base.c | 12 ++++++++++++ drivers/of/dynamic.c | 3 +++ 2 files changed, 15 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index 8059f149d171..a810ab79c6f4 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -136,6 +136,9 @@ int __of_add_property_sysfs(struct device_node *np, struct property *pp) /* Important: Don't leak passwords */ bool secure = strncmp(pp->name, "security-", 9) == 0; + if (!IS_ENABLED(CONFIG_SYSFS)) + return 0; + if (!of_kset || !of_node_is_attached(np)) return 0; @@ -156,6 +159,9 @@ int __of_attach_node_sysfs(struct device_node *np) struct property *pp; int rc; + if (!IS_ENABLED(CONFIG_SYSFS)) + return 0; + if (!of_kset) return 0; @@ -1637,6 +1643,9 @@ int __of_remove_property(struct device_node *np, struct property *prop) void __of_remove_property_sysfs(struct device_node *np, struct property *prop) { + if (!IS_ENABLED(CONFIG_SYSFS)) + return; + /* at early boot, bail here and defer setup to of_init() */ if (of_kset && of_node_is_attached(np)) sysfs_remove_bin_file(&np->kobj, &prop->attr); @@ -1701,6 +1710,9 @@ int __of_update_property(struct device_node *np, struct property *newprop, void __of_update_property_sysfs(struct device_node *np, struct property *newprop, struct property *oldprop) { + if (!IS_ENABLED(CONFIG_SYSFS)) + return; + /* At early boot, bail out and defer setup to of_init() */ if (!of_kset) return; diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 54fecc49a1fe..f297891d8529 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -45,6 +45,9 @@ void __of_detach_node_sysfs(struct device_node *np) { struct property *pp; + if (!IS_ENABLED(CONFIG_SYSFS)) + return; + BUG_ON(!of_node_is_initialized(np)); if (!of_kset) return; From 0f6f826ccfdc20b244865ee4edc9fd6512b26e0d Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 1 Oct 2014 16:57:07 +0100 Subject: [PATCH 265/298] of: Fix NULL dereference in selftest removal code The selftest code removes its testcase data from the live tree when exiting, but if the testcases data tree contains an empty child of the root, then it causes an oops due to a NULL dereference. The reason is that the code tries to directly dereference the child pointer without checking first if a child is actually there. The solution is to pass the parent node into detach_node_and_children() instead of trying to pass the child. This required removing the code that attempts to remove all of the sibling nodes in detach_node_and_children(), which was never sensible in the first place. At the same time add a check to make sure the bounds of the nodes list are not exceeded by the testdata tree. If they are then abort. Signed-off-by: Grant Likely Cc: Gaurav Minocha (cherry picked from commit e66c98c7a0eacc33a9369a3ec086740044eb986c) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index a737cb5974de..883e60b04eb5 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -637,6 +637,8 @@ static int attach_node_and_children(struct device_node *np) dup = np; while (dup) { + if (WARN_ON(last_node_index >= NO_OF_NODES)) + return -EINVAL; nodes[last_node_index++] = dup; dup = dup->sibling; } @@ -717,10 +719,6 @@ static void detach_node_and_children(struct device_node *np) { while (np->child) detach_node_and_children(np->child); - - while (np->sibling) - detach_node_and_children(np->sibling); - of_detach_node(np); } @@ -749,8 +747,7 @@ static void selftest_data_remove(void) if (nodes[last_node_index]) { np = of_find_node_by_path(nodes[last_node_index]->full_name); if (strcmp(np->full_name, "/aliases") != 0) { - detach_node_and_children(np->child); - of_detach_node(np); + detach_node_and_children(np); } else { for_each_property_of_node(np, prop) { if (strcmp(prop->name, "testcase-alias") == 0) From 176ce2e67a03ea16b768a39c22cadba1711912b0 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 1 Oct 2014 17:40:22 +0100 Subject: [PATCH 266/298] of/selftest: Test structure of device tree Add a testcase to verify that the device tree is properly constructed and the lists are in a correct order. The new testcase gets run twice; once after adding the testcase data, and once after removing it again. It is run twice to make sure adding and removing the testcase data doesn't corrupt the data structure. Signed-off-by: Grant Likely Cc: Gaurav Minocha (cherry picked from commit f2051d6a88cd03f74221da887f56d778a1b2f1f1) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 56 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 883e60b04eb5..252a7eda8d6a 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -145,6 +145,53 @@ static void __init of_selftest_dynamic(void) "Adding a large property should have passed\n"); } +static int __init of_selftest_check_node_linkage(struct device_node *np) +{ + struct device_node *child, *allnext_index = np; + int count = 0, rc; + + for_each_child_of_node(np, child) { + if (child->parent != np) { + pr_err("Child node %s links to wrong parent %s\n", + child->name, np->name); + return -EINVAL; + } + + while (allnext_index && allnext_index != child) + allnext_index = allnext_index->allnext; + if (allnext_index != child) { + pr_err("Node %s is ordered differently in sibling and allnode lists\n", + child->name); + return -EINVAL; + } + + rc = of_selftest_check_node_linkage(child); + if (rc < 0) + return rc; + count += rc; + } + + return count + 1; +} + +static void __init of_selftest_check_tree_linkage(void) +{ + struct device_node *np; + int allnode_count = 0, child_count; + + if (!of_allnodes) + return; + + for_each_of_allnodes(np) + allnode_count++; + child_count = of_selftest_check_node_linkage(of_allnodes); + + selftest(child_count > 0, "Device node data structure is corrupted\n"); + selftest(child_count == allnode_count, "allnodes list size (%i) doesn't match" + "sibling lists size (%i)\n", allnode_count, child_count); + pr_debug("allnodes list size (%i); sibling lists size (%i)\n", allnode_count, child_count); +} + static void __init of_selftest_parse_phandle_with_args(void) { struct device_node *np; @@ -777,6 +824,7 @@ static int __init of_selftest(void) of_node_put(np); pr_info("start of selftest - you will see error messages\n"); + of_selftest_check_tree_linkage(); of_selftest_find_node_by_name(); of_selftest_dynamic(); of_selftest_parse_phandle_with_args(); @@ -787,12 +835,16 @@ static int __init of_selftest(void) of_selftest_parse_interrupts_extended(); of_selftest_match_node(); of_selftest_platform_populate(); - pr_info("end of selftest - %i passed, %i failed\n", - selftest_results.passed, selftest_results.failed); /* removing selftest data from live tree */ selftest_data_remove(); + /* Double check linkage after removing testcase data */ + of_selftest_check_tree_linkage(); + + pr_info("end of selftest - %i passed, %i failed\n", + selftest_results.passed, selftest_results.failed); + return 0; } late_initcall(of_selftest); From d3d58c334eb08f18e4e8b8226a1e2b3991508ff3 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 2 Oct 2014 13:09:15 +0100 Subject: [PATCH 267/298] of/selftest: Add a test for duplicate phandles All phandles in the tree should be unique. Add a testcase to make sure that this is so. Note: this testcase fails on the current kernel because the selftest code itself ends up adding duplicate phandles. Before this testcase is merged the selftest code needs to be modified to resolve phandles before adding them. Signed-off-by: Grant Likely Cc: Pantelis Antoniou (cherry picked from commit 841ec21357eee222416e3b7f1b6ef23cfc6ee43f) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 47 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 252a7eda8d6a..4f83e97f8788 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -192,6 +193,51 @@ static void __init of_selftest_check_tree_linkage(void) pr_debug("allnodes list size (%i); sibling lists size (%i)\n", allnode_count, child_count); } +struct node_hash { + struct hlist_node node; + struct device_node *np; +}; + +static void __init of_selftest_check_phandles(void) +{ + struct device_node *np; + struct node_hash *nh; + struct hlist_node *tmp; + int i, dup_count = 0, phandle_count = 0; + DECLARE_HASHTABLE(ht, 8); + + hash_init(ht); + for_each_of_allnodes(np) { + if (!np->phandle) + continue; + + hash_for_each_possible(ht, nh, node, np->phandle) { + if (nh->np->phandle == np->phandle) { + pr_info("Duplicate phandle! %i used by %s and %s\n", + np->phandle, nh->np->full_name, np->full_name); + dup_count++; + break; + } + } + + nh = kzalloc(sizeof(*nh), GFP_KERNEL); + if (WARN_ON(!nh)) + return; + + nh->np = np; + hash_add(ht, &nh->node, np->phandle); + phandle_count++; + } + selftest(dup_count == 0, "Found %i duplicates in %i phandles\n", + dup_count, phandle_count); + + /* Clean up */ + hash_for_each_safe(ht, i, tmp, nh, node) { + hash_del(&nh->node); + kfree(nh); + } +} + static void __init of_selftest_parse_phandle_with_args(void) { struct device_node *np; @@ -825,6 +871,7 @@ static int __init of_selftest(void) pr_info("start of selftest - you will see error messages\n"); of_selftest_check_tree_linkage(); + of_selftest_check_phandles(); of_selftest_find_node_by_name(); of_selftest_dynamic(); of_selftest_parse_phandle_with_args(); From a19d4c95d6fa08585732d1ef9b6dd80f9c3a8ba1 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 2 Oct 2014 14:36:46 +0100 Subject: [PATCH 268/298] of/selftest: Use the resolver to fixup phandles The selftest data ends up causing duplicate phandles in the live tree for the time that the testcase data is inserted into the live tree. This is obviously a bad situation because anything attempting to read the tree while the selftests are running make resolve phandles to one of the testcase data nodes. Fix the problem by using the of_resolve_phandles() function to eliminate duplicates. Signed-off-by: Grant Likely Cc: Pantelis Antoniou Cc: Gaurav Minocha (cherry picked from commit 2eb46da2a760e5764c48b752a5ef320e02b96b21) Signed-off-by: Mark Brown --- drivers/of/Kconfig | 1 + drivers/of/selftest.c | 9 ++++++- drivers/of/testcase-data/testcases.dts | 35 ++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 78787b128d32..43ef13808e2d 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -11,6 +11,7 @@ config OF_SELFTEST bool "Device Tree Runtime self tests" depends on OF_IRQ && OF_EARLY_FLATTREE select OF_DYNAMIC + select OF_RESOLVE help This option builds in test cases for the device tree infrastructure that are executed once at boot time, and the results dumped to the diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 4f83e97f8788..4fed34bff5cf 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -25,7 +25,7 @@ static struct selftest_results { int failed; } selftest_results; -#define NO_OF_NODES 2 +#define NO_OF_NODES 3 static struct device_node *nodes[NO_OF_NODES]; static int last_node_index; static bool selftest_live_tree; @@ -765,6 +765,7 @@ static int __init selftest_data_add(void) extern uint8_t __dtb_testcases_begin[]; extern uint8_t __dtb_testcases_end[]; const int size = __dtb_testcases_end - __dtb_testcases_begin; + int rc; if (!size) { pr_warn("%s: No testcase data to attach; not running tests\n", @@ -785,6 +786,12 @@ static int __init selftest_data_add(void) pr_warn("%s: No tree to attach; not running tests\n", __func__); return -ENODATA; } + of_node_set_flag(selftest_data_node, OF_DETACHED); + rc = of_resolve_phandles(selftest_data_node); + if (rc) { + pr_err("%s: Failed to resolve phandles (rc=%i)\n", __func__, rc); + return -EINVAL; + } if (!of_allnodes) { /* enabling flag for removing nodes */ diff --git a/drivers/of/testcase-data/testcases.dts b/drivers/of/testcase-data/testcases.dts index 8e7568ee3175..002e55db6160 100644 --- a/drivers/of/testcase-data/testcases.dts +++ b/drivers/of/testcase-data/testcases.dts @@ -3,3 +3,38 @@ #include "tests-interrupts.dtsi" #include "tests-match.dtsi" #include "tests-platform.dtsi" + +/* + * phandle fixup data - generated by dtc patches that aren't upstream. + * This data must be regenerated whenever phandle references are modified in + * the testdata tree. + * + * The format of this data may be subject to change. For the time being consider + * this a kernel-internal data format. + */ +/ { __local_fixups__ { + fixup = "/testcase-data/testcase-device2:interrupt-parent:0", + "/testcase-data/testcase-device1:interrupt-parent:0", + "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:60", + "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:52", + "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:44", + "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:36", + "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:24", + "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:8", + "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:0", + "/testcase-data/interrupts/interrupts1:interrupt-parent:0", + "/testcase-data/interrupts/interrupts0:interrupt-parent:0", + "/testcase-data/interrupts/intmap1:interrupt-map:12", + "/testcase-data/interrupts/intmap0:interrupt-map:52", + "/testcase-data/interrupts/intmap0:interrupt-map:36", + "/testcase-data/interrupts/intmap0:interrupt-map:16", + "/testcase-data/interrupts/intmap0:interrupt-map:4", + "/testcase-data/phandle-tests/consumer-a:phandle-list-bad-args:12", + "/testcase-data/phandle-tests/consumer-a:phandle-list-bad-args:0", + "/testcase-data/phandle-tests/consumer-a:phandle-list:56", + "/testcase-data/phandle-tests/consumer-a:phandle-list:52", + "/testcase-data/phandle-tests/consumer-a:phandle-list:40", + "/testcase-data/phandle-tests/consumer-a:phandle-list:24", + "/testcase-data/phandle-tests/consumer-a:phandle-list:8", + "/testcase-data/phandle-tests/consumer-a:phandle-list:0"; +}; }; From 2ca04f7a512c14de91f9368956d0905560f5bdef Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 7 Oct 2014 11:30:31 +0100 Subject: [PATCH 269/298] of/selftest: Move hash table off stack to fix large frame size The new testcase that checks phandle consistency was using a hash table on the stack which made the frame size much large than it should be. Fix the problem by moving the hash table into the file scope. Signed-off-by: Grant Likely (cherry picked from commit 2118f4b8dfc666c3e4a9e262beca79636a0852fe) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 4fed34bff5cf..78001270a598 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -198,20 +198,19 @@ struct node_hash { struct device_node *np; }; +static DEFINE_HASHTABLE(phandle_ht, 8); static void __init of_selftest_check_phandles(void) { struct device_node *np; struct node_hash *nh; struct hlist_node *tmp; int i, dup_count = 0, phandle_count = 0; - DECLARE_HASHTABLE(ht, 8); - hash_init(ht); for_each_of_allnodes(np) { if (!np->phandle) continue; - hash_for_each_possible(ht, nh, node, np->phandle) { + hash_for_each_possible(phandle_ht, nh, node, np->phandle) { if (nh->np->phandle == np->phandle) { pr_info("Duplicate phandle! %i used by %s and %s\n", np->phandle, nh->np->full_name, np->full_name); @@ -225,14 +224,14 @@ static void __init of_selftest_check_phandles(void) return; nh->np = np; - hash_add(ht, &nh->node, np->phandle); + hash_add(phandle_ht, &nh->node, np->phandle); phandle_count++; } selftest(dup_count == 0, "Found %i duplicates in %i phandles\n", dup_count, phandle_count); /* Clean up */ - hash_for_each_safe(ht, i, tmp, nh, node) { + hash_for_each_safe(phandle_ht, i, tmp, nh, node) { hash_del(&nh->node); kfree(nh); } From 7c71b6a864616c9e79ee4375e9c0d722355dbe35 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 4 Nov 2014 13:14:13 +0000 Subject: [PATCH 270/298] of/unittest: Remove test devices after adding them The of_platform_populate() test cases don't remove the test devices after they are added. Fix this by adding tests for of_platform_depopulate(). At the same time rework the selftest() macro to return the test result value. This makes it easy to use the macro inside an if() condition. Signed-off-by: Grant Likely (cherry picked from commit 851da976dc1d72becc03e144b38c4efab9e7b361) Signed-off-by: Mark Brown --- drivers/of/selftest.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 78001270a598..1207b6d0c122 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -30,15 +30,17 @@ static struct device_node *nodes[NO_OF_NODES]; static int last_node_index; static bool selftest_live_tree; -#define selftest(result, fmt, ...) { \ - if (!(result)) { \ +#define selftest(result, fmt, ...) ({ \ + bool failed = !(result); \ + if (failed) { \ selftest_results.failed++; \ pr_err("FAIL %s():%i " fmt, __func__, __LINE__, ##__VA_ARGS__); \ } else { \ selftest_results.passed++; \ pr_debug("pass %s():%i\n", __func__, __LINE__); \ } \ -} + failed; \ +}) static void __init of_selftest_find_node_by_name(void) { @@ -648,10 +650,13 @@ static void __init of_selftest_match_node(void) } } +struct device test_bus = { + .init_name = "unittest-bus", +}; static void __init of_selftest_platform_populate(void) { - int irq; - struct device_node *np, *child; + int irq, rc; + struct device_node *np, *child, *grandchild; struct platform_device *pdev; struct of_device_id match[] = { { .compatible = "test-device", }, @@ -676,20 +681,32 @@ static void __init of_selftest_platform_populate(void) irq = platform_get_irq(pdev, 0); selftest(irq < 0 && irq != -EPROBE_DEFER, "device parsing error failed - %d\n", irq); - np = of_find_node_by_path("/testcase-data/platform-tests"); - if (!np) { - pr_err("No testcase data in device tree\n"); + if (selftest(np = of_find_node_by_path("/testcase-data/platform-tests"), + "No testcase data in device tree\n")); + return; + + if (selftest(!(rc = device_register(&test_bus)), + "testbus registration failed; rc=%i\n", rc)); return; - } for_each_child_of_node(np, child) { - struct device_node *grandchild; - of_platform_populate(child, match, NULL, NULL); + of_platform_populate(child, match, NULL, &test_bus); for_each_child_of_node(child, grandchild) selftest(of_find_device_by_node(grandchild), "Could not create device for node '%s'\n", grandchild->name); } + + of_platform_depopulate(&test_bus); + for_each_child_of_node(np, child) { + for_each_child_of_node(child, grandchild) + selftest(!of_find_device_by_node(grandchild), + "device didn't get destroyed '%s'\n", + grandchild->name); + } + + device_unregister(&test_bus); + of_node_put(np); } /** From 121372d89662a674a3ae4fdc5ad6ff1a0db3cf30 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 4 Nov 2014 13:24:45 +0000 Subject: [PATCH 271/298] of/unittest: Rename selftest.c to unittest.c This is unit testing code. It should use that name because it makes more sense than 'selftest'. Rename the files to match and rename the config variable. Signed-off-by: Grant Likely (cherry picked from commit 19fd74879a32fb10357e0cda9c8050f01bb3eeb8) Signed-off-by: Mark Brown --- drivers/of/Kconfig | 4 ++-- drivers/of/Makefile | 4 ++-- drivers/of/{testcase-data => unittest-data}/testcases.dts | 0 .../of/{testcase-data => unittest-data}/tests-interrupts.dtsi | 0 drivers/of/{testcase-data => unittest-data}/tests-match.dtsi | 0 .../of/{testcase-data => unittest-data}/tests-phandle.dtsi | 0 .../of/{testcase-data => unittest-data}/tests-platform.dtsi | 0 drivers/of/{selftest.c => unittest.c} | 0 8 files changed, 4 insertions(+), 4 deletions(-) rename drivers/of/{testcase-data => unittest-data}/testcases.dts (100%) rename drivers/of/{testcase-data => unittest-data}/tests-interrupts.dtsi (100%) rename drivers/of/{testcase-data => unittest-data}/tests-match.dtsi (100%) rename drivers/of/{testcase-data => unittest-data}/tests-phandle.dtsi (100%) rename drivers/of/{testcase-data => unittest-data}/tests-platform.dtsi (100%) rename drivers/of/{selftest.c => unittest.c} (100%) diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 43ef13808e2d..625dfd2341e3 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -7,8 +7,8 @@ config OF menu "Device Tree and Open Firmware support" depends on OF -config OF_SELFTEST - bool "Device Tree Runtime self tests" +config OF_UNITTEST + bool "Device Tree runtime unit tests" depends on OF_IRQ && OF_EARLY_FLATTREE select OF_DYNAMIC select OF_RESOLVE diff --git a/drivers/of/Makefile b/drivers/of/Makefile index 7e65dd45f0cf..cb5f0602d347 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -5,8 +5,8 @@ obj-$(CONFIG_OF_PROMTREE) += pdt.o obj-$(CONFIG_OF_ADDRESS) += address.o obj-$(CONFIG_OF_IRQ) += irq.o obj-$(CONFIG_OF_NET) += of_net.o -obj-$(CONFIG_OF_SELFTEST) += of_selftest.o -of_selftest-objs := selftest.o testcase-data/testcases.dtb.o +obj-$(CONFIG_OF_UNITTEST) += of_unittest.o +of_unittest-objs := unittest.o unittest-data/testcases.dtb.o obj-$(CONFIG_OF_MDIO) += of_mdio.o obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o diff --git a/drivers/of/testcase-data/testcases.dts b/drivers/of/unittest-data/testcases.dts similarity index 100% rename from drivers/of/testcase-data/testcases.dts rename to drivers/of/unittest-data/testcases.dts diff --git a/drivers/of/testcase-data/tests-interrupts.dtsi b/drivers/of/unittest-data/tests-interrupts.dtsi similarity index 100% rename from drivers/of/testcase-data/tests-interrupts.dtsi rename to drivers/of/unittest-data/tests-interrupts.dtsi diff --git a/drivers/of/testcase-data/tests-match.dtsi b/drivers/of/unittest-data/tests-match.dtsi similarity index 100% rename from drivers/of/testcase-data/tests-match.dtsi rename to drivers/of/unittest-data/tests-match.dtsi diff --git a/drivers/of/testcase-data/tests-phandle.dtsi b/drivers/of/unittest-data/tests-phandle.dtsi similarity index 100% rename from drivers/of/testcase-data/tests-phandle.dtsi rename to drivers/of/unittest-data/tests-phandle.dtsi diff --git a/drivers/of/testcase-data/tests-platform.dtsi b/drivers/of/unittest-data/tests-platform.dtsi similarity index 100% rename from drivers/of/testcase-data/tests-platform.dtsi rename to drivers/of/unittest-data/tests-platform.dtsi diff --git a/drivers/of/selftest.c b/drivers/of/unittest.c similarity index 100% rename from drivers/of/selftest.c rename to drivers/of/unittest.c From 9f1e7cdb9fd3cfa9bef604eb26de3b9cde977a70 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 4 Nov 2014 10:26:26 +0000 Subject: [PATCH 272/298] of/platform: Move platform devices under /sys/devices/platform Currently the devices created by drivers/of/platform.c get created at the root of /sys/devices. This goes against the typical pattern for sysfs where the top level /sys/devices structure contains categories of devices, and the structure of devices is placed below that. To fix this, make the code in drivers/of/platform.c follow the drivers/base/platform.c behaviour, and use &platform_bus as the default parent for all new platform_devices and amba_devices. This change has been discussed for a long time, but nobody has actually acted on it. Userspace code that expects to find devices under a fixed /sys/devices/... path will be affected. It isn't /supposed/ to do that, but if anyone complains then I'll add a default-off workaround option to put them back into the root. Signed-off-by: Grant Likely Acked-by: Benjamin Herrenschmidt Acked-by: Greg Kroah-Hartman Cc: Rob Herring Cc: Arnd Bergmann (cherry picked from commit 43c0767e17ac70e494b6a381b3a20be6a1a75c70) Signed-off-by: Mark Brown Conflicts: drivers/of/platform.c --- drivers/of/platform.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 404d1daebefa..5b0f1b08f2be 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -172,10 +172,7 @@ struct platform_device *of_device_alloc(struct device_node *np, } dev->dev.of_node = of_node_get(np); -#if defined(CONFIG_MICROBLAZE) - dev->dev.dma_mask = &dev->archdata.dma_mask; -#endif - dev->dev.parent = parent; + dev->dev.parent = parent ? : &platform_bus; if (bus_id) dev_set_name(&dev->dev, "%s", bus_id); @@ -275,7 +272,7 @@ static struct amba_device *of_amba_device_create(struct device_node *node, /* setup generic device info */ dev->dev.coherent_dma_mask = ~0; dev->dev.of_node = of_node_get(node); - dev->dev.parent = parent; + dev->dev.parent = parent ? : &platform_bus; dev->dev.platform_data = platform_data; if (bus_id) dev_set_name(&dev->dev, "%s", bus_id); From 97955d9dfdb2d500077793ec06a2ed97136aa4e6 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 7 May 2014 15:17:26 -0500 Subject: [PATCH 273/298] of/platform: fix device naming for non-translatable addresses Using non-translatable addresses in platform device names is wrong because they may not be globally unique. Just use the default naming with a global index if the address cannot be translated instead. of_can_translate_address has the same checks as of_translate_address, so we can remove it here as well. Reported-by: "Ivan T. Ivanov" Cc: Josh Cartwright Cc: Courtney Cavin Cc: Bjorn Andersson Cc: Grant Likely Signed-off-by: Rob Herring Tested-by: Ivan T. Ivanov Tested-by: Frank Rowand Reviewed-by: Frank Rowand (cherry picked from commit b8acee3ef83f0fc50e57a3d4c91234982befda95) Signed-off-by: Mark Brown --- drivers/of/platform.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 5b0f1b08f2be..10acf98f75fa 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -78,7 +78,6 @@ void of_device_make_bus_id(struct device *dev) struct device_node *node = dev->of_node; const __be32 *reg; u64 addr; - const __be32 *addrp; int magic; #ifdef CONFIG_PPC_DCR @@ -106,15 +105,7 @@ void of_device_make_bus_id(struct device *dev) */ reg = of_get_property(node, "reg", NULL); if (reg) { - if (of_can_translate_address(node)) { - addr = of_translate_address(node, reg); - } else { - addrp = of_get_address(node, 0, NULL, NULL); - if (addrp) - addr = of_read_number(addrp, 1); - else - addr = OF_BAD_ADDR; - } + addr = of_translate_address(node, reg); if (addr != OF_BAD_ADDR) { dev_set_name(dev, "%llx.%s", (unsigned long long)addr, node->name); From 8e15a9bb2c25e5823e2955157df6777a7711dad7 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 7 May 2014 15:23:56 -0500 Subject: [PATCH 274/298] of: kill off of_can_translate_address of_can_translate_address only checks some conditions for address translation, but does not check other conditions like having range properties. The checks it does do are redundant with __of_address_translate. The only difference is printing a message or not. Since we only have a single caller that does the full translation anyway, just remove of_can_translate_address and quiet the error message. Cc: Grant Likely Signed-off-by: Rob Herring Tested-by: Frank Rowand Reviewed-by: Frank Rowand (cherry picked from commit d9c6866be8a145e32da616d8dcbae806032d75b5) Signed-off-by: Mark Brown --- drivers/of/address.c | 22 +--------------------- drivers/of/platform.c | 5 ++--- include/linux/of_address.h | 1 - 3 files changed, 3 insertions(+), 25 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 1a54f1ffaadb..14e40a936089 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -496,8 +496,7 @@ static u64 __of_translate_address(struct device_node *dev, /* Count address cells & copy address locally */ bus->count_cells(dev, &na, &ns); if (!OF_CHECK_COUNTS(na, ns)) { - printk(KERN_ERR "prom_parse: Bad cell count for %s\n", - of_node_full_name(dev)); + pr_debug("OF: Bad cell count for %s\n", of_node_full_name(dev)); goto bail; } memcpy(addr, in_addr, na * 4); @@ -562,25 +561,6 @@ u64 of_translate_dma_address(struct device_node *dev, const __be32 *in_addr) } EXPORT_SYMBOL(of_translate_dma_address); -bool of_can_translate_address(struct device_node *dev) -{ - struct device_node *parent; - struct of_bus *bus; - int na, ns; - - parent = of_get_parent(dev); - if (parent == NULL) - return false; - - bus = of_match_bus(parent); - bus->count_cells(dev, &na, &ns); - - of_node_put(parent); - - return OF_CHECK_COUNTS(na, ns); -} -EXPORT_SYMBOL(of_can_translate_address); - const __be32 *of_get_address(struct device_node *dev, int index, u64 *size, unsigned int *flags) { diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 10acf98f75fa..ddf5c2b5c89c 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -140,9 +140,8 @@ struct platform_device *of_device_alloc(struct device_node *np, return NULL; /* count the io and irq resources */ - if (of_can_translate_address(np)) - while (of_address_to_resource(np, num_reg, &temp_res) == 0) - num_reg++; + while (of_address_to_resource(np, num_reg, &temp_res) == 0) + num_reg++; num_irq = of_irq_count(np); /* Populate the resource table */ diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 5f6ed6b182b8..906ca7681756 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -40,7 +40,6 @@ extern u64 of_translate_dma_address(struct device_node *dev, #ifdef CONFIG_OF_ADDRESS extern u64 of_translate_address(struct device_node *np, const __be32 *addr); -extern bool of_can_translate_address(struct device_node *dev); extern int of_address_to_resource(struct device_node *dev, int index, struct resource *r); extern struct device_node *of_find_matching_node_by_address( From dbce6922f909f318f6d591931664b5db643d7586 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Thu, 15 May 2014 16:55:24 +0100 Subject: [PATCH 275/298] of: Keep track of populated platform devices In "Device Tree powered" systems, platform devices are usually massively populated with of_platform_populate() call, executed at some level of initcalls, either by generic architecture or by platform-specific code. There are situations though where certain devices must be created (and bound with drivers) before all the others. This presents a challenge, as devices created explicitly would be created again by of_platform_populate(). This patch tries to solve that issue in a generic way, adding a "populated" flag for a DT node description. Subsequent of_platform_populate() will skip such nodes (and its children) in a similar way to the non-available ones. This patch also adds of_platform_depopulate() as an operation complementary to the _populate() one. It removes a platform or an amba device populated from the Device Tree, together with its all children (leaving, however, devices without associated of_node untouched) clearing the "populated" flag on the way. Signed-off-by: Pawel Moll Reviewed-by: Rob Herring Acked-by: Grant Likely (cherry picked from commit c6e126de43e7d4abfd6cf796b40589db3a046167) Signed-off-by: Mark Brown --- drivers/of/platform.c | 74 ++++++++++++++++++++++++++++++++++--- include/linux/of.h | 7 ++++ include/linux/of_platform.h | 5 +++ 3 files changed, 81 insertions(+), 5 deletions(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index ddf5c2b5c89c..15c7eb696742 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -191,12 +191,13 @@ static struct platform_device *of_platform_device_create_pdata( { struct platform_device *dev; - if (!of_device_is_available(np)) + if (!of_device_is_available(np) || + of_node_test_and_set_flag(np, OF_POPULATED)) return NULL; dev = of_device_alloc(np, bus_id, parent); if (!dev) - return NULL; + goto err_clear_flag; #if defined(CONFIG_MICROBLAZE) dev->archdata.dma_mask = 0xffffffffUL; @@ -214,10 +215,14 @@ static struct platform_device *of_platform_device_create_pdata( if (of_device_add(dev) != 0) { platform_device_put(dev); - return NULL; + goto err_clear_flag; } return dev; + +err_clear_flag: + of_node_clear_flag(np, OF_POPULATED); + return NULL; } /** @@ -249,14 +254,15 @@ static struct amba_device *of_amba_device_create(struct device_node *node, pr_debug("Creating amba device %s\n", node->full_name); - if (!of_device_is_available(node)) + if (!of_device_is_available(node) || + of_node_test_and_set_flag(node, OF_POPULATED)) return NULL; dev = amba_device_alloc(NULL, 0, 0); if (!dev) { pr_err("%s(): amba_device_alloc() failed for %s\n", __func__, node->full_name); - return NULL; + goto err_clear_flag; } /* setup generic device info */ @@ -296,6 +302,8 @@ static struct amba_device *of_amba_device_create(struct device_node *node, err_free: amba_device_put(dev); +err_clear_flag: + of_node_clear_flag(node, OF_POPULATED); return NULL; } #else /* CONFIG_ARM_AMBA */ @@ -472,4 +480,60 @@ int of_platform_populate(struct device_node *root, return rc; } EXPORT_SYMBOL_GPL(of_platform_populate); + +static int of_platform_device_destroy(struct device *dev, void *data) +{ + bool *children_left = data; + + /* Do not touch devices not populated from the device tree */ + if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) { + *children_left = true; + return 0; + } + + /* Recurse, but don't touch this device if it has any children left */ + if (of_platform_depopulate(dev) != 0) { + *children_left = true; + return 0; + } + + if (dev->bus == &platform_bus_type) + platform_device_unregister(to_platform_device(dev)); +#ifdef CONFIG_ARM_AMBA + else if (dev->bus == &amba_bustype) + amba_device_unregister(to_amba_device(dev)); +#endif + else { + *children_left = true; + return 0; + } + + of_node_clear_flag(dev->of_node, OF_POPULATED); + + return 0; +} + +/** + * of_platform_depopulate() - Remove devices populated from device tree + * @parent: device which childred will be removed + * + * Complementary to of_platform_populate(), this function removes children + * of the given device (and, recurrently, their children) that have been + * created from their respective device tree nodes (and only those, + * leaving others - eg. manually created - unharmed). + * + * Returns 0 when all children devices have been removed or + * -EBUSY when some children remained. + */ +int of_platform_depopulate(struct device *parent) +{ + bool children_left = false; + + device_for_each_child(parent, &children_left, + of_platform_device_destroy); + + return children_left ? -EBUSY : 0; +} +EXPORT_SYMBOL_GPL(of_platform_depopulate); + #endif /* CONFIG_OF_ADDRESS */ diff --git a/include/linux/of.h b/include/linux/of.h index 4896e7c3347f..38c51ae7324b 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -128,6 +128,12 @@ static inline int of_node_check_flag(struct device_node *n, unsigned long flag) return test_bit(flag, &n->_flags); } +static inline int of_node_test_and_set_flag(struct device_node *n, + unsigned long flag) +{ + return test_and_set_bit(flag, &n->_flags); +} + static inline void of_node_set_flag(struct device_node *n, unsigned long flag) { set_bit(flag, &n->_flags); @@ -195,6 +201,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) /* flag descriptions */ #define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ #define OF_DETACHED 2 /* node has been detached from the device tree */ +#define OF_POPULATED 3 /* device already created for the node */ #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 05cb4a928252..b1010eeaac0d 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -72,6 +72,7 @@ extern int of_platform_populate(struct device_node *root, const struct of_device_id *matches, const struct of_dev_auxdata *lookup, struct device *parent); +extern int of_platform_depopulate(struct device *parent); #else static inline int of_platform_populate(struct device_node *root, const struct of_device_id *matches, @@ -80,6 +81,10 @@ static inline int of_platform_populate(struct device_node *root, { return -ENODEV; } +static inline int of_platform_depopulate(struct device *parent) +{ + return -ENODEV; +} #endif #endif /* _LINUX_OF_PLATFORM_H */ From 5570a532ec485ea0c4e884f981c35d1dd5ac57af Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 21 May 2014 15:40:31 +0900 Subject: [PATCH 276/298] of: Ensure unique names without sacrificing determinism The way the driver core is implemented, every device using the same bus type is required to have a unique name because a symlink to each device is created in the appropriate /sys/bus/*/devices directory, and two identical names causes a collision. The current code handles the requirement by using an globally incremented counter that is appended to the device name. It works, but it means any change to device registration will change the assigned numbers. Instead, if we build up the name by using information from the parent nodes, then it can be guaranteed to be unique without adding a random number to the end of it. Signed-off-by: Grant Likely Cc: Ezequiel Garcia Cc: Rob Herring (cherry picked from commit 07e461cd7e73a84f0e3757932b93cc80976fd749) Signed-off-by: Mark Brown --- drivers/of/platform.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 15c7eb696742..76547f29b5fe 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -68,17 +68,15 @@ EXPORT_SYMBOL(of_find_device_by_node); * of_device_make_bus_id - Use the device node data to assign a unique name * @dev: pointer to device structure that is linked to a device tree node * - * This routine will first try using either the dcr-reg or the reg property - * value to derive a unique name. As a last resort it will use the node - * name followed by a unique number. + * This routine will first try using the translated bus address to + * derive a unique name. If it cannot, then it will prepend names from + * parent nodes until a unique name can be derived. */ void of_device_make_bus_id(struct device *dev) { - static atomic_t bus_no_reg_magic; struct device_node *node = dev->of_node; const __be32 *reg; u64 addr; - int magic; #ifdef CONFIG_PPC_DCR /* @@ -100,25 +98,25 @@ void of_device_make_bus_id(struct device *dev) } #endif /* CONFIG_PPC_DCR */ - /* - * For MMIO, get the physical address - */ - reg = of_get_property(node, "reg", NULL); - if (reg) { - addr = of_translate_address(node, reg); - if (addr != OF_BAD_ADDR) { - dev_set_name(dev, "%llx.%s", - (unsigned long long)addr, node->name); + /* Construct the name, using parent nodes if necessary to ensure uniqueness */ + while (node->parent) { + /* + * If the address can be translated, then that is as much + * uniqueness as we need. Make it the first component and return + */ + reg = of_get_property(node, "reg", NULL); + if (reg && (addr = of_translate_address(node, reg)) != OF_BAD_ADDR) { + dev_set_name(dev, dev_name(dev) ? "%llx.%s:%s" : "%llx.%s", + (unsigned long long)addr, node->name, + dev_name(dev)); return; } - } - /* - * No BusID, use the node name and add a globally incremented - * counter (and pray...) - */ - magic = atomic_add_return(1, &bus_no_reg_magic); - dev_set_name(dev, "%s.%d", node->name, magic - 1); + /* format arguments only used if dev_name() resolves to NULL */ + dev_set_name(dev, dev_name(dev) ? "%s:%s" : "%s", + strrchr(node->full_name, '/') + 1, dev_name(dev)); + node = node->parent; + } } /** From e9bcf18b35171597d0d8cc0947a5944a6d79bdac Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 23 May 2014 07:50:50 +0900 Subject: [PATCH 277/298] of: Stop naming platform_device using dcr address There is now a way to ensure all platform devices get a unique name when populated from the device tree, and the DCR_NATIVE code path is broken anyway. PowerPC Cell (PS3) is the only platform that actually uses this path. Most likely nobody will notice if it is killed. Remove the code and associated ugly #ifdef. The user-visible impact of this patch is that any DCR device on Cell will get a new name in the /sys/devices hierarchy. Signed-off-by: Grant Likely Cc: Rob Herring Cc: Benjamin Herrenschmidt (cherry picked from commit ba52464a629fab2493925007b00f2ca65d02ed4e) Signed-off-by: Mark Brown --- arch/powerpc/include/asm/dcr-mmio.h | 4 ---- arch/powerpc/sysdev/dcr.c | 6 +++--- drivers/of/platform.c | 24 ------------------------ 3 files changed, 3 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/dcr-mmio.h b/arch/powerpc/include/asm/dcr-mmio.h index acd491dbd45a..93a68b28e695 100644 --- a/arch/powerpc/include/asm/dcr-mmio.h +++ b/arch/powerpc/include/asm/dcr-mmio.h @@ -51,10 +51,6 @@ static inline void dcr_write_mmio(dcr_host_mmio_t host, out_be32(host.token + ((host.base + dcr_n) * host.stride), value); } -extern u64 of_translate_dcr_address(struct device_node *dev, - unsigned int dcr_n, - unsigned int *stride); - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_DCR_MMIO_H */ diff --git a/arch/powerpc/sysdev/dcr.c b/arch/powerpc/sysdev/dcr.c index 1bd0eba4d355..e9056e438575 100644 --- a/arch/powerpc/sysdev/dcr.c +++ b/arch/powerpc/sysdev/dcr.c @@ -152,9 +152,9 @@ EXPORT_SYMBOL_GPL(dcr_resource_len); #ifdef CONFIG_PPC_DCR_MMIO -u64 of_translate_dcr_address(struct device_node *dev, - unsigned int dcr_n, - unsigned int *out_stride) +static u64 of_translate_dcr_address(struct device_node *dev, + unsigned int dcr_n, + unsigned int *out_stride) { struct device_node *dp; const u32 *p; diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 76547f29b5fe..2ba387e5b6ec 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -51,10 +51,6 @@ struct platform_device *of_find_device_by_node(struct device_node *np) } EXPORT_SYMBOL(of_find_device_by_node); -#if defined(CONFIG_PPC_DCR) -#include -#endif - #ifdef CONFIG_OF_ADDRESS /* * The following routines scan a subtree and registers a device for @@ -78,26 +74,6 @@ void of_device_make_bus_id(struct device *dev) const __be32 *reg; u64 addr; -#ifdef CONFIG_PPC_DCR - /* - * If it's a DCR based device, use 'd' for native DCRs - * and 'D' for MMIO DCRs. - */ - reg = of_get_property(node, "dcr-reg", NULL); - if (reg) { -#ifdef CONFIG_PPC_DCR_NATIVE - dev_set_name(dev, "d%x.%s", *reg, node->name); -#else /* CONFIG_PPC_DCR_NATIVE */ - u64 addr = of_translate_dcr_address(node, *reg, NULL); - if (addr != OF_BAD_ADDR) { - dev_set_name(dev, "D%llx.%s", - (unsigned long long)addr, node->name); - return; - } -#endif /* !CONFIG_PPC_DCR_NATIVE */ - } -#endif /* CONFIG_PPC_DCR */ - /* Construct the name, using parent nodes if necessary to ensure uniqueness */ while (node->parent) { /* From 07a298bd5a35aff2ae1f12f99f9694ab4c4c67fc Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Tue, 24 Jun 2014 16:13:47 +0100 Subject: [PATCH 278/298] of/platform: Fix of_platform_device_destroy iteration of devices of_platform_destroy does not work properly, since the tree population test was iterating on all devices having as its parent the given platform device. The check was intended to check whether any other platform or amba devices created by of_platform_populate were still populated, but instead checked for every kind of device. This is wrong, since platform devices typically create a subsystem regular device and set themselves as parents. Instead, go ahead and call the unregister functions for any devices created with of_platform_populate. The driver core will take care of unbinding drivers, and drivers are responsible for getting rid of any child devices that weren't created by of_platform_populate. Signed-off-by: Grant Likely Signed-off-by: Pantelis Antoniou (cherry picked from commit 75f353b61342b5847c7f6d8499fd6301dce09845) Signed-off-by: Mark Brown --- drivers/of/platform.c | 32 +++++++++----------------------- include/linux/of.h | 1 + include/linux/of_platform.h | 7 ++----- 3 files changed, 12 insertions(+), 28 deletions(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 2ba387e5b6ec..b98e501588d3 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -371,6 +371,7 @@ static int of_platform_bus_create(struct device_node *bus, break; } } + of_node_set_flag(bus, OF_POPULATED_BUS); return rc; } @@ -457,19 +458,13 @@ EXPORT_SYMBOL_GPL(of_platform_populate); static int of_platform_device_destroy(struct device *dev, void *data) { - bool *children_left = data; - /* Do not touch devices not populated from the device tree */ - if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) { - *children_left = true; + if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) return 0; - } - /* Recurse, but don't touch this device if it has any children left */ - if (of_platform_depopulate(dev) != 0) { - *children_left = true; - return 0; - } + /* Recurse for any nodes that were treated as busses */ + if (of_node_check_flag(dev->of_node, OF_POPULATED_BUS)) + device_for_each_child(dev, NULL, of_platform_device_destroy); if (dev->bus == &platform_bus_type) platform_device_unregister(to_platform_device(dev)); @@ -477,19 +472,15 @@ static int of_platform_device_destroy(struct device *dev, void *data) else if (dev->bus == &amba_bustype) amba_device_unregister(to_amba_device(dev)); #endif - else { - *children_left = true; - return 0; - } of_node_clear_flag(dev->of_node, OF_POPULATED); - + of_node_clear_flag(dev->of_node, OF_POPULATED_BUS); return 0; } /** * of_platform_depopulate() - Remove devices populated from device tree - * @parent: device which childred will be removed + * @parent: device which children will be removed * * Complementary to of_platform_populate(), this function removes children * of the given device (and, recurrently, their children) that have been @@ -499,14 +490,9 @@ static int of_platform_device_destroy(struct device *dev, void *data) * Returns 0 when all children devices have been removed or * -EBUSY when some children remained. */ -int of_platform_depopulate(struct device *parent) +void of_platform_depopulate(struct device *parent) { - bool children_left = false; - - device_for_each_child(parent, &children_left, - of_platform_device_destroy); - - return children_left ? -EBUSY : 0; + device_for_each_child(parent, NULL, of_platform_device_destroy); } EXPORT_SYMBOL_GPL(of_platform_depopulate); diff --git a/include/linux/of.h b/include/linux/of.h index 38c51ae7324b..d50e5c8c6ed8 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -202,6 +202,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ #define OF_DETACHED 2 /* node has been detached from the device tree */ #define OF_POPULATED 3 /* device already created for the node */ +#define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index b1010eeaac0d..d0a441468294 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -72,7 +72,7 @@ extern int of_platform_populate(struct device_node *root, const struct of_device_id *matches, const struct of_dev_auxdata *lookup, struct device *parent); -extern int of_platform_depopulate(struct device *parent); +extern void of_platform_depopulate(struct device *parent); #else static inline int of_platform_populate(struct device_node *root, const struct of_device_id *matches, @@ -81,10 +81,7 @@ static inline int of_platform_populate(struct device_node *root, { return -ENODEV; } -static inline int of_platform_depopulate(struct device *parent) -{ - return -ENODEV; -} +static inline void of_platform_depopulate(struct device *parent) { } #endif #endif /* _LINUX_OF_PLATFORM_H */ From fb7fc6199688143c0f755021376f021afb8d06d0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 14 Nov 2014 17:58:23 +1100 Subject: [PATCH 279/298] of/address: Don't throw errors on absent ranges properties The core always tries to translate any "reg" property to construct the platform device names. This results in a pile of "OF: no ranges; cannot translate" errors in dmesg whenever we expose things like i2c devices that cannot directly translate to the MMIO space. Turn this into a pr_debug instead Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Grant Likely (cherry picked from commit a0212ae0be5ba10b6e01b7121f86e391ae1927ae) Signed-off-by: Mark Brown --- drivers/of/address.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 14e40a936089..9c5270d4b759 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -428,7 +428,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, ranges = of_get_property(parent, rprop, &rlen); #if !defined(CONFIG_PPC) if (ranges == NULL) { - pr_err("OF: no ranges; cannot translate\n"); + pr_debug("OF: no ranges; cannot translate\n"); return 1; } #endif /* !defined(CONFIG_PPC) */ From 0c0ed63f86d08f01876f7677ba842aad9af825a1 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Tue, 28 Oct 2014 22:33:49 +0200 Subject: [PATCH 280/298] of/resolver: Switch to new local fixups format. The original resolver format is way too cryptic, switch to using a tree based format that gets rid of repetitions, is more compact and readable. At the same time, update the selftests to using the new local fixups format. Signed-off-by: Pantelis Antoniou [grant.likely: Squashed in testcase changes and merged similar functions] Signed-off-by: Grant Likely (cherry picked from commit da56d04c806a3e9986c66a061d7363ca3157c37b) Signed-off-by: Mark Brown --- drivers/of/resolver.c | 128 ++++++++++++++++++++----- drivers/of/unittest-data/testcases.dts | 61 +++++++----- 2 files changed, 139 insertions(+), 50 deletions(-) diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c index aed7959f800d..640eb4cb46e3 100644 --- a/drivers/of/resolver.c +++ b/drivers/of/resolver.c @@ -111,7 +111,8 @@ static void __of_adjust_tree_phandles(struct device_node *node, __of_adjust_tree_phandles(child, phandle_delta); } -static int __of_adjust_phandle_ref(struct device_node *node, struct property *rprop, int value, bool is_delta) +static int __of_adjust_phandle_ref(struct device_node *node, + struct property *rprop, int value) { phandle phandle; struct device_node *refnode; @@ -181,7 +182,7 @@ static int __of_adjust_phandle_ref(struct device_node *node, struct property *rp goto err_fail; } - phandle = is_delta ? be32_to_cpup(sprop->value + offset) + value : value; + phandle = value; *(__be32 *)(sprop->value + offset) = cpu_to_be32(phandle); } @@ -190,36 +191,97 @@ err_fail: return err; } +/* compare nodes taking into account that 'name' strips out the @ part */ +static int __of_node_name_cmp(const struct device_node *dn1, + const struct device_node *dn2) +{ + const char *n1 = strrchr(dn1->full_name, '/') ? : "/"; + const char *n2 = strrchr(dn2->full_name, '/') ? : "/"; + + return of_node_cmp(n1, n2); +} + /* * Adjust the local phandle references by the given phandle delta. - * Assumes the existances of a __local_fixups__ node at the root - * of the tree. Does not take any devtree locks so make sure you - * call this on a tree which is at the detached state. + * Assumes the existances of a __local_fixups__ node at the root. + * Assumes that __of_verify_tree_phandle_references has been called. + * Does not take any devtree locks so make sure you call this on a tree + * which is at the detached state. */ static int __of_adjust_tree_phandle_references(struct device_node *node, - int phandle_delta) + struct device_node *target, int phandle_delta) { - struct device_node *child; - struct property *rprop; - int err; + struct device_node *child, *childtarget; + struct property *rprop, *sprop; + int err, i, count; + unsigned int off; + phandle phandle; - /* locate the symbols & fixups nodes on resolve */ - for_each_child_of_node(node, child) - if (of_node_cmp(child->name, "__local_fixups__") == 0) - break; - - /* no local fixups */ - if (!child) + if (node == NULL) return 0; - /* find the local fixups property */ - for_each_property_of_node(child, rprop) { + for_each_property_of_node(node, rprop) { + /* skip properties added automatically */ - if (of_prop_cmp(rprop->name, "name") == 0) + if (of_prop_cmp(rprop->name, "name") == 0 || + of_prop_cmp(rprop->name, "phandle") == 0 || + of_prop_cmp(rprop->name, "linux,phandle") == 0) continue; - err = __of_adjust_phandle_ref(node, rprop, phandle_delta, true); - if (err) + if ((rprop->length % 4) != 0 || rprop->length == 0) { + pr_err("%s: Illegal property (size) '%s' @%s\n", + __func__, rprop->name, node->full_name); + return -EINVAL; + } + count = rprop->length / sizeof(__be32); + + /* now find the target property */ + for_each_property_of_node(target, sprop) { + if (of_prop_cmp(sprop->name, rprop->name) == 0) + break; + } + + if (sprop == NULL) { + pr_err("%s: Could not find target property '%s' @%s\n", + __func__, rprop->name, node->full_name); + return -EINVAL; + } + + for (i = 0; i < count; i++) { + off = be32_to_cpu(((__be32 *)rprop->value)[i]); + /* make sure the offset doesn't overstep (even wrap) */ + if (off >= sprop->length || + (off + 4) > sprop->length) { + pr_err("%s: Illegal property '%s' @%s\n", + __func__, rprop->name, + node->full_name); + return -EINVAL; + } + + if (phandle_delta) { + /* adjust */ + phandle = be32_to_cpu(*(__be32 *)(sprop->value + off)); + phandle += phandle_delta; + *(__be32 *)(sprop->value + off) = cpu_to_be32(phandle); + } + } + } + + for_each_child_of_node(node, child) { + + for_each_child_of_node(target, childtarget) + if (__of_node_name_cmp(child, childtarget) == 0) + break; + + if (!childtarget) { + pr_err("%s: Could not find target child '%s' @%s\n", + __func__, child->name, node->full_name); + return -EINVAL; + } + + err = __of_adjust_tree_phandle_references(child, childtarget, + phandle_delta); + if (err != 0) return err; } @@ -241,7 +303,7 @@ static int __of_adjust_tree_phandle_references(struct device_node *node, */ int of_resolve_phandles(struct device_node *resolve) { - struct device_node *child, *refnode; + struct device_node *child, *childroot, *refnode; struct device_node *root_sym, *resolve_sym, *resolve_fix; struct property *rprop; const char *refpath; @@ -255,9 +317,23 @@ int of_resolve_phandles(struct device_node *resolve) /* first we need to adjust the phandles */ phandle_delta = of_get_tree_max_phandle() + 1; __of_adjust_tree_phandles(resolve, phandle_delta); - err = __of_adjust_tree_phandle_references(resolve, phandle_delta); - if (err != 0) - return err; + + /* locate the local fixups */ + childroot = NULL; + for_each_child_of_node(resolve, childroot) + if (of_node_cmp(childroot->name, "__local_fixups__") == 0) + break; + + if (childroot != NULL) { + /* resolve root is guaranteed to be the '/' */ + err = __of_adjust_tree_phandle_references(childroot, + resolve, 0); + if (err != 0) + return err; + + BUG_ON(__of_adjust_tree_phandle_references(childroot, + resolve, phandle_delta)); + } root_sym = NULL; resolve_sym = NULL; @@ -322,7 +398,7 @@ int of_resolve_phandles(struct device_node *resolve) pr_debug("%s: %s phandle is 0x%08x\n", __func__, rprop->name, phandle); - err = __of_adjust_phandle_ref(resolve, rprop, phandle, false); + err = __of_adjust_phandle_ref(resolve, rprop, phandle); if (err) break; } diff --git a/drivers/of/unittest-data/testcases.dts b/drivers/of/unittest-data/testcases.dts index 002e55db6160..6ed76ca115a5 100644 --- a/drivers/of/unittest-data/testcases.dts +++ b/drivers/of/unittest-data/testcases.dts @@ -13,28 +13,41 @@ * this a kernel-internal data format. */ / { __local_fixups__ { - fixup = "/testcase-data/testcase-device2:interrupt-parent:0", - "/testcase-data/testcase-device1:interrupt-parent:0", - "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:60", - "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:52", - "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:44", - "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:36", - "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:24", - "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:8", - "/testcase-data/interrupts/interrupts-extended0:interrupts-extended:0", - "/testcase-data/interrupts/interrupts1:interrupt-parent:0", - "/testcase-data/interrupts/interrupts0:interrupt-parent:0", - "/testcase-data/interrupts/intmap1:interrupt-map:12", - "/testcase-data/interrupts/intmap0:interrupt-map:52", - "/testcase-data/interrupts/intmap0:interrupt-map:36", - "/testcase-data/interrupts/intmap0:interrupt-map:16", - "/testcase-data/interrupts/intmap0:interrupt-map:4", - "/testcase-data/phandle-tests/consumer-a:phandle-list-bad-args:12", - "/testcase-data/phandle-tests/consumer-a:phandle-list-bad-args:0", - "/testcase-data/phandle-tests/consumer-a:phandle-list:56", - "/testcase-data/phandle-tests/consumer-a:phandle-list:52", - "/testcase-data/phandle-tests/consumer-a:phandle-list:40", - "/testcase-data/phandle-tests/consumer-a:phandle-list:24", - "/testcase-data/phandle-tests/consumer-a:phandle-list:8", - "/testcase-data/phandle-tests/consumer-a:phandle-list:0"; + testcase-data { + phandle-tests { + consumer-a { + phandle-list = <0x00000000 0x00000008 + 0x00000018 0x00000028 + 0x00000034 0x00000038>; + phandle-list-bad-args = <0x00000000 0x0000000c>; + }; + }; + interrupts { + intmap0 { + interrupt-map = <0x00000004 0x00000010 + 0x00000024 0x00000034>; + }; + intmap1 { + interrupt-map = <0x0000000c>; + }; + interrupts0 { + interrupt-parent = <0x00000000>; + }; + interrupts1 { + interrupt-parent = <0x00000000>; + }; + interrupts-extended0 { + interrupts-extended = <0x00000000 0x00000008 + 0x00000018 0x00000024 + 0x0000002c 0x00000034 + 0x0000003c>; + }; + }; + testcase-device1 { + interrupt-parent = <0x00000000>; + }; + testcase-device2 { + interrupt-parent = <0x00000000>; + }; + }; }; }; From 04a179654bbfec8df68be4f2e3ecccdb2fc256b5 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Tue, 28 Oct 2014 22:33:53 +0200 Subject: [PATCH 281/298] of/reconfig: Add of_reconfig_get_state_change() of notifier helper. Introduce of_reconfig_get_state_change() which allows an of notifier to query about device state changes. Signed-off-by: Pantelis Antoniou Signed-off-by: Grant Likely (cherry picked from commit b53a2340d0d30468b7315992ba77fe188c3bc5c8) Signed-off-by: Mark Brown --- drivers/of/dynamic.c | 96 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/of.h | 7 ++++ 2 files changed, 103 insertions(+) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index f297891d8529..75680adc5e67 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -85,6 +85,102 @@ int of_reconfig_notify(unsigned long action, void *p) return notifier_to_errno(rc); } +/* + * of_reconfig_get_state_change() - Returns new state of device + * @action - action of the of notifier + * @arg - argument of the of notifier + * + * Returns the new state of a device based on the notifier used. + * Returns 0 on device going from enabled to disabled, 1 on device + * going from disabled to enabled and -1 on no change. + */ +int of_reconfig_get_state_change(unsigned long action, void *arg) +{ + struct device_node *dn; + struct property *prop, *old_prop; + struct of_prop_reconfig *pr; + int is_status, status_state, old_status_state, prev_state, new_state; + + /* figure out if a device should be created or destroyed */ + dn = NULL; + prop = old_prop = NULL; + switch (action) { + case OF_RECONFIG_ATTACH_NODE: + case OF_RECONFIG_DETACH_NODE: + dn = arg; + prop = of_find_property(dn, "status", NULL); + break; + case OF_RECONFIG_ADD_PROPERTY: + case OF_RECONFIG_REMOVE_PROPERTY: + pr = arg; + dn = pr->dn; + prop = pr->prop; + break; + case OF_RECONFIG_UPDATE_PROPERTY: + pr = arg; + dn = pr->dn; + prop = pr->prop; + old_prop = pr->old_prop; + break; + default: + return OF_RECONFIG_NO_CHANGE; + } + + is_status = 0; + status_state = -1; + old_status_state = -1; + prev_state = -1; + new_state = -1; + + if (prop && !strcmp(prop->name, "status")) { + is_status = 1; + status_state = !strcmp(prop->value, "okay") || + !strcmp(prop->value, "ok"); + if (old_prop) + old_status_state = !strcmp(old_prop->value, "okay") || + !strcmp(old_prop->value, "ok"); + } + + switch (action) { + case OF_RECONFIG_ATTACH_NODE: + prev_state = 0; + /* -1 & 0 status either missing or okay */ + new_state = status_state != 0; + break; + case OF_RECONFIG_DETACH_NODE: + /* -1 & 0 status either missing or okay */ + prev_state = status_state != 0; + new_state = 0; + break; + case OF_RECONFIG_ADD_PROPERTY: + if (is_status) { + /* no status property -> enabled (legacy) */ + prev_state = 1; + new_state = status_state; + } + break; + case OF_RECONFIG_REMOVE_PROPERTY: + if (is_status) { + prev_state = status_state; + /* no status property -> enabled (legacy) */ + new_state = 1; + } + break; + case OF_RECONFIG_UPDATE_PROPERTY: + if (is_status) { + prev_state = old_status_state != 0; + new_state = status_state != 0; + } + break; + } + + if (prev_state == new_state) + return OF_RECONFIG_NO_CHANGE; + + return new_state ? OF_RECONFIG_CHANGE_ADD : OF_RECONFIG_CHANGE_REMOVE; +} +EXPORT_SYMBOL_GPL(of_reconfig_get_state_change); + int of_property_notify(int action, struct device_node *np, struct property *prop, struct property *oldprop) { diff --git a/include/linux/of.h b/include/linux/of.h index d50e5c8c6ed8..4ac73299995a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -327,6 +327,7 @@ struct of_prop_reconfig { extern int of_reconfig_notifier_register(struct notifier_block *); extern int of_reconfig_notifier_unregister(struct notifier_block *); extern int of_reconfig_notify(unsigned long, void *); +extern int of_reconfig_get_state_change(unsigned long action, void *arg); extern int of_attach_node(struct device_node *); extern int of_detach_node(struct device_node *); @@ -817,6 +818,12 @@ struct of_changeset { struct list_head entries; }; +enum of_reconfig_change { + OF_RECONFIG_NO_CHANGE = 0, + OF_RECONFIG_CHANGE_ADD, + OF_RECONFIG_CHANGE_REMOVE, +}; + #ifdef CONFIG_OF_DYNAMIC extern void of_changeset_init(struct of_changeset *ocs); extern void of_changeset_destroy(struct of_changeset *ocs); From 330304f96bae2ff263c1b8ecc32c0823ad0b8130 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 14 Nov 2014 14:34:55 +0000 Subject: [PATCH 282/298] of/reconfig: Add debug output for OF_RECONFIG notifiers Add some additional debug output to cover OF_RECONFIG notifier activity. At the same time, refactor the changeset debug output to use the same strings as the notifier debug output. Signed-off-by: Grant Likely (cherry picked from commit 00aa37206e1a54dae61a0dba96bf2ee0938b99d7) Signed-off-by: Mark Brown --- drivers/of/dynamic.c | 48 +++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 75680adc5e67..c99fb9b93c17 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -77,10 +77,38 @@ int of_reconfig_notifier_unregister(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister); +#ifdef DEBUG +const char *action_names[] = { + [OF_RECONFIG_ATTACH_NODE] = "ATTACH_NODE", + [OF_RECONFIG_DETACH_NODE] = "DETACH_NODE", + [OF_RECONFIG_ADD_PROPERTY] = "ADD_PROPERTY", + [OF_RECONFIG_REMOVE_PROPERTY] = "REMOVE_PROPERTY", + [OF_RECONFIG_UPDATE_PROPERTY] = "UPDATE_PROPERTY", +}; +#endif + int of_reconfig_notify(unsigned long action, void *p) { int rc; +#ifdef DEBUG + struct device_node *dn = p; + struct of_prop_reconfig *pr = p; + switch (action) { + case OF_RECONFIG_ATTACH_NODE: + case OF_RECONFIG_DETACH_NODE: + pr_debug("of/notify %-15s %s\n", action_names[action], + dn->full_name); + break; + case OF_RECONFIG_ADD_PROPERTY: + case OF_RECONFIG_REMOVE_PROPERTY: + case OF_RECONFIG_UPDATE_PROPERTY: + pr_debug("of/notify %-15s %s:%s\n", action_names[action], + pr->dn->full_name, pr->prop->name); + break; + + } +#endif rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p); return notifier_to_errno(rc); } @@ -426,27 +454,15 @@ static void __of_changeset_entry_dump(struct of_changeset_entry *ce) { switch (ce->action) { case OF_RECONFIG_ADD_PROPERTY: - pr_debug("%p: %s %s/%s\n", - ce, "ADD_PROPERTY ", ce->np->full_name, - ce->prop->name); - break; case OF_RECONFIG_REMOVE_PROPERTY: - pr_debug("%p: %s %s/%s\n", - ce, "REMOVE_PROPERTY", ce->np->full_name, - ce->prop->name); - break; case OF_RECONFIG_UPDATE_PROPERTY: - pr_debug("%p: %s %s/%s\n", - ce, "UPDATE_PROPERTY", ce->np->full_name, - ce->prop->name); + pr_debug("of/cset<%p> %-15s %s/%s\n", ce, action_names[ce->action], + ce->np->full_name, ce->prop->name); break; case OF_RECONFIG_ATTACH_NODE: - pr_debug("%p: %s %s\n", - ce, "ATTACH_NODE ", ce->np->full_name); - break; case OF_RECONFIG_DETACH_NODE: - pr_debug("%p: %s %s\n", - ce, "DETACH_NODE ", ce->np->full_name); + pr_debug("of/cset<%p> %-15s %s\n", ce, action_names[ce->action], + ce->np->full_name); break; } } From 8a575393a241e60bdc35f21ebe6f112b8e087227 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 24 Nov 2014 17:58:01 +0000 Subject: [PATCH 283/298] of/reconfig: Always use the same structure for notifiers The OF_RECONFIG notifier callback uses a different structure depending on whether it is a node change or a property change. This is silly, and not very safe. Rework the code to use the same data structure regardless of the type of notifier. Signed-off-by: Grant Likely Cc: Benjamin Herrenschmidt Cc: Rob Herring Cc: Pantelis Antoniou Cc: (cherry picked from commit f5242e5a883bf1c1aba6bfd87b85e7dda0e62191) Signed-off-by: Mark Brown Conflicts: arch/powerpc/platforms/pseries/hotplug-memory.c include/linux/of.h --- arch/powerpc/mm/numa.c | 3 +- arch/powerpc/platforms/pseries/hotplug-cpu.c | 7 +-- .../platforms/pseries/hotplug-memory.c | 11 +++-- arch/powerpc/platforms/pseries/iommu.c | 5 ++- arch/powerpc/platforms/pseries/setup.c | 5 ++- drivers/crypto/nx/nx-842.c | 4 +- drivers/of/dynamic.c | 41 ++++++++--------- include/linux/of.h | 44 ++++++++++++++----- 8 files changed, 71 insertions(+), 49 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 30a42e24bf14..3e1949fc781c 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1659,12 +1659,11 @@ static void stage_topology_update(int core_id) static int dt_update_callback(struct notifier_block *nb, unsigned long action, void *data) { - struct of_prop_reconfig *update; + struct of_reconfig_data *update = data; int rc = NOTIFY_DONE; switch (action) { case OF_RECONFIG_UPDATE_PROPERTY: - update = (struct of_prop_reconfig *)data; if (!of_prop_cmp(update->dn->type, "cpu") && !of_prop_cmp(update->prop->name, "ibm,associativity")) { u32 core_id; diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 0ea99e3d4815..402c7fdf0399 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -337,16 +337,17 @@ static void pseries_remove_processor(struct device_node *np) } static int pseries_smp_notifier(struct notifier_block *nb, - unsigned long action, void *node) + unsigned long action, void *data) { + struct of_reconfig_data *rd = data; int err = 0; switch (action) { case OF_RECONFIG_ATTACH_NODE: - err = pseries_add_processor(node); + err = pseries_add_processor(rd->dn); break; case OF_RECONFIG_DETACH_NODE: - pseries_remove_processor(node); + pseries_remove_processor(rd->dn); break; } return notifier_from_errno(err); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index daae52670a0a..804539d86101 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -200,7 +200,7 @@ static int pseries_add_memory(struct device_node *np) return (ret < 0) ? -EINVAL : 0; } -static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) +static int pseries_update_drconf_memory(struct of_reconfig_data *pr) { struct of_drconf_cell *new_drmem, *old_drmem; unsigned long memblock_size; @@ -247,9 +247,9 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr) } static int pseries_memory_notifier(struct notifier_block *nb, - unsigned long action, void *node) + unsigned long action, void *data) { - struct of_prop_reconfig *pr; + struct of_reconfig_data *rd = data; int err = 0; switch (action) { @@ -260,9 +260,8 @@ static int pseries_memory_notifier(struct notifier_block *nb, err = pseries_remove_memory(node); break; case OF_RECONFIG_UPDATE_PROPERTY: - pr = (struct of_prop_reconfig *)node; - if (!strcmp(pr->prop->name, "ibm,dynamic-memory")) - err = pseries_update_drconf_memory(pr); + if (!strcmp(rd->prop->name, "ibm,dynamic-memory")) + err = pseries_update_drconf_memory(rd); break; } return notifier_from_errno(err); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 33b552ffbe57..998fe0e2f288 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1246,10 +1246,11 @@ static struct notifier_block iommu_mem_nb = { .notifier_call = iommu_mem_notifier, }; -static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) { int err = NOTIFY_OK; - struct device_node *np = node; + struct of_reconfig_data *rd = data; + struct device_node *np = rd->dn; struct pci_dn *pci = PCI_DN(np); struct direct_window *window; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 972df0ffd4dc..a9be45dd5cc0 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -253,9 +253,10 @@ static void __init pseries_discover_pic(void) " interrupt-controller\n"); } -static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node) +static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct device_node *np = node; + struct of_reconfig_data *rd = data; + struct device_node *np = rd->dn; struct pci_dn *pci = NULL; int err = NOTIFY_OK; diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index a86359765fda..d2ab58569fdd 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -1009,9 +1009,9 @@ error_out: * notifier_to_errno() to decode this value */ static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, - void *update) + void *data) { - struct of_prop_reconfig *upd = update; + struct of_reconfig_data *upd = data; struct nx842_devdata *local_devdata; struct device_node *node = NULL; diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index c99fb9b93c17..d4a24d716f1d 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -87,18 +87,17 @@ const char *action_names[] = { }; #endif -int of_reconfig_notify(unsigned long action, void *p) +int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p) { int rc; #ifdef DEBUG - struct device_node *dn = p; - struct of_prop_reconfig *pr = p; + struct of_reconfig_data *pr = p; switch (action) { case OF_RECONFIG_ATTACH_NODE: case OF_RECONFIG_DETACH_NODE: pr_debug("of/notify %-15s %s\n", action_names[action], - dn->full_name); + pr->dn->full_name); break; case OF_RECONFIG_ADD_PROPERTY: case OF_RECONFIG_REMOVE_PROPERTY: @@ -122,31 +121,22 @@ int of_reconfig_notify(unsigned long action, void *p) * Returns 0 on device going from enabled to disabled, 1 on device * going from disabled to enabled and -1 on no change. */ -int of_reconfig_get_state_change(unsigned long action, void *arg) +int of_reconfig_get_state_change(unsigned long action, struct of_reconfig_data *pr) { - struct device_node *dn; - struct property *prop, *old_prop; - struct of_prop_reconfig *pr; + struct property *prop, *old_prop = NULL; int is_status, status_state, old_status_state, prev_state, new_state; /* figure out if a device should be created or destroyed */ - dn = NULL; - prop = old_prop = NULL; switch (action) { case OF_RECONFIG_ATTACH_NODE: case OF_RECONFIG_DETACH_NODE: - dn = arg; - prop = of_find_property(dn, "status", NULL); + prop = of_find_property(pr->dn, "status", NULL); break; case OF_RECONFIG_ADD_PROPERTY: case OF_RECONFIG_REMOVE_PROPERTY: - pr = arg; - dn = pr->dn; prop = pr->prop; break; case OF_RECONFIG_UPDATE_PROPERTY: - pr = arg; - dn = pr->dn; prop = pr->prop; old_prop = pr->old_prop; break; @@ -212,7 +202,7 @@ EXPORT_SYMBOL_GPL(of_reconfig_get_state_change); int of_property_notify(int action, struct device_node *np, struct property *prop, struct property *oldprop) { - struct of_prop_reconfig pr; + struct of_reconfig_data pr; /* only call notifiers if the node is attached */ if (!of_node_is_attached(np)) @@ -252,8 +242,12 @@ void __of_attach_node(struct device_node *np) */ int of_attach_node(struct device_node *np) { + struct of_reconfig_data rd; unsigned long flags; + memset(&rd, 0, sizeof(rd)); + rd.dn = np; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_attach_node(np); @@ -262,7 +256,7 @@ int of_attach_node(struct device_node *np) __of_attach_node_sysfs(np); mutex_unlock(&of_mutex); - of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); + of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, &rd); return 0; } @@ -311,9 +305,13 @@ void __of_detach_node(struct device_node *np) */ int of_detach_node(struct device_node *np) { + struct of_reconfig_data rd; unsigned long flags; int rc = 0; + memset(&rd, 0, sizeof(rd)); + rd.dn = np; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); __of_detach_node(np); @@ -322,7 +320,7 @@ int of_detach_node(struct device_node *np) __of_detach_node_sysfs(np); mutex_unlock(&of_mutex); - of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); + of_reconfig_notify(OF_RECONFIG_DETACH_NODE, &rd); return rc; } @@ -500,6 +498,7 @@ static void __of_changeset_entry_invert(struct of_changeset_entry *ce, static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert) { + struct of_reconfig_data rd; struct of_changeset_entry ce_inverted; int ret; @@ -511,7 +510,9 @@ static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool reve switch (ce->action) { case OF_RECONFIG_ATTACH_NODE: case OF_RECONFIG_DETACH_NODE: - ret = of_reconfig_notify(ce->action, ce->np); + memset(&rd, 0, sizeof(rd)); + rd.dn = ce->np; + ret = of_reconfig_notify(ce->action, &rd); break; case OF_RECONFIG_ADD_PROPERTY: case OF_RECONFIG_REMOVE_PROPERTY: diff --git a/include/linux/of.h b/include/linux/of.h index 4ac73299995a..118e41a97f7d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -74,6 +74,12 @@ struct of_phandle_args { uint32_t args[MAX_PHANDLE_ARGS]; }; +struct of_reconfig_data { + struct device_node *dn; + struct property *prop; + struct property *old_prop; +}; + /* initialize a node */ extern struct kobj_type of_node_ktype; static inline void of_node_init(struct device_node *node) @@ -318,17 +324,6 @@ extern int of_update_property(struct device_node *np, struct property *newprop); #define OF_RECONFIG_REMOVE_PROPERTY 0x0004 #define OF_RECONFIG_UPDATE_PROPERTY 0x0005 -struct of_prop_reconfig { - struct device_node *dn; - struct property *prop; - struct property *old_prop; -}; - -extern int of_reconfig_notifier_register(struct notifier_block *); -extern int of_reconfig_notifier_unregister(struct notifier_block *); -extern int of_reconfig_notify(unsigned long, void *); -extern int of_reconfig_get_state_change(unsigned long action, void *arg); - extern int of_attach_node(struct device_node *); extern int of_detach_node(struct device_node *); @@ -825,6 +820,12 @@ enum of_reconfig_change { }; #ifdef CONFIG_OF_DYNAMIC +extern int of_reconfig_notifier_register(struct notifier_block *); +extern int of_reconfig_notifier_unregister(struct notifier_block *); +extern int of_reconfig_notify(unsigned long, struct of_reconfig_data *rd); +extern int of_reconfig_get_state_change(unsigned long action, + struct of_reconfig_data *arg); + extern void of_changeset_init(struct of_changeset *ocs); extern void of_changeset_destroy(struct of_changeset *ocs); extern int of_changeset_apply(struct of_changeset *ocs); @@ -862,7 +863,26 @@ static inline int of_changeset_update_property(struct of_changeset *ocs, { return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop); } -#endif +#else /* CONFIG_OF_DYNAMIC */ +static inline int of_reconfig_notifier_register(struct notifier_block *nb) +{ + return -EINVAL; +} +static inline int of_reconfig_notifier_unregister(struct notifier_block *nb) +{ + return -EINVAL; +} +static inline int of_reconfig_notify(unsigned long action, + struct of_reconfig_data *arg) +{ + return -EINVAL; +} +static inline int of_reconfig_get_state_change(unsigned long action, + struct of_reconfig_data *arg) +{ + return -EINVAL; +} +#endif /* CONFIG_OF_DYNAMIC */ /* CONFIG_OF_RESOLVE api */ extern int of_resolve_phandles(struct device_node *tree); From 94b62a841aac10919826afaf3cdee118dbdabef2 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Tue, 28 Oct 2014 22:36:01 +0200 Subject: [PATCH 284/298] of/reconfig: Add OF_DYNAMIC notifier for platform_bus_type Add OF notifier handler needed for creating/destroying platform devices according to dynamic runtime changes in the DT live tree. Signed-off-by: Pantelis Antoniou Signed-off-by: Grant Likely (cherry picked from commit 801d728c10db4b28e01590b46bf1f0df930760cc) Signed-off-by: Mark Brown --- drivers/base/platform.c | 1 + drivers/of/platform.c | 55 +++++++++++++++++++++++++++++++++++++ include/linux/of_platform.h | 6 ++++ 3 files changed, 62 insertions(+) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index bc78848dd59a..3b977de18bab 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -937,6 +937,7 @@ int __init platform_bus_init(void) error = bus_register(&platform_bus_type); if (error) device_unregister(&platform_bus); + of_platform_register_reconfig_notifier(); return error; } diff --git a/drivers/of/platform.c b/drivers/of/platform.c index b98e501588d3..ad662c6fa471 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -496,4 +496,59 @@ void of_platform_depopulate(struct device *parent) } EXPORT_SYMBOL_GPL(of_platform_depopulate); +#ifdef CONFIG_OF_DYNAMIC +static int of_platform_notify(struct notifier_block *nb, + unsigned long action, void *arg) +{ + struct of_reconfig_data *rd = arg; + struct platform_device *pdev_parent, *pdev; + bool children_left; + + switch (of_reconfig_get_state_change(action, rd)) { + case OF_RECONFIG_CHANGE_ADD: + /* verify that the parent is a bus */ + if (!of_node_check_flag(rd->dn->parent, OF_POPULATED_BUS)) + return NOTIFY_OK; /* not for us */ + + /* pdev_parent may be NULL when no bus platform device */ + pdev_parent = of_find_device_by_node(rd->dn->parent); + pdev = of_platform_device_create(rd->dn, NULL, + pdev_parent ? &pdev_parent->dev : NULL); + of_dev_put(pdev_parent); + + if (pdev == NULL) { + pr_err("%s: failed to create for '%s'\n", + __func__, rd->dn->full_name); + /* of_platform_device_create tosses the error code */ + return notifier_from_errno(-EINVAL); + } + break; + + case OF_RECONFIG_CHANGE_REMOVE: + /* find our device by node */ + pdev = of_find_device_by_node(rd->dn); + if (pdev == NULL) + return NOTIFY_OK; /* no? not meant for us */ + + /* unregister takes one ref away */ + of_platform_device_destroy(&pdev->dev, &children_left); + + /* and put the reference of the find */ + of_dev_put(pdev); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block platform_of_notifier = { + .notifier_call = of_platform_notify, +}; + +void of_platform_register_reconfig_notifier(void) +{ + WARN_ON(of_reconfig_notifier_register(&platform_of_notifier)); +} +#endif /* CONFIG_OF_DYNAMIC */ + #endif /* CONFIG_OF_ADDRESS */ diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index d0a441468294..9142922871a7 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -84,4 +84,10 @@ static inline int of_platform_populate(struct device_node *root, static inline void of_platform_depopulate(struct device *parent) { } #endif +#ifdef CONFIG_OF_DYNAMIC +extern void of_platform_register_reconfig_notifier(void); +#else +static inline void of_platform_register_reconfig_notifier(void) { } +#endif + #endif /* _LINUX_OF_PLATFORM_H */ From 8e13f7dacf243610e4478d82479f85b97d5c923f Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 14 Nov 2014 15:33:07 +0000 Subject: [PATCH 285/298] of: Use vargs in __of_node_alloc The overlay code needs to construct a new full_name from the parent name and the node name, but the current method has to allocate and then free an temporary string which is wasteful. Fix this problem by using vargs to pass in a format and arguments into __of_node_alloc(). At the same time remove the allocflags argument to __of_node_alloc(). The only users all use GFP_KERNEL, so there is no need to provide it as an option. If there is ever a need later it can be added back. Signed-off-by: Grant Likely (cherry picked from commit ef8bbd73a76197cf8362a2b43aaadc5717bd0746) Signed-off-by: Mark Brown --- drivers/of/dynamic.c | 16 ++++++++-------- drivers/of/of_private.h | 2 +- drivers/of/unittest.c | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index d4a24d716f1d..925fe13052bd 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -409,33 +409,33 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags) /** * __of_node_alloc() - Create an empty device node dynamically. * @full_name: Full name of the new device node - * @allocflags: Allocation flags (typically pass GFP_KERNEL) * * Create an empty device tree node, suitable for further modification. * The node data are dynamically allocated and all the node flags * have the OF_DYNAMIC & OF_DETACHED bits set. * Returns the newly allocated node or NULL on out of memory error. */ -struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags) +struct device_node *__of_node_alloc(const char *fmt, ...) { + va_list vargs; struct device_node *node; - node = kzalloc(sizeof(*node), allocflags); + node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; - - node->full_name = kstrdup(full_name, allocflags); - of_node_set_flag(node, OF_DYNAMIC); - of_node_set_flag(node, OF_DETACHED); + va_start(vargs, fmt); + node->full_name = kvasprintf(GFP_KERNEL, fmt, vargs); + va_end(vargs); if (!node->full_name) goto err_free; + of_node_set_flag(node, OF_DYNAMIC); + of_node_set_flag(node, OF_DETACHED); of_node_init(node); return node; err_free: - kfree(node->full_name); kfree(node); return NULL; } diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 858e0a5d9a11..618abcad307e 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -61,7 +61,7 @@ static inline int of_property_notify(int action, struct device_node *np, * own the devtree lock or work on detached trees only. */ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags); -struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags); +__printf(1, 2) struct device_node *__of_node_alloc(const char *fmt, ...); extern const void *__of_get_property(const struct device_node *np, const char *name, int *lenp); diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 1207b6d0c122..d1bb1975a738 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -403,11 +403,11 @@ static void __init of_selftest_changeset(void) struct of_changeset chgset; of_changeset_init(&chgset); - n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL); + n1 = __of_node_alloc("/testcase-data/changeset/n1"); selftest(n1, "testcase setup failure\n"); - n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL); + n2 = __of_node_alloc("/testcase-data/changeset/n2"); selftest(n2, "testcase setup failure\n"); - n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL); + n21 = __of_node_alloc("/testcase-data/changeset/n2/n21"); selftest(n21, "testcase setup failure %p\n", n21); nremove = of_find_node_by_path("/testcase-data/changeset/node-remove"); selftest(nremove, "testcase setup failure\n"); From 70d66c2202486ba7ab43d47867e248834902bbc1 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 17 Nov 2014 22:31:32 +0000 Subject: [PATCH 286/298] of: Refactor __of_node_alloc() into __of_node_dup() Add a node argument to __of_node_alloc() and rename it to __of_node_dup() so that it can also be used to duplicate a node with its properties. This is important for the overlay code so that it can create new nodes without using separate changeset items for every single property. At the same time rework the overlay code to use the new function and drop the extra changeset items. Signed-off-by: Grant Likely (cherry picked from commit e51795815ef1a7adc018cbaf05aac46e3d24eda8) Signed-off-by: Mark Brown --- drivers/of/dynamic.c | 40 +++++++++++++++++++++++++++++----------- drivers/of/of_private.h | 2 +- drivers/of/unittest.c | 14 ++++++++++---- 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 925fe13052bd..87887de81cd3 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -407,15 +407,16 @@ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags) } /** - * __of_node_alloc() - Create an empty device node dynamically. - * @full_name: Full name of the new device node + * __of_node_dup() - Duplicate or create an empty device node dynamically. + * @fmt: Format string (plus vargs) for new full name of the device node * - * Create an empty device tree node, suitable for further modification. - * The node data are dynamically allocated and all the node flags - * have the OF_DYNAMIC & OF_DETACHED bits set. - * Returns the newly allocated node or NULL on out of memory error. + * Create an device tree node, either by duplicating an empty node or by allocating + * an empty one suitable for further modification. The node data are + * dynamically allocated and all the node flags have the OF_DYNAMIC & + * OF_DETACHED bits set. Returns the newly allocated node or NULL on out of + * memory error. */ -struct device_node *__of_node_alloc(const char *fmt, ...) +struct device_node *__of_node_dup(const struct device_node *np, const char *fmt, ...) { va_list vargs; struct device_node *node; @@ -426,17 +427,34 @@ struct device_node *__of_node_alloc(const char *fmt, ...) va_start(vargs, fmt); node->full_name = kvasprintf(GFP_KERNEL, fmt, vargs); va_end(vargs); - if (!node->full_name) - goto err_free; + if (!node->full_name) { + kfree(node); + return NULL; + } of_node_set_flag(node, OF_DYNAMIC); of_node_set_flag(node, OF_DETACHED); of_node_init(node); + /* Iterate over and duplicate all properties */ + if (np) { + struct property *pp, *new_pp; + for_each_property_of_node(np, pp) { + new_pp = __of_prop_dup(pp, GFP_KERNEL); + if (!new_pp) + goto err_prop; + if (__of_add_property(node, new_pp)) { + kfree(new_pp->name); + kfree(new_pp->value); + kfree(new_pp); + goto err_prop; + } + } + } return node; - err_free: - kfree(node); + err_prop: + of_node_put(node); /* Frees the node and properties */ return NULL; } diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 618abcad307e..8e882e706cd8 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -61,7 +61,7 @@ static inline int of_property_notify(int action, struct device_node *np, * own the devtree lock or work on detached trees only. */ struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags); -__printf(1, 2) struct device_node *__of_node_alloc(const char *fmt, ...); +__printf(2, 3) struct device_node *__of_node_dup(const struct device_node *np, const char *fmt, ...); extern const void *__of_get_property(const struct device_node *np, const char *name, int *lenp); diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index d1bb1975a738..0a5237e1794c 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -399,15 +399,15 @@ static void __init of_selftest_changeset(void) struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" }; struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" }; struct property *ppremove; - struct device_node *n1, *n2, *n21, *nremove, *parent; + struct device_node *n1, *n2, *n21, *nremove, *parent, *np; struct of_changeset chgset; of_changeset_init(&chgset); - n1 = __of_node_alloc("/testcase-data/changeset/n1"); + n1 = __of_node_dup(NULL, "/testcase-data/changeset/n1"); selftest(n1, "testcase setup failure\n"); - n2 = __of_node_alloc("/testcase-data/changeset/n2"); + n2 = __of_node_dup(NULL, "/testcase-data/changeset/n2"); selftest(n2, "testcase setup failure\n"); - n21 = __of_node_alloc("/testcase-data/changeset/n2/n21"); + n21 = __of_node_dup(NULL, "%s/%s", "/testcase-data/changeset/n2", "n21"); selftest(n21, "testcase setup failure %p\n", n21); nremove = of_find_node_by_path("/testcase-data/changeset/node-remove"); selftest(nremove, "testcase setup failure\n"); @@ -435,6 +435,12 @@ static void __init of_selftest_changeset(void) selftest(!of_changeset_apply(&chgset), "apply failed\n"); mutex_unlock(&of_mutex); + /* Make sure node names are constructed correctly */ + selftest((np = of_find_node_by_path("/testcase-data/changeset/n2/n21")), + "'%s' not added\n", n21->full_name); + if (np) + of_node_put(np); + mutex_lock(&of_mutex); selftest(!of_changeset_revert(&chgset), "revert failed\n"); mutex_unlock(&of_mutex); From 6868c7072082beb6157519577820331a4b538c2f Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Tue, 28 Oct 2014 22:35:58 +0200 Subject: [PATCH 287/298] of/overlay: Introduce DT overlay support Overlays are a method to dynamically modify part of the kernel's device tree with dynamically loaded data. Add the core functionality to parse, apply and remove an overlay changeset. The core functionality takes care of managing the overlay data format and performing the add and remove. Drivers are expected to use the overlay functionality to support custom expansion busses commonly found on consumer development boards like the BeagleBone or Raspberry Pi. The overlay code uses CONFIG_OF_DYNAMIC changesets to perform the low level work of modifying the devicetree. Documentation about internal and APIs is provided in Documentation/devicetree/overlay-notes.txt v2: - Switch from __of_node_alloc() to __of_node_dup() - Documentation fixups - Remove 2-pass processing of properties - Remove separate ov_lock; just use the DT mutex. v1: - Drop delete capability using '-' prefix. The '-' prefixed names are valid properties and nodes and there is no need for it just yet. - Do not update special properties - name & phandle ones. - Change order of node attachment, so that the special property update works. Signed-off-by: Pantelis Antoniou Signed-off-by: Grant Likely (cherry picked from commit 7518b5890d8ac366faa2326ce2356ef6392ce63d) Signed-off-by: Mark Brown Conflicts: drivers/of/Makefile --- Documentation/devicetree/overlay-notes.txt | 133 +++++ drivers/of/Kconfig | 7 + drivers/of/Makefile | 1 + drivers/of/overlay.c | 562 +++++++++++++++++++++ include/linux/of.h | 31 ++ 5 files changed, 734 insertions(+) create mode 100644 Documentation/devicetree/overlay-notes.txt create mode 100644 drivers/of/overlay.c diff --git a/Documentation/devicetree/overlay-notes.txt b/Documentation/devicetree/overlay-notes.txt new file mode 100644 index 000000000000..30ae758e3eef --- /dev/null +++ b/Documentation/devicetree/overlay-notes.txt @@ -0,0 +1,133 @@ +Device Tree Overlay Notes +------------------------- + +This document describes the implementation of the in-kernel +device tree overlay functionality residing in drivers/of/overlay.c and is a +companion document to Documentation/devicetree/dt-object-internal.txt[1] & +Documentation/devicetree/dynamic-resolution-notes.txt[2] + +How overlays work +----------------- + +A Device Tree's overlay purpose is to modify the kernel's live tree, and +have the modification affecting the state of the the kernel in a way that +is reflecting the changes. +Since the kernel mainly deals with devices, any new device node that result +in an active device should have it created while if the device node is either +disabled or removed all together, the affected device should be deregistered. + +Lets take an example where we have a foo board with the following base tree +which is taken from [1]. + +---- foo.dts ----------------------------------------------------------------- + /* FOO platform */ + / { + compatible = "corp,foo"; + + /* shared resources */ + res: res { + }; + + /* On chip peripherals */ + ocp: ocp { + /* peripherals that are always instantiated */ + peripheral1 { ... }; + } + }; +---- foo.dts ----------------------------------------------------------------- + +The overlay bar.dts, when loaded (and resolved as described in [2]) should + +---- bar.dts ----------------------------------------------------------------- +/plugin/; /* allow undefined label references and record them */ +/ { + .... /* various properties for loader use; i.e. part id etc. */ + fragment@0 { + target = <&ocp>; + __overlay__ { + /* bar peripheral */ + bar { + compatible = "corp,bar"; + ... /* various properties and child nodes */ + } + }; + }; +}; +---- bar.dts ----------------------------------------------------------------- + +result in foo+bar.dts + +---- foo+bar.dts ------------------------------------------------------------- + /* FOO platform + bar peripheral */ + / { + compatible = "corp,foo"; + + /* shared resources */ + res: res { + }; + + /* On chip peripherals */ + ocp: ocp { + /* peripherals that are always instantiated */ + peripheral1 { ... }; + + /* bar peripheral */ + bar { + compatible = "corp,bar"; + ... /* various properties and child nodes */ + } + } + }; +---- foo+bar.dts ------------------------------------------------------------- + +As a result of the the overlay, a new device node (bar) has been created +so a bar platform device will be registered and if a matching device driver +is loaded the device will be created as expected. + +Overlay in-kernel API +-------------------------------- + +The API is quite easy to use. + +1. Call of_overlay_create() to create and apply an overlay. The return value +is a cookie identifying this overlay. + +2. Call of_overlay_destroy() to remove and cleanup the overlay previously +created via the call to of_overlay_create(). Removal of an overlay that +is stacked by another will not be permitted. + +Finally, if you need to remove all overlays in one-go, just call +of_overlay_destroy_all() which will remove every single one in the correct +order. + +Overlay DTS Format +------------------ + +The DTS of an overlay should have the following format: + +{ + /* ignored properties by the overlay */ + + fragment@0 { /* first child node */ + + target=; /* phandle target of the overlay */ + or + target-path="/path"; /* target path of the overlay */ + + __overlay__ { + property-a; /* add property-a to the target */ + node-a { /* add to an existing, or create a node-a */ + ... + }; + }; + } + fragment@1 { /* second child node */ + ... + }; + /* more fragments follow */ +} + +Using the non-phandle based target method allows one to use a base DT which does +not contain a __symbols__ node, i.e. it was not compiled with the -@ option. +The __symbols__ node is only required for the target= method, since it +contains the information required to map from a phandle to a tree location. diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 625dfd2341e3..952358597275 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -72,4 +72,11 @@ config OF_MTD config OF_RESOLVE bool +config OF_OVERLAY + bool + depends on OF + select OF_DYNAMIC + select OF_DEVICE + select OF_RESOLVE + endmenu # OF diff --git a/drivers/of/Makefile b/drivers/of/Makefile index cb5f0602d347..576fe7053831 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -12,3 +12,4 @@ obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o obj-$(CONFIG_OF_MTD) += of_mtd.o obj-$(CONFIG_OF_RESOLVE) += resolver.o +obj-$(CONFIG_OF_OVERLAY) += overlay.o diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c new file mode 100644 index 000000000000..ea63fbd228ed --- /dev/null +++ b/drivers/of/overlay.c @@ -0,0 +1,562 @@ +/* + * Functions for working with device tree overlays + * + * Copyright (C) 2012 Pantelis Antoniou + * Copyright (C) 2012 Texas Instruments Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ +#undef DEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "of_private.h" + +/** + * struct of_overlay_info - Holds a single overlay info + * @target: target of the overlay operation + * @overlay: pointer to the overlay contents node + * + * Holds a single overlay state, including all the overlay logs & + * records. + */ +struct of_overlay_info { + struct device_node *target; + struct device_node *overlay; +}; + +/** + * struct of_overlay - Holds a complete overlay transaction + * @node: List on which we are located + * @count: Count of ovinfo structures + * @ovinfo_tab: Overlay info table (count sized) + * @cset: Changeset to be used + * + * Holds a complete overlay transaction + */ +struct of_overlay { + int id; + struct list_head node; + int count; + struct of_overlay_info *ovinfo_tab; + struct of_changeset cset; +}; + +static int of_overlay_apply_one(struct of_overlay *ov, + struct device_node *target, const struct device_node *overlay); + +static int of_overlay_apply_single_property(struct of_overlay *ov, + struct device_node *target, struct property *prop) +{ + struct property *propn, *tprop; + + /* NOTE: Multiple changes of single properties not supported */ + tprop = of_find_property(target, prop->name, NULL); + + /* special properties are not meant to be updated (silent NOP) */ + if (of_prop_cmp(prop->name, "name") == 0 || + of_prop_cmp(prop->name, "phandle") == 0 || + of_prop_cmp(prop->name, "linux,phandle") == 0) + return 0; + + propn = __of_prop_dup(prop, GFP_KERNEL); + if (propn == NULL) + return -ENOMEM; + + /* not found? add */ + if (tprop == NULL) + return of_changeset_add_property(&ov->cset, target, propn); + + /* found? update */ + return of_changeset_update_property(&ov->cset, target, propn); +} + +static int of_overlay_apply_single_device_node(struct of_overlay *ov, + struct device_node *target, struct device_node *child) +{ + const char *cname; + struct device_node *tchild, *grandchild; + int ret = 0; + + cname = kbasename(child->full_name); + if (cname == NULL) + return -ENOMEM; + + /* NOTE: Multiple mods of created nodes not supported */ + tchild = of_get_child_by_name(target, cname); + if (tchild != NULL) { + /* apply overlay recursively */ + ret = of_overlay_apply_one(ov, tchild, child); + of_node_put(tchild); + } else { + /* create empty tree as a target */ + tchild = __of_node_dup(child, "%s/%s", target->full_name, cname); + if (!tchild) + return -ENOMEM; + + /* point to parent */ + tchild->parent = target; + + ret = of_changeset_attach_node(&ov->cset, tchild); + if (ret) + return ret; + + ret = of_overlay_apply_one(ov, tchild, child); + if (ret) + return ret; + + /* The properties are already copied, now do the child nodes */ + for_each_child_of_node(child, grandchild) { + ret = of_overlay_apply_single_device_node(ov, tchild, grandchild); + if (ret) { + pr_err("%s: Failed to apply single node @%s/%s\n", + __func__, tchild->full_name, + grandchild->name); + return ret; + } + } + } + + return ret; +} + +/* + * Apply a single overlay node recursively. + * + * Note that the in case of an error the target node is left + * in a inconsistent state. Error recovery should be performed + * by using the changeset. + */ +static int of_overlay_apply_one(struct of_overlay *ov, + struct device_node *target, const struct device_node *overlay) +{ + struct device_node *child; + struct property *prop; + int ret; + + for_each_property_of_node(overlay, prop) { + ret = of_overlay_apply_single_property(ov, target, prop); + if (ret) { + pr_err("%s: Failed to apply prop @%s/%s\n", + __func__, target->full_name, prop->name); + return ret; + } + } + + for_each_child_of_node(overlay, child) { + ret = of_overlay_apply_single_device_node(ov, target, child); + if (ret != 0) { + pr_err("%s: Failed to apply single node @%s/%s\n", + __func__, target->full_name, + child->name); + return ret; + } + } + + return 0; +} + +/** + * of_overlay_apply() - Apply @count overlays pointed at by @ovinfo_tab + * @ov: Overlay to apply + * + * Applies the overlays given, while handling all error conditions + * appropriately. Either the operation succeeds, or if it fails the + * live tree is reverted to the state before the attempt. + * Returns 0, or an error if the overlay attempt failed. + */ +static int of_overlay_apply(struct of_overlay *ov) +{ + int i, err; + + /* first we apply the overlays atomically */ + for (i = 0; i < ov->count; i++) { + struct of_overlay_info *ovinfo = &ov->ovinfo_tab[i]; + + err = of_overlay_apply_one(ov, ovinfo->target, ovinfo->overlay); + if (err != 0) { + pr_err("%s: overlay failed '%s'\n", + __func__, ovinfo->target->full_name); + return err; + } + } + + return 0; +} + +/* + * Find the target node using a number of different strategies + * in order of preference + * + * "target" property containing the phandle of the target + * "target-path" property containing the path of the target + */ +static struct device_node *find_target_node(struct device_node *info_node) +{ + const char *path; + u32 val; + int ret; + + /* first try to go by using the target as a phandle */ + ret = of_property_read_u32(info_node, "target", &val); + if (ret == 0) + return of_find_node_by_phandle(val); + + /* now try to locate by path */ + ret = of_property_read_string(info_node, "target-path", &path); + if (ret == 0) + return of_find_node_by_path(path); + + pr_err("%s: Failed to find target for node %p (%s)\n", __func__, + info_node, info_node->name); + + return NULL; +} + +/** + * of_fill_overlay_info() - Fill an overlay info structure + * @ov Overlay to fill + * @info_node: Device node containing the overlay + * @ovinfo: Pointer to the overlay info structure to fill + * + * Fills an overlay info structure with the overlay information + * from a device node. This device node must have a target property + * which contains a phandle of the overlay target node, and an + * __overlay__ child node which has the overlay contents. + * Both ovinfo->target & ovinfo->overlay have their references taken. + * + * Returns 0 on success, or a negative error value. + */ +static int of_fill_overlay_info(struct of_overlay *ov, + struct device_node *info_node, struct of_overlay_info *ovinfo) +{ + ovinfo->overlay = of_get_child_by_name(info_node, "__overlay__"); + if (ovinfo->overlay == NULL) + goto err_fail; + + ovinfo->target = find_target_node(info_node); + if (ovinfo->target == NULL) + goto err_fail; + + return 0; + +err_fail: + of_node_put(ovinfo->target); + of_node_put(ovinfo->overlay); + + memset(ovinfo, 0, sizeof(*ovinfo)); + return -EINVAL; +} + +/** + * of_build_overlay_info() - Build an overlay info array + * @ov Overlay to build + * @tree: Device node containing all the overlays + * + * Helper function that given a tree containing overlay information, + * allocates and builds an overlay info array containing it, ready + * for use using of_overlay_apply. + * + * Returns 0 on success with the @cntp @ovinfop pointers valid, + * while on error a negative error value is returned. + */ +static int of_build_overlay_info(struct of_overlay *ov, + struct device_node *tree) +{ + struct device_node *node; + struct of_overlay_info *ovinfo; + int cnt, err; + + /* worst case; every child is a node */ + cnt = 0; + for_each_child_of_node(tree, node) + cnt++; + + ovinfo = kcalloc(cnt, sizeof(*ovinfo), GFP_KERNEL); + if (ovinfo == NULL) + return -ENOMEM; + + cnt = 0; + for_each_child_of_node(tree, node) { + memset(&ovinfo[cnt], 0, sizeof(*ovinfo)); + err = of_fill_overlay_info(ov, node, &ovinfo[cnt]); + if (err == 0) + cnt++; + } + + /* if nothing filled, return error */ + if (cnt == 0) { + kfree(ovinfo); + return -ENODEV; + } + + ov->count = cnt; + ov->ovinfo_tab = ovinfo; + + return 0; +} + +/** + * of_free_overlay_info() - Free an overlay info array + * @ov Overlay to free the overlay info from + * @ovinfo_tab: Array of overlay_info's to free + * + * Releases the memory of a previously allocated ovinfo array + * by of_build_overlay_info. + * Returns 0, or an error if the arguments are bogus. + */ +static int of_free_overlay_info(struct of_overlay *ov) +{ + struct of_overlay_info *ovinfo; + int i; + + /* do it in reverse */ + for (i = ov->count - 1; i >= 0; i--) { + ovinfo = &ov->ovinfo_tab[i]; + + of_node_put(ovinfo->target); + of_node_put(ovinfo->overlay); + } + kfree(ov->ovinfo_tab); + + return 0; +} + +static LIST_HEAD(ov_list); +static DEFINE_IDR(ov_idr); + +/** + * of_overlay_create() - Create and apply an overlay + * @tree: Device node containing all the overlays + * + * Creates and applies an overlay while also keeping track + * of the overlay in a list. This list can be used to prevent + * illegal overlay removals. + * + * Returns the id of the created overlay, or an negative error number + */ +int of_overlay_create(struct device_node *tree) +{ + struct of_overlay *ov; + int err, id; + + /* allocate the overlay structure */ + ov = kzalloc(sizeof(*ov), GFP_KERNEL); + if (ov == NULL) + return -ENOMEM; + ov->id = -1; + + INIT_LIST_HEAD(&ov->node); + + of_changeset_init(&ov->cset); + + mutex_lock(&of_mutex); + + id = idr_alloc(&ov_idr, ov, 0, 0, GFP_KERNEL); + if (id < 0) { + pr_err("%s: idr_alloc() failed for tree@%s\n", + __func__, tree->full_name); + err = id; + goto err_destroy_trans; + } + ov->id = id; + + /* build the overlay info structures */ + err = of_build_overlay_info(ov, tree); + if (err) { + pr_err("%s: of_build_overlay_info() failed for tree@%s\n", + __func__, tree->full_name); + goto err_free_idr; + } + + /* apply the overlay */ + err = of_overlay_apply(ov); + if (err) { + pr_err("%s: of_overlay_apply() failed for tree@%s\n", + __func__, tree->full_name); + goto err_abort_trans; + } + + /* apply the changeset */ + err = of_changeset_apply(&ov->cset); + if (err) { + pr_err("%s: of_changeset_apply() failed for tree@%s\n", + __func__, tree->full_name); + goto err_revert_overlay; + } + + /* add to the tail of the overlay list */ + list_add_tail(&ov->node, &ov_list); + + mutex_unlock(&of_mutex); + + return id; + +err_revert_overlay: +err_abort_trans: + of_free_overlay_info(ov); +err_free_idr: + idr_remove(&ov_idr, ov->id); +err_destroy_trans: + of_changeset_destroy(&ov->cset); + kfree(ov); + mutex_unlock(&of_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(of_overlay_create); + +/* check whether the given node, lies under the given tree */ +static int overlay_subtree_check(struct device_node *tree, + struct device_node *dn) +{ + struct device_node *child; + + /* match? */ + if (tree == dn) + return 1; + + for_each_child_of_node(tree, child) { + if (overlay_subtree_check(child, dn)) + return 1; + } + + return 0; +} + +/* check whether this overlay is the topmost */ +static int overlay_is_topmost(struct of_overlay *ov, struct device_node *dn) +{ + struct of_overlay *ovt; + struct of_changeset_entry *ce; + + list_for_each_entry_reverse(ovt, &ov_list, node) { + /* if we hit ourselves, we're done */ + if (ovt == ov) + break; + + /* check against each subtree affected by this overlay */ + list_for_each_entry(ce, &ovt->cset.entries, node) { + if (overlay_subtree_check(ce->np, dn)) { + pr_err("%s: #%d clashes #%d @%s\n", + __func__, ov->id, ovt->id, + dn->full_name); + return 0; + } + } + } + + /* overlay is topmost */ + return 1; +} + +/* + * We can safely remove the overlay only if it's the top-most one. + * Newly applied overlays are inserted at the tail of the overlay list, + * so a top most overlay is the one that is closest to the tail. + * + * The topmost check is done by exploiting this property. For each + * affected device node in the log list we check if this overlay is + * the one closest to the tail. If another overlay has affected this + * device node and is closest to the tail, then removal is not permited. + */ +static int overlay_removal_is_ok(struct of_overlay *ov) +{ + struct of_changeset_entry *ce; + + list_for_each_entry(ce, &ov->cset.entries, node) { + if (!overlay_is_topmost(ov, ce->np)) { + pr_err("%s: overlay #%d is not topmost\n", + __func__, ov->id); + return 0; + } + } + + return 1; +} + +/** + * of_overlay_destroy() - Removes an overlay + * @id: Overlay id number returned by a previous call to of_overlay_create + * + * Removes an overlay if it is permissible. + * + * Returns 0 on success, or an negative error number + */ +int of_overlay_destroy(int id) +{ + struct of_overlay *ov; + int err; + + mutex_lock(&of_mutex); + + ov = idr_find(&ov_idr, id); + if (ov == NULL) { + err = -ENODEV; + pr_err("%s: Could not find overlay #%d\n", + __func__, id); + goto out; + } + + /* check whether the overlay is safe to remove */ + if (!overlay_removal_is_ok(ov)) { + err = -EBUSY; + pr_err("%s: removal check failed for overlay #%d\n", + __func__, id); + goto out; + } + + + list_del(&ov->node); + of_changeset_revert(&ov->cset); + of_free_overlay_info(ov); + idr_remove(&ov_idr, id); + of_changeset_destroy(&ov->cset); + kfree(ov); + + err = 0; + +out: + mutex_unlock(&of_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(of_overlay_destroy); + +/** + * of_overlay_destroy_all() - Removes all overlays from the system + * + * Removes all overlays from the system in the correct order. + * + * Returns 0 on success, or an negative error number + */ +int of_overlay_destroy_all(void) +{ + struct of_overlay *ov, *ovn; + + mutex_lock(&of_mutex); + + /* the tail of list is guaranteed to be safe to remove */ + list_for_each_entry_safe_reverse(ov, ovn, &ov_list, node) { + list_del(&ov->node); + of_changeset_revert(&ov->cset); + of_free_overlay_info(ov); + idr_remove(&ov_idr, ov->id); + kfree(ov); + } + + mutex_unlock(&of_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(of_overlay_destroy_all); diff --git a/include/linux/of.h b/include/linux/of.h index 118e41a97f7d..116b4b6dcedd 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -887,4 +888,34 @@ static inline int of_reconfig_get_state_change(unsigned long action, /* CONFIG_OF_RESOLVE api */ extern int of_resolve_phandles(struct device_node *tree); +/** + * Overlay support + */ + +#ifdef CONFIG_OF_OVERLAY + +/* ID based overlays; the API for external users */ +int of_overlay_create(struct device_node *tree); +int of_overlay_destroy(int id); +int of_overlay_destroy_all(void); + +#else + +static inline int of_overlay_create(struct device_node *tree) +{ + return -ENOTSUPP; +} + +static inline int of_overlay_destroy(int id) +{ + return -ENOTSUPP; +} + +static inline int of_overlay_destroy_all(void) +{ + return -ENOTSUPP; +} + +#endif + #endif /* _LINUX_OF_H */ From 6a7325d5a8cb4b1b68cb7d468c064ea48bb02a5b Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Tue, 16 Dec 2014 19:45:25 +0200 Subject: [PATCH 288/298] of/overlay: Do not generate duplicate nodes During the course of the rewrites a bug sneaked in when dealing with children nodes of overlays, which ends up duplicating sub nodes. Simply remove the duplicate traversal of child nodes to fix. Signed-off-by: Pantelis Antoniou Signed-off-by: Grant Likely (cherry picked from commit 3e7f7626fd49a9ffba8520a1a073f62929acad63) Signed-off-by: Mark Brown --- drivers/of/overlay.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index ea63fbd228ed..352b4f28f82c 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -114,17 +114,6 @@ static int of_overlay_apply_single_device_node(struct of_overlay *ov, ret = of_overlay_apply_one(ov, tchild, child); if (ret) return ret; - - /* The properties are already copied, now do the child nodes */ - for_each_child_of_node(child, grandchild) { - ret = of_overlay_apply_single_device_node(ov, tchild, grandchild); - if (ret) { - pr_err("%s: Failed to apply single node @%s/%s\n", - __func__, tchild->full_name, - grandchild->name); - return ret; - } - } } return ret; From e6855ef9f7ba899da934771ec5a587c1bea180e0 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Tue, 16 Dec 2014 19:45:26 +0200 Subject: [PATCH 289/298] of/platform: Handle of_populate drivers in notifier When using overlays with drivers calling of_populate the notifier will try to create the device twice. Using the populated bit before proceeding protects against this. Signed-off-by: Pantelis Antoniou Signed-off-by: Grant Likely (cherry picked from commit 15204ab1ebc5aba608cd19c83c37b98438b938b0) Signed-off-by: Mark Brown --- drivers/of/platform.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index ad662c6fa471..f1e097302ff8 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -510,6 +510,10 @@ static int of_platform_notify(struct notifier_block *nb, if (!of_node_check_flag(rd->dn->parent, OF_POPULATED_BUS)) return NOTIFY_OK; /* not for us */ + /* already populated? (driver using of_populate manually) */ + if (of_node_check_flag(rd->dn, OF_POPULATED)) + return NOTIFY_OK; + /* pdev_parent may be NULL when no bus platform device */ pdev_parent = of_find_device_by_node(rd->dn->parent); pdev = of_platform_device_create(rd->dn, NULL, @@ -525,6 +529,11 @@ static int of_platform_notify(struct notifier_block *nb, break; case OF_RECONFIG_CHANGE_REMOVE: + + /* already depopulated? */ + if (!of_node_check_flag(rd->dn, OF_POPULATED)) + return NOTIFY_OK; + /* find our device by node */ pdev = of_find_device_by_node(rd->dn); if (pdev == NULL) From 469570a49a1cc82b2b827c0fda5a029dff5bef55 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 17 Feb 2015 11:35:02 +0900 Subject: [PATCH 290/298] of/overlay: Directly include idr.h The overlay code uses IDRs but does not explicitly include the header providing the interface, instead relying on an implicit inclusion. Make the dependency implict to avoid potential future build issues if the implicit inclusion goes away. Signed-off-by: Mark Brown (cherry picked from commit 82b67d73b121febe5ae76747680a2d979c21c5e6) Signed-off-by: Mark Brown --- drivers/of/overlay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 352b4f28f82c..406664801cb5 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "of_private.h" From d989a71f59760eed7db5fa4600e3cfe4e007cc63 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Tue, 28 Oct 2014 22:36:02 +0200 Subject: [PATCH 291/298] i2c/of: Factor out Devicetree registration code Dynamically inserting i2c client device nodes requires the use of a single device registration method. Factor out the loop body of of_i2c_register_devices() so that it can be called for individual device_nodes instead of for all the children of a node. Note: The diff of this commit looks far more complicated than it actually is due the indentation being changed for a large block of code. When viewed using the diff -w flag to ignore whitespace changes it can be seen that the change is actually quite simple. Signed-off-by: Pantelis Antoniou [grant.likely: Made new function static and removed changes to header] Signed-off-by: Grant Likely Reviewed-by: Wolfram Sang Cc: Rob Herring Cc: linux-i2c@vger.kernel.org (cherry picked from commit a430a3455f2c48995e06b359a82a1109a419e9ef) Signed-off-by: Mark Brown --- drivers/i2c/i2c-core.c | 101 ++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 47 deletions(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 5fb80b8962a2..2fc609a29c6b 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "i2c-core.h" @@ -982,9 +983,59 @@ static void i2c_scan_static_board_info(struct i2c_adapter *adapter) /* OF support code */ #if IS_ENABLED(CONFIG_OF) +static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap, + struct device_node *node) +{ + struct i2c_client *result; + struct i2c_board_info info = {}; + struct dev_archdata dev_ad = {}; + const __be32 *addr; + int len; + + dev_dbg(&adap->dev, "of_i2c: register %s\n", node->full_name); + + if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) { + dev_err(&adap->dev, "of_i2c: modalias failure on %s\n", + node->full_name); + return ERR_PTR(-EINVAL); + } + + addr = of_get_property(node, "reg", &len); + if (!addr || (len < sizeof(int))) { + dev_err(&adap->dev, "of_i2c: invalid reg on %s\n", + node->full_name); + return ERR_PTR(-EINVAL); + } + + info.addr = be32_to_cpup(addr); + if (info.addr > (1 << 10) - 1) { + dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n", + info.addr, node->full_name); + return ERR_PTR(-EINVAL); + } + + info.irq = irq_of_parse_and_map(node, 0); + info.of_node = of_node_get(node); + info.archdata = &dev_ad; + + if (of_get_property(node, "wakeup-source", NULL)) + info.flags |= I2C_CLIENT_WAKE; + + request_module("%s%s", I2C_MODULE_PREFIX, info.type); + + result = i2c_new_device(adap, &info); + if (result == NULL) { + dev_err(&adap->dev, "of_i2c: Failure registering %s\n", + node->full_name); + of_node_put(node); + irq_dispose_mapping(info.irq); + return ERR_PTR(-EINVAL); + } + return result; +} + static void of_i2c_register_devices(struct i2c_adapter *adap) { - void *result; struct device_node *node; /* Only register child devices if the adapter has a node pointer set */ @@ -993,52 +1044,8 @@ static void of_i2c_register_devices(struct i2c_adapter *adap) dev_dbg(&adap->dev, "of_i2c: walking child nodes\n"); - for_each_available_child_of_node(adap->dev.of_node, node) { - struct i2c_board_info info = {}; - struct dev_archdata dev_ad = {}; - const __be32 *addr; - int len; - - dev_dbg(&adap->dev, "of_i2c: register %s\n", node->full_name); - - if (of_modalias_node(node, info.type, sizeof(info.type)) < 0) { - dev_err(&adap->dev, "of_i2c: modalias failure on %s\n", - node->full_name); - continue; - } - - addr = of_get_property(node, "reg", &len); - if (!addr || (len < sizeof(int))) { - dev_err(&adap->dev, "of_i2c: invalid reg on %s\n", - node->full_name); - continue; - } - - info.addr = be32_to_cpup(addr); - if (info.addr > (1 << 10) - 1) { - dev_err(&adap->dev, "of_i2c: invalid addr=%x on %s\n", - info.addr, node->full_name); - continue; - } - - info.irq = irq_of_parse_and_map(node, 0); - info.of_node = of_node_get(node); - info.archdata = &dev_ad; - - if (of_get_property(node, "wakeup-source", NULL)) - info.flags |= I2C_CLIENT_WAKE; - - request_module("%s%s", I2C_MODULE_PREFIX, info.type); - - result = i2c_new_device(adap, &info); - if (result == NULL) { - dev_err(&adap->dev, "of_i2c: Failure registering %s\n", - node->full_name); - of_node_put(node); - irq_dispose_mapping(info.irq); - continue; - } - } + for_each_available_child_of_node(adap->dev.of_node, node) + of_i2c_register_device(adap, node); } static int of_dev_node_match(struct device *dev, void *data) From e389b9aee729de629f17cfa10ec9ae193dc99d54 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Tue, 28 Oct 2014 22:36:03 +0200 Subject: [PATCH 292/298] i2c/of: Add OF_RECONFIG notifier handler CONFIG_OF_DYNAMIC enables runtime changes to the device tree which in turn may trigger addition or removal of devices from Linux. Add an OF_RECONFIG notifier handler to receive tree change events and to creating or destroy i2c devices as required. Signed-off-by: Pantelis Antoniou [grant.likely: clean up #ifdefs and drop unneeded error handling] Signed-off-by: Grant Likely Reviewed-by: Wolfram Sang Cc: Rob Herring Cc: linux-i2c@vger.kernel.org (cherry picked from commit ea7513bbc04170f1cbf42953187a4d8b731c71c4) Signed-off-by: Mark Brown --- drivers/i2c/i2c-core.c | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 2fc609a29c6b..e2e484567df2 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1658,6 +1658,52 @@ void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg) } EXPORT_SYMBOL(i2c_clients_command); +#if IS_ENABLED(CONFIG_OF_DYNAMIC) +static int of_i2c_notify(struct notifier_block *nb, unsigned long action, + void *arg) +{ + struct of_reconfig_data *rd = arg; + struct i2c_adapter *adap; + struct i2c_client *client; + + switch (of_reconfig_get_state_change(action, rd)) { + case OF_RECONFIG_CHANGE_ADD: + adap = of_find_i2c_adapter_by_node(rd->dn->parent); + if (adap == NULL) + return NOTIFY_OK; /* not for us */ + + client = of_i2c_register_device(adap, rd->dn); + put_device(&adap->dev); + + if (IS_ERR(client)) { + pr_err("%s: failed to create for '%s'\n", + __func__, rd->dn->full_name); + return notifier_from_errno(PTR_ERR(client)); + } + break; + case OF_RECONFIG_CHANGE_REMOVE: + /* find our device by node */ + client = of_find_i2c_device_by_node(rd->dn); + if (client == NULL) + return NOTIFY_OK; /* no? not meant for us */ + + /* unregister takes one ref away */ + i2c_unregister_device(client); + + /* and put the reference of the find */ + put_device(&client->dev); + break; + } + + return NOTIFY_OK; +} +static struct notifier_block i2c_of_notifier = { + .notifier_call = of_i2c_notify, +}; +#else +extern struct notifier_block i2c_of_notifier; +#endif /* CONFIG_OF_DYNAMIC */ + static int __init i2c_init(void) { int retval; @@ -1675,6 +1721,10 @@ static int __init i2c_init(void) retval = i2c_add_driver(&dummy_driver); if (retval) goto class_err; + + if (IS_ENABLED(CONFIG_OF_DYNAMIC)) + WARN_ON(of_reconfig_notifier_register(&i2c_of_notifier)); + return 0; class_err: @@ -1688,6 +1738,8 @@ bus_err: static void __exit i2c_exit(void) { + if (IS_ENABLED(CONFIG_OF_DYNAMIC)) + WARN_ON(of_reconfig_notifier_unregister(&i2c_of_notifier)); i2c_del_driver(&dummy_driver); #ifdef CONFIG_I2C_COMPAT class_compat_unregister(i2c_adapter_compat_class); From a45e4e42eed3e70da7adc867a881af80d6fd6cc5 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Thu, 15 Jan 2015 18:33:55 +0200 Subject: [PATCH 293/298] i2c: Mark instantiated device nodes with OF_POPULATE Mark (and unmark) device nodes with the POPULATE flag as appropriate. This is required to avoid multi probing when using I2C and device overlays containing a mux. Signed-off-by: Pantelis Antoniou Signed-off-by: Wolfram Sang (cherry picked from commit d5285c36e6d27c1a411a3131c034641a5fa452cb) Signed-off-by: Mark Brown --- drivers/i2c/i2c-core.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index e2e484567df2..7ac0a84a2e89 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -723,6 +723,8 @@ EXPORT_SYMBOL_GPL(i2c_new_device); */ void i2c_unregister_device(struct i2c_client *client) { + if (client->dev.of_node) + of_node_clear_flag(client->dev.of_node, OF_POPULATED); device_unregister(&client->dev); } EXPORT_SYMBOL_GPL(i2c_unregister_device); @@ -1044,8 +1046,11 @@ static void of_i2c_register_devices(struct i2c_adapter *adap) dev_dbg(&adap->dev, "of_i2c: walking child nodes\n"); - for_each_available_child_of_node(adap->dev.of_node, node) + for_each_available_child_of_node(adap->dev.of_node, node) { + if (of_node_test_and_set_flag(node, OF_POPULATED)) + continue; of_i2c_register_device(adap, node); + } } static int of_dev_node_match(struct device *dev, void *data) @@ -1672,6 +1677,11 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, if (adap == NULL) return NOTIFY_OK; /* not for us */ + if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) { + put_device(&adap->dev); + return NOTIFY_OK; + } + client = of_i2c_register_device(adap, rd->dn); put_device(&adap->dev); @@ -1682,6 +1692,10 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, } break; case OF_RECONFIG_CHANGE_REMOVE: + /* already depopulated? */ + if (!of_node_check_flag(rd->dn, OF_POPULATED)) + return NOTIFY_OK; + /* find our device by node */ client = of_find_i2c_device_by_node(rd->dn); if (client == NULL) From f01c984756914ac778cddab9bb0954a6f454fd12 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Thu, 15 Jan 2015 20:33:18 +0200 Subject: [PATCH 294/298] i2c: Mark instantiated device nodes with OF_POPULATE Mark (and unmark) device nodes with the POPULATE flag as appropriate. This is required to avoid multi probing when using I2C and device overlays containing a mux. Signed-off-by: Pantelis Antoniou Signed-off-by: Wolfram Sang (cherry picked from commit 6d0a0d9ad09be16e3ca676b245643945b4bff47d) Signed-off-by: Mark Brown Conflicts: drivers/i2c/i2c-core.c --- drivers/i2c/i2c-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 7ac0a84a2e89..435403c85b1b 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -723,8 +723,10 @@ EXPORT_SYMBOL_GPL(i2c_new_device); */ void i2c_unregister_device(struct i2c_client *client) { +#if IS_ENABLED(CONFIG_OF_DYNAMIC) if (client->dev.of_node) of_node_clear_flag(client->dev.of_node, OF_POPULATED); +#endif device_unregister(&client->dev); } EXPORT_SYMBOL_GPL(i2c_unregister_device); From d213317747118305fea775fcfc72a533b11b15bd Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Tue, 28 Oct 2014 22:35:59 +0200 Subject: [PATCH 295/298] of/overlay: Add overlay unittests Add unittests for OF overlays. It tests overlay device addition/removal and whether the apply revert sequence is correct. Changes since V1: * Added local fixups entries. Signed-off-by: Pantelis Antoniou Signed-off-by: Grant Likely (cherry picked from commit 177d271cf3171bb6826ee5189f67dc1f7d34f1da) Signed-off-by: Mark Brown --- .../devicetree/bindings/unittest.txt | 14 + drivers/of/unittest-data/testcases.dts | 16 + drivers/of/unittest-data/tests-overlay.dtsi | 180 +++++++ drivers/of/unittest.c | 481 ++++++++++++++++++ 4 files changed, 691 insertions(+) create mode 100644 Documentation/devicetree/bindings/unittest.txt create mode 100644 drivers/of/unittest-data/tests-overlay.dtsi diff --git a/Documentation/devicetree/bindings/unittest.txt b/Documentation/devicetree/bindings/unittest.txt new file mode 100644 index 000000000000..0f92a22fddfa --- /dev/null +++ b/Documentation/devicetree/bindings/unittest.txt @@ -0,0 +1,14 @@ +* OF selftest platform device + +** selftest + +Required properties: +- compatible: must be "selftest" + +All other properties are optional. + +Example: + selftest { + compatible = "selftest"; + status = "okay"; + }; diff --git a/drivers/of/unittest-data/testcases.dts b/drivers/of/unittest-data/testcases.dts index 6ed76ca115a5..d26b96a87e8f 100644 --- a/drivers/of/unittest-data/testcases.dts +++ b/drivers/of/unittest-data/testcases.dts @@ -3,6 +3,7 @@ #include "tests-interrupts.dtsi" #include "tests-match.dtsi" #include "tests-platform.dtsi" +#include "tests-overlay.dtsi" /* * phandle fixup data - generated by dtc patches that aren't upstream. @@ -49,5 +50,20 @@ testcase-device2 { interrupt-parent = <0x00000000>; }; + overlay2 { + fragment@0 { + target = <0x00000000>; + }; + }; + overlay3 { + fragment@0 { + target = <0x00000000>; + }; + }; + overlay4 { + fragment@0 { + target = <0x00000000>; + }; + }; }; }; }; diff --git a/drivers/of/unittest-data/tests-overlay.dtsi b/drivers/of/unittest-data/tests-overlay.dtsi new file mode 100644 index 000000000000..75976da22b2e --- /dev/null +++ b/drivers/of/unittest-data/tests-overlay.dtsi @@ -0,0 +1,180 @@ + +/ { + testcase-data { + overlay-node { + + /* test bus */ + selftestbus: test-bus { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <0>; + + selftest100: test-selftest100 { + compatible = "selftest"; + status = "okay"; + reg = <100>; + }; + + selftest101: test-selftest101 { + compatible = "selftest"; + status = "disabled"; + reg = <101>; + }; + + selftest0: test-selftest0 { + compatible = "selftest"; + status = "disabled"; + reg = <0>; + }; + + selftest1: test-selftest1 { + compatible = "selftest"; + status = "okay"; + reg = <1>; + }; + + selftest2: test-selftest2 { + compatible = "selftest"; + status = "disabled"; + reg = <2>; + }; + + selftest3: test-selftest3 { + compatible = "selftest"; + status = "okay"; + reg = <3>; + }; + + selftest5: test-selftest5 { + compatible = "selftest"; + status = "disabled"; + reg = <5>; + }; + + selftest6: test-selftest6 { + compatible = "selftest"; + status = "disabled"; + reg = <6>; + }; + + selftest7: test-selftest7 { + compatible = "selftest"; + status = "disabled"; + reg = <7>; + }; + + selftest8: test-selftest8 { + compatible = "selftest"; + status = "disabled"; + reg = <8>; + }; + }; + }; + + /* test enable using absolute target path */ + overlay0 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/test-selftest0"; + __overlay__ { + status = "okay"; + }; + }; + }; + + /* test disable using absolute target path */ + overlay1 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/test-selftest1"; + __overlay__ { + status = "disabled"; + }; + }; + }; + + /* test enable using label */ + overlay2 { + fragment@0 { + target = <&selftest2>; + __overlay__ { + status = "okay"; + }; + }; + }; + + /* test disable using label */ + overlay3 { + fragment@0 { + target = <&selftest3>; + __overlay__ { + status = "disabled"; + }; + }; + }; + + /* test insertion of a full node */ + overlay4 { + fragment@0 { + target = <&selftestbus>; + __overlay__ { + + /* suppress DTC warning */ + #address-cells = <1>; + #size-cells = <0>; + + test-selftest4 { + compatible = "selftest"; + status = "okay"; + reg = <4>; + }; + }; + }; + }; + + /* test overlay apply revert */ + overlay5 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/test-selftest5"; + __overlay__ { + status = "okay"; + }; + }; + }; + + /* test overlays application and removal in sequence */ + overlay6 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/test-selftest6"; + __overlay__ { + status = "okay"; + }; + }; + }; + overlay7 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/test-selftest7"; + __overlay__ { + status = "okay"; + }; + }; + }; + + /* test overlays application and removal in bad sequence */ + overlay8 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/test-selftest8"; + __overlay__ { + status = "okay"; + }; + }; + }; + overlay9 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/test-selftest8"; + __overlay__ { + property-foo = "bar"; + }; + }; + }; + + }; +}; diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 0a5237e1794c..f312502c8f46 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include "of_private.h" @@ -881,6 +883,484 @@ static void selftest_data_remove(void) } } +#ifdef CONFIG_OF_OVERLAY + +static int selftest_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + + if (np == NULL) { + dev_err(dev, "No OF data for device\n"); + return -EINVAL; + + } + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + return 0; +} + +static int selftest_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + return 0; +} + +static struct of_device_id selftest_match[] = { + { .compatible = "selftest", }, + {}, +}; +MODULE_DEVICE_TABLE(of, altera_jtaguart_match); + +static struct platform_driver selftest_driver = { + .probe = selftest_probe, + .remove = selftest_remove, + .driver = { + .name = "selftest", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(selftest_match), + }, +}; + +/* get the platform device instantiated at the path */ +static struct platform_device *of_path_to_platform_device(const char *path) +{ + struct device_node *np; + struct platform_device *pdev; + + np = of_find_node_by_path(path); + if (np == NULL) + return NULL; + + pdev = of_find_device_by_node(np); + of_node_put(np); + + return pdev; +} + +/* find out if a platform device exists at that path */ +static int of_path_platform_device_exists(const char *path) +{ + struct platform_device *pdev; + + pdev = of_path_to_platform_device(path); + platform_device_put(pdev); + return pdev != NULL; +} + +static const char *selftest_path(int nr) +{ + static char buf[256]; + + snprintf(buf, sizeof(buf) - 1, + "/testcase-data/overlay-node/test-bus/test-selftest%d", nr); + buf[sizeof(buf) - 1] = '\0'; + + return buf; +} + +static const char *overlay_path(int nr) +{ + static char buf[256]; + + snprintf(buf, sizeof(buf) - 1, + "/testcase-data/overlay%d", nr); + buf[sizeof(buf) - 1] = '\0'; + + return buf; +} + +static const char *bus_path = "/testcase-data/overlay-node/test-bus"; + +static int of_selftest_apply_overlay(int selftest_nr, int overlay_nr, + int *overlay_id) +{ + struct device_node *np = NULL; + int ret, id = -1; + + np = of_find_node_by_path(overlay_path(overlay_nr)); + if (np == NULL) { + selftest(0, "could not find overlay node @\"%s\"\n", + overlay_path(overlay_nr)); + ret = -EINVAL; + goto out; + } + + ret = of_overlay_create(np); + if (ret < 0) { + selftest(0, "could not create overlay from \"%s\"\n", + overlay_path(overlay_nr)); + goto out; + } + id = ret; + + ret = 0; + +out: + of_node_put(np); + + if (overlay_id) + *overlay_id = id; + + return ret; +} + +/* apply an overlay while checking before and after states */ +static int of_selftest_apply_overlay_check(int overlay_nr, int selftest_nr, + int before, int after) +{ + int ret; + + /* selftest device must not be in before state */ + if (of_path_platform_device_exists(selftest_path(selftest_nr)) + != before) { + selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", + overlay_path(overlay_nr), + selftest_path(selftest_nr), + !before ? "enabled" : "disabled"); + return -EINVAL; + } + + ret = of_selftest_apply_overlay(overlay_nr, selftest_nr, NULL); + if (ret != 0) { + /* of_selftest_apply_overlay already called selftest() */ + return ret; + } + + /* selftest device must be to set to after state */ + if (of_path_platform_device_exists(selftest_path(selftest_nr)) + != after) { + selftest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n", + overlay_path(overlay_nr), + selftest_path(selftest_nr), + !after ? "enabled" : "disabled"); + return -EINVAL; + } + + return 0; +} + +/* apply an overlay and then revert it while checking before, after states */ +static int of_selftest_apply_revert_overlay_check(int overlay_nr, + int selftest_nr, int before, int after) +{ + int ret, ov_id; + + /* selftest device must be in before state */ + if (of_path_platform_device_exists(selftest_path(selftest_nr)) + != before) { + selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", + overlay_path(overlay_nr), + selftest_path(selftest_nr), + !before ? "enabled" : "disabled"); + return -EINVAL; + } + + /* apply the overlay */ + ret = of_selftest_apply_overlay(overlay_nr, selftest_nr, &ov_id); + if (ret != 0) { + /* of_selftest_apply_overlay already called selftest() */ + return ret; + } + + /* selftest device must be in after state */ + if (of_path_platform_device_exists(selftest_path(selftest_nr)) + != after) { + selftest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n", + overlay_path(overlay_nr), + selftest_path(selftest_nr), + !after ? "enabled" : "disabled"); + return -EINVAL; + } + + ret = of_overlay_destroy(ov_id); + if (ret != 0) { + selftest(0, "overlay @\"%s\" failed to be destroyed @\"%s\"\n", + overlay_path(overlay_nr), + selftest_path(selftest_nr)); + return ret; + } + + /* selftest device must be again in before state */ + if (of_path_platform_device_exists(selftest_path(selftest_nr)) + != before) { + selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", + overlay_path(overlay_nr), + selftest_path(selftest_nr), + !before ? "enabled" : "disabled"); + return -EINVAL; + } + + return 0; +} + +/* test activation of device */ +static void of_selftest_overlay_0(void) +{ + int ret; + + /* device should enable */ + ret = of_selftest_apply_overlay_check(0, 0, 0, 1); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 0); +} + +/* test deactivation of device */ +static void of_selftest_overlay_1(void) +{ + int ret; + + /* device should disable */ + ret = of_selftest_apply_overlay_check(1, 1, 1, 0); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 1); +} + +/* test activation of device */ +static void of_selftest_overlay_2(void) +{ + int ret; + + /* device should enable */ + ret = of_selftest_apply_overlay_check(2, 2, 0, 1); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 2); +} + +/* test deactivation of device */ +static void of_selftest_overlay_3(void) +{ + int ret; + + /* device should disable */ + ret = of_selftest_apply_overlay_check(3, 3, 1, 0); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 3); +} + +/* test activation of a full device node */ +static void of_selftest_overlay_4(void) +{ + int ret; + + /* device should disable */ + ret = of_selftest_apply_overlay_check(4, 4, 0, 1); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 4); +} + +/* test overlay apply/revert sequence */ +static void of_selftest_overlay_5(void) +{ + int ret; + + /* device should disable */ + ret = of_selftest_apply_revert_overlay_check(5, 5, 0, 1); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 5); +} + +/* test overlay application in sequence */ +static void of_selftest_overlay_6(void) +{ + struct device_node *np; + int ret, i, ov_id[2]; + int overlay_nr = 6, selftest_nr = 6; + int before = 0, after = 1; + + /* selftest device must be in before state */ + for (i = 0; i < 2; i++) { + if (of_path_platform_device_exists( + selftest_path(selftest_nr + i)) + != before) { + selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", + overlay_path(overlay_nr + i), + selftest_path(selftest_nr + i), + !before ? "enabled" : "disabled"); + return; + } + } + + /* apply the overlays */ + for (i = 0; i < 2; i++) { + + np = of_find_node_by_path(overlay_path(overlay_nr + i)); + if (np == NULL) { + selftest(0, "could not find overlay node @\"%s\"\n", + overlay_path(overlay_nr + i)); + return; + } + + ret = of_overlay_create(np); + if (ret < 0) { + selftest(0, "could not create overlay from \"%s\"\n", + overlay_path(overlay_nr + i)); + return; + } + ov_id[i] = ret; + } + + for (i = 0; i < 2; i++) { + /* selftest device must be in after state */ + if (of_path_platform_device_exists( + selftest_path(selftest_nr + i)) + != after) { + selftest(0, "overlay @\"%s\" failed @\"%s\" %s\n", + overlay_path(overlay_nr + i), + selftest_path(selftest_nr + i), + !after ? "enabled" : "disabled"); + return; + } + } + + for (i = 1; i >= 0; i--) { + ret = of_overlay_destroy(ov_id[i]); + if (ret != 0) { + selftest(0, "overlay @\"%s\" failed destroy @\"%s\"\n", + overlay_path(overlay_nr + i), + selftest_path(selftest_nr + i)); + return; + } + } + + for (i = 0; i < 2; i++) { + /* selftest device must be again in before state */ + if (of_path_platform_device_exists( + selftest_path(selftest_nr + i)) + != before) { + selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", + overlay_path(overlay_nr + i), + selftest_path(selftest_nr + i), + !before ? "enabled" : "disabled"); + return; + } + } + + selftest(1, "overlay test %d passed\n", 6); +} + +/* test overlay application in sequence */ +static void of_selftest_overlay_8(void) +{ + struct device_node *np; + int ret, i, ov_id[2]; + int overlay_nr = 8, selftest_nr = 8; + + /* we don't care about device state in this test */ + + /* apply the overlays */ + for (i = 0; i < 2; i++) { + + np = of_find_node_by_path(overlay_path(overlay_nr + i)); + if (np == NULL) { + selftest(0, "could not find overlay node @\"%s\"\n", + overlay_path(overlay_nr + i)); + return; + } + + ret = of_overlay_create(np); + if (ret < 0) { + selftest(0, "could not create overlay from \"%s\"\n", + overlay_path(overlay_nr + i)); + return; + } + ov_id[i] = ret; + } + + /* now try to remove first overlay (it should fail) */ + ret = of_overlay_destroy(ov_id[0]); + if (ret == 0) { + selftest(0, "overlay @\"%s\" was destroyed @\"%s\"\n", + overlay_path(overlay_nr + 0), + selftest_path(selftest_nr)); + return; + } + + /* removing them in order should work */ + for (i = 1; i >= 0; i--) { + ret = of_overlay_destroy(ov_id[i]); + if (ret != 0) { + selftest(0, "overlay @\"%s\" not destroyed @\"%s\"\n", + overlay_path(overlay_nr + i), + selftest_path(selftest_nr)); + return; + } + } + + selftest(1, "overlay test %d passed\n", 8); +} + +static void __init of_selftest_overlay(void) +{ + struct device_node *bus_np = NULL; + int ret; + + ret = platform_driver_register(&selftest_driver); + if (ret != 0) { + selftest(0, "could not register selftest driver\n"); + goto out; + } + + bus_np = of_find_node_by_path(bus_path); + if (bus_np == NULL) { + selftest(0, "could not find bus_path \"%s\"\n", bus_path); + goto out; + } + + ret = of_platform_populate(bus_np, of_default_bus_match_table, + NULL, NULL); + if (ret != 0) { + selftest(0, "could not populate bus @ \"%s\"\n", bus_path); + goto out; + } + + if (!of_path_platform_device_exists(selftest_path(100))) { + selftest(0, "could not find selftest0 @ \"%s\"\n", + selftest_path(100)); + goto out; + } + + if (of_path_platform_device_exists(selftest_path(101))) { + selftest(0, "selftest1 @ \"%s\" should not exist\n", + selftest_path(101)); + goto out; + } + + selftest(1, "basic infrastructure of overlays passed"); + + /* tests in sequence */ + of_selftest_overlay_0(); + of_selftest_overlay_1(); + of_selftest_overlay_2(); + of_selftest_overlay_3(); + of_selftest_overlay_4(); + of_selftest_overlay_5(); + of_selftest_overlay_6(); + of_selftest_overlay_8(); + +out: + of_node_put(bus_np); +} + +#else +static inline void __init of_selftest_overlay(void) { } +#endif + static int __init of_selftest(void) { struct device_node *np; @@ -911,6 +1391,7 @@ static int __init of_selftest(void) of_selftest_parse_interrupts_extended(); of_selftest_match_node(); of_selftest_platform_populate(); + of_selftest_overlay(); /* removing selftest data from live tree */ selftest_data_remove(); From 336c93f88581c011fc316d3da0bb92e4e8d78de2 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Fri, 19 Dec 2014 14:34:34 +0200 Subject: [PATCH 296/298] of/unittest: Overlays with sub-devices tests Introduce selftests for overlays using sub-devices present in children nodes. Signed-off-by: Pantelis Antoniou Signed-off-by: Grant Likely (cherry picked from commit 6b1271de3723a7957c7cc6a7f36ea114f557e730) Signed-off-by: Mark Brown --- drivers/of/unittest-data/tests-overlay.dtsi | 55 +++++++++++++++++++++ drivers/of/unittest.c | 39 +++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/drivers/of/unittest-data/tests-overlay.dtsi b/drivers/of/unittest-data/tests-overlay.dtsi index 75976da22b2e..a2b687d5f324 100644 --- a/drivers/of/unittest-data/tests-overlay.dtsi +++ b/drivers/of/unittest-data/tests-overlay.dtsi @@ -176,5 +176,60 @@ }; }; + overlay10 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus"; + __overlay__ { + + /* suppress DTC warning */ + #address-cells = <1>; + #size-cells = <0>; + + test-selftest10 { + compatible = "selftest"; + status = "okay"; + reg = <10>; + + #address-cells = <1>; + #size-cells = <0>; + + test-selftest101 { + compatible = "selftest"; + status = "okay"; + reg = <1>; + }; + + }; + }; + }; + }; + + overlay11 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus"; + __overlay__ { + + /* suppress DTC warning */ + #address-cells = <1>; + #size-cells = <0>; + + test-selftest11 { + compatible = "selftest"; + status = "okay"; + reg = <11>; + + #address-cells = <1>; + #size-cells = <0>; + + test-selftest111 { + compatible = "selftest"; + status = "okay"; + reg = <1>; + }; + + }; + }; + }; + }; }; }; diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index f312502c8f46..0bc3203f6568 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -897,6 +897,9 @@ static int selftest_probe(struct platform_device *pdev) } dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + + of_platform_populate(np, NULL, NULL, &pdev->dev); + return 0; } @@ -1305,6 +1308,39 @@ static void of_selftest_overlay_8(void) selftest(1, "overlay test %d passed\n", 8); } +/* test insertion of a bus with parent devices */ +static void of_selftest_overlay_10(void) +{ + int ret; + char *child_path; + + /* device should disable */ + ret = of_selftest_apply_overlay_check(10, 10, 0, 1); + if (selftest(ret == 0, "overlay test %d failed; overlay application\n", 10)) + return; + + child_path = kasprintf(GFP_KERNEL, "%s/test-selftest101", + selftest_path(10)); + if (selftest(child_path, "overlay test %d failed; kasprintf\n", 10)) + return; + + ret = of_path_platform_device_exists(child_path); + kfree(child_path); + if (selftest(ret, "overlay test %d failed; no child device\n", 10)) + return; +} + +/* test insertion of a bus with parent devices (and revert) */ +static void of_selftest_overlay_11(void) +{ + int ret; + + /* device should disable */ + ret = of_selftest_apply_revert_overlay_check(11, 11, 0, 1); + if (selftest(ret == 0, "overlay test %d failed; overlay application\n", 11)) + return; +} + static void __init of_selftest_overlay(void) { struct device_node *bus_np = NULL; @@ -1353,6 +1389,9 @@ static void __init of_selftest_overlay(void) of_selftest_overlay_6(); of_selftest_overlay_8(); + of_selftest_overlay_10(); + of_selftest_overlay_11(); + out: of_node_put(bus_np); } From 62a3e7a53f2536b8b7456bf1d7c013ae31372945 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Mon, 12 Jan 2015 19:02:49 +0200 Subject: [PATCH 297/298] of: unitest: Add I2C overlay unit tests. Introduce I2C device tree overlay tests. Tests insertion and removal of i2c adapters, i2c devices, and muxes. Signed-off-by: Pantelis Antoniou Signed-off-by: Rob Herring (cherry picked from commit d5e75500ca401d3128c82c5b0dee2f9b259d5b5c) Signed-off-by: Mark Brown --- .../devicetree/bindings/unittest.txt | 59 +- drivers/of/unittest-data/tests-overlay.dtsi | 94 ++++ drivers/of/unittest.c | 512 ++++++++++++++++-- 3 files changed, 614 insertions(+), 51 deletions(-) diff --git a/Documentation/devicetree/bindings/unittest.txt b/Documentation/devicetree/bindings/unittest.txt index 0f92a22fddfa..8933211f32f9 100644 --- a/Documentation/devicetree/bindings/unittest.txt +++ b/Documentation/devicetree/bindings/unittest.txt @@ -1,4 +1,4 @@ -* OF selftest platform device +1) OF selftest platform device ** selftest @@ -12,3 +12,60 @@ Example: compatible = "selftest"; status = "okay"; }; + +2) OF selftest i2c adapter platform device + +** platform device unittest adapter + +Required properties: +- compatible: must be selftest-i2c-bus + +Children nodes contain selftest i2c devices. + +Example: + selftest-i2c-bus { + compatible = "selftest-i2c-bus"; + status = "okay"; + }; + +3) OF selftest i2c device + +** I2C selftest device + +Required properties: +- compatible: must be selftest-i2c-dev + +All other properties are optional + +Example: + selftest-i2c-dev { + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + +4) OF selftest i2c mux device + +** I2C selftest mux + +Required properties: +- compatible: must be selftest-i2c-mux + +Children nodes contain selftest i2c bus nodes per channel. + +Example: + selftest-i2c-mux { + compatible = "selftest-i2c-mux"; + status = "okay"; + #address-cells = <1>; + #size-cells = <0>; + channel-0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + i2c-dev { + reg = <8>; + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + }; + }; diff --git a/drivers/of/unittest-data/tests-overlay.dtsi b/drivers/of/unittest-data/tests-overlay.dtsi index a2b687d5f324..244226cbb5a3 100644 --- a/drivers/of/unittest-data/tests-overlay.dtsi +++ b/drivers/of/unittest-data/tests-overlay.dtsi @@ -68,6 +68,48 @@ status = "disabled"; reg = <8>; }; + + i2c-test-bus { + compatible = "selftest-i2c-bus"; + status = "okay"; + reg = <50>; + + #address-cells = <1>; + #size-cells = <0>; + + test-selftest12 { + reg = <8>; + compatible = "selftest-i2c-dev"; + status = "disabled"; + }; + + test-selftest13 { + reg = <9>; + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + + test-selftest14 { + reg = <10>; + compatible = "selftest-i2c-mux"; + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + test-mux-dev { + reg = <32>; + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + }; + }; + }; }; }; @@ -231,5 +273,57 @@ }; }; }; + + /* test enable using absolute target path (i2c) */ + overlay12 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/i2c-test-bus/test-selftest12"; + __overlay__ { + status = "okay"; + }; + }; + }; + + /* test disable using absolute target path (i2c) */ + overlay13 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/i2c-test-bus/test-selftest13"; + __overlay__ { + status = "disabled"; + }; + }; + }; + + /* test mux overlay */ + overlay15 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/i2c-test-bus"; + __overlay__ { + #address-cells = <1>; + #size-cells = <0>; + test-selftest15 { + reg = <11>; + compatible = "selftest-i2c-mux"; + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + test-mux-dev { + reg = <32>; + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + }; + }; + }; + }; + }; + }; }; diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 0bc3203f6568..23249591d2e8 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -20,6 +20,9 @@ #include #include +#include +#include + #include "of_private.h" static struct selftest_results { @@ -954,17 +957,94 @@ static int of_path_platform_device_exists(const char *path) return pdev != NULL; } -static const char *selftest_path(int nr) +#if IS_ENABLED(CONFIG_I2C) + +/* get the i2c client device instantiated at the path */ +static struct i2c_client *of_path_to_i2c_client(const char *path) { + struct device_node *np; + struct i2c_client *client; + + np = of_find_node_by_path(path); + if (np == NULL) + return NULL; + + client = of_find_i2c_device_by_node(np); + of_node_put(np); + + return client; +} + +/* find out if a i2c client device exists at that path */ +static int of_path_i2c_client_exists(const char *path) +{ + struct i2c_client *client; + + client = of_path_to_i2c_client(path); + if (client) + put_device(&client->dev); + return client != NULL; +} +#else +static int of_path_i2c_client_exists(const char *path) +{ + return 0; +} +#endif + +enum overlay_type { + PDEV_OVERLAY, + I2C_OVERLAY +}; + +static int of_path_device_type_exists(const char *path, + enum overlay_type ovtype) +{ + switch (ovtype) { + case PDEV_OVERLAY: + return of_path_platform_device_exists(path); + case I2C_OVERLAY: + return of_path_i2c_client_exists(path); + } + return 0; +} + +static const char *selftest_path(int nr, enum overlay_type ovtype) +{ + const char *base; static char buf[256]; - snprintf(buf, sizeof(buf) - 1, - "/testcase-data/overlay-node/test-bus/test-selftest%d", nr); + switch (ovtype) { + case PDEV_OVERLAY: + base = "/testcase-data/overlay-node/test-bus"; + break; + case I2C_OVERLAY: + base = "/testcase-data/overlay-node/test-bus/i2c-test-bus"; + break; + default: + buf[0] = '\0'; + return buf; + } + snprintf(buf, sizeof(buf) - 1, "%s/test-selftest%d", base, nr); buf[sizeof(buf) - 1] = '\0'; - return buf; } +static int of_selftest_device_exists(int selftest_nr, enum overlay_type ovtype) +{ + const char *path; + + path = selftest_path(selftest_nr, ovtype); + + switch (ovtype) { + case PDEV_OVERLAY: + return of_path_platform_device_exists(path); + case I2C_OVERLAY: + return of_path_i2c_client_exists(path); + } + return 0; +} + static const char *overlay_path(int nr) { static char buf[256]; @@ -1013,16 +1093,15 @@ out: /* apply an overlay while checking before and after states */ static int of_selftest_apply_overlay_check(int overlay_nr, int selftest_nr, - int before, int after) + int before, int after, enum overlay_type ovtype) { int ret; /* selftest device must not be in before state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != before) { + if (of_selftest_device_exists(selftest_nr, ovtype) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !before ? "enabled" : "disabled"); return -EINVAL; } @@ -1034,11 +1113,10 @@ static int of_selftest_apply_overlay_check(int overlay_nr, int selftest_nr, } /* selftest device must be to set to after state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != after) { + if (of_selftest_device_exists(selftest_nr, ovtype) != after) { selftest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !after ? "enabled" : "disabled"); return -EINVAL; } @@ -1048,16 +1126,16 @@ static int of_selftest_apply_overlay_check(int overlay_nr, int selftest_nr, /* apply an overlay and then revert it while checking before, after states */ static int of_selftest_apply_revert_overlay_check(int overlay_nr, - int selftest_nr, int before, int after) + int selftest_nr, int before, int after, + enum overlay_type ovtype) { int ret, ov_id; /* selftest device must be in before state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != before) { + if (of_selftest_device_exists(selftest_nr, ovtype) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !before ? "enabled" : "disabled"); return -EINVAL; } @@ -1070,11 +1148,10 @@ static int of_selftest_apply_revert_overlay_check(int overlay_nr, } /* selftest device must be in after state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != after) { + if (of_selftest_device_exists(selftest_nr, ovtype) != after) { selftest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !after ? "enabled" : "disabled"); return -EINVAL; } @@ -1083,16 +1160,15 @@ static int of_selftest_apply_revert_overlay_check(int overlay_nr, if (ret != 0) { selftest(0, "overlay @\"%s\" failed to be destroyed @\"%s\"\n", overlay_path(overlay_nr), - selftest_path(selftest_nr)); + selftest_path(selftest_nr, ovtype)); return ret; } /* selftest device must be again in before state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != before) { + if (of_selftest_device_exists(selftest_nr, PDEV_OVERLAY) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !before ? "enabled" : "disabled"); return -EINVAL; } @@ -1106,7 +1182,7 @@ static void of_selftest_overlay_0(void) int ret; /* device should enable */ - ret = of_selftest_apply_overlay_check(0, 0, 0, 1); + ret = of_selftest_apply_overlay_check(0, 0, 0, 1, PDEV_OVERLAY); if (ret != 0) return; @@ -1119,7 +1195,7 @@ static void of_selftest_overlay_1(void) int ret; /* device should disable */ - ret = of_selftest_apply_overlay_check(1, 1, 1, 0); + ret = of_selftest_apply_overlay_check(1, 1, 1, 0, PDEV_OVERLAY); if (ret != 0) return; @@ -1132,7 +1208,7 @@ static void of_selftest_overlay_2(void) int ret; /* device should enable */ - ret = of_selftest_apply_overlay_check(2, 2, 0, 1); + ret = of_selftest_apply_overlay_check(2, 2, 0, 1, PDEV_OVERLAY); if (ret != 0) return; @@ -1145,7 +1221,7 @@ static void of_selftest_overlay_3(void) int ret; /* device should disable */ - ret = of_selftest_apply_overlay_check(3, 3, 1, 0); + ret = of_selftest_apply_overlay_check(3, 3, 1, 0, PDEV_OVERLAY); if (ret != 0) return; @@ -1158,7 +1234,7 @@ static void of_selftest_overlay_4(void) int ret; /* device should disable */ - ret = of_selftest_apply_overlay_check(4, 4, 0, 1); + ret = of_selftest_apply_overlay_check(4, 4, 0, 1, PDEV_OVERLAY); if (ret != 0) return; @@ -1171,7 +1247,7 @@ static void of_selftest_overlay_5(void) int ret; /* device should disable */ - ret = of_selftest_apply_revert_overlay_check(5, 5, 0, 1); + ret = of_selftest_apply_revert_overlay_check(5, 5, 0, 1, PDEV_OVERLAY); if (ret != 0) return; @@ -1188,12 +1264,12 @@ static void of_selftest_overlay_6(void) /* selftest device must be in before state */ for (i = 0; i < 2; i++) { - if (of_path_platform_device_exists( - selftest_path(selftest_nr + i)) + if (of_selftest_device_exists(selftest_nr + i, PDEV_OVERLAY) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr + i), + selftest_path(selftest_nr + i, + PDEV_OVERLAY), !before ? "enabled" : "disabled"); return; } @@ -1220,12 +1296,12 @@ static void of_selftest_overlay_6(void) for (i = 0; i < 2; i++) { /* selftest device must be in after state */ - if (of_path_platform_device_exists( - selftest_path(selftest_nr + i)) + if (of_selftest_device_exists(selftest_nr + i, PDEV_OVERLAY) != after) { selftest(0, "overlay @\"%s\" failed @\"%s\" %s\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr + i), + selftest_path(selftest_nr + i, + PDEV_OVERLAY), !after ? "enabled" : "disabled"); return; } @@ -1236,19 +1312,20 @@ static void of_selftest_overlay_6(void) if (ret != 0) { selftest(0, "overlay @\"%s\" failed destroy @\"%s\"\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr + i)); + selftest_path(selftest_nr + i, + PDEV_OVERLAY)); return; } } for (i = 0; i < 2; i++) { /* selftest device must be again in before state */ - if (of_path_platform_device_exists( - selftest_path(selftest_nr + i)) + if (of_selftest_device_exists(selftest_nr + i, PDEV_OVERLAY) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr + i), + selftest_path(selftest_nr + i, + PDEV_OVERLAY), !before ? "enabled" : "disabled"); return; } @@ -1290,7 +1367,8 @@ static void of_selftest_overlay_8(void) if (ret == 0) { selftest(0, "overlay @\"%s\" was destroyed @\"%s\"\n", overlay_path(overlay_nr + 0), - selftest_path(selftest_nr)); + selftest_path(selftest_nr, + PDEV_OVERLAY)); return; } @@ -1300,7 +1378,8 @@ static void of_selftest_overlay_8(void) if (ret != 0) { selftest(0, "overlay @\"%s\" not destroyed @\"%s\"\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr)); + selftest_path(selftest_nr, + PDEV_OVERLAY)); return; } } @@ -1315,16 +1394,17 @@ static void of_selftest_overlay_10(void) char *child_path; /* device should disable */ - ret = of_selftest_apply_overlay_check(10, 10, 0, 1); - if (selftest(ret == 0, "overlay test %d failed; overlay application\n", 10)) + ret = of_selftest_apply_overlay_check(10, 10, 0, 1, PDEV_OVERLAY); + if (selftest(ret == 0, + "overlay test %d failed; overlay application\n", 10)) return; child_path = kasprintf(GFP_KERNEL, "%s/test-selftest101", - selftest_path(10)); + selftest_path(10, PDEV_OVERLAY)); if (selftest(child_path, "overlay test %d failed; kasprintf\n", 10)) return; - ret = of_path_platform_device_exists(child_path); + ret = of_path_device_type_exists(child_path, PDEV_OVERLAY); kfree(child_path); if (selftest(ret, "overlay test %d failed; no child device\n", 10)) return; @@ -1336,11 +1416,331 @@ static void of_selftest_overlay_11(void) int ret; /* device should disable */ - ret = of_selftest_apply_revert_overlay_check(11, 11, 0, 1); - if (selftest(ret == 0, "overlay test %d failed; overlay application\n", 11)) + ret = of_selftest_apply_revert_overlay_check(11, 11, 0, 1, + PDEV_OVERLAY); + if (selftest(ret == 0, + "overlay test %d failed; overlay application\n", 11)) return; } +#if IS_ENABLED(CONFIG_I2C) && IS_ENABLED(CONFIG_OF_OVERLAY) + +struct selftest_i2c_bus_data { + struct platform_device *pdev; + struct i2c_adapter adap; +}; + +static int selftest_i2c_master_xfer(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + struct selftest_i2c_bus_data *std = i2c_get_adapdata(adap); + + (void)std; + + return num; +} + +static u32 selftest_i2c_functionality(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + +static const struct i2c_algorithm selftest_i2c_algo = { + .master_xfer = selftest_i2c_master_xfer, + .functionality = selftest_i2c_functionality, +}; + +static int selftest_i2c_bus_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct selftest_i2c_bus_data *std; + struct i2c_adapter *adap; + int ret; + + if (np == NULL) { + dev_err(dev, "No OF data for device\n"); + return -EINVAL; + + } + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + + std = devm_kzalloc(dev, sizeof(*std), GFP_KERNEL); + if (!std) { + dev_err(dev, "Failed to allocate selftest i2c data\n"); + return -ENOMEM; + } + + /* link them together */ + std->pdev = pdev; + platform_set_drvdata(pdev, std); + + adap = &std->adap; + i2c_set_adapdata(adap, std); + adap->nr = -1; + strlcpy(adap->name, pdev->name, sizeof(adap->name)); + adap->class = I2C_CLASS_DEPRECATED; + adap->algo = &selftest_i2c_algo; + adap->dev.parent = dev; + adap->dev.of_node = dev->of_node; + adap->timeout = 5 * HZ; + adap->retries = 3; + + ret = i2c_add_numbered_adapter(adap); + if (ret != 0) { + dev_err(dev, "Failed to add I2C adapter\n"); + return ret; + } + + return 0; +} + +static int selftest_i2c_bus_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct selftest_i2c_bus_data *std = platform_get_drvdata(pdev); + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + i2c_del_adapter(&std->adap); + + return 0; +} + +static struct of_device_id selftest_i2c_bus_match[] = { + { .compatible = "selftest-i2c-bus", }, + {}, +}; + +static struct platform_driver selftest_i2c_bus_driver = { + .probe = selftest_i2c_bus_probe, + .remove = selftest_i2c_bus_remove, + .driver = { + .name = "selftest-i2c-bus", + .of_match_table = of_match_ptr(selftest_i2c_bus_match), + }, +}; + +static int selftest_i2c_dev_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct device_node *np = client->dev.of_node; + + if (!np) { + dev_err(dev, "No OF node\n"); + return -EINVAL; + } + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + + return 0; +}; + +static int selftest_i2c_dev_remove(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct device_node *np = client->dev.of_node; + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + return 0; +} + +static const struct i2c_device_id selftest_i2c_dev_id[] = { + { .name = "selftest-i2c-dev" }, + { } +}; + +static struct i2c_driver selftest_i2c_dev_driver = { + .driver = { + .name = "selftest-i2c-dev", + .owner = THIS_MODULE, + }, + .probe = selftest_i2c_dev_probe, + .remove = selftest_i2c_dev_remove, + .id_table = selftest_i2c_dev_id, +}; + +#if IS_ENABLED(CONFIG_I2C_MUX) + +struct selftest_i2c_mux_data { + int nchans; + struct i2c_adapter *adap[]; +}; + +static int selftest_i2c_mux_select_chan(struct i2c_adapter *adap, + void *client, u32 chan) +{ + return 0; +} + +static int selftest_i2c_mux_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret, i, nchans, size; + struct device *dev = &client->dev; + struct i2c_adapter *adap = to_i2c_adapter(dev->parent); + struct device_node *np = client->dev.of_node, *child; + struct selftest_i2c_mux_data *stm; + u32 reg, max_reg; + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + + if (!np) { + dev_err(dev, "No OF node\n"); + return -EINVAL; + } + + max_reg = (u32)-1; + for_each_child_of_node(np, child) { + ret = of_property_read_u32(child, "reg", ®); + if (ret) + continue; + if (max_reg == (u32)-1 || reg > max_reg) + max_reg = reg; + } + nchans = max_reg == (u32)-1 ? 0 : max_reg + 1; + if (nchans == 0) { + dev_err(dev, "No channels\n"); + return -EINVAL; + } + + size = offsetof(struct selftest_i2c_mux_data, adap[nchans]); + stm = devm_kzalloc(dev, size, GFP_KERNEL); + if (!stm) { + dev_err(dev, "Out of memory\n"); + return -ENOMEM; + } + stm->nchans = nchans; + for (i = 0; i < nchans; i++) { + stm->adap[i] = i2c_add_mux_adapter(adap, dev, client, + 0, i, 0, selftest_i2c_mux_select_chan, NULL); + if (!stm->adap[i]) { + dev_err(dev, "Failed to register mux #%d\n", i); + for (i--; i >= 0; i--) + i2c_del_mux_adapter(stm->adap[i]); + return -ENODEV; + } + } + + i2c_set_clientdata(client, stm); + + return 0; +}; + +static int selftest_i2c_mux_remove(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct device_node *np = client->dev.of_node; + struct selftest_i2c_mux_data *stm = i2c_get_clientdata(client); + int i; + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + for (i = stm->nchans - 1; i >= 0; i--) + i2c_del_mux_adapter(stm->adap[i]); + return 0; +} + +static const struct i2c_device_id selftest_i2c_mux_id[] = { + { .name = "selftest-i2c-mux" }, + { } +}; + +static struct i2c_driver selftest_i2c_mux_driver = { + .driver = { + .name = "selftest-i2c-mux", + .owner = THIS_MODULE, + }, + .probe = selftest_i2c_mux_probe, + .remove = selftest_i2c_mux_remove, + .id_table = selftest_i2c_mux_id, +}; + +#endif + +static int of_selftest_overlay_i2c_init(void) +{ + int ret; + + ret = i2c_add_driver(&selftest_i2c_dev_driver); + if (selftest(ret == 0, + "could not register selftest i2c device driver\n")) + return ret; + + ret = platform_driver_register(&selftest_i2c_bus_driver); + if (selftest(ret == 0, + "could not register selftest i2c bus driver\n")) + return ret; + +#if IS_ENABLED(CONFIG_I2C_MUX) + ret = i2c_add_driver(&selftest_i2c_mux_driver); + if (selftest(ret == 0, + "could not register selftest i2c mux driver\n")) + return ret; +#endif + + return 0; +} + +static void of_selftest_overlay_i2c_cleanup(void) +{ +#if IS_ENABLED(CONFIG_I2C_MUX) + i2c_del_driver(&selftest_i2c_mux_driver); +#endif + platform_driver_unregister(&selftest_i2c_bus_driver); + i2c_del_driver(&selftest_i2c_dev_driver); +} + +static void of_selftest_overlay_i2c_12(void) +{ + int ret; + + /* device should enable */ + ret = of_selftest_apply_overlay_check(12, 12, 0, 1, I2C_OVERLAY); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 12); +} + +/* test deactivation of device */ +static void of_selftest_overlay_i2c_13(void) +{ + int ret; + + /* device should disable */ + ret = of_selftest_apply_overlay_check(13, 13, 1, 0, I2C_OVERLAY); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 13); +} + +/* just check for i2c mux existence */ +static void of_selftest_overlay_i2c_14(void) +{ +} + +static void of_selftest_overlay_i2c_15(void) +{ + int ret; + + /* device should enable */ + ret = of_selftest_apply_overlay_check(16, 15, 0, 1, I2C_OVERLAY); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 15); +} + +#else + +static inline void of_selftest_overlay_i2c_14(void) { } +static inline void of_selftest_overlay_i2c_15(void) { } + +#endif + static void __init of_selftest_overlay(void) { struct device_node *bus_np = NULL; @@ -1365,15 +1765,15 @@ static void __init of_selftest_overlay(void) goto out; } - if (!of_path_platform_device_exists(selftest_path(100))) { + if (!of_selftest_device_exists(100, PDEV_OVERLAY)) { selftest(0, "could not find selftest0 @ \"%s\"\n", - selftest_path(100)); + selftest_path(100, PDEV_OVERLAY)); goto out; } - if (of_path_platform_device_exists(selftest_path(101))) { + if (of_selftest_device_exists(101, PDEV_OVERLAY)) { selftest(0, "selftest1 @ \"%s\" should not exist\n", - selftest_path(101)); + selftest_path(101, PDEV_OVERLAY)); goto out; } @@ -1392,6 +1792,18 @@ static void __init of_selftest_overlay(void) of_selftest_overlay_10(); of_selftest_overlay_11(); +#if IS_ENABLED(CONFIG_I2C) + if (selftest(of_selftest_overlay_i2c_init() == 0, "i2c init failed\n")) + goto out; + + of_selftest_overlay_i2c_12(); + of_selftest_overlay_i2c_13(); + of_selftest_overlay_i2c_14(); + of_selftest_overlay_i2c_15(); + + of_selftest_overlay_i2c_cleanup(); +#endif + out: of_node_put(bus_np); } From 2ea8f7ea87f24735c49543c7f523919295028c58 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 21 Feb 2015 00:05:56 +0900 Subject: [PATCH 298/298] of: unittest: Fix mispick of testcase data Ensure we have the data we're going to use for the changeset tests. Signed-off-by: Mark Brown --- drivers/of/unittest-data/testcases.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/of/unittest-data/testcases.dts b/drivers/of/unittest-data/testcases.dts index d26b96a87e8f..16c9c8775a5c 100644 --- a/drivers/of/unittest-data/testcases.dts +++ b/drivers/of/unittest-data/testcases.dts @@ -1,4 +1,14 @@ /dts-v1/; +/ { + testcase-data { + changeset { + prop-update = "hello"; + prop-remove = "world"; + node-remove { + }; + }; + }; +}; #include "tests-phandle.dtsi" #include "tests-interrupts.dtsi" #include "tests-match.dtsi"