From eb7124b0cca13de746719238eceba21537e8b396 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Sun, 4 Aug 2019 16:50:44 -0700 Subject: [PATCH 001/104] usb: usbfs: fix double-free of usb memory upon submiturb error commit c43f28dfdc4654e738aa6d3fd08a105b2bee758d upstream. Upon an error within proc_do_submiturb(), dec_usb_memory_use_count() gets called once by the error handling tail and again by free_async(). Remove the first call. Signed-off-by: Gavin Li Acked-by: Alan Stern Cc: stable Link: https://lore.kernel.org/r/20190804235044.22327-1-gavinli@thegavinli.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 52fc2084b310..06a8f645106b 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1810,8 +1810,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb return 0; error: - if (as && as->usbm) - dec_usb_memory_use_count(as->usbm, &as->usbm->urb_use_count); kfree(isopkt); kfree(dr); if (as) From f34431096132f2865dd8ca02448a0b112fa52d52 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 8 Aug 2019 11:27:28 +0200 Subject: [PATCH 002/104] usb: iowarrior: fix deadlock on disconnect commit c468a8aa790e0dfe0a7f8a39db282d39c2c00b46 upstream. We have to drop the mutex before we close() upon disconnect() as close() needs the lock. This is safe to do by dropping the mutex as intfdata is already set to NULL, so open() will fail. Fixes: 03f36e885fc26 ("USB: open disconnect race in iowarrior") Reported-by: syzbot+a64a382964bf6c71a9c0@syzkaller.appspotmail.com Cc: stable Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20190808092728.23417-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 0ef29d202263..318e087f8442 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -886,19 +886,20 @@ static void iowarrior_disconnect(struct usb_interface *interface) dev = usb_get_intfdata(interface); mutex_lock(&iowarrior_open_disc_lock); usb_set_intfdata(interface, NULL); + /* prevent device read, write and ioctl */ + dev->present = 0; minor = dev->minor; + mutex_unlock(&iowarrior_open_disc_lock); + /* give back our minor - this will call close() locks need to be dropped at this point*/ - /* give back our minor */ usb_deregister_dev(interface, &iowarrior_class); mutex_lock(&dev->mutex); /* prevent device read, write and ioctl */ - dev->present = 0; mutex_unlock(&dev->mutex); - mutex_unlock(&iowarrior_open_disc_lock); if (dev->opened) { /* There is a process that holds a filedescriptor to the device , From 808dff56c326ebab0a4848d501ebe60b612e3815 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 8 Aug 2019 00:15:21 -0500 Subject: [PATCH 003/104] sound: fix a memory leak bug commit c7cd7c748a3250ca33509f9235efab9c803aca09 upstream. In sound_insert_unit(), the controlling structure 's' is allocated through kmalloc(). Then it is added to the sound driver list by invoking __sound_insert_unit(). Later on, if __register_chrdev() fails, 's' is removed from the list through __sound_remove_unit(). If 'index' is not less than 0, -EBUSY is returned to indicate the error. However, 's' is not deallocated on this execution path, leading to a memory leak bug. To fix the above issue, free 's' before -EBUSY is returned. Signed-off-by: Wenwen Wang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/sound_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/sound_core.c b/sound/sound_core.c index 99b73c675743..20d4e2e1bacf 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -287,7 +287,8 @@ retry: goto retry; } spin_unlock(&sound_loader_lock); - return -EBUSY; + r = -EBUSY; + goto fail; } } From a6932b4b97baea5f96ea857271612c6ed37a26a2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 19 Jul 2019 20:46:50 +0200 Subject: [PATCH 004/104] x86/mm: Check for pfn instead of page in vmalloc_sync_one() commit 51b75b5b563a2637f9d8dc5bd02a31b2ff9e5ea0 upstream. Do not require a struct page for the mapped memory location because it might not exist. This can happen when an ioremapped region is mapped with 2MB pages. Fixes: 5d72b4fba40ef ('x86, mm: support huge I/O mapping capability I/F') Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Reviewed-by: Dave Hansen Link: https://lkml.kernel.org/r/20190719184652.11391-2-joro@8bytes.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c140198d9fa5..2870424bda1f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -279,7 +279,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) if (!pmd_present(*pmd)) set_pmd(pmd, *pmd_k); else - BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); + BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k)); return pmd_k; } From ffd85e35d6354ebf85ac7db6388254b8ebf57bd4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 19 Jul 2019 20:46:51 +0200 Subject: [PATCH 005/104] x86/mm: Sync also unmappings in vmalloc_sync_all() commit 8e998fc24de47c55b47a887f6c95ab91acd4a720 upstream. With huge-page ioremap areas the unmappings also need to be synced between all page-tables. Otherwise it can cause data corruption when a region is unmapped and later re-used. Make the vmalloc_sync_one() function ready to sync unmappings and make sure vmalloc_sync_all() iterates over all page-tables even when an unmapped PMD is found. Fixes: 5d72b4fba40ef ('x86, mm: support huge I/O mapping capability I/F') Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Reviewed-by: Dave Hansen Link: https://lkml.kernel.org/r/20190719184652.11391-3-joro@8bytes.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/fault.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2870424bda1f..7f4b3c59df47 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -273,11 +273,12 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) pmd = pmd_offset(pud, address); pmd_k = pmd_offset(pud_k, address); + + if (pmd_present(*pmd) != pmd_present(*pmd_k)) + set_pmd(pmd, *pmd_k); + if (!pmd_present(*pmd_k)) return NULL; - - if (!pmd_present(*pmd)) - set_pmd(pmd, *pmd_k); else BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k)); @@ -299,17 +300,13 @@ void vmalloc_sync_all(void) spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { spinlock_t *pgt_lock; - pmd_t *ret; /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); - ret = vmalloc_sync_one(page_address(page), address); + vmalloc_sync_one(page_address(page), address); spin_unlock(pgt_lock); - - if (!ret) - break; } spin_unlock(&pgd_lock); } From 6c8f40d2c2724d61bdcbc528cdad1ee0d763012e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 19 Jul 2019 20:46:52 +0200 Subject: [PATCH 006/104] mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy() commit 3f8fd02b1bf1d7ba964485a56f2f4b53ae88c167 upstream. On x86-32 with PTI enabled, parts of the kernel page-tables are not shared between processes. This can cause mappings in the vmalloc/ioremap area to persist in some page-tables after the region is unmapped and released. When the region is re-used the processes with the old mappings do not fault in the new mappings but still access the old ones. This causes undefined behavior, in reality often data corruption, kernel oopses and panics and even spontaneous reboots. Fix this problem by activly syncing unmaps in the vmalloc/ioremap area to all page-tables in the system before the regions can be re-used. References: https://bugzilla.suse.com/show_bug.cgi?id=1118689 Fixes: 5d72b4fba40ef ('x86, mm: support huge I/O mapping capability I/F') Signed-off-by: Joerg Roedel Signed-off-by: Thomas Gleixner Reviewed-by: Dave Hansen Link: https://lkml.kernel.org/r/20190719184652.11391-4-joro@8bytes.org Signed-off-by: Greg Kroah-Hartman --- mm/vmalloc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 73afe460caf0..dd66f1fb3fcf 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1707,6 +1707,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!addr) return NULL; + /* + * First make sure the mappings are removed from all page-tables + * before they are freed. + */ + vmalloc_sync_all(); + /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED * flag. It means that vm_struct is not fully initialized. @@ -2243,6 +2249,9 @@ EXPORT_SYMBOL(remap_vmalloc_range); /* * Implement a stub for vmalloc_sync_all() if the architecture chose not to * have one. + * + * The purpose of this function is to make sure the vmalloc area + * mappings are identical in all page-tables in the system. */ void __weak vmalloc_sync_all(void) { From 624c2b3696baa621a724e7772284a4f5a48d3002 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 3 Aug 2017 15:49:02 +0200 Subject: [PATCH 007/104] perf record: Fix wrong size in perf_record_mmap for last kernel module commit 9ad4652b66f19a60f07e63b942b80b5c2d7465bf upstream. During work on perf report for s390 I ran into the following issue: 0 0x318 [0x78]: PERF_RECORD_MMAP -1/0: [0x3ff804d6990(0xfffffc007fb2966f) @ 0]: x /lib/modules/4.12.0perf1+/kernel/drivers/s390/net/qeth_l2.ko This is a PERF_RECORD_MMAP entry of the perf.data file with an invalid module size for qeth_l2.ko (the s390 ethernet device driver). Even a mainframe does not have 0xfffffc007fb2966f bytes of main memory. It turned out that this wrong size is created by the perf record command. What happens is this function call sequence from __cmd_record(): perf_session__new(): perf_session__create_kernel_maps(): machine__create_kernel_maps(): machine__create_modules(): Creates map for all loaded kernel modules. modules__parse(): Reads /proc/modules and extracts module name and load address (1st and last column) machine__create_module(): Called for every module found in /proc/modules. Creates a new map for every module found and enters module name and start address into the map. Since the module end address is unknown it is set to zero. This ends up with a kernel module map list sorted by module start addresses. All module end addresses are zero. Last machine__create_kernel_maps() calls function map_groups__fixup_end(). This function iterates through the maps and assigns each map entry's end address the successor map entry start address. The last entry of the map group has no successor, so ~0 is used as end to consume the remaining memory. Later __cmd_record calls function record__synthesize() which in turn calls perf_event__synthesize_kernel_mmap() and perf_event__synthesize_modules() to create PERF_REPORT_MMAP entries into the perf.data file. On s390 this results in the last module qeth_l2.ko (which has highest start address, see module table: [root@s8360047 perf]# cat /proc/modules qeth_l2 86016 1 - Live 0x000003ff804d6000 qeth 266240 1 qeth_l2, Live 0x000003ff80296000 ccwgroup 24576 1 qeth, Live 0x000003ff80218000 vmur 36864 0 - Live 0x000003ff80182000 qdio 143360 2 qeth_l2,qeth, Live 0x000003ff80002000 [root@s8360047 perf]# ) to be the last entry and its map has an end address of ~0. When the PERF_RECORD_MMAP entry is created for kernel module qeth_l2.ko its start address and length is written. The length is calculated in line: event->mmap.len = pos->end - pos->start; and results in 0xffffffffffffffff - 0x3ff804d6990(*) = 0xfffffc007fb2966f (*) On s390 the module start address is actually determined by a __weak function named arch__fix_module_text_start() in machine__create_module(). I think this improvable. We can use the module size (2nd column of /proc/modules) to get each loaded kernel module size and calculate its end address. Only for map entries which do not have a valid end address (end is still zero) we can use the heuristic we have now, that is use successor start address or ~0. Signed-off-by: Thomas-Mich Richter Reviewed-by: Arnaldo Carvalho de Melo Cc: Hendrik Brueckner Cc: Thomas-Mich Richter Cc: Zvonko Kosic LPU-Reference: 20170803134902.47207-2-tmricht@linux.vnet.ibm.com Link: http://lkml.kernel.org/n/tip-nmoqij5b5vxx7rq2ckwu8iaj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Cc: Daniel Daz Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/machine.c | 4 +++- tools/perf/util/symbol-elf.c | 2 +- tools/perf/util/symbol.c | 21 ++++++++++++++------- tools/perf/util/symbol.h | 2 +- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index df85b9efd80f..dbf2d68b9e66 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1079,7 +1079,8 @@ int __weak arch__fix_module_text_start(u64 *start __maybe_unused, return 0; } -static int machine__create_module(void *arg, const char *name, u64 start) +static int machine__create_module(void *arg, const char *name, u64 start, + u64 size) { struct machine *machine = arg; struct map *map; @@ -1090,6 +1091,7 @@ static int machine__create_module(void *arg, const char *name, u64 start) map = machine__findnew_module_map(machine, start, name); if (map == NULL) return -1; + map->end = start + size; dso__kernel_module_get_build_id(map->dso, machine->root_dir); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 20ba5a9aeae4..5a50326c8158 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1478,7 +1478,7 @@ static int kcore_copy__parse_kallsyms(struct kcore_copy_info *kci, static int kcore_copy__process_modules(void *arg, const char *name __maybe_unused, - u64 start) + u64 start, u64 size __maybe_unused) { struct kcore_copy_info *kci = arg; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f199d5b11d76..acde8e489352 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -217,7 +217,8 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) goto out_unlock; for (next = map__next(curr); next; next = map__next(curr)) { - curr->end = next->start; + if (!curr->end) + curr->end = next->start; curr = next; } @@ -225,7 +226,8 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) * We still haven't the actual symbols, so guess the * last map final address. */ - curr->end = ~0ULL; + if (!curr->end) + curr->end = ~0ULL; out_unlock: pthread_rwlock_unlock(&maps->lock); @@ -512,7 +514,7 @@ void dso__sort_by_name(struct dso *dso, enum map_type type) int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, - u64 start)) + u64 start, u64 size)) { char *line = NULL; size_t n; @@ -525,8 +527,8 @@ int modules__parse(const char *filename, void *arg, while (1) { char name[PATH_MAX]; - u64 start; - char *sep; + u64 start, size; + char *sep, *endptr; ssize_t line_len; line_len = getline(&line, &n, file); @@ -558,7 +560,11 @@ int modules__parse(const char *filename, void *arg, scnprintf(name, sizeof(name), "[%s]", line); - err = process_module(arg, name, start); + size = strtoul(sep + 1, &endptr, 0); + if (*endptr != ' ' && *endptr != '\t') + continue; + + err = process_module(arg, name, start, size); if (err) break; } @@ -905,7 +911,8 @@ static struct module_info *find_module(const char *name, return NULL; } -static int __read_proc_modules(void *arg, const char *name, u64 start) +static int __read_proc_modules(void *arg, const char *name, u64 start, + u64 size __maybe_unused) { struct rb_root *modules = arg; struct module_info *mi; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index d964844eb314..833b1be2fb29 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -268,7 +268,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size); int sysfs__read_build_id(const char *filename, void *bf, size_t size); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, - u64 start)); + u64 start, u64 size)); int filename__read_debuglink(const char *filename, char *debuglink, size_t size); From 708eac3bdb554e854ae3554a9c9953f79f834ee9 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 8 Aug 2019 09:48:23 +0300 Subject: [PATCH 008/104] perf db-export: Fix thread__exec_comm() commit 3de7ae0b2a1d86dbb23d0cb135150534fdb2e836 upstream. Threads synthesized from /proc have comms with a start time of zero, and not marked as "exec". Currently, there can be 2 such comms. The first is created by processing a synthesized fork event and is set to the parent's comm string, and the second by processing a synthesized comm event set to the thread's current comm string. In the absence of an "exec" comm, thread__exec_comm() picks the last (oldest) comm, which, in the case above, is the parent's comm string. For a main thread, that is very probably wrong. Use the second-to-last in that case. This affects only db-export because it is the only user of thread__exec_comm(). Example: $ sudo perf record -a -o pt-a-sleep-1 -e intel_pt//u -- sleep 1 $ sudo chown ahunter pt-a-sleep-1 Before: $ perf script -i pt-a-sleep-1 --itrace=bep -s tools/perf/scripts/python/export-to-sqlite.py pt-a-sleep-1.db branches calls $ sqlite3 -header -column pt-a-sleep-1.db 'select * from comm_threads_view' comm_id command thread_id pid tid ---------- ---------- ---------- ---------- ---------- 1 swapper 1 0 0 2 rcu_sched 2 10 10 3 kthreadd 3 78 78 5 sudo 4 15180 15180 5 sudo 5 15180 15182 7 kworker/4: 6 10335 10335 8 kthreadd 7 55 55 10 systemd 8 865 865 10 systemd 9 865 875 13 perf 10 15181 15181 15 sleep 10 15181 15181 16 kworker/3: 11 14179 14179 17 kthreadd 12 29376 29376 19 systemd 13 746 746 21 systemd 14 401 401 23 systemd 15 879 879 23 systemd 16 879 945 25 kthreadd 17 556 556 27 kworker/u1 18 14136 14136 28 kworker/u1 19 15021 15021 29 kthreadd 20 509 509 31 systemd 21 836 836 31 systemd 22 836 967 33 systemd 23 1148 1148 33 systemd 24 1148 1163 35 kworker/2: 25 17988 17988 36 kworker/0: 26 13478 13478 After: $ perf script -i pt-a-sleep-1 --itrace=bep -s tools/perf/scripts/python/export-to-sqlite.py pt-a-sleep-1b.db branches calls $ sqlite3 -header -column pt-a-sleep-1b.db 'select * from comm_threads_view' comm_id command thread_id pid tid ---------- ---------- ---------- ---------- ---------- 1 swapper 1 0 0 2 rcu_sched 2 10 10 3 kswapd0 3 78 78 4 perf 4 15180 15180 4 perf 5 15180 15182 6 kworker/4: 6 10335 10335 7 kcompactd0 7 55 55 8 accounts-d 8 865 865 8 accounts-d 9 865 875 10 perf 10 15181 15181 12 sleep 10 15181 15181 13 kworker/3: 11 14179 14179 14 kworker/1: 12 29376 29376 15 haveged 13 746 746 16 systemd-jo 14 401 401 17 NetworkMan 15 879 879 17 NetworkMan 16 879 945 19 irq/131-iw 17 556 556 20 kworker/u1 18 14136 14136 21 kworker/u1 19 15021 15021 22 kworker/u1 20 509 509 23 thermald 21 836 836 23 thermald 22 836 967 25 unity-sett 23 1148 1148 25 unity-sett 24 1148 1163 27 kworker/2: 25 17988 17988 28 kworker/0: 26 13478 13478 Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Fixes: 65de51f93ebf ("perf tools: Identify which comms are from exec") Link: http://lkml.kernel.org/r/20190808064823.14846-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/thread.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index f5af87f66663..81aee5adc8a2 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -114,14 +114,24 @@ struct comm *thread__comm(const struct thread *thread) struct comm *thread__exec_comm(const struct thread *thread) { - struct comm *comm, *last = NULL; + struct comm *comm, *last = NULL, *second_last = NULL; list_for_each_entry(comm, &thread->comm_list, list) { if (comm->exec) return comm; + second_last = last; last = comm; } + /* + * 'last' with no start time might be the parent's comm of a synthesized + * thread (created by processing a synthesized fork event). For a main + * thread, that is very probably wrong. Prefer a later comm to avoid + * that case. + */ + if (second_last && !last->start && thread->pid_ == thread->tid) + return second_last; + return last; } From 0d7f710286c1e9d2de4fdda638f9f9ef6c412080 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 24 Jul 2019 14:27:02 +0200 Subject: [PATCH 009/104] perf record: Fix module size on s390 commit 12a6d2940b5f02b4b9f71ce098e3bb02bc24a9ea upstream. On s390 the modules loaded in memory have the text segment located after the GOT and Relocation table. This can be seen with this output: [root@m35lp76 perf]# fgrep qeth /proc/modules qeth 151552 1 qeth_l2, Live 0x000003ff800b2000 ... [root@m35lp76 perf]# cat /sys/module/qeth/sections/.text 0x000003ff800b3990 [root@m35lp76 perf]# There is an offset of 0x1990 bytes. The size of the qeth module is 151552 bytes (0x25000 in hex). The location of the GOT/relocation table at the beginning of a module is unique to s390. commit 203d8a4aa6ed ("perf s390: Fix 'start' address of module's map") adjusts the start address of a module in the map structures, but does not adjust the size of the modules. This leads to overlapping of module maps as this example shows: [root@m35lp76 perf] # ./perf report -D 0 0 0xfb0 [0xa0]: PERF_RECORD_MMAP -1/0: [0x3ff800b3990(0x25000) @ 0]: x /lib/modules/.../qeth.ko.xz 0 0 0x1050 [0xb0]: PERF_RECORD_MMAP -1/0: [0x3ff800d85a0(0x8000) @ 0]: x /lib/modules/.../ip6_tables.ko.xz The module qeth.ko has an adjusted start address modified to b3990, but its size is unchanged and the module ends at 0x3ff800d8990. This end address overlaps with the next modules start address of 0x3ff800d85a0. When the size of the leading GOT/Relocation table stored in the beginning of the text segment (0x1990 bytes) is subtracted from module qeth end address, there are no overlaps anymore: 0x3ff800d8990 - 0x1990 = 0x0x3ff800d7000 which is the same as 0x3ff800b2000 + 0x25000 = 0x0x3ff800d7000. To fix this issue, also adjust the modules size in function arch__fix_module_text_start(). Add another function parameter named size and reduce the size of the module when the text segment start address is changed. Output after: 0 0 0xfb0 [0xa0]: PERF_RECORD_MMAP -1/0: [0x3ff800b3990(0x23670) @ 0]: x /lib/modules/.../qeth.ko.xz 0 0 0x1050 [0xb0]: PERF_RECORD_MMAP -1/0: [0x3ff800d85a0(0x7a60) @ 0]: x /lib/modules/.../ip6_tables.ko.xz Reported-by: Stefan Liebler Signed-off-by: Thomas Richter Acked-by: Heiko Carstens Cc: Hendrik Brueckner Cc: Vasily Gorbik Cc: stable@vger.kernel.org Fixes: 203d8a4aa6ed ("perf s390: Fix 'start' address of module's map") Link: http://lkml.kernel.org/r/20190724122703.3996-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/s390/util/machine.c | 14 +++++++++++++- tools/perf/util/machine.c | 3 ++- tools/perf/util/machine.h | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c index d3d1452021d4..f4f8aff8a9bb 100644 --- a/tools/perf/arch/s390/util/machine.c +++ b/tools/perf/arch/s390/util/machine.c @@ -6,7 +6,7 @@ #include "api/fs/fs.h" #include "debug.h" -int arch__fix_module_text_start(u64 *start, const char *name) +int arch__fix_module_text_start(u64 *start, u64 *size, const char *name) { u64 m_start = *start; char path[PATH_MAX]; @@ -16,6 +16,18 @@ int arch__fix_module_text_start(u64 *start, const char *name) if (sysfs__read_ull(path, (unsigned long long *)start) < 0) { pr_debug2("Using module %s start:%#lx\n", path, m_start); *start = m_start; + } else { + /* Successful read of the modules segment text start address. + * Calculate difference between module start address + * in memory and module text segment start address. + * For example module load address is 0x3ff8011b000 + * (from /proc/modules) and module text segment start + * address is 0x3ff8011b870 (from file above). + * + * Adjust the module size and subtract the GOT table + * size located at the beginning of the module. + */ + *size -= (*start - m_start); } return 0; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index dbf2d68b9e66..45ea7a8cd818 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1074,6 +1074,7 @@ static int machine__set_modules_path(struct machine *machine) return map_groups__set_modules_path_dir(&machine->kmaps, modules_path, 0); } int __weak arch__fix_module_text_start(u64 *start __maybe_unused, + u64 *size __maybe_unused, const char *name __maybe_unused) { return 0; @@ -1085,7 +1086,7 @@ static int machine__create_module(void *arg, const char *name, u64 start, struct machine *machine = arg; struct map *map; - if (arch__fix_module_text_start(&start, name) < 0) + if (arch__fix_module_text_start(&start, &size, name) < 0) return -1; map = machine__findnew_module_map(machine, start, name); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 354de6e56109..9b87b8b870ae 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -205,7 +205,7 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine, struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); -int arch__fix_module_text_start(u64 *start, const char *name); +int arch__fix_module_text_start(u64 *start, u64 *size, const char *name); int __machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type, bool no_kcore); From e253114f73134cf6f29b453176fb537441e12371 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 5 Aug 2019 12:15:28 +0100 Subject: [PATCH 010/104] usb: yurex: Fix use-after-free in yurex_delete commit fc05481b2fcabaaeccf63e32ac1baab54e5b6963 upstream. syzbot reported the following crash [0]: BUG: KASAN: use-after-free in usb_free_coherent+0x79/0x80 drivers/usb/core/usb.c:928 Read of size 8 at addr ffff8881b18599c8 by task syz-executor.4/16007 CPU: 0 PID: 16007 Comm: syz-executor.4 Not tainted 5.3.0-rc2+ #23 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xca/0x13e lib/dump_stack.c:113 print_address_description+0x6a/0x32c mm/kasan/report.c:351 __kasan_report.cold+0x1a/0x33 mm/kasan/report.c:482 kasan_report+0xe/0x12 mm/kasan/common.c:612 usb_free_coherent+0x79/0x80 drivers/usb/core/usb.c:928 yurex_delete+0x138/0x330 drivers/usb/misc/yurex.c:100 kref_put include/linux/kref.h:65 [inline] yurex_release+0x66/0x90 drivers/usb/misc/yurex.c:392 __fput+0x2d7/0x840 fs/file_table.c:280 task_work_run+0x13f/0x1c0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop+0x1d2/0x200 arch/x86/entry/common.c:163 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath arch/x86/entry/common.c:274 [inline] do_syscall_64+0x45f/0x580 arch/x86/entry/common.c:299 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x413511 Code: 75 14 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 04 1b 00 00 c3 48 83 ec 08 e8 0a fc ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 53 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00007ffc424ea2e0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 0000000000000007 RCX: 0000000000413511 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000006 RBP: 0000000000000001 R08: 0000000029a2fc22 R09: 0000000029a2fc26 R10: 00007ffc424ea3c0 R11: 0000000000000293 R12: 000000000075c9a0 R13: 000000000075c9a0 R14: 0000000000761938 R15: ffffffffffffffff Allocated by task 2776: save_stack+0x1b/0x80 mm/kasan/common.c:69 set_track mm/kasan/common.c:77 [inline] __kasan_kmalloc mm/kasan/common.c:487 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:460 kmalloc include/linux/slab.h:552 [inline] kzalloc include/linux/slab.h:748 [inline] usb_alloc_dev+0x51/0xf95 drivers/usb/core/usb.c:583 hub_port_connect drivers/usb/core/hub.c:5004 [inline] hub_port_connect_change drivers/usb/core/hub.c:5213 [inline] port_event drivers/usb/core/hub.c:5359 [inline] hub_event+0x15c0/0x3640 drivers/usb/core/hub.c:5441 process_one_work+0x92b/0x1530 kernel/workqueue.c:2269 worker_thread+0x96/0xe20 kernel/workqueue.c:2415 kthread+0x318/0x420 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Freed by task 16007: save_stack+0x1b/0x80 mm/kasan/common.c:69 set_track mm/kasan/common.c:77 [inline] __kasan_slab_free+0x130/0x180 mm/kasan/common.c:449 slab_free_hook mm/slub.c:1423 [inline] slab_free_freelist_hook mm/slub.c:1470 [inline] slab_free mm/slub.c:3012 [inline] kfree+0xe4/0x2f0 mm/slub.c:3953 device_release+0x71/0x200 drivers/base/core.c:1064 kobject_cleanup lib/kobject.c:693 [inline] kobject_release lib/kobject.c:722 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x171/0x280 lib/kobject.c:739 put_device+0x1b/0x30 drivers/base/core.c:2213 usb_put_dev+0x1f/0x30 drivers/usb/core/usb.c:725 yurex_delete+0x40/0x330 drivers/usb/misc/yurex.c:95 kref_put include/linux/kref.h:65 [inline] yurex_release+0x66/0x90 drivers/usb/misc/yurex.c:392 __fput+0x2d7/0x840 fs/file_table.c:280 task_work_run+0x13f/0x1c0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_usermode_loop+0x1d2/0x200 arch/x86/entry/common.c:163 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath arch/x86/entry/common.c:274 [inline] do_syscall_64+0x45f/0x580 arch/x86/entry/common.c:299 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8881b1859980 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 72 bytes inside of 2048-byte region [ffff8881b1859980, ffff8881b185a180) The buggy address belongs to the page: page:ffffea0006c61600 refcount:1 mapcount:0 mapping:ffff8881da00c000 index:0x0 compound_mapcount: 0 flags: 0x200000000010200(slab|head) raw: 0200000000010200 0000000000000000 0000000100000001 ffff8881da00c000 raw: 0000000000000000 00000000000f000f 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881b1859880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8881b1859900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc > ffff8881b1859980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8881b1859a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8881b1859a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== A quick look at the yurex_delete() shows that we drop the reference to the usb_device before releasing any buffers associated with the device. Delay the reference drop until we have finished the cleanup. [0] https://lore.kernel.org/lkml/0000000000003f86d8058f0bd671@google.com/ Fixes: 6bc235a2e24a5e ("USB: add driver for Meywa-Denki & Kayac YUREX") Cc: Jiri Kosina Cc: Tomoki Sekiyama Cc: Oliver Neukum Cc: andreyknvl@google.com Cc: gregkh@linuxfoundation.org Cc: Alan Stern Cc: syzkaller-bugs@googlegroups.com Cc: dtor@chromium.org Reported-by: syzbot+d1fedb1c1fdb07fca507@syzkaller.appspotmail.com Signed-off-by: Suzuki K Poulose Cc: stable Link: https://lore.kernel.org/r/20190805111528.6758-1-suzuki.poulose@arm.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index efa3c86bd262..9744e5f996c1 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -96,7 +96,6 @@ static void yurex_delete(struct kref *kref) dev_dbg(&dev->interface->dev, "%s\n", __func__); - usb_put_dev(dev->udev); if (dev->cntl_urb) { usb_kill_urb(dev->cntl_urb); kfree(dev->cntl_req); @@ -112,6 +111,7 @@ static void yurex_delete(struct kref *kref) dev->int_buffer, dev->urb->transfer_dma); usb_free_urb(dev->urb); } + usb_put_dev(dev->udev); kfree(dev); } From 13350c2269a6e6c3145a9998ecdcf1036ac8c034 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Fri, 5 Jul 2019 15:32:16 +0200 Subject: [PATCH 011/104] can: peak_usb: fix potential double kfree_skb() commit fee6a8923ae0d318a7f7950c6c6c28a96cea099b upstream. When closing the CAN device while tx skbs are inflight, echo skb could be released twice. By calling close_candev() before unlinking all pending tx urbs, then the internal echo_skb[] array is fully and correctly cleared before the USB write callback and, therefore, can_get_echo_skb() are called, for each aborted URB. Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core") Signed-off-by: Stephane Grosjean Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 0b0302af3bd2..54c2354053ac 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -592,16 +592,16 @@ static int peak_usb_ndo_stop(struct net_device *netdev) dev->state &= ~PCAN_USB_STATE_STARTED; netif_stop_queue(netdev); + close_candev(netdev); + + dev->can.state = CAN_STATE_STOPPED; + /* unlink all pending urbs and free used memory */ peak_usb_unlink_all_urbs(dev); if (dev->adapter->dev_stop) dev->adapter->dev_stop(dev); - close_candev(netdev); - - dev->can.state = CAN_STATE_STOPPED; - /* can set bus off now */ if (dev->adapter->dev_set_bus) { int err = dev->adapter->dev_set_bus(dev, 0); From 43e4ea5b3ab8b99fa89741e43d6a6e2c158fba80 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 2 Jul 2019 21:41:40 +0200 Subject: [PATCH 012/104] netfilter: nfnetlink: avoid deadlock due to synchronous request_module [ Upstream commit 1b0890cd60829bd51455dc5ad689ed58c4408227 ] Thomas and Juliana report a deadlock when running: (rmmod nf_conntrack_netlink/xfrm_user) conntrack -e NEW -E & modprobe -v xfrm_user They provided following analysis: conntrack -e NEW -E netlink_bind() netlink_lock_table() -> increases "nl_table_users" nfnetlink_bind() # does not unlock the table as it's locked by netlink_bind() __request_module() call_usermodehelper_exec() This triggers "modprobe nf_conntrack_netlink" from kernel, netlink_bind() won't return until modprobe process is done. "modprobe xfrm_user": xfrm_user_init() register_pernet_subsys() -> grab pernet_ops_rwsem .. netlink_table_grab() calls schedule() as "nl_table_users" is non-zero so modprobe is blocked because netlink_bind() increased nl_table_users while also holding pernet_ops_rwsem. "modprobe nf_conntrack_netlink" runs and inits nf_conntrack_netlink: ctnetlink_init() register_pernet_subsys() -> blocks on "pernet_ops_rwsem" thanks to xfrm_user module both modprobe processes wait on one another -- neither can make progress. Switch netlink_bind() to "nowait" modprobe -- this releases the netlink table lock, which then allows both modprobe instances to complete. Reported-by: Thomas Jarosch Reported-by: Juliana Rodrigueiro Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nfnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 2278d9ab723b..9837a61cb3e3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -490,7 +490,7 @@ static int nfnetlink_bind(struct net *net, int group) ss = nfnetlink_get_subsys(type << 8); rcu_read_unlock(); if (!ss) - request_module("nfnetlink-subsys-%d", type); + request_module_nowait("nfnetlink-subsys-%d", type); return 0; } #endif From 2b9ac213b632b3f275728eb68a9a9e1c160aabf8 Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Thu, 18 Jul 2019 18:37:34 +0000 Subject: [PATCH 013/104] iscsi_ibft: make ISCSI_IBFT dependson ACPI instead of ISCSI_IBFT_FIND [ Upstream commit 94bccc34071094c165c79b515d21b63c78f7e968 ] iscsi_ibft can use ACPI to find the iBFT entry during bootup, currently, ISCSI_IBFT depends on ISCSI_IBFT_FIND which is a X86 legacy way to find the iBFT by searching through the low memory. This patch changes the dependency so that other arch like ARM64 can use ISCSI_IBFT as long as the arch supports ACPI. ibft_init() needs to use the global variable ibft_addr declared in iscsi_ibft_find.c. A #ifndef CONFIG_ISCSI_IBFT_FIND is needed to declare the variable if CONFIG_ISCSI_IBFT_FIND is not selected. Moving ibft_addr into the iscsi_ibft.c does not work because if ISCSI_IBFT is selected as a module, the arch/x86/kernel/setup.c won't be able to find the variable at compile time. Signed-off-by: Thomas Tai Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Sasha Levin --- drivers/firmware/Kconfig | 5 +++-- drivers/firmware/iscsi_ibft.c | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index bca172d42c74..854df538ae01 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -144,7 +144,7 @@ config DMI_SCAN_MACHINE_NON_EFI_FALLBACK config ISCSI_IBFT_FIND bool "iSCSI Boot Firmware Table Attributes" - depends on X86 && ACPI + depends on X86 && ISCSI_IBFT default n help This option enables the kernel to find the region of memory @@ -155,7 +155,8 @@ config ISCSI_IBFT_FIND config ISCSI_IBFT tristate "iSCSI Boot Firmware Table Attributes module" select ISCSI_BOOT_SYSFS - depends on ISCSI_IBFT_FIND && SCSI && SCSI_LOWLEVEL + select ISCSI_IBFT_FIND if X86 + depends on ACPI && SCSI && SCSI_LOWLEVEL default n help This option enables support for detection and exposing of iSCSI diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 132b9bae4b6a..220bbc91cebd 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -93,6 +93,10 @@ MODULE_DESCRIPTION("sysfs interface to BIOS iBFT information"); MODULE_LICENSE("GPL"); MODULE_VERSION(IBFT_ISCSI_VERSION); +#ifndef CONFIG_ISCSI_IBFT_FIND +struct acpi_table_ibft *ibft_addr; +#endif + struct ibft_hdr { u8 id; u8 version; From 760b12a7262b0b32e2da4b2f5b21dce52053846b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 17 Jul 2019 18:57:12 -0700 Subject: [PATCH 014/104] mac80211: don't warn about CW params when not using them [ Upstream commit d2b3fe42bc629c2d4002f652b3abdfb2e72991c7 ] ieee80211_set_wmm_default() normally sets up the initial CW min/max for each queue, except that it skips doing this if the driver doesn't support ->conf_tx. We still end up calling drv_conf_tx() in some cases (e.g., ieee80211_reconfig()), which also still won't do anything useful...except it complains here about the invalid CW parameters. Let's just skip the WARN if we weren't going to do anything useful with the parameters. Signed-off-by: Brian Norris Link: https://lore.kernel.org/r/20190718015712.197499-1-briannorris@chromium.org Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/driver-ops.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index bb886e7db47f..f783d1377d9a 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -169,11 +169,16 @@ int drv_conf_tx(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return -EIO; - if (WARN_ONCE(params->cw_min == 0 || - params->cw_min > params->cw_max, - "%s: invalid CW_min/CW_max: %d/%d\n", - sdata->name, params->cw_min, params->cw_max)) + if (params->cw_min == 0 || params->cw_min > params->cw_max) { + /* + * If we can't configure hardware anyway, don't warn. We may + * never have initialized the CW parameters. + */ + WARN_ONCE(local->ops->conf_tx, + "%s: invalid CW_min/CW_max: %d/%d\n", + sdata->name, params->cw_min, params->cw_max); return -EINVAL; + } trace_drv_conf_tx(local, sdata, ac, params); if (local->ops->conf_tx) From 71faaeb11df9378aa317c76e59e37f34c0851fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gerhart?= Date: Mon, 15 Jul 2019 18:33:55 +0200 Subject: [PATCH 015/104] hwmon: (nct6775) Fix register address and added missed tolerance for nct6106 [ Upstream commit f3d43e2e45fd9d44ba52d20debd12cd4ee9c89bf ] Fixed address of third NCT6106_REG_WEIGHT_DUTY_STEP, and added missed NCT6106_REG_TOLERANCE_H. Fixes: 6c009501ff200 ("hwmon: (nct6775) Add support for NCT6102D/6106D") Signed-off-by: Bjoern Gerhart Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/nct6775.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 2b31b84d0a5b..006f090c1b0a 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -698,7 +698,7 @@ static const u16 NCT6106_REG_TARGET[] = { 0x111, 0x121, 0x131 }; static const u16 NCT6106_REG_WEIGHT_TEMP_SEL[] = { 0x168, 0x178, 0x188 }; static const u16 NCT6106_REG_WEIGHT_TEMP_STEP[] = { 0x169, 0x179, 0x189 }; static const u16 NCT6106_REG_WEIGHT_TEMP_STEP_TOL[] = { 0x16a, 0x17a, 0x18a }; -static const u16 NCT6106_REG_WEIGHT_DUTY_STEP[] = { 0x16b, 0x17b, 0x17c }; +static const u16 NCT6106_REG_WEIGHT_DUTY_STEP[] = { 0x16b, 0x17b, 0x18b }; static const u16 NCT6106_REG_WEIGHT_TEMP_BASE[] = { 0x16c, 0x17c, 0x18c }; static const u16 NCT6106_REG_WEIGHT_DUTY_BASE[] = { 0x16d, 0x17d, 0x18d }; @@ -3481,6 +3481,7 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_TIME[0] = NCT6106_REG_FAN_STOP_TIME; data->REG_FAN_TIME[1] = NCT6106_REG_FAN_STEP_UP_TIME; data->REG_FAN_TIME[2] = NCT6106_REG_FAN_STEP_DOWN_TIME; + data->REG_TOLERANCE_H = NCT6106_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6106_REG_PWM; data->REG_PWM[1] = NCT6106_REG_FAN_START_OUTPUT; data->REG_PWM[2] = NCT6106_REG_FAN_STOP_OUTPUT; From 260718adc61479571869b3baf9bc85d55daa243b Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Wed, 17 Jul 2019 11:55:04 +0800 Subject: [PATCH 016/104] cpufreq/pasemi: fix use-after-free in pas_cpufreq_cpu_init() [ Upstream commit e0a12445d1cb186d875410d093a00d215bec6a89 ] The cpu variable is still being used in the of_get_property() call after the of_node_put() call, which may result in use-after-free. Fixes: a9acc26b75f6 ("cpufreq/pasemi: fix possible object reference leak") Signed-off-by: Wen Yang Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/pasemi-cpufreq.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c index 58c933f48300..991b6a3062c4 100644 --- a/drivers/cpufreq/pasemi-cpufreq.c +++ b/drivers/cpufreq/pasemi-cpufreq.c @@ -145,11 +145,19 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) int err = -ENODEV; cpu = of_get_cpu_node(policy->cpu, NULL); - - of_node_put(cpu); if (!cpu) goto out; + max_freqp = of_get_property(cpu, "clock-frequency", NULL); + of_node_put(cpu); + if (!max_freqp) { + err = -EINVAL; + goto out; + } + + /* we need the freq in kHz */ + max_freq = *max_freqp / 1000; + dn = of_find_compatible_node(NULL, NULL, "1682m-sdc"); if (!dn) dn = of_find_compatible_node(NULL, NULL, @@ -185,16 +193,6 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) } pr_debug("init cpufreq on CPU %d\n", policy->cpu); - - max_freqp = of_get_property(cpu, "clock-frequency", NULL); - if (!max_freqp) { - err = -EINVAL; - goto out_unmap_sdcpwr; - } - - /* we need the freq in kHz */ - max_freq = *max_freqp / 1000; - pr_debug("max clock-frequency is at %u kHz\n", max_freq); pr_debug("initializing frequency table\n"); @@ -212,9 +210,6 @@ static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) return cpufreq_generic_init(policy, pas_freqs, get_gizmo_latency()); -out_unmap_sdcpwr: - iounmap(sdcpwr_mapbase); - out_unmap_sdcasr: iounmap(sdcasr_mapbase); out: From 657c648a19eb9b92ee74220af0ed07c6a45db380 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 11 Jul 2019 18:17:36 +0200 Subject: [PATCH 017/104] s390/qdio: add sanity checks to the fast-requeue path [ Upstream commit a6ec414a4dd529eeac5c3ea51c661daba3397108 ] If the device driver were to send out a full queue's worth of SBALs, current code would end up discovering the last of those SBALs as PRIMED and erroneously skip the SIGA-w. This immediately stalls the queue. Add a check to not attempt fast-requeue in this case. While at it also make sure that the state of the previous SBAL was successfully extracted before inspecting it. Signed-off-by: Julian Wiedmann Reviewed-by: Jens Remus Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- drivers/s390/cio/qdio_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 58cd0e0c9680..b65cab444802 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1576,13 +1576,13 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags, rc = qdio_kick_outbound_q(q, phys_aob); } else if (need_siga_sync(q)) { rc = qdio_siga_sync_q(q); + } else if (count < QDIO_MAX_BUFFERS_PER_Q && + get_buf_state(q, prev_buf(bufnr), &state, 0) > 0 && + state == SLSB_CU_OUTPUT_PRIMED) { + /* The previous buffer is not processed yet, tack on. */ + qperf_inc(q, fast_requeue); } else { - /* try to fast requeue buffers */ - get_buf_state(q, prev_buf(bufnr), &state, 0); - if (state != SLSB_CU_OUTPUT_PRIMED) - rc = qdio_kick_outbound_q(q, 0); - else - qperf_inc(q, fast_requeue); + rc = qdio_kick_outbound_q(q, 0); } /* in case of SIGA errors we must process the error immediately */ From daa60696bdf4e59fcb304b3601b1e3f8cdb4f412 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 22 Jul 2019 10:24:33 +0100 Subject: [PATCH 018/104] ALSA: compress: Fix regression on compressed capture streams [ Upstream commit 4475f8c4ab7b248991a60d9c02808dbb813d6be8 ] A previous fix to the stop handling on compressed capture streams causes some knock on issues. The previous fix updated snd_compr_drain_notify to set the state back to PREPARED for capture streams. This causes some issues however as the handling for snd_compr_poll differs between the two states and some user-space applications were relying on the poll failing after the stream had been stopped. To correct this regression whilst still fixing the original problem the patch was addressing, update the capture handling to skip the PREPARED state rather than skipping the SETUP state as it has done until now. Fixes: 4f2ab5e1d13d ("ALSA: compress: Fix stop handling on compressed capture streams") Signed-off-by: Charles Keepax Acked-by: Vinod Koul Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- include/sound/compress_driver.h | 5 +---- sound/core/compress_offload.c | 16 +++++++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h index 96bc5acdade3..49482080311a 100644 --- a/include/sound/compress_driver.h +++ b/include/sound/compress_driver.h @@ -185,10 +185,7 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream) if (snd_BUG_ON(!stream)) return; - if (stream->direction == SND_COMPRESS_PLAYBACK) - stream->runtime->state = SNDRV_PCM_STATE_SETUP; - else - stream->runtime->state = SNDRV_PCM_STATE_PREPARED; + stream->runtime->state = SNDRV_PCM_STATE_SETUP; wake_up(&stream->runtime->sleep); } diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 555df64d46ff..cf1317546b0f 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -575,10 +575,7 @@ snd_compr_set_params(struct snd_compr_stream *stream, unsigned long arg) stream->metadata_set = false; stream->next_track = false; - if (stream->direction == SND_COMPRESS_PLAYBACK) - stream->runtime->state = SNDRV_PCM_STATE_SETUP; - else - stream->runtime->state = SNDRV_PCM_STATE_PREPARED; + stream->runtime->state = SNDRV_PCM_STATE_SETUP; } else { return -EPERM; } @@ -694,8 +691,17 @@ static int snd_compr_start(struct snd_compr_stream *stream) { int retval; - if (stream->runtime->state != SNDRV_PCM_STATE_PREPARED) + switch (stream->runtime->state) { + case SNDRV_PCM_STATE_SETUP: + if (stream->direction != SND_COMPRESS_CAPTURE) + return -EPERM; + break; + case SNDRV_PCM_STATE_PREPARED: + break; + default: return -EPERM; + } + retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_START); if (!retval) stream->runtime->state = SNDRV_PCM_STATE_RUNNING; From 8d25080f4d84654929a7eca15cb2eddbd6337bfc Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 22 Jul 2019 10:24:34 +0100 Subject: [PATCH 019/104] ALSA: compress: Prevent bypasses of set_params [ Upstream commit 26c3f1542f5064310ad26794c09321780d00c57d ] Currently, whilst in SNDRV_PCM_STATE_OPEN it is possible to call snd_compr_stop, snd_compr_drain and snd_compr_partial_drain, which allow a transition to SNDRV_PCM_STATE_SETUP. The stream should only be able to move to the setup state once it has received a SNDRV_COMPRESS_SET_PARAMS ioctl. Fix this issue by not allowing those ioctls whilst in the open state. Signed-off-by: Charles Keepax Acked-by: Vinod Koul Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/compress_offload.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index cf1317546b0f..1538fbc7562b 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -712,9 +712,15 @@ static int snd_compr_stop(struct snd_compr_stream *stream) { int retval; - if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED || - stream->runtime->state == SNDRV_PCM_STATE_SETUP) + switch (stream->runtime->state) { + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_PREPARED: return -EPERM; + default: + break; + } + retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_STOP); if (!retval) { snd_compr_drain_notify(stream); @@ -802,9 +808,14 @@ static int snd_compr_drain(struct snd_compr_stream *stream) { int retval; - if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED || - stream->runtime->state == SNDRV_PCM_STATE_SETUP) + switch (stream->runtime->state) { + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_PREPARED: return -EPERM; + default: + break; + } retval = stream->ops->trigger(stream, SND_COMPR_TRIGGER_DRAIN); if (retval) { @@ -841,9 +852,16 @@ static int snd_compr_next_track(struct snd_compr_stream *stream) static int snd_compr_partial_drain(struct snd_compr_stream *stream) { int retval; - if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED || - stream->runtime->state == SNDRV_PCM_STATE_SETUP) + + switch (stream->runtime->state) { + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_PREPARED: return -EPERM; + default: + break; + } + /* stream can be drained only when next track has been signalled */ if (stream->next_track == false) return -EPERM; From cbc76c3b9d42ce1720e57cdb8026e2e218fb2eea Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 22 Jul 2019 10:24:35 +0100 Subject: [PATCH 020/104] ALSA: compress: Don't allow paritial drain operations on capture streams [ Upstream commit a70ab8a8645083f3700814e757f2940a88b7ef88 ] Partial drain and next track are intended for gapless playback and don't really have an obvious interpretation for a capture stream, so makes sense to not allow those operations on capture streams. Signed-off-by: Charles Keepax Acked-by: Vinod Koul Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/compress_offload.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 1538fbc7562b..b4f1536b17cb 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -835,6 +835,10 @@ static int snd_compr_next_track(struct snd_compr_stream *stream) if (stream->runtime->state != SNDRV_PCM_STATE_RUNNING) return -EPERM; + /* next track doesn't have any meaning for capture streams */ + if (stream->direction == SND_COMPRESS_CAPTURE) + return -EPERM; + /* you can signal next track if this is intended to be a gapless stream * and current track metadata is set */ @@ -862,6 +866,10 @@ static int snd_compr_partial_drain(struct snd_compr_stream *stream) break; } + /* partial drain doesn't have any meaning for capture streams */ + if (stream->direction == SND_COMPRESS_CAPTURE) + return -EPERM; + /* stream can be drained only when next track has been signalled */ if (stream->next_track == false) return -EPERM; From 13810c478b5721bc100a1044bf7b9adddcd57aa6 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 22 Jul 2019 10:24:36 +0100 Subject: [PATCH 021/104] ALSA: compress: Be more restrictive about when a drain is allowed [ Upstream commit 3b8179944cb0dd53e5223996966746cdc8a60657 ] Draining makes little sense in the situation of hardware overrun, as the hardware will have consumed all its available samples. Additionally, draining whilst the stream is paused would presumably get stuck as no data is being consumed on the DSP side. Signed-off-by: Charles Keepax Acked-by: Vinod Koul Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/compress_offload.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index b4f1536b17cb..2e2d18468491 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -812,7 +812,10 @@ static int snd_compr_drain(struct snd_compr_stream *stream) case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: + case SNDRV_PCM_STATE_PAUSED: return -EPERM; + case SNDRV_PCM_STATE_XRUN: + return -EPIPE; default: break; } @@ -861,7 +864,10 @@ static int snd_compr_partial_drain(struct snd_compr_stream *stream) case SNDRV_PCM_STATE_OPEN: case SNDRV_PCM_STATE_SETUP: case SNDRV_PCM_STATE_PREPARED: + case SNDRV_PCM_STATE_PAUSED: return -EPERM; + case SNDRV_PCM_STATE_XRUN: + return -EPIPE; default: break; } From 217c52217e8af93288448c01f18cf09910df11f0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Jul 2019 11:28:37 -0300 Subject: [PATCH 022/104] perf probe: Avoid calling freeing routine multiple times for same pointer [ Upstream commit d95daf5accf4a72005daa13fbb1d1bd8709f2861 ] When perf_add_probe_events() we call cleanup_perf_probe_events() for the pev pointer it receives, then, as part of handling this failure the main 'perf probe' goes on and calls cleanup_params() and that will again call cleanup_perf_probe_events()for the same pointer, so just set nevents to zero when handling the failure of perf_add_probe_events() to avoid the double free. Cc: Adrian Hunter Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-x8qgma4g813z96dvtw9w219q@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-probe.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 9a250c71840e..2b420e7a92c0 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -675,6 +675,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) ret = perf_add_probe_events(params.events, params.nevents); if (ret < 0) { + + /* + * When perf_add_probe_events() fails it calls + * cleanup_perf_probe_events(pevs, npevs), i.e. + * cleanup_perf_probe_events(params.events, params.nevents), which + * will call clear_perf_probe_event(), so set nevents to zero + * to avoid cleanup_params() to call clear_perf_probe_event() again + * on the same pevs. + */ + params.nevents = 0; pr_err_with_code(" Error: Failed to add events.", ret); return ret; } From dfaf368e67c3371f71220dbb7400aed6f5d59c1a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Jul 2019 14:26:34 +0200 Subject: [PATCH 023/104] drbd: dynamically allocate shash descriptor [ Upstream commit 77ce56e2bfaa64127ae5e23ef136c0168b818777 ] Building with clang and KASAN, we get a warning about an overly large stack frame on 32-bit architectures: drivers/block/drbd/drbd_receiver.c:921:31: error: stack frame size of 1280 bytes in function 'conn_connect' [-Werror,-Wframe-larger-than=] We already allocate other data dynamically in this function, so just do the same for the shash descriptor, which makes up most of this memory. Link: https://lore.kernel.org/lkml/20190617132440.2721536-1-arnd@arndb.de/ Reviewed-by: Kees Cook Reviewed-by: Roland Kammerer Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/drbd/drbd_receiver.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 83957a1e15ed..8e8e4ccb128f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -5297,7 +5297,7 @@ static int drbd_do_auth(struct drbd_connection *connection) unsigned int key_len; char secret[SHARED_SECRET_MAX]; /* 64 byte */ unsigned int resp_size; - SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm); + struct shash_desc *desc; struct packet_info pi; struct net_conf *nc; int err, rv; @@ -5310,6 +5310,13 @@ static int drbd_do_auth(struct drbd_connection *connection) memcpy(secret, nc->shared_secret, key_len); rcu_read_unlock(); + desc = kmalloc(sizeof(struct shash_desc) + + crypto_shash_descsize(connection->cram_hmac_tfm), + GFP_KERNEL); + if (!desc) { + rv = -1; + goto fail; + } desc->tfm = connection->cram_hmac_tfm; desc->flags = 0; @@ -5452,7 +5459,10 @@ static int drbd_do_auth(struct drbd_connection *connection) kfree(peers_ch); kfree(response); kfree(right_response); - shash_desc_zero(desc); + if (desc) { + shash_desc_zero(desc); + kfree(desc); + } return rv; } From ab1ab88e93bea156af5e3fb0a3ce20f8ee3e0600 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 22 Jul 2019 17:25:48 +0100 Subject: [PATCH 024/104] ACPI/IORT: Fix off-by-one check in iort_dev_find_its_id() [ Upstream commit 5a46d3f71d5e5a9f82eabc682f996f1281705ac7 ] Static analysis identified that index comparison against ITS entries in iort_dev_find_its_id() is off by one. Update the comparison condition and clarify the resulting error message. Fixes: 4bf2efd26d76 ("ACPI: Add new IORT functions to support MSI domain handling") Link: https://lore.kernel.org/linux-arm-kernel/20190613065410.GB16334@mwanda/ Reviewed-by: Hanjun Guo Reported-by: Dan Carpenter Signed-off-by: Lorenzo Pieralisi Cc: Dan Carpenter Cc: Will Deacon Cc: Hanjun Guo Cc: Sudeep Holla Cc: Catalin Marinas Cc: Robin Murphy Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/acpi/arm64/iort.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 6b81746cd13c..e5b1b3f1c231 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -324,8 +324,8 @@ static int iort_dev_find_its_id(struct device *dev, u32 req_id, /* Move to ITS specific data */ its = (struct acpi_iort_its_group *)node->node_data; - if (idx > its->its_count) { - dev_err(dev, "requested ITS ID index [%d] is greater than available [%d]\n", + if (idx >= its->its_count) { + dev_err(dev, "requested ITS ID index [%d] overruns ITS entries [%d]\n", idx, its->its_count); return -ENXIO; } From 67b14bd258e166ee346c035a9197b4fce4957fa7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 22 Jul 2019 16:51:50 +0200 Subject: [PATCH 025/104] ARM: davinci: fix sleep.S build error on ARMv4 [ Upstream commit d64b212ea960db4276a1d8372bd98cb861dfcbb0 ] When building a multiplatform kernel that includes armv4 support, the default target CPU does not support the blx instruction, which leads to a build failure: arch/arm/mach-davinci/sleep.S: Assembler messages: arch/arm/mach-davinci/sleep.S:56: Error: selected processor does not support `blx ip' in ARM mode Add a .arch statement in the sources to make this file build. Link: https://lore.kernel.org/r/20190722145211.1154785-1-arnd@arndb.de Acked-by: Sekhar Nori Signed-off-by: Arnd Bergmann Signed-off-by: Olof Johansson Signed-off-by: Sasha Levin --- arch/arm/mach-davinci/sleep.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S index cd350dee4df3..efcd400b2abb 100644 --- a/arch/arm/mach-davinci/sleep.S +++ b/arch/arm/mach-davinci/sleep.S @@ -37,6 +37,7 @@ #define DEEPSLEEP_SLEEPENABLE_BIT BIT(31) .text + .arch armv5te /* * Move DaVinci into deep sleep state * From d80a03c747230b7cc4c715e7733c6c10d335a961 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 22 Jul 2019 09:15:24 -0700 Subject: [PATCH 026/104] scsi: megaraid_sas: fix panic on loading firmware crashdump [ Upstream commit 3b5f307ef3cb5022bfe3c8ca5b8f2114d5bf6c29 ] While loading fw crashdump in function fw_crash_buffer_show(), left bytes in one dma chunk was not checked, if copying size over it, overflow access will cause kernel panic. Signed-off-by: Junxiao Bi Acked-by: Sumit Saxena Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 5b1c37e3913c..d90693b2767f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2847,6 +2847,7 @@ megasas_fw_crash_buffer_show(struct device *cdev, u32 size; unsigned long buff_addr; unsigned long dmachunk = CRASH_DMA_BUF_SIZE; + unsigned long chunk_left_bytes; unsigned long src_addr; unsigned long flags; u32 buff_offset; @@ -2872,6 +2873,8 @@ megasas_fw_crash_buffer_show(struct device *cdev, } size = (instance->fw_crash_buffer_size * dmachunk) - buff_offset; + chunk_left_bytes = dmachunk - (buff_offset % dmachunk); + size = (size > chunk_left_bytes) ? chunk_left_bytes : size; size = (size >= PAGE_SIZE) ? (PAGE_SIZE - 1) : size; src_addr = (unsigned long)instance->crash_buf[buff_offset / dmachunk] + From b3a0c297e6e636d5b87bf5bb5cfa441a84d944c9 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 17 Jul 2019 14:48:27 -0500 Subject: [PATCH 027/104] scsi: ibmvfc: fix WARN_ON during event pool release [ Upstream commit 5578257ca0e21056821e6481bd534ba267b84e58 ] While removing an ibmvfc client adapter a WARN_ON like the following WARN_ON is seen in the kernel log: WARNING: CPU: 6 PID: 5421 at ./include/linux/dma-mapping.h:541 ibmvfc_free_event_pool+0x12c/0x1f0 [ibmvfc] CPU: 6 PID: 5421 Comm: rmmod Tainted: G E 4.17.0-rc1-next-20180419-autotest #1 NIP: d00000000290328c LR: d00000000290325c CTR: c00000000036ee20 REGS: c000000288d1b7e0 TRAP: 0700 Tainted: G E (4.17.0-rc1-next-20180419-autotest) MSR: 800000010282b033 CR: 44008828 XER: 20000000 CFAR: c00000000036e408 SOFTE: 1 GPR00: d00000000290325c c000000288d1ba60 d000000002917900 c000000289d75448 GPR04: 0000000000000071 c0000000ff870000 0000000018040000 0000000000000001 GPR08: 0000000000000000 c00000000156e838 0000000000000001 d00000000290c640 GPR12: c00000000036ee20 c00000001ec4dc00 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 00000100276901e0 0000000010020598 GPR20: 0000000010020550 0000000010020538 0000000010020578 00000000100205b0 GPR24: 0000000000000000 0000000000000000 0000000010020590 5deadbeef0000100 GPR28: 5deadbeef0000200 d000000002910b00 0000000000000071 c0000002822f87d8 NIP [d00000000290328c] ibmvfc_free_event_pool+0x12c/0x1f0 [ibmvfc] LR [d00000000290325c] ibmvfc_free_event_pool+0xfc/0x1f0 [ibmvfc] Call Trace: [c000000288d1ba60] [d00000000290325c] ibmvfc_free_event_pool+0xfc/0x1f0 [ibmvfc] (unreliable) [c000000288d1baf0] [d000000002909390] ibmvfc_abort_task_set+0x7b0/0x8b0 [ibmvfc] [c000000288d1bb70] [c0000000000d8c68] vio_bus_remove+0x68/0x100 [c000000288d1bbb0] [c0000000007da7c4] device_release_driver_internal+0x1f4/0x2d0 [c000000288d1bc00] [c0000000007da95c] driver_detach+0x7c/0x100 [c000000288d1bc40] [c0000000007d8af4] bus_remove_driver+0x84/0x140 [c000000288d1bcb0] [c0000000007db6ac] driver_unregister+0x4c/0xa0 [c000000288d1bd20] [c0000000000d6e7c] vio_unregister_driver+0x2c/0x50 [c000000288d1bd50] [d00000000290ba0c] cleanup_module+0x24/0x15e0 [ibmvfc] [c000000288d1bd70] [c0000000001dadb0] sys_delete_module+0x220/0x2d0 [c000000288d1be30] [c00000000000b284] system_call+0x58/0x6c Instruction dump: e8410018 e87f0068 809f0078 e8bf0080 e8df0088 2fa30000 419e008c e9230200 2fa90000 419e0080 894d098a 794a07e0 <0b0a0000> e9290008 2fa90000 419e0028 This is tripped as a result of irqs being disabled during the call to dma_free_coherent() by ibmvfc_free_event_pool(). At this point in the code path we have quiesced the adapter and its overly paranoid anyways to be holding the host lock. Reported-by: Abdul Haleem Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ibmvscsi/ibmvfc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 7e487c78279c..54dea767dfde 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -4883,8 +4883,8 @@ static int ibmvfc_remove(struct vio_dev *vdev) spin_lock_irqsave(vhost->host->host_lock, flags); ibmvfc_purge_requests(vhost, DID_ERROR); - ibmvfc_free_event_pool(vhost); spin_unlock_irqrestore(vhost->host->host_lock, flags); + ibmvfc_free_event_pool(vhost); ibmvfc_free_mem(vhost); spin_lock(&ibmvfc_driver_lock); From 5bf03ad9c4c77e25eaebe4f02266f9757f71f6ff Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 12 Jul 2019 08:53:47 +0200 Subject: [PATCH 028/104] scsi: scsi_dh_alua: always use a 2 second delay before retrying RTPG [ Upstream commit 20122994e38aef0ae50555884d287adde6641c94 ] Retrying immediately after we've received a 'transitioning' sense code is pretty much pointless, we should always use a delay before retrying. So ensure the default delay is applied before retrying. Signed-off-by: Hannes Reinecke Tested-by: Zhangguanghui Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/device_handler/scsi_dh_alua.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index d3145799b92f..98787588247b 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -53,6 +53,7 @@ #define ALUA_FAILOVER_TIMEOUT 60 #define ALUA_FAILOVER_RETRIES 5 #define ALUA_RTPG_DELAY_MSECS 5 +#define ALUA_RTPG_RETRY_DELAY 2 /* device handler flags */ #define ALUA_OPTIMIZE_STPG 0x01 @@ -681,7 +682,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) case SCSI_ACCESS_STATE_TRANSITIONING: if (time_before(jiffies, pg->expiry)) { /* State transition, retry */ - pg->interval = 2; + pg->interval = ALUA_RTPG_RETRY_DELAY; err = SCSI_DH_RETRY; } else { struct alua_dh_data *h; @@ -809,6 +810,8 @@ static void alua_rtpg_work(struct work_struct *work) spin_lock_irqsave(&pg->lock, flags); pg->flags &= ~ALUA_PG_RUNNING; pg->flags |= ALUA_PG_RUN_RTPG; + if (!pg->interval) + pg->interval = ALUA_RTPG_RETRY_DELAY; spin_unlock_irqrestore(&pg->lock, flags); queue_delayed_work(alua_wq, &pg->rtpg_work, pg->interval * HZ); @@ -820,6 +823,8 @@ static void alua_rtpg_work(struct work_struct *work) spin_lock_irqsave(&pg->lock, flags); if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { pg->flags &= ~ALUA_PG_RUNNING; + if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG)) + pg->interval = ALUA_RTPG_RETRY_DELAY; pg->flags |= ALUA_PG_RUN_RTPG; spin_unlock_irqrestore(&pg->lock, flags); queue_delayed_work(alua_wq, &pg->rtpg_work, From a59873b5fe37eac75f0a1e5febde88106f9d8020 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jul 2019 15:03:15 +0200 Subject: [PATCH 029/104] tty/ldsem, locking/rwsem: Add missing ACQUIRE to read_failed sleep loop [ Upstream commit 952041a8639a7a3a73a2b6573cb8aa8518bc39f8 ] While reviewing rwsem down_slowpath, Will noticed ldsem had a copy of a bug we just found for rwsem. X = 0; CPU0 CPU1 rwsem_down_read() for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); X = 1; rwsem_up_write(); rwsem_mark_wake() atomic_long_add(adjustment, &sem->count); smp_store_release(&waiter->task, NULL); if (!waiter.task) break; ... } r = X; Allows 'r == 0'. Reported-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Linus Torvalds Cc: Peter Hurley Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 4898e640caf0 ("tty: Add timed, writer-prioritized rw semaphore") Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- drivers/tty/tty_ldsem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index dbd7ba32caac..6c5eb99fcfce 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -137,8 +137,7 @@ static void __ldsem_wake_readers(struct ld_semaphore *sem) list_for_each_entry_safe(waiter, next, &sem->read_wait, list) { tsk = waiter->task; - smp_mb(); - waiter->task = NULL; + smp_store_release(&waiter->task, NULL); wake_up_process(tsk); put_task_struct(tsk); } @@ -234,7 +233,7 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout) for (;;) { set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (!waiter.task) + if (!smp_load_acquire(&waiter.task)) break; if (!timeout) break; From ac370f1ea518c49068baf6c485c8d86303750225 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Wed, 24 Jul 2019 15:53:24 +0300 Subject: [PATCH 030/104] perf/core: Fix creating kernel counters for PMUs that override event->cpu [ Upstream commit 4ce54af8b33d3e21ca935fc1b89b58cbba956051 ] Some hardware PMU drivers will override perf_event.cpu inside their event_init callback. This causes a lockdep splat when initialized through the kernel API: WARNING: CPU: 0 PID: 250 at kernel/events/core.c:2917 ctx_sched_out+0x78/0x208 pc : ctx_sched_out+0x78/0x208 Call trace: ctx_sched_out+0x78/0x208 __perf_install_in_context+0x160/0x248 remote_function+0x58/0x68 generic_exec_single+0x100/0x180 smp_call_function_single+0x174/0x1b8 perf_install_in_context+0x178/0x188 perf_event_create_kernel_counter+0x118/0x160 Fix this by calling perf_install_in_context with event->cpu, just like perf_event_open Signed-off-by: Leonard Crestez Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mark Rutland Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Frank Li Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/r/c4ebe0503623066896d7046def4d6b1e06e0eb2e.1563972056.git.leonard.crestez@nxp.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 93d7333c64d8..5bbf7537a612 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10130,7 +10130,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, goto err_unlock; } - perf_install_in_context(ctx, event, cpu); + perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); mutex_unlock(&ctx->mutex); From 0cad79bfb5aa596b9449fe66b0edf69a8344326c Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Wed, 31 Jul 2019 10:54:47 -0400 Subject: [PATCH 031/104] can: peak_usb: pcan_usb_pro: Fix info-leaks to USB devices commit ead16e53c2f0ed946d82d4037c630e2f60f4ab69 upstream. Uninitialized Kernel memory can leak to USB devices. Fix by using kzalloc() instead of kmalloc() on the affected buffers. Signed-off-by: Tomas Bortoli Reported-by: syzbot+d6a5a1a3657b596ef132@syzkaller.appspotmail.com Fixes: f14e22435a27 ("net: can: peak_usb: Do not do dma on the stack") Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb_pro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c index bbdd6058cd2f..d85fdc6949c6 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_pro.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_pro.c @@ -500,7 +500,7 @@ static int pcan_usb_pro_drv_loaded(struct peak_usb_device *dev, int loaded) u8 *buffer; int err; - buffer = kmalloc(PCAN_USBPRO_FCT_DRVLD_REQ_LEN, GFP_KERNEL); + buffer = kzalloc(PCAN_USBPRO_FCT_DRVLD_REQ_LEN, GFP_KERNEL); if (!buffer) return -ENOMEM; From 127ab64c38e21c55adf8781ca92f7dc9d1a9903e Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Wed, 31 Jul 2019 10:54:47 -0400 Subject: [PATCH 032/104] can: peak_usb: pcan_usb_fd: Fix info-leaks to USB devices commit 30a8beeb3042f49d0537b7050fd21b490166a3d9 upstream. Uninitialized Kernel memory can leak to USB devices. Fix by using kzalloc() instead of kmalloc() on the affected buffers. Signed-off-by: Tomas Bortoli Reported-by: syzbot+513e4d0985298538bf9b@syzkaller.appspotmail.com Fixes: 0a25e1f4f185 ("can: peak_usb: add support for PEAK new CANFD USB adapters") Cc: linux-stable Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 7f5ec40e2b4d..40647b837b31 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -851,7 +851,7 @@ static int pcan_usb_fd_init(struct peak_usb_device *dev) goto err_out; /* allocate command buffer once for all for the interface */ - pdev->cmd_buffer_addr = kmalloc(PCAN_UFD_CMD_BUFFER_SIZE, + pdev->cmd_buffer_addr = kzalloc(PCAN_UFD_CMD_BUFFER_SIZE, GFP_KERNEL); if (!pdev->cmd_buffer_addr) goto err_out_1; From d3d1b67ffdd76eb118290e6d2f9733798816c67f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 26 Jul 2019 08:00:49 -0700 Subject: [PATCH 033/104] hwmon: (nct7802) Fix wrong detection of in4 presence commit 38ada2f406a9b81fb1249c5c9227fa657e7d5671 upstream. The code to detect if in4 is present is wrong; if in4 is not present, the in4_input sysfs attribute is still present. In detail: - Ihen RTD3_MD=11 (VSEN3 present), everything is as expected (no bug). - If we have RTD3_MD!=11 (no VSEN3), we unexpectedly have a in4_input file under /sys and the "sensors" command displays in4_input. But as expected, we have no in4_min, in4_max, in4_alarm, in4_beep. Fix is_visible function to detect and report in4_input visibility as expected. Reported-by: Gilles Buloz Cc: Gilles Buloz Cc: stable@vger.kernel.org Fixes: 3434f37835804 ("hwmon: Driver for Nuvoton NCT7802Y") Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/nct7802.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 12b94b094c0d..7f8738a83cb9 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -768,7 +768,7 @@ static struct attribute *nct7802_in_attrs[] = { &sensor_dev_attr_in3_alarm.dev_attr.attr, &sensor_dev_attr_in3_beep.dev_attr.attr, - &sensor_dev_attr_in4_input.dev_attr.attr, /* 17 */ + &sensor_dev_attr_in4_input.dev_attr.attr, /* 16 */ &sensor_dev_attr_in4_min.dev_attr.attr, &sensor_dev_attr_in4_max.dev_attr.attr, &sensor_dev_attr_in4_alarm.dev_attr.attr, @@ -794,9 +794,9 @@ static umode_t nct7802_in_is_visible(struct kobject *kobj, if (index >= 6 && index < 11 && (reg & 0x03) != 0x03) /* VSEN1 */ return 0; - if (index >= 11 && index < 17 && (reg & 0x0c) != 0x0c) /* VSEN2 */ + if (index >= 11 && index < 16 && (reg & 0x0c) != 0x0c) /* VSEN2 */ return 0; - if (index >= 17 && (reg & 0x30) != 0x30) /* VSEN3 */ + if (index >= 16 && (reg & 0x30) != 0x30) /* VSEN3 */ return 0; return attr->mode; From dbb4f2d59f392634019e0ff91280fc67434cf9da Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 8 Aug 2019 00:50:58 -0500 Subject: [PATCH 034/104] ALSA: firewire: fix a memory leak bug commit 1be3c1fae6c1e1f5bb982b255d2034034454527a upstream. In iso_packets_buffer_init(), 'b->packets' is allocated through kmalloc_array(). Then, the aligned packet size is checked. If it is larger than PAGE_SIZE, -EINVAL will be returned to indicate the error. However, the allocated 'b->packets' is not deallocated on this path, leading to a memory leak. To fix the above issue, free 'b->packets' before returning the error code. Fixes: 31ef9134eb52 ("ALSA: add LaCie FireWire Speakers/Griffin FireWave Surround driver") Signed-off-by: Wenwen Wang Reviewed-by: Takashi Sakamoto Cc: # v2.6.39+ Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/packets-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/packets-buffer.c b/sound/firewire/packets-buffer.c index ea1506679c66..3b09b8ef3a09 100644 --- a/sound/firewire/packets-buffer.c +++ b/sound/firewire/packets-buffer.c @@ -37,7 +37,7 @@ int iso_packets_buffer_init(struct iso_packets_buffer *b, struct fw_unit *unit, packets_per_page = PAGE_SIZE / packet_size; if (WARN_ON(!packets_per_page)) { err = -EINVAL; - goto error; + goto err_packets; } pages = DIV_ROUND_UP(count, packets_per_page); From 542233bd738e18de4d5a5a2ccbf57f363e06a08b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 6 Aug 2019 14:03:56 +0200 Subject: [PATCH 035/104] ALSA: hda - Don't override global PCM hw info flag commit c1c6c877b0c79fd7e05c931435aa42211eaeebaf upstream. The commit bfcba288b97f ("ALSA - hda: Add support for link audio time reporting") introduced the conditional PCM hw info setup, but it overwrites the global azx_pcm_hw object. This will cause a problem if any other HD-audio controller, as it'll inherit the same bit flag although another controller doesn't support that feature. Fix the bug by setting the PCM hw info flag locally. Fixes: bfcba288b97f ("ALSA - hda: Add support for link audio time reporting") Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_controller.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 56af7308a2fd..85dbe2420248 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -609,11 +609,9 @@ static int azx_pcm_open(struct snd_pcm_substream *substream) } runtime->private_data = azx_dev; - if (chip->gts_present) - azx_pcm_hw.info = azx_pcm_hw.info | - SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME; - runtime->hw = azx_pcm_hw; + if (chip->gts_present) + runtime->hw.info |= SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME; runtime->hw.channels_min = hinfo->channels_min; runtime->hw.channels_max = hinfo->channels_max; runtime->hw.formats = hinfo->formats; From c7b1a1e984805825ef1de08e778633de86d82521 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 26 Jul 2019 15:47:58 -0700 Subject: [PATCH 036/104] mac80211: don't WARN on short WMM parameters from AP commit 05aaa5c97dce4c10a9e7eae2f1569a684e0c5ced upstream. In a very similar spirit to commit c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from buggy APs"), an AP may not transmit a fully-formed WMM IE. For example, it may miss or repeat an Access Category. The above loop won't catch that and will instead leave one of the four ACs zeroed out. This triggers the following warning in drv_conf_tx() wlan0: invalid CW_min/CW_max: 0/0 and it may leave one of the hardware queues unconfigured. If we detect such a case, let's just print a warning and fall back to the defaults. Tested with a hacked version of hostapd, intentionally corrupting the IEs in hostapd_eid_wmm(). Cc: stable@vger.kernel.org Signed-off-by: Brian Norris Link: https://lore.kernel.org/r/20190726224758.210953-1-briannorris@chromium.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d787717140e5..f462f026fc6a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1873,6 +1873,16 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, } } + /* WMM specification requires all 4 ACIs. */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + if (params[ac].cw_min == 0) { + sdata_info(sdata, + "AP has invalid WMM params (missing AC %d), using defaults\n", + ac); + return false; + } + } + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { mlme_dbg(sdata, "WMM AC=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n", From 84bdf5e2584a671bd40d78854b6bbb007a9eff09 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Mon, 22 Jul 2019 11:34:59 -0700 Subject: [PATCH 037/104] SMB3: Fix deadlock in validate negotiate hits reconnect commit e99c63e4d86d3a94818693147b469fa70de6f945 upstream. Currently we skip SMB2_TREE_CONNECT command when checking during reconnect because Tree Connect happens when establishing an SMB session. For SMB 3.0 protocol version the code also calls validate negotiate which results in SMB2_IOCL command being sent over the wire. This may deadlock on trying to acquire a mutex when checking for reconnect. Fix this by skipping SMB2_IOCL command when doing the reconnect check. Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 52b6e4a40748..1a47b952e9bd 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -168,7 +168,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) if (tcon == NULL) return 0; - if (smb2_command == SMB2_TREE_CONNECT) + if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL) return 0; if (tcon->tidStatus == CifsExiting) { From 92225e41001d4381b7785eeb56e528df6bb32e96 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 25 Jul 2019 18:13:10 -0500 Subject: [PATCH 038/104] smb3: send CAP_DFS capability during session setup commit 8d33096a460d5b9bd13300f01615df5bb454db10 upstream. We had a report of a server which did not do a DFS referral because the session setup Capabilities field was set to 0 (unlike negotiate protocol where we set CAP_DFS). Better to send it session setup in the capabilities as well (this also more closely matches Windows client behavior). Signed-off-by: Steve French Reviewed-off-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky CC: Stable Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 1a47b952e9bd..5255deac86b2 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -660,7 +660,12 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) else req->SecurityMode = 0; +#ifdef CONFIG_CIFS_DFS_UPCALL + req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS); +#else req->Capabilities = 0; +#endif /* DFS_UPCALL */ + req->Channel = 0; /* MBZ */ sess_data->iov[0].iov_base = (char *)req; From fefc9e101542326385e7d9c7760bf9ea4fcd11b6 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 24 Jul 2019 12:46:34 -0700 Subject: [PATCH 039/104] mwifiex: fix 802.11n/WPA detection commit df612421fe2566654047769c6852ffae1a31df16 upstream. Commit 63d7ef36103d ("mwifiex: Don't abort on small, spec-compliant vendor IEs") adjusted the ieee_types_vendor_header struct, which inadvertently messed up the offsets used in mwifiex_is_wpa_oui_present(). Add that offset back in, mirroring mwifiex_is_rsn_oui_present(). As it stands, commit 63d7ef36103d breaks compatibility with WPA (not WPA2) 802.11n networks, since we hit the "info: Disable 11n if AES is not supported by AP" case in mwifiex_is_network_compatible(). Fixes: 63d7ef36103d ("mwifiex: Don't abort on small, spec-compliant vendor IEs") Cc: Signed-off-by: Brian Norris Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/marvell/mwifiex/main.h | 1 + drivers/net/wireless/marvell/mwifiex/scan.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 26df28f4bfb2..d4e19efb75af 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -120,6 +120,7 @@ enum { #define MWIFIEX_MAX_TOTAL_SCAN_TIME (MWIFIEX_TIMER_10S - MWIFIEX_TIMER_1S) +#define WPA_GTK_OUI_OFFSET 2 #define RSN_GTK_OUI_OFFSET 2 #define MWIFIEX_OUI_NOT_PRESENT 0 diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 97847eee2dfb..7e96b6a37946 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -181,7 +181,8 @@ mwifiex_is_wpa_oui_present(struct mwifiex_bssdescriptor *bss_desc, u32 cipher) u8 ret = MWIFIEX_OUI_NOT_PRESENT; if (has_vendor_hdr(bss_desc->bcn_wpa_ie, WLAN_EID_VENDOR_SPECIFIC)) { - iebody = (struct ie_body *) bss_desc->bcn_wpa_ie->data; + iebody = (struct ie_body *)((u8 *)bss_desc->bcn_wpa_ie->data + + WPA_GTK_OUI_OFFSET); oui = &mwifiex_wpa_oui[cipher][0]; ret = mwifiex_search_oui_in_ie(iebody, oui); if (ret) From 0d7ed7f42907aae3093c49c76d465d16810313a0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 21 Jul 2019 14:02:27 +0300 Subject: [PATCH 040/104] iwlwifi: don't unmap as page memory that was mapped as single commit 87e7e25aee6b59fef740856f4e86d4b60496c9e1 upstream. In order to remember how to unmap a memory (as single or as page), we maintain a bit per Transmit Buffer (TBs) in the meta data (structure iwl_cmd_meta). We maintain a bitmap: 1 bit per TB. If the TB is set, we will free the memory as a page. This bitmap was never cleared. Fix this. Cc: stable@vger.kernel.org Fixes: 3cd1980b0cdf ("iwlwifi: pcie: introduce new tfd and tb formats") Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index e1bfc9522cbe..174e45d78c46 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -439,6 +439,8 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, DMA_TO_DEVICE); } + meta->tbs = 0; + if (trans->cfg->use_tfh) { struct iwl_tfh_tfd *tfd_fh = (void *)tfd; From 718ce1eb744d677393869244eec640daab064136 Mon Sep 17 00:00:00 2001 From: Suganath Prabu Date: Tue, 30 Jul 2019 03:43:57 -0400 Subject: [PATCH 041/104] scsi: mpt3sas: Use 63-bit DMA addressing on SAS35 HBA commit df9a606184bfdb5ae3ca9d226184e9489f5c24f7 upstream. Although SAS3 & SAS3.5 IT HBA controllers support 64-bit DMA addressing, as per hardware design, if DMA-able range contains all 64-bits set (0xFFFFFFFF-FFFFFFFF) then it results in a firmware fault. E.g. SGE's start address is 0xFFFFFFFF-FFFF000 and data length is 0x1000 bytes. when HBA tries to DMA the data at 0xFFFFFFFF-FFFFFFFF location then HBA will fault the firmware. Driver will set 63-bit DMA mask to ensure the above address will not be used. Cc: # 5.1.20+ Signed-off-by: Suganath Prabu Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_base.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index a1a5ceb42ce6..6ccde2b41517 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1707,9 +1707,11 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) { struct sysinfo s; u64 consistent_dma_mask; + /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ + int dma_mask = (ioc->hba_mpi_version_belonged > MPI2_VERSION) ? 63 : 64; if (ioc->dma_mask) - consistent_dma_mask = DMA_BIT_MASK(64); + consistent_dma_mask = DMA_BIT_MASK(dma_mask); else consistent_dma_mask = DMA_BIT_MASK(32); @@ -1717,11 +1719,11 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) const uint64_t required_mask = dma_get_required_mask(&pdev->dev); if ((required_mask > DMA_BIT_MASK(32)) && - !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && + !pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_mask)) && !pci_set_consistent_dma_mask(pdev, consistent_dma_mask)) { ioc->base_add_sg_single = &_base_add_sg_single_64; ioc->sge_size = sizeof(Mpi2SGESimple64_t); - ioc->dma_mask = 64; + ioc->dma_mask = dma_mask; goto out; } } @@ -1747,7 +1749,7 @@ static int _base_change_consistent_dma_mask(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { + if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(ioc->dma_mask))) { if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) return -ENODEV; } @@ -3381,7 +3383,7 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc) total_sz += sz; } while (ioc->rdpq_array_enable && (++i < ioc->reply_queue_count)); - if (ioc->dma_mask == 64) { + if (ioc->dma_mask > 32) { if (_base_change_consistent_dma_mask(ioc, ioc->pdev) != 0) { pr_warn(MPT3SAS_FMT "no suitable consistent DMA mask for %s\n", From 694457ee8cb1b327bbb9ab8157cea220fd866227 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 9 Aug 2019 23:43:56 -0500 Subject: [PATCH 042/104] sh: kernel: hw_breakpoint: Fix missing break in switch statement commit 1ee1119d184bb06af921b48c3021d921bbd85bac upstream. Add missing break statement in order to prevent the code from falling through to case SH_BREAKPOINT_WRITE. Fixes: 09a072947791 ("sh: hw-breakpoints: Add preliminary support for SH-4A UBC.") Cc: stable@vger.kernel.org Reviewed-by: Geert Uytterhoeven Reviewed-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman --- arch/sh/kernel/hw_breakpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index 2197fc584186..000cc3343867 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -160,6 +160,7 @@ int arch_bp_generic_fields(int sh_len, int sh_type, switch (sh_type) { case SH_BREAKPOINT_READ: *gen_type = HW_BREAKPOINT_R; + break; case SH_BREAKPOINT_WRITE: *gen_type = HW_BREAKPOINT_W; break; From faf6760c93af392f2ef115367d5711dab068f50f Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 13 Aug 2019 15:37:37 -0700 Subject: [PATCH 043/104] mm/usercopy: use memory range to be accessed for wraparound check commit 951531691c4bcaa59f56a316e018bc2ff1ddf855 upstream. Currently, when checking to see if accessing n bytes starting at address "ptr" will cause a wraparound in the memory addresses, the check in check_bogus_address() adds an extra byte, which is incorrect, as the range of addresses that will be accessed is [ptr, ptr + (n - 1)]. This can lead to incorrectly detecting a wraparound in the memory address, when trying to read 4 KB from memory that is mapped to the the last possible page in the virtual address space, when in fact, accessing that range of memory would not cause a wraparound to occur. Use the memory range that will actually be accessed when considering if accessing a certain amount of bytes will cause the memory address to wrap around. Link: http://lkml.kernel.org/r/1564509253-23287-1-git-send-email-isaacm@codeaurora.org Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Prasad Sodagudi Signed-off-by: Isaac J. Manjarres Co-developed-by: Prasad Sodagudi Reviewed-by: William Kucharski Acked-by: Kees Cook Cc: Greg Kroah-Hartman Cc: Trilok Soni Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [kees: backport to v4.9] Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- mm/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 3c8da0af9695..7683c22551ff 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -124,7 +124,7 @@ static inline const char *check_kernel_text_object(const void *ptr, static inline const char *check_bogus_address(const void *ptr, unsigned long n) { /* Reject if object wraps past end of memory. */ - if ((unsigned long)ptr + n < (unsigned long)ptr) + if ((unsigned long)ptr + (n - 1) < (unsigned long)ptr) return ""; /* Reject if NULL or ZERO-allocation. */ From 43729e6fea58fe2c4fcc1e83547018ede540839d Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Tue, 13 Aug 2019 15:37:28 -0700 Subject: [PATCH 044/104] mm/memcontrol.c: fix use after free in mem_cgroup_iter() commit 54a83d6bcbf8f4700013766b974bf9190d40b689 upstream. This patch is sent to report an use after free in mem_cgroup_iter() after merging commit be2657752e9e ("mm: memcg: fix use after free in mem_cgroup_iter()"). I work with android kernel tree (4.9 & 4.14), and commit be2657752e9e ("mm: memcg: fix use after free in mem_cgroup_iter()") has been merged to the trees. However, I can still observe use after free issues addressed in the commit be2657752e9e. (on low-end devices, a few times this month) backtrace: css_tryget <- crash here mem_cgroup_iter shrink_node shrink_zones do_try_to_free_pages try_to_free_pages __perform_reclaim __alloc_pages_direct_reclaim __alloc_pages_slowpath __alloc_pages_nodemask To debug, I poisoned mem_cgroup before freeing it: static void __mem_cgroup_free(struct mem_cgroup *memcg) for_each_node(node) free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->stat); + /* poison memcg before freeing it */ + memset(memcg, 0x78, sizeof(struct mem_cgroup)); kfree(memcg); } The coredump shows the position=0xdbbc2a00 is freed. (gdb) p/x ((struct mem_cgroup_per_node *)0xe5009e00)->iter[8] $13 = {position = 0xdbbc2a00, generation = 0x2efd} 0xdbbc2a00: 0xdbbc2e00 0x00000000 0xdbbc2800 0x00000100 0xdbbc2a10: 0x00000200 0x78787878 0x00026218 0x00000000 0xdbbc2a20: 0xdcad6000 0x00000001 0x78787800 0x00000000 0xdbbc2a30: 0x78780000 0x00000000 0x0068fb84 0x78787878 0xdbbc2a40: 0x78787878 0x78787878 0x78787878 0xe3fa5cc0 0xdbbc2a50: 0x78787878 0x78787878 0x00000000 0x00000000 0xdbbc2a60: 0x00000000 0x00000000 0x00000000 0x00000000 0xdbbc2a70: 0x00000000 0x00000000 0x00000000 0x00000000 0xdbbc2a80: 0x00000000 0x00000000 0x00000000 0x00000000 0xdbbc2a90: 0x00000001 0x00000000 0x00000000 0x00100000 0xdbbc2aa0: 0x00000001 0xdbbc2ac8 0x00000000 0x00000000 0xdbbc2ab0: 0x00000000 0x00000000 0x00000000 0x00000000 0xdbbc2ac0: 0x00000000 0x00000000 0xe5b02618 0x00001000 0xdbbc2ad0: 0x00000000 0x78787878 0x78787878 0x78787878 0xdbbc2ae0: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2af0: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b00: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b10: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b20: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b30: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b40: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b50: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b60: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b70: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2b80: 0x78787878 0x78787878 0x00000000 0x78787878 0xdbbc2b90: 0x78787878 0x78787878 0x78787878 0x78787878 0xdbbc2ba0: 0x78787878 0x78787878 0x78787878 0x78787878 In the reclaim path, try_to_free_pages() does not setup sc.target_mem_cgroup and sc is passed to do_try_to_free_pages(), ..., shrink_node(). In mem_cgroup_iter(), root is set to root_mem_cgroup because sc->target_mem_cgroup is NULL. It is possible to assign a memcg to root_mem_cgroup.nodeinfo.iter in mem_cgroup_iter(). try_to_free_pages struct scan_control sc = {...}, target_mem_cgroup is 0x0; do_try_to_free_pages shrink_zones shrink_node mem_cgroup *root = sc->target_mem_cgroup; memcg = mem_cgroup_iter(root, NULL, &reclaim); mem_cgroup_iter() if (!root) root = root_mem_cgroup; ... css = css_next_descendant_pre(css, &root->css); memcg = mem_cgroup_from_css(css); cmpxchg(&iter->position, pos, memcg); My device uses memcg non-hierarchical mode. When we release a memcg: invalidate_reclaim_iterators() reaches only dead_memcg and its parents. If non-hierarchical mode is used, invalidate_reclaim_iterators() never reaches root_mem_cgroup. static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) { struct mem_cgroup *memcg = dead_memcg; for (; memcg; memcg = parent_mem_cgroup(memcg) ... } So the use after free scenario looks like: CPU1 CPU2 try_to_free_pages do_try_to_free_pages shrink_zones shrink_node mem_cgroup_iter() if (!root) root = root_mem_cgroup; ... css = css_next_descendant_pre(css, &root->css); memcg = mem_cgroup_from_css(css); cmpxchg(&iter->position, pos, memcg); invalidate_reclaim_iterators(memcg); ... __mem_cgroup_free() kfree(memcg); try_to_free_pages do_try_to_free_pages shrink_zones shrink_node mem_cgroup_iter() if (!root) root = root_mem_cgroup; ... mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id); iter = &mz->iter[reclaim->priority]; pos = READ_ONCE(iter->position); css_tryget(&pos->css) <- use after free To avoid this, we should also invalidate root_mem_cgroup.nodeinfo.iter in invalidate_reclaim_iterators(). [cai@lca.pw: fix -Wparentheses compilation warning] Link: http://lkml.kernel.org/r/1564580753-17531-1-git-send-email-cai@lca.pw Link: http://lkml.kernel.org/r/20190730015729.4406-1-miles.chen@mediatek.com Fixes: 5ac8fb31ad2e ("mm: memcontrol: convert reclaim iterator to simple css refcounting") Signed-off-by: Miles Chen Signed-off-by: Qian Cai Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memcontrol.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 86a6b331b964..cbcbba028c2d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -887,26 +887,45 @@ void mem_cgroup_iter_break(struct mem_cgroup *root, css_put(&prev->css); } -static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) +static void __invalidate_reclaim_iterators(struct mem_cgroup *from, + struct mem_cgroup *dead_memcg) { - struct mem_cgroup *memcg = dead_memcg; struct mem_cgroup_reclaim_iter *iter; struct mem_cgroup_per_node *mz; int nid; int i; - for (; memcg; memcg = parent_mem_cgroup(memcg)) { - for_each_node(nid) { - mz = mem_cgroup_nodeinfo(memcg, nid); - for (i = 0; i <= DEF_PRIORITY; i++) { - iter = &mz->iter[i]; - cmpxchg(&iter->position, - dead_memcg, NULL); - } + for_each_node(nid) { + mz = mem_cgroup_nodeinfo(from, nid); + for (i = 0; i <= DEF_PRIORITY; i++) { + iter = &mz->iter[i]; + cmpxchg(&iter->position, + dead_memcg, NULL); } } } +static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) +{ + struct mem_cgroup *memcg = dead_memcg; + struct mem_cgroup *last; + + do { + __invalidate_reclaim_iterators(memcg, dead_memcg); + last = memcg; + } while ((memcg = parent_mem_cgroup(memcg))); + + /* + * When cgruop1 non-hierarchy mode is used, + * parent_mem_cgroup() does not walk all the way up to the + * cgroup root (root_mem_cgroup). So we have to handle + * dead_memcg from cgroup root separately. + */ + if (last != root_mem_cgroup) + __invalidate_reclaim_iterators(root_mem_cgroup, + dead_memcg); +} + /* * Iteration constructs for visiting all cgroups (under a tree). If * loops are exited prematurely (break), mem_cgroup_iter_break() must From 5124abda3060e2eab506fb14a27acadee3c3e396 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Aug 2019 23:59:20 +0100 Subject: [PATCH 045/104] bpf: get rid of pure_initcall dependency to enable jits commit fa9dd599b4dae841924b022768354cfde9affecb upstream. Having a pure_initcall() callback just to permanently enable BPF JITs under CONFIG_BPF_JIT_ALWAYS_ON is unnecessary and could leave a small race window in future where JIT is still disabled on boot. Since we know about the setting at compilation time anyway, just initialize it properly there. Also consolidate all the individual bpf_jit_enable variables into a single one and move them under one location. Moreover, don't allow for setting unspecified garbage values on them. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov [bwh: Backported to 4.9 as dependency of commit 2e4a30983b0f "bpf: restrict access to core bpf sysctls": - Drop change in arch/mips/net/ebpf_jit.c - Drop change to bpf_jit_kallsyms - Adjust filenames, context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/arm/net/bpf_jit_32.c | 2 -- arch/arm64/net/bpf_jit_comp.c | 2 -- arch/mips/net/bpf_jit.c | 2 -- arch/powerpc/net/bpf_jit_comp.c | 2 -- arch/powerpc/net/bpf_jit_comp64.c | 2 -- arch/s390/net/bpf_jit_comp.c | 2 -- arch/sparc/net/bpf_jit_comp.c | 2 -- arch/x86/net/bpf_jit_comp.c | 2 -- kernel/bpf/core.c | 15 +++++++++++---- net/core/sysctl_net_core.c | 14 +++++++++----- net/socket.c | 9 --------- 11 files changed, 20 insertions(+), 34 deletions(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 93d0b6d0b63e..7fd448b23b94 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -72,8 +72,6 @@ struct jit_ctx { #endif }; -int bpf_jit_enable __read_mostly; - static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret, unsigned int size) { diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b47a26f4290c..939c607b1376 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -30,8 +30,6 @@ #include "bpf_jit.h" -int bpf_jit_enable __read_mostly; - #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 248603739198..bb9f779326d0 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1194,8 +1194,6 @@ jmp_cmp: return 0; } -int bpf_jit_enable __read_mostly; - void bpf_jit_compile(struct bpf_prog *fp) { struct jit_ctx ctx; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 9c58194c7ea5..158f43008314 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -18,8 +18,6 @@ #include "bpf_jit32.h" -int bpf_jit_enable __read_mostly; - static inline void bpf_flush_icache(void *start, void *end) { smp_wmb(); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 9f0810cfe5f3..888ee95340da 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -21,8 +21,6 @@ #include "bpf_jit64.h" -int bpf_jit_enable __read_mostly; - static void bpf_jit_fill_ill_insns(void *area, unsigned int size) { int *p = area; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 8bd25aebf488..896344b6e036 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -28,8 +28,6 @@ #include #include "bpf_jit.h" -int bpf_jit_enable __read_mostly; - struct bpf_jit { u32 seen; /* Flags to remember seen eBPF instructions */ u32 seen_reg[16]; /* Array to remember which registers are used */ diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index a6d9204a6a0b..98a4da3012e3 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -10,8 +10,6 @@ #include "bpf_jit.h" -int bpf_jit_enable __read_mostly; - static inline bool is_simm13(unsigned int value) { return value + 0x1000 < 0x2000; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index cd9764520851..d9dabd0c31fc 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -15,8 +15,6 @@ #include #include -int bpf_jit_enable __read_mostly; - /* * assembly code in arch/x86/net/bpf_jit.S */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 879ca844ba1d..da03ab4ec578 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -208,6 +208,10 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, } #ifdef CONFIG_BPF_JIT +/* All BPF JIT sysctl knobs here. */ +int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); +int bpf_jit_harden __read_mostly; + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -244,8 +248,6 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr) module_memfree(hdr); } -int bpf_jit_harden __read_mostly; - static int bpf_jit_blind_insn(const struct bpf_insn *from, const struct bpf_insn *aux, struct bpf_insn *to_buff) @@ -925,8 +927,13 @@ load_byte: STACK_FRAME_NON_STANDARD(__bpf_prog_run); /* jump table */ #else -static unsigned int __bpf_prog_ret0(void *ctx, const struct bpf_insn *insn) +static unsigned int __bpf_prog_ret0_warn(void *ctx, + const struct bpf_insn *insn) { + /* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON + * is not working properly, so warn about it! + */ + WARN_ON_ONCE(1); return 0; } #endif @@ -981,7 +988,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) #ifndef CONFIG_BPF_JIT_ALWAYS_ON fp->bpf_func = (void *) __bpf_prog_run; #else - fp->bpf_func = (void *) __bpf_prog_ret0; + fp->bpf_func = (void *) __bpf_prog_ret0_warn; #endif /* eBPF JITs can rewrite the program in case constant diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 546ba76b35a5..7b7d26a4f8c8 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -24,6 +24,7 @@ static int zero = 0; static int one = 1; +static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; @@ -292,13 +293,14 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, -#ifndef CONFIG_BPF_JIT_ALWAYS_ON - .proc_handler = proc_dointvec -#else .proc_handler = proc_dointvec_minmax, +# ifdef CONFIG_BPF_JIT_ALWAYS_ON .extra1 = &one, .extra2 = &one, -#endif +# else + .extra1 = &zero, + .extra2 = &two, +# endif }, # ifdef CONFIG_HAVE_EBPF_JIT { @@ -306,7 +308,9 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_harden, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, }, # endif #endif diff --git a/net/socket.c b/net/socket.c index d9e2989c10c4..bf99bc1fab2c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2550,15 +2550,6 @@ out_fs: core_initcall(sock_init); /* early initcall */ -static int __init jit_init(void) -{ -#ifdef CONFIG_BPF_JIT_ALWAYS_ON - bpf_jit_enable = 1; -#endif - return 0; -} -pure_initcall(jit_init); - #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { From 4d0475705481f2d2c74d49aba241d2b07a900d8f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Aug 2019 23:59:56 +0100 Subject: [PATCH 046/104] bpf: restrict access to core bpf sysctls commit 2e4a30983b0f9b19b59e38bbf7427d7fdd480d98 upstream. Given BPF reaches far beyond just networking these days, it was never intended to allow setting and in some cases reading those knobs out of a user namespace root running without CAP_SYS_ADMIN, thus tighten such access. Also the bpf_jit_enable = 2 debugging mode should only be allowed if kptr_restrict is not set since it otherwise can leak addresses to the kernel log. Dump a note to the kernel log that this is for debugging JITs only when enabled. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov [bwh: Backported to 4.9: - We don't have bpf_dump_raw_ok(), so drop the condition based on it. This condition only made it a bit harder for a privileged user to do something silly. - Drop change to bpf_jit_kallsyms] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/core/sysctl_net_core.c | 39 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7b7d26a4f8c8..dd59880db48f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -232,6 +232,41 @@ static int proc_do_rss_key(struct ctl_table *table, int write, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } +#ifdef CONFIG_BPF_JIT +static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret, jit_enable = *(int *)table->data; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &jit_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + *(int *)table->data = jit_enable; + if (jit_enable == 2) + pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); + } + return ret; +} + +# ifdef CONFIG_HAVE_EBPF_JIT +static int +proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); +} +# endif +#endif + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -293,7 +328,7 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_bpf_enable, # ifdef CONFIG_BPF_JIT_ALWAYS_ON .extra1 = &one, .extra2 = &one, @@ -308,7 +343,7 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_harden, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_bpf_restricted, .extra1 = &zero, .extra2 = &two, }, From c98446e1bab6253ddce7144cc2a91c400a323839 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 17 Aug 2019 00:00:08 +0100 Subject: [PATCH 047/104] bpf: add bpf_jit_limit knob to restrict unpriv allocations commit ede95a63b5e84ddeea6b0c473b36ab8bfd8c6ce3 upstream. Rick reported that the BPF JIT could potentially fill the entire module space with BPF programs from unprivileged users which would prevent later attempts to load normal kernel modules or privileged BPF programs, for example. If JIT was enabled but unsuccessful to generate the image, then before commit 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config") we would always fall back to the BPF interpreter. Nowadays in the case where the CONFIG_BPF_JIT_ALWAYS_ON could be set, then the load will abort with a failure since the BPF interpreter was compiled out. Add a global limit and enforce it for unprivileged users such that in case of BPF interpreter compiled out we fail once the limit has been reached or we fall back to BPF interpreter earlier w/o using module mem if latter was compiled in. In a next step, fair share among unprivileged users can be resolved in particular for the case where we would fail hard once limit is reached. Fixes: 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config") Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64") Co-Developed-by: Rick Edgecombe Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Eric Dumazet Cc: Jann Horn Cc: Kees Cook Cc: LKML Signed-off-by: Alexei Starovoitov [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/sysctl/net.txt | 8 ++++++ include/linux/filter.h | 1 + kernel/bpf/core.c | 51 +++++++++++++++++++++++++++++++++--- net/core/sysctl_net_core.c | 10 +++++-- 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index f0480f7ea740..4d5e3b0cab3f 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -54,6 +54,14 @@ Values : 1 - enable JIT hardening for unprivileged users only 2 - enable JIT hardening for all users +bpf_jit_limit +------------- + +This enforces a global limit for memory allocations to the BPF JIT +compiler in order to reject unprivileged JIT requests once it has +been surpassed. bpf_jit_limit contains the value of the global limit +in bytes. + dev_weight -------------- diff --git a/include/linux/filter.h b/include/linux/filter.h index 1f09c521adfe..689bba102007 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -599,6 +599,7 @@ void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; +extern int bpf_jit_limit; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index da03ab4ec578..09df9bbfb48e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -208,9 +208,43 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, } #ifdef CONFIG_BPF_JIT +# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) + /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; +int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; + +static atomic_long_t bpf_jit_current; + +#if defined(MODULES_VADDR) +static int __init bpf_jit_charge_init(void) +{ + /* Only used as heuristic here to derive limit. */ + bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, + PAGE_SIZE), INT_MAX); + return 0; +} +pure_initcall(bpf_jit_charge_init); +#endif + +static int bpf_jit_charge_modmem(u32 pages) +{ + if (atomic_long_add_return(pages, &bpf_jit_current) > + (bpf_jit_limit >> PAGE_SHIFT)) { + if (!capable(CAP_SYS_ADMIN)) { + atomic_long_sub(pages, &bpf_jit_current); + return -EPERM; + } + } + + return 0; +} + +static void bpf_jit_uncharge_modmem(u32 pages) +{ + atomic_long_sub(pages, &bpf_jit_current); +} struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, @@ -218,21 +252,27 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_binary_header *hdr; - unsigned int size, hole, start; + u32 size, hole, start, pages; /* Most of BPF filters are really small, but if some of them * fill a page, allow at least 128 extra bytes to insert a * random section of illegal instructions. */ size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); - hdr = module_alloc(size); - if (hdr == NULL) + pages = size / PAGE_SIZE; + + if (bpf_jit_charge_modmem(pages)) return NULL; + hdr = module_alloc(size); + if (!hdr) { + bpf_jit_uncharge_modmem(pages); + return NULL; + } /* Fill space with illegal/arch-dep instructions. */ bpf_fill_ill_insns(hdr, size); - hdr->pages = size / PAGE_SIZE; + hdr->pages = pages; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -245,7 +285,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, void bpf_jit_binary_free(struct bpf_binary_header *hdr) { + u32 pages = hdr->pages; + module_memfree(hdr); + bpf_jit_uncharge_modmem(pages); } static int bpf_jit_blind_insn(const struct bpf_insn *from, diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index dd59880db48f..58d76ca45937 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -253,7 +253,6 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } -# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -264,7 +263,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } -# endif #endif static struct ctl_table net_core_table[] = { @@ -348,6 +346,14 @@ static struct ctl_table net_core_table[] = { .extra2 = &two, }, # endif + { + .procname = "bpf_jit_limit", + .data = &bpf_jit_limit, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &one, + }, #endif { .procname = "netdev_tstamp_prequeue", From 43f7e9b81a81e4aae273d4199b5c59d581d1d83f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?haibinzhang=28=E5=BC=A0=E6=B5=B7=E6=96=8C=29?= Date: Sat, 17 Aug 2019 00:00:19 +0100 Subject: [PATCH 048/104] vhost-net: set packet weight of tx polling to 2 * vq size commit a2ac99905f1ea8b15997a6ec39af69aa28a3653b upstream. handle_tx will delay rx for tens or even hundreds of milliseconds when tx busy polling udp packets with small length(e.g. 1byte udp payload), because setting VHOST_NET_WEIGHT takes into account only sent-bytes but no single packet length. Ping-Latencies shown below were tested between two Virtual Machines using netperf (UDP_STREAM, len=1), and then another machine pinged the client: vq size=256 Packet-Weight Ping-Latencies(millisecond) min avg max Origin 3.319 18.489 57.303 64 1.643 2.021 2.552 128 1.825 2.600 3.224 256 1.997 2.710 4.295 512 1.860 3.171 4.631 1024 2.002 4.173 9.056 2048 2.257 5.650 9.688 4096 2.093 8.508 15.943 vq size=512 Packet-Weight Ping-Latencies(millisecond) min avg max Origin 6.537 29.177 66.245 64 2.798 3.614 4.403 128 2.861 3.820 4.775 256 3.008 4.018 4.807 512 3.254 4.523 5.824 1024 3.079 5.335 7.747 2048 3.944 8.201 12.762 4096 4.158 11.057 19.985 Seems pretty consistent, a small dip at 2 VQ sizes. Ring size is a hint from device about a burst size it can tolerate. Based on benchmarks, set the weight to 2 * vq size. To evaluate this change, another tests were done using netperf(RR, TX) between two machines with Intel(R) Xeon(R) Gold 6133 CPU @ 2.50GHz, and vq size was tweaked through qemu. Results shown below does not show obvious changes. vq size=256 TCP_RR vq size=512 TCP_RR size/sessions/+thu%/+normalize% size/sessions/+thu%/+normalize% 1/ 1/ -7%/ -2% 1/ 1/ 0%/ -2% 1/ 4/ +1%/ 0% 1/ 4/ +1%/ 0% 1/ 8/ +1%/ -2% 1/ 8/ 0%/ +1% 64/ 1/ -6%/ 0% 64/ 1/ +7%/ +3% 64/ 4/ 0%/ +2% 64/ 4/ -1%/ +1% 64/ 8/ 0%/ 0% 64/ 8/ -1%/ -2% 256/ 1/ -3%/ -4% 256/ 1/ -4%/ -2% 256/ 4/ +3%/ +4% 256/ 4/ +1%/ +2% 256/ 8/ +2%/ 0% 256/ 8/ +1%/ -1% vq size=256 UDP_RR vq size=512 UDP_RR size/sessions/+thu%/+normalize% size/sessions/+thu%/+normalize% 1/ 1/ -5%/ +1% 1/ 1/ -3%/ -2% 1/ 4/ +4%/ +1% 1/ 4/ -2%/ +2% 1/ 8/ -1%/ -1% 1/ 8/ -1%/ 0% 64/ 1/ -2%/ -3% 64/ 1/ +1%/ +1% 64/ 4/ -5%/ -1% 64/ 4/ +2%/ 0% 64/ 8/ 0%/ -1% 64/ 8/ -2%/ +1% 256/ 1/ +7%/ +1% 256/ 1/ -7%/ 0% 256/ 4/ +1%/ +1% 256/ 4/ -3%/ -4% 256/ 8/ +2%/ +2% 256/ 8/ +1%/ +1% vq size=256 TCP_STREAM vq size=512 TCP_STREAM size/sessions/+thu%/+normalize% size/sessions/+thu%/+normalize% 64/ 1/ 0%/ -3% 64/ 1/ 0%/ 0% 64/ 4/ +3%/ -1% 64/ 4/ -2%/ +4% 64/ 8/ +9%/ -4% 64/ 8/ -1%/ +2% 256/ 1/ +1%/ -4% 256/ 1/ +1%/ +1% 256/ 4/ -1%/ -1% 256/ 4/ -3%/ 0% 256/ 8/ +7%/ +5% 256/ 8/ -3%/ 0% 512/ 1/ +1%/ 0% 512/ 1/ -1%/ -1% 512/ 4/ +1%/ -1% 512/ 4/ 0%/ 0% 512/ 8/ +7%/ -5% 512/ 8/ +6%/ -1% 1024/ 1/ 0%/ -1% 1024/ 1/ 0%/ +1% 1024/ 4/ +3%/ 0% 1024/ 4/ +1%/ 0% 1024/ 8/ +8%/ +5% 1024/ 8/ -1%/ 0% 2048/ 1/ +2%/ +2% 2048/ 1/ -1%/ 0% 2048/ 4/ +1%/ 0% 2048/ 4/ 0%/ -1% 2048/ 8/ -2%/ 0% 2048/ 8/ 5%/ -1% 4096/ 1/ -2%/ 0% 4096/ 1/ -2%/ 0% 4096/ 4/ +2%/ 0% 4096/ 4/ 0%/ 0% 4096/ 8/ +9%/ -2% 4096/ 8/ -5%/ -1% Acked-by: Michael S. Tsirkin Signed-off-by: Haibin Zhang Signed-off-by: Yunfang Tai Signed-off-by: Lidong Chen Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 75e1089dfb01..64431c0d2a53 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -39,6 +39,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" * Using this limit prevents one virtqueue from starving others. */ #define VHOST_NET_WEIGHT 0x80000 +/* Max number of packets transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving rx. */ +#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2) + /* MAX number of TX used buffers for outstanding zerocopy */ #define VHOST_MAX_PEND 128 #define VHOST_GOODCOPY_LEN 256 @@ -372,6 +376,7 @@ static void handle_tx(struct vhost_net *net) struct socket *sock; struct vhost_net_ubuf_ref *uninitialized_var(ubufs); bool zcopy, zcopy_used; + int sent_pkts = 0; mutex_lock(&vq->mutex); sock = vq->private_data; @@ -474,7 +479,8 @@ static void handle_tx(struct vhost_net *net) vhost_zerocopy_signal_used(net, vq); total_len += len; vhost_net_tx_packet(net); - if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + if (unlikely(total_len >= VHOST_NET_WEIGHT) || + unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) { vhost_poll_queue(&vq->poll); break; } From 73f768b7684cf2e3ff99e74bc99c9ff253aa979f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sat, 17 Aug 2019 00:00:28 +0100 Subject: [PATCH 049/104] vhost_net: use packet weight for rx handler, too commit db688c24eada63b1efe6d0d7d835e5c3bdd71fd3 upstream. Similar to commit a2ac99905f1e ("vhost-net: set packet weight of tx polling to 2 * vq size"), we need a packet-based limit for handler_rx, too - elsewhere, under rx flood with small packets, tx can be delayed for a very long time, even without busypolling. The pkt limit applied to handle_rx must be the same applied by handle_tx, or we will get unfair scheduling between rx and tx. Tying such limit to the queue length makes it less effective for large queue length values and can introduce large process scheduler latencies, so a constant valued is used - likewise the existing bytes limit. The selected limit has been validated with PVP[1] performance test with different queue sizes: queue size 256 512 1024 baseline 366 354 362 weight 128 715 723 670 weight 256 740 745 733 weight 512 600 460 583 weight 1024 423 427 418 A packet weight of 256 gives peek performances in under all the tested scenarios. No measurable regression in unidirectional performance tests has been detected. [1] https://developers.redhat.com/blog/2017/06/05/measuring-and-comparing-open-vswitch-performance/ Signed-off-by: Paolo Abeni Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 64431c0d2a53..4093b4eea8b9 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -40,8 +40,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" #define VHOST_NET_WEIGHT 0x80000 /* Max number of packets transferred before requeueing the job. - * Using this limit prevents one virtqueue from starving rx. */ -#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2) + * Using this limit prevents one virtqueue from starving others with small + * pkts. + */ +#define VHOST_NET_PKT_WEIGHT 256 /* MAX number of TX used buffers for outstanding zerocopy */ #define VHOST_MAX_PEND 128 @@ -480,7 +482,7 @@ static void handle_tx(struct vhost_net *net) total_len += len; vhost_net_tx_packet(net); if (unlikely(total_len >= VHOST_NET_WEIGHT) || - unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) { + unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT)) { vhost_poll_queue(&vq->poll); break; } @@ -662,6 +664,7 @@ static void handle_rx(struct vhost_net *net) struct socket *sock; struct iov_iter fixup; __virtio16 num_buffers; + int recv_pkts = 0; mutex_lock_nested(&vq->mutex, 0); sock = vq->private_data; @@ -760,7 +763,8 @@ static void handle_rx(struct vhost_net *net) vhost_log_write(vq, vq_log, log, vhost_len, vq->iov, in); total_len += vhost_len; - if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + if (unlikely(total_len >= VHOST_NET_WEIGHT) || + unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) { vhost_poll_queue(&vq->poll); goto out; } From 6214511f226c684d0da4a6cd1f24175d255ff061 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 17 Aug 2019 00:00:36 +0100 Subject: [PATCH 050/104] vhost_net: introduce vhost_exceeds_weight() commit 272f35cba53d088085e5952fd81d7a133ab90789 upstream. Signed-off-by: Jason Wang Signed-off-by: David S. Miller [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4093b4eea8b9..0132cbd12706 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -357,6 +357,12 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, return r; } +static bool vhost_exceeds_weight(int pkts, int total_len) +{ + return total_len >= VHOST_NET_WEIGHT || + pkts >= VHOST_NET_PKT_WEIGHT; +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -481,8 +487,7 @@ static void handle_tx(struct vhost_net *net) vhost_zerocopy_signal_used(net, vq); total_len += len; vhost_net_tx_packet(net); - if (unlikely(total_len >= VHOST_NET_WEIGHT) || - unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT)) { + if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) { vhost_poll_queue(&vq->poll); break; } @@ -763,8 +768,7 @@ static void handle_rx(struct vhost_net *net) vhost_log_write(vq, vq_log, log, vhost_len, vq->iov, in); total_len += vhost_len; - if (unlikely(total_len >= VHOST_NET_WEIGHT) || - unlikely(++recv_pkts >= VHOST_NET_PKT_WEIGHT)) { + if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { vhost_poll_queue(&vq->poll); goto out; } From 66c8d9d53e657d5068d9f234bc4ec1d703107a48 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 17 Aug 2019 00:00:44 +0100 Subject: [PATCH 051/104] vhost: introduce vhost_exceeds_weight() commit e82b9b0727ff6d665fff2d326162b460dded554d upstream. We used to have vhost_exceeds_weight() for vhost-net to: - prevent vhost kthread from hogging the cpu - balance the time spent between TX and RX This function could be useful for vsock and scsi as well. So move it to vhost.c. Device must specify a weight which counts the number of requests, or it can also specific a byte_weight which counts the number of bytes that has been processed. Signed-off-by: Jason Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin [bwh: Backported to 4.9: - In vhost_net, both Tx modes are handled in one loop in handle_tx() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 18 +++++------------- drivers/vhost/scsi.c | 9 ++++++++- drivers/vhost/vhost.c | 20 +++++++++++++++++++- drivers/vhost/vhost.h | 6 +++++- drivers/vhost/vsock.c | 12 +++++++++++- 5 files changed, 48 insertions(+), 17 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 0132cbd12706..9c503540e92a 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -357,12 +357,6 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net, return r; } -static bool vhost_exceeds_weight(int pkts, int total_len) -{ - return total_len >= VHOST_NET_WEIGHT || - pkts >= VHOST_NET_PKT_WEIGHT; -} - /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -487,10 +481,9 @@ static void handle_tx(struct vhost_net *net) vhost_zerocopy_signal_used(net, vq); total_len += len; vhost_net_tx_packet(net); - if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) { - vhost_poll_queue(&vq->poll); + if (unlikely(vhost_exceeds_weight(vq, ++sent_pkts, + total_len))) break; - } } out: mutex_unlock(&vq->mutex); @@ -768,10 +761,8 @@ static void handle_rx(struct vhost_net *net) vhost_log_write(vq, vq_log, log, vhost_len, vq->iov, in); total_len += vhost_len; - if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) { - vhost_poll_queue(&vq->poll); + if (unlikely(vhost_exceeds_weight(vq, ++recv_pkts, total_len))) goto out; - } } vhost_net_enable_vq(net, vq); out: @@ -842,7 +833,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].vhost_hlen = 0; n->vqs[i].sock_hlen = 0; } - vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); + vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, + VHOST_NET_WEIGHT, VHOST_NET_PKT_WEIGHT); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 17f9ad6fdfa5..15c926c8c6b6 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -58,6 +58,12 @@ #define VHOST_SCSI_PREALLOC_UPAGES 2048 #define VHOST_SCSI_PREALLOC_PROT_SGLS 512 +/* Max number of requests before requeueing the job. + * Using this limit prevents one virtqueue from starving others with + * request. + */ +#define VHOST_SCSI_WEIGHT 256 + struct vhost_scsi_inflight { /* Wait for the flush operation to finish */ struct completion comp; @@ -1433,7 +1439,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vqs[i] = &vs->vqs[i].vq; vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ); + vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, + VHOST_SCSI_WEIGHT, 0); vhost_scsi_init_inflight(vs, NULL); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a54dbfe664cd..8da95d4ac4b7 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -393,8 +393,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) vhost_vq_free_iovecs(dev->vqs[i]); } +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, + int pkts, int total_len) +{ + struct vhost_dev *dev = vq->dev; + + if ((dev->byte_weight && total_len >= dev->byte_weight) || + pkts >= dev->weight) { + vhost_poll_queue(&vq->poll); + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(vhost_exceeds_weight); + void vhost_dev_init(struct vhost_dev *dev, - struct vhost_virtqueue **vqs, int nvqs) + struct vhost_virtqueue **vqs, int nvqs, + int weight, int byte_weight) { struct vhost_virtqueue *vq; int i; @@ -408,6 +424,8 @@ void vhost_dev_init(struct vhost_dev *dev, dev->iotlb = NULL; dev->mm = NULL; dev->worker = NULL; + dev->weight = weight; + dev->byte_weight = byte_weight; init_llist_head(&dev->work_list); init_waitqueue_head(&dev->wait); INIT_LIST_HEAD(&dev->read_list); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index e8efe1af7487..7932ad8d071a 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -164,9 +164,13 @@ struct vhost_dev { struct list_head read_list; struct list_head pending_list; wait_queue_head_t wait; + int weight; + int byte_weight; }; -void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); +bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); +void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, + int nvqs, int weight, int byte_weight); long vhost_dev_set_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev); long vhost_dev_check_owner(struct vhost_dev *); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 3cefd602b5b1..a775493b7b67 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -21,6 +21,14 @@ #include "vhost.h" #define VHOST_VSOCK_DEFAULT_HOST_CID 2 +/* Max number of bytes transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others. */ +#define VHOST_VSOCK_WEIGHT 0x80000 +/* Max number of packets transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others with + * small pkts. + */ +#define VHOST_VSOCK_PKT_WEIGHT 256 enum { VHOST_VSOCK_FEATURES = VHOST_FEATURES, @@ -529,7 +537,9 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; - vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), + VHOST_VSOCK_PKT_WEIGHT, + VHOST_VSOCK_WEIGHT); file->private_data = vsock; spin_lock_init(&vsock->send_pkt_list_lock); From 4b586288578a3a2aa4efb969feed86f2d760f082 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 17 Aug 2019 00:00:53 +0100 Subject: [PATCH 052/104] vhost_net: fix possible infinite loop commit e2412c07f8f3040593dfb88207865a3cd58680c0 upstream. When the rx buffer is too small for a packet, we will discard the vq descriptor and retry it for the next packet: while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk, &busyloop_intr))) { ... /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1); err = sock->ops->recvmsg(sock, &msg, 1, MSG_DONTWAIT | MSG_TRUNC); pr_debug("Discarded rx packet: len %zd\n", sock_len); continue; } ... } This makes it possible to trigger a infinite while..continue loop through the co-opreation of two VMs like: 1) Malicious VM1 allocate 1 byte rx buffer and try to slow down the vhost process as much as possible e.g using indirect descriptors or other. 2) Malicious VM2 generate packets to VM1 as fast as possible Fixing this by checking against weight at the end of RX and TX loop. This also eliminate other similar cases when: - userspace is consuming the packets in the meanwhile - theoretical TOCTOU attack if guest moving avail index back and forth to hit the continue after vhost find guest just add new buffers This addresses CVE-2019-3900. Fixes: d8316f3991d20 ("vhost: fix total length when packets are too short") Fixes: 3a4d5c94e9593 ("vhost_net: a kernel-level virtio server") Signed-off-by: Jason Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin [bwh: Backported to 4.9: - Both Tx modes are handled in one loop in handle_tx() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9c503540e92a..dd8798bf88e7 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -393,7 +393,7 @@ static void handle_tx(struct vhost_net *net) hdr_size = nvq->vhost_hlen; zcopy = nvq->ubufs; - for (;;) { + do { /* Release DMAs done buffers first */ if (zcopy) vhost_zerocopy_signal_used(net, vq); @@ -481,10 +481,7 @@ static void handle_tx(struct vhost_net *net) vhost_zerocopy_signal_used(net, vq); total_len += len; vhost_net_tx_packet(net); - if (unlikely(vhost_exceeds_weight(vq, ++sent_pkts, - total_len))) - break; - } + } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len))); out: mutex_unlock(&vq->mutex); } @@ -682,7 +679,10 @@ static void handle_rx(struct vhost_net *net) vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); - while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { + do { + sock_len = vhost_net_rx_peek_head_len(net, sock->sk); + if (!sock_len) + break; sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, @@ -761,10 +761,10 @@ static void handle_rx(struct vhost_net *net) vhost_log_write(vq, vq_log, log, vhost_len, vq->iov, in); total_len += vhost_len; - if (unlikely(vhost_exceeds_weight(vq, ++recv_pkts, total_len))) - goto out; - } - vhost_net_enable_vq(net, vq); + } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len))); + + if (!sock_len) + vhost_net_enable_vq(net, vq); out: mutex_unlock(&vq->mutex); } @@ -834,7 +834,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].sock_hlen = 0; } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, - VHOST_NET_WEIGHT, VHOST_NET_PKT_WEIGHT); + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); From 02b40edda9fd2e42abae40f5dd85122f13dbe7b8 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Sat, 17 Aug 2019 00:01:01 +0100 Subject: [PATCH 053/104] vhost: scsi: add weight support commit c1ea02f15ab5efb3e93fc3144d895410bf79fcf2 upstream. This patch will check the weight and exit the loop if we exceeds the weight. This is useful for preventing scsi kthread from hogging cpu which is guest triggerable. This addresses CVE-2019-3900. Cc: Paolo Bonzini Cc: Stefan Hajnoczi Fixes: 057cbf49a1f0 ("tcm_vhost: Initial merge for vhost level target fabric driver") Signed-off-by: Jason Wang Reviewed-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi [bwh: Backported to 4.9: - Drop changes in vhost_scsi_ctl_handle_vq() - Adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/scsi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 15c926c8c6b6..dad90f6cc3e1 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -851,7 +851,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) u64 tag; u32 exp_data_len, data_direction; unsigned out, in; - int head, ret, prot_bytes; + int head, ret, prot_bytes, c = 0; size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp); size_t out_size, in_size; u16 lun; @@ -870,7 +870,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) vhost_disable_notify(&vs->dev, vq); - for (;;) { + do { head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), &out, &in, NULL, NULL); @@ -1086,7 +1086,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) */ INIT_WORK(&cmd->work, vhost_scsi_submission_work); queue_work(vhost_scsi_workqueue, &cmd->work); - } + } while (likely(!vhost_exceeds_weight(vq, ++c, 0))); out: mutex_unlock(&vq->mutex); } From 53e054b3cd1bd3dde2212c3e279ab4a3eefac6bb Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 17 Aug 2019 00:01:12 +0100 Subject: [PATCH 054/104] siphash: add cryptographically secure PRF commit 2c956a60778cbb6a27e0c7a8a52a91378c90e1d1 upstream. SipHash is a 64-bit keyed hash function that is actually a cryptographically secure PRF, like HMAC. Except SipHash is super fast, and is meant to be used as a hashtable keyed lookup function, or as a general PRF for short input use cases, such as sequence numbers or RNG chaining. For the first usage: There are a variety of attacks known as "hashtable poisoning" in which an attacker forms some data such that the hash of that data will be the same, and then preceeds to fill up all entries of a hashbucket. This is a realistic and well-known denial-of-service vector. Currently hashtables use jhash, which is fast but not secure, and some kind of rotating key scheme (or none at all, which isn't good). SipHash is meant as a replacement for jhash in these cases. There are a modicum of places in the kernel that are vulnerable to hashtable poisoning attacks, either via userspace vectors or network vectors, and there's not a reliable mechanism inside the kernel at the moment to fix it. The first step toward fixing these issues is actually getting a secure primitive into the kernel for developers to use. Then we can, bit by bit, port things over to it as deemed appropriate. While SipHash is extremely fast for a cryptographically secure function, it is likely a bit slower than the insecure jhash, and so replacements will be evaluated on a case-by-case basis based on whether or not the difference in speed is negligible and whether or not the current jhash usage poses a real security risk. For the second usage: A few places in the kernel are using MD5 or SHA1 for creating secure sequence numbers, syn cookies, port numbers, or fast random numbers. SipHash is a faster and more fitting, and more secure replacement for MD5 in those situations. Replacing MD5 and SHA1 with SipHash for these uses is obvious and straight-forward, and so is submitted along with this patch series. There shouldn't be much of a debate over its efficacy. Dozens of languages are already using this internally for their hash tables and PRFs. Some of the BSDs already use this in their kernels. SipHash is a widely known high-speed solution to a widely known set of problems, and it's time we catch-up. Signed-off-by: Jason A. Donenfeld Reviewed-by: Jean-Philippe Aumasson Cc: Linus Torvalds Cc: Eric Biggers Cc: David Laight Cc: Eric Dumazet Signed-off-by: David S. Miller [bwh: Backported to 4.9 as dependency of commits df453700e8d8 "inet: switch IP ID generator to siphash" and 3c79107631db "netfilter: ctnetlink: don't use conntrack/expect object addresses as id"] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/siphash.txt | 100 ++++++++++++++++ MAINTAINERS | 7 ++ include/linux/siphash.h | 85 ++++++++++++++ lib/Kconfig.debug | 6 +- lib/Makefile | 5 +- lib/siphash.c | 232 ++++++++++++++++++++++++++++++++++++++ lib/test_siphash.c | 131 +++++++++++++++++++++ 7 files changed, 561 insertions(+), 5 deletions(-) create mode 100644 Documentation/siphash.txt create mode 100644 include/linux/siphash.h create mode 100644 lib/siphash.c create mode 100644 lib/test_siphash.c diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt new file mode 100644 index 000000000000..e8e6ddbbaab4 --- /dev/null +++ b/Documentation/siphash.txt @@ -0,0 +1,100 @@ + SipHash - a short input PRF +----------------------------------------------- +Written by Jason A. Donenfeld + +SipHash is a cryptographically secure PRF -- a keyed hash function -- that +performs very well for short inputs, hence the name. It was designed by +cryptographers Daniel J. Bernstein and Jean-Philippe Aumasson. It is intended +as a replacement for some uses of: `jhash`, `md5_transform`, `sha_transform`, +and so forth. + +SipHash takes a secret key filled with randomly generated numbers and either +an input buffer or several input integers. It spits out an integer that is +indistinguishable from random. You may then use that integer as part of secure +sequence numbers, secure cookies, or mask it off for use in a hash table. + +1. Generating a key + +Keys should always be generated from a cryptographically secure source of +random numbers, either using get_random_bytes or get_random_once: + +siphash_key_t key; +get_random_bytes(&key, sizeof(key)); + +If you're not deriving your key from here, you're doing it wrong. + +2. Using the functions + +There are two variants of the function, one that takes a list of integers, and +one that takes a buffer: + +u64 siphash(const void *data, size_t len, const siphash_key_t *key); + +And: + +u64 siphash_1u64(u64, const siphash_key_t *key); +u64 siphash_2u64(u64, u64, const siphash_key_t *key); +u64 siphash_3u64(u64, u64, u64, const siphash_key_t *key); +u64 siphash_4u64(u64, u64, u64, u64, const siphash_key_t *key); +u64 siphash_1u32(u32, const siphash_key_t *key); +u64 siphash_2u32(u32, u32, const siphash_key_t *key); +u64 siphash_3u32(u32, u32, u32, const siphash_key_t *key); +u64 siphash_4u32(u32, u32, u32, u32, const siphash_key_t *key); + +If you pass the generic siphash function something of a constant length, it +will constant fold at compile-time and automatically choose one of the +optimized functions. + +3. Hashtable key function usage: + +struct some_hashtable { + DECLARE_HASHTABLE(hashtable, 8); + siphash_key_t key; +}; + +void init_hashtable(struct some_hashtable *table) +{ + get_random_bytes(&table->key, sizeof(table->key)); +} + +static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input) +{ + return &table->hashtable[siphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)]; +} + +You may then iterate like usual over the returned hash bucket. + +4. Security + +SipHash has a very high security margin, with its 128-bit key. So long as the +key is kept secret, it is impossible for an attacker to guess the outputs of +the function, even if being able to observe many outputs, since 2^128 outputs +is significant. + +Linux implements the "2-4" variant of SipHash. + +5. Struct-passing Pitfalls + +Often times the XuY functions will not be large enough, and instead you'll +want to pass a pre-filled struct to siphash. When doing this, it's important +to always ensure the struct has no padding holes. The easiest way to do this +is to simply arrange the members of the struct in descending order of size, +and to use offsetendof() instead of sizeof() for getting the size. For +performance reasons, if possible, it's probably a good thing to align the +struct to the right boundary. Here's an example: + +const struct { + struct in6_addr saddr; + u32 counter; + u16 dport; +} __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .counter = counter, + .dport = dport +}; +u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret); + +6. Resources + +Read the SipHash paper if you're interested in learning more: +https://131002.net/siphash/siphash.pdf diff --git a/MAINTAINERS b/MAINTAINERS index 4f559f5b3a89..98ee40591a9b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11068,6 +11068,13 @@ F: arch/arm/mach-s3c24xx/mach-bast.c F: arch/arm/mach-s3c24xx/bast-ide.c F: arch/arm/mach-s3c24xx/bast-irq.c +SIPHASH PRF ROUTINES +M: Jason A. Donenfeld +S: Maintained +F: lib/siphash.c +F: lib/test_siphash.c +F: include/linux/siphash.h + TI DAVINCI MACHINE SUPPORT M: Sekhar Nori M: Kevin Hilman diff --git a/include/linux/siphash.h b/include/linux/siphash.h new file mode 100644 index 000000000000..feeb29cd113e --- /dev/null +++ b/include/linux/siphash.h @@ -0,0 +1,85 @@ +/* Copyright (C) 2016 Jason A. Donenfeld . All Rights Reserved. + * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4. + */ + +#ifndef _LINUX_SIPHASH_H +#define _LINUX_SIPHASH_H + +#include +#include + +#define SIPHASH_ALIGNMENT __alignof__(u64) +typedef struct { + u64 key[2]; +} siphash_key_t; + +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); +#endif + +u64 siphash_1u64(const u64 a, const siphash_key_t *key); +u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); +u64 siphash_3u64(const u64 a, const u64 b, const u64 c, + const siphash_key_t *key); +u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d, + const siphash_key_t *key); +u64 siphash_1u32(const u32 a, const siphash_key_t *key); +u64 siphash_3u32(const u32 a, const u32 b, const u32 c, + const siphash_key_t *key); + +static inline u64 siphash_2u32(const u32 a, const u32 b, + const siphash_key_t *key) +{ + return siphash_1u64((u64)b << 32 | a, key); +} +static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c, + const u32 d, const siphash_key_t *key) +{ + return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key); +} + + +static inline u64 ___siphash_aligned(const __le64 *data, size_t len, + const siphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return siphash_1u32(le32_to_cpup((const __le32 *)data), key); + if (__builtin_constant_p(len) && len == 8) + return siphash_1u64(le64_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 16) + return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 24) + return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 32) + return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]), + le64_to_cpu(data[2]), le64_to_cpu(data[3]), + key); + return __siphash_aligned(data, len, key); +} + +/** + * siphash - compute 64-bit siphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the siphash key + */ +static inline u64 siphash(const void *data, size_t len, + const siphash_key_t *key) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + return __siphash_unaligned(data, len, key); +#endif + return ___siphash_aligned(data, len, key); +} + +#endif /* _LINUX_SIPHASH_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 58a22ca10f33..4f561860bf41 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1822,9 +1822,9 @@ config TEST_HASH tristate "Perform selftest on hash functions" default n help - Enable this option to test the kernel's integer () - and string () hash functions on boot - (or module load). + Enable this option to test the kernel's integer (), + string (), and siphash () + hash functions on boot (or module load). This is intended to help people writing architecture-specific optimized versions. If unsure, say N. diff --git a/lib/Makefile b/lib/Makefile index 2447a218fff8..452d2956a5a2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,7 +22,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o + earlycpio.o seq_buf.o siphash.o \ + nmi_backtrace.o nodemask.o win_minmax.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o @@ -44,7 +45,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o obj-$(CONFIG_TEST_BPF) += test_bpf.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o -obj-$(CONFIG_TEST_HASH) += test_hash.o +obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o obj-$(CONFIG_TEST_KASAN) += test_kasan.o CFLAGS_test_kasan.o += -fno-builtin obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o diff --git a/lib/siphash.c b/lib/siphash.c new file mode 100644 index 000000000000..c43cf406e71b --- /dev/null +++ b/lib/siphash.c @@ -0,0 +1,232 @@ +/* Copyright (C) 2016 Jason A. Donenfeld . All Rights Reserved. + * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4. + */ + +#include +#include + +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 +#include +#include +#endif + +#define SIPROUND \ + do { \ + v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \ + v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \ + v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \ + v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \ + } while (0) + +#define PREAMBLE(len) \ + u64 v0 = 0x736f6d6570736575ULL; \ + u64 v1 = 0x646f72616e646f6dULL; \ + u64 v2 = 0x6c7967656e657261ULL; \ + u64 v3 = 0x7465646279746573ULL; \ + u64 b = ((u64)(len)) << 56; \ + v3 ^= key->key[1]; \ + v2 ^= key->key[0]; \ + v1 ^= key->key[1]; \ + v0 ^= key->key[0]; + +#define POSTAMBLE \ + v3 ^= b; \ + SIPROUND; \ + SIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + return (v0 ^ v1) ^ (v2 ^ v3); + +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + PREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = le64_to_cpup(data); + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; + case 6: b |= ((u64)end[5]) << 40; + case 5: b |= ((u64)end[4]) << 32; + case 4: b |= le32_to_cpup(data); break; + case 3: b |= ((u64)end[2]) << 16; + case 2: b |= le16_to_cpup(data); break; + case 1: b |= end[0]; + } +#endif + POSTAMBLE +} +EXPORT_SYMBOL(__siphash_aligned); + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + PREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = get_unaligned_le64(data); + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; + case 6: b |= ((u64)end[5]) << 40; + case 5: b |= ((u64)end[4]) << 32; + case 4: b |= get_unaligned_le32(end); break; + case 3: b |= ((u64)end[2]) << 16; + case 2: b |= get_unaligned_le16(end); break; + case 1: b |= end[0]; + } +#endif + POSTAMBLE +} +EXPORT_SYMBOL(__siphash_unaligned); +#endif + +/** + * siphash_1u64 - compute 64-bit siphash PRF value of a u64 + * @first: first u64 + * @key: the siphash key + */ +u64 siphash_1u64(const u64 first, const siphash_key_t *key) +{ + PREAMBLE(8) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_1u64); + +/** + * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64 + * @first: first u64 + * @second: second u64 + * @key: the siphash key + */ +u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key) +{ + PREAMBLE(16) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_2u64); + +/** + * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64 + * @first: first u64 + * @second: second u64 + * @third: third u64 + * @key: the siphash key + */ +u64 siphash_3u64(const u64 first, const u64 second, const u64 third, + const siphash_key_t *key) +{ + PREAMBLE(24) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + v3 ^= third; + SIPROUND; + SIPROUND; + v0 ^= third; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_3u64); + +/** + * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64 + * @first: first u64 + * @second: second u64 + * @third: third u64 + * @forth: forth u64 + * @key: the siphash key + */ +u64 siphash_4u64(const u64 first, const u64 second, const u64 third, + const u64 forth, const siphash_key_t *key) +{ + PREAMBLE(32) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + v3 ^= third; + SIPROUND; + SIPROUND; + v0 ^= third; + v3 ^= forth; + SIPROUND; + SIPROUND; + v0 ^= forth; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_4u64); + +u64 siphash_1u32(const u32 first, const siphash_key_t *key) +{ + PREAMBLE(4) + b |= first; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_1u32); + +u64 siphash_3u32(const u32 first, const u32 second, const u32 third, + const siphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + PREAMBLE(12) + v3 ^= combined; + SIPROUND; + SIPROUND; + v0 ^= combined; + b |= third; + POSTAMBLE +} +EXPORT_SYMBOL(siphash_3u32); diff --git a/lib/test_siphash.c b/lib/test_siphash.c new file mode 100644 index 000000000000..d972acfc15e4 --- /dev/null +++ b/lib/test_siphash.c @@ -0,0 +1,131 @@ +/* Test cases for siphash.c + * + * Copyright (C) 2016 Jason A. Donenfeld . All Rights Reserved. + * + * This file is provided under a dual BSD/GPLv2 license. + * + * SipHash: a fast short-input PRF + * https://131002.net/siphash/ + * + * This implementation is specifically for SipHash2-4. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +/* Test vectors taken from official reference source available at: + * https://131002.net/siphash/siphash24.c + */ + +static const siphash_key_t test_key_siphash = + {{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }}; + +static const u64 test_vectors_siphash[64] = { + 0x726fdb47dd0e0e31ULL, 0x74f839c593dc67fdULL, 0x0d6c8009d9a94f5aULL, + 0x85676696d7fb7e2dULL, 0xcf2794e0277187b7ULL, 0x18765564cd99a68dULL, + 0xcbc9466e58fee3ceULL, 0xab0200f58b01d137ULL, 0x93f5f5799a932462ULL, + 0x9e0082df0ba9e4b0ULL, 0x7a5dbbc594ddb9f3ULL, 0xf4b32f46226bada7ULL, + 0x751e8fbc860ee5fbULL, 0x14ea5627c0843d90ULL, 0xf723ca908e7af2eeULL, + 0xa129ca6149be45e5ULL, 0x3f2acc7f57c29bdbULL, 0x699ae9f52cbe4794ULL, + 0x4bc1b3f0968dd39cULL, 0xbb6dc91da77961bdULL, 0xbed65cf21aa2ee98ULL, + 0xd0f2cbb02e3b67c7ULL, 0x93536795e3a33e88ULL, 0xa80c038ccd5ccec8ULL, + 0xb8ad50c6f649af94ULL, 0xbce192de8a85b8eaULL, 0x17d835b85bbb15f3ULL, + 0x2f2e6163076bcfadULL, 0xde4daaaca71dc9a5ULL, 0xa6a2506687956571ULL, + 0xad87a3535c49ef28ULL, 0x32d892fad841c342ULL, 0x7127512f72f27cceULL, + 0xa7f32346f95978e3ULL, 0x12e0b01abb051238ULL, 0x15e034d40fa197aeULL, + 0x314dffbe0815a3b4ULL, 0x027990f029623981ULL, 0xcadcd4e59ef40c4dULL, + 0x9abfd8766a33735cULL, 0x0e3ea96b5304a7d0ULL, 0xad0c42d6fc585992ULL, + 0x187306c89bc215a9ULL, 0xd4a60abcf3792b95ULL, 0xf935451de4f21df2ULL, + 0xa9538f0419755787ULL, 0xdb9acddff56ca510ULL, 0xd06c98cd5c0975ebULL, + 0xe612a3cb9ecba951ULL, 0xc766e62cfcadaf96ULL, 0xee64435a9752fe72ULL, + 0xa192d576b245165aULL, 0x0a8787bf8ecb74b2ULL, 0x81b3e73d20b49b6fULL, + 0x7fa8220ba3b2eceaULL, 0x245731c13ca42499ULL, 0xb78dbfaf3a8d83bdULL, + 0xea1ad565322a1a0bULL, 0x60e61c23a3795013ULL, 0x6606d7e446282b93ULL, + 0x6ca4ecb15c5f91e1ULL, 0x9f626da15c9625f3ULL, 0xe51b38608ef25f57ULL, + 0x958a324ceb064572ULL +}; + +static int __init siphash_test_init(void) +{ + u8 in[64] __aligned(SIPHASH_ALIGNMENT); + u8 in_unaligned[65] __aligned(SIPHASH_ALIGNMENT); + u8 i; + int ret = 0; + + for (i = 0; i < 64; ++i) { + in[i] = i; + in_unaligned[i + 1] = i; + if (siphash(in, i, &test_key_siphash) != + test_vectors_siphash[i]) { + pr_info("siphash self-test aligned %u: FAIL\n", i + 1); + ret = -EINVAL; + } + if (siphash(in_unaligned + 1, i, &test_key_siphash) != + test_vectors_siphash[i]) { + pr_info("siphash self-test unaligned %u: FAIL\n", i + 1); + ret = -EINVAL; + } + } + if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) != + test_vectors_siphash[8]) { + pr_info("siphash self-test 1u64: FAIL\n"); + ret = -EINVAL; + } + if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + &test_key_siphash) != test_vectors_siphash[16]) { + pr_info("siphash self-test 2u64: FAIL\n"); + ret = -EINVAL; + } + if (siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + 0x1716151413121110ULL, &test_key_siphash) != + test_vectors_siphash[24]) { + pr_info("siphash self-test 3u64: FAIL\n"); + ret = -EINVAL; + } + if (siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL, + 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL, + &test_key_siphash) != test_vectors_siphash[32]) { + pr_info("siphash self-test 4u64: FAIL\n"); + ret = -EINVAL; + } + if (siphash_1u32(0x03020100U, &test_key_siphash) != + test_vectors_siphash[4]) { + pr_info("siphash self-test 1u32: FAIL\n"); + ret = -EINVAL; + } + if (siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash) != + test_vectors_siphash[8]) { + pr_info("siphash self-test 2u32: FAIL\n"); + ret = -EINVAL; + } + if (siphash_3u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, &test_key_siphash) != + test_vectors_siphash[12]) { + pr_info("siphash self-test 3u32: FAIL\n"); + ret = -EINVAL; + } + if (siphash_4u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash) != + test_vectors_siphash[16]) { + pr_info("siphash self-test 4u32: FAIL\n"); + ret = -EINVAL; + } + if (!ret) + pr_info("self-tests: pass\n"); + return ret; +} + +static void __exit siphash_test_exit(void) +{ +} + +module_init(siphash_test_init); +module_exit(siphash_test_exit); + +MODULE_AUTHOR("Jason A. Donenfeld "); +MODULE_LICENSE("Dual BSD/GPL"); From 175a407ce432088d827b822b8a47afd8360a8dbe Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sat, 17 Aug 2019 00:01:19 +0100 Subject: [PATCH 055/104] siphash: implement HalfSipHash1-3 for hash tables commit 1ae2324f732c9c4e2fa4ebd885fa1001b70d52e1 upstream. HalfSipHash, or hsiphash, is a shortened version of SipHash, which generates 32-bit outputs using a weaker 64-bit key. It has *much* lower security margins, and shouldn't be used for anything too sensitive, but it could be used as a hashtable key function replacement, if the output is never exposed, and if the security requirement is not too high. The goal is to make this something that performance-critical jhash users would be willing to use. On 64-bit machines, HalfSipHash1-3 is slower than SipHash1-3, so we alias SipHash1-3 to HalfSipHash1-3 on those systems. 64-bit x86_64: [ 0.509409] test_siphash: SipHash2-4 cycles: 4049181 [ 0.510650] test_siphash: SipHash1-3 cycles: 2512884 [ 0.512205] test_siphash: HalfSipHash1-3 cycles: 3429920 [ 0.512904] test_siphash: JenkinsHash cycles: 978267 So, we map hsiphash() -> SipHash1-3 32-bit x86: [ 0.509868] test_siphash: SipHash2-4 cycles: 14812892 [ 0.513601] test_siphash: SipHash1-3 cycles: 9510710 [ 0.515263] test_siphash: HalfSipHash1-3 cycles: 3856157 [ 0.515952] test_siphash: JenkinsHash cycles: 1148567 So, we map hsiphash() -> HalfSipHash1-3 hsiphash() is roughly 3 times slower than jhash(), but comes with a considerable security improvement. Signed-off-by: Jason A. Donenfeld Reviewed-by: Jean-Philippe Aumasson Signed-off-by: David S. Miller [bwh: Backported to 4.9 to avoid regression for WireGuard with only half the siphash API present] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- Documentation/siphash.txt | 75 +++++++++ include/linux/siphash.h | 57 ++++++- lib/siphash.c | 321 +++++++++++++++++++++++++++++++++++++- lib/test_siphash.c | 98 +++++++++++- 4 files changed, 546 insertions(+), 5 deletions(-) diff --git a/Documentation/siphash.txt b/Documentation/siphash.txt index e8e6ddbbaab4..908d348ff777 100644 --- a/Documentation/siphash.txt +++ b/Documentation/siphash.txt @@ -98,3 +98,78 @@ u64 h = siphash(&combined, offsetofend(typeof(combined), dport), &secret); Read the SipHash paper if you're interested in learning more: https://131002.net/siphash/siphash.pdf + + +~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +HalfSipHash - SipHash's insecure younger cousin +----------------------------------------------- +Written by Jason A. Donenfeld + +On the off-chance that SipHash is not fast enough for your needs, you might be +able to justify using HalfSipHash, a terrifying but potentially useful +possibility. HalfSipHash cuts SipHash's rounds down from "2-4" to "1-3" and, +even scarier, uses an easily brute-forcable 64-bit key (with a 32-bit output) +instead of SipHash's 128-bit key. However, this may appeal to some +high-performance `jhash` users. + +Danger! + +Do not ever use HalfSipHash except for as a hashtable key function, and only +then when you can be absolutely certain that the outputs will never be +transmitted out of the kernel. This is only remotely useful over `jhash` as a +means of mitigating hashtable flooding denial of service attacks. + +1. Generating a key + +Keys should always be generated from a cryptographically secure source of +random numbers, either using get_random_bytes or get_random_once: + +hsiphash_key_t key; +get_random_bytes(&key, sizeof(key)); + +If you're not deriving your key from here, you're doing it wrong. + +2. Using the functions + +There are two variants of the function, one that takes a list of integers, and +one that takes a buffer: + +u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key); + +And: + +u32 hsiphash_1u32(u32, const hsiphash_key_t *key); +u32 hsiphash_2u32(u32, u32, const hsiphash_key_t *key); +u32 hsiphash_3u32(u32, u32, u32, const hsiphash_key_t *key); +u32 hsiphash_4u32(u32, u32, u32, u32, const hsiphash_key_t *key); + +If you pass the generic hsiphash function something of a constant length, it +will constant fold at compile-time and automatically choose one of the +optimized functions. + +3. Hashtable key function usage: + +struct some_hashtable { + DECLARE_HASHTABLE(hashtable, 8); + hsiphash_key_t key; +}; + +void init_hashtable(struct some_hashtable *table) +{ + get_random_bytes(&table->key, sizeof(table->key)); +} + +static inline hlist_head *some_hashtable_bucket(struct some_hashtable *table, struct interesting_input *input) +{ + return &table->hashtable[hsiphash(input, sizeof(*input), &table->key) & (HASH_SIZE(table->hashtable) - 1)]; +} + +You may then iterate like usual over the returned hash bucket. + +4. Performance + +HalfSipHash is roughly 3 times slower than JenkinsHash. For many replacements, +this will not be a problem, as the hashtable lookup isn't the bottleneck. And +in general, this is probably a good sacrifice to make for the security and DoS +resistance of HalfSipHash. diff --git a/include/linux/siphash.h b/include/linux/siphash.h index feeb29cd113e..fa7a6b9cedbf 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -5,7 +5,9 @@ * SipHash: a fast short-input PRF * https://131002.net/siphash/ * - * This implementation is specifically for SipHash2-4. + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. */ #ifndef _LINUX_SIPHASH_H @@ -82,4 +84,57 @@ static inline u64 siphash(const void *data, size_t len, return ___siphash_aligned(data, len, key); } +#define HSIPHASH_ALIGNMENT __alignof__(unsigned long) +typedef struct { + unsigned long key[2]; +} hsiphash_key_t; + +u32 __hsiphash_aligned(const void *data, size_t len, + const hsiphash_key_t *key); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key); +#endif + +u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); +u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); +u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c, + const hsiphash_key_t *key); +u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d, + const hsiphash_key_t *key); + +static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, + const hsiphash_key_t *key) +{ + if (__builtin_constant_p(len) && len == 4) + return hsiphash_1u32(le32_to_cpu(data[0]), key); + if (__builtin_constant_p(len) && len == 8) + return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + key); + if (__builtin_constant_p(len) && len == 12) + return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), key); + if (__builtin_constant_p(len) && len == 16) + return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]), + le32_to_cpu(data[2]), le32_to_cpu(data[3]), + key); + return __hsiphash_aligned(data, len, key); +} + +/** + * hsiphash - compute 32-bit hsiphash PRF value + * @data: buffer to hash + * @size: size of @data + * @key: the hsiphash key + */ +static inline u32 hsiphash(const void *data, size_t len, + const hsiphash_key_t *key) +{ +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + return __hsiphash_unaligned(data, len, key); +#endif + return ___hsiphash_aligned(data, len, key); +} + #endif /* _LINUX_SIPHASH_H */ diff --git a/lib/siphash.c b/lib/siphash.c index c43cf406e71b..3ae58b4edad6 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -5,7 +5,9 @@ * SipHash: a fast short-input PRF * https://131002.net/siphash/ * - * This implementation is specifically for SipHash2-4. + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. */ #include @@ -230,3 +232,320 @@ u64 siphash_3u32(const u32 first, const u32 second, const u32 third, POSTAMBLE } EXPORT_SYMBOL(siphash_3u32); + +#if BITS_PER_LONG == 64 +/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for + * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3. + */ + +#define HSIPROUND SIPROUND +#define HPREAMBLE(len) PREAMBLE(len) +#define HPOSTAMBLE \ + v3 ^= b; \ + HSIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + HSIPROUND; \ + HSIPROUND; \ + HSIPROUND; \ + return (v0 ^ v1) ^ (v2 ^ v3); + +u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = le64_to_cpup(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; + case 6: b |= ((u64)end[5]) << 40; + case 5: b |= ((u64)end[4]) << 32; + case 4: b |= le32_to_cpup(data); break; + case 3: b |= ((u64)end[2]) << 16; + case 2: b |= le16_to_cpup(data); break; + case 1: b |= end[0]; + } +#endif + HPOSTAMBLE +} +EXPORT_SYMBOL(__hsiphash_aligned); + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u64)); + const u8 left = len & (sizeof(u64) - 1); + u64 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u64)) { + m = get_unaligned_le64(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64 + if (left) + b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) & + bytemask_from_count(left))); +#else + switch (left) { + case 7: b |= ((u64)end[6]) << 48; + case 6: b |= ((u64)end[5]) << 40; + case 5: b |= ((u64)end[4]) << 32; + case 4: b |= get_unaligned_le32(end); break; + case 3: b |= ((u64)end[2]) << 16; + case 2: b |= get_unaligned_le16(end); break; + case 1: b |= end[0]; + } +#endif + HPOSTAMBLE +} +EXPORT_SYMBOL(__hsiphash_unaligned); +#endif + +/** + * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32 + * @first: first u32 + * @key: the hsiphash key + */ +u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) +{ + HPREAMBLE(4) + b |= first; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_1u32); + +/** + * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 + * @first: first u32 + * @second: second u32 + * @key: the hsiphash key + */ +u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + HPREAMBLE(8) + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_2u32); + +/** + * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @key: the hsiphash key + */ +u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, + const hsiphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + HPREAMBLE(12) + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + b |= third; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_3u32); + +/** + * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @forth: forth u32 + * @key: the hsiphash key + */ +u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, + const u32 forth, const hsiphash_key_t *key) +{ + u64 combined = (u64)second << 32 | first; + HPREAMBLE(16) + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + combined = (u64)forth << 32 | third; + v3 ^= combined; + HSIPROUND; + v0 ^= combined; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_4u32); +#else +#define HSIPROUND \ + do { \ + v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \ + v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \ + v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \ + v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \ + } while (0) + +#define HPREAMBLE(len) \ + u32 v0 = 0; \ + u32 v1 = 0; \ + u32 v2 = 0x6c796765U; \ + u32 v3 = 0x74656462U; \ + u32 b = ((u32)(len)) << 24; \ + v3 ^= key->key[1]; \ + v2 ^= key->key[0]; \ + v1 ^= key->key[1]; \ + v0 ^= key->key[0]; + +#define HPOSTAMBLE \ + v3 ^= b; \ + HSIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + HSIPROUND; \ + HSIPROUND; \ + HSIPROUND; \ + return v1 ^ v3; + +u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u32)); + const u8 left = len & (sizeof(u32) - 1); + u32 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u32)) { + m = le32_to_cpup(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } + switch (left) { + case 3: b |= ((u32)end[2]) << 16; + case 2: b |= le16_to_cpup(data); break; + case 1: b |= end[0]; + } + HPOSTAMBLE +} +EXPORT_SYMBOL(__hsiphash_aligned); + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +u32 __hsiphash_unaligned(const void *data, size_t len, + const hsiphash_key_t *key) +{ + const u8 *end = data + len - (len % sizeof(u32)); + const u8 left = len & (sizeof(u32) - 1); + u32 m; + HPREAMBLE(len) + for (; data != end; data += sizeof(u32)) { + m = get_unaligned_le32(data); + v3 ^= m; + HSIPROUND; + v0 ^= m; + } + switch (left) { + case 3: b |= ((u32)end[2]) << 16; + case 2: b |= get_unaligned_le16(end); break; + case 1: b |= end[0]; + } + HPOSTAMBLE +} +EXPORT_SYMBOL(__hsiphash_unaligned); +#endif + +/** + * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32 + * @first: first u32 + * @key: the hsiphash key + */ +u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key) +{ + HPREAMBLE(4) + v3 ^= first; + HSIPROUND; + v0 ^= first; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_1u32); + +/** + * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32 + * @first: first u32 + * @second: second u32 + * @key: the hsiphash key + */ +u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key) +{ + HPREAMBLE(8) + v3 ^= first; + HSIPROUND; + v0 ^= first; + v3 ^= second; + HSIPROUND; + v0 ^= second; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_2u32); + +/** + * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @key: the hsiphash key + */ +u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third, + const hsiphash_key_t *key) +{ + HPREAMBLE(12) + v3 ^= first; + HSIPROUND; + v0 ^= first; + v3 ^= second; + HSIPROUND; + v0 ^= second; + v3 ^= third; + HSIPROUND; + v0 ^= third; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_3u32); + +/** + * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32 + * @first: first u32 + * @second: second u32 + * @third: third u32 + * @forth: forth u32 + * @key: the hsiphash key + */ +u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third, + const u32 forth, const hsiphash_key_t *key) +{ + HPREAMBLE(16) + v3 ^= first; + HSIPROUND; + v0 ^= first; + v3 ^= second; + HSIPROUND; + v0 ^= second; + v3 ^= third; + HSIPROUND; + v0 ^= third; + v3 ^= forth; + HSIPROUND; + v0 ^= forth; + HPOSTAMBLE +} +EXPORT_SYMBOL(hsiphash_4u32); +#endif diff --git a/lib/test_siphash.c b/lib/test_siphash.c index d972acfc15e4..a6d854d933bf 100644 --- a/lib/test_siphash.c +++ b/lib/test_siphash.c @@ -7,7 +7,9 @@ * SipHash: a fast short-input PRF * https://131002.net/siphash/ * - * This implementation is specifically for SipHash2-4. + * This implementation is specifically for SipHash2-4 for a secure PRF + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for + * hashtables. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -18,8 +20,8 @@ #include #include -/* Test vectors taken from official reference source available at: - * https://131002.net/siphash/siphash24.c +/* Test vectors taken from reference source available at: + * https://github.com/veorq/SipHash */ static const siphash_key_t test_key_siphash = @@ -50,6 +52,64 @@ static const u64 test_vectors_siphash[64] = { 0x958a324ceb064572ULL }; +#if BITS_PER_LONG == 64 +static const hsiphash_key_t test_key_hsiphash = + {{ 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }}; + +static const u32 test_vectors_hsiphash[64] = { + 0x050fc4dcU, 0x7d57ca93U, 0x4dc7d44dU, + 0xe7ddf7fbU, 0x88d38328U, 0x49533b67U, + 0xc59f22a7U, 0x9bb11140U, 0x8d299a8eU, + 0x6c063de4U, 0x92ff097fU, 0xf94dc352U, + 0x57b4d9a2U, 0x1229ffa7U, 0xc0f95d34U, + 0x2a519956U, 0x7d908b66U, 0x63dbd80cU, + 0xb473e63eU, 0x8d297d1cU, 0xa6cce040U, + 0x2b45f844U, 0xa320872eU, 0xdae6c123U, + 0x67349c8cU, 0x705b0979U, 0xca9913a5U, + 0x4ade3b35U, 0xef6cd00dU, 0x4ab1e1f4U, + 0x43c5e663U, 0x8c21d1bcU, 0x16a7b60dU, + 0x7a8ff9bfU, 0x1f2a753eU, 0xbf186b91U, + 0xada26206U, 0xa3c33057U, 0xae3a36a1U, + 0x7b108392U, 0x99e41531U, 0x3f1ad944U, + 0xc8138825U, 0xc28949a6U, 0xfaf8876bU, + 0x9f042196U, 0x68b1d623U, 0x8b5114fdU, + 0xdf074c46U, 0x12cc86b3U, 0x0a52098fU, + 0x9d292f9aU, 0xa2f41f12U, 0x43a71ed0U, + 0x73f0bce6U, 0x70a7e980U, 0x243c6d75U, + 0xfdb71513U, 0xa67d8a08U, 0xb7e8f148U, + 0xf7a644eeU, 0x0f1837f2U, 0x4b6694e0U, + 0xb7bbb3a8U +}; +#else +static const hsiphash_key_t test_key_hsiphash = + {{ 0x03020100U, 0x07060504U }}; + +static const u32 test_vectors_hsiphash[64] = { + 0x5814c896U, 0xe7e864caU, 0xbc4b0e30U, + 0x01539939U, 0x7e059ea6U, 0x88e3d89bU, + 0xa0080b65U, 0x9d38d9d6U, 0x577999b1U, + 0xc839caedU, 0xe4fa32cfU, 0x959246eeU, + 0x6b28096cU, 0x66dd9cd6U, 0x16658a7cU, + 0xd0257b04U, 0x8b31d501U, 0x2b1cd04bU, + 0x06712339U, 0x522aca67U, 0x911bb605U, + 0x90a65f0eU, 0xf826ef7bU, 0x62512debU, + 0x57150ad7U, 0x5d473507U, 0x1ec47442U, + 0xab64afd3U, 0x0a4100d0U, 0x6d2ce652U, + 0x2331b6a3U, 0x08d8791aU, 0xbc6dda8dU, + 0xe0f6c934U, 0xb0652033U, 0x9b9851ccU, + 0x7c46fb7fU, 0x732ba8cbU, 0xf142997aU, + 0xfcc9aa1bU, 0x05327eb2U, 0xe110131cU, + 0xf9e5e7c0U, 0xa7d708a6U, 0x11795ab1U, + 0x65671619U, 0x9f5fff91U, 0xd89c5267U, + 0x007783ebU, 0x95766243U, 0xab639262U, + 0x9c7e1390U, 0xc368dda6U, 0x38ddc455U, + 0xfa13d379U, 0x979ea4e8U, 0x53ecd77eU, + 0x2ee80657U, 0x33dbb66aU, 0xae3f0577U, + 0x88b4c4ccU, 0x3e7f480bU, 0x74c1ebf8U, + 0x87178304U +}; +#endif + static int __init siphash_test_init(void) { u8 in[64] __aligned(SIPHASH_ALIGNMENT); @@ -70,6 +130,16 @@ static int __init siphash_test_init(void) pr_info("siphash self-test unaligned %u: FAIL\n", i + 1); ret = -EINVAL; } + if (hsiphash(in, i, &test_key_hsiphash) != + test_vectors_hsiphash[i]) { + pr_info("hsiphash self-test aligned %u: FAIL\n", i + 1); + ret = -EINVAL; + } + if (hsiphash(in_unaligned + 1, i, &test_key_hsiphash) != + test_vectors_hsiphash[i]) { + pr_info("hsiphash self-test unaligned %u: FAIL\n", i + 1); + ret = -EINVAL; + } } if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) != test_vectors_siphash[8]) { @@ -115,6 +185,28 @@ static int __init siphash_test_init(void) pr_info("siphash self-test 4u32: FAIL\n"); ret = -EINVAL; } + if (hsiphash_1u32(0x03020100U, &test_key_hsiphash) != + test_vectors_hsiphash[4]) { + pr_info("hsiphash self-test 1u32: FAIL\n"); + ret = -EINVAL; + } + if (hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash) != + test_vectors_hsiphash[8]) { + pr_info("hsiphash self-test 2u32: FAIL\n"); + ret = -EINVAL; + } + if (hsiphash_3u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, &test_key_hsiphash) != + test_vectors_hsiphash[12]) { + pr_info("hsiphash self-test 3u32: FAIL\n"); + ret = -EINVAL; + } + if (hsiphash_4u32(0x03020100U, 0x07060504U, + 0x0b0a0908U, 0x0f0e0d0cU, &test_key_hsiphash) != + test_vectors_hsiphash[16]) { + pr_info("hsiphash self-test 4u32: FAIL\n"); + ret = -EINVAL; + } if (!ret) pr_info("self-tests: pass\n"); return ret; From b97a2f3d58f439d11ececb2faa21dac775d63c5c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 17 Aug 2019 00:01:27 +0100 Subject: [PATCH 056/104] inet: switch IP ID generator to siphash commit df453700e8d81b1bdafdf684365ee2b9431fb702 upstream. According to Amit Klein and Benny Pinkas, IP ID generation is too weak and might be used by attackers. Even with recent net_hash_mix() fix (netns: provide pure entropy for net_hash_mix()) having 64bit key and Jenkins hash is risky. It is time to switch to siphash and its 128bit keys. Signed-off-by: Eric Dumazet Reported-by: Amit Klein Reported-by: Benny Pinkas Signed-off-by: David S. Miller [bwh: Backported to 4.9: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- include/linux/siphash.h | 5 +++++ include/net/netns/ipv4.h | 2 ++ net/ipv4/route.c | 12 +++++++----- net/ipv6/output_core.c | 30 ++++++++++++++++-------------- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/include/linux/siphash.h b/include/linux/siphash.h index fa7a6b9cedbf..bf21591a9e5e 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -21,6 +21,11 @@ typedef struct { u64 key[2]; } siphash_key_t; +static inline bool siphash_key_is_zero(const siphash_key_t *key) +{ + return !(key->key[0] | key->key[1]); +} + u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index bf619a67ec03..af1c5e7c7e94 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -8,6 +8,7 @@ #include #include #include +#include struct tcpm_hash_bucket; struct ctl_table_header; @@ -137,5 +138,6 @@ struct netns_ipv4 { int sysctl_fib_multipath_use_neigh; #endif atomic_t rt_genid; + siphash_key_t ip_id_key; }; #endif diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 02c49857b5a7..d558dc076577 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -496,15 +496,17 @@ EXPORT_SYMBOL(ip_idents_reserve); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { - static u32 ip_idents_hashrnd __read_mostly; u32 hash, id; - net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); - hash = jhash_3words((__force u32)iph->daddr, + hash = siphash_3u32((__force u32)iph->daddr, (__force u32)iph->saddr, - iph->protocol ^ net_hash_mix(net), - ip_idents_hashrnd); + iph->protocol, + &net->ipv4.ip_id_key); id = ip_idents_reserve(hash, segs); iph->id = htons(id); } diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index a338bbc33cf3..6a6d01cb1ace 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -10,15 +10,25 @@ #include #include -static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, +static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { + const struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + .src = *src, + }; u32 hash, id; - hash = __ipv6_addr_jhash(dst, hashrnd); - hash = __ipv6_addr_jhash(src, hash); - hash ^= net_hash_mix(net); + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); + + hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key); /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, * set the hight order instead thus minimizing possible future @@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, */ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { - static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; struct in6_addr *addrs; u32 id; @@ -53,11 +62,7 @@ void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) if (!addrs) return; - net_get_random_once(&ip6_proxy_idents_hashrnd, - sizeof(ip6_proxy_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, - &addrs[1], &addrs[0]); + id = __ipv6_select_ident(net, &addrs[1], &addrs[0]); skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); @@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) { - static u32 ip6_idents_hashrnd __read_mostly; u32 id; - net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr); + id = __ipv6_select_ident(net, daddr, saddr); return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); From 1922476beeeea46bebbe577215078736dd4231dc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 17 Aug 2019 00:01:34 +0100 Subject: [PATCH 057/104] netfilter: ctnetlink: don't use conntrack/expect object addresses as id commit 3c79107631db1f7fd32cf3f7368e4672004a3010 upstream. else, we leak the addresses to userspace via ctnetlink events and dumps. Compute an ID on demand based on the immutable parts of nf_conn struct. Another advantage compared to using an address is that there is no immediate re-use of the same ID in case the conntrack entry is freed and reallocated again immediately. Fixes: 3583240249ef ("[NETFILTER]: nf_conntrack_expect: kill unique ID") Fixes: 7f85f914721f ("[NETFILTER]: nf_conntrack: kill unique ID") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 35 ++++++++++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 34 +++++++++++++++++++++++---- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 9ae819e27940..b57a9f37c297 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -336,6 +336,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, gfp_t flags); void nf_ct_tmpl_free(struct nf_conn *tmpl); +u32 nf_ct_get_id(const struct nf_conn *ct); + #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index df1d5618b008..c3d3b10947b4 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -301,6 +302,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); +/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn after it has been committed to main hash table: + * + * 1. nf_conn address + * 2. nf_conn->ext address + * 3. nf_conn->master address (normally NULL) + * 4. tuple + * 5. the associated net namespace + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); + c = (unsigned long)ct->ext; + d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9e30fd0ab227..deea281ab169 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -441,7 +442,9 @@ static int ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0; @@ -1166,8 +1169,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, ct = nf_ct_tuplehash_to_ctrack(h); if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2472,6 +2476,25 @@ nla_put_failure: static const union nf_inet_addr any_addr; +static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2519,7 +2542,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -2818,7 +2841,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; } From c6a46c615274da0cfb4090140ceea0427ae13e22 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 12 Aug 2019 15:01:30 -0700 Subject: [PATCH 058/104] xtensa: add missing isync to the cpu_reset TLB code commit cd8869f4cb257f22b89495ca40f5281e58ba359c upstream. ITLB entry modifications must be followed by the isync instruction before the new entries are possibly used. cpu_reset lacks one isync between ITLB way 6 initialization and jump to the identity mapping. Add missing isync to xtensa cpu_reset. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index a45d32abea26..b9beae798d72 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -626,6 +626,7 @@ void cpu_reset(void) "add %2, %2, %7\n\t" "addi %0, %0, -1\n\t" "bnez %0, 1b\n\t" + "isync\n\t" /* Jump to identity mapping */ "jx %3\n" "2:\n\t" From 3248c0892da13ffd05275c86edd2a3287b931962 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 9 Aug 2019 23:29:48 -0500 Subject: [PATCH 059/104] ALSA: hda - Fix a memory leak bug commit cfef67f016e4c00a2f423256fc678a6967a9fc09 upstream. In snd_hda_parse_generic_codec(), 'spec' is allocated through kzalloc(). Then, the pin widgets in 'codec' are parsed. However, if the parsing process fails, 'spec' is not deallocated, leading to a memory leak. To fix the above issue, free 'spec' before returning the error. Fixes: 352f7f914ebb ("ALSA: hda - Merge Realtek parser code to generic parser") Signed-off-by: Wenwen Wang Cc: Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index b0bd29003b5d..74223c0a6816 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -5898,7 +5898,7 @@ static int snd_hda_parse_generic_codec(struct hda_codec *codec) err = snd_hda_parse_pin_defcfg(codec, &spec->autocfg, NULL, 0); if (err < 0) - return err; + goto error; err = snd_hda_gen_parse_auto_config(codec, &spec->autocfg); if (err < 0) From f3f82e1041c4017ebadcb6f0f5dfc55ac3efcc48 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 14 Aug 2019 12:09:08 +0800 Subject: [PATCH 060/104] ALSA: hda - Add a generic reboot_notify commit 871b9066027702e6e6589da0e1edd3b7dede7205 upstream. Make codec enter D3 before rebooting or poweroff can fix the noise issue on some laptops. And in theory it is harmless for all codecs to enter D3 before rebooting or poweroff, let us add a generic reboot_notify, then realtek and conexant drivers can call this function. Cc: stable@vger.kernel.org Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_generic.c | 19 +++++++++++++++++++ sound/pci/hda/hda_generic.h | 1 + sound/pci/hda/patch_conexant.c | 6 +----- sound/pci/hda/patch_realtek.c | 11 +---------- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 74223c0a6816..98594cbe34c8 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -5849,6 +5849,24 @@ void snd_hda_gen_free(struct hda_codec *codec) } EXPORT_SYMBOL_GPL(snd_hda_gen_free); +/** + * snd_hda_gen_reboot_notify - Make codec enter D3 before rebooting + * @codec: the HDA codec + * + * This can be put as patch_ops reboot_notify function. + */ +void snd_hda_gen_reboot_notify(struct hda_codec *codec) +{ + /* Make the codec enter D3 to avoid spurious noises from the internal + * speaker during (and after) reboot + */ + snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); + snd_hda_codec_write(codec, codec->core.afg, 0, + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + msleep(10); +} +EXPORT_SYMBOL_GPL(snd_hda_gen_reboot_notify); + #ifdef CONFIG_PM /** * snd_hda_gen_check_power_status - check the loopback power save state @@ -5876,6 +5894,7 @@ static const struct hda_codec_ops generic_patch_ops = { .init = snd_hda_gen_init, .free = snd_hda_gen_free, .unsol_event = snd_hda_jack_unsol_event, + .reboot_notify = snd_hda_gen_reboot_notify, #ifdef CONFIG_PM .check_power_status = snd_hda_gen_check_power_status, #endif diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index f66fc7e25e07..932b533feb48 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -322,6 +322,7 @@ int snd_hda_gen_parse_auto_config(struct hda_codec *codec, struct auto_pin_cfg *cfg); int snd_hda_gen_build_controls(struct hda_codec *codec); int snd_hda_gen_build_pcms(struct hda_codec *codec); +void snd_hda_gen_reboot_notify(struct hda_codec *codec); /* standard jack event callbacks */ void snd_hda_gen_hp_automute(struct hda_codec *codec, diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index df66969b124d..46330c5b1b76 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -216,11 +216,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); - - snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); - snd_hda_codec_write(codec, codec->core.afg, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); - msleep(10); + snd_hda_gen_reboot_notify(codec); } static void cx_auto_free(struct hda_codec *codec) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 04d2dc7097a1..8e995fb27312 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -802,15 +802,6 @@ static void alc_reboot_notify(struct hda_codec *codec) alc_shutup(codec); } -/* power down codec to D3 at reboot/shutdown; set as reboot_notify ops */ -static void alc_d3_at_reboot(struct hda_codec *codec) -{ - snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3); - snd_hda_codec_write(codec, codec->core.afg, 0, - AC_VERB_SET_POWER_STATE, AC_PWRST_D3); - msleep(10); -} - #define alc_free snd_hda_gen_free #ifdef CONFIG_PM @@ -4473,7 +4464,7 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { - spec->reboot_notify = alc_d3_at_reboot; /* reduce noise */ + spec->reboot_notify = snd_hda_gen_reboot_notify; /* reduce noise */ spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; codec->power_save_node = 0; /* avoid click noises */ snd_hda_apply_pincfgs(codec, pincfgs); From f8053ac6e02e522106b4a92b085f4af991de6048 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 14 Aug 2019 12:09:07 +0800 Subject: [PATCH 061/104] ALSA: hda - Let all conexant codec enter D3 when rebooting commit 401714d9534aad8c24196b32600da683116bbe09 upstream. We have 3 new lenovo laptops which have conexant codec 0x14f11f86, these 3 laptops also have the noise issue when rebooting, after letting the codec enter D3 before rebooting or poweroff, the noise disappers. Instead of adding a new ID again in the reboot_notify(), let us make this function apply to all conexant codec. In theory make codec enter D3 before rebooting or poweroff is harmless, and I tested this change on a couple of other Lenovo laptops which have different conexant codecs, there is no side effect so far. Cc: stable@vger.kernel.org Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 46330c5b1b76..8557b94e462c 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -204,15 +204,6 @@ static void cx_auto_reboot_notify(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; - switch (codec->core.vendor_id) { - case 0x14f12008: /* CX8200 */ - case 0x14f150f2: /* CX20722 */ - case 0x14f150f4: /* CX20724 */ - break; - default: - return; - } - /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); From bbbaeba7074ba4d4e00ea91c6f8476a0d2fc055f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 25 Jul 2019 15:13:33 +0200 Subject: [PATCH 062/104] HID: holtek: test for sanity of intfdata commit 01ec0a5f19c8c82960a07f6c7410fc9e01d7fb51 upstream. The ioctl handler uses the intfdata of a second interface, which may not be present in a broken or malicious device, hence the intfdata needs to be checked for NULL. [jkosina@suse.cz: fix newly added spurious space] Reported-by: syzbot+965152643a75a56737be@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-holtek-kbd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c index 6e1a4a4fc0c1..ab9da597106f 100644 --- a/drivers/hid/hid-holtek-kbd.c +++ b/drivers/hid/hid-holtek-kbd.c @@ -126,9 +126,14 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type, /* Locate the boot interface, to receive the LED change events */ struct usb_interface *boot_interface = usb_ifnum_to_if(usb_dev, 0); + struct hid_device *boot_hid; + struct hid_input *boot_hid_input; - struct hid_device *boot_hid = usb_get_intfdata(boot_interface); - struct hid_input *boot_hid_input = list_first_entry(&boot_hid->inputs, + if (unlikely(boot_interface == NULL)) + return -ENODEV; + + boot_hid = usb_get_intfdata(boot_interface); + boot_hid_input = list_first_entry(&boot_hid->inputs, struct hid_input, list); return boot_hid_input->input->event(boot_hid_input->input, type, code, From 52aaeae5f22247659238a06c36973775f57189f3 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Tue, 6 Aug 2019 16:38:58 +0800 Subject: [PATCH 063/104] HID: hiddev: avoid opening a disconnected device commit 9c09b214f30e3c11f9b0b03f89442df03643794d upstream. syzbot found the following crash on: HEAD commit: e96407b4 usb-fuzzer: main usb gadget fuzzer driver git tree: https://github.com/google/kasan.git usb-fuzzer console output: https://syzkaller.appspot.com/x/log.txt?x=147ac20c600000 kernel config: https://syzkaller.appspot.com/x/.config?x=792eb47789f57810 dashboard link: https://syzkaller.appspot.com/bug?extid=62a1e04fd3ec2abf099e compiler: gcc (GCC) 9.0.0 20181231 (experimental) ================================================================== BUG: KASAN: use-after-free in __lock_acquire+0x302a/0x3b50 kernel/locking/lockdep.c:3753 Read of size 8 at addr ffff8881cf591a08 by task syz-executor.1/26260 CPU: 1 PID: 26260 Comm: syz-executor.1 Not tainted 5.3.0-rc2+ #24 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xca/0x13e lib/dump_stack.c:113 print_address_description+0x6a/0x32c mm/kasan/report.c:351 __kasan_report.cold+0x1a/0x33 mm/kasan/report.c:482 kasan_report+0xe/0x12 mm/kasan/common.c:612 __lock_acquire+0x302a/0x3b50 kernel/locking/lockdep.c:3753 lock_acquire+0x127/0x320 kernel/locking/lockdep.c:4412 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0x32/0x50 kernel/locking/spinlock.c:159 hiddev_release+0x82/0x520 drivers/hid/usbhid/hiddev.c:221 __fput+0x2d7/0x840 fs/file_table.c:280 task_work_run+0x13f/0x1c0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x8ef/0x2c50 kernel/exit.c:878 do_group_exit+0x125/0x340 kernel/exit.c:982 get_signal+0x466/0x23d0 kernel/signal.c:2728 do_signal+0x88/0x14e0 arch/x86/kernel/signal.c:815 exit_to_usermode_loop+0x1a2/0x200 arch/x86/entry/common.c:159 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath arch/x86/entry/common.c:274 [inline] do_syscall_64+0x45f/0x580 arch/x86/entry/common.c:299 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x459829 Code: fd b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f75b2a6ccf8 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: fffffffffffffe00 RBX: 000000000075c078 RCX: 0000000000459829 RDX: 0000000000000000 RSI: 0000000000000080 RDI: 000000000075c078 RBP: 000000000075c070 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000075c07c R13: 00007ffcdfe1023f R14: 00007f75b2a6d9c0 R15: 000000000075c07c Allocated by task 104: save_stack+0x1b/0x80 mm/kasan/common.c:69 set_track mm/kasan/common.c:77 [inline] __kasan_kmalloc mm/kasan/common.c:487 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:460 kmalloc include/linux/slab.h:552 [inline] kzalloc include/linux/slab.h:748 [inline] hiddev_connect+0x242/0x5b0 drivers/hid/usbhid/hiddev.c:900 hid_connect+0x239/0xbb0 drivers/hid/hid-core.c:1882 hid_hw_start drivers/hid/hid-core.c:1981 [inline] hid_hw_start+0xa2/0x130 drivers/hid/hid-core.c:1972 appleir_probe+0x13e/0x1a0 drivers/hid/hid-appleir.c:308 hid_device_probe+0x2be/0x3f0 drivers/hid/hid-core.c:2209 really_probe+0x281/0x650 drivers/base/dd.c:548 driver_probe_device+0x101/0x1b0 drivers/base/dd.c:709 __device_attach_driver+0x1c2/0x220 drivers/base/dd.c:816 bus_for_each_drv+0x15c/0x1e0 drivers/base/bus.c:454 __device_attach+0x217/0x360 drivers/base/dd.c:882 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:514 device_add+0xae6/0x16f0 drivers/base/core.c:2114 hid_add_device+0x33c/0x990 drivers/hid/hid-core.c:2365 usbhid_probe+0xa81/0xfa0 drivers/hid/usbhid/hid-core.c:1386 usb_probe_interface+0x305/0x7a0 drivers/usb/core/driver.c:361 really_probe+0x281/0x650 drivers/base/dd.c:548 driver_probe_device+0x101/0x1b0 drivers/base/dd.c:709 __device_attach_driver+0x1c2/0x220 drivers/base/dd.c:816 bus_for_each_drv+0x15c/0x1e0 drivers/base/bus.c:454 __device_attach+0x217/0x360 drivers/base/dd.c:882 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:514 device_add+0xae6/0x16f0 drivers/base/core.c:2114 usb_set_configuration+0xdf6/0x1670 drivers/usb/core/message.c:2023 generic_probe+0x9d/0xd5 drivers/usb/core/generic.c:210 usb_probe_device+0x99/0x100 drivers/usb/core/driver.c:266 really_probe+0x281/0x650 drivers/base/dd.c:548 driver_probe_device+0x101/0x1b0 drivers/base/dd.c:709 __device_attach_driver+0x1c2/0x220 drivers/base/dd.c:816 bus_for_each_drv+0x15c/0x1e0 drivers/base/bus.c:454 __device_attach+0x217/0x360 drivers/base/dd.c:882 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:514 device_add+0xae6/0x16f0 drivers/base/core.c:2114 usb_new_device.cold+0x6a4/0xe79 drivers/usb/core/hub.c:2536 hub_port_connect drivers/usb/core/hub.c:5098 [inline] hub_port_connect_change drivers/usb/core/hub.c:5213 [inline] port_event drivers/usb/core/hub.c:5359 [inline] hub_event+0x1b5c/0x3640 drivers/usb/core/hub.c:5441 process_one_work+0x92b/0x1530 kernel/workqueue.c:2269 worker_thread+0x96/0xe20 kernel/workqueue.c:2415 kthread+0x318/0x420 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Freed by task 104: save_stack+0x1b/0x80 mm/kasan/common.c:69 set_track mm/kasan/common.c:77 [inline] __kasan_slab_free+0x130/0x180 mm/kasan/common.c:449 slab_free_hook mm/slub.c:1423 [inline] slab_free_freelist_hook mm/slub.c:1470 [inline] slab_free mm/slub.c:3012 [inline] kfree+0xe4/0x2f0 mm/slub.c:3953 hiddev_connect.cold+0x45/0x5c drivers/hid/usbhid/hiddev.c:914 hid_connect+0x239/0xbb0 drivers/hid/hid-core.c:1882 hid_hw_start drivers/hid/hid-core.c:1981 [inline] hid_hw_start+0xa2/0x130 drivers/hid/hid-core.c:1972 appleir_probe+0x13e/0x1a0 drivers/hid/hid-appleir.c:308 hid_device_probe+0x2be/0x3f0 drivers/hid/hid-core.c:2209 really_probe+0x281/0x650 drivers/base/dd.c:548 driver_probe_device+0x101/0x1b0 drivers/base/dd.c:709 __device_attach_driver+0x1c2/0x220 drivers/base/dd.c:816 bus_for_each_drv+0x15c/0x1e0 drivers/base/bus.c:454 __device_attach+0x217/0x360 drivers/base/dd.c:882 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:514 device_add+0xae6/0x16f0 drivers/base/core.c:2114 hid_add_device+0x33c/0x990 drivers/hid/hid-core.c:2365 usbhid_probe+0xa81/0xfa0 drivers/hid/usbhid/hid-core.c:1386 usb_probe_interface+0x305/0x7a0 drivers/usb/core/driver.c:361 really_probe+0x281/0x650 drivers/base/dd.c:548 driver_probe_device+0x101/0x1b0 drivers/base/dd.c:709 __device_attach_driver+0x1c2/0x220 drivers/base/dd.c:816 bus_for_each_drv+0x15c/0x1e0 drivers/base/bus.c:454 __device_attach+0x217/0x360 drivers/base/dd.c:882 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:514 device_add+0xae6/0x16f0 drivers/base/core.c:2114 usb_set_configuration+0xdf6/0x1670 drivers/usb/core/message.c:2023 generic_probe+0x9d/0xd5 drivers/usb/core/generic.c:210 usb_probe_device+0x99/0x100 drivers/usb/core/driver.c:266 really_probe+0x281/0x650 drivers/base/dd.c:548 driver_probe_device+0x101/0x1b0 drivers/base/dd.c:709 __device_attach_driver+0x1c2/0x220 drivers/base/dd.c:816 bus_for_each_drv+0x15c/0x1e0 drivers/base/bus.c:454 __device_attach+0x217/0x360 drivers/base/dd.c:882 bus_probe_device+0x1e4/0x290 drivers/base/bus.c:514 device_add+0xae6/0x16f0 drivers/base/core.c:2114 usb_new_device.cold+0x6a4/0xe79 drivers/usb/core/hub.c:2536 hub_port_connect drivers/usb/core/hub.c:5098 [inline] hub_port_connect_change drivers/usb/core/hub.c:5213 [inline] port_event drivers/usb/core/hub.c:5359 [inline] hub_event+0x1b5c/0x3640 drivers/usb/core/hub.c:5441 process_one_work+0x92b/0x1530 kernel/workqueue.c:2269 worker_thread+0x96/0xe20 kernel/workqueue.c:2415 kthread+0x318/0x420 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 The buggy address belongs to the object at ffff8881cf591900 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 264 bytes inside of 512-byte region [ffff8881cf591900, ffff8881cf591b00) The buggy address belongs to the page: page:ffffea00073d6400 refcount:1 mapcount:0 mapping:ffff8881da002500 index:0x0 compound_mapcount: 0 flags: 0x200000000010200(slab|head) raw: 0200000000010200 0000000000000000 0000000100000001 ffff8881da002500 raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881cf591900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8881cf591980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > ffff8881cf591a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8881cf591a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8881cf591b00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== In order to avoid opening a disconnected device, we need to check exist again after acquiring the existance lock, and bail out if necessary. Reported-by: syzbot Cc: Andrey Konovalov Signed-off-by: Hillf Danton Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 308d8432fea3..0bfc2009e407 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -308,6 +308,14 @@ static int hiddev_open(struct inode *inode, struct file *file) spin_unlock_irq(&list->hiddev->list_lock); mutex_lock(&hiddev->existancelock); + /* + * recheck exist with existance lock held to + * avoid opening a disconnected device + */ + if (!list->hiddev->exist) { + res = -ENODEV; + goto bail_unlock; + } if (!list->hiddev->open++) if (list->hiddev->exist) { struct hid_device *hid = hiddev->hid; From 963a14fb9c43f0a6b38fbe3da0b894a147c71388 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Tue, 6 Aug 2019 16:40:15 +0800 Subject: [PATCH 064/104] HID: hiddev: do cleanup in failure of opening a device commit 6d4472d7bec39917b54e4e80245784ea5d60ce49 upstream. Undo what we did for opening before releasing the memory slice. Reported-by: syzbot Cc: Andrey Konovalov Signed-off-by: Hillf Danton Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/usbhid/hiddev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 0bfc2009e407..8903ea09ac58 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -330,6 +330,10 @@ static int hiddev_open(struct inode *inode, struct file *file) return 0; bail_unlock: mutex_unlock(&hiddev->existancelock); + + spin_lock_irq(&list->hiddev->list_lock); + list_del(&list->node); + spin_unlock_irq(&list->hiddev->list_lock); bail: file->private_data = NULL; vfree(list); From 9ab5ae53eeb5beb61f66dd7c6eb9da653def97d2 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 1 Aug 2019 09:44:25 -0700 Subject: [PATCH 065/104] Input: kbtab - sanity check for endpoint type commit c88090dfc84254fa149174eb3e6a8458de1912c4 upstream. The driver should check whether the endpoint it uses has the correct type. Reported-by: syzbot+c7df50363aaff50aa363@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/tablet/kbtab.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c index 4d9d64908b59..b7ea64ec1205 100644 --- a/drivers/input/tablet/kbtab.c +++ b/drivers/input/tablet/kbtab.c @@ -125,6 +125,10 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i if (intf->cur_altsetting->desc.bNumEndpoints < 1) return -ENODEV; + endpoint = &intf->cur_altsetting->endpoint[0].desc; + if (!usb_endpoint_is_int_in(endpoint)) + return -ENODEV; + kbtab = kzalloc(sizeof(struct kbtab), GFP_KERNEL); input_dev = input_allocate_device(); if (!kbtab || !input_dev) @@ -163,8 +167,6 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i input_set_abs_params(input_dev, ABS_Y, 0, 0x1750, 4, 0); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 0xff, 0, 0); - endpoint = &intf->cur_altsetting->endpoint[0].desc; - usb_fill_int_urb(kbtab->irq, dev, usb_rcvintpipe(dev, endpoint->bEndpointAddress), kbtab->data, 8, From b8cab0b87c0195620f825f567f40ffe135f58af8 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 6 Aug 2019 09:05:55 -0700 Subject: [PATCH 066/104] Input: iforce - add sanity checks commit 849f5ae3a513c550cad741c68dd3d7eb2bcc2a2c upstream. The endpoint type should also be checked before a device is accepted. Reported-by: syzbot+5efc10c005014d061a74@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/iforce/iforce-usb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index db64adfbe1af..3e1ea912b41d 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -145,7 +145,12 @@ static int iforce_usb_probe(struct usb_interface *intf, return -ENODEV; epirq = &interface->endpoint[0].desc; + if (!usb_endpoint_is_int_in(epirq)) + return -ENODEV; + epout = &interface->endpoint[1].desc; + if (!usb_endpoint_is_int_out(epout)) + return -ENODEV; if (!(iforce = kzalloc(sizeof(struct iforce) + 32, GFP_KERNEL))) goto fail; From 58c33d479defa1ad1a1d0e26910964e98ee5caa1 Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Tue, 30 Jul 2019 15:13:57 +0200 Subject: [PATCH 067/104] net: usb: pegasus: fix improper read if get_registers() fail commit 224c04973db1125fcebefffd86115f99f50f8277 upstream. get_registers() may fail with -ENOMEM and in this case we can read a garbage from the status variable tmp. Reported-by: syzbot+3499a83b2d062ae409d4@syzkaller.appspotmail.com Signed-off-by: Denis Kirjanov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/pegasus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index ee40ac23507a..5fe9f1273fe2 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -285,7 +285,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val) static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) { int i; - __u8 tmp; + __u8 tmp = 0; __le16 retdatai; int ret; From c1f57bedec52544fee59434d29a2b84d365cda89 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 24 Jul 2019 22:08:50 +0800 Subject: [PATCH 068/104] xen/pciback: remove set but not used variable 'old_state' [ Upstream commit 09e088a4903bd0dd911b4f1732b250130cdaffed ] Fixes gcc '-Wunused-but-set-variable' warning: drivers/xen/xen-pciback/conf_space_capability.c: In function pm_ctrl_write: drivers/xen/xen-pciback/conf_space_capability.c:119:25: warning: variable old_state set but not used [-Wunused-but-set-variable] It is never used so can be removed. Reported-by: Hulk Robot Signed-off-by: YueHaibing Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/xen-pciback/conf_space_capability.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 7f83e9083e9d..b1a1d7de0894 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -115,13 +115,12 @@ static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value, { int err; u16 old_value; - pci_power_t new_state, old_state; + pci_power_t new_state; err = pci_read_config_word(dev, offset, &old_value); if (err) goto out; - old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK); new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK); new_value &= PM_OK_BITS; From 632d97a33ff1d37df2fe6294f47071dc38392054 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 12 Jul 2019 15:29:05 +0200 Subject: [PATCH 069/104] irqchip/irq-imx-gpcv2: Forward irq type to parent [ Upstream commit 9a446ef08f3bfc0c3deb9c6be840af2528ef8cf8 ] The GPCv2 is a stacked IRQ controller below the ARM GIC. It doesn't care about the IRQ type itself, but needs to forward the type to the parent IRQ controller, so this one can be configured correctly. Signed-off-by: Lucas Stach Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-imx-gpcv2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c index 2d203b422129..c56da0b13da5 100644 --- a/drivers/irqchip/irq-imx-gpcv2.c +++ b/drivers/irqchip/irq-imx-gpcv2.c @@ -145,6 +145,7 @@ static struct irq_chip gpcv2_irqchip_data_chip = { .irq_unmask = imx_gpcv2_irq_unmask, .irq_set_wake = imx_gpcv2_irq_set_wake, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_type = irq_chip_set_type_parent, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, #endif From 5b9310f34ae79a7727b3b43451559f819ee4caf2 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Tue, 23 Jul 2019 11:06:01 -0400 Subject: [PATCH 070/104] perf header: Fix divide by zero error if f_header.attr_size==0 [ Upstream commit 7622236ceb167aa3857395f9bdaf871442aa467e ] So I have been having lots of trouble with hand-crafted perf.data files causing segfaults and the like, so I have started fuzzing the perf tool. First issue found: If f_header.attr_size is 0 in the perf.data file, then perf will crash with a divide-by-zero error. Committer note: Added a pr_err() to tell the user why the command failed. Signed-off-by: Vince Weaver Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1907231100440.14532@macbook-air Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/header.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 283148104ffb..82db152bbc60 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2854,6 +2854,13 @@ int perf_session__read_header(struct perf_session *session) file->path); } + if (f_header.attr_size == 0) { + pr_err("ERROR: The %s file's attr size field is 0 which is unexpected.\n" + "Was the 'perf record' command properly terminated?\n", + file->path); + return -EINVAL; + } + nr_attrs = f_header.attrs.size / f_header.attr_size; lseek(fd, f_header.attrs.offset, SEEK_SET); From 219db72f73477e6cf73c48b586447b7514dc6752 Mon Sep 17 00:00:00 2001 From: Numfor Mbiziwo-Tiapo Date: Wed, 24 Jul 2019 16:44:58 -0700 Subject: [PATCH 071/104] perf header: Fix use of unitialized value warning [ Upstream commit 20f9781f491360e7459c589705a2e4b1f136bee9 ] When building our local version of perf with MSAN (Memory Sanitizer) and running the perf record command, MSAN throws a use of uninitialized value warning in "tools/perf/util/util.c:333:6". This warning stems from the "buf" variable being passed into "write". It originated as the variable "ev" with the type union perf_event* defined in the "perf_event__synthesize_attr" function in "tools/perf/util/header.c". In the "perf_event__synthesize_attr" function they allocate space with a malloc call using ev, then go on to only assign some of the member variables before passing "ev" on as a parameter to the "process" function therefore "ev" contains uninitialized memory. Changing the malloc call to zalloc to initialize all the members of "ev" which gets rid of the warning. To reproduce this warning, build perf by running: make -C tools/perf CLANG=1 CC=clang EXTRA_CFLAGS="-fsanitize=memory\ -fsanitize-memory-track-origins" (Additionally, llvm might have to be installed and clang might have to be specified as the compiler - export CC=/usr/bin/clang) then running: tools/perf/perf record -o - ls / | tools/perf/perf --no-pager annotate\ -i - --stdio Please see the cover letter for why false positive warnings may be generated. Signed-off-by: Numfor Mbiziwo-Tiapo Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Drayton Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190724234500.253358-2-nums@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 82db152bbc60..258b19b251a8 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2943,7 +2943,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, size += sizeof(struct perf_event_header); size += ids * sizeof(u64); - ev = malloc(size); + ev = zalloc(size); if (ev == NULL) return -ENOMEM; From 0623446f37d1b3f99642de7e6c4dae263da44648 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 29 Jul 2019 14:47:22 -0700 Subject: [PATCH 072/104] libata: zpodd: Fix small read overflow in zpodd_get_mech_type() [ Upstream commit 71d6c505b4d9e6f76586350450e785e3d452b346 ] Jeffrin reported a KASAN issue: BUG: KASAN: global-out-of-bounds in ata_exec_internal_sg+0x50f/0xc70 Read of size 16 at addr ffffffff91f41f80 by task scsi_eh_1/149 ... The buggy address belongs to the variable: cdb.48319+0x0/0x40 Much like commit 18c9a99bce2a ("libata: zpodd: small read overflow in eject_tray()"), this fixes a cdb[] buffer length, this time in zpodd_get_mech_type(): We read from the cdb[] buffer in ata_exec_internal_sg(). It has to be ATAPI_CDB_LEN (16) bytes long, but this buffer is only 12 bytes. Reported-by: Jeffrin Jose T Fixes: afe759511808c ("libata: identify and init ZPODD devices") Link: https://lore.kernel.org/lkml/201907181423.E808958@keescook/ Tested-by: Jeffrin Jose T Reviewed-by: Nick Desaulniers Signed-off-by: Kees Cook Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libata-zpodd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index 7017a81d53cf..083856272e92 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -55,7 +55,7 @@ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) unsigned int ret; struct rm_feature_desc *desc; struct ata_taskfile tf; - static const char cdb[] = { GPCMD_GET_CONFIGURATION, + static const char cdb[ATAPI_CDB_LEN] = { GPCMD_GET_CONFIGURATION, 2, /* only 1 feature descriptor requested */ 0, 3, /* 3, removable medium feature */ 0, 0, 0,/* reserved */ From 033d8392e5f3169cace4d41477f8ca2ca8407ce5 Mon Sep 17 00:00:00 2001 From: Don Brace Date: Wed, 24 Jul 2019 17:08:06 -0500 Subject: [PATCH 073/104] scsi: hpsa: correct scsi command status issue after reset [ Upstream commit eeebce1862970653cdf5c01e98bc669edd8f529a ] Reviewed-by: Bader Ali - Saleh Reviewed-by: Scott Teel Reviewed-by: Scott Benesh Reviewed-by: Kevin Barnett Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hpsa.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 9f98c7211ec2..b82df8cdf962 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -2236,6 +2236,8 @@ static int handle_ioaccel_mode2_error(struct ctlr_info *h, case IOACCEL2_SERV_RESPONSE_COMPLETE: switch (c2->error_data.status) { case IOACCEL2_STATUS_SR_TASK_COMP_GOOD: + if (cmd) + cmd->result = 0; break; case IOACCEL2_STATUS_SR_TASK_COMP_CHK_COND: cmd->result |= SAM_STAT_CHECK_CONDITION; @@ -2423,8 +2425,10 @@ static void process_ioaccel2_completion(struct ctlr_info *h, /* check for good status */ if (likely(c2->error_data.serv_response == 0 && - c2->error_data.status == 0)) + c2->error_data.status == 0)) { + cmd->result = 0; return hpsa_cmd_free_and_done(h, c, cmd); + } /* * Any RAID offload error results in retry which will use @@ -5511,6 +5515,12 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) } c = cmd_tagged_alloc(h, cmd); + /* + * This is necessary because the SML doesn't zero out this field during + * error recovery. + */ + cmd->result = 0; + /* * Call alternate submit routine for I/O accelerated commands. * Retries always go down the normal I/O path. From 4f62e065a5c628d8e8bad58f3f5245dfe7f504dd Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 31 Jul 2019 14:26:51 +0200 Subject: [PATCH 074/104] ata: libahci: do not complain in case of deferred probe [ Upstream commit 090bb803708198e5ab6b0046398c7ed9f4d12d6b ] Retrieving PHYs can defer the probe, do not spawn an error when -EPROBE_DEFER is returned, it is normal behavior. Fixes: b1a9edbda040 ("ata: libahci: allow to use multiple PHYs") Reviewed-by: Hans de Goede Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/libahci_platform.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index cd2eab6aa92e..65371e1befe8 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -300,6 +300,9 @@ static int ahci_platform_get_phy(struct ahci_host_priv *hpriv, u32 port, hpriv->phys[port] = NULL; rc = 0; break; + case -EPROBE_DEFER: + /* Do not complain yet */ + break; default: dev_err(dev, From 1c335cd18ced56dfe9470375c34199635da9dd9c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 31 Jul 2019 00:59:00 +0900 Subject: [PATCH 075/104] kbuild: modpost: handle KBUILD_EXTRA_SYMBOLS only for external modules [ Upstream commit cb4819934a7f9b87876f11ed05b8624c0114551b ] KBUILD_EXTRA_SYMBOLS makes sense only when building external modules. Moreover, the modpost sets 'external_module' if the -e option is given. I replaced $(patsubst %, -e %,...) with simpler $(addprefix -e,...) while I was here. Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/Makefile.modpost | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 16923ba4b5b1..8cb7971b3f25 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -74,7 +74,7 @@ modpost = scripts/mod/modpost \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a,) \ $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ - $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ + $(if $(KBUILD_EXTMOD),$(addprefix -e ,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ From 7796efd65eb333d3ae266910c8094145509b5694 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 30 Jul 2019 17:23:48 -0400 Subject: [PATCH 076/104] arm64/efi: fix variable 'si' set but not used [ Upstream commit f1d4836201543e88ebe70237e67938168d5fab19 ] GCC throws out this warning on arm64. drivers/firmware/efi/libstub/arm-stub.c: In function 'efi_entry': drivers/firmware/efi/libstub/arm-stub.c:132:22: warning: variable 'si' set but not used [-Wunused-but-set-variable] Fix it by making free_screen_info() a static inline function. Acked-by: Will Deacon Signed-off-by: Qian Cai Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/efi.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 65615820155e..65db124a44bf 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -52,7 +52,11 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define efi_is_64bit() (true) #define alloc_screen_info(x...) &screen_info -#define free_screen_info(x...) + +static inline void free_screen_info(efi_system_table_t *sys_table_arg, + struct screen_info *si) +{ +} /* redeclare as 'hidden' so the compiler will generate relative references */ extern struct screen_info screen_info __attribute__((__visibility__("hidden"))); From 07a6a92898b53e1914a3c23d053deb3608c25a1c Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 31 Jul 2019 16:05:45 -0400 Subject: [PATCH 077/104] arm64/mm: fix variable 'pud' set but not used [ Upstream commit 7d4e2dcf311d3b98421d1f119efe5964cafa32fc ] GCC throws a warning, arch/arm64/mm/mmu.c: In function 'pud_free_pmd_page': arch/arm64/mm/mmu.c:1033:8: warning: variable 'pud' set but not used [-Wunused-but-set-variable] pud_t pud; ^~~ because pud_table() is a macro and compiled away. Fix it by making it a static inline function and for pud_sect() as well. Signed-off-by: Qian Cai Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 73e3718356b0..edb2c359480d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -387,8 +387,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PMD_TYPE_SECT) #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 -#define pud_sect(pud) (0) -#define pud_table(pud) (1) +static inline bool pud_sect(pud_t pud) { return false; } +static inline bool pud_table(pud_t pud) { return true; } #else #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_SECT) From 65585fab2a28f1e4a2df14d6fffd5f39deda328c Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 30 Jul 2019 21:39:57 -0700 Subject: [PATCH 078/104] IB/core: Add mitigation for Spectre V1 [ Upstream commit 61f259821dd3306e49b7d42a3f90fb5a4ff3351b ] Some processors may mispredict an array bounds check and speculatively access memory that they should not. With a user supplied array index we like to play things safe by masking the value with the array size before it is used as an index. Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20190731043957.GA1600@agluck-desk2.amr.corp.intel.com Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/core/user_mad.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 415a3185cde7..cf93a96b6324 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -49,6 +49,7 @@ #include #include #include +#include #include @@ -843,11 +844,14 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) if (get_user(id, arg)) return -EFAULT; + if (id >= IB_UMAD_MAX_AGENTS) + return -EINVAL; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); - if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { + id = array_index_nospec(id, IB_UMAD_MAX_AGENTS); + if (!__get_agent(file, id)) { ret = -EINVAL; goto out; } From 2c8198236a983fb955a9e28d30d37f0b8527fe59 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 1 Aug 2019 15:14:49 +0300 Subject: [PATCH 079/104] IB/mad: Fix use-after-free in ib mad completion handling [ Upstream commit 770b7d96cfff6a8bf6c9f261ba6f135dc9edf484 ] We encountered a use-after-free bug when unloading the driver: [ 3562.116059] BUG: KASAN: use-after-free in ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.117233] Read of size 4 at addr ffff8882ca5aa868 by task kworker/u13:2/23862 [ 3562.118385] [ 3562.119519] CPU: 2 PID: 23862 Comm: kworker/u13:2 Tainted: G OE 5.1.0-for-upstream-dbg-2019-05-19_16-44-30-13 #1 [ 3562.121806] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014 [ 3562.123075] Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core] [ 3562.124383] Call Trace: [ 3562.125640] dump_stack+0x9a/0xeb [ 3562.126911] print_address_description+0xe3/0x2e0 [ 3562.128223] ? ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.129545] __kasan_report+0x15c/0x1df [ 3562.130866] ? ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.132174] kasan_report+0xe/0x20 [ 3562.133514] ib_mad_post_receive_mads+0xddc/0xed0 [ib_core] [ 3562.134835] ? find_mad_agent+0xa00/0xa00 [ib_core] [ 3562.136158] ? qlist_free_all+0x51/0xb0 [ 3562.137498] ? mlx4_ib_sqp_comp_worker+0x1970/0x1970 [mlx4_ib] [ 3562.138833] ? quarantine_reduce+0x1fa/0x270 [ 3562.140171] ? kasan_unpoison_shadow+0x30/0x40 [ 3562.141522] ib_mad_recv_done+0xdf6/0x3000 [ib_core] [ 3562.142880] ? _raw_spin_unlock_irqrestore+0x46/0x70 [ 3562.144277] ? ib_mad_send_done+0x1810/0x1810 [ib_core] [ 3562.145649] ? mlx4_ib_destroy_cq+0x2a0/0x2a0 [mlx4_ib] [ 3562.147008] ? _raw_spin_unlock_irqrestore+0x46/0x70 [ 3562.148380] ? debug_object_deactivate+0x2b9/0x4a0 [ 3562.149814] __ib_process_cq+0xe2/0x1d0 [ib_core] [ 3562.151195] ib_cq_poll_work+0x45/0xf0 [ib_core] [ 3562.152577] process_one_work+0x90c/0x1860 [ 3562.153959] ? pwq_dec_nr_in_flight+0x320/0x320 [ 3562.155320] worker_thread+0x87/0xbb0 [ 3562.156687] ? __kthread_parkme+0xb6/0x180 [ 3562.158058] ? process_one_work+0x1860/0x1860 [ 3562.159429] kthread+0x320/0x3e0 [ 3562.161391] ? kthread_park+0x120/0x120 [ 3562.162744] ret_from_fork+0x24/0x30 ... [ 3562.187615] Freed by task 31682: [ 3562.188602] save_stack+0x19/0x80 [ 3562.189586] __kasan_slab_free+0x11d/0x160 [ 3562.190571] kfree+0xf5/0x2f0 [ 3562.191552] ib_mad_port_close+0x200/0x380 [ib_core] [ 3562.192538] ib_mad_remove_device+0xf0/0x230 [ib_core] [ 3562.193538] remove_client_context+0xa6/0xe0 [ib_core] [ 3562.194514] disable_device+0x14e/0x260 [ib_core] [ 3562.195488] __ib_unregister_device+0x79/0x150 [ib_core] [ 3562.196462] ib_unregister_device+0x21/0x30 [ib_core] [ 3562.197439] mlx4_ib_remove+0x162/0x690 [mlx4_ib] [ 3562.198408] mlx4_remove_device+0x204/0x2c0 [mlx4_core] [ 3562.199381] mlx4_unregister_interface+0x49/0x1d0 [mlx4_core] [ 3562.200356] mlx4_ib_cleanup+0xc/0x1d [mlx4_ib] [ 3562.201329] __x64_sys_delete_module+0x2d2/0x400 [ 3562.202288] do_syscall_64+0x95/0x470 [ 3562.203277] entry_SYSCALL_64_after_hwframe+0x49/0xbe The problem was that the MAD PD was deallocated before the MAD CQ. There was completion work pending for the CQ when the PD got deallocated. When the mad completion handling reached procedure ib_mad_post_receive_mads(), we got a use-after-free bug in the following line of code in that procedure: sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; (the pd pointer in the above line is no longer valid, because the pd has been deallocated). We fix this by allocating the PD before the CQ in procedure ib_mad_port_open(), and deallocating the PD after freeing the CQ in procedure ib_mad_port_close(). Since the CQ completion work queue is flushed during ib_free_cq(), no completions will be pending for that CQ when the PD is later deallocated. Note that freeing the CQ before deallocating the PD is the practice in the ULPs. Fixes: 4be90bc60df4 ("IB/mad: Remove ib_get_dma_mr calls") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Link: https://lore.kernel.org/r/20190801121449.24973-1-leon@kernel.org Signed-off-by: Doug Ledford Signed-off-by: Sasha Levin --- drivers/infiniband/core/mad.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 3e2ab04201e2..25a28e706072 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3155,18 +3155,18 @@ static int ib_mad_port_open(struct ib_device *device, if (has_smi) cq_size *= 2; + port_priv->pd = ib_alloc_pd(device, 0); + if (IS_ERR(port_priv->pd)) { + dev_err(&device->dev, "Couldn't create ib_mad PD\n"); + ret = PTR_ERR(port_priv->pd); + goto error3; + } + port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, IB_POLL_WORKQUEUE); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); - goto error3; - } - - port_priv->pd = ib_alloc_pd(device, 0); - if (IS_ERR(port_priv->pd)) { - dev_err(&device->dev, "Couldn't create ib_mad PD\n"); - ret = PTR_ERR(port_priv->pd); goto error4; } @@ -3209,11 +3209,11 @@ error8: error7: destroy_mad_qp(&port_priv->qp_info[0]); error6: - ib_dealloc_pd(port_priv->pd); -error4: ib_free_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); +error4: + ib_dealloc_pd(port_priv->pd); error3: kfree(port_priv); @@ -3243,8 +3243,8 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); - ib_dealloc_pd(port_priv->pd); ib_free_cq(port_priv->cq); + ib_dealloc_pd(port_priv->pd); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); /* XXX: Handle deallocation of MAD registration tables */ From 3bed38ded6642d01b428ecdd48c972c79fde0e26 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 2 Aug 2019 21:48:40 -0700 Subject: [PATCH 080/104] ocfs2: remove set but not used variable 'last_hash' [ Upstream commit 7bc36e3ce91471b6377c8eadc0a2f220a2280083 ] Fixes gcc '-Wunused-but-set-variable' warning: fs/ocfs2/xattr.c: In function ocfs2_xattr_bucket_find: fs/ocfs2/xattr.c:3828:6: warning: variable last_hash set but not used [-Wunused-but-set-variable] It's never used and can be removed. Link: http://lkml.kernel.org/r/20190716132110.34836-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ocfs2/xattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 01932763b4d1..e108c945ac1f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3832,7 +3832,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int low_bucket = 0, bucket, high_bucket; struct ocfs2_xattr_bucket *search; - u32 last_hash; u64 blkno, lower_blkno = 0; search = ocfs2_xattr_bucket_new(inode); @@ -3876,8 +3875,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, if (xh->xh_count) xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; - last_hash = le32_to_cpu(xe->xe_name_hash); - /* record lower_blkno which may be the insert place. */ lower_blkno = blkno; From 23a9fc5c5fcd822f8885047ea90cd9a59aef7bc0 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 12 Aug 2019 12:15:17 +0100 Subject: [PATCH 081/104] staging: comedi: dt3000: Fix signed integer overflow 'divider * base' commit b4d98bc3fc93ec3a58459948a2c0e0c9b501cd88 upstream. In `dt3k_ns_to_timer()` the following lines near the end of the function result in a signed integer overflow: prescale = 15; base = timer_base * (1 << prescale); divider = 65535; *nanosec = divider * base; (`divider`, `base` and `prescale` are type `int`, `timer_base` and `*nanosec` are type `unsigned int`. The value of `timer_base` will be either 50 or 100.) The main reason for the overflow is that the calculation for `base` is completely wrong. It should be: base = timer_base * (prescale + 1); which matches an earlier instance of this calculation in the same function. Reported-by: David Binderman Cc: Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20190812111517.26803-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/dt3000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/dt3000.c b/drivers/staging/comedi/drivers/dt3000.c index 19e0b7be8495..6840b1ab620b 100644 --- a/drivers/staging/comedi/drivers/dt3000.c +++ b/drivers/staging/comedi/drivers/dt3000.c @@ -377,7 +377,7 @@ static int dt3k_ns_to_timer(unsigned int timer_base, unsigned int *nanosec, } prescale = 15; - base = timer_base * (1 << prescale); + base = timer_base * (prescale + 1); divider = 65535; *nanosec = divider * base; return (prescale << 16) | (divider); From d49423187a6e4639948f1db23ed76e64165a92a0 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 12 Aug 2019 13:08:14 +0100 Subject: [PATCH 082/104] staging: comedi: dt3000: Fix rounding up of timer divisor commit 8e2a589a3fc36ce858d42e767c3bcd8fc62a512b upstream. `dt3k_ns_to_timer()` determines the prescaler and divisor to use to produce a desired timing period. It is influenced by a rounding mode and can round the divisor up, down, or to the nearest value. However, the code for rounding up currently does the same as rounding down! Fix ir by using the `DIV_ROUND_UP()` macro to calculate the divisor when rounding up. Also, change the types of the `divider`, `base` and `prescale` variables from `int` to `unsigned int` to avoid mixing signed and unsigned types in the calculations. Also fix a typo in a nearby comment: "improvment" => "improvement". Signed-off-by: Ian Abbott Cc: stable Link: https://lore.kernel.org/r/20190812120814.21188-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/dt3000.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/comedi/drivers/dt3000.c b/drivers/staging/comedi/drivers/dt3000.c index 6840b1ab620b..917d13abef88 100644 --- a/drivers/staging/comedi/drivers/dt3000.c +++ b/drivers/staging/comedi/drivers/dt3000.c @@ -351,9 +351,9 @@ static irqreturn_t dt3k_interrupt(int irq, void *d) static int dt3k_ns_to_timer(unsigned int timer_base, unsigned int *nanosec, unsigned int flags) { - int divider, base, prescale; + unsigned int divider, base, prescale; - /* This function needs improvment */ + /* This function needs improvement */ /* Don't know if divider==0 works. */ for (prescale = 0; prescale < 16; prescale++) { @@ -367,7 +367,7 @@ static int dt3k_ns_to_timer(unsigned int timer_base, unsigned int *nanosec, divider = (*nanosec) / base; break; case CMDF_ROUND_UP: - divider = (*nanosec) / base; + divider = DIV_ROUND_UP(*nanosec, base); break; } if (divider < 65536) { From 741b832658b98463d619fe4c320f8ab11b2ad4ee Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 12 Aug 2019 16:11:07 -0400 Subject: [PATCH 083/104] USB: core: Fix races in character device registration and deregistraion commit 303911cfc5b95d33687d9046133ff184cf5043ff upstream. The syzbot fuzzer has found two (!) races in the USB character device registration and deregistration routines. This patch fixes the races. The first race results from the fact that usb_deregister_dev() sets usb_minors[intf->minor] to NULL before calling device_destroy() on the class device. This leaves a window during which another thread can allocate the same minor number but will encounter a duplicate name error when it tries to register its own class device. A typical error message in the system log would look like: sysfs: cannot create duplicate filename '/class/usbmisc/ldusb0' The patch fixes this race by destroying the class device first. The second race is in usb_register_dev(). When that routine runs, it first allocates a minor number, then drops minor_rwsem, and then creates the class device. If the device creation fails, the minor number is deallocated and the whole routine returns an error. But during the time while minor_rwsem was dropped, there is a window in which the minor number is allocated and so another thread can successfully open the device file. Typically this results in use-after-free errors or invalid accesses when the other thread closes its open file reference, because the kernel then tries to release resources that were already deallocated when usb_register_dev() failed. The patch fixes this race by keeping minor_rwsem locked throughout the entire routine. Reported-and-tested-by: syzbot+30cf45ebfe0b0c4847a1@syzkaller.appspotmail.com Signed-off-by: Alan Stern CC: Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1908121607590.1659-100000@iolanthe.rowland.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/file.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c index 422ce7b20d73..1626abeca8e8 100644 --- a/drivers/usb/core/file.c +++ b/drivers/usb/core/file.c @@ -191,9 +191,10 @@ int usb_register_dev(struct usb_interface *intf, intf->minor = minor; break; } - up_write(&minor_rwsem); - if (intf->minor < 0) + if (intf->minor < 0) { + up_write(&minor_rwsem); return -EXFULL; + } /* create a usb class device for this usb interface */ snprintf(name, sizeof(name), class_driver->name, minor - minor_base); @@ -201,12 +202,11 @@ int usb_register_dev(struct usb_interface *intf, MKDEV(USB_MAJOR, minor), class_driver, "%s", kbasename(name)); if (IS_ERR(intf->usb_dev)) { - down_write(&minor_rwsem); usb_minors[minor] = NULL; intf->minor = -1; - up_write(&minor_rwsem); retval = PTR_ERR(intf->usb_dev); } + up_write(&minor_rwsem); return retval; } EXPORT_SYMBOL_GPL(usb_register_dev); @@ -232,12 +232,12 @@ void usb_deregister_dev(struct usb_interface *intf, return; dev_dbg(&intf->dev, "removing %d minor\n", intf->minor); + device_destroy(usb_class->class, MKDEV(USB_MAJOR, intf->minor)); down_write(&minor_rwsem); usb_minors[intf->minor] = NULL; up_write(&minor_rwsem); - device_destroy(usb_class->class, MKDEV(USB_MAJOR, intf->minor)); intf->usb_dev = NULL; intf->minor = -1; destroy_usb_class(); From fccd6134d5addf2be1407e3250efdc854b5c5d8a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 8 Aug 2019 16:21:19 +0200 Subject: [PATCH 084/104] usb: cdc-acm: make sure a refcount is taken early enough commit c52873e5a1ef72f845526d9f6a50704433f9c625 upstream. destroy() will decrement the refcount on the interface, so that it needs to be taken so early that it never undercounts. Fixes: 7fb57a019f94e ("USB: cdc-acm: Fix potential deadlock (lockdep warning)") Cc: stable Reported-and-tested-by: syzbot+1b2449b7b5dc240d107a@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20190808142119.7998-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 8d4d46f3fd16..b2edbd4bf8c4 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1264,10 +1264,6 @@ made_compressed_probe: if (acm == NULL) goto alloc_fail; - minor = acm_alloc_minor(acm); - if (minor < 0) - goto alloc_fail1; - ctrlsize = usb_endpoint_maxp(epctrl); readsize = usb_endpoint_maxp(epread) * (quirks == SINGLE_RX_URB ? 1 : 2); @@ -1275,6 +1271,13 @@ made_compressed_probe: acm->writesize = usb_endpoint_maxp(epwrite) * 20; acm->control = control_interface; acm->data = data_interface; + + usb_get_intf(acm->control); /* undone in destruct() */ + + minor = acm_alloc_minor(acm); + if (minor < 0) + goto alloc_fail1; + acm->minor = minor; acm->dev = usb_dev; if (h.usb_cdc_acm_descriptor) @@ -1420,7 +1423,6 @@ skip_countries: usb_driver_claim_interface(&acm_driver, data_interface, acm); usb_set_intfdata(data_interface, acm); - usb_get_intf(control_interface); tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor, &control_interface->dev); if (IS_ERR(tty_dev)) { From 911a8ca7697b26e95b7ec30b94cd5910bee546ff Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 13 Aug 2019 11:35:41 +0200 Subject: [PATCH 085/104] USB: CDC: fix sanity checks in CDC union parser commit 54364278fb3cabdea51d6398b07c87415065b3fc upstream. A few checks checked for the size of the pointer to a structure instead of the structure itself. Copy & paste issue presumably. Fixes: e4c6fb7794982 ("usbnet: move the CDC parser into USB core") Cc: stable Reported-by: syzbot+45a53506b65321c1fe91@syzkaller.appspotmail.com Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20190813093541.18889-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 955cd6552e95..d6075d45e10a 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -2142,14 +2142,14 @@ int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, (struct usb_cdc_dmm_desc *)buffer; break; case USB_CDC_MDLM_TYPE: - if (elength < sizeof(struct usb_cdc_mdlm_desc *)) + if (elength < sizeof(struct usb_cdc_mdlm_desc)) goto next_desc; if (desc) return -EINVAL; desc = (struct usb_cdc_mdlm_desc *)buffer; break; case USB_CDC_MDLM_DETAIL_TYPE: - if (elength < sizeof(struct usb_cdc_mdlm_detail_desc *)) + if (elength < sizeof(struct usb_cdc_mdlm_detail_desc)) goto next_desc; if (detail) return -EINVAL; From bec06e446143d95847d45a7183adb3cdbc8c6459 Mon Sep 17 00:00:00 2001 From: Rogan Dawes Date: Wed, 17 Jul 2019 11:11:34 +0200 Subject: [PATCH 086/104] USB: serial: option: add D-Link DWM-222 device ID commit 552573e42aab5f75aff9bab855a9677979d9a7d5 upstream. Add device id for D-Link DWM-222 A2. MI_00 D-Link HS-USB Diagnostics MI_01 D-Link HS-USB Modem MI_02 D-Link HS-USB AT Port MI_03 D-Link HS-USB NMEA MI_04 D-Link HS-USB WWAN Adapter (qmi_wwan) MI_05 USB Mass Storage Device Cc: stable@vger.kernel.org Signed-off-by: Rogan Dawes Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d7b31fdce94d..d32501106d21 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1949,6 +1949,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e3d, 0xff), /* D-Link DWM-222 A2 */ + .driver_info = RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ From 89049143d4421b7c57b2278ee37554613e8ac98a Mon Sep 17 00:00:00 2001 From: Yoshiaki Okamoto Date: Sat, 20 Jul 2019 22:23:18 +0900 Subject: [PATCH 087/104] USB: serial: option: Add support for ZTE MF871A commit 7e7ae38bf928c5cfa6dd6e9a2cf8b42c84a27c92 upstream. This patch adds support for MF871A USB modem (aka Speed USB STICK U03) to option driver. This modem is manufactured by ZTE corporation, and sold by KDDI. Interface layout: 0: AT 1: MODEM usb-devices output: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 9 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=19d2 ProdID=1481 Rev=52.87 S: Manufacturer=ZTE,Incorporated S: Product=ZTE Technologies MSM S: SerialNumber=1234567890ABCDEF C: #Ifs= 2 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option Co-developed-by: Hiroyuki Yamamoto Signed-off-by: Hiroyuki Yamamoto Signed-off-by: Yoshiaki Okamoto Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d32501106d21..0a216dca8b95 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1544,6 +1544,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff), /* Telewell TW-LTE 4G v2 */ .driver_info = RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, From 35a85bf8d826e1f2f99fd93609c6cbd2f3241741 Mon Sep 17 00:00:00 2001 From: Bob Ham Date: Wed, 24 Jul 2019 07:52:26 -0700 Subject: [PATCH 088/104] USB: serial: option: add the BroadMobi BM818 card commit e5d8badf37e6b547842f2fcde10361b29e08bd36 upstream. Add a VID:PID for the BroadMobi BM818 M.2 card T: Bus=01 Lev=03 Prnt=40 Port=03 Cnt=01 Dev#= 44 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2020 ProdID=2060 Rev=00.00 S: Manufacturer=Qualcomm, Incorporated S: Product=Qualcomm CDMA Technologies MSM C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#=0x0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=(none) I: If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) Signed-off-by: Bob Ham Signed-off-by: Angus Ainslie (Purism) Cc: stable [ johan: use USB_DEVICE_INTERFACE_CLASS() ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0a216dca8b95..e3443c258246 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1957,6 +1957,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2060, 0xff), /* BroadMobi BM818 */ + .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, From 765d9fe3c4f3e832092cf07ebeb2473202edcf17 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 15 Aug 2019 01:26:02 -0700 Subject: [PATCH 089/104] USB: serial: option: Add Motorola modem UARTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6caf0be40a707689e8ff8824fdb96ef77685b1ba upstream. On Motorola Mapphone devices such as Droid 4 there are five USB ports that do not use the same layout as Gobi 1K/2K/etc devices listed in qcserial.c. So we should use qcaux.c or option.c as noted by Dan Williams . As the Motorola USB serial ports have an interrupt endpoint as shown with lsusb -v, we should use option.c instead of qcaux.c as pointed out by Johan Hovold . The ff/ff/ff interfaces seem to always be UARTs on Motorola devices. For the other interfaces, class 0x0a (CDC Data) should not in general be added as they are typically part of a multi-interface function as noted earlier by Bjørn Mork . However, looking at the Motorola mapphone kernel code, the mdm6600 0x0a class is only used for flashing the modem firmware, and there are no other interfaces. So I've added that too with more details below as it works just fine. The ttyUSB ports on Droid 4 are: ttyUSB0 DIAG, CQDM-capable ttyUSB1 MUX or NMEA, no response ttyUSB2 MUX or NMEA, no response ttyUSB3 TCMD ttyUSB4 AT-capable The ttyUSB0 is detected as QCDM capable by ModemManager. I think it's only used for debugging with ModemManager --debug for sending custom AT commands though. ModemManager already can manage data connection using the USB QMI ports that are already handled by the qmi_wwan.c driver. To enable the MUX or NMEA ports, it seems that something needs to be done additionally to enable them, maybe via the DIAG or TCMD port. It might be just a NVRAM setting somewhere, but I have no idea what NVRAM settings may need changing for that. The TCMD port seems to be a Motorola custom protocol for testing the modem and to configure it's NVRAM and seems to work just fine based on a quick test with a minimal tcmdrw tool I wrote. The voice modem AT-capable port seems to provide only partial support, and no PM support compared to the TS 27.010 based UART wired directly to the modem. The UARTs added with this change are the same product IDs as the Motorola Mapphone Android Linux kernel mdm6600_id_table. I don't have any mdm9600 based devices, so I have only tested these on mdm6600 based droid 4. Then for the class 0x0a (CDC Data) mode, the Motorola Mapphone Android Linux kernel driver moto_flashqsc.c just seems to change the port->bulk_out_size to 8K from the default. And is only used for flashing the modem firmware it seems. I've verified that flashing the modem with signed firmware works just fine with the option driver after manually toggling the GPIO pins, so I've added droid 4 modem flashing mode to the option driver. I've not added the other devices listed in moto_flashqsc.c in case they really need different port->bulk_out_size. Those can be added as they get tested to work for flashing the modem. After this patch the output of /sys/kernel/debug/usb/devices has the following for normal 22b8:2a70 mode including the related qmi_wwan interfaces: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=22b8 ProdID=2a70 Rev= 0.00 S: Manufacturer=Motorola, Incorporated S: Product=Flash MZ600 C:* #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=86(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=88(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=8a(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan E: Ad=8b(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=8c(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=08(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan E: Ad=8d(I) Atr=03(Int.) MxPS= 64 Ivl=5ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=09(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms In 22b8:900e "qc_dload" mode the device shows up as: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=22b8 ProdID=900e Rev= 0.00 S: Manufacturer=Motorola, Incorporated S: Product=Flash MZ600 C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms And in 22b8:4281 "ram_downloader" mode the device shows up as: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=22b8 ProdID=4281 Rev= 0.00 S: Manufacturer=Motorola, Incorporated S: Product=Flash MZ600 C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=fc Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms Cc: Bjørn Mork Cc: Dan Williams Cc: Lars Melin Cc: Marcel Partap Cc: Merlijn Wajer Cc: Michael Scott Cc: NeKit Cc: Pavel Machek Cc: Sebastian Reichel Tested-by: Pavel Machek Signed-off-by: Tony Lindgren Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e3443c258246..1bceb11f3782 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -967,6 +967,11 @@ static const struct usb_device_id option_ids[] = { { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x7B) }, { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x7C) }, + /* Motorola devices */ + { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x2a70, 0xff, 0xff, 0xff) }, /* mdm6600 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x2e0a, 0xff, 0xff, 0xff) }, /* mdm9600 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x4281, 0x0a, 0x00, 0xfc) }, /* mdm ram dl */ + { USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x900e, 0xff, 0xff, 0xff) }, /* mdm qc dl */ { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) }, { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) }, From 912420e5252c6867d3c3c3ad7a69ca59e5fc48c8 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Fri, 2 Aug 2019 21:49:19 -0700 Subject: [PATCH 090/104] asm-generic: fix -Wtype-limits compiler warnings [ Upstream commit cbedfe11347fe418621bd188d58a206beb676218 ] Commit d66acc39c7ce ("bitops: Optimise get_order()") introduced a compilation warning because "rx_frag_size" is an "ushort" while PAGE_SHIFT here is 16. The commit changed the get_order() to be a multi-line macro where compilers insist to check all statements in the macro even when __builtin_constant_p(rx_frag_size) will return false as "rx_frag_size" is a module parameter. In file included from ./arch/powerpc/include/asm/page_64.h:107, from ./arch/powerpc/include/asm/page.h:242, from ./arch/powerpc/include/asm/mmu.h:132, from ./arch/powerpc/include/asm/lppaca.h:47, from ./arch/powerpc/include/asm/paca.h:17, from ./arch/powerpc/include/asm/current.h:13, from ./include/linux/thread_info.h:21, from ./arch/powerpc/include/asm/processor.h:39, from ./include/linux/prefetch.h:15, from drivers/net/ethernet/emulex/benet/be_main.c:14: drivers/net/ethernet/emulex/benet/be_main.c: In function 'be_rx_cqs_create': ./include/asm-generic/getorder.h:54:9: warning: comparison is always true due to limited range of data type [-Wtype-limits] (((n) < (1UL << PAGE_SHIFT)) ? 0 : \ ^ drivers/net/ethernet/emulex/benet/be_main.c:3138:33: note: in expansion of macro 'get_order' adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE; ^~~~~~~~~ Fix it by moving all of this multi-line macro into a proper function, and killing __get_order() off. [akpm@linux-foundation.org: remove __get_order() altogether] [cai@lca.pw: v2] Link: http://lkml.kernel.org/r/1564000166-31428-1-git-send-email-cai@lca.pw Link: http://lkml.kernel.org/r/1563914986-26502-1-git-send-email-cai@lca.pw Fixes: d66acc39c7ce ("bitops: Optimise get_order()") Signed-off-by: Qian Cai Reviewed-by: Nathan Chancellor Cc: David S. Miller Cc: Arnd Bergmann Cc: David Howells Cc: Jakub Jelinek Cc: Nick Desaulniers Cc: Bill Wendling Cc: James Y Knight Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/asm-generic/getorder.h | 50 ++++++++++++++-------------------- 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h index 65e4468ac53d..52fbf236a90e 100644 --- a/include/asm-generic/getorder.h +++ b/include/asm-generic/getorder.h @@ -6,24 +6,6 @@ #include #include -/* - * Runtime evaluation of get_order() - */ -static inline __attribute_const__ -int __get_order(unsigned long size) -{ - int order; - - size--; - size >>= PAGE_SHIFT; -#if BITS_PER_LONG == 32 - order = fls(size); -#else - order = fls64(size); -#endif - return order; -} - /** * get_order - Determine the allocation order of a memory size * @size: The size for which to get the order @@ -42,19 +24,27 @@ int __get_order(unsigned long size) * to hold an object of the specified size. * * The result is undefined if the size is 0. - * - * This function may be used to initialise variables with compile time - * evaluations of constants. */ -#define get_order(n) \ -( \ - __builtin_constant_p(n) ? ( \ - ((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ - (((n) < (1UL << PAGE_SHIFT)) ? 0 : \ - ilog2((n) - 1) - PAGE_SHIFT + 1) \ - ) : \ - __get_order(n) \ -) +static inline __attribute_const__ int get_order(unsigned long size) +{ + if (__builtin_constant_p(size)) { + if (!size) + return BITS_PER_LONG - PAGE_SHIFT; + + if (size < (1UL << PAGE_SHIFT)) + return 0; + + return ilog2((size) - 1) - PAGE_SHIFT + 1; + } + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + return fls(size); +#else + return fls64(size); +#endif +} #endif /* __ASSEMBLY__ */ From 6c1dc8f96b54ad9e63ef3becac73750a588abe6e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 Dec 2018 12:14:12 +0100 Subject: [PATCH 091/104] bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K [ Upstream commit fdadd04931c2d7cd294dc5b2b342863f94be53a3 ] Michael and Sandipan report: Commit ede95a63b5 introduced a bpf_jit_limit tuneable to limit BPF JIT allocations. At compile time it defaults to PAGE_SIZE * 40000, and is adjusted again at init time if MODULES_VADDR is defined. For ppc64 kernels, MODULES_VADDR isn't defined, so we're stuck with the compile-time default at boot-time, which is 0x9c400000 when using 64K page size. This overflows the signed 32-bit bpf_jit_limit value: root@ubuntu:/tmp# cat /proc/sys/net/core/bpf_jit_limit -1673527296 and can cause various unexpected failures throughout the network stack. In one case `strace dhclient eth0` reported: setsockopt(5, SOL_SOCKET, SO_ATTACH_FILTER, {len=11, filter=0x105dd27f8}, 16) = -1 ENOTSUPP (Unknown error 524) and similar failures can be seen with tools like tcpdump. This doesn't always reproduce however, and I'm not sure why. The more consistent failure I've seen is an Ubuntu 18.04 KVM guest booted on a POWER9 host would time out on systemd/netplan configuring a virtio-net NIC with no noticeable errors in the logs. Given this and also given that in near future some architectures like arm64 will have a custom area for BPF JIT image allocations we should get rid of the BPF_JIT_LIMIT_DEFAULT fallback / default entirely. For 4.21, we have an overridable bpf_jit_alloc_exec(), bpf_jit_free_exec() so therefore add another overridable bpf_jit_alloc_exec_limit() helper function which returns the possible size of the memory area for deriving the default heuristic in bpf_jit_charge_init(). Like bpf_jit_alloc_exec() and bpf_jit_free_exec(), the new bpf_jit_alloc_exec_limit() assumes that module_alloc() is the default JIT memory provider, and therefore in case archs implement their custom module_alloc() we use MODULES_{END,_VADDR} for limits and otherwise for vmalloc_exec() cases like on ppc64 we use VMALLOC_{END,_START}. Additionally, for archs supporting large page sizes, we should change the sysctl to be handled as long to not run into sysctl restrictions in future. Fixes: ede95a63b5e8 ("bpf: add bpf_jit_limit knob to restrict unpriv allocations") Reported-by: Sandipan Das Reported-by: Michael Roth Signed-off-by: Daniel Borkmann Tested-by: Michael Roth Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- include/linux/filter.h | 2 +- kernel/bpf/core.c | 21 +++++++++++++++------ net/core/sysctl_net_core.c | 20 +++++++++++++++++--- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 689bba102007..0837d904405a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -599,7 +599,7 @@ void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; -extern int bpf_jit_limit; +extern long bpf_jit_limit; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 09df9bbfb48e..df2ebce927ec 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -208,25 +208,34 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, } #ifdef CONFIG_BPF_JIT -# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000) - /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); int bpf_jit_harden __read_mostly; -int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT; +long bpf_jit_limit __read_mostly; static atomic_long_t bpf_jit_current; +/* Can be overridden by an arch's JIT compiler if it has a custom, + * dedicated BPF backend memory area, or if neither of the two + * below apply. + */ +u64 __weak bpf_jit_alloc_exec_limit(void) +{ #if defined(MODULES_VADDR) + return MODULES_END - MODULES_VADDR; +#else + return VMALLOC_END - VMALLOC_START; +#endif +} + static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ - bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2, - PAGE_SIZE), INT_MAX); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + PAGE_SIZE), LONG_MAX); return 0; } pure_initcall(bpf_jit_charge_init); -#endif static int bpf_jit_charge_modmem(u32 pages) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 58d76ca45937..a6fc82704f0c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -28,6 +28,8 @@ static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; +static long long_one __maybe_unused = 1; +static long long_max __maybe_unused = LONG_MAX; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -263,6 +265,17 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } + +static int +proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +} #endif static struct ctl_table net_core_table[] = { @@ -349,10 +362,11 @@ static struct ctl_table net_core_table[] = { { .procname = "bpf_jit_limit", .data = &bpf_jit_limit, - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0600, - .proc_handler = proc_dointvec_minmax_bpf_restricted, - .extra1 = &one, + .proc_handler = proc_dolongvec_minmax_bpf_restricted, + .extra1 = &long_one, + .extra2 = &long_max, }, #endif { From a953b4419f45a2f1496eb345c050baeb2936d0cc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Jul 2019 11:06:17 +0100 Subject: [PATCH 092/104] arm64: compat: Allow single-byte watchpoints on all addresses commit 849adec41203ac5837c40c2d7e08490ffdef3c2c upstream. Commit d968d2b801d8 ("ARM: 7497/1: hw_breakpoint: allow single-byte watchpoints on all addresses") changed the validation requirements for hardware watchpoints on arch/arm/. Update our compat layer to implement the same relaxation. Cc: Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/hw_breakpoint.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 0b9e5f6290f9..d168e52ee622 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -508,13 +508,14 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) /* Aligned */ break; case 1: - /* Allow single byte watchpoint. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) - break; case 2: /* Allow halfword watchpoints and breakpoints. */ if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) break; + case 3: + /* Allow single byte watchpoint. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + break; default: return -EINVAL; } From 62b0863eae921c09b8ae41154a2ae81a2deb515e Mon Sep 17 00:00:00 2001 From: Dirk Morris Date: Thu, 8 Aug 2019 13:57:51 -0700 Subject: [PATCH 093/104] netfilter: conntrack: Use consistent ct id hash calculation commit 656c8e9cc1badbc18eefe6ba01d33ebbcae61b9a upstream. Change ct id hash calculation to only use invariants. Currently the ct id hash calculation is based on some fields that can change in the lifetime on a conntrack entry in some corner cases. The current hash uses the whole tuple which contains an hlist pointer which will change when the conntrack is placed on the dying list resulting in a ct id change. This patch also removes the reply-side tuple and extension pointer from the hash calculation so that the ct id will will not change from initialization until confirmation. Fixes: 3c79107631db1f7 ("netfilter: ctnetlink: don't use conntrack/expect object addresses as id") Signed-off-by: Dirk Morris Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_core.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c3d3b10947b4..1bdae8f188e1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -308,13 +308,12 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); * table location, we assume id gets exposed to userspace. * * Following nf_conn items do not change throughout lifetime - * of the nf_conn after it has been committed to main hash table: + * of the nf_conn: * * 1. nf_conn address - * 2. nf_conn->ext address - * 3. nf_conn->master address (normally NULL) - * 4. tuple - * 5. the associated net namespace + * 2. nf_conn->master address (normally NULL) + * 3. the associated net namespace + * 4. the original direction tuple */ u32 nf_ct_get_id(const struct nf_conn *ct) { @@ -324,9 +323,10 @@ u32 nf_ct_get_id(const struct nf_conn *ct) net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); a = (unsigned long)ct; - b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct)); - c = (unsigned long)ct->ext; - d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash), + b = (unsigned long)ct->master; + c = (unsigned long)nf_ct_net(ct); + d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple), &ct_id_seed); #ifdef CONFIG_64BIT return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); From 79ab4c1ffc3f43681493d003a0abd45c593c07c0 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 16 Jul 2019 20:17:20 +0200 Subject: [PATCH 094/104] Input: psmouse - fix build error of multiple definition commit 49e6979e7e92cf496105b5636f1df0ac17c159c0 upstream. trackpoint_detect() should be static inline while CONFIG_MOUSE_PS2_TRACKPOINT is not set, otherwise, we build fails: drivers/input/mouse/alps.o: In function `trackpoint_detect': alps.c:(.text+0x8e00): multiple definition of `trackpoint_detect' drivers/input/mouse/psmouse-base.o:psmouse-base.c:(.text+0x1b50): first defined here Reported-by: Hulk Robot Fixes: 55e3d9224b60 ("Input: psmouse - allow disabing certain protocol extensions") Signed-off-by: YueHaibing Signed-off-by: Dmitry Torokhov Cc: Hui Wang Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/trackpoint.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h index 88055755f82e..821b446a85dd 100644 --- a/drivers/input/mouse/trackpoint.h +++ b/drivers/input/mouse/trackpoint.h @@ -153,7 +153,8 @@ struct trackpoint_data #ifdef CONFIG_MOUSE_PS2_TRACKPOINT int trackpoint_detect(struct psmouse *psmouse, bool set_properties); #else -inline int trackpoint_detect(struct psmouse *psmouse, bool set_properties) +static inline int trackpoint_detect(struct psmouse *psmouse, + bool set_properties) { return -ENOSYS; } From 5f35f44a3700e8946a40b4541044b5a2fbc874b5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Oct 2018 12:32:46 +0200 Subject: [PATCH 095/104] iommu/amd: Move iommu_init_pci() to .init section commit 24d2c521749d8547765b555b7a85cca179bb2275 upstream. The function is only called from another __init function, so it should be moved to .init too. Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 13bbe5795e4e..9bb8d64b6f94 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1534,7 +1534,7 @@ static const struct attribute_group *amd_iommu_groups[] = { NULL, }; -static int iommu_init_pci(struct amd_iommu *iommu) +static int __init iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; u32 range, misc, low, high; From f7be89699a42ba99015ddaafaadd1d7e3d1496fa Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Sun, 18 Aug 2019 07:25:48 -0700 Subject: [PATCH 096/104] bnx2x: Fix VF's VLAN reconfiguration in reload. [ Upstream commit 4a4d2d372fb9b9229327e2ed01d5d9572eddf4de ] Commit 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload sequence."), introduced a regression in driver that as a part of VF's reload flow, VLANs created on the VF doesn't get re-configured in hardware as vlan metadata/info was not getting cleared for the VFs which causes vlan PING to stop. This patch clears the vlan metadata/info so that VLANs gets re-configured back in the hardware in VF's reload flow and PING/traffic continues for VLANs created over the VFs. Fixes: 04f05230c5c13 ("bnx2x: Remove configured vlans as part of unload sequence.") Signed-off-by: Manish Chopra Signed-off-by: Sudarsana Kalluru Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 7 ++++--- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h | 2 ++ .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 53a71166e784..46a7dcf2ff4a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3062,12 +3062,13 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) /* if VF indicate to PF this function is going down (PF will delete sp * elements and clear initializations */ - if (IS_VF(bp)) + if (IS_VF(bp)) { + bnx2x_clear_vlan_info(bp); bnx2x_vfpf_close_vf(bp); - else if (unload_mode != UNLOAD_RECOVERY) + } else if (unload_mode != UNLOAD_RECOVERY) { /* if this is a normal/close unload need to clean up chip*/ bnx2x_chip_cleanup(bp, unload_mode, keep_link); - else { + } else { /* Send the UNLOAD_REQUEST to the MCP */ bnx2x_send_unload_req(bp, unload_mode); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 243cb9748d35..2ec1c43270b7 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -425,6 +425,8 @@ void bnx2x_set_reset_global(struct bnx2x *bp); void bnx2x_disable_close_the_gate(struct bnx2x *bp); int bnx2x_init_hw_func_cnic(struct bnx2x *bp); +void bnx2x_clear_vlan_info(struct bnx2x *bp); + /** * bnx2x_sp_event - handle ramrods completion. * diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 2ef6012c3dc5..a9681b191304 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -8488,11 +8488,21 @@ int bnx2x_set_vlan_one(struct bnx2x *bp, u16 vlan, return rc; } +void bnx2x_clear_vlan_info(struct bnx2x *bp) +{ + struct bnx2x_vlan_entry *vlan; + + /* Mark that hw forgot all entries */ + list_for_each_entry(vlan, &bp->vlan_reg, link) + vlan->hw = false; + + bp->vlan_cnt = 0; +} + static int bnx2x_del_all_vlans(struct bnx2x *bp) { struct bnx2x_vlan_mac_obj *vlan_obj = &bp->sp_objs[0].vlan_obj; unsigned long ramrod_flags = 0, vlan_flags = 0; - struct bnx2x_vlan_entry *vlan; int rc; __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); @@ -8501,10 +8511,7 @@ static int bnx2x_del_all_vlans(struct bnx2x *bp) if (rc) return rc; - /* Mark that hw forgot all entries */ - list_for_each_entry(vlan, &bp->vlan_reg, link) - vlan->hw = false; - bp->vlan_cnt = 0; + bnx2x_clear_vlan_info(bp); return 0; } From 7876df0500d23e353803318297d5b42944a2e369 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Aug 2019 02:11:57 -0700 Subject: [PATCH 097/104] net/packet: fix race in tpacket_snd() [ Upstream commit 32d3182cd2cd29b2e7e04df7b0db350fbe11289f ] packet_sendmsg() checks tx_ring.pg_vec to decide if it must call tpacket_snd(). Problem is that the check is lockless, meaning another thread can issue a concurrent setsockopt(PACKET_TX_RING ) to flip tx_ring.pg_vec back to NULL. Given that tpacket_snd() grabs pg_vec_lock mutex, we can perform the check again to solve the race. syzbot reported : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 11429 Comm: syz-executor394 Not tainted 5.3.0-rc4+ #101 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:packet_lookup_frame+0x8d/0x270 net/packet/af_packet.c:474 Code: c1 ee 03 f7 73 0c 80 3c 0e 00 0f 85 cb 01 00 00 48 8b 0b 89 c0 4c 8d 24 c1 48 b8 00 00 00 00 00 fc ff df 4c 89 e1 48 c1 e9 03 <80> 3c 01 00 0f 85 94 01 00 00 48 8d 7b 10 4d 8b 3c 24 48 b8 00 00 RSP: 0018:ffff88809f82f7b8 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff8880a45c7030 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 1ffff110148b8e06 RDI: ffff8880a45c703c RBP: ffff88809f82f7e8 R08: ffff888087aea200 R09: fffffbfff134ae50 R10: fffffbfff134ae4f R11: ffffffff89a5727f R12: 0000000000000000 R13: 0000000000000001 R14: ffff8880a45c6ac0 R15: 0000000000000000 FS: 00007fa04716f700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa04716edb8 CR3: 0000000091eb4000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: packet_current_frame net/packet/af_packet.c:487 [inline] tpacket_snd net/packet/af_packet.c:2667 [inline] packet_sendmsg+0x590/0x6250 net/packet/af_packet.c:2975 sock_sendmsg_nosec net/socket.c:637 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:657 ___sys_sendmsg+0x3e2/0x920 net/socket.c:2311 __sys_sendmmsg+0x1bf/0x4d0 net/socket.c:2413 __do_sys_sendmmsg net/socket.c:2442 [inline] __se_sys_sendmmsg net/socket.c:2439 [inline] __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2439 do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3578121da79c..b1dbea544d64 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2651,6 +2651,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); + /* packet_sendmsg() check on tx_ring.pg_vec was lockless, + * we need to confirm it under protection of pg_vec_lock. + */ + if (unlikely(!po->tx_ring.pg_vec)) { + err = -EBUSY; + goto out; + } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; From f85c2f3ab4a074a23946a480b7c0755637a04f93 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Aug 2019 20:49:12 +0800 Subject: [PATCH 098/104] sctp: fix the transport error_count check [ Upstream commit a1794de8b92ea6bc2037f445b296814ac826693e ] As the annotation says in sctp_do_8_2_transport_strike(): "If the transport error count is greater than the pf_retrans threshold, and less than pathmaxrtx ..." It should be transport->error_count checked with pathmaxrxt, instead of asoc->pf_retrans. Fixes: 5aa93bcf66f4 ("sctp: Implement quick failover draft from tsvwg") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_sideeffect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c345bf153bed..b1ead1776e81 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -508,7 +508,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, */ if (net->sctp.pf_enable && (transport->state == SCTP_ACTIVE) && - (asoc->pf_retrans < transport->pathmaxrxt) && + (transport->error_count < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { sctp_assoc_control_transport(asoc, transport, From 47fd4df0c2731cec115d1bee0eb28030355e84ea Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 5 Aug 2019 16:34:34 +0100 Subject: [PATCH 099/104] xen/netback: Reset nr_frags before freeing skb [ Upstream commit 3a0233ddec554b886298de2428edb5c50a20e694 ] At this point nr_frags has been incremented but the frag does not yet have a page assigned so freeing the skb results in a crash. Reset nr_frags before freeing the skb to prevent this. Signed-off-by: Ross Lagerwall Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/netback.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f57815befc90..a469fbe1abaf 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -927,6 +927,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; nskb = xenvif_alloc_skb(0); if (unlikely(nskb == NULL)) { + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); xenvif_tx_err(queue, &txreq, extra_count, idx); if (net_ratelimit()) @@ -942,6 +943,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, if (xenvif_set_skb_gso(queue->vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. */ + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); kfree_skb(nskb); break; From e2d8229bba1bc6f61d5bca1a97dedf4da947e312 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 1 Aug 2019 11:10:19 -0500 Subject: [PATCH 100/104] net/mlx5e: Only support tx/rx pause setting for port owner [ Upstream commit 466df6eb4a9e813b3cfc674363316450c57a89c5 ] Only support changing tx/rx pause frame setting if the net device is the vport group manager. Fixes: 3c2d18ef22df ("net/mlx5e: Support ethtool get/set_pauseparam") Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 54872f8f2f7d..e42ece20cd0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1149,6 +1149,9 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, struct mlx5_core_dev *mdev = priv->mdev; int err; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EOPNOTSUPP; + if (pauseparam->autoneg) return -EINVAL; From 4aa2734ced922da3ffa5f4a93473a21c98466c1b Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 5 Jul 2019 17:59:28 +0300 Subject: [PATCH 101/104] net/mlx5e: Use flow keys dissector to parse packets for ARFS [ Upstream commit 405b93eb764367a670e729da18e54dc42db32620 ] The current ARFS code relies on certain fields to be set in the SKB (e.g. transport_header) and extracts IP addresses and ports by custom code that parses the packet. The necessary SKB fields, however, are not always set at that point, which leads to an out-of-bounds access. Use skb_flow_dissect_flow_keys() to get the necessary information reliably, fix the out-of-bounds access and reuse the code. Fixes: 18c908e477dc ("net/mlx5e: Add accelerated RFS support") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/en_arfs.c | 97 +++++++------------ 1 file changed, 34 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 4a51fc6908ad..098a6b56fd54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -441,12 +441,6 @@ arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port, return &arfs_t->rules_hash[bucket_idx]; } -static u8 arfs_get_ip_proto(const struct sk_buff *skb) -{ - return (skb->protocol == htons(ETH_P_IP)) ? - ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr; -} - static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, u8 ip_proto, __be16 etype) { @@ -605,31 +599,9 @@ out: arfs_may_expire_flow(priv); } -/* return L4 destination port from ip4/6 packets */ -static __be16 arfs_get_dst_port(const struct sk_buff *skb) -{ - char *transport_header; - - transport_header = skb_transport_header(skb); - if (arfs_get_ip_proto(skb) == IPPROTO_TCP) - return ((struct tcphdr *)transport_header)->dest; - return ((struct udphdr *)transport_header)->dest; -} - -/* return L4 source port from ip4/6 packets */ -static __be16 arfs_get_src_port(const struct sk_buff *skb) -{ - char *transport_header; - - transport_header = skb_transport_header(skb); - if (arfs_get_ip_proto(skb) == IPPROTO_TCP) - return ((struct tcphdr *)transport_header)->source; - return ((struct udphdr *)transport_header)->source; -} - static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, struct arfs_table *arfs_t, - const struct sk_buff *skb, + const struct flow_keys *fk, u16 rxq, u32 flow_id) { struct arfs_rule *rule; @@ -644,19 +616,19 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, INIT_WORK(&rule->arfs_work, arfs_handle_work); tuple = &rule->tuple; - tuple->etype = skb->protocol; + tuple->etype = fk->basic.n_proto; + tuple->ip_proto = fk->basic.ip_proto; if (tuple->etype == htons(ETH_P_IP)) { - tuple->src_ipv4 = ip_hdr(skb)->saddr; - tuple->dst_ipv4 = ip_hdr(skb)->daddr; + tuple->src_ipv4 = fk->addrs.v4addrs.src; + tuple->dst_ipv4 = fk->addrs.v4addrs.dst; } else { - memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, + memcpy(&tuple->src_ipv6, &fk->addrs.v6addrs.src, sizeof(struct in6_addr)); - memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, + memcpy(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, sizeof(struct in6_addr)); } - tuple->ip_proto = arfs_get_ip_proto(skb); - tuple->src_port = arfs_get_src_port(skb); - tuple->dst_port = arfs_get_dst_port(skb); + tuple->src_port = fk->ports.src; + tuple->dst_port = fk->ports.dst; rule->flow_id = flow_id; rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER; @@ -667,37 +639,33 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, return rule; } -static bool arfs_cmp_ips(struct arfs_tuple *tuple, - const struct sk_buff *skb) +static bool arfs_cmp(const struct arfs_tuple *tuple, const struct flow_keys *fk) { - if (tuple->etype == htons(ETH_P_IP) && - tuple->src_ipv4 == ip_hdr(skb)->saddr && - tuple->dst_ipv4 == ip_hdr(skb)->daddr) - return true; - if (tuple->etype == htons(ETH_P_IPV6) && - (!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, - sizeof(struct in6_addr))) && - (!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, - sizeof(struct in6_addr)))) - return true; + if (tuple->src_port != fk->ports.src || tuple->dst_port != fk->ports.dst) + return false; + if (tuple->etype != fk->basic.n_proto) + return false; + if (tuple->etype == htons(ETH_P_IP)) + return tuple->src_ipv4 == fk->addrs.v4addrs.src && + tuple->dst_ipv4 == fk->addrs.v4addrs.dst; + if (tuple->etype == htons(ETH_P_IPV6)) + return !memcmp(&tuple->src_ipv6, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)) && + !memcmp(&tuple->dst_ipv6, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); return false; } static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t, - const struct sk_buff *skb) + const struct flow_keys *fk) { struct arfs_rule *arfs_rule; struct hlist_head *head; - __be16 src_port = arfs_get_src_port(skb); - __be16 dst_port = arfs_get_dst_port(skb); - head = arfs_hash_bucket(arfs_t, src_port, dst_port); + head = arfs_hash_bucket(arfs_t, fk->ports.src, fk->ports.dst); hlist_for_each_entry(arfs_rule, head, hlist) { - if (arfs_rule->tuple.src_port == src_port && - arfs_rule->tuple.dst_port == dst_port && - arfs_cmp_ips(&arfs_rule->tuple, skb)) { + if (arfs_cmp(&arfs_rule->tuple, fk)) return arfs_rule; - } } return NULL; @@ -710,20 +678,24 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_table *arfs_t; struct arfs_rule *arfs_rule; + struct flow_keys fk; - if (skb->protocol != htons(ETH_P_IP) && - skb->protocol != htons(ETH_P_IPV6)) + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) + return -EPROTONOSUPPORT; + + if (fk.basic.n_proto != htons(ETH_P_IP) && + fk.basic.n_proto != htons(ETH_P_IPV6)) return -EPROTONOSUPPORT; if (skb->encapsulation) return -EPROTONOSUPPORT; - arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol); + arfs_t = arfs_get_table(arfs, fk.basic.ip_proto, fk.basic.n_proto); if (!arfs_t) return -EPROTONOSUPPORT; spin_lock_bh(&arfs->arfs_lock); - arfs_rule = arfs_find_rule(arfs_t, skb); + arfs_rule = arfs_find_rule(arfs_t, &fk); if (arfs_rule) { if (arfs_rule->rxq == rxq_index) { spin_unlock_bh(&arfs->arfs_lock); @@ -731,8 +703,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, } arfs_rule->rxq = rxq_index; } else { - arfs_rule = arfs_alloc_rule(priv, arfs_t, skb, - rxq_index, flow_id); + arfs_rule = arfs_alloc_rule(priv, arfs_t, &fk, rxq_index, flow_id); if (!arfs_rule) { spin_unlock_bh(&arfs->arfs_lock); return -ENOMEM; From a4cb8ca3e951a24e850f7074d93305f39f67464e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 8 Aug 2019 14:22:47 +0800 Subject: [PATCH 102/104] team: Add vlan tx offload to hw_enc_features [ Upstream commit 227f2f030e28d8783c3d10ce70ff4ba79cad653f ] We should also enable team's vlan tx offload in hw_enc_features, pass the vlan packets to the slave devices with vlan tci, let the slave handle vlan tunneling offload implementation. Fixes: 3268e5cb494d ("team: Advertise tunneling offload features") Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/team/team.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 0acdf73aa1b0..fd2573cca803 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1014,7 +1014,9 @@ static void ___team_compute_features(struct team *team) } team->dev->vlan_features = vlan_features; - team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; + team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; team->dev->hard_header_len = max_hard_header_len; team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; From eab1f2dd7d68704e9eff24f97138cd954406e5be Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 7 Aug 2019 10:19:59 +0800 Subject: [PATCH 103/104] bonding: Add vlan tx offload to hw_enc_features [ Upstream commit d595b03de2cb0bdf9bcdf35ff27840cc3a37158f ] As commit 30d8177e8ac7 ("bonding: Always enable vlan tx offload") said, we should always enable bonding's vlan tx offload, pass the vlan packets to the slave devices with vlan tci, let them to handle vlan implementation. Now if encapsulation protocols like VXLAN is used, skb->encapsulation may be set, then the packet is passed to vlan device which based on bonding device. However in netif_skb_features(), the check of hw_enc_features: if (skb->encapsulation) features &= dev->hw_enc_features; clears NETIF_F_HW_VLAN_CTAG_TX/NETIF_F_HW_VLAN_STAG_TX. This results in same issue in commit 30d8177e8ac7 like this: vlan_dev_hard_start_xmit -->dev_queue_xmit -->validate_xmit_skb -->netif_skb_features //NETIF_F_HW_VLAN_CTAG_TX is cleared -->validate_xmit_vlan -->__vlan_hwaccel_push_inside //skb->tci is cleared ... --> bond_start_xmit --> bond_xmit_hash //BOND_XMIT_POLICY_ENCAP34 --> __skb_flow_dissect // nhoff point to IP header --> case htons(ETH_P_8021Q) // skb_vlan_tag_present is false, so vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), //vlan point to ip header wrongly Fixes: b2a103e6d0af ("bonding: convert to ndo_fix_features") Signed-off-by: YueHaibing Acked-by: Jay Vosburgh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5b116ec756b4..d338c319b30e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1107,7 +1107,9 @@ static void bond_compute_features(struct bonding *bond) done: bond_dev->vlan_features = vlan_features; - bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; + bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; bond_dev->hard_header_len = max_hard_header_len; bond_dev->gso_max_segs = gso_max_segs; netif_set_gso_max_size(bond_dev, gso_max_size); From 228e87c35b6c083be778d24b64c02ad05015f3d2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 25 Aug 2019 10:51:53 +0200 Subject: [PATCH 104/104] Linux 4.9.190 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4fdc9d984f80..4b6cf4641eba 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 189 +SUBLEVEL = 190 EXTRAVERSION = NAME = Roaring Lionus