From a5c6e0a76817a3751f58d761aaff7c0b0c4001ff Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 18 Apr 2017 15:31:07 +0100 Subject: [PATCH 01/22] KEYS: Disallow keyrings beginning with '.' to be joined as session keyrings commit ee8f844e3c5a73b999edf733df1c529d6503ec2f upstream. This fixes CVE-2016-9604. Keyrings whose name begin with a '.' are special internal keyrings and so userspace isn't allowed to create keyrings by this name to prevent shadowing. However, the patch that added the guard didn't fix KEYCTL_JOIN_SESSION_KEYRING. Not only can that create dot-named keyrings, it can also subscribe to them as a session keyring if they grant SEARCH permission to the user. This, for example, allows a root process to set .builtin_trusted_keys as its session keyring, at which point it has full access because now the possessor permissions are added. This permits root to add extra public keys, thereby bypassing module verification. This also affects kexec and IMA. This can be tested by (as root): keyctl session .builtin_trusted_keys keyctl add user a a @s keyctl list @s which on my test box gives me: 2 keys in keyring: 180010936: ---lswrv 0 0 asymmetric: Build time autogenerated kernel key: ae3d4a31b82daa8e1a75b49dc2bba949fd992a05 801382539: --alswrv 0 0 user: a Fix this by rejecting names beginning with a '.' in the keyctl. Signed-off-by: David Howells Acked-by: Mimi Zohar cc: linux-ima-devel@lists.sourceforge.net Signed-off-by: Greg Kroah-Hartman --- security/keys/keyctl.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index d580ad06b792..7cdd5b550693 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -271,7 +271,8 @@ error: * Create and join an anonymous session keyring or join a named session * keyring, creating it if necessary. A named session keyring must have Search * permission for it to be joined. Session keyrings without this permit will - * be skipped over. + * be skipped over. It is not permitted for userspace to create or join + * keyrings whose name begin with a dot. * * If successful, the ID of the joined session keyring will be returned. */ @@ -288,12 +289,16 @@ long keyctl_join_session_keyring(const char __user *_name) ret = PTR_ERR(name); goto error; } + + ret = -EPERM; + if (name[0] == '.') + goto error_name; } /* join the session */ ret = join_session_keyring(name); +error_name: kfree(name); - error: return ret; } From b2dd90e812f3f733b55f0bf4487032e53b487665 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 18 Apr 2017 15:31:08 +0100 Subject: [PATCH 02/22] KEYS: Change the name of the dead type to ".dead" to prevent user access commit c1644fe041ebaf6519f6809146a77c3ead9193af upstream. This fixes CVE-2017-6951. Userspace should not be able to do things with the "dead" key type as it doesn't have some of the helper functions set upon it that the kernel needs. Attempting to use it may cause the kernel to crash. Fix this by changing the name of the type to ".dead" so that it's rejected up front on userspace syscalls by key_get_type_from_user(). Though this doesn't seem to affect recent kernels, it does affect older ones, certainly those prior to: commit c06cfb08b88dfbe13be44a69ae2fdc3a7c902d81 Author: David Howells Date: Tue Sep 16 17:36:06 2014 +0100 KEYS: Remove key_type::match in favour of overriding default by match_preparse which went in before 3.18-rc1. Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- security/keys/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/gc.c b/security/keys/gc.c index addf060399e0..9cb4fe4478a1 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -46,7 +46,7 @@ static unsigned long key_gc_flags; * immediately unlinked. */ struct key_type key_type_dead = { - .name = "dead", + .name = ".dead", }; /* From 174a74dbca2ddc7269c265598399c000e5b9b870 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 18 Apr 2017 15:31:09 +0100 Subject: [PATCH 03/22] KEYS: fix keyctl_set_reqkey_keyring() to not leak thread keyrings commit c9f838d104fed6f2f61d68164712e3204bf5271b upstream. This fixes CVE-2017-7472. Running the following program as an unprivileged user exhausts kernel memory by leaking thread keyrings: #include int main() { for (;;) keyctl_set_reqkey_keyring(KEY_REQKEY_DEFL_THREAD_KEYRING); } Fix it by only creating a new thread keyring if there wasn't one before. To make things more consistent, make install_thread_keyring_to_cred() and install_process_keyring_to_cred() both return 0 if the corresponding keyring is already present. Fixes: d84f4f992cbd ("CRED: Inaugurate COW credentials") Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- security/keys/keyctl.c | 11 ++++----- security/keys/process_keys.c | 44 ++++++++++++++++++++++-------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 7cdd5b550693..dbbfd7735ce5 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1256,8 +1256,8 @@ error: * Read or set the default keyring in which request_key() will cache keys and * return the old setting. * - * If a process keyring is specified then this will be created if it doesn't - * yet exist. The old setting will be returned if successful. + * If a thread or process keyring is specified then it will be created if it + * doesn't yet exist. The old setting will be returned if successful. */ long keyctl_set_reqkey_keyring(int reqkey_defl) { @@ -1282,11 +1282,8 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) case KEY_REQKEY_DEFL_PROCESS_KEYRING: ret = install_process_keyring_to_cred(new); - if (ret < 0) { - if (ret != -EEXIST) - goto error; - ret = 0; - } + if (ret < 0) + goto error; goto set; case KEY_REQKEY_DEFL_DEFAULT: diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 40a885239782..45536c677b05 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -127,13 +127,18 @@ error: } /* - * Install a fresh thread keyring directly to new credentials. This keyring is - * allowed to overrun the quota. + * Install a thread keyring to the given credentials struct if it didn't have + * one already. This is allowed to overrun the quota. + * + * Return: 0 if a thread keyring is now present; -errno on failure. */ int install_thread_keyring_to_cred(struct cred *new) { struct key *keyring; + if (new->thread_keyring) + return 0; + keyring = keyring_alloc("_tid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, KEY_ALLOC_QUOTA_OVERRUN, @@ -146,7 +151,9 @@ int install_thread_keyring_to_cred(struct cred *new) } /* - * Install a fresh thread keyring, discarding the old one. + * Install a thread keyring to the current task if it didn't have one already. + * + * Return: 0 if a thread keyring is now present; -errno on failure. */ static int install_thread_keyring(void) { @@ -157,8 +164,6 @@ static int install_thread_keyring(void) if (!new) return -ENOMEM; - BUG_ON(new->thread_keyring); - ret = install_thread_keyring_to_cred(new); if (ret < 0) { abort_creds(new); @@ -169,17 +174,17 @@ static int install_thread_keyring(void) } /* - * Install a process keyring directly to a credentials struct. + * Install a process keyring to the given credentials struct if it didn't have + * one already. This is allowed to overrun the quota. * - * Returns -EEXIST if there was already a process keyring, 0 if one installed, - * and other value on any other error + * Return: 0 if a process keyring is now present; -errno on failure. */ int install_process_keyring_to_cred(struct cred *new) { struct key *keyring; if (new->process_keyring) - return -EEXIST; + return 0; keyring = keyring_alloc("_pid", new->uid, new->gid, new, KEY_POS_ALL | KEY_USR_VIEW, @@ -193,11 +198,9 @@ int install_process_keyring_to_cred(struct cred *new) } /* - * Make sure a process keyring is installed for the current process. The - * existing process keyring is not replaced. + * Install a process keyring to the current task if it didn't have one already. * - * Returns 0 if there is a process keyring by the end of this function, some - * error otherwise. + * Return: 0 if a process keyring is now present; -errno on failure. */ static int install_process_keyring(void) { @@ -211,14 +214,18 @@ static int install_process_keyring(void) ret = install_process_keyring_to_cred(new); if (ret < 0) { abort_creds(new); - return ret != -EEXIST ? ret : 0; + return ret; } return commit_creds(new); } /* - * Install a session keyring directly to a credentials struct. + * Install the given keyring as the session keyring of the given credentials + * struct, replacing the existing one if any. If the given keyring is NULL, + * then install a new anonymous session keyring. + * + * Return: 0 on success; -errno on failure. */ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) { @@ -253,8 +260,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) } /* - * Install a session keyring, discarding the old one. If a keyring is not - * supplied, an empty one is invented. + * Install the given keyring as the session keyring of the current task, + * replacing the existing one if any. If the given keyring is NULL, then + * install a new anonymous session keyring. + * + * Return: 0 on success; -errno on failure. */ static int install_session_keyring(struct key *keyring) { From d4decac1edaadefe03bc011785cc622a64e8f19a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 19 Apr 2017 12:07:08 -0400 Subject: [PATCH 04/22] tracing: Allocate the snapshot buffer before enabling probe commit df62db5be2e5f070ecd1a5ece5945b590ee112e0 upstream. Currently the snapshot trigger enables the probe and then allocates the snapshot. If the probe triggers before the allocation, it could cause the snapshot to fail and turn tracing off. It's best to allocate the snapshot buffer first, and then enable the trigger. If something goes wrong in the enabling of the trigger, the snapshot buffer is still allocated, but it can also be freed by the user by writting zero into the snapshot buffer file. Also add a check of the return status of alloc_snapshot(). Fixes: 77fd5c15e3 ("tracing: Add snapshot trigger to function probes") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 862bc8805d97..83c60f9013cb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6481,11 +6481,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash, return ret; out_reg: + ret = alloc_snapshot(&global_trace); + if (ret < 0) + goto out; + ret = register_ftrace_function_probe(glob, ops, count); - if (ret >= 0) - alloc_snapshot(&global_trace); - + out: return ret < 0 ? ret : 0; } From d80e90712a50c8954fc3f93fa704da1a9a359077 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 19 Apr 2017 14:29:46 -0400 Subject: [PATCH 05/22] ring-buffer: Have ring_buffer_iter_empty() return true when empty commit 78f7a45dac2a2d2002f98a3a95f7979867868d73 upstream. I noticed that reading the snapshot file when it is empty no longer gives a status. It suppose to show the status of the snapshot buffer as well as how to allocate and use it. For example: ># cat snapshot # tracer: nop # # # * Snapshot is allocated * # # Snapshot commands: # echo 0 > snapshot : Clears and frees snapshot buffer # echo 1 > snapshot : Allocates snapshot buffer, if not already allocated. # Takes a snapshot of the main buffer. # echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free) # (Doesn't have to be '2' works with any number that # is not a '0' or '1') But instead it just showed an empty buffer: ># cat snapshot # tracer: nop # # entries-in-buffer/entries-written: 0/0 #P:4 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | What happened was that it was using the ring_buffer_iter_empty() function to see if it was empty, and if it was, it showed the status. But that function was returning false when it was empty. The reason was that the iter header page was on the reader page, and the reader page was empty, but so was the buffer itself. The check only tested to see if the iter was on the commit page, but the commit page was no longer pointing to the reader page, but as all pages were empty, the buffer is also. Fixes: 651e22f2701b ("ring-buffer: Always reset iterator to reader page") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f30847af7310..f5c016e8fc88 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3435,11 +3435,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); int ring_buffer_iter_empty(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer; + struct buffer_page *reader; + struct buffer_page *head_page; + struct buffer_page *commit_page; + unsigned commit; cpu_buffer = iter->cpu_buffer; - return iter->head_page == cpu_buffer->commit_page && - iter->head == rb_commit_index(cpu_buffer); + /* Remember, trace recording is off when iterator is in use */ + reader = cpu_buffer->reader_page; + head_page = cpu_buffer->head_page; + commit_page = cpu_buffer->commit_page; + commit = rb_page_commit(commit_page); + + return ((iter->head_page == commit_page && iter->head == commit) || + (iter->head_page == reader && commit_page == head_page && + head_page->read == commit && + iter->head == rb_page_commit(cpu_buffer->reader_page))); } EXPORT_SYMBOL_GPL(ring_buffer_iter_empty); From c0a602ad31ee8ee84fb1df35c2ced661b7bddd07 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 20 Apr 2017 14:37:46 -0700 Subject: [PATCH 06/22] mm: prevent NR_ISOLATE_* stats from going negative commit fc280fe871449ead4bdbd1665fa52c7c01c64765 upstream. Commit 6afcf8ef0ca0 ("mm, compaction: fix NR_ISOLATED_* stats for pfn based migration") moved the dec_node_page_state() call (along with the page_is_file_cache() call) to after putback_lru_page(). But page_is_file_cache() can change after putback_lru_page() is called, so it should be called before putback_lru_page(), as it was before that patch, to prevent NR_ISOLATE_* stats from going negative. Without this fix, non-CONFIG_SMP kernels end up hanging in the while(too_many_isolated()) { congestion_wait() } loop in shrink_active_list() due to the negative stats. Mem-Info: active_anon:32567 inactive_anon:121 isolated_anon:1 active_file:6066 inactive_file:6639 isolated_file:4294967295 ^^^^^^^^^^ unevictable:0 dirty:115 writeback:0 unstable:0 slab_reclaimable:2086 slab_unreclaimable:3167 mapped:3398 shmem:18366 pagetables:1145 bounce:0 free:1798 free_pcp:13 free_cma:0 Fixes: 6afcf8ef0ca0 ("mm, compaction: fix NR_ISOLATED_* stats for pfn based migration") Link: http://lkml.kernel.org/r/1492683865-27549-1-git-send-email-rabin.vincent@axis.com Signed-off-by: Rabin Vincent Acked-by: Michal Hocko Cc: Ming Ling Cc: Minchan Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 66ce6b490b13..6850f62998cd 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -183,9 +183,9 @@ void putback_movable_pages(struct list_head *l) unlock_page(page); put_page(page); } else { - putback_lru_page(page); dec_node_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); + putback_lru_page(page); } } } From 0b7c970663411cbca20517f38544983ff4fd9650 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Sun, 16 Apr 2017 20:37:24 +0100 Subject: [PATCH 07/22] cifs: Do not send echoes before Negotiate is complete commit 62a6cfddcc0a5313e7da3e8311ba16226fe0ac10 upstream. commit 4fcd1813e640 ("Fix reconnect to not defer smb3 session reconnect long after socket reconnect") added support for Negotiate requests to be initiated by echo calls. To avoid delays in calling echo after a reconnect, I added the patch introduced by the commit b8c600120fc8 ("Call echo service immediately after socket reconnect"). This has however caused a regression with cifs shares which do not have support for echo calls to trigger Negotiate requests. On connections which need to call Negotiation, the echo calls trigger an error which triggers a reconnect which in turn triggers another echo call. This results in a loop which is only broken when an operation is performed on the cifs share. For an idle share, it can DOS a server. The patch uses the smb_operation can_echo() for cifs so that it is called only if connection has been already been setup. kernel bz: 194531 Signed-off-by: Sachin Prabhu Tested-by: Jonathan Liu Acked-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb1ops.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index fc537c29044e..87b87e091e8e 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -1015,6 +1015,15 @@ cifs_dir_needs_close(struct cifsFileInfo *cfile) return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle; } +static bool +cifs_can_echo(struct TCP_Server_Info *server) +{ + if (server->tcpStatus == CifsGood) + return true; + + return false; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -1049,6 +1058,7 @@ struct smb_version_operations smb1_operations = { .get_dfs_refer = CIFSGetDFSRefer, .qfs_tcon = cifs_qfs_tcon, .is_path_accessible = cifs_is_path_accessible, + .can_echo = cifs_can_echo, .query_path_info = cifs_query_path_info, .query_file_info = cifs_query_file_info, .get_srv_inum = cifs_get_srv_inum, From f79ef57911ee598e0bfb7af1ab51413429a8f9d5 Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Fri, 7 Apr 2017 12:29:37 +0100 Subject: [PATCH 08/22] CIFS: remove bad_network_name flag commit a0918f1ce6a43ac980b42b300ec443c154970979 upstream. STATUS_BAD_NETWORK_NAME can be received during node failover, causing the flag to be set and making the reconnect thread always unsuccessful, thereafter. Once the only place where it is set is removed, the remaining bits are rendered moot. Removing it does not prevent "mount" from failing when a non existent share is passed. What happens when the share really ceases to exist while the share is mounted is undefined now as much as it was before. Signed-off-by: Germano Percossi Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsglob.h | 1 - fs/cifs/smb2pdu.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 203287f86525..94661cf77ae8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -930,7 +930,6 @@ struct cifs_tcon { bool use_persistent:1; /* use persistent instead of durable handles */ #ifdef CONFIG_CIFS_SMB2 bool print:1; /* set if connection to printer share */ - bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */ __le32 capabilities; __u32 share_flags; __u32 maximal_access; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7080dac3592c..802185386851 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1084,9 +1084,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, else return -EIO; - if (tcon && tcon->bad_network_name) - return -ENOENT; - if ((tcon && tcon->seal) && ((ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) == 0)) { cifs_dbg(VFS, "encryption requested but no server support"); @@ -1188,8 +1185,6 @@ tcon_exit: tcon_error_exit: if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) { cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); - if (tcon) - tcon->bad_network_name = true; } goto tcon_exit; } From 3d42ca46f47a4d3cd5abe465735227534c292aed Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Sun, 9 Apr 2017 22:09:38 +0200 Subject: [PATCH 09/22] s390/mm: fix CMMA vs KSM vs others commit a8f60d1fadf7b8b54449fcc9d6b15248917478ba upstream. On heavy paging with KSM I see guest data corruption. Turns out that KSM will add pages to its tree, where the mapping return true for pte_unused (or might become as such later). KSM will unmap such pages and reinstantiate with different attributes (e.g. write protected or special, e.g. in replace_page or write_protect_page)). This uncovered a bug in our pagetable handling: We must remove the unused flag as soon as an entry becomes present again. Signed-of-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0362cd5fa187..0cea7026e4ff 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1029,6 +1029,8 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { + if (pte_present(entry)) + pte_val(entry) &= ~_PAGE_UNUSED; if (mm_has_pgste(mm)) ptep_set_pte_at(mm, addr, ptep, entry); else From 11ba522d7929b9c319fd31d95bbc917baad9fe6f Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 18 Apr 2017 11:14:28 -0700 Subject: [PATCH 10/22] Input: elantech - add Fujitsu Lifebook E547 to force crc_enabled commit 704de489e0e3640a2ee2d0daf173e9f7375582ba upstream. Temporary got a Lifebook E547 into my hands and noticed the touchpad only works after running: echo "1" > /sys/devices/platform/i8042/serio2/crc_enabled Add it to the list of machines that need this workaround. Signed-off-by: Thorsten Leemhuis Reviewed-by: Ulrik De Bie Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elantech.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index db7d1d666ac1..7826994c45bf 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1118,6 +1118,7 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse, * Asus UX32VD 0x361f02 00, 15, 0e clickpad * Avatar AVIU-145A2 0x361f00 ? clickpad * Fujitsu LIFEBOOK E544 0x470f00 d0, 12, 09 2 hw buttons + * Fujitsu LIFEBOOK E547 0x470f00 50, 12, 09 2 hw buttons * Fujitsu LIFEBOOK E554 0x570f01 40, 14, 0c 2 hw buttons * Fujitsu T725 0x470f01 05, 12, 09 2 hw buttons * Fujitsu H730 0x570f00 c0, 14, 0c 3 hw buttons (**) @@ -1523,6 +1524,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E544"), }, }, + { + /* Fujitsu LIFEBOOK E547 does not work with crc_enabled == 0 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E547"), + }, + }, { /* Fujitsu LIFEBOOK E554 does not work with crc_enabled == 0 */ .matches = { From 4420e5f323c408990834dddb61eb16e4a95a41e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Apr 2017 19:47:04 +0200 Subject: [PATCH 11/22] ACPI / power: Avoid maybe-uninitialized warning commit fe8c470ab87d90e4b5115902dd94eced7e3305c3 upstream. gcc -O2 cannot always prove that the loop in acpi_power_get_inferred_state() is enterered at least once, so it assumes that cur_state might not get initialized: drivers/acpi/power.c: In function 'acpi_power_get_inferred_state': drivers/acpi/power.c:222:9: error: 'cur_state' may be used uninitialized in this function [-Werror=maybe-uninitialized] This sets the variable to zero at the start of the loop, to ensure that there is well-defined behavior even for an empty list. This gets rid of the warning. The warning first showed up when the -Os flag got removed in a bug fix patch in linux-4.11-rc5. I would suggest merging this addon patch on top of that bug fix to avoid introducing a new warning in the stable kernels. Fixes: 61b79e16c68d (ACPI: Fix incompatibility with mcount-based function graph tracing) Signed-off-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/power.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index fcd4ce6f78d5..1c2b846c5776 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -200,6 +200,7 @@ static int acpi_power_get_list_state(struct list_head *list, int *state) return -EINVAL; /* The state of the list is 'on' IFF all resources are 'on'. */ + cur_state = 0; list_for_each_entry(entry, list, node) { struct acpi_power_resource *resource = entry->resource; acpi_handle handle = resource->device.handle; From 9fc1314285360151792b6ee153c0d5181698986d Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 19 Apr 2017 10:53:51 +0800 Subject: [PATCH 12/22] mmc: sdhci-esdhc-imx: increase the pad I/O drive strength for DDR50 card commit 9f327845358d3dd0d8a5a7a5436b0aa5c432e757 upstream. Currently for DDR50 card, it need tuning in default. We meet tuning fail issue for DDR50 card and some data CRC error when DDR50 sd card works. This is because the default pad I/O drive strength can't make sure DDR50 card work stable. So increase the pad I/O drive strength for DDR50 card, and use pins_100mhz. This fixes DDR50 card support for IMX since DDR50 tuning was enabled from commit 9faac7b95ea4 ("mmc: sdhci: enable tuning for DDR50") Tested-and-reported-by: Tim Harvey Signed-off-by: Haibo Chen Acked-by: Dong Aisheng Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 7123ef96ed18..445fc47dc3e7 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -830,6 +830,7 @@ static int esdhc_change_pinstate(struct sdhci_host *host, switch (uhs) { case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_DDR50: pinctrl = imx_data->pins_100mhz; break; case MMC_TIMING_UHS_SDR104: From a260ff509b4d5fde2ac5f863cd3015cefbc45189 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 6 Mar 2017 10:04:25 +0100 Subject: [PATCH 13/22] ubifs: Fix RENAME_WHITEOUT support commit c3d9fda688742c06e89aa1f0f8fd943fc11468cb upstream. Remove faulty leftover check in do_rename(), apparently introduced in a merge that combined whiteout support changes with commit f03b8ad8d386 ("fs: support RENAME_NOREPLACE for local filesystems") Fixes: f03b8ad8d386 ("fs: support RENAME_NOREPLACE for local filesystems") Fixes: 9e0a1fff8db5 ("ubifs: Implement RENAME_WHITEOUT") Signed-off-by: Felix Fietkau Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/dir.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ca16c5d7bab1..0b5a19de4636 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1088,9 +1088,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, struct timespec time; unsigned int uninitialized_var(saved_nlink); - if (flags & ~RENAME_NOREPLACE) - return -EINVAL; - /* * Budget request settings: deletion direntry, new direntry, removing * the old inode, and changing old and new parent directory inodes. From b93858556fd13c76a36a0c110450fa35eadf5671 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 30 Mar 2017 10:50:49 +0200 Subject: [PATCH 14/22] ubifs: Fix O_TMPFILE corner case in ubifs_link() commit 32fe905c17f001c0eee13c59afddd0bf2eed509c upstream. It is perfectly fine to link a tmpfile back using linkat(). Since tmpfiles are created with a link count of 0 they appear on the orphan list, upon re-linking the inode has to be removed from the orphan list again. Ralph faced a filesystem corruption in combination with overlayfs due to this bug. Cc: Ralph Sennhauser Cc: Amir Goldstein Reported-by: Ralph Sennhauser Tested-by: Ralph Sennhauser Reported-by: Amir Goldstein Fixes: 474b93704f321 ("ubifs: Implement O_TMPFILE") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/dir.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 0b5a19de4636..87ab02e2d666 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -622,6 +622,11 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, return err; lock_2_inodes(dir, inode); + + /* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */ + if (inode->i_nlink == 0) + ubifs_delete_orphan(c, inode->i_ino); + inc_nlink(inode); ihold(inode); inode->i_ctime = ubifs_current_time(inode); @@ -641,6 +646,8 @@ out_cancel: dir->i_size -= sz_change; dir_ui->ui_size = dir->i_size; drop_nlink(inode); + if (inode->i_nlink == 0) + ubifs_add_orphan(c, inode->i_ino); unlock_2_inodes(dir, inode); ubifs_release_budget(c, &req); iput(inode); From e0411f1eb549a7993c9821c05f1787c0bd1523b4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 20 Apr 2017 21:32:16 +0200 Subject: [PATCH 15/22] mac80211: reject ToDS broadcast data frames commit 3018e947d7fd536d57e2b550c33e456d921fff8c upstream. AP/AP_VLAN modes don't accept any real 802.11 multicast data frames, but since they do need to accept broadcast management frames the same is currently permitted for data frames. This opens a security problem because such frames would be decrypted with the GTK, and could even contain unicast L3 frames. Since the spec says that ToDS frames must always have the BSSID as the RA (addr1), reject any other data frames. The problem was originally reported in "Predicting, Decrypting, and Abusing WPA2/802.11 Group Keys" at usenix https://www.usenix.org/conference/usenixsecurity16/technical-sessions/presentation/vanhoef and brought to my attention by Jouni. Reported-by: Jouni Malinen Signed-off-by: Johannes Berg Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman -- --- net/mac80211/rx.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index a697ddf56334..e2bbad0e494c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3617,6 +3617,27 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) !ether_addr_equal(bssid, hdr->addr1)) return false; } + + /* + * 802.11-2016 Table 9-26 says that for data frames, A1 must be + * the BSSID - we've checked that already but may have accepted + * the wildcard (ff:ff:ff:ff:ff:ff). + * + * It also says: + * The BSSID of the Data frame is determined as follows: + * a) If the STA is contained within an AP or is associated + * with an AP, the BSSID is the address currently in use + * by the STA contained in the AP. + * + * So we should not accept data frames with an address that's + * multicast. + * + * Accepting it also opens a security problem because stations + * could encrypt it with the GTK and inject traffic that way. + */ + if (ieee80211_is_data(hdr->frame_control) && multicast) + return false; + return true; case NL80211_IFTYPE_WDS: if (bssid || !ieee80211_is_data(hdr->frame_control)) From 87cfeaa5e5a1834c2f55ade5da5a117991305269 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Apr 2017 14:23:49 +0200 Subject: [PATCH 16/22] mac80211: fix MU-MIMO follow-MAC mode commit 9e478066eae41211c92a8f63cc69aafc391bd6ab upstream. There are two bugs in the follow-MAC code: * it treats the radiotap header as the 802.11 header (therefore it can't possibly work) * it doesn't verify that the skb data it accesses is actually present in the header, which is mitigated by the first point Fix this by moving all of this out into a separate function. This function copies the data it needs using skb_copy_bits() to make sure it can be accessed if it's paged, and offsets that by the possibly present vendor radiotap header. This also makes all those conditions more readable. Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 65 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e2bbad0e494c..acaaf616da71 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -208,6 +208,51 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, return len; } +static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + int rtap_vendor_space) +{ + struct { + struct ieee80211_hdr_3addr hdr; + u8 category; + u8 action_code; + } __packed action; + + if (!sdata) + return; + + BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1); + + if (skb->len < rtap_vendor_space + sizeof(action) + + VHT_MUMIMO_GROUPS_DATA_LEN) + return; + + if (!is_valid_ether_addr(sdata->u.mntr.mu_follow_addr)) + return; + + skb_copy_bits(skb, rtap_vendor_space, &action, sizeof(action)); + + if (!ieee80211_is_action(action.hdr.frame_control)) + return; + + if (action.category != WLAN_CATEGORY_VHT) + return; + + if (action.action_code != WLAN_VHT_ACTION_GROUPID_MGMT) + return; + + if (!ether_addr_equal(action.hdr.addr1, sdata->u.mntr.mu_follow_addr)) + return; + + skb = skb_copy(skb, GFP_ATOMIC); + if (!skb) + return; + + skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, skb); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); +} + /* * ieee80211_add_rx_radiotap_header - add radiotap header * @@ -515,7 +560,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, struct net_device *prev_dev = NULL; int present_fcs_len = 0; unsigned int rtap_vendor_space = 0; - struct ieee80211_mgmt *mgmt; struct ieee80211_sub_if_data *monitor_sdata = rcu_dereference(local->monitor_sdata); @@ -553,6 +597,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, return remove_monitor_info(local, origskb, rtap_vendor_space); } + ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_vendor_space); + /* room for the radiotap header based on driver features */ rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, origskb); needed_headroom = rt_hdrlen - rtap_vendor_space; @@ -618,23 +664,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, ieee80211_rx_stats(sdata->dev, skb->len); } - mgmt = (void *)skb->data; - if (monitor_sdata && - skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN && - ieee80211_is_action(mgmt->frame_control) && - mgmt->u.action.category == WLAN_CATEGORY_VHT && - mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT && - is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) && - ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) { - struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC); - - if (mu_skb) { - mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; - skb_queue_tail(&monitor_sdata->skb_queue, mu_skb); - ieee80211_queue_work(&local->hw, &monitor_sdata->work); - } - } - if (prev_dev) { skb->dev = prev_dev; netif_receive_skb(skb); From 790b2b5a01cea4ee4284d52ea995e2af06f4f8d7 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Wed, 22 Feb 2017 17:15:21 +0100 Subject: [PATCH 17/22] ubi/upd: Always flush after prepared for an update commit 9cd9a21ce070be8a918ffd3381468315a7a76ba6 upstream. In commit 6afaf8a484cb ("UBI: flush wl before clearing update marker") I managed to trigger and fix a similar bug. Now here is another version of which I assumed it wouldn't matter back then but it turns out UBI has a check for it and will error out like this: |ubi0 warning: validate_vid_hdr: inconsistent used_ebs |ubi0 error: validate_vid_hdr: inconsistent VID header at PEB 592 All you need to trigger this is? "ubiupdatevol /dev/ubi0_0 file" + a powercut in the middle of the operation. ubi_start_update() sets the update-marker and puts all EBs on the erase list. After that userland can proceed to write new data while the old EB aren't erased completely. A powercut at this point is usually not that much of a tragedy. UBI won't give read access to the static volume because it has the update marker. It will most likely set the corrupted flag because it misses some EBs. So we are all good. Unless the size of the image that has been written differs from the old image in the magnitude of at least one EB. In that case UBI will find two different values for `used_ebs' and refuse to attach the image with the error message mentioned above. So in order not to get in the situation, the patch will ensure that we wait until everything is removed before it tries to write any data. The alternative would be to detect such a case and remove all EBs at the attached time after we processed the volume-table and see the update-marker set. The patch looks bigger and I doubt it is worth it since usually the write() will wait from time to time for a new EB since usually there not that many spare EB that can be used. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/upd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index 0134ba32a057..39712560b4c1 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -148,11 +148,11 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, return err; } - if (bytes == 0) { - err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); - if (err) - return err; + err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL); + if (err) + return err; + if (bytes == 0) { err = clear_update_marker(ubi, vol, 0); if (err) return err; From 1bd55ab13039a0d1c004eeb7a487810138bc42c8 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 11 Apr 2017 10:38:13 +0530 Subject: [PATCH 18/22] powerpc/kprobe: Fix oops when kprobed on 'stdu' instruction commit 9e1ba4f27f018742a1aa95d11e35106feba08ec1 upstream. If we set a kprobe on a 'stdu' instruction on powerpc64, we see a kernel OOPS: Bad kernel stack pointer cd93c840 at c000000000009868 Oops: Bad kernel stack pointer, sig: 6 [#1] ... GPR00: c000001fcd93cb30 00000000cd93c840 c0000000015c5e00 00000000cd93c840 ... NIP [c000000000009868] resume_kernel+0x2c/0x58 LR [c000000000006208] program_check_common+0x108/0x180 On a 64-bit system when the user probes on a 'stdu' instruction, the kernel does not emulate actual store in emulate_step() because it may corrupt the exception frame. So the kernel does the actual store operation in exception return code i.e. resume_kernel(). resume_kernel() loads the saved stack pointer from memory using lwz, which only loads the low 32-bits of the address, causing the kernel crash. Fix this by loading the 64-bit value instead. Fixes: be96f63375a1 ("powerpc: Split out instruction analysis part of emulate_step()") Signed-off-by: Ravi Bangoria Reviewed-by: Naveen N. Rao Reviewed-by: Ananth N Mavinakayanahalli [mpe: Change log massage, add stable tag] Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/entry_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6432d4bf08c8..767ef6d68c9e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -689,7 +689,7 @@ resume_kernel: addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ - lwz r3,GPR1(r1) + ld r3,GPR1(r1) subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ mr r4,r1 /* src: current exception frame */ mr r1,r3 /* Reroute the trampoline frame to r1 */ @@ -703,8 +703,8 @@ resume_kernel: addi r6,r6,8 bdnz 2b - /* Do real store operation to complete stwu */ - lwz r5,GPR1(r1) + /* Do real store operation to complete stdu */ + ld r5,GPR1(r1) std r8,0(r5) /* Clear _TIF_EMULATE_STACK_STORE flag */ From 6966a6579e1bc7b3486bed395e7d6875912a9a9b Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 30 Mar 2017 13:17:14 +0200 Subject: [PATCH 19/22] x86/mce/AMD: Give a name to MCA bank 3 when accessed with legacy MSRs commit 29f72ce3e4d18066ec75c79c857bee0618a3504b upstream. MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name. However, MCA bank 3 is defined on Fam17h systems and can be accessed using legacy MSRs. Without a name we get a stack trace on Fam17h systems when trying to register sysfs files for bank 3 on kernels that don't recognize Scalable MCA. Call MCA bank 3 "decode_unit" since this is what it represents on Fam17h. This will allow kernels without SMCA support to see this bank on Fam17h+ and prevent the stack trace. This will not affect older systems since this bank is reserved on them, i.e. it'll be ignored. Tested on AMD Fam15h and Fam17h systems. WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal kobject: (ffff88085bb256c0): attempted to be registered with empty name! ... Call Trace: kobject_add_internal kobject_add kobject_create_and_add threshold_create_device threshold_init_device Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9b5403462936..3dfca7b302dc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -59,7 +59,7 @@ static const char * const th_names[] = { "load_store", "insn_fetch", "combined_unit", - "", + "decode_unit", "northbridge", "execution_unit", }; From f8bc0881fe95496ddf3f8a16808d9860db207941 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Tue, 18 Apr 2017 20:42:35 +0200 Subject: [PATCH 20/22] x86/mce: Make the MCE notifier a blocking one commit 0dc9c639e6553e39c13b2c0d54c8a1b098cb95e2 upstream. The NFIT MCE handler callback (for handling media errors on NVDIMMs) takes a mutex to add the location of a memory error to a list. But since the notifier call chain for machine checks (x86_mce_decoder_chain) is atomic, we get a lockdep splat like: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 in_atomic(): 1, irqs_disabled(): 0, pid: 4, name: kworker/0:0 [..] Call Trace: dump_stack ___might_sleep __might_sleep mutex_lock_nested ? __lock_acquire nfit_handle_mce notifier_call_chain atomic_notifier_call_chain ? atomic_notifier_call_chain mce_gen_pool_process Convert the notifier to a blocking one which gets to run only in process context. Boris: remove the notifier call in atomic context in print_mce(). For now, let's print the MCE on the atomic path so that we can make sure they go out and get logged at least. Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error") Reported-by: Ross Zwisler Signed-off-by: Vishal Verma Acked-by: Tony Luck Cc: Dan Williams Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/20170411224457.24777-1-vishal.l.verma@intel.com Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/mcheck/mce-genpool.c | 2 +- arch/x86/kernel/cpu/mcheck/mce-internal.h | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 16 +++------------- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 93d824ec3120..040af1939460 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -85,7 +85,7 @@ void mce_gen_pool_process(void) head = llist_reverse_order(head); llist_for_each_entry_safe(node, tmp, head, llnode) { mce = &node->mce; - atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); + blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); } } diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index cd74a3f00aea..de20902ecf23 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -13,7 +13,7 @@ enum severity_level { MCE_PANIC_SEVERITY, }; -extern struct atomic_notifier_head x86_mce_decoder_chain; +extern struct blocking_notifier_head x86_mce_decoder_chain; #define ATTR_LEN 16 #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a7fdf453d895..22cda29d654e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -120,7 +120,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); * CPU/chipset specific EDAC code can register a notifier call here to print * MCE errors in a human-readable form. */ -ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain); /* Do initial initialization of a struct mce */ void mce_setup(struct mce *m) @@ -213,13 +213,13 @@ void mce_register_decode_chain(struct notifier_block *nb) if (nb != &mce_srao_nb && nb->priority == INT_MAX) nb->priority -= 1; - atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); + blocking_notifier_chain_register(&x86_mce_decoder_chain, nb); } EXPORT_SYMBOL_GPL(mce_register_decode_chain); void mce_unregister_decode_chain(struct notifier_block *nb) { - atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); + blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb); } EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); @@ -272,8 +272,6 @@ struct mca_msr_regs msr_ops = { static void print_mce(struct mce *m) { - int ret = 0; - pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); @@ -309,14 +307,6 @@ static void print_mce(struct mce *m) m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, cpu_data(m->extcpu).microcode); - /* - * Print out human-readable details about the MCE error, - * (if the CPU has an implementation for that) - */ - ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); - if (ret == NOTIFY_STOP) - return; - pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); } From c36eaa6ca346dc8d45e9125af62db75625b42a6f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 7 Apr 2017 16:42:08 -0700 Subject: [PATCH 21/22] device-dax: switch to srcu, fix rcu_read_lock() vs pte allocation commit 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 upstream. The following warning triggers with a new unit test that stresses the device-dax interface. =============================== [ ERR: suspicious RCU usage. ] 4.11.0-rc4+ #1049 Tainted: G O ------------------------------- ./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 2 locks held by fio/9070: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x167/0x4f0 #1: (rcu_read_lock){......}, at: [] dax_dev_huge_fault+0x32/0x620 [dax] Call Trace: dump_stack+0x86/0xc3 lockdep_rcu_suspicious+0xd7/0x110 ___might_sleep+0xac/0x250 __might_sleep+0x4a/0x80 __alloc_pages_nodemask+0x23a/0x360 alloc_pages_current+0xa1/0x1f0 pte_alloc_one+0x17/0x80 __pte_alloc+0x1e/0x120 __get_locked_pte+0x1bf/0x1d0 insert_pfn.isra.70+0x3a/0x100 ? lookup_memtype+0xa6/0xd0 vm_insert_mixed+0x64/0x90 dax_dev_huge_fault+0x520/0x620 [dax] ? dax_dev_huge_fault+0x32/0x620 [dax] dax_dev_fault+0x10/0x20 [dax] __do_fault+0x1e/0x140 __handle_mm_fault+0x9af/0x10d0 handle_mm_fault+0x16d/0x370 ? handle_mm_fault+0x47/0x370 __do_page_fault+0x28c/0x4f0 trace_do_page_fault+0x58/0x2a0 do_async_page_fault+0x1a/0xa0 async_page_fault+0x28/0x30 Inserting a page table entry may trigger an allocation while we are holding a read lock to keep the device instance alive for the duration of the fault. Use srcu for this keep-alive protection. Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/dax/Kconfig | 1 + drivers/dax/dax.c | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 3e2ab3b14eea..9e95bf94eb13 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -2,6 +2,7 @@ menuconfig DEV_DAX tristate "DAX: direct access to differentiated memory" default m if NVDIMM_DAX depends on TRANSPARENT_HUGEPAGE + select SRCU help Support raw access to differentiated (persistence, bandwidth, latency...) memory via an mmap(2) capable character diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 152552d2c306..193224889e41 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -24,6 +24,7 @@ #include "dax.h" static dev_t dax_devt; +DEFINE_STATIC_SRCU(dax_srcu); static struct class *dax_class; static DEFINE_IDA(dax_minor_ida); static int nr_dax = CONFIG_NR_DEV_DAX; @@ -59,7 +60,7 @@ struct dax_region { * @region - parent region * @dev - device backing the character device * @cdev - core chardev data - * @alive - !alive + rcu grace period == no new mappings can be established + * @alive - !alive + srcu grace period == no new mappings can be established * @id - child id in the region * @num_resources - number of physical address extents in this device * @res - array of physical address ranges @@ -437,7 +438,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags) { - int rc; + int rc, id; struct file *filp = vma->vm_file; struct dax_dev *dax_dev = filp->private_data; @@ -445,9 +446,9 @@ static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr, current->comm, (flags & FAULT_FLAG_WRITE) ? "write" : "read", vma->vm_start, vma->vm_end); - rcu_read_lock(); + id = srcu_read_lock(&dax_srcu); rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags); - rcu_read_unlock(); + srcu_read_unlock(&dax_srcu, id); return rc; } @@ -563,11 +564,11 @@ static void unregister_dax_dev(void *dev) * Note, rcu is not protecting the liveness of dax_dev, rcu is * ensuring that any fault handlers that might have seen * dax_dev->alive == true, have completed. Any fault handlers - * that start after synchronize_rcu() has started will abort + * that start after synchronize_srcu() has started will abort * upon seeing dax_dev->alive == false. */ dax_dev->alive = false; - synchronize_rcu(); + synchronize_srcu(&dax_srcu); unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1); cdev_del(cdev); device_unregister(dev); From a8c90ef62281db933118aa84489eb0e1e9cc347c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 27 Apr 2017 09:11:26 +0200 Subject: [PATCH 22/22] Linux 4.9.25 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 50436f502d81..8e18c63388c4 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 9 -SUBLEVEL = 24 +SUBLEVEL = 25 EXTRAVERSION = NAME = Roaring Lionus