From a5c6e0a76817a3751f58d761aaff7c0b0c4001ff Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 18 Apr 2017 15:31:07 +0100
Subject: [PATCH 01/22] KEYS: Disallow keyrings beginning with '.' to be joined
 as session keyrings

commit ee8f844e3c5a73b999edf733df1c529d6503ec2f upstream.

This fixes CVE-2016-9604.

Keyrings whose name begin with a '.' are special internal keyrings and so
userspace isn't allowed to create keyrings by this name to prevent
shadowing.  However, the patch that added the guard didn't fix
KEYCTL_JOIN_SESSION_KEYRING.  Not only can that create dot-named keyrings,
it can also subscribe to them as a session keyring if they grant SEARCH
permission to the user.

This, for example, allows a root process to set .builtin_trusted_keys as
its session keyring, at which point it has full access because now the
possessor permissions are added.  This permits root to add extra public
keys, thereby bypassing module verification.

This also affects kexec and IMA.

This can be tested by (as root):

	keyctl session .builtin_trusted_keys
	keyctl add user a a @s
	keyctl list @s

which on my test box gives me:

	2 keys in keyring:
	180010936: ---lswrv     0     0 asymmetric: Build time autogenerated kernel key: ae3d4a31b82daa8e1a75b49dc2bba949fd992a05
	801382539: --alswrv     0     0 user: a


Fix this by rejecting names beginning with a '.' in the keyctl.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
cc: linux-ima-devel@lists.sourceforge.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/keys/keyctl.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index d580ad06b792..7cdd5b550693 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -271,7 +271,8 @@ error:
  * Create and join an anonymous session keyring or join a named session
  * keyring, creating it if necessary.  A named session keyring must have Search
  * permission for it to be joined.  Session keyrings without this permit will
- * be skipped over.
+ * be skipped over.  It is not permitted for userspace to create or join
+ * keyrings whose name begin with a dot.
  *
  * If successful, the ID of the joined session keyring will be returned.
  */
@@ -288,12 +289,16 @@ long keyctl_join_session_keyring(const char __user *_name)
 			ret = PTR_ERR(name);
 			goto error;
 		}
+
+		ret = -EPERM;
+		if (name[0] == '.')
+			goto error_name;
 	}
 
 	/* join the session */
 	ret = join_session_keyring(name);
+error_name:
 	kfree(name);
-
 error:
 	return ret;
 }

From b2dd90e812f3f733b55f0bf4487032e53b487665 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 18 Apr 2017 15:31:08 +0100
Subject: [PATCH 02/22] KEYS: Change the name of the dead type to ".dead" to
 prevent user access

commit c1644fe041ebaf6519f6809146a77c3ead9193af upstream.

This fixes CVE-2017-6951.

Userspace should not be able to do things with the "dead" key type as it
doesn't have some of the helper functions set upon it that the kernel
needs.  Attempting to use it may cause the kernel to crash.

Fix this by changing the name of the type to ".dead" so that it's rejected
up front on userspace syscalls by key_get_type_from_user().

Though this doesn't seem to affect recent kernels, it does affect older
ones, certainly those prior to:

	commit c06cfb08b88dfbe13be44a69ae2fdc3a7c902d81
	Author: David Howells <dhowells@redhat.com>
	Date:   Tue Sep 16 17:36:06 2014 +0100
	KEYS: Remove key_type::match in favour of overriding default by match_preparse

which went in before 3.18-rc1.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/keys/gc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/keys/gc.c b/security/keys/gc.c
index addf060399e0..9cb4fe4478a1 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -46,7 +46,7 @@ static unsigned long key_gc_flags;
  * immediately unlinked.
  */
 struct key_type key_type_dead = {
-	.name = "dead",
+	.name = ".dead",
 };
 
 /*

From 174a74dbca2ddc7269c265598399c000e5b9b870 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 18 Apr 2017 15:31:09 +0100
Subject: [PATCH 03/22] KEYS: fix keyctl_set_reqkey_keyring() to not leak
 thread keyrings

commit c9f838d104fed6f2f61d68164712e3204bf5271b upstream.

This fixes CVE-2017-7472.

Running the following program as an unprivileged user exhausts kernel
memory by leaking thread keyrings:

	#include <keyutils.h>

	int main()
	{
		for (;;)
			keyctl_set_reqkey_keyring(KEY_REQKEY_DEFL_THREAD_KEYRING);
	}

Fix it by only creating a new thread keyring if there wasn't one before.
To make things more consistent, make install_thread_keyring_to_cred()
and install_process_keyring_to_cred() both return 0 if the corresponding
keyring is already present.

Fixes: d84f4f992cbd ("CRED: Inaugurate COW credentials")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 security/keys/keyctl.c       | 11 ++++-----
 security/keys/process_keys.c | 44 ++++++++++++++++++++++--------------
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 7cdd5b550693..dbbfd7735ce5 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1256,8 +1256,8 @@ error:
  * Read or set the default keyring in which request_key() will cache keys and
  * return the old setting.
  *
- * If a process keyring is specified then this will be created if it doesn't
- * yet exist.  The old setting will be returned if successful.
+ * If a thread or process keyring is specified then it will be created if it
+ * doesn't yet exist.  The old setting will be returned if successful.
  */
 long keyctl_set_reqkey_keyring(int reqkey_defl)
 {
@@ -1282,11 +1282,8 @@ long keyctl_set_reqkey_keyring(int reqkey_defl)
 
 	case KEY_REQKEY_DEFL_PROCESS_KEYRING:
 		ret = install_process_keyring_to_cred(new);
-		if (ret < 0) {
-			if (ret != -EEXIST)
-				goto error;
-			ret = 0;
-		}
+		if (ret < 0)
+			goto error;
 		goto set;
 
 	case KEY_REQKEY_DEFL_DEFAULT:
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 40a885239782..45536c677b05 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -127,13 +127,18 @@ error:
 }
 
 /*
- * Install a fresh thread keyring directly to new credentials.  This keyring is
- * allowed to overrun the quota.
+ * Install a thread keyring to the given credentials struct if it didn't have
+ * one already.  This is allowed to overrun the quota.
+ *
+ * Return: 0 if a thread keyring is now present; -errno on failure.
  */
 int install_thread_keyring_to_cred(struct cred *new)
 {
 	struct key *keyring;
 
+	if (new->thread_keyring)
+		return 0;
+
 	keyring = keyring_alloc("_tid", new->uid, new->gid, new,
 				KEY_POS_ALL | KEY_USR_VIEW,
 				KEY_ALLOC_QUOTA_OVERRUN,
@@ -146,7 +151,9 @@ int install_thread_keyring_to_cred(struct cred *new)
 }
 
 /*
- * Install a fresh thread keyring, discarding the old one.
+ * Install a thread keyring to the current task if it didn't have one already.
+ *
+ * Return: 0 if a thread keyring is now present; -errno on failure.
  */
 static int install_thread_keyring(void)
 {
@@ -157,8 +164,6 @@ static int install_thread_keyring(void)
 	if (!new)
 		return -ENOMEM;
 
-	BUG_ON(new->thread_keyring);
-
 	ret = install_thread_keyring_to_cred(new);
 	if (ret < 0) {
 		abort_creds(new);
@@ -169,17 +174,17 @@ static int install_thread_keyring(void)
 }
 
 /*
- * Install a process keyring directly to a credentials struct.
+ * Install a process keyring to the given credentials struct if it didn't have
+ * one already.  This is allowed to overrun the quota.
  *
- * Returns -EEXIST if there was already a process keyring, 0 if one installed,
- * and other value on any other error
+ * Return: 0 if a process keyring is now present; -errno on failure.
  */
 int install_process_keyring_to_cred(struct cred *new)
 {
 	struct key *keyring;
 
 	if (new->process_keyring)
-		return -EEXIST;
+		return 0;
 
 	keyring = keyring_alloc("_pid", new->uid, new->gid, new,
 				KEY_POS_ALL | KEY_USR_VIEW,
@@ -193,11 +198,9 @@ int install_process_keyring_to_cred(struct cred *new)
 }
 
 /*
- * Make sure a process keyring is installed for the current process.  The
- * existing process keyring is not replaced.
+ * Install a process keyring to the current task if it didn't have one already.
  *
- * Returns 0 if there is a process keyring by the end of this function, some
- * error otherwise.
+ * Return: 0 if a process keyring is now present; -errno on failure.
  */
 static int install_process_keyring(void)
 {
@@ -211,14 +214,18 @@ static int install_process_keyring(void)
 	ret = install_process_keyring_to_cred(new);
 	if (ret < 0) {
 		abort_creds(new);
-		return ret != -EEXIST ? ret : 0;
+		return ret;
 	}
 
 	return commit_creds(new);
 }
 
 /*
- * Install a session keyring directly to a credentials struct.
+ * Install the given keyring as the session keyring of the given credentials
+ * struct, replacing the existing one if any.  If the given keyring is NULL,
+ * then install a new anonymous session keyring.
+ *
+ * Return: 0 on success; -errno on failure.
  */
 int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 {
@@ -253,8 +260,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring)
 }
 
 /*
- * Install a session keyring, discarding the old one.  If a keyring is not
- * supplied, an empty one is invented.
+ * Install the given keyring as the session keyring of the current task,
+ * replacing the existing one if any.  If the given keyring is NULL, then
+ * install a new anonymous session keyring.
+ *
+ * Return: 0 on success; -errno on failure.
  */
 static int install_session_keyring(struct key *keyring)
 {

From d4decac1edaadefe03bc011785cc622a64e8f19a Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 19 Apr 2017 12:07:08 -0400
Subject: [PATCH 04/22] tracing: Allocate the snapshot buffer before enabling
 probe

commit df62db5be2e5f070ecd1a5ece5945b590ee112e0 upstream.

Currently the snapshot trigger enables the probe and then allocates the
snapshot. If the probe triggers before the allocation, it could cause the
snapshot to fail and turn tracing off. It's best to allocate the snapshot
buffer first, and then enable the trigger. If something goes wrong in the
enabling of the trigger, the snapshot buffer is still allocated, but it can
also be freed by the user by writting zero into the snapshot buffer file.

Also add a check of the return status of alloc_snapshot().

Fixes: 77fd5c15e3 ("tracing: Add snapshot trigger to function probes")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 862bc8805d97..83c60f9013cb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6481,11 +6481,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
 		return ret;
 
  out_reg:
+	ret = alloc_snapshot(&global_trace);
+	if (ret < 0)
+		goto out;
+
 	ret = register_ftrace_function_probe(glob, ops, count);
 
-	if (ret >= 0)
-		alloc_snapshot(&global_trace);
-
+ out:
 	return ret < 0 ? ret : 0;
 }
 

From d80e90712a50c8954fc3f93fa704da1a9a359077 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 19 Apr 2017 14:29:46 -0400
Subject: [PATCH 05/22] ring-buffer: Have ring_buffer_iter_empty() return true
 when empty

commit 78f7a45dac2a2d2002f98a3a95f7979867868d73 upstream.

I noticed that reading the snapshot file when it is empty no longer gives a
status. It suppose to show the status of the snapshot buffer as well as how
to allocate and use it. For example:

 ># cat snapshot
 # tracer: nop
 #
 #
 # * Snapshot is allocated *
 #
 # Snapshot commands:
 # echo 0 > snapshot : Clears and frees snapshot buffer
 # echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.
 #                      Takes a snapshot of the main buffer.
 # echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)
 #                      (Doesn't have to be '2' works with any number that
 #                       is not a '0' or '1')

But instead it just showed an empty buffer:

 ># cat snapshot
 # tracer: nop
 #
 # entries-in-buffer/entries-written: 0/0   #P:4
 #
 #                              _-----=> irqs-off
 #                             / _----=> need-resched
 #                            | / _---=> hardirq/softirq
 #                            || / _--=> preempt-depth
 #                            ||| /     delay
 #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
 #              | |       |   ||||       |         |

What happened was that it was using the ring_buffer_iter_empty() function to
see if it was empty, and if it was, it showed the status. But that function
was returning false when it was empty. The reason was that the iter header
page was on the reader page, and the reader page was empty, but so was the
buffer itself. The check only tested to see if the iter was on the commit
page, but the commit page was no longer pointing to the reader page, but as
all pages were empty, the buffer is also.

Fixes: 651e22f2701b ("ring-buffer: Always reset iterator to reader page")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/ring_buffer.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f30847af7310..f5c016e8fc88 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3435,11 +3435,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
 int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	struct buffer_page *reader;
+	struct buffer_page *head_page;
+	struct buffer_page *commit_page;
+	unsigned commit;
 
 	cpu_buffer = iter->cpu_buffer;
 
-	return iter->head_page == cpu_buffer->commit_page &&
-		iter->head == rb_commit_index(cpu_buffer);
+	/* Remember, trace recording is off when iterator is in use */
+	reader = cpu_buffer->reader_page;
+	head_page = cpu_buffer->head_page;
+	commit_page = cpu_buffer->commit_page;
+	commit = rb_page_commit(commit_page);
+
+	return ((iter->head_page == commit_page && iter->head == commit) ||
+		(iter->head_page == reader && commit_page == head_page &&
+		 head_page->read == commit &&
+		 iter->head == rb_page_commit(cpu_buffer->reader_page)));
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
 

From c0a602ad31ee8ee84fb1df35c2ced661b7bddd07 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabinv@axis.com>
Date: Thu, 20 Apr 2017 14:37:46 -0700
Subject: [PATCH 06/22] mm: prevent NR_ISOLATE_* stats from going negative

commit fc280fe871449ead4bdbd1665fa52c7c01c64765 upstream.

Commit 6afcf8ef0ca0 ("mm, compaction: fix NR_ISOLATED_* stats for pfn
based migration") moved the dec_node_page_state() call (along with the
page_is_file_cache() call) to after putback_lru_page().

But page_is_file_cache() can change after putback_lru_page() is called,
so it should be called before putback_lru_page(), as it was before that
patch, to prevent NR_ISOLATE_* stats from going negative.

Without this fix, non-CONFIG_SMP kernels end up hanging in the
while(too_many_isolated()) { congestion_wait() } loop in
shrink_active_list() due to the negative stats.

 Mem-Info:
  active_anon:32567 inactive_anon:121 isolated_anon:1
  active_file:6066 inactive_file:6639 isolated_file:4294967295
                                                    ^^^^^^^^^^
  unevictable:0 dirty:115 writeback:0 unstable:0
  slab_reclaimable:2086 slab_unreclaimable:3167
  mapped:3398 shmem:18366 pagetables:1145 bounce:0
  free:1798 free_pcp:13 free_cma:0

Fixes: 6afcf8ef0ca0 ("mm, compaction: fix NR_ISOLATED_* stats for pfn based migration")
Link: http://lkml.kernel.org/r/1492683865-27549-1-git-send-email-rabin.vincent@axis.com
Signed-off-by: Rabin Vincent <rabinv@axis.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Ming Ling <ming.ling@spreadtrum.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/migrate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 66ce6b490b13..6850f62998cd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -183,9 +183,9 @@ void putback_movable_pages(struct list_head *l)
 			unlock_page(page);
 			put_page(page);
 		} else {
-			putback_lru_page(page);
 			dec_node_page_state(page, NR_ISOLATED_ANON +
 					page_is_file_cache(page));
+			putback_lru_page(page);
 		}
 	}
 }

From 0b7c970663411cbca20517f38544983ff4fd9650 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Sun, 16 Apr 2017 20:37:24 +0100
Subject: [PATCH 07/22] cifs: Do not send echoes before Negotiate is complete

commit 62a6cfddcc0a5313e7da3e8311ba16226fe0ac10 upstream.

commit 4fcd1813e640 ("Fix reconnect to not defer smb3 session reconnect
long after socket reconnect") added support for Negotiate requests to
be initiated by echo calls.

To avoid delays in calling echo after a reconnect, I added the patch
introduced by the commit b8c600120fc8 ("Call echo service immediately
after socket reconnect").

This has however caused a regression with cifs shares which do not have
support for echo calls to trigger Negotiate requests. On connections
which need to call Negotiation, the echo calls trigger an error which
triggers a reconnect which in turn triggers another echo call. This
results in a loop which is only broken when an operation is performed on
the cifs share. For an idle share, it can DOS a server.

The patch uses the smb_operation can_echo() for cifs so that it is
called only if connection has been already been setup.

kernel bz: 194531

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Tested-by: Jonathan Liu <net147@gmail.com>
Acked-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/smb1ops.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index fc537c29044e..87b87e091e8e 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -1015,6 +1015,15 @@ cifs_dir_needs_close(struct cifsFileInfo *cfile)
 	return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
 }
 
+static bool
+cifs_can_echo(struct TCP_Server_Info *server)
+{
+	if (server->tcpStatus == CifsGood)
+		return true;
+
+	return false;
+}
+
 struct smb_version_operations smb1_operations = {
 	.send_cancel = send_nt_cancel,
 	.compare_fids = cifs_compare_fids,
@@ -1049,6 +1058,7 @@ struct smb_version_operations smb1_operations = {
 	.get_dfs_refer = CIFSGetDFSRefer,
 	.qfs_tcon = cifs_qfs_tcon,
 	.is_path_accessible = cifs_is_path_accessible,
+	.can_echo = cifs_can_echo,
 	.query_path_info = cifs_query_path_info,
 	.query_file_info = cifs_query_file_info,
 	.get_srv_inum = cifs_get_srv_inum,

From f79ef57911ee598e0bfb7af1ab51413429a8f9d5 Mon Sep 17 00:00:00 2001
From: Germano Percossi <germano.percossi@citrix.com>
Date: Fri, 7 Apr 2017 12:29:37 +0100
Subject: [PATCH 08/22] CIFS: remove bad_network_name flag

commit a0918f1ce6a43ac980b42b300ec443c154970979 upstream.

STATUS_BAD_NETWORK_NAME can be received during node failover,
causing the flag to be set and making the reconnect thread
always unsuccessful, thereafter.

Once the only place where it is set is removed, the remaining
bits are rendered moot.

Removing it does not prevent "mount" from failing when a non
existent share is passed.

What happens when the share really ceases to exist while the
share is mounted is undefined now as much as it was before.

Signed-off-by: Germano Percossi <germano.percossi@citrix.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifsglob.h | 1 -
 fs/cifs/smb2pdu.c  | 5 -----
 2 files changed, 6 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 203287f86525..94661cf77ae8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -930,7 +930,6 @@ struct cifs_tcon {
 	bool use_persistent:1; /* use persistent instead of durable handles */
 #ifdef CONFIG_CIFS_SMB2
 	bool print:1;		/* set if connection to printer share */
-	bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */
 	__le32 capabilities;
 	__u32 share_flags;
 	__u32 maximal_access;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 7080dac3592c..802185386851 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1084,9 +1084,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 	else
 		return -EIO;
 
-	if (tcon && tcon->bad_network_name)
-		return -ENOENT;
-
 	if ((tcon && tcon->seal) &&
 	    ((ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) == 0)) {
 		cifs_dbg(VFS, "encryption requested but no server support");
@@ -1188,8 +1185,6 @@ tcon_exit:
 tcon_error_exit:
 	if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
 		cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
-		if (tcon)
-			tcon->bad_network_name = true;
 	}
 	goto tcon_exit;
 }

From 3d42ca46f47a4d3cd5abe465735227534c292aed Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Sun, 9 Apr 2017 22:09:38 +0200
Subject: [PATCH 09/22] s390/mm: fix CMMA vs KSM vs others

commit a8f60d1fadf7b8b54449fcc9d6b15248917478ba upstream.

On heavy paging with KSM I see guest data corruption. Turns out that
KSM will add pages to its tree, where the mapping return true for
pte_unused (or might become as such later).  KSM will unmap such pages
and reinstantiate with different attributes (e.g. write protected or
special, e.g. in replace_page or write_protect_page)). This uncovered
a bug in our pagetable handling: We must remove the unused flag as
soon as an entry becomes present again.

Signed-of-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0362cd5fa187..0cea7026e4ff 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1029,6 +1029,8 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t entry)
 {
+	if (pte_present(entry))
+		pte_val(entry) &= ~_PAGE_UNUSED;
 	if (mm_has_pgste(mm))
 		ptep_set_pte_at(mm, addr, ptep, entry);
 	else

From 11ba522d7929b9c319fd31d95bbc917baad9fe6f Mon Sep 17 00:00:00 2001
From: Thorsten Leemhuis <linux@leemhuis.info>
Date: Tue, 18 Apr 2017 11:14:28 -0700
Subject: [PATCH 10/22] Input: elantech - add Fujitsu Lifebook E547 to force
 crc_enabled

commit 704de489e0e3640a2ee2d0daf173e9f7375582ba upstream.

Temporary got a Lifebook E547 into my hands and noticed the touchpad
only works after running:

	echo "1" > /sys/devices/platform/i8042/serio2/crc_enabled

Add it to the list of machines that need this workaround.

Signed-off-by: Thorsten Leemhuis <linux@leemhuis.info>
Reviewed-by: Ulrik De Bie <ulrik.debie-os@e2big.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/mouse/elantech.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index db7d1d666ac1..7826994c45bf 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1118,6 +1118,7 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse,
  * Asus UX32VD             0x361f02        00, 15, 0e      clickpad
  * Avatar AVIU-145A2       0x361f00        ?               clickpad
  * Fujitsu LIFEBOOK E544   0x470f00        d0, 12, 09      2 hw buttons
+ * Fujitsu LIFEBOOK E547   0x470f00        50, 12, 09      2 hw buttons
  * Fujitsu LIFEBOOK E554   0x570f01        40, 14, 0c      2 hw buttons
  * Fujitsu T725            0x470f01        05, 12, 09      2 hw buttons
  * Fujitsu H730            0x570f00        c0, 14, 0c      3 hw buttons (**)
@@ -1523,6 +1524,13 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E544"),
 		},
 	},
+	{
+		/* Fujitsu LIFEBOOK E547 does not work with crc_enabled == 0 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E547"),
+		},
+	},
 	{
 		/* Fujitsu LIFEBOOK E554  does not work with crc_enabled == 0 */
 		.matches = {

From 4420e5f323c408990834dddb61eb16e4a95a41e4 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Apr 2017 19:47:04 +0200
Subject: [PATCH 11/22] ACPI / power: Avoid maybe-uninitialized warning

commit fe8c470ab87d90e4b5115902dd94eced7e3305c3 upstream.

gcc -O2 cannot always prove that the loop in acpi_power_get_inferred_state()
is enterered at least once, so it assumes that cur_state might not get
initialized:

drivers/acpi/power.c: In function 'acpi_power_get_inferred_state':
drivers/acpi/power.c:222:9: error: 'cur_state' may be used uninitialized in this function [-Werror=maybe-uninitialized]

This sets the variable to zero at the start of the loop, to ensure that
there is well-defined behavior even for an empty list. This gets rid of
the warning.

The warning first showed up when the -Os flag got removed in a bug fix
patch in linux-4.11-rc5.

I would suggest merging this addon patch on top of that bug fix to avoid
introducing a new warning in the stable kernels.

Fixes: 61b79e16c68d (ACPI: Fix incompatibility with mcount-based function graph tracing)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/power.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index fcd4ce6f78d5..1c2b846c5776 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -200,6 +200,7 @@ static int acpi_power_get_list_state(struct list_head *list, int *state)
 		return -EINVAL;
 
 	/* The state of the list is 'on' IFF all resources are 'on'. */
+	cur_state = 0;
 	list_for_each_entry(entry, list, node) {
 		struct acpi_power_resource *resource = entry->resource;
 		acpi_handle handle = resource->device.handle;

From 9fc1314285360151792b6ee153c0d5181698986d Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen@nxp.com>
Date: Wed, 19 Apr 2017 10:53:51 +0800
Subject: [PATCH 12/22] mmc: sdhci-esdhc-imx: increase the pad I/O drive
 strength for DDR50 card

commit 9f327845358d3dd0d8a5a7a5436b0aa5c432e757 upstream.

Currently for DDR50 card, it need tuning in default. We meet tuning fail
issue for DDR50 card and some data CRC error when DDR50 sd card works.

This is because the default pad I/O drive strength can't make sure DDR50
card work stable. So increase the pad I/O drive strength for DDR50 card,
and use pins_100mhz.

This fixes DDR50 card support for IMX since DDR50 tuning was enabled from
commit 9faac7b95ea4 ("mmc: sdhci: enable tuning for DDR50")

Tested-and-reported-by: Tim Harvey <tharvey@gateworks.com>
Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
Acked-by: Dong Aisheng <aisheng.dong@nxp.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 7123ef96ed18..445fc47dc3e7 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -830,6 +830,7 @@ static int esdhc_change_pinstate(struct sdhci_host *host,
 
 	switch (uhs) {
 	case MMC_TIMING_UHS_SDR50:
+	case MMC_TIMING_UHS_DDR50:
 		pinctrl = imx_data->pins_100mhz;
 		break;
 	case MMC_TIMING_UHS_SDR104:

From a260ff509b4d5fde2ac5f863cd3015cefbc45189 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 6 Mar 2017 10:04:25 +0100
Subject: [PATCH 13/22] ubifs: Fix RENAME_WHITEOUT support

commit c3d9fda688742c06e89aa1f0f8fd943fc11468cb upstream.

Remove faulty leftover check in do_rename(), apparently introduced in a
merge that combined whiteout support changes with commit f03b8ad8d386
("fs: support RENAME_NOREPLACE for local filesystems")

Fixes: f03b8ad8d386 ("fs: support RENAME_NOREPLACE for local filesystems")
Fixes: 9e0a1fff8db5 ("ubifs: Implement RENAME_WHITEOUT")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ubifs/dir.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index ca16c5d7bab1..0b5a19de4636 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1088,9 +1088,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct timespec time;
 	unsigned int uninitialized_var(saved_nlink);
 
-	if (flags & ~RENAME_NOREPLACE)
-		return -EINVAL;
-
 	/*
 	 * Budget request settings: deletion direntry, new direntry, removing
 	 * the old inode, and changing old and new parent directory inodes.

From b93858556fd13c76a36a0c110450fa35eadf5671 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Thu, 30 Mar 2017 10:50:49 +0200
Subject: [PATCH 14/22] ubifs: Fix O_TMPFILE corner case in ubifs_link()

commit 32fe905c17f001c0eee13c59afddd0bf2eed509c upstream.

It is perfectly fine to link a tmpfile back using linkat().
Since tmpfiles are created with a link count of 0 they appear
on the orphan list, upon re-linking the inode has to be removed
from the orphan list again.

Ralph faced a filesystem corruption in combination with overlayfs
due to this bug.

Cc: Ralph Sennhauser <ralph.sennhauser@gmail.com>
Cc: Amir Goldstein <amir73il@gmail.com>
Reported-by: Ralph Sennhauser <ralph.sennhauser@gmail.com>
Tested-by: Ralph Sennhauser <ralph.sennhauser@gmail.com>
Reported-by: Amir Goldstein <amir73il@gmail.com>
Fixes: 474b93704f321 ("ubifs: Implement O_TMPFILE")
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ubifs/dir.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 0b5a19de4636..87ab02e2d666 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -622,6 +622,11 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
 		return err;
 
 	lock_2_inodes(dir, inode);
+
+	/* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */
+	if (inode->i_nlink == 0)
+		ubifs_delete_orphan(c, inode->i_ino);
+
 	inc_nlink(inode);
 	ihold(inode);
 	inode->i_ctime = ubifs_current_time(inode);
@@ -641,6 +646,8 @@ out_cancel:
 	dir->i_size -= sz_change;
 	dir_ui->ui_size = dir->i_size;
 	drop_nlink(inode);
+	if (inode->i_nlink == 0)
+		ubifs_add_orphan(c, inode->i_ino);
 	unlock_2_inodes(dir, inode);
 	ubifs_release_budget(c, &req);
 	iput(inode);

From e0411f1eb549a7993c9821c05f1787c0bd1523b4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 20 Apr 2017 21:32:16 +0200
Subject: [PATCH 15/22] mac80211: reject ToDS broadcast data frames

commit 3018e947d7fd536d57e2b550c33e456d921fff8c upstream.

AP/AP_VLAN modes don't accept any real 802.11 multicast data
frames, but since they do need to accept broadcast management
frames the same is currently permitted for data frames. This
opens a security problem because such frames would be decrypted
with the GTK, and could even contain unicast L3 frames.

Since the spec says that ToDS frames must always have the BSSID
as the RA (addr1), reject any other data frames.

The problem was originally reported in "Predicting, Decrypting,
and Abusing WPA2/802.11 Group Keys" at usenix
https://www.usenix.org/conference/usenixsecurity16/technical-sessions/presentation/vanhoef
and brought to my attention by Jouni.

Reported-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
--
---
 net/mac80211/rx.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a697ddf56334..e2bbad0e494c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3617,6 +3617,27 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
 			    !ether_addr_equal(bssid, hdr->addr1))
 				return false;
 		}
+
+		/*
+		 * 802.11-2016 Table 9-26 says that for data frames, A1 must be
+		 * the BSSID - we've checked that already but may have accepted
+		 * the wildcard (ff:ff:ff:ff:ff:ff).
+		 *
+		 * It also says:
+		 *	The BSSID of the Data frame is determined as follows:
+		 *	a) If the STA is contained within an AP or is associated
+		 *	   with an AP, the BSSID is the address currently in use
+		 *	   by the STA contained in the AP.
+		 *
+		 * So we should not accept data frames with an address that's
+		 * multicast.
+		 *
+		 * Accepting it also opens a security problem because stations
+		 * could encrypt it with the GTK and inject traffic that way.
+		 */
+		if (ieee80211_is_data(hdr->frame_control) && multicast)
+			return false;
+
 		return true;
 	case NL80211_IFTYPE_WDS:
 		if (bssid || !ieee80211_is_data(hdr->frame_control))

From 87cfeaa5e5a1834c2f55ade5da5a117991305269 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 13 Apr 2017 14:23:49 +0200
Subject: [PATCH 16/22] mac80211: fix MU-MIMO follow-MAC mode

commit 9e478066eae41211c92a8f63cc69aafc391bd6ab upstream.

There are two bugs in the follow-MAC code:
 * it treats the radiotap header as the 802.11 header
   (therefore it can't possibly work)
 * it doesn't verify that the skb data it accesses is actually
   present in the header, which is mitigated by the first point

Fix this by moving all of this out into a separate function.
This function copies the data it needs using skb_copy_bits()
to make sure it can be accessed if it's paged, and offsets
that by the possibly present vendor radiotap header.

This also makes all those conditions more readable.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/rx.c | 65 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 18 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e2bbad0e494c..acaaf616da71 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -208,6 +208,51 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
 	return len;
 }
 
+static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
+					 struct sk_buff *skb,
+					 int rtap_vendor_space)
+{
+	struct {
+		struct ieee80211_hdr_3addr hdr;
+		u8 category;
+		u8 action_code;
+	} __packed action;
+
+	if (!sdata)
+		return;
+
+	BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1);
+
+	if (skb->len < rtap_vendor_space + sizeof(action) +
+		       VHT_MUMIMO_GROUPS_DATA_LEN)
+		return;
+
+	if (!is_valid_ether_addr(sdata->u.mntr.mu_follow_addr))
+		return;
+
+	skb_copy_bits(skb, rtap_vendor_space, &action, sizeof(action));
+
+	if (!ieee80211_is_action(action.hdr.frame_control))
+		return;
+
+	if (action.category != WLAN_CATEGORY_VHT)
+		return;
+
+	if (action.action_code != WLAN_VHT_ACTION_GROUPID_MGMT)
+		return;
+
+	if (!ether_addr_equal(action.hdr.addr1, sdata->u.mntr.mu_follow_addr))
+		return;
+
+	skb = skb_copy(skb, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
+	skb_queue_tail(&sdata->skb_queue, skb);
+	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+}
+
 /*
  * ieee80211_add_rx_radiotap_header - add radiotap header
  *
@@ -515,7 +560,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 	struct net_device *prev_dev = NULL;
 	int present_fcs_len = 0;
 	unsigned int rtap_vendor_space = 0;
-	struct ieee80211_mgmt *mgmt;
 	struct ieee80211_sub_if_data *monitor_sdata =
 		rcu_dereference(local->monitor_sdata);
 
@@ -553,6 +597,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		return remove_monitor_info(local, origskb, rtap_vendor_space);
 	}
 
+	ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_vendor_space);
+
 	/* room for the radiotap header based on driver features */
 	rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, origskb);
 	needed_headroom = rt_hdrlen - rtap_vendor_space;
@@ -618,23 +664,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		ieee80211_rx_stats(sdata->dev, skb->len);
 	}
 
-	mgmt = (void *)skb->data;
-	if (monitor_sdata &&
-	    skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN &&
-	    ieee80211_is_action(mgmt->frame_control) &&
-	    mgmt->u.action.category == WLAN_CATEGORY_VHT &&
-	    mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT &&
-	    is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) &&
-	    ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) {
-		struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC);
-
-		if (mu_skb) {
-			mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
-			skb_queue_tail(&monitor_sdata->skb_queue, mu_skb);
-			ieee80211_queue_work(&local->hw, &monitor_sdata->work);
-		}
-	}
-
 	if (prev_dev) {
 		skb->dev = prev_dev;
 		netif_receive_skb(skb);

From 790b2b5a01cea4ee4284d52ea995e2af06f4f8d7 Mon Sep 17 00:00:00 2001
From: Sebastian Siewior <bigeasy@linutronix.de>
Date: Wed, 22 Feb 2017 17:15:21 +0100
Subject: [PATCH 17/22] ubi/upd: Always flush after prepared for an update

commit 9cd9a21ce070be8a918ffd3381468315a7a76ba6 upstream.

In commit 6afaf8a484cb ("UBI: flush wl before clearing update marker") I
managed to trigger and fix a similar bug. Now here is another version of
which I assumed it wouldn't matter back then but it turns out UBI has a
check for it and will error out like this:

|ubi0 warning: validate_vid_hdr: inconsistent used_ebs
|ubi0 error: validate_vid_hdr: inconsistent VID header at PEB 592

All you need to trigger this is? "ubiupdatevol /dev/ubi0_0 file" + a
powercut in the middle of the operation.
ubi_start_update() sets the update-marker and puts all EBs on the erase
list. After that userland can proceed to write new data while the old EB
aren't erased completely. A powercut at this point is usually not that
much of a tragedy. UBI won't give read access to the static volume
because it has the update marker. It will most likely set the corrupted
flag because it misses some EBs.
So we are all good. Unless the size of the image that has been written
differs from the old image in the magnitude of at least one EB. In that
case UBI will find two different values for `used_ebs' and refuse to
attach the image with the error message mentioned above.

So in order not to get in the situation, the patch will ensure that we
wait until everything is removed before it tries to write any data.
The alternative would be to detect such a case and remove all EBs at the
attached time after we processed the volume-table and see the
update-marker set. The patch looks bigger and I doubt it is worth it
since usually the write() will wait from time to time for a new EB since
usually there not that many spare EB that can be used.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/ubi/upd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 0134ba32a057..39712560b4c1 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -148,11 +148,11 @@ int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol,
 			return err;
 	}
 
-	if (bytes == 0) {
-		err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL);
-		if (err)
-			return err;
+	err = ubi_wl_flush(ubi, UBI_ALL, UBI_ALL);
+	if (err)
+		return err;
 
+	if (bytes == 0) {
 		err = clear_update_marker(ubi, vol, 0);
 		if (err)
 			return err;

From 1bd55ab13039a0d1c004eeb7a487810138bc42c8 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Date: Tue, 11 Apr 2017 10:38:13 +0530
Subject: [PATCH 18/22] powerpc/kprobe: Fix oops when kprobed on 'stdu'
 instruction

commit 9e1ba4f27f018742a1aa95d11e35106feba08ec1 upstream.

If we set a kprobe on a 'stdu' instruction on powerpc64, we see a kernel
OOPS:

  Bad kernel stack pointer cd93c840 at c000000000009868
  Oops: Bad kernel stack pointer, sig: 6 [#1]
  ...
  GPR00: c000001fcd93cb30 00000000cd93c840 c0000000015c5e00 00000000cd93c840
  ...
  NIP [c000000000009868] resume_kernel+0x2c/0x58
  LR [c000000000006208] program_check_common+0x108/0x180

On a 64-bit system when the user probes on a 'stdu' instruction, the kernel does
not emulate actual store in emulate_step() because it may corrupt the exception
frame. So the kernel does the actual store operation in exception return code
i.e. resume_kernel().

resume_kernel() loads the saved stack pointer from memory using lwz, which only
loads the low 32-bits of the address, causing the kernel crash.

Fix this by loading the 64-bit value instead.

Fixes: be96f63375a1 ("powerpc: Split out instruction analysis part of emulate_step()")
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
[mpe: Change log massage, add stable tag]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/entry_64.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6432d4bf08c8..767ef6d68c9e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -689,7 +689,7 @@ resume_kernel:
 
 	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
 
-	lwz	r3,GPR1(r1)
+	ld	r3,GPR1(r1)
 	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
 	mr	r4,r1			/* src:  current exception frame */
 	mr	r1,r3			/* Reroute the trampoline frame to r1 */
@@ -703,8 +703,8 @@ resume_kernel:
 	addi	r6,r6,8
 	bdnz	2b
 
-	/* Do real store operation to complete stwu */
-	lwz	r5,GPR1(r1)
+	/* Do real store operation to complete stdu */
+	ld	r5,GPR1(r1)
 	std	r8,0(r5)
 
 	/* Clear _TIF_EMULATE_STACK_STORE flag */

From 6966a6579e1bc7b3486bed395e7d6875912a9a9b Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Thu, 30 Mar 2017 13:17:14 +0200
Subject: [PATCH 19/22] x86/mce/AMD: Give a name to MCA bank 3 when accessed
 with legacy MSRs

commit 29f72ce3e4d18066ec75c79c857bee0618a3504b upstream.

MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name.
However, MCA bank 3 is defined on Fam17h systems and can be accessed
using legacy MSRs. Without a name we get a stack trace on Fam17h systems
when trying to register sysfs files for bank 3 on kernels that don't
recognize Scalable MCA.

Call MCA bank 3 "decode_unit" since this is what it represents on
Fam17h. This will allow kernels without SMCA support to see this bank on
Fam17h+ and prevent the stack trace. This will not affect older systems
since this bank is reserved on them, i.e. it'll be ignored.

Tested on AMD Fam15h and Fam17h systems.

  WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal
  kobject: (ffff88085bb256c0): attempted to be registered with empty name!
  ...
  Call Trace:
   kobject_add_internal
   kobject_add
   kobject_create_and_add
   threshold_create_device
   threshold_init_device

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9b5403462936..3dfca7b302dc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -59,7 +59,7 @@ static const char * const th_names[] = {
 	"load_store",
 	"insn_fetch",
 	"combined_unit",
-	"",
+	"decode_unit",
 	"northbridge",
 	"execution_unit",
 };

From f8bc0881fe95496ddf3f8a16808d9860db207941 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Tue, 18 Apr 2017 20:42:35 +0200
Subject: [PATCH 20/22] x86/mce: Make the MCE notifier a blocking one

commit 0dc9c639e6553e39c13b2c0d54c8a1b098cb95e2 upstream.

The NFIT MCE handler callback (for handling media errors on NVDIMMs)
takes a mutex to add the location of a memory error to a list. But since
the notifier call chain for machine checks (x86_mce_decoder_chain) is
atomic, we get a lockdep splat like:

  BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
  in_atomic(): 1, irqs_disabled(): 0, pid: 4, name: kworker/0:0
  [..]
  Call Trace:
   dump_stack
   ___might_sleep
   __might_sleep
   mutex_lock_nested
   ? __lock_acquire
   nfit_handle_mce
   notifier_call_chain
   atomic_notifier_call_chain
   ? atomic_notifier_call_chain
   mce_gen_pool_process

Convert the notifier to a blocking one which gets to run only in process
context.

Boris: remove the notifier call in atomic context in print_mce(). For
now, let's print the MCE on the atomic path so that we can make sure
they go out and get logged at least.

Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error")
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/20170411224457.24777-1-vishal.l.verma@intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/mce-genpool.c  |  2 +-
 arch/x86/kernel/cpu/mcheck/mce-internal.h |  2 +-
 arch/x86/kernel/cpu/mcheck/mce.c          | 16 +++-------------
 3 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 93d824ec3120..040af1939460 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -85,7 +85,7 @@ void mce_gen_pool_process(void)
 	head = llist_reverse_order(head);
 	llist_for_each_entry_safe(node, tmp, head, llnode) {
 		mce = &node->mce;
-		atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+		blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
 		gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
 	}
 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index cd74a3f00aea..de20902ecf23 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -13,7 +13,7 @@ enum severity_level {
 	MCE_PANIC_SEVERITY,
 };
 
-extern struct atomic_notifier_head x86_mce_decoder_chain;
+extern struct blocking_notifier_head x86_mce_decoder_chain;
 
 #define ATTR_LEN		16
 #define INITIAL_CHECK_INTERVAL	5 * 60 /* 5 minutes */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a7fdf453d895..22cda29d654e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -120,7 +120,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
  */
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
@@ -213,13 +213,13 @@ void mce_register_decode_chain(struct notifier_block *nb)
 	if (nb != &mce_srao_nb && nb->priority == INT_MAX)
 		nb->priority -= 1;
 
-	atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
+	blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
 }
 EXPORT_SYMBOL_GPL(mce_register_decode_chain);
 
 void mce_unregister_decode_chain(struct notifier_block *nb)
 {
-	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
+	blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
 }
 EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
 
@@ -272,8 +272,6 @@ struct mca_msr_regs msr_ops = {
 
 static void print_mce(struct mce *m)
 {
-	int ret = 0;
-
 	pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
 
@@ -309,14 +307,6 @@ static void print_mce(struct mce *m)
 		m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
 		cpu_data(m->extcpu).microcode);
 
-	/*
-	 * Print out human-readable details about the MCE error,
-	 * (if the CPU has an implementation for that)
-	 */
-	ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
-	if (ret == NOTIFY_STOP)
-		return;
-
 	pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
 }
 

From c36eaa6ca346dc8d45e9125af62db75625b42a6f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 Apr 2017 16:42:08 -0700
Subject: [PATCH 21/22] device-dax: switch to srcu, fix rcu_read_lock() vs pte
 allocation

commit 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 upstream.

The following warning triggers with a new unit test that stresses the
device-dax interface.

 ===============================
 [ ERR: suspicious RCU usage.  ]
 4.11.0-rc4+ #1049 Tainted: G           O
 -------------------------------
 ./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 0
 2 locks held by fio/9070:
  #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff8d0739d7>] __do_page_fault+0x167/0x4f0
  #1:  (rcu_read_lock){......}, at: [<ffffffffc03fbd02>] dax_dev_huge_fault+0x32/0x620 [dax]

 Call Trace:
  dump_stack+0x86/0xc3
  lockdep_rcu_suspicious+0xd7/0x110
  ___might_sleep+0xac/0x250
  __might_sleep+0x4a/0x80
  __alloc_pages_nodemask+0x23a/0x360
  alloc_pages_current+0xa1/0x1f0
  pte_alloc_one+0x17/0x80
  __pte_alloc+0x1e/0x120
  __get_locked_pte+0x1bf/0x1d0
  insert_pfn.isra.70+0x3a/0x100
  ? lookup_memtype+0xa6/0xd0
  vm_insert_mixed+0x64/0x90
  dax_dev_huge_fault+0x520/0x620 [dax]
  ? dax_dev_huge_fault+0x32/0x620 [dax]
  dax_dev_fault+0x10/0x20 [dax]
  __do_fault+0x1e/0x140
  __handle_mm_fault+0x9af/0x10d0
  handle_mm_fault+0x16d/0x370
  ? handle_mm_fault+0x47/0x370
  __do_page_fault+0x28c/0x4f0
  trace_do_page_fault+0x58/0x2a0
  do_async_page_fault+0x1a/0xa0
  async_page_fault+0x28/0x30

Inserting a page table entry may trigger an allocation while we are
holding a read lock to keep the device instance alive for the duration
of the fault. Use srcu for this keep-alive protection.

Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dax/Kconfig |  1 +
 drivers/dax/dax.c   | 13 +++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index 3e2ab3b14eea..9e95bf94eb13 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -2,6 +2,7 @@ menuconfig DEV_DAX
 	tristate "DAX: direct access to differentiated memory"
 	default m if NVDIMM_DAX
 	depends on TRANSPARENT_HUGEPAGE
+	select SRCU
 	help
 	  Support raw access to differentiated (persistence, bandwidth,
 	  latency...) memory via an mmap(2) capable character
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 152552d2c306..193224889e41 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -24,6 +24,7 @@
 #include "dax.h"
 
 static dev_t dax_devt;
+DEFINE_STATIC_SRCU(dax_srcu);
 static struct class *dax_class;
 static DEFINE_IDA(dax_minor_ida);
 static int nr_dax = CONFIG_NR_DEV_DAX;
@@ -59,7 +60,7 @@ struct dax_region {
  * @region - parent region
  * @dev - device backing the character device
  * @cdev - core chardev data
- * @alive - !alive + rcu grace period == no new mappings can be established
+ * @alive - !alive + srcu grace period == no new mappings can be established
  * @id - child id in the region
  * @num_resources - number of physical address extents in this device
  * @res - array of physical address ranges
@@ -437,7 +438,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
 static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 		pmd_t *pmd, unsigned int flags)
 {
-	int rc;
+	int rc, id;
 	struct file *filp = vma->vm_file;
 	struct dax_dev *dax_dev = filp->private_data;
 
@@ -445,9 +446,9 @@ static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 			current->comm, (flags & FAULT_FLAG_WRITE)
 			? "write" : "read", vma->vm_start, vma->vm_end);
 
-	rcu_read_lock();
+	id = srcu_read_lock(&dax_srcu);
 	rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
-	rcu_read_unlock();
+	srcu_read_unlock(&dax_srcu, id);
 
 	return rc;
 }
@@ -563,11 +564,11 @@ static void unregister_dax_dev(void *dev)
 	 * Note, rcu is not protecting the liveness of dax_dev, rcu is
 	 * ensuring that any fault handlers that might have seen
 	 * dax_dev->alive == true, have completed.  Any fault handlers
-	 * that start after synchronize_rcu() has started will abort
+	 * that start after synchronize_srcu() has started will abort
 	 * upon seeing dax_dev->alive == false.
 	 */
 	dax_dev->alive = false;
-	synchronize_rcu();
+	synchronize_srcu(&dax_srcu);
 	unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
 	cdev_del(cdev);
 	device_unregister(dev);

From a8c90ef62281db933118aa84489eb0e1e9cc347c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 27 Apr 2017 09:11:26 +0200
Subject: [PATCH 22/22] Linux 4.9.25

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 50436f502d81..8e18c63388c4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 9
-SUBLEVEL = 24
+SUBLEVEL = 25
 EXTRAVERSION =
 NAME = Roaring Lionus