From 319b324649db73987723eae30af1f9c3157ec0d2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Apr 2020 13:23:27 -0500 Subject: [PATCH 001/636] objtool: Support Clang non-section symbols in ORC generation commit e81e0724432542af8d8c702c31e9d82f57b1ff31 upstream. When compiling the kernel with AS=clang, objtool produces a lot of warnings: warning: objtool: missing symbol for section .text warning: objtool: missing symbol for section .init.text warning: objtool: missing symbol for section .ref.text It then fails to generate the ORC table. The problem is that objtool assumes text section symbols always exist. But the Clang assembler is aggressive about removing them. When generating relocations for the ORC table, objtool always tries to reference instructions by their section symbol offset. If the section symbol doesn't exist, it bails. Do a fallback: when a section symbol isn't available, reference a function symbol instead. Reported-by: Dmitry Golovin Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Tested-by: Nathan Chancellor Reviewed-by: Miroslav Benes Acked-by: Peter Zijlstra (Intel) Link: https://github.com/ClangBuiltLinux/linux/issues/669 Link: https://lkml.kernel.org/r/9a9cae7fcf628843aabe5a086b1a3c5bf50f42e8.1585761021.git.jpoimboe@redhat.com Cc: Nick Desaulniers Signed-off-by: Greg Kroah-Hartman --- tools/objtool/orc_gen.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 3f98dcfbc177..0b1ba8e7d18a 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -100,11 +100,6 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, struct orc_entry *orc; struct rela *rela; - if (!insn_sec->sym) { - WARN("missing symbol for section %s", insn_sec->name); - return -1; - } - /* populate ORC data */ orc = (struct orc_entry *)u_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); @@ -117,8 +112,32 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec, } memset(rela, 0, sizeof(*rela)); - rela->sym = insn_sec->sym; - rela->addend = insn_off; + if (insn_sec->sym) { + rela->sym = insn_sec->sym; + rela->addend = insn_off; + } else { + /* + * The Clang assembler doesn't produce section symbols, so we + * have to reference the function symbol instead: + */ + rela->sym = find_symbol_containing(insn_sec, insn_off); + if (!rela->sym) { + /* + * Hack alert. This happens when we need to reference + * the NOP pad insn immediately after the function. + */ + rela->sym = find_symbol_containing(insn_sec, + insn_off - 1); + } + if (!rela->sym) { + WARN("missing symbol for insn at offset 0x%lx\n", + insn_off); + return -1; + } + + rela->addend = insn_off - rela->sym->offset; + } + rela->type = R_X86_64_PC32; rela->offset = idx * sizeof(int); From aefcff70505dc0dc2305a90d1aa525037286cb1f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 17 Sep 2020 08:56:11 +0200 Subject: [PATCH 002/636] scripts/setlocalversion: make git describe output more reliable commit 548b8b5168c90c42e88f70fcf041b4ce0b8e7aa8 upstream. When building for an embedded target using Yocto, we're sometimes observing that the version string that gets built into vmlinux (and thus what uname -a reports) differs from the path under /lib/modules/ where modules get installed in the rootfs, but only in the length of the -gabc123def suffix. Hence modprobe always fails. The problem is that Yocto has the concept of "sstate" (shared state), which allows different developers/buildbots/etc. to share build artifacts, based on a hash of all the metadata that went into building that artifact - and that metadata includes all dependencies (e.g. the compiler used etc.). That normally works quite well; usually a clean build (without using any sstate cache) done by one developer ends up being binary identical to a build done on another host. However, one thing that can cause two developers to end up with different builds [and thus make one's vmlinux package incompatible with the other's kernel-dev package], which is not captured by the metadata hashing, is this `git describe`: The output of that can be affected by (1) git version: before 2.11 git defaulted to a minimum of 7, since 2.11 (git.git commit e6c587) the default is dynamic based on the number of objects in the repo (2) hence even if both run the same git version, the output can differ based on how many remotes are being tracked (or just lots of local development branches or plain old garbage) (3) and of course somebody could have a core.abbrev config setting in ~/.gitconfig So in order to avoid `uname -a` output relying on such random details of the build environment which are rather hard to ensure are consistent between developers and buildbots, make sure the abbreviated sha1 always consists of exactly 12 hex characters. That is consistent with the current rule for -stable patches, and is almost always enough to identify the head commit unambigously - in the few cases where it does not, the v5.4.3-00021- prefix would certainly nail it down. [Adapt to `` vs $() differences between 5.4 and upstream.] Signed-off-by: Rasmus Villemoes Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/setlocalversion | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 365b3c2b8f43..2cb0b92f40be 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -45,7 +45,7 @@ scm_version() # Check for git and a git repo. if test -z "$(git rev-parse --show-cdup 2>/dev/null)" && - head=`git rev-parse --verify --short HEAD 2>/dev/null`; then + head=$(git rev-parse --verify HEAD 2>/dev/null); then # If we are at a tagged commit (like "v2.6.30-rc6"), we ignore # it, because this version is defined in the top level Makefile. @@ -59,11 +59,22 @@ scm_version() fi # If we are past a tagged commit (like # "v2.6.30-rc5-302-g72357d5"), we pretty print it. - if atag="`git describe 2>/dev/null`"; then - echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),$(NF))}' + # + # Ensure the abbreviated sha1 has exactly 12 + # hex characters, to make the output + # independent of git version, local + # core.abbrev settings and/or total number of + # objects in the current repository - passing + # --abbrev=12 ensures a minimum of 12, and the + # awk substr() then picks the 'g' and first 12 + # hex chars. + if atag="$(git describe --abbrev=12 2>/dev/null)"; then + echo "$atag" | awk -F- '{printf("-%05d-%s", $(NF-1),substr($(NF),0,13))}' - # If we don't have a tag at all we print -g{commitish}. + # If we don't have a tag at all we print -g{commitish}, + # again using exactly 12 hex chars. else + head="$(echo $head | cut -c1-12)" printf '%s%s' -g $head fi fi From d953cd3fb149a999534ff48af323eb091fd5aa3b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Jul 2020 17:11:09 +0100 Subject: [PATCH 003/636] arm64: Run ARCH_WORKAROUND_1 enabling code on all CPUs commit 18fce56134c987e5b4eceddafdbe4b00c07e2ae1 upstream. Commit 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack thereof") changed the way we deal with ARCH_WORKAROUND_1, by moving most of the enabling code to the .matches() callback. This has the unfortunate effect that the workaround gets only enabled on the first affected CPU, and no other. In order to address this, forcefully call the .matches() callback from a .cpu_enable() callback, which brings us back to the original behaviour. Fixes: 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack thereof") Cc: Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/cpu_errata.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 76490b0cefce..d191ce8410db 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -619,6 +619,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } +static void +cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap) +{ + cap->matches(cap, SCOPE_LOCAL_CPU); +} + static const __maybe_unused struct midr_range tx2_family_cpus[] = { MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), @@ -813,9 +819,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { + .desc = "Branch predictor hardening", .capability = ARM64_HARDEN_BRANCH_PREDICTOR, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, + .cpu_enable = cpu_enable_branch_predictor_hardening, }, #ifdef CONFIG_HARDEN_EL2_VECTORS { From e844158495b752533593e0963c9d53493197a358 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 16 Oct 2020 10:53:39 -0700 Subject: [PATCH 004/636] arm64: link with -z norelro regardless of CONFIG_RELOCATABLE commit 3b92fa7485eba16b05166fddf38ab42f2ff6ab95 upstream. With CONFIG_EXPERT=y, CONFIG_KASAN=y, CONFIG_RANDOMIZE_BASE=n, CONFIG_RELOCATABLE=n, we observe the following failure when trying to link the kernel image with LD=ld.lld: error: section: .exit.data is not contiguous with other relro sections ld.lld defaults to -z relro while ld.bfd defaults to -z norelro. This was previously fixed, but only for CONFIG_RELOCATABLE=y. Fixes: 3bbd3db86470 ("arm64: relocatable: fix inconsistencies in linker script and options") Signed-off-by: Nick Desaulniers Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201016175339.2429280-1-ndesaulniers@google.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 9a5e28141211..dc54a883513a 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X -z norelro CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 @@ -18,7 +18,7 @@ ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour # for relative relocs, since this leads to better Image compression # with the relocation offsets always being zero. -LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \ +LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ $(call ld-option, --no-apply-dynamic-relocs) endif From 8d339b3470b23ef49ddf9c7ade1a60f18944e7fd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 21 Aug 2020 17:10:27 -0700 Subject: [PATCH 005/636] x86/PCI: Fix intel_mid_pci.c build error when ACPI is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 035fff1f7aab43e420e0098f0854470a5286fb83 upstream. Fix build error when CONFIG_ACPI is not set/enabled by adding the header file which contains a stub for the function in the build error. ../arch/x86/pci/intel_mid_pci.c: In function ‘intel_mid_pci_init’: ../arch/x86/pci/intel_mid_pci.c:303:2: error: implicit declaration of function ‘acpi_noirq_set’; did you mean ‘acpi_irq_get’? [-Werror=implicit-function-declaration] acpi_noirq_set(); Fixes: a912a7584ec3 ("x86/platform/intel-mid: Move PCI initialization to arch_init()") Link: https://lore.kernel.org/r/ea903917-e51b-4cc9-2680-bc1e36efa026@infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Jesse Barnes Acked-by: Thomas Gleixner Cc: stable@vger.kernel.org # v4.16+ Cc: Jacob Pan Cc: Len Brown Cc: Jesse Barnes Cc: Arjan van de Ven Signed-off-by: Greg Kroah-Hartman --- arch/x86/pci/intel_mid_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 43867bc85368..eea5a0f3b959 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -33,6 +33,7 @@ #include #include #include +#include #define PCIE_CAP_OFFSET 0x100 From 02bb497cd6de22e9ce17396957e76fa5aa11102f Mon Sep 17 00:00:00 2001 From: Michael Schaller Date: Fri, 25 Sep 2020 09:45:02 +0200 Subject: [PATCH 006/636] efivarfs: Replace invalid slashes with exclamation marks in dentries. commit 336af6a4686d885a067ecea8c3c3dd129ba4fc75 upstream. Without this patch efivarfs_alloc_dentry creates dentries with slashes in their name if the respective EFI variable has slashes in its name. This in turn causes EIO on getdents64, which prevents a complete directory listing of /sys/firmware/efi/efivars/. This patch replaces the invalid shlashes with exclamation marks like kobject_set_name_vargs does for /sys/firmware/efi/vars/ to have consistently named dentries under /sys/firmware/efi/vars/ and /sys/firmware/efi/efivars/. Signed-off-by: Michael Schaller Link: https://lore.kernel.org/r/20200925074502.150448-1-misch@google.com Signed-off-by: Ard Biesheuvel Signed-off-by: dann frazier Signed-off-by: Greg Kroah-Hartman --- fs/efivarfs/super.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 5b68e4294faa..834615f13f3e 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -145,6 +145,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; + /* replace invalid slashes like kobject_set_name_vargs does for /sys/firmware/efi/vars. */ + strreplace(name, '/', '!'); + inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0, is_removable); if (!inode) From f9dcae22d40c195425cdeba16618b6c9c60886e4 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Mon, 26 Oct 2020 01:05:39 +0530 Subject: [PATCH 007/636] chelsio/chtls: fix deadlock issue [ Upstream commit 28e9dcd9172028263c8225c15c4e329e08475e89 ] In chtls_pass_establish() we hold child socket lock using bh_lock_sock and we are again trying bh_lock_sock in add_to_reap_list, causing deadlock. Remove bh_lock_sock in add_to_reap_list() as lock is already held. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201025193538.31112-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_cm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index f1ca66147c28..2b32f8d7ce83 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -1348,7 +1348,6 @@ static void add_to_reap_list(struct sock *sk) struct chtls_sock *csk = sk->sk_user_data; local_bh_disable(); - bh_lock_sock(sk); release_tcp_port(sk); /* release the port immediately */ spin_lock(&reap_list_lock); @@ -1357,7 +1356,6 @@ static void add_to_reap_list(struct sock *sk) if (!csk->passive_reap_next) schedule_work(&reap_task); spin_unlock(&reap_list_lock); - bh_unlock_sock(sk); local_bh_enable(); } From df2e825b3af547458df24846b00ed3e650c3610c Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Mon, 26 Oct 2020 01:12:29 +0530 Subject: [PATCH 008/636] chelsio/chtls: fix memory leaks in CPL handlers [ Upstream commit 6daa1da4e262b0cd52ef0acc1989ff22b5540264 ] CPL handler functions chtls_pass_open_rpl() and chtls_close_listsrv_rpl() should return CPL_RET_BUF_DONE so that caller function will do skb free to avoid leak. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201025194228.31271-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_cm.c | 27 +++++++++++-------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 2b32f8d7ce83..f7334c42ebd9 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -696,14 +696,13 @@ static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb) if (rpl->status != CPL_ERR_NONE) { pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n", rpl->status, stid); - return CPL_RET_BUF_DONE; + } else { + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); } - cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); - sock_put(listen_ctx->lsk); - kfree(listen_ctx); - module_put(THIS_MODULE); - - return 0; + return CPL_RET_BUF_DONE; } static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) @@ -720,15 +719,13 @@ static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) if (rpl->status != CPL_ERR_NONE) { pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n", rpl->status, stid); - return CPL_RET_BUF_DONE; + } else { + cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); + sock_put(listen_ctx->lsk); + kfree(listen_ctx); + module_put(THIS_MODULE); } - - cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family); - sock_put(listen_ctx->lsk); - kfree(listen_ctx); - module_put(THIS_MODULE); - - return 0; + return CPL_RET_BUF_DONE; } static void chtls_purge_wr_queue(struct sock *sk) From efea090aff4b690cb5c3175724ea69de23d9ea19 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Fri, 23 Oct 2020 00:35:57 +0530 Subject: [PATCH 009/636] chelsio/chtls: fix tls record info to user [ Upstream commit 4f3391ce8f5a69e7e6d66d0a3fc654eb6dbdc919 ] chtls_pt_recvmsg() receives a skb with tls header and subsequent skb with data, need to finalize the data copy whenever next skb with tls header is available. but here current tls header is overwritten by next available tls header, ends up corrupting user buffer data. fixing it by finalizing current record whenever next skb contains tls header. v1->v2: - Improved commit message. Fixes: 17a7d24aa89d ("crypto: chtls - generic handling of data and hdr") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201022190556.21308-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_io.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index e9573e7f9e80..f9874da23a29 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1549,6 +1549,7 @@ skip_copy: tp->urg_data = 0; if ((avail + offset) >= skb->len) { + struct sk_buff *next_skb; if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { tp->copied_seq += skb->len; hws->rcvpld = skb->hdr_len; @@ -1558,8 +1559,10 @@ skip_copy: chtls_free_skb(sk, skb); buffers_freed++; hws->copied_seq = 0; - if (copied >= target && - !skb_peek(&sk->sk_receive_queue)) + next_skb = skb_peek(&sk->sk_receive_queue); + if (copied >= target && !next_skb) + break; + if (ULP_SKB_CB(next_skb)->flags & ULPCB_FLAG_TLS_HDR) break; } } while (len > 0); From 601b6b5176104490d332addcbc00b1b49eb3620b Mon Sep 17 00:00:00 2001 From: Masahiro Fujiwara Date: Tue, 27 Oct 2020 20:48:46 +0900 Subject: [PATCH 010/636] gtp: fix an use-before-init in gtp_newlink() [ Upstream commit 51467431200b91682b89d31317e35dcbca1469ce ] *_pdp_find() from gtp_encap_recv() would trigger a crash when a peer sends GTP packets while creating new GTP device. RIP: 0010:gtp1_pdp_find.isra.0+0x68/0x90 [gtp] Call Trace: gtp_encap_recv+0xc2/0x2e0 [gtp] ? gtp1_pdp_find.isra.0+0x90/0x90 [gtp] udp_queue_rcv_one_skb+0x1fe/0x530 udp_queue_rcv_skb+0x40/0x1b0 udp_unicast_rcv_skb.isra.0+0x78/0x90 __udp4_lib_rcv+0x5af/0xc70 udp_rcv+0x1a/0x20 ip_protocol_deliver_rcu+0xc5/0x1b0 ip_local_deliver_finish+0x48/0x50 ip_local_deliver+0xe5/0xf0 ? ip_protocol_deliver_rcu+0x1b0/0x1b0 gtp_encap_enable() should be called after gtp_hastable_new() otherwise *_pdp_find() will access the uninitialized hash table. Fixes: 1e3a3abd8b28 ("gtp: make GTP sockets in gtp_newlink optional") Signed-off-by: Masahiro Fujiwara Link: https://lore.kernel.org/r/20201027114846.3924-1-fujiwara.masahiro@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/gtp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index f2fecb684220..bb9cd1262cc9 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -667,10 +667,6 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, gtp = netdev_priv(dev); - err = gtp_encap_enable(gtp, data); - if (err < 0) - return err; - if (!data[IFLA_GTP_PDP_HASHSIZE]) { hashsize = 1024; } else { @@ -681,12 +677,16 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, err = gtp_hashtable_new(gtp, hashsize); if (err < 0) - goto out_encap; + return err; + + err = gtp_encap_enable(gtp, data); + if (err < 0) + goto out_hashtable; err = register_netdevice(dev); if (err < 0) { netdev_dbg(dev, "failed to register new netdev %d\n", err); - goto out_hashtable; + goto out_encap; } gn = net_generic(dev_net(dev), gtp_net_id); @@ -697,11 +697,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, return 0; +out_encap: + gtp_encap_disable(gtp); out_hashtable: kfree(gtp->addr_hash); kfree(gtp->tid_hash); -out_encap: - gtp_encap_disable(gtp); return err; } From 84013ba77c1704c1461b299fbd336d6d6b6d3a9f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 24 Oct 2020 16:37:32 +0300 Subject: [PATCH 011/636] mlxsw: core: Fix memory leak on module removal [ Upstream commit adc80b6cfedff6dad8b93d46a5ea2775fd5af9ec ] Free the devlink instance during the teardown sequence in the non-reload case to avoid the following memory leak. unreferenced object 0xffff888232895000 (size 2048): comm "modprobe", pid 1073, jiffies 4295568857 (age 164.871s) hex dump (first 32 bytes): 00 01 00 00 00 00 ad de 22 01 00 00 00 00 ad de ........"....... 10 50 89 32 82 88 ff ff 10 50 89 32 82 88 ff ff .P.2.....P.2.... backtrace: [<00000000c704e9a6>] __kmalloc+0x13a/0x2a0 [<00000000ee30129d>] devlink_alloc+0xff/0x760 [<0000000092ab3e5d>] 0xffffffffa042e5b0 [<000000004f3f8a31>] 0xffffffffa042f6ad [<0000000092800b4b>] 0xffffffffa0491df3 [<00000000c4843903>] local_pci_probe+0xcb/0x170 [<000000006993ded7>] pci_device_probe+0x2c2/0x4e0 [<00000000a8e0de75>] really_probe+0x2c5/0xf90 [<00000000d42ba75d>] driver_probe_device+0x1eb/0x340 [<00000000bcc95e05>] device_driver_attach+0x294/0x300 [<000000000e2bc177>] __driver_attach+0x167/0x2f0 [<000000007d44cd6e>] bus_for_each_dev+0x148/0x1f0 [<000000003cd5a91e>] driver_attach+0x45/0x60 [<000000000041ce51>] bus_add_driver+0x3b8/0x720 [<00000000f5215476>] driver_register+0x230/0x4e0 [<00000000d79356f5>] __pci_register_driver+0x190/0x200 Fixes: a22712a96291 ("mlxsw: core: Fix devlink unregister flow") Signed-off-by: Ido Schimmel Reported-by: Vadim Pasternak Tested-by: Oleksandr Shamray Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index d8e7ca48753f..1ae8234053c1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1111,6 +1111,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (!reload) devlink_resources_unregister(devlink, NULL); mlxsw_core->bus->fini(mlxsw_core->bus_priv); + if (!reload) + devlink_free(devlink); return; From 95ba2236b8e69de3cb9b12e1cd6c4252a1574a19 Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Wed, 28 Oct 2020 17:07:31 +0000 Subject: [PATCH 012/636] netem: fix zero division in tabledist [ Upstream commit eadd1befdd778a1eca57fad058782bd22b4db804 ] Currently it is possible to craft a special netlink RTM_NEWQDISC command that can result in jitter being equal to 0x80000000. It is enough to set the 32 bit jitter to 0x02000000 (it will later be multiplied by 2^6) or just set the 64 bit jitter via TCA_NETEM_JITTER64. This causes an overflow during the generation of uniformly distributed numbers in tabledist(), which in turn leads to division by zero (sigma != 0, but sigma * 2 is 0). The related fragment of code needs 32-bit division - see commit 9b0ed89 ("netem: remove unnecessary 64 bit modulus"), so switching to 64 bit is not an option. Fix the issue by keeping the value of jitter within the range that can be adequately handled by tabledist() - [0;INT_MAX]. As negative std deviation makes no sense, take the absolute value of the passed value and cap it at INT_MAX. Inside tabledist(), switch to unsigned 32 bit arithmetic in order to prevent overflows. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Aleksandr Nogikh Reported-by: syzbot+ec762a6342ad0d3c0d8f@syzkaller.appspotmail.com Acked-by: Stephen Hemminger Link: https://lore.kernel.org/r/20201028170731.1383332-1-aleksandrnogikh@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 014a28d8dd4f..02d8d3fd84a5 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -330,7 +330,7 @@ static s64 tabledist(s64 mu, s32 sigma, /* default uniform distribution */ if (dist == NULL) - return ((rnd % (2 * sigma)) + mu) - sigma; + return ((rnd % (2 * (u32)sigma)) + mu) - sigma; t = dist->table[rnd % dist->size]; x = (sigma % NETEM_DIST_SCALE) * t; @@ -787,6 +787,10 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) q->slot_config.max_packets = INT_MAX; if (q->slot_config.max_bytes == 0) q->slot_config.max_bytes = INT_MAX; + + /* capping dist_jitter to the range acceptable by tabledist() */ + q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter)); + q->slot.packets_left = q->slot_config.max_packets; q->slot.bytes_left = q->slot_config.max_bytes; if (q->slot_config.min_delay | q->slot_config.max_delay | @@ -1011,6 +1015,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_NETEM_SLOT]) get_slot(q, tb[TCA_NETEM_SLOT]); + /* capping jitter to the range acceptable by tabledist() */ + q->jitter = min_t(s64, abs(q->jitter), INT_MAX); + return ret; get_table_failure: From 32ac9183ede9b4384c3ec1b285a46ba540b01a81 Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Mon, 26 Oct 2020 05:21:30 -0500 Subject: [PATCH 013/636] ravb: Fix bit fields checking in ravb_hwtstamp_get() [ Upstream commit 68b9f0865b1ef545da180c57d54b82c94cb464a4 ] In the function ravb_hwtstamp_get() in ravb_main.c with the existing values for RAVB_RXTSTAMP_TYPE_V2_L2_EVENT (0x2) and RAVB_RXTSTAMP_TYPE_ALL (0x6) if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_V2_L2_EVENT) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; else if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_ALL) config.rx_filter = HWTSTAMP_FILTER_ALL; if the test on RAVB_RXTSTAMP_TYPE_ALL should be true, it will never be reached. This issue can be verified with 'hwtstamp_config' testing program (tools/testing/selftests/net/hwtstamp_config.c). Setting filter type to ALL and subsequent retrieving it gives incorrect value: $ hwtstamp_config eth0 OFF ALL flags = 0 tx_type = OFF rx_filter = ALL $ hwtstamp_config eth0 flags = 0 tx_type = OFF rx_filter = PTP_V2_L2_EVENT Correct this by converting if-else's to switch. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Reported-by: Julia Lawall Signed-off-by: Andrew Gabbasov Reviewed-by: Sergei Shtylyov Link: https://lore.kernel.org/r/20201026102130.29368-1-andrew_gabbasov@mentor.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 569e698b5c80..c24b7ea37e39 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1732,12 +1732,16 @@ static int ravb_hwtstamp_get(struct net_device *ndev, struct ifreq *req) config.flags = 0; config.tx_type = priv->tstamp_tx_ctrl ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_V2_L2_EVENT) + switch (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE) { + case RAVB_RXTSTAMP_TYPE_V2_L2_EVENT: config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; - else if (priv->tstamp_rx_ctrl & RAVB_RXTSTAMP_TYPE_ALL) + break; + case RAVB_RXTSTAMP_TYPE_ALL: config.rx_filter = HWTSTAMP_FILTER_ALL; - else + break; + default: config.rx_filter = HWTSTAMP_FILTER_NONE; + } return copy_to_user(req->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; From f5a4ea7839b15d702fb9c731eddcca3e3fecf8d8 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 23 Oct 2020 11:47:09 -0700 Subject: [PATCH 014/636] tcp: Prevent low rmem stalls with SO_RCVLOWAT. [ Upstream commit 435ccfa894e35e3d4a1799e6ac030e48a7b69ef5 ] With SO_RCVLOWAT, under memory pressure, it is possible to enter a state where: 1. We have not received enough bytes to satisfy SO_RCVLOWAT. 2. We have not entered buffer pressure (see tcp_rmem_pressure()). 3. But, we do not have enough buffer space to accept more packets. In this case, we advertise 0 rwnd (due to #3) but the application does not drain the receive queue (no wakeup because of #1 and #2) so the flow stalls. Modify the heuristic for SO_RCVLOWAT so that, if we are advertising rwnd<=rcv_mss, force a wakeup to prevent a stall. Without this patch, setting tcp_rmem to 6143 and disabling TCP autotune causes a stalled flow. With this patch, no stall occurs. This is with RPC-style traffic with large messages. Fixes: 03f45c883c6f ("tcp: avoid extra wakeups for SO_RCVLOWAT users") Signed-off-by: Arjun Roy Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20201023184709.217614-1-arjunroy.kdev@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_input.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4ce3397e6fcf..98e8ee8bb759 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -495,6 +495,8 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, return true; if (tcp_rmem_pressure(sk)) return true; + if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss) + return true; } if (sk->sk_prot->stream_memory_read) return sk->sk_prot->stream_memory_read(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c19870d56186..686833dfaa7f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4704,7 +4704,8 @@ void tcp_data_ready(struct sock *sk) int avail = tp->rcv_nxt - tp->copied_seq; if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && - !sock_flag(sk, SOCK_DONE)) + !sock_flag(sk, SOCK_DONE) && + tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss) return; sk->sk_data_ready(sk); From 48ea03f8bb651ed06c29bad75fcb35b7c2a13d3e Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Tue, 27 Oct 2020 10:24:03 +0700 Subject: [PATCH 015/636] tipc: fix memory leak caused by tipc_buf_append() [ Upstream commit ceb1eb2fb609c88363e06618b8d4bbf7815a4e03 ] Commit ed42989eab57 ("tipc: fix the skb_unshare() in tipc_buf_append()") replaced skb_unshare() with skb_copy() to not reduce the data reference counter of the original skb intentionally. This is not the correct way to handle the cloned skb because it causes memory leak in 2 following cases: 1/ Sending multicast messages via broadcast link The original skb list is cloned to the local skb list for local destination. After that, the data reference counter of each skb in the original list has the value of 2. This causes each skb not to be freed after receiving ACK: tipc_link_advance_transmq() { ... /* release skb */ __skb_unlink(skb, &l->transmq); kfree_skb(skb); <-- memory exists after being freed } 2/ Sending multicast messages via replicast link Similar to the above case, each skb cannot be freed after purging the skb list: tipc_mcast_xmit() { ... __skb_queue_purge(pkts); <-- memory exists after being freed } This commit fixes this issue by using skb_unshare() instead. Besides, to avoid use-after-free error reported by KASAN, the pointer to the fragment is set to NULL before calling skb_unshare() to make sure that the original skb is not freed after freeing the fragment 2 times in case skb_unshare() returns NULL. Fixes: ed42989eab57 ("tipc: fix the skb_unshare() in tipc_buf_append()") Acked-by: Jon Maloy Reported-by: Thang Hoang Ngo Signed-off-by: Tung Nguyen Reviewed-by: Xin Long Acked-by: Cong Wang Link: https://lore.kernel.org/r/20201027032403.1823-1-tung.q.nguyen@dektech.com.au Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tipc/msg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 0b8446cd541c..f04843ca8216 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -140,12 +140,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - if (skb_cloned(frag)) - frag = skb_copy(frag, GFP_ATOMIC); + *buf = NULL; + frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; head = *headbuf = frag; - *buf = NULL; TIPC_SKB_CB(head)->tail = NULL; if (skb_is_nonlinear(head)) { skb_walk_frags(head, tail) { From daf14c65803b1413d576cabf9092136800036db0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 29 Oct 2020 10:18:53 +0100 Subject: [PATCH 016/636] r8169: fix issue with forced threading in combination with shared interrupts [ Upstream commit 2734a24e6e5d18522fbf599135c59b82ec9b2c9e ] As reported by Serge flag IRQF_NO_THREAD causes an error if the interrupt is actually shared and the other driver(s) don't have this flag set. This situation can occur if a PCI(e) legacy interrupt is used in combination with forced threading. There's no good way to deal with this properly, therefore we have to remove flag IRQF_NO_THREAD. For fixing the original forced threading issue switch to napi_schedule(). Fixes: 424a646e072a ("r8169: fix operation under forced interrupt threading") Link: https://www.spinics.net/lists/netdev/msg694960.html Reported-by: Serge Belyshev Signed-off-by: Heiner Kallweit Tested-by: Serge Belyshev Link: https://lore.kernel.org/r/b5b53bfe-35ac-3768-85bf-74d1290cf394@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 58dc4fe139fb..1555c0dae490 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6630,7 +6630,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) return IRQ_NONE; rtl_irq_disable(tp); - napi_schedule_irqoff(&tp->napi); + napi_schedule(&tp->napi); return IRQ_HANDLED; } @@ -6886,7 +6886,7 @@ static int rtl_open(struct net_device *dev) rtl_request_firmware(tp); retval = request_irq(pci_irq_vector(pdev, 0), rtl8169_interrupt, - IRQF_NO_THREAD | IRQF_SHARED, dev->name, tp); + IRQF_SHARED, dev->name, tp); if (retval < 0) goto err_release_fw_2; From d3b8171face63754448c1d0fcb40f2bf124fd2ab Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 23 Oct 2020 17:28:52 +0530 Subject: [PATCH 017/636] cxgb4: set up filter action after rewrites [ Upstream commit 937d8420588421eaa5c7aa5c79b26b42abb288ef ] The current code sets up the filter action field before rewrites are set up. When the action 'switch' is used with rewrites, this may result in initial few packets that get switched out don't have rewrites applied on them. So, make sure filter action is set up along with rewrites or only after everything else is set up for rewrites. Fixes: 12b276fbf6e0 ("cxgb4: add support to create hash filters") Signed-off-by: Raju Rangoju Link: https://lore.kernel.org/r/20201023115852.18262-1-rajur@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 56 +++++++++---------- drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h | 4 ++ 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index bb3ee55cb72c..a62c96001761 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -145,13 +145,13 @@ static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) int err; /* do a set-tcb for smac-sel and CWR bit.. */ - err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); - if (err) - goto smac_err; - err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W, TCB_SMAC_SEL_V(TCB_SMAC_SEL_M), TCB_SMAC_SEL_V(f->smt->idx), 1); + if (err) + goto smac_err; + + err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); if (!err) return 0; @@ -608,6 +608,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | FW_FILTER_WR_DMAC_V(f->fs.newdmac) | + FW_FILTER_WR_SMAC_V(f->fs.newsmac) | FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || @@ -625,7 +626,7 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; + fwr->smac_sel = f->smt->idx; fwr->rx_chan_rx_rpl_iq = htons(FW_FILTER_WR_RX_CHAN_V(0) | FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); @@ -1019,11 +1020,8 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb, TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | RX_CHANNEL_F | - CONG_CNTRL_V((f->fs.action == FILTER_DROP) | - (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | - ((f->fs.dirsteerhash) << 1)) | - CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); + ((f->fs.dirsteerhash) << 1))); } static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, @@ -1059,11 +1057,8 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | RX_CHANNEL_F | - CONG_CNTRL_V((f->fs.action == FILTER_DROP) | - (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | - ((f->fs.dirsteerhash) << 1)) | - CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); + ((f->fs.dirsteerhash) << 1))); } static int cxgb4_set_hash_filter(struct net_device *dev, @@ -1722,6 +1717,20 @@ void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl) } return; } + switch (f->fs.action) { + case FILTER_PASS: + if (f->fs.dirsteer) + set_tcb_tflag(adap, f, tid, + TF_DIRECT_STEER_S, 1, 1); + break; + case FILTER_DROP: + set_tcb_tflag(adap, f, tid, TF_DROP_S, 1, 1); + break; + case FILTER_SWITCH: + set_tcb_tflag(adap, f, tid, TF_LPBK_S, 1, 1); + break; + } + break; default: @@ -1781,22 +1790,11 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) if (ctx) ctx->result = 0; } else if (ret == FW_FILTER_WR_FLT_ADDED) { - int err = 0; - - if (f->fs.newsmac) - err = configure_filter_smac(adap, f); - - if (!err) { - f->pending = 0; /* async setup completed */ - f->valid = 1; - if (ctx) { - ctx->result = 0; - ctx->tid = idx; - } - } else { - clear_filter(adap, f); - if (ctx) - ctx->result = err; + f->pending = 0; /* async setup completed */ + f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; } } else { /* Something went wrong. Issue a warning about the diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index 3297ce025e8b..6ddc2baa7481 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -42,6 +42,10 @@ #define TCB_T_FLAGS_W 1 +#define TF_DROP_S 22 +#define TF_DIRECT_STEER_S 23 +#define TF_LPBK_S 59 + #define TF_CCTRL_ECE_S 60 #define TF_CCTRL_CWR_S 61 #define TF_CCTRL_RFR_S 62 From 8fc2f485ea6583876553546e60f294016ec9f296 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 8 Sep 2020 16:47:36 -0500 Subject: [PATCH 018/636] arch/x86/amd/ibs: Fix re-arming IBS Fetch commit 221bfce5ebbdf72ff08b3bf2510ae81058ee568b upstream. Stephane Eranian found a bug in that IBS' current Fetch counter was not being reset when the driver would write the new value to clear it along with the enable bit set, and found that adding an MSR write that would first disable IBS Fetch would make IBS Fetch reset its current count. Indeed, the PPR for AMD Family 17h Model 31h B0 55803 Rev 0.54 - Sep 12, 2019 states "The periodic fetch counter is set to IbsFetchCnt [...] when IbsFetchEn is changed from 0 to 1." Explicitly set IbsFetchEn to 0 and then to 1 when re-enabling IBS Fetch, so the driver properly resets the internal counter to 0 and IBS Fetch starts counting again. A family 15h machine tested does not have this problem, and the extra wrmsr is also not needed on Family 19h, so only do the extra wrmsr on families 16h through 18h. Reported-by: Stephane Eranian Signed-off-by: Kim Phillips [peterz: optimized] Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/ibs.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 07bf5517d9d8..801c50be8e1d 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -89,6 +89,7 @@ struct perf_ibs { u64 max_period; unsigned long offset_mask[1]; int offset_max; + unsigned int fetch_count_reset_broken : 1; struct cpu_perf_ibs __percpu *pcpu; struct attribute **format_attrs; @@ -375,7 +376,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { - wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); + u64 tmp = hwc->config | config; + + if (perf_ibs->fetch_count_reset_broken) + wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); + + wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); } /* @@ -744,6 +750,13 @@ static __init void perf_event_ibs_init(void) { struct attribute **attr = ibs_op_format_attrs; + /* + * Some chips fail to reset the fetch count when it is written; instead + * they need a 0-1 transition of IbsFetchEn. + */ + if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) + perf_ibs_fetch.fetch_count_reset_broken = 1; + perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); if (ibs_caps & IBS_CAPS_OPCNT) { From ed63866eddecb754d41f0b15c89d56971af4d3e3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 25 Sep 2020 16:07:51 +0200 Subject: [PATCH 019/636] x86/xen: disable Firmware First mode for correctable memory errors commit d759af38572f97321112a0852353613d18126038 upstream. When running as Xen dom0 the kernel isn't responsible for selecting the error handling mode, this should be handled by the hypervisor. So disable setting FF mode when running as Xen pv guest. Not doing so might result in boot splats like: [ 7.509696] HEST: Enabling Firmware First mode for corrected errors. [ 7.510382] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 2. [ 7.510383] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 3. [ 7.510384] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 4. [ 7.510384] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 5. [ 7.510385] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 6. [ 7.510386] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 7. [ 7.510386] mce: [Firmware Bug]: Ignoring request to disable invalid MCA bank 8. Reason is that the HEST ACPI table contains the real number of MCA banks, while the hypervisor is emulating only 2 banks for guests. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20200925140751.31381-1-jgross@suse.com Signed-off-by: Boris Ostrovsky Signed-off-by: Greg Kroah-Hartman --- arch/x86/xen/enlighten_pv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 76864ea59160..9f8995cd28f6 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1383,6 +1383,15 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_init.mpparse.get_smp_config = x86_init_uint_noop; xen_boot_params_init_edd(); + +#ifdef CONFIG_ACPI + /* + * Disable selecting "Firmware First mode" for correctable + * memory errors, as this is the duty of the hypervisor to + * decide. + */ + acpi_disable_cmcff = 1; +#endif } if (!boot_params.screen_info.orig_video_isVGA) From 6ef82327906270f66adb4b13517e818d3a20448e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 18 Sep 2020 10:36:50 +0200 Subject: [PATCH 020/636] fuse: fix page dereference after free commit d78092e4937de9ce55edcb4ee4c5e3c707be0190 upstream. After unlock_request() pages from the ap->pages[] array may be put (e.g. by aborting the connection) and the pages can be freed. Prevent use after free by grabbing a reference to the page before calling unlock_request(). The original patch was created by Pradeep P V K. Reported-by: Pradeep P V K Cc: Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dev.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c51c9a6881e4..1ff5a6b21db0 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -853,15 +853,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) struct page *newpage; struct pipe_buffer *buf = cs->pipebufs; + get_page(oldpage); err = unlock_request(cs->req); if (err) - return err; + goto out_put_old; fuse_copy_finish(cs); err = pipe_buf_confirm(cs->pipe, buf); if (err) - return err; + goto out_put_old; BUG_ON(!cs->nr_segs); cs->currbuf = buf; @@ -901,7 +902,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); if (err) { unlock_page(newpage); - return err; + goto out_put_old; } get_page(newpage); @@ -920,14 +921,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (err) { unlock_page(newpage); put_page(newpage); - return err; + goto out_put_old; } unlock_page(oldpage); + /* Drop ref for ap->pages[] array */ put_page(oldpage); cs->len = 0; - return 0; + err = 0; +out_put_old: + /* Drop ref obtained in this function */ + put_page(oldpage); + return err; out_fallback_unlock: unlock_page(newpage); @@ -936,10 +942,10 @@ out_fallback: cs->offset = buf->offset; err = lock_request(cs->req); - if (err) - return err; + if (!err) + err = 1; - return 1; + goto out_put_old; } static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, @@ -951,14 +957,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, if (cs->nr_segs == cs->pipe->buffers) return -EIO; + get_page(page); err = unlock_request(cs->req); - if (err) + if (err) { + put_page(page); return err; + } fuse_copy_finish(cs); buf = cs->pipebufs; - get_page(page); buf->page = page; buf->offset = offset; buf->len = count; From 277d0de875a4eb1f03f213c2a4951b7487cc5185 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 10 Sep 2020 13:33:14 -0700 Subject: [PATCH 021/636] bpf: Fix comment for helper bpf_current_task_under_cgroup() commit 1aef5b4391f0c75c0a1523706a7b0311846ee12f upstream. This should be "current" not "skb". Fixes: c6b5fb8690fa ("bpf: add documentation for eBPF helpers (42-50)") Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Cc: Link: https://lore.kernel.org/bpf/20200910203314.70018-1-songliubraving@fb.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/bpf.h | 4 ++-- tools/include/uapi/linux/bpf.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d143e277cdaf..71ca8c4dc290 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1193,8 +1193,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bf4cd924aed5..13944978ada5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1191,8 +1191,8 @@ union bpf_attr { * Return * The return value depends on the result of the test, and can be: * - * * 0, if the *skb* task belongs to the cgroup2. - * * 1, if the *skb* task does not belong to the cgroup2. + * * 0, if current task belongs to the cgroup2. + * * 1, if current task does not belong to the cgroup2. * * A negative error code, if an error occurred. * * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) From 05f703b07727c0eb81f487143b583d5f8561d900 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Fri, 4 Sep 2020 11:23:30 +0200 Subject: [PATCH 022/636] evm: Check size of security.evm before using it commit 455b6c9112eff8d249e32ba165742085678a80a4 upstream. This patch checks the size for the EVM_IMA_XATTR_DIGSIG and EVM_XATTR_PORTABLE_DIGSIG types to ensure that the algorithm is read from the buffer returned by vfs_getxattr_alloc(). Cc: stable@vger.kernel.org # 4.19.x Fixes: 5feeb61183dde ("evm: Allow non-SHA1 digital signatures") Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/evm/evm_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index e11d860fdce4..651c0127c00d 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -186,6 +186,12 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, break; case EVM_IMA_XATTR_DIGSIG: case EVM_XATTR_PORTABLE_DIGSIG: + /* accept xattr with non-empty signature field */ + if (xattr_len <= sizeof(struct signature_v2_hdr)) { + evm_status = INTEGRITY_FAIL; + goto out; + } + hdr = (struct signature_v2_hdr *)xattr_data; digest.hdr.algo = hdr->hash_algo; rc = evm_calc_hash(dentry, xattr_name, xattr_value, From df605e1b5c2ccb55e42c144aecca4bbd76ff149f Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sun, 2 Aug 2020 21:29:49 +0800 Subject: [PATCH 023/636] p54: avoid accessing the data mapped to streaming DMA commit 478762855b5ae9f68fa6ead1edf7abada70fcd5f upstream. In p54p_tx(), skb->data is mapped to streaming DMA on line 337: mapping = pci_map_single(..., skb->data, ...); Then skb->data is accessed on line 349: desc->device_addr = ((struct p54_hdr *)skb->data)->req_id; This access may cause data inconsistency between CPU cache and hardware. To fix this problem, ((struct p54_hdr *)skb->data)->req_id is stored in a local variable before DMA mapping, and then the driver accesses this local variable instead of skb->data. Cc: Signed-off-by: Jia-Ju Bai Acked-by: Christian Lamparter Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200802132949.26788-1-baijiaju@tsinghua.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/intersil/p54/p54pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intersil/p54/p54pci.c b/drivers/net/wireless/intersil/p54/p54pci.c index 57ad56435dda..8bc0286b4f8c 100644 --- a/drivers/net/wireless/intersil/p54/p54pci.c +++ b/drivers/net/wireless/intersil/p54/p54pci.c @@ -332,10 +332,12 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb) struct p54p_desc *desc; dma_addr_t mapping; u32 idx, i; + __le32 device_addr; spin_lock_irqsave(&priv->lock, flags); idx = le32_to_cpu(ring_control->host_idx[1]); i = idx % ARRAY_SIZE(ring_control->tx_data); + device_addr = ((struct p54_hdr *)skb->data)->req_id; mapping = pci_map_single(priv->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); @@ -349,7 +351,7 @@ static void p54p_tx(struct ieee80211_hw *dev, struct sk_buff *skb) desc = &ring_control->tx_data[i]; desc->host_addr = cpu_to_le32(mapping); - desc->device_addr = ((struct p54_hdr *)skb->data)->req_id; + desc->device_addr = device_addr; desc->len = cpu_to_le16(skb->len); desc->flags = 0; From 421cbc540e0e37a9793674c6a877e4b2fd572113 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 7 Apr 2020 13:56:01 +0200 Subject: [PATCH 024/636] cxl: Rework error message for incompatible slots commit 40ac790d99c6dd16b367d5c2339e446a5f1b0593 upstream. Improve the error message shown if a capi adapter is plugged on a capi-incompatible slot directly under the PHB (no intermediate switch). Fixes: 5632874311db ("cxl: Add support for POWER9 DD2") Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200407115601.25453-1-fbarrat@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 787a69a2a726..b9d3c87e9318 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -397,8 +397,8 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid, *capp_unit_id = get_capp_unit_id(np, *phb_index); of_node_put(np); if (!*capp_unit_id) { - pr_err("cxl: invalid capp unit id (phb_index: %d)\n", - *phb_index); + pr_err("cxl: No capp unit found for PHB[%lld,%d]. Make sure the adapter is on a capi-compatible slot\n", + *chipid, *phb_index); return -ENODEV; } From 2dda0d4e17c91ab84595468edee57f9ffab8d93c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 30 Sep 2020 10:20:07 +0300 Subject: [PATCH 025/636] RDMA/addr: Fix race with netevent_callback()/rdma_addr_cancel() commit 2ee9bf346fbfd1dad0933b9eb3a4c2c0979b633e upstream. This three thread race can result in the work being run once the callback becomes NULL: CPU1 CPU2 CPU3 netevent_callback() process_one_req() rdma_addr_cancel() [..] spin_lock_bh() set_timeout() spin_unlock_bh() spin_lock_bh() list_del_init(&req->list); spin_unlock_bh() req->callback = NULL spin_lock_bh() if (!list_empty(&req->list)) // Skipped! // cancel_delayed_work(&req->work); spin_unlock_bh() process_one_req() // again req->callback() // BOOM cancel_delayed_work_sync() The solution is to always cancel the work once it is completed so any in between set_timeout() does not result in it running again. Cc: stable@vger.kernel.org Fixes: 44e75052bc2a ("RDMA/rdma_cm: Make rdma_addr_cancel into a fence") Link: https://lore.kernel.org/r/20200930072007.1009692-1-leon@kernel.org Reported-by: Dan Aloni Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/addr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index df8f5ceea2dd..30385ba7c5d9 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -571,13 +571,12 @@ static void process_one_req(struct work_struct *_work) req->callback = NULL; spin_lock_bh(&lock); + /* + * Although the work will normally have been canceled by the workqueue, + * it can still be requeued as long as it is on the req_list. + */ + cancel_delayed_work(&req->work); if (!list_empty(&req->list)) { - /* - * Although the work will normally have been canceled by the - * workqueue, it can still be requeued as long as it is on the - * req_list. - */ - cancel_delayed_work(&req->work); list_del_init(&req->list); kfree(req); } From 4a2b6dadaeb4600ec7d51cdb6490ad0c970b1687 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 27 Apr 2020 14:50:37 -0500 Subject: [PATCH 026/636] mtd: lpddr: Fix bad logic in print_drs_error commit 1c9c02bb22684f6949d2e7ddc0a3ff364fd5a6fc upstream. Update logic for broken test. Use a more common logging style. It appears the logic in this function is broken for the consecutive tests of if (prog_status & 0x3) ... else if (prog_status & 0x2) ... else (prog_status & 0x1) ... Likely the first test should be if ((prog_status & 0x3) == 0x3) Found by inspection of include files using printk. Fixes: eb3db27507f7 ("[MTD] LPDDR PFOW definition") Cc: stable@vger.kernel.org Reported-by: Joe Perches Signed-off-by: Gustavo A. R. Silva Acked-by: Miquel Raynal Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/3fb0e29f5b601db8be2938a01d974b00c8788501.1588016644.git.gustavo@embeddedor.com Signed-off-by: Greg Kroah-Hartman --- include/linux/mtd/pfow.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h index 122f3439e1af..c65d7a3be3c6 100644 --- a/include/linux/mtd/pfow.h +++ b/include/linux/mtd/pfow.h @@ -128,7 +128,7 @@ static inline void print_drs_error(unsigned dsr) if (!(dsr & DSR_AVAILABLE)) printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); - if (prog_status & 0x03) + if ((prog_status & 0x03) == 0x03) printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " "half with 41h command\n"); else if (prog_status & 0x02) From ad1e8fc2a51e48c6c51f6ca2236e9b9fcbe21fea Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Sep 2020 13:04:32 +0100 Subject: [PATCH 027/636] serial: pl011: Fix lockdep splat when handling magic-sysrq interrupt commit 534cf755d9df99e214ddbe26b91cd4d81d2603e2 upstream. Issuing a magic-sysrq via the PL011 causes the following lockdep splat, which is easily reproducible under QEMU: | sysrq: Changing Loglevel | sysrq: Loglevel set to 9 | | ====================================================== | WARNING: possible circular locking dependency detected | 5.9.0-rc7 #1 Not tainted | ------------------------------------------------------ | systemd-journal/138 is trying to acquire lock: | ffffab133ad950c0 (console_owner){-.-.}-{0:0}, at: console_lock_spinning_enable+0x34/0x70 | | but task is already holding lock: | ffff0001fd47b098 (&port_lock_key){-.-.}-{2:2}, at: pl011_int+0x40/0x488 | | which lock already depends on the new lock. [...] | Possible unsafe locking scenario: | | CPU0 CPU1 | ---- ---- | lock(&port_lock_key); | lock(console_owner); | lock(&port_lock_key); | lock(console_owner); | | *** DEADLOCK *** The issue being that CPU0 takes 'port_lock' on the irq path in pl011_int() before taking 'console_owner' on the printk() path, whereas CPU1 takes the two locks in the opposite order on the printk() path due to setting the "console_owner" prior to calling into into the actual console driver. Fix this in the same way as the msm-serial driver by dropping 'port_lock' before handling the sysrq. Cc: # 4.19+ Cc: Russell King Cc: Greg Kroah-Hartman Cc: Jiri Slaby Link: https://lore.kernel.org/r/20200811101313.GA6970@willie-the-truck Signed-off-by: Peter Zijlstra Tested-by: Will Deacon Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20200930120432.16551-1-will@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 45e4f2952143..1306ce5c5d9b 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -313,8 +313,9 @@ static void pl011_write(unsigned int val, const struct uart_amba_port *uap, */ static int pl011_fifo_to_tty(struct uart_amba_port *uap) { - u16 status; unsigned int ch, flag, fifotaken; + int sysrq; + u16 status; for (fifotaken = 0; fifotaken != 256; fifotaken++) { status = pl011_read(uap, REG_FR); @@ -349,10 +350,12 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap) flag = TTY_FRAME; } - if (uart_handle_sysrq_char(&uap->port, ch & 255)) - continue; + spin_unlock(&uap->port.lock); + sysrq = uart_handle_sysrq_char(&uap->port, ch & 255); + spin_lock(&uap->port.lock); - uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); + if (!sysrq) + uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag); } return fifotaken; From 888a77cd35ca8ae4cc610b6e78596ec91c390bc8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 17 Sep 2020 15:09:20 +0200 Subject: [PATCH 028/636] ata: sata_rcar: Fix DMA boundary mask commit df9c590986fdb6db9d5636d6cd93bc919c01b451 upstream. Before commit 9495b7e92f716ab2 ("driver core: platform: Initialize dma_parms for platform devices"), the R-Car SATA device didn't have DMA parameters. Hence the DMA boundary mask supplied by its driver was silently ignored, as __scsi_init_queue() doesn't check the return value of dma_set_seg_boundary(), and the default value of 0xffffffff was used. Now the device has gained DMA parameters, the driver-supplied value is used, and the following warning is printed on Salvator-XS: DMA-API: sata_rcar ee300000.sata: mapping sg segment across boundary [start=0x00000000ffffe000] [end=0x00000000ffffefff] [boundary=0x000000001ffffffe] WARNING: CPU: 5 PID: 38 at kernel/dma/debug.c:1233 debug_dma_map_sg+0x298/0x300 (the range of start/end values depend on whether IOMMU support is enabled or not) The issue here is that SATA_RCAR_DMA_BOUNDARY doesn't have bit 0 set, so any typical end value, which is odd, will trigger the check. Fix this by increasing the DMA boundary value by 1. This also fixes the following WRITE DMA EXT timeout issue: # dd if=/dev/urandom of=/mnt/de1/file1-1024M bs=1M count=1024 ata1.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6 frozen ata1.00: failed command: WRITE DMA EXT ata1.00: cmd 35/00:00:00:e6:0c/00:0a:00:00:00/e0 tag 0 dma 1310720 out res 40/00:01:00:00:00/00:00:00:00:00/00 Emask 0x4 (timeout) ata1.00: status: { DRDY } as seen by Shimoda-san since commit 429120f3df2dba2b ("block: fix splitting segments on boundary masks"). Fixes: 8bfbeed58665dbbf ("sata_rcar: correct 'sata_rcar_sht'") Fixes: 9495b7e92f716ab2 ("driver core: platform: Initialize dma_parms for platform devices") Fixes: 429120f3df2dba2b ("block: fix splitting segments on boundary masks") Signed-off-by: Geert Uytterhoeven Tested-by: Lad Prabhakar Tested-by: Yoshihiro Shimoda Reviewed-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman Reviewed-by: Sergei Shtylyov Reviewed-by: Ulf Hansson Cc: stable Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/ata/sata_rcar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c index 8323f88d17a5..4dcdf8ee0055 100644 --- a/drivers/ata/sata_rcar.c +++ b/drivers/ata/sata_rcar.c @@ -124,7 +124,7 @@ /* Descriptor table word 0 bit (when DTA32M = 1) */ #define SATA_RCAR_DTEND BIT(0) -#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFEUL +#define SATA_RCAR_DMA_BOUNDARY 0x1FFFFFFFUL /* Gen2 Physical Layer Control Registers */ #define RCAR_GEN2_PHY_CTL1_REG 0x1704 From 705ac26aedd49372c121c91af921e7069ff1ceff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 22 Jan 2019 16:20:21 -0800 Subject: [PATCH 029/636] fscrypt: return -EXDEV for incompatible rename or link into encrypted dir commit f5e55e777cc93eae1416f0fa4908e8846b6d7825 upstream. Currently, trying to rename or link a regular file, directory, or symlink into an encrypted directory fails with EPERM when the source file is unencrypted or is encrypted with a different encryption policy, and is on the same mountpoint. It is correct for the operation to fail, but the choice of EPERM breaks tools like 'mv' that know to copy rather than rename if they see EXDEV, but don't know what to do with EPERM. Our original motivation for EPERM was to encourage users to securely handle their data. Encrypting files by "moving" them into an encrypted directory can be insecure because the unencrypted data may remain in free space on disk, where it can later be recovered by an attacker. It's much better to encrypt the data from the start, or at least try to securely delete the source data e.g. using the 'shred' program. However, the current behavior hasn't been effective at achieving its goal because users tend to be confused, hack around it, and complain; see e.g. https://github.com/google/fscrypt/issues/76. And in some cases it's actually inconsistent or unnecessary. For example, 'mv'-ing files between differently encrypted directories doesn't work even in cases where it can be secure, such as when in userspace the same passphrase protects both directories. Yet, you *can* already 'mv' unencrypted files into an encrypted directory if the source files are on a different mountpoint, even though doing so is often insecure. There are probably better ways to teach users to securely handle their files. For example, the 'fscrypt' userspace tool could provide a command that migrates unencrypted files into an encrypted directory, acting like 'shred' on the source files and providing appropriate warnings depending on the type of the source filesystem and disk. Receiving errors on unimportant files might also force some users to disable encryption, thus making the behavior counterproductive. It's desirable to make encryption as unobtrusive as possible. Therefore, change the error code from EPERM to EXDEV so that tools looking for EXDEV will fall back to a copy. This, of course, doesn't prevent users from still doing the right things to securely manage their files. Note that this also matches the behavior when a file is renamed between two project quota hierarchies; so there's precedent for using EXDEV for things other than mountpoints. xfstests generic/398 will require an update with this change. [Rewritten from an earlier patch series by Michael Halcrow.] Cc: Michael Halcrow Cc: Joe Richey Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/fscrypt.rst | 12 ++++++++++-- fs/crypto/hooks.c | 6 +++--- fs/crypto/policy.c | 3 +-- include/linux/fscrypt.h | 4 ++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index cfbc18f0d9c9..5b667ee1242a 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -426,10 +426,18 @@ astute users may notice some differences in behavior: - Unencrypted files, or files encrypted with a different encryption policy (i.e. different key, modes, or flags), cannot be renamed or linked into an encrypted directory; see `Encryption policy - enforcement`_. Attempts to do so will fail with EPERM. However, + enforcement`_. Attempts to do so will fail with EXDEV. However, encrypted files can be renamed within an encrypted directory, or into an unencrypted directory. + Note: "moving" an unencrypted file into an encrypted directory, e.g. + with the `mv` program, is implemented in userspace by a copy + followed by a delete. Be aware that the original unencrypted data + may remain recoverable from free space on the disk; prefer to keep + all files encrypted from the very beginning. The `shred` program + may be used to overwrite the source files but isn't guaranteed to be + effective on all filesystems and storage devices. + - Direct I/O is not supported on encrypted files. Attempts to use direct I/O on such files will fall back to buffered I/O. @@ -516,7 +524,7 @@ not be encrypted. Except for those special files, it is forbidden to have unencrypted files, or files encrypted with a different encryption policy, in an encrypted directory tree. Attempts to link or rename such a file into -an encrypted directory will fail with EPERM. This is also enforced +an encrypted directory will fail with EXDEV. This is also enforced during ->lookup() to provide limited protection against offline attacks that try to disable or downgrade encryption in known locations where applications may later write sensitive data. It is recommended diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 926e5df20ec3..56debb1fcf5e 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -58,7 +58,7 @@ int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) return err; if (!fscrypt_has_permitted_context(dir, inode)) - return -EPERM; + return -EXDEV; return 0; } @@ -82,13 +82,13 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_ENCRYPTED(new_dir) && !fscrypt_has_permitted_context(new_dir, d_inode(old_dentry))) - return -EPERM; + return -EXDEV; if ((flags & RENAME_EXCHANGE) && IS_ENCRYPTED(old_dir) && !fscrypt_has_permitted_context(old_dir, d_inode(new_dentry))) - return -EPERM; + return -EXDEV; } return 0; } diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 4288839501e9..e9d975f39f46 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -153,8 +153,7 @@ EXPORT_SYMBOL(fscrypt_ioctl_get_policy); * malicious offline violations of this constraint, while the link and rename * checks are needed to prevent online violations of this constraint. * - * Return: 1 if permitted, 0 if forbidden. If forbidden, the caller must fail - * the filesystem operation with EPERM. + * Return: 1 if permitted, 0 if forbidden. */ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 952ab97af325..9fa48180a1f5 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -89,7 +89,7 @@ static inline int fscrypt_require_key(struct inode *inode) * in an encrypted directory tree use the same encryption policy. * * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, - * -EPERM if the link would result in an inconsistent encryption policy, or + * -EXDEV if the link would result in an inconsistent encryption policy, or * another -errno code. */ static inline int fscrypt_prepare_link(struct dentry *old_dentry, @@ -119,7 +119,7 @@ static inline int fscrypt_prepare_link(struct dentry *old_dentry, * We also verify that the rename will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. * - * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the + * Return: 0 on success, -ENOKEY if an encryption key is missing, -EXDEV if the * rename would cause inconsistent encryption policies, or another -errno code. */ static inline int fscrypt_prepare_rename(struct inode *old_dir, From 1ae161d27b01ebe062cc4bf228a1e5debc473310 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 31 Oct 2020 15:05:49 -0700 Subject: [PATCH 030/636] fscrypt: clean up and improve dentry revalidation commit 6cc248684d3d23bbd073ae2fa73d3416c0558909 upstream. Make various improvements to fscrypt dentry revalidation: - Don't try to handle the case where the per-directory key is removed, as this can't happen without the inode (and dentries) being evicted. - Flag ciphertext dentries rather than plaintext dentries, since it's ciphertext dentries that need the special handling. - Avoid doing unnecessary work for non-ciphertext dentries. - When revalidating ciphertext dentries, try to set up the directory's i_crypt_info to make sure the key is really still absent, rather than invalidating all negative dentries as the previous code did. An old comment suggested we can't do this for locking reasons, but AFAICT this comment was outdated and it actually works fine. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/crypto/crypto.c | 58 +++++++++++++++++++++-------------------- fs/crypto/hooks.c | 4 +-- include/linux/dcache.h | 2 +- include/linux/fscrypt.h | 6 ++--- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index c83ddff3ff4a..04a3c2c92b21 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -314,45 +314,47 @@ int fscrypt_decrypt_page(const struct inode *inode, struct page *page, EXPORT_SYMBOL(fscrypt_decrypt_page); /* - * Validate dentries for encrypted directories to make sure we aren't - * potentially caching stale data after a key has been added or - * removed. + * Validate dentries in encrypted directories to make sure we aren't potentially + * caching stale dentries after a key has been added. */ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *dir; - int dir_has_key, cached_with_key; + int err; + int valid; + + /* + * Plaintext names are always valid, since fscrypt doesn't support + * reverting to ciphertext names without evicting the directory's inode + * -- which implies eviction of the dentries in the directory. + */ + if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) + return 1; + + /* + * Ciphertext name; valid if the directory's key is still unavailable. + * + * Although fscrypt forbids rename() on ciphertext names, we still must + * use dget_parent() here rather than use ->d_parent directly. That's + * because a corrupted fs image may contain directory hard links, which + * the VFS handles by moving the directory's dentry tree in the dcache + * each time ->lookup() finds the directory and it already has a dentry + * elsewhere. Thus ->d_parent can be changing, and we must safely grab + * a reference to some ->d_parent to prevent it from being freed. + */ if (flags & LOOKUP_RCU) return -ECHILD; dir = dget_parent(dentry); - if (!IS_ENCRYPTED(d_inode(dir))) { - dput(dir); - return 0; - } - - spin_lock(&dentry->d_lock); - cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY; - spin_unlock(&dentry->d_lock); - dir_has_key = (d_inode(dir)->i_crypt_info != NULL); + err = fscrypt_get_encryption_info(d_inode(dir)); + valid = !fscrypt_has_encryption_key(d_inode(dir)); dput(dir); - /* - * If the dentry was cached without the key, and it is a - * negative dentry, it might be a valid name. We can't check - * if the key has since been made available due to locking - * reasons, so we fail the validation so ext4_lookup() can do - * this check. - * - * We also fail the validation if the dentry was created with - * the key present, but we no longer have the key, or vice versa. - */ - if ((!cached_with_key && d_is_negative(dentry)) || - (!cached_with_key && dir_has_key) || - (cached_with_key && !dir_has_key)) - return 0; - return 1; + if (err < 0) + return err; + + return valid; } const struct dentry_operations fscrypt_d_ops = { diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 56debb1fcf5e..a9492f75bbe1 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -101,9 +101,9 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) if (err) return err; - if (fscrypt_has_encryption_key(dir)) { + if (!fscrypt_has_encryption_key(dir)) { spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY; + dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 0880baefd85f..02b1b40fea5b 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -210,7 +210,7 @@ struct dentry_operations { #define DCACHE_MAY_FREE 0x00800000 #define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */ -#define DCACHE_ENCRYPTED_WITH_KEY 0x02000000 /* dir is encrypted with a valid key */ +#define DCACHE_ENCRYPTED_NAME 0x02000000 /* Encrypted name (dir key was unavailable) */ #define DCACHE_OP_REAL 0x04000000 #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 9fa48180a1f5..c83e4dc55c06 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -145,10 +145,8 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * filenames are presented in encrypted form. Therefore, we'll try to set up * the directory's encryption key, but even without it the lookup can continue. * - * To allow invalidating stale dentries if the directory's encryption key is - * added later, we also install a custom ->d_revalidate() method and use the - * DCACHE_ENCRYPTED_WITH_KEY flag to indicate whether a given dentry is a - * plaintext name (flag set) or a ciphertext name (flag cleared). + * This also installs a custom ->d_revalidate() method which will invalidate the + * dentry if it was created without the key and the key is later added. * * Return: 0 on success, -errno if a problem occurred while setting up the * encryption key From 000d849574fa636f1764919be459827df0114185 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 31 Oct 2020 15:05:50 -0700 Subject: [PATCH 031/636] fscrypt: fix race allowing rename() and link() of ciphertext dentries commit 968dd6d0c6d6b6a989c6ddb9e2584a031b83e7b5 upstream. Close some race conditions where fscrypt allowed rename() and link() on ciphertext dentries that had been looked up just prior to the key being concurrently added. It's better to return -ENOKEY in this case. This avoids doing the nonsensical thing of encrypting the names a second time when searching for the actual on-disk dir entries. It also guarantees that DCACHE_ENCRYPTED_NAME dentries are never rename()d, so the dcache won't have support all possible combinations of moving DCACHE_ENCRYPTED_NAME around during __d_move(). Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/crypto/hooks.c | 12 +++++++++++- include/linux/fscrypt.h | 2 +- include/linux/fscrypt_notsupp.h | 4 ++-- include/linux/fscrypt_supp.h | 3 ++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index a9492f75bbe1..2e7498a821a4 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -49,7 +49,8 @@ int fscrypt_file_open(struct inode *inode, struct file *filp) } EXPORT_SYMBOL_GPL(fscrypt_file_open); -int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) +int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, + struct dentry *dentry) { int err; @@ -57,6 +58,10 @@ int __fscrypt_prepare_link(struct inode *inode, struct inode *dir) if (err) return err; + /* ... in case we looked up ciphertext name before key was added */ + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) + return -ENOKEY; + if (!fscrypt_has_permitted_context(dir, inode)) return -EXDEV; @@ -78,6 +83,11 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) return err; + /* ... in case we looked up ciphertext name(s) before key was added */ + if ((old_dentry->d_flags | new_dentry->d_flags) & + DCACHE_ENCRYPTED_NAME) + return -ENOKEY; + if (old_dir != new_dir) { if (IS_ENCRYPTED(new_dir) && !fscrypt_has_permitted_context(new_dir, diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index c83e4dc55c06..af52a240c739 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -97,7 +97,7 @@ static inline int fscrypt_prepare_link(struct dentry *old_dentry, struct dentry *dentry) { if (IS_ENCRYPTED(dir)) - return __fscrypt_prepare_link(d_inode(old_dentry), dir); + return __fscrypt_prepare_link(d_inode(old_dentry), dir, dentry); return 0; } diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index ee8b43e4c15a..15bbbd7c52b8 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -183,8 +183,8 @@ static inline int fscrypt_file_open(struct inode *inode, struct file *filp) return 0; } -static inline int __fscrypt_prepare_link(struct inode *inode, - struct inode *dir) +static inline int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, + struct dentry *dentry) { return -EOPNOTSUPP; } diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 6456c6b2005f..b8443a2e47dc 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -184,7 +184,8 @@ extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, /* hooks.c */ extern int fscrypt_file_open(struct inode *inode, struct file *filp); -extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); +extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, + struct dentry *dentry); extern int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, From 906242d10b0b9279e5fda9691c13500d5b1fb543 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 31 Oct 2020 15:05:51 -0700 Subject: [PATCH 032/636] fs, fscrypt: clear DCACHE_ENCRYPTED_NAME when unaliasing directory commit 0bf3d5c1604ecbbd4e49e9f5b3c79152b87adb0d upstream. Make __d_move() clear DCACHE_ENCRYPTED_NAME on the source dentry. This is needed for when d_splice_alias() moves a directory's encrypted alias to its decrypted alias as a result of the encryption key being added. Otherwise, the decrypted alias will incorrectly be invalidated on the next lookup, causing problems such as unmounting a mount the user just mount()ed there. Note that we don't have to support arbitrary moves of this flag because fscrypt doesn't allow dentries with DCACHE_ENCRYPTED_NAME to be the source or target of a rename(). Fixes: 28b4c263961c ("ext4 crypto: revalidate dentry after adding or removing the key") Reported-by: Sarthak Kukreti Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index 20370a0997bf..1897833a4668 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2712,6 +2712,20 @@ static void copy_name(struct dentry *dentry, struct dentry *target) call_rcu(&old_name->u.head, __d_free_external_name); } +/* + * When d_splice_alias() moves a directory's encrypted alias to its decrypted + * alias as a result of the encryption key being added, DCACHE_ENCRYPTED_NAME + * must be cleared. Note that we don't have to support arbitrary moves of this + * flag because fscrypt doesn't allow encrypted aliases to be the source or + * target of a rename(). + */ +static inline void fscrypt_handle_d_move(struct dentry *dentry) +{ +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) + dentry->d_flags &= ~DCACHE_ENCRYPTED_NAME; +#endif +} + /* * __d_move - move a dentry * @dentry: entry to move @@ -2787,6 +2801,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); __d_rehash(dentry); fsnotify_update_flags(dentry); + fscrypt_handle_d_move(dentry); write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); From f217c9685c39735f8a8080d00d17ae2e9d536823 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 31 Oct 2020 15:05:52 -0700 Subject: [PATCH 033/636] fscrypt: only set dentry_operations on ciphertext dentries commit d456a33f041af4b54f3ce495a86d00c246165032 upstream. Plaintext dentries are always valid, so only set fscrypt_d_ops on ciphertext dentries. Besides marginally improved performance, this allows overlayfs to use an fscrypt-encrypted upperdir, provided that all the following are true: (1) The fscrypt encryption key is placed in the keyring before mounting overlayfs, and remains while the overlayfs is mounted. (2) The overlayfs workdir uses the same encryption policy. (3) No dentries for the ciphertext names of subdirectories have been created in the upperdir or workdir yet. (Since otherwise d_splice_alias() will reuse the old dentry with ->d_op set.) One potential use case is using an ephemeral encryption key to encrypt all files created or changed by a container, so that they can be securely erased ("crypto-shredded") after the container stops. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/crypto/hooks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 2e7498a821a4..9d8910e86ee5 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -115,9 +115,8 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); + d_set_d_op(dentry, &fscrypt_d_ops); } - - d_set_d_op(dentry, &fscrypt_d_ops); return 0; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); From b699b0067c0ba01ca7068470d42d76e34eaafe84 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 31 Oct 2020 15:05:53 -0700 Subject: [PATCH 034/636] fscrypt: fix race where ->lookup() marks plaintext dentry as ciphertext commit b01531db6cec2aa330dbc91bfbfaaef4a0d387a4 upstream. ->lookup() in an encrypted directory begins as follows: 1. fscrypt_prepare_lookup(): a. Try to load the directory's encryption key. b. If the key is unavailable, mark the dentry as a ciphertext name via d_flags. 2. fscrypt_setup_filename(): a. Try to load the directory's encryption key. b. If the key is available, encrypt the name (treated as a plaintext name) to get the on-disk name. Otherwise decode the name (treated as a ciphertext name) to get the on-disk name. But if the key is concurrently added, it may be found at (2a) but not at (1a). In this case, the dentry will be wrongly marked as a ciphertext name even though it was actually treated as plaintext. This will cause the dentry to be wrongly invalidated on the next lookup, potentially causing problems. For example, if the racy ->lookup() was part of sys_mount(), then the new mount will be detached when anything tries to access it. This is despite the mountpoint having a plaintext path, which should remain valid now that the key was added. Of course, this is only possible if there's a userspace race. Still, the additional kernel-side race is confusing and unexpected. Close the kernel-side race by changing fscrypt_prepare_lookup() to also set the on-disk filename (step 2b), consistent with the d_flags update. Fixes: 28b4c263961c ("ext4 crypto: revalidate dentry after adding or removing the key") Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/crypto/fname.c | 1 + fs/crypto/hooks.c | 11 ++--- fs/ext4/ext4.h | 62 ++++++++++++++++++++------- fs/ext4/namei.c | 76 ++++++++++++++++++++++----------- fs/f2fs/namei.c | 17 +++++--- fs/ubifs/dir.c | 8 ++-- include/linux/fscrypt.h | 22 +++++++--- include/linux/fscrypt_notsupp.h | 5 ++- include/linux/fscrypt_supp.h | 3 +- 9 files changed, 139 insertions(+), 66 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index d7a0f682ca12..17bb9a3fc0b0 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -354,6 +354,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, } if (!lookup) return -ENOKEY; + fname->is_ciphertext_name = true; /* * We don't have the key and we are doing a lookup; decode the diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 9d8910e86ee5..042d5b44f4ed 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -104,20 +104,21 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, } EXPORT_SYMBOL_GPL(__fscrypt_prepare_rename); -int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry) +int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct fscrypt_name *fname) { - int err = fscrypt_get_encryption_info(dir); + int err = fscrypt_setup_filename(dir, &dentry->d_name, 1, fname); - if (err) + if (err && err != -ENOENT) return err; - if (!fscrypt_has_encryption_key(dir)) { + if (fname->is_ciphertext_name) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); d_set_d_op(dentry, &fscrypt_d_ops); } - return 0; + return err; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0a4461ac4225..19109c04710e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2326,23 +2326,47 @@ static inline bool ext4_encrypted_inode(struct inode *inode) } #ifdef CONFIG_EXT4_FS_ENCRYPTION +static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, + const struct fscrypt_name *src) +{ + memset(dst, 0, sizeof(*dst)); + + dst->usr_fname = src->usr_fname; + dst->disk_name = src->disk_name; + dst->hinfo.hash = src->hash; + dst->hinfo.minor_hash = src->minor_hash; + dst->crypto_buf = src->crypto_buf; +} + static inline int ext4_fname_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct ext4_filename *fname) + const struct qstr *iname, + int lookup, + struct ext4_filename *fname) { struct fscrypt_name name; int err; - memset(fname, 0, sizeof(struct ext4_filename)); - err = fscrypt_setup_filename(dir, iname, lookup, &name); + if (err) + return err; - fname->usr_fname = name.usr_fname; - fname->disk_name = name.disk_name; - fname->hinfo.hash = name.hash; - fname->hinfo.minor_hash = name.minor_hash; - fname->crypto_buf = name.crypto_buf; - return err; + ext4_fname_from_fscrypt_name(fname, &name); + return 0; +} + +static inline int ext4_fname_prepare_lookup(struct inode *dir, + struct dentry *dentry, + struct ext4_filename *fname) +{ + struct fscrypt_name name; + int err; + + err = fscrypt_prepare_lookup(dir, dentry, &name); + if (err) + return err; + + ext4_fname_from_fscrypt_name(fname, &name); + return 0; } static inline void ext4_fname_free_filename(struct ext4_filename *fname) @@ -2356,19 +2380,27 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->usr_fname = NULL; fname->disk_name.name = NULL; } -#else +#else /* !CONFIG_EXT4_FS_ENCRYPTION */ static inline int ext4_fname_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, struct ext4_filename *fname) + const struct qstr *iname, + int lookup, + struct ext4_filename *fname) { fname->usr_fname = iname; fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; return 0; } -static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } -#endif +static inline int ext4_fname_prepare_lookup(struct inode *dir, + struct dentry *dentry, + struct ext4_filename *fname) +{ + return ext4_fname_setup_filename(dir, &dentry->d_name, 1, fname); +} + +static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } +#endif /* !CONFIG_EXT4_FS_ENCRYPTION */ /* dir.c */ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 186a2dd05bd8..4191552880bd 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1343,7 +1343,7 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, } /* - * ext4_find_entry() + * __ext4_find_entry() * * finds an entry in the specified directory with the wanted name. It * returns the cache buffer in which the entry was found, and the entry @@ -1353,39 +1353,32 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block, * The returned buffer_head has ->b_count elevated. The caller is expected * to brelse() it when appropriate. */ -static struct buffer_head * ext4_find_entry (struct inode *dir, - const struct qstr *d_name, - struct ext4_dir_entry_2 **res_dir, - int *inlined) +static struct buffer_head *__ext4_find_entry(struct inode *dir, + struct ext4_filename *fname, + struct ext4_dir_entry_2 **res_dir, + int *inlined) { struct super_block *sb; struct buffer_head *bh_use[NAMEI_RA_SIZE]; struct buffer_head *bh, *ret = NULL; ext4_lblk_t start, block; - const u8 *name = d_name->name; + const u8 *name = fname->usr_fname->name; size_t ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ size_t ra_ptr = 0; /* Current index into readahead buffer */ ext4_lblk_t nblocks; int i, namelen, retval; - struct ext4_filename fname; *res_dir = NULL; sb = dir->i_sb; - namelen = d_name->len; + namelen = fname->usr_fname->len; if (namelen > EXT4_NAME_LEN) return NULL; - retval = ext4_fname_setup_filename(dir, d_name, 1, &fname); - if (retval == -ENOENT) - return NULL; - if (retval) - return ERR_PTR(retval); - if (ext4_has_inline_data(dir)) { int has_inline_data = 1; - ret = ext4_find_inline_entry(dir, &fname, res_dir, + ret = ext4_find_inline_entry(dir, fname, res_dir, &has_inline_data); if (has_inline_data) { if (inlined) @@ -1405,7 +1398,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, goto restart; } if (is_dx(dir)) { - ret = ext4_dx_find_entry(dir, &fname, res_dir); + ret = ext4_dx_find_entry(dir, fname, res_dir); /* * On success, or if the error was file not found, * return. Otherwise, fall back to doing a search the @@ -1470,7 +1463,7 @@ restart: goto cleanup_and_exit; } set_buffer_verified(bh); - i = search_dirblock(bh, dir, &fname, + i = search_dirblock(bh, dir, fname, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { EXT4_I(dir)->i_dir_start_lookup = block; @@ -1501,10 +1494,50 @@ cleanup_and_exit: /* Clean up the read-ahead blocks */ for (; ra_ptr < ra_max; ra_ptr++) brelse(bh_use[ra_ptr]); - ext4_fname_free_filename(&fname); return ret; } +static struct buffer_head *ext4_find_entry(struct inode *dir, + const struct qstr *d_name, + struct ext4_dir_entry_2 **res_dir, + int *inlined) +{ + int err; + struct ext4_filename fname; + struct buffer_head *bh; + + err = ext4_fname_setup_filename(dir, d_name, 1, &fname); + if (err == -ENOENT) + return NULL; + if (err) + return ERR_PTR(err); + + bh = __ext4_find_entry(dir, &fname, res_dir, inlined); + + ext4_fname_free_filename(&fname); + return bh; +} + +static struct buffer_head *ext4_lookup_entry(struct inode *dir, + struct dentry *dentry, + struct ext4_dir_entry_2 **res_dir) +{ + int err; + struct ext4_filename fname; + struct buffer_head *bh; + + err = ext4_fname_prepare_lookup(dir, dentry, &fname); + if (err == -ENOENT) + return NULL; + if (err) + return ERR_PTR(err); + + bh = __ext4_find_entry(dir, &fname, res_dir, NULL); + + ext4_fname_free_filename(&fname); + return bh; +} + static struct buffer_head * ext4_dx_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir) @@ -1563,16 +1596,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi struct inode *inode; struct ext4_dir_entry_2 *de; struct buffer_head *bh; - int err; - - err = fscrypt_prepare_lookup(dir, dentry, flags); - if (err) - return ERR_PTR(err); if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); + bh = ext4_lookup_entry(dir, dentry, &de); if (IS_ERR(bh)) return (struct dentry *) bh; inode = NULL; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4f0cc0c79d1e..8617e742d087 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -432,19 +432,23 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, nid_t ino = -1; int err = 0; unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); + struct fscrypt_name fname; trace_f2fs_lookup_start(dir, dentry, flags); - err = fscrypt_prepare_lookup(dir, dentry, flags); - if (err) - goto out; - if (dentry->d_name.len > F2FS_NAME_LEN) { err = -ENAMETOOLONG; goto out; } - de = f2fs_find_entry(dir, &dentry->d_name, &page); + err = fscrypt_prepare_lookup(dir, dentry, &fname); + if (err == -ENOENT) + goto out_splice; + if (err) + goto out; + de = __f2fs_find_entry(dir, &fname, &page); + fscrypt_free_filename(&fname); + if (!de) { if (IS_ERR(page)) { err = PTR_ERR(page); @@ -484,8 +488,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } out_splice: new = d_splice_alias(inode, dentry); - if (IS_ERR(new)) - err = PTR_ERR(new); + err = PTR_ERR_OR_ZERO(new); trace_f2fs_lookup_end(dir, dentry, ino, err); return new; out_iput: diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index d7c0aa0626cd..10aab5dccaee 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -220,11 +220,9 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); - err = fscrypt_prepare_lookup(dir, dentry, flags); - if (err) - return ERR_PTR(err); - - err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &nm); + err = fscrypt_prepare_lookup(dir, dentry, &nm); + if (err == -ENOENT) + return d_splice_alias(NULL, dentry); if (err) return ERR_PTR(err); diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index af52a240c739..c1e4a615bd1c 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -32,6 +32,7 @@ struct fscrypt_name { u32 hash; u32 minor_hash; struct fscrypt_str crypto_buf; + bool is_ciphertext_name; }; #define FSTR_INIT(n, l) { .name = n, .len = l } @@ -138,25 +139,32 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * fscrypt_prepare_lookup - prepare to lookup a name in a possibly-encrypted directory * @dir: directory being searched * @dentry: filename being looked up - * @flags: lookup flags + * @fname: (output) the name to use to search the on-disk directory * - * Prepare for ->lookup() in a directory which may be encrypted. Lookups can be - * done with or without the directory's encryption key; without the key, + * Prepare for ->lookup() in a directory which may be encrypted by determining + * the name that will actually be used to search the directory on-disk. Lookups + * can be done with or without the directory's encryption key; without the key, * filenames are presented in encrypted form. Therefore, we'll try to set up * the directory's encryption key, but even without it the lookup can continue. * * This also installs a custom ->d_revalidate() method which will invalidate the * dentry if it was created without the key and the key is later added. * - * Return: 0 on success, -errno if a problem occurred while setting up the - * encryption key + * Return: 0 on success; -ENOENT if key is unavailable but the filename isn't a + * correctly formed encoded ciphertext name, so a negative dentry should be + * created; or another -errno code. */ static inline int fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) + struct fscrypt_name *fname) { if (IS_ENCRYPTED(dir)) - return __fscrypt_prepare_lookup(dir, dentry); + return __fscrypt_prepare_lookup(dir, dentry, fname); + + memset(fname, 0, sizeof(*fname)); + fname->usr_fname = &dentry->d_name; + fname->disk_name.name = (unsigned char *)dentry->d_name.name; + fname->disk_name.len = dentry->d_name.len; return 0; } diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 15bbbd7c52b8..24b261e49dc1 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -112,7 +112,7 @@ static inline int fscrypt_setup_filename(struct inode *dir, if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; - memset(fname, 0, sizeof(struct fscrypt_name)); + memset(fname, 0, sizeof(*fname)); fname->usr_fname = iname; fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; @@ -199,7 +199,8 @@ static inline int __fscrypt_prepare_rename(struct inode *old_dir, } static inline int __fscrypt_prepare_lookup(struct inode *dir, - struct dentry *dentry) + struct dentry *dentry, + struct fscrypt_name *fname) { return -EOPNOTSUPP; } diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index b8443a2e47dc..8641e20694ce 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -191,7 +191,8 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); -extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); +extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct fscrypt_name *fname); extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); From 8dda141d0b16031e0b24d6fed46f69873e000f08 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 1 Nov 2020 11:42:18 +0100 Subject: [PATCH 035/636] Revert "block: ratelimit handle_bad_sector() message" This reverts commit f86b9bf6228bb334fe1addcd566a658ecbd08f7e which is commit f4ac712e4fe009635344b9af5d890fe25fcc8c0d upstream. Jari Ruusu writes: Above change "block: ratelimit handle_bad_sector() message" upstream commit f4ac712e4fe009635344b9af5d890fe25fcc8c0d in 4.19.154 kernel is not completely OK. Removing casts from arguments 4 and 5 produces these compile warnings: ... For 64 bit systems it is only compile time cosmetic warning. For 32 bit system + CONFIG_LBDAF=n it introduces bugs: output formats are "%llu" and passed parameters are 32 bits. That is not OK. Upstream kernels have hardcoded 64 bit sector_t. In older stable trees sector_t can be either 64 or 32 bit. In other words, backport of above patch needs to keep those original casts. And Tetsuo Handa writes: Indeed, commit f4ac712e4fe00963 ("block: ratelimit handle_bad_sector() message") depends on commit 72deb455b5ec619f ("block: remove CONFIG_LBDAF") which was merged into 5.2 kernel. So let's revert it. Reported-by: Jari Ruusu Reported-by: Tetsuo Handa Cc: Christoph Hellwig Cc: Jens Axboe Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 445b878e3519..ce3710404544 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2127,10 +2127,11 @@ static void handle_bad_sector(struct bio *bio, sector_t maxsector) { char b[BDEVNAME_SIZE]; - pr_info_ratelimited("attempt to access beyond end of device\n" - "%s: rw=%d, want=%llu, limit=%llu\n", - bio_devname(bio, b), bio->bi_opf, - bio_end_sector(bio), maxsector); + printk(KERN_INFO "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", + bio_devname(bio, b), bio->bi_opf, + (unsigned long long)bio_end_sector(bio), + (long long)maxsector); } #ifdef CONFIG_FAIL_MAKE_REQUEST From 82856cd739c1dee7ea6a76d0ddc8ea2dce2a11f1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 30 Sep 2020 11:16:14 +0200 Subject: [PATCH 036/636] xen/events: don't use chip_data for legacy IRQs commit 0891fb39ba67bd7ae023ea0d367297ffff010781 upstream. Since commit c330fb1ddc0a ("XEN uses irqdesc::irq_data_common::handler_data to store a per interrupt XEN data pointer which contains XEN specific information.") Xen is using the chip_data pointer for storing IRQ specific data. When running as a HVM domain this can result in problems for legacy IRQs, as those might use chip_data for their own purposes. Use a local array for this purpose in case of legacy IRQs, avoiding the double use. Cc: stable@vger.kernel.org Fixes: c330fb1ddc0a ("XEN uses irqdesc::irq_data_common::handler_data to store a per interrupt XEN data pointer which contains XEN specific information.") Signed-off-by: Juergen Gross Tested-by: Stefan Bader Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20200930091614.13660-1-jgross@suse.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 95e5a9300ff0..fdeeef2b9947 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -90,6 +90,8 @@ static bool (*pirq_needs_eoi)(unsigned irq); /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) +static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; + static struct irq_chip xen_dynamic_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; @@ -154,7 +156,18 @@ int get_evtchn_to_irq(unsigned evtchn) /* Get info for IRQ */ struct irq_info *info_for_irq(unsigned irq) { - return irq_get_chip_data(irq); + if (irq < nr_legacy_irqs()) + return legacy_info_ptrs[irq]; + else + return irq_get_chip_data(irq); +} + +static void set_info_for_irq(unsigned int irq, struct irq_info *info) +{ + if (irq < nr_legacy_irqs()) + legacy_info_ptrs[irq] = info; + else + irq_set_chip_data(irq, info); } /* Constructors for packed IRQ information. */ @@ -375,7 +388,7 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; - irq_set_chip_data(irq, info); + set_info_for_irq(irq, info); list_add_tail(&info->list, &xen_irq_list_head); } @@ -424,14 +437,14 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (WARN_ON(!info)) return; list_del(&info->list); - irq_set_chip_data(irq, NULL); + set_info_for_irq(irq, NULL); WARN_ON(info->refcnt > 0); @@ -601,7 +614,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); static void __unbind_from_irq(unsigned int irq) { int evtchn = evtchn_from_irq(irq); - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (info->refcnt > 0) { info->refcnt--; @@ -1105,7 +1118,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, void unbind_from_irqhandler(unsigned int irq, void *dev_id) { - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (WARN_ON(!info)) return; @@ -1139,7 +1152,7 @@ int evtchn_make_refcounted(unsigned int evtchn) if (irq == -1) return -ENOENT; - info = irq_get_chip_data(irq); + info = info_for_irq(irq); if (!info) return -ENOENT; @@ -1167,7 +1180,7 @@ int evtchn_get(unsigned int evtchn) if (irq == -1) goto done; - info = irq_get_chip_data(irq); + info = info_for_irq(irq); if (!info) goto done; From 61d359d51a1cce8a5913843c8c3601dc878cc519 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:27 +0200 Subject: [PATCH 037/636] xen/events: avoid removing an event channel while handling it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 073d0552ead5bfc7a3a9c01de590e924f11b5dd2 upstream. Today it can happen that an event channel is being removed from the system while the event handling loop is active. This can lead to a race resulting in crashes or WARN() splats when trying to access the irq_info structure related to the event channel. Fix this problem by using a rwlock taken as reader in the event handling loop and as writer when deallocating the irq_info structure. As the observed problem was a NULL dereference in evtchn_from_irq() make this function more robust against races by testing the irq_info pointer to be not NULL before dereferencing it. And finally make all accesses to evtchn_to_irq[row][col] atomic ones in order to avoid seeing partial updates of an array element in irq handling. Note that irq handling can be entered only for event channels which have been valid before, so any not populated row isn't a problem in this regard, as rows are only ever added and never removed. This is XSA-331. Cc: stable@vger.kernel.org Reported-by: Marek Marczykowski-Górecki Reported-by: Jinoh Kang Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 40 ++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index fdeeef2b9947..1ccc308d5f66 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #include @@ -69,6 +70,23 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock); +/* + * Lock protecting event handling loop against removing event channels. + * Adding of event channels is no issue as the associated IRQ becomes active + * only after everything is setup (before request_[threaded_]irq() the handler + * can't be entered for an event, as the event channel will be unmasked only + * then). + */ +static DEFINE_RWLOCK(evtchn_rwlock); + +/* + * Lock hierarchy: + * + * irq_mapping_update_lock + * evtchn_rwlock + * IRQ-desc lock + */ + static LIST_HEAD(xen_irq_list_head); /* IRQ <-> VIRQ mapping. */ @@ -103,7 +121,7 @@ static void clear_evtchn_to_irq_row(unsigned row) unsigned col; for (col = 0; col < EVTCHN_PER_ROW; col++) - evtchn_to_irq[row][col] = -1; + WRITE_ONCE(evtchn_to_irq[row][col], -1); } static void clear_evtchn_to_irq_all(void) @@ -140,7 +158,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) clear_evtchn_to_irq_row(row); } - evtchn_to_irq[row][col] = irq; + WRITE_ONCE(evtchn_to_irq[row][col], irq); return 0; } @@ -150,7 +168,7 @@ int get_evtchn_to_irq(unsigned evtchn) return -1; if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) return -1; - return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; + return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); } /* Get info for IRQ */ @@ -259,10 +277,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) */ unsigned int evtchn_from_irq(unsigned irq) { - if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))) + const struct irq_info *info = NULL; + + if (likely(irq < nr_irqs)) + info = info_for_irq(irq); + if (!info) return 0; - return info_for_irq(irq)->evtchn; + return info->evtchn; } unsigned irq_from_evtchn(unsigned int evtchn) @@ -438,16 +460,21 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); + unsigned long flags; if (WARN_ON(!info)) return; + write_lock_irqsave(&evtchn_rwlock, flags); + list_del(&info->list); set_info_for_irq(irq, NULL); WARN_ON(info->refcnt > 0); + write_unlock_irqrestore(&evtchn_rwlock, flags); + kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ @@ -1233,6 +1260,8 @@ static void __xen_evtchn_do_upcall(void) int cpu = get_cpu(); unsigned count; + read_lock(&evtchn_rwlock); + do { vcpu_info->evtchn_upcall_pending = 0; @@ -1248,6 +1277,7 @@ static void __xen_evtchn_do_upcall(void) } while (count != 1 || vcpu_info->evtchn_upcall_pending); out: + read_unlock(&evtchn_rwlock); put_cpu(); } From 25f6b08895d579b461487291d6e48b3953a8bf65 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:27 +0200 Subject: [PATCH 038/636] xen/events: add a proper barrier to 2-level uevent unmasking commit 4d3fe31bd993ef504350989786858aefdb877daa upstream. A follow-up patch will require certain write to happen before an event channel is unmasked. While the memory barrier is not strictly necessary for all the callers, the main one will need it. In order to avoid an extra memory barrier when using fifo event channels, mandate evtchn_unmask() to provide write ordering. The 2-level event handling unmask operation is missing an appropriate barrier, so add it. Fifo event channels are fine in this regard due to using sync_cmpxchg(). This is part of XSA-332. Cc: stable@vger.kernel.org Suggested-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Julien Grall Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_2l.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index 8edef51c92e5..e4b75693600e 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -91,6 +91,8 @@ static void evtchn_2l_unmask(unsigned port) BUG_ON(!irqs_disabled()); + smp_wmb(); /* All writes before unmask must be visible. */ + if (unlikely((cpu != cpu_from_evtchn(port)))) do_hypercall = 1; else { From c7f95d899f492c1615538cbebdb781e4a1e7b7cf Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 20 Oct 2020 06:52:55 +0200 Subject: [PATCH 039/636] xen/events: fix race in evtchn_fifo_unmask() commit f01337197419b7e8a492e83089552b77d3b5fb90 upstream. Unmasking a fifo event channel can result in unmasking it twice, once directly in the kernel and once via a hypercall in case the event was pending. Fix that by doing the local unmask only if the event is not pending. This is part of XSA-332. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_fifo.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 76b318e88382..3071256a9413 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -227,19 +227,25 @@ static bool evtchn_fifo_is_masked(unsigned port) return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); } /* - * Clear MASKED, spinning if BUSY is set. + * Clear MASKED if not PENDING, spinning if BUSY is set. + * Return true if mask was cleared. */ -static void clear_masked(volatile event_word_t *word) +static bool clear_masked_cond(volatile event_word_t *word) { event_word_t new, old, w; w = *word; do { + if (w & (1 << EVTCHN_FIFO_PENDING)) + return false; + old = w & ~(1 << EVTCHN_FIFO_BUSY); new = old & ~(1 << EVTCHN_FIFO_MASKED); w = sync_cmpxchg(word, old, new); } while (w != old); + + return true; } static void evtchn_fifo_unmask(unsigned port) @@ -248,8 +254,7 @@ static void evtchn_fifo_unmask(unsigned port) BUG_ON(!irqs_disabled()); - clear_masked(word); - if (evtchn_fifo_is_pending(port)) { + if (!clear_masked_cond(word)) { struct evtchn_unmask unmask = { .port = port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); } From dea09436da034930fbd63420b3c6a010b98e8fab Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:27 +0200 Subject: [PATCH 040/636] xen/events: add a new "late EOI" evtchn framework commit 54c9de89895e0a36047fcc4ae754ea5b8655fb9d upstream. In order to avoid tight event channel related IRQ loops add a new framework of "late EOI" handling: the IRQ the event channel is bound to will be masked until the event has been handled and the related driver is capable to handle another event. The driver is responsible for unmasking the event channel via the new function xen_irq_lateeoi(). This is similar to binding an event channel to a threaded IRQ, but without having to structure the driver accordingly. In order to support a future special handling in case a rogue guest is sending lots of unsolicited events, add a flag to xen_irq_lateeoi() which can be set by the caller to indicate the event was a spurious one. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 165 ++++++++++++++++++++++++++----- include/xen/events.h | 29 +++++- 2 files changed, 166 insertions(+), 28 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 1ccc308d5f66..0c0114800f6c 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -111,6 +111,7 @@ static bool (*pirq_needs_eoi)(unsigned irq); static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; static struct irq_chip xen_dynamic_chip; +static struct irq_chip xen_lateeoi_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); @@ -395,6 +396,33 @@ void notify_remote_via_irq(int irq) } EXPORT_SYMBOL_GPL(notify_remote_via_irq); +static void xen_irq_lateeoi_locked(struct irq_info *info) +{ + evtchn_port_t evtchn; + + evtchn = info->evtchn; + if (!VALID_EVTCHN(evtchn)) + return; + + unmask_evtchn(evtchn); +} + +void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) +{ + struct irq_info *info; + unsigned long flags; + + read_lock_irqsave(&evtchn_rwlock, flags); + + info = info_for_irq(irq); + + if (info) + xen_irq_lateeoi_locked(info); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} +EXPORT_SYMBOL_GPL(xen_irq_lateeoi); + static void xen_irq_init(unsigned irq) { struct irq_info *info; @@ -866,7 +894,7 @@ int xen_pirq_from_irq(unsigned irq) } EXPORT_SYMBOL_GPL(xen_pirq_from_irq); -int bind_evtchn_to_irq(unsigned int evtchn) +static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) { int irq; int ret; @@ -883,7 +911,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) if (irq < 0) goto out; - irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "event"); ret = xen_irq_info_evtchn_setup(irq, evtchn); @@ -904,8 +932,19 @@ out: return irq; } + +int bind_evtchn_to_irq(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; @@ -947,8 +986,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) return irq; } -int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port) +static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain, + evtchn_port_t remote_port, + struct irq_chip *chip) { struct evtchn_bind_interdomain bind_interdomain; int err; @@ -959,10 +999,26 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind_interdomain); - return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); + return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port, + chip); +} + +int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_dynamic_chip); } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq); +int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); + static int find_virq(unsigned int virq, unsigned int cpu) { struct evtchn_status status; @@ -1058,14 +1114,15 @@ static void unbind_from_irq(unsigned int irq) mutex_unlock(&irq_mapping_update_lock); } -int bind_evtchn_to_irqhandler(unsigned int evtchn, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, void *dev_id) +static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id, + struct irq_chip *chip) { int irq, retval; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_evtchn_to_irq_chip(evtchn, chip); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); @@ -1076,31 +1133,76 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, return irq; } + +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); +int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); + +static int bind_interdomain_evtchn_to_irqhandler_chip( + unsigned int remote_domain, evtchn_port_t remote_port, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, struct irq_chip *chip) +{ + int irq, retval; + + irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + chip); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} + int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, + evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { - int irq, retval; - - irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); - if (irq < 0) - return irq; - - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_dynamic_chip); } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); +int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi); + int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) @@ -1644,6 +1746,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { .irq_retrigger = retrigger_dynirq, }; +static struct irq_chip xen_lateeoi_chip __read_mostly = { + /* The chip name needs to contain "xen-dyn" for irqbalance to work. */ + .name = "xen-dyn-lateeoi", + + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, + + .irq_ack = mask_ack_dynirq, + .irq_mask_ack = mask_ack_dynirq, + + .irq_set_affinity = set_affinity_irq, + .irq_retrigger = retrigger_dynirq, +}; + static struct irq_chip xen_pirq_chip __read_mostly = { .name = "xen-pirq", diff --git a/include/xen/events.h b/include/xen/events.h index 1650d39decae..d8255ed2052c 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -14,11 +14,16 @@ unsigned xen_evtchn_nr_channels(void); -int bind_evtchn_to_irq(unsigned int evtchn); -int bind_evtchn_to_irqhandler(unsigned int evtchn, +int bind_evtchn_to_irq(evtchn_port_t evtchn); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn); +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, const char *devname, + void *dev_id); int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu); int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, @@ -31,13 +36,21 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, const char *devname, void *dev_id); int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port); + evtchn_port_t remote_port); +int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port); int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, + evtchn_port_t remote_port, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id); +int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); /* * Common unbind function for all event sources. Takes IRQ to unbind from. @@ -46,6 +59,14 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, */ void unbind_from_irqhandler(unsigned int irq, void *dev_id); +/* + * Send late EOI for an IRQ bound to an event channel via one of the *_lateeoi + * functions above. + */ +void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags); +/* Signal an event was spurious, i.e. there was no action resulting from it. */ +#define XEN_EOI_FLAG_SPURIOUS 0x00000001 + #define XEN_IRQ_PRIORITY_MAX EVTCHN_FIFO_PRIORITY_MAX #define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT #define XEN_IRQ_PRIORITY_MIN EVTCHN_FIFO_PRIORITY_MIN From 1506109a85c894cdd1884442b3479d62a74c6f3e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:27 +0200 Subject: [PATCH 041/636] xen/blkback: use lateeoi irq binding commit 01263a1fabe30b4d542f34c7e2364a22587ddaf2 upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving blkfront use the lateeoi irq binding for blkback and unmask the event channel only after processing all pending requests. As the thread processing requests is used to do purging work in regular intervals an EOI may be sent only after having received an event. If there was no pending I/O request flag the EOI as spurious. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/block/xen-blkback/blkback.c | 22 +++++++++++++++++----- drivers/block/xen-blkback/xenbus.c | 5 ++--- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 3666afa639d1..b18f0162cb9c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -202,7 +202,7 @@ static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num) #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) -static int do_block_io_op(struct xen_blkif_ring *ring); +static int do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags); static int dispatch_rw_block_io(struct xen_blkif_ring *ring, struct blkif_request *req, struct pending_req *pending_req); @@ -615,6 +615,8 @@ int xen_blkif_schedule(void *arg) struct xen_vbd *vbd = &blkif->vbd; unsigned long timeout; int ret; + bool do_eoi; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; set_freezable(); while (!kthread_should_stop()) { @@ -639,16 +641,23 @@ int xen_blkif_schedule(void *arg) if (timeout == 0) goto purge_gnt_list; + do_eoi = ring->waiting_reqs; + ring->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ - ret = do_block_io_op(ring); + ret = do_block_io_op(ring, &eoi_flags); if (ret > 0) ring->waiting_reqs = 1; if (ret == -EACCES) wait_event_interruptible(ring->shutdown_wq, kthread_should_stop()); + if (do_eoi && !ring->waiting_reqs) { + xen_irq_lateeoi(ring->irq, eoi_flags); + eoi_flags |= XEN_EOI_FLAG_SPURIOUS; + } + purge_gnt_list: if (blkif->vbd.feature_gnt_persistent && time_after(jiffies, ring->next_lru)) { @@ -1121,7 +1130,7 @@ static void end_block_io_op(struct bio *bio) * and transmute it to the block API to hand it over to the proper block disk. */ static int -__do_block_io_op(struct xen_blkif_ring *ring) +__do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) { union blkif_back_rings *blk_rings = &ring->blk_rings; struct blkif_request req; @@ -1144,6 +1153,9 @@ __do_block_io_op(struct xen_blkif_ring *ring) if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) break; + /* We've seen a request, so clear spurious eoi flag. */ + *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; + if (kthread_should_stop()) { more_to_do = 1; break; @@ -1202,13 +1214,13 @@ done: } static int -do_block_io_op(struct xen_blkif_ring *ring) +do_block_io_op(struct xen_blkif_ring *ring, unsigned int *eoi_flags) { union blkif_back_rings *blk_rings = &ring->blk_rings; int more_to_do; do { - more_to_do = __do_block_io_op(ring); + more_to_do = __do_block_io_op(ring, eoi_flags); if (more_to_do) break; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 25c41ce070a7..93896c992245 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -237,9 +237,8 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref, BUG(); } - err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn, - xen_blkif_be_int, 0, - "blkif-backend", ring); + err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid, + evtchn, xen_blkif_be_int, 0, "blkif-backend", ring); if (err < 0) { xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring); ring->blk_rings.common.sring = NULL; From 6a052f3a6282da6861ac590fcda9a0d6b16182e6 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:28 +0200 Subject: [PATCH 042/636] xen/netback: use lateeoi irq binding commit 23025393dbeb3b8b3b60ebfa724cdae384992e27 upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving netfront use the lateeoi irq binding for netback and unmask the event channel only just before going to sleep waiting for new events. Make sure not to issue an EOI when none is pending by introducing an eoi_pending element to struct xenvif_queue. When no request has been consumed set the spurious flag when sending the EOI for an interrupt. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netback/common.h | 15 +++++++ drivers/net/xen-netback/interface.c | 61 ++++++++++++++++++++++++----- drivers/net/xen-netback/netback.c | 11 +++++- drivers/net/xen-netback/rx.c | 13 ++++-- 4 files changed, 86 insertions(+), 14 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 936c0b3e0ba2..86d23d0f563c 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -140,6 +140,20 @@ struct xenvif_queue { /* Per-queue data for xenvif */ char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */ struct xenvif *vif; /* Parent VIF */ + /* + * TX/RX common EOI handling. + * When feature-split-event-channels = 0, interrupt handler sets + * NETBK_COMMON_EOI, otherwise NETBK_RX_EOI and NETBK_TX_EOI are set + * by the RX and TX interrupt handlers. + * RX and TX handler threads will issue an EOI when either + * NETBK_COMMON_EOI or their specific bits (NETBK_RX_EOI or + * NETBK_TX_EOI) are set and they will reset those bits. + */ + atomic_t eoi_pending; +#define NETBK_RX_EOI 0x01 +#define NETBK_TX_EOI 0x02 +#define NETBK_COMMON_EOI 0x04 + /* Use NAPI for guest TX */ struct napi_struct napi; /* When feature-split-event-channels = 0, tx_irq = rx_irq. */ @@ -357,6 +371,7 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); +bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); void xenvif_rx_action(struct xenvif_queue *queue); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 4cafc31b98b7..c960cb7e3251 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -77,12 +77,28 @@ int xenvif_schedulable(struct xenvif *vif) !vif->disabled; } +static bool xenvif_handle_tx_interrupt(struct xenvif_queue *queue) +{ + bool rc; + + rc = RING_HAS_UNCONSUMED_REQUESTS(&queue->tx); + if (rc) + napi_schedule(&queue->napi); + return rc; +} + static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; + int old; - if (RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)) - napi_schedule(&queue->napi); + old = atomic_fetch_or(NETBK_TX_EOI, &queue->eoi_pending); + WARN(old & NETBK_TX_EOI, "Interrupt while EOI pending\n"); + + if (!xenvif_handle_tx_interrupt(queue)) { + atomic_andnot(NETBK_TX_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } @@ -116,19 +132,46 @@ static int xenvif_poll(struct napi_struct *napi, int budget) return work_done; } +static bool xenvif_handle_rx_interrupt(struct xenvif_queue *queue) +{ + bool rc; + + rc = xenvif_have_rx_work(queue, false); + if (rc) + xenvif_kick_thread(queue); + return rc; +} + static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; + int old; - xenvif_kick_thread(queue); + old = atomic_fetch_or(NETBK_RX_EOI, &queue->eoi_pending); + WARN(old & NETBK_RX_EOI, "Interrupt while EOI pending\n"); + + if (!xenvif_handle_rx_interrupt(queue)) { + atomic_andnot(NETBK_RX_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } irqreturn_t xenvif_interrupt(int irq, void *dev_id) { - xenvif_tx_interrupt(irq, dev_id); - xenvif_rx_interrupt(irq, dev_id); + struct xenvif_queue *queue = dev_id; + int old; + + old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending); + WARN(old, "Interrupt while EOI pending\n"); + + /* Use bitwise or as we need to call both functions. */ + if ((!xenvif_handle_tx_interrupt(queue) | + !xenvif_handle_rx_interrupt(queue))) { + atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending); + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); + } return IRQ_HANDLED; } @@ -595,7 +638,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, shared = (struct xen_netif_ctrl_sring *)addr; BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE); - err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn); if (err < 0) goto err_unmap; @@ -653,7 +696,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, if (tx_evtchn == rx_evtchn) { /* feature-split-event-channels == 0 */ - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( queue->vif->domid, tx_evtchn, xenvif_interrupt, 0, queue->name, queue); if (err < 0) @@ -664,7 +707,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, /* feature-split-event-channels == 1 */ snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0, queue->tx_irq_name, queue); if (err < 0) @@ -674,7 +717,7 @@ int xenvif_connect_data(struct xenvif_queue *queue, snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0, queue->rx_irq_name, queue); if (err < 0) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1c849106b793..f228298c3bd0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -162,6 +162,10 @@ void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue) if (more_to_do) napi_schedule(&queue->napi); + else if (atomic_fetch_andnot(NETBK_TX_EOI | NETBK_COMMON_EOI, + &queue->eoi_pending) & + (NETBK_TX_EOI | NETBK_COMMON_EOI)) + xen_irq_lateeoi(queue->tx_irq, 0); } static void tx_add_credit(struct xenvif_queue *queue) @@ -1613,9 +1617,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif) irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) { struct xenvif *vif = data; + unsigned int eoi_flag = XEN_EOI_FLAG_SPURIOUS; - while (xenvif_ctrl_work_todo(vif)) + while (xenvif_ctrl_work_todo(vif)) { xenvif_ctrl_action(vif); + eoi_flag = 0; + } + + xen_irq_lateeoi(irq, eoi_flag); return IRQ_HANDLED; } diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index ef5887037b22..9b62f65b630e 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -490,13 +490,13 @@ static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) return queue->stalled && prod - cons >= 1; } -static bool xenvif_have_rx_work(struct xenvif_queue *queue) +bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) { return xenvif_rx_ring_slots_available(queue) || (queue->vif->stall_timeout && (xenvif_rx_queue_stalled(queue) || xenvif_rx_queue_ready(queue))) || - kthread_should_stop() || + (test_kthread && kthread_should_stop()) || queue->vif->disabled; } @@ -527,15 +527,20 @@ static void xenvif_wait_for_rx_work(struct xenvif_queue *queue) { DEFINE_WAIT(wait); - if (xenvif_have_rx_work(queue)) + if (xenvif_have_rx_work(queue, true)) return; for (;;) { long ret; prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE); - if (xenvif_have_rx_work(queue)) + if (xenvif_have_rx_work(queue, true)) break; + if (atomic_fetch_andnot(NETBK_RX_EOI | NETBK_COMMON_EOI, + &queue->eoi_pending) & + (NETBK_RX_EOI | NETBK_COMMON_EOI)) + xen_irq_lateeoi(queue->rx_irq, 0); + ret = schedule_timeout(xenvif_rx_queue_timeout(queue)); if (!ret) break; From fb22061b20ea20fd83da4ef25cebda18b1365a77 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:28 +0200 Subject: [PATCH 043/636] xen/scsiback: use lateeoi irq binding commit 86991b6e7ea6c613b7692f65106076943449b6b7 upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving scsifront use the lateeoi irq binding for scsiback and unmask the event channel only just before leaving the event handling function. In case of a ring protocol error don't issue an EOI in order to avoid the possibility to use that for producing an event storm. This at once will result in no further call of scsiback_irq_fn(), so the ring_error struct member can be dropped and scsiback_do_cmd_fn() can signal the protocol error via a negative return value. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-scsiback.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 14a3d4cbc2a7..1abc0a55b8d9 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -91,7 +91,6 @@ struct vscsibk_info { unsigned int irq; struct vscsiif_back_ring ring; - int ring_error; spinlock_t ring_lock; atomic_t nr_unreplied_reqs; @@ -722,7 +721,8 @@ static struct vscsibk_pend *prepare_pending_reqs(struct vscsibk_info *info, return pending_req; } -static int scsiback_do_cmd_fn(struct vscsibk_info *info) +static int scsiback_do_cmd_fn(struct vscsibk_info *info, + unsigned int *eoi_flags) { struct vscsiif_back_ring *ring = &info->ring; struct vscsiif_request ring_req; @@ -739,11 +739,12 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) rc = ring->rsp_prod_pvt; pr_warn("Dom%d provided bogus ring requests (%#x - %#x = %u). Halting ring processing\n", info->domid, rp, rc, rp - rc); - info->ring_error = 1; - return 0; + return -EINVAL; } while ((rc != rp)) { + *eoi_flags &= ~XEN_EOI_FLAG_SPURIOUS; + if (RING_REQUEST_CONS_OVERFLOW(ring, rc)) break; @@ -802,13 +803,16 @@ static int scsiback_do_cmd_fn(struct vscsibk_info *info) static irqreturn_t scsiback_irq_fn(int irq, void *dev_id) { struct vscsibk_info *info = dev_id; + int rc; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; - if (info->ring_error) - return IRQ_HANDLED; - - while (scsiback_do_cmd_fn(info)) + while ((rc = scsiback_do_cmd_fn(info, &eoi_flags)) > 0) cond_resched(); + /* In case of a ring error we keep the event channel masked. */ + if (!rc) + xen_irq_lateeoi(irq, eoi_flags); + return IRQ_HANDLED; } @@ -829,7 +833,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref, sring = (struct vscsiif_sring *)area; BACK_RING_INIT(&info->ring, sring, PAGE_SIZE); - err = bind_interdomain_evtchn_to_irq(info->domid, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn); if (err < 0) goto unmap_page; @@ -1252,7 +1256,6 @@ static int scsiback_probe(struct xenbus_device *dev, info->domid = dev->otherend_id; spin_lock_init(&info->ring_lock); - info->ring_error = 0; atomic_set(&info->nr_unreplied_reqs, 0); init_waitqueue_head(&info->waiting_to_free); info->dev = dev; From 128b11967a26322e8d6bf98edc0406160d481105 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:28 +0200 Subject: [PATCH 044/636] xen/pvcallsback: use lateeoi irq binding commit c8d647a326f06a39a8e5f0f1af946eacfa1835f8 upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving pvcallsfront use the lateeoi irq binding for pvcallsback and unmask the event channel only after handling all write requests, which are the ones coming in via an irq. This requires modifying the logic a little bit to not require an event for each write request, but to keep the ioworker running until no further data is found on the ring page to be processed. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/pvcalls-back.c | 76 +++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index 398fd8b1639d..f94bb6034a5a 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -75,6 +75,7 @@ struct sock_mapping { atomic_t write; atomic_t io; atomic_t release; + atomic_t eoi; void (*saved_data_ready)(struct sock *sk); struct pvcalls_ioworker ioworker; }; @@ -96,7 +97,7 @@ static int pvcalls_back_release_active(struct xenbus_device *dev, struct pvcalls_fedata *fedata, struct sock_mapping *map); -static void pvcalls_conn_back_read(void *opaque) +static bool pvcalls_conn_back_read(void *opaque) { struct sock_mapping *map = (struct sock_mapping *)opaque; struct msghdr msg; @@ -116,17 +117,17 @@ static void pvcalls_conn_back_read(void *opaque) virt_mb(); if (error) - return; + return false; size = pvcalls_queued(prod, cons, array_size); if (size >= array_size) - return; + return false; spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) { atomic_set(&map->read, 0); spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags); - return; + return true; } spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags); wanted = array_size - size; @@ -150,7 +151,7 @@ static void pvcalls_conn_back_read(void *opaque) ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT); WARN_ON(ret > wanted); if (ret == -EAGAIN) /* shouldn't happen */ - return; + return true; if (!ret) ret = -ENOTCONN; spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags); @@ -169,10 +170,10 @@ static void pvcalls_conn_back_read(void *opaque) virt_wmb(); notify_remote_via_irq(map->irq); - return; + return true; } -static void pvcalls_conn_back_write(struct sock_mapping *map) +static bool pvcalls_conn_back_write(struct sock_mapping *map) { struct pvcalls_data_intf *intf = map->ring; struct pvcalls_data *data = &map->data; @@ -189,7 +190,7 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) array_size = XEN_FLEX_RING_SIZE(map->ring_order); size = pvcalls_queued(prod, cons, array_size); if (size == 0) - return; + return false; memset(&msg, 0, sizeof(msg)); msg.msg_flags |= MSG_DONTWAIT; @@ -207,12 +208,11 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) atomic_set(&map->write, 0); ret = inet_sendmsg(map->sock, &msg, size); - if (ret == -EAGAIN || (ret >= 0 && ret < size)) { + if (ret == -EAGAIN) { atomic_inc(&map->write); atomic_inc(&map->io); + return true; } - if (ret == -EAGAIN) - return; /* write the data, then update the indexes */ virt_wmb(); @@ -225,9 +225,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) } /* update the indexes, then notify the other end */ virt_wmb(); - if (prod != cons + ret) + if (prod != cons + ret) { atomic_inc(&map->write); + atomic_inc(&map->io); + } notify_remote_via_irq(map->irq); + + return true; } static void pvcalls_back_ioworker(struct work_struct *work) @@ -236,6 +240,7 @@ static void pvcalls_back_ioworker(struct work_struct *work) struct pvcalls_ioworker, register_work); struct sock_mapping *map = container_of(ioworker, struct sock_mapping, ioworker); + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; while (atomic_read(&map->io) > 0) { if (atomic_read(&map->release) > 0) { @@ -243,10 +248,18 @@ static void pvcalls_back_ioworker(struct work_struct *work) return; } - if (atomic_read(&map->read) > 0) - pvcalls_conn_back_read(map); - if (atomic_read(&map->write) > 0) - pvcalls_conn_back_write(map); + if (atomic_read(&map->read) > 0 && + pvcalls_conn_back_read(map)) + eoi_flags = 0; + if (atomic_read(&map->write) > 0 && + pvcalls_conn_back_write(map)) + eoi_flags = 0; + + if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) { + atomic_set(&map->eoi, 0); + xen_irq_lateeoi(map->irq, eoi_flags); + eoi_flags = XEN_EOI_FLAG_SPURIOUS; + } atomic_dec(&map->io); } @@ -343,12 +356,9 @@ static struct sock_mapping *pvcalls_new_active_socket( goto out; map->bytes = page; - ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id, - evtchn, - pvcalls_back_conn_event, - 0, - "pvcalls-backend", - map); + ret = bind_interdomain_evtchn_to_irqhandler_lateeoi( + fedata->dev->otherend_id, evtchn, + pvcalls_back_conn_event, 0, "pvcalls-backend", map); if (ret < 0) goto out; map->irq = ret; @@ -882,15 +892,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id) { struct xenbus_device *dev = dev_id; struct pvcalls_fedata *fedata = NULL; + unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS; - if (dev == NULL) - return IRQ_HANDLED; + if (dev) { + fedata = dev_get_drvdata(&dev->dev); + if (fedata) { + pvcalls_back_work(fedata); + eoi_flags = 0; + } + } - fedata = dev_get_drvdata(&dev->dev); - if (fedata == NULL) - return IRQ_HANDLED; + xen_irq_lateeoi(irq, eoi_flags); - pvcalls_back_work(fedata); return IRQ_HANDLED; } @@ -900,12 +913,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map) struct pvcalls_ioworker *iow; if (map == NULL || map->sock == NULL || map->sock->sk == NULL || - map->sock->sk->sk_user_data != map) + map->sock->sk->sk_user_data != map) { + xen_irq_lateeoi(irq, 0); return IRQ_HANDLED; + } iow = &map->ioworker; atomic_inc(&map->write); + atomic_inc(&map->eoi); atomic_inc(&map->io); queue_work(iow->wq, &iow->register_work); @@ -940,7 +956,7 @@ static int backend_connect(struct xenbus_device *dev) goto error; } - err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn); + err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn); if (err < 0) goto error; fedata->irq = err; From 4988884d53c01ca6b4f6e0b6bdcbb2d28b13c860 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:29 +0200 Subject: [PATCH 045/636] xen/pciback: use lateeoi irq binding commit c2711441bc961b37bba0615dd7135857d189035f upstream. In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving pcifront use the lateeoi irq binding for pciback and unmask the event channel only just before leaving the event handling function. Restructure the handling to support that scheme. Basically an event can come in for two reasons: either a normal request for a pciback action, which is handled in a worker, or in case the guest has finished an AER request which was requested by pciback. When an AER request is issued to the guest and a normal pciback action is currently active issue an EOI early in order to be able to receive another event when the AER request has been finished by the guest. Let the worker processing the normal requests run until no further request is pending, instead of starting a new worker ion that case. Issue the EOI only just before leaving the worker. This scheme allows to drop calling the generic function xen_pcibk_test_and_schedule_op() after processing of any request as the handling of both request types is now separated more cleanly. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-pciback/pci_stub.c | 14 ++++---- drivers/xen/xen-pciback/pciback.h | 12 +++++-- drivers/xen/xen-pciback/pciback_ops.c | 48 +++++++++++++++++++++------ drivers/xen/xen-pciback/xenbus.c | 2 +- 4 files changed, 56 insertions(+), 20 deletions(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 097410a7cdb7..adf3aae2939f 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -733,10 +733,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, wmb(); notify_remote_via_irq(pdev->evtchn_irq); + /* Enable IRQ to signal "request done". */ + xen_pcibk_lateeoi(pdev, 0); + ret = wait_event_timeout(xen_pcibk_aer_wait_queue, !(test_bit(_XEN_PCIB_active, (unsigned long *) &sh_info->flags)), 300*HZ); + /* Enable IRQ for pcifront request if not already active. */ + if (!test_bit(_PDEVF_op_active, &pdev->flags)) + xen_pcibk_lateeoi(pdev, 0); + if (!ret) { if (test_bit(_XEN_PCIB_active, (unsigned long *)&sh_info->flags)) { @@ -750,13 +757,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, } clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); - if (test_bit(_XEN_PCIF_active, - (unsigned long *)&sh_info->flags)) { - dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in " DRV_NAME "\n"); - xen_pcibk_test_and_schedule_op(psdev->pdev); - } - res = (pci_ers_result_t)aer_op->err; return res; } diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 263c059bff90..235cdfe13494 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #define DRV_NAME "xen-pciback" @@ -27,6 +28,8 @@ struct pci_dev_entry { #define PDEVF_op_active (1<<(_PDEVF_op_active)) #define _PCIB_op_pending (1) #define PCIB_op_pending (1<<(_PCIB_op_pending)) +#define _EOI_pending (2) +#define EOI_pending (1<<(_EOI_pending)) struct xen_pcibk_device { void *pci_dev_data; @@ -182,12 +185,17 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); void xen_pcibk_do_op(struct work_struct *data); +static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev, + unsigned int eoi_flag) +{ + if (test_and_clear_bit(_EOI_pending, &pdev->flags)) + xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag); +} + int xen_pcibk_xenbus_register(void); void xen_pcibk_xenbus_unregister(void); extern int verbose_request; - -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev); #endif /* Handles shared IRQs that can to device domain and control domain. */ diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 787966f44589..c4ed2c634ca7 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -297,26 +297,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, return 0; } #endif + +static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev) +{ + return test_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags) && + !test_and_set_bit(_PDEVF_op_active, &pdev->flags); +} + /* * Now the same evtchn is used for both pcifront conf_read_write request * as well as pcie aer front end ack. We use a new work_queue to schedule * xen_pcibk conf_read_write service for avoiding confict with aer_core * do_recovery job which also use the system default work_queue */ -void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) +static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) { + bool eoi = true; + /* Check that frontend is requesting an operation and that we are not * already processing a request */ - if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) - && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { + if (xen_pcibk_test_op_pending(pdev)) { schedule_work(&pdev->op_work); + eoi = false; } /*_XEN_PCIB_active should have been cleared by pcifront. And also make sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) && test_bit(_PCIB_op_pending, &pdev->flags)) { wake_up(&xen_pcibk_aer_wait_queue); + eoi = false; } + + /* EOI if there was nothing to do. */ + if (eoi) + xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS); } /* Performing the configuration space reads/writes must not be done in atomic @@ -324,10 +339,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) * use of semaphores). This function is intended to be called from a work * queue in process context taking a struct xen_pcibk_device as a parameter */ -void xen_pcibk_do_op(struct work_struct *data) +static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev) { - struct xen_pcibk_device *pdev = - container_of(data, struct xen_pcibk_device, op_work); struct pci_dev *dev; struct xen_pcibk_dev_data *dev_data = NULL; struct xen_pci_op *op = &pdev->op; @@ -400,16 +413,31 @@ void xen_pcibk_do_op(struct work_struct *data) smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ clear_bit(_PDEVF_op_active, &pdev->flags); smp_mb__after_atomic(); /* /before/ final check for work */ +} - /* Check to see if the driver domain tried to start another request in - * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. - */ - xen_pcibk_test_and_schedule_op(pdev); +void xen_pcibk_do_op(struct work_struct *data) +{ + struct xen_pcibk_device *pdev = + container_of(data, struct xen_pcibk_device, op_work); + + do { + xen_pcibk_do_one_op(pdev); + } while (xen_pcibk_test_op_pending(pdev)); + + xen_pcibk_lateeoi(pdev, 0); } irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) { struct xen_pcibk_device *pdev = dev_id; + bool eoi; + + /* IRQs might come in before pdev->evtchn_irq is written. */ + if (unlikely(pdev->evtchn_irq != irq)) + pdev->evtchn_irq = irq; + + eoi = test_and_set_bit(_EOI_pending, &pdev->flags); + WARN(eoi, "IRQ while EOI pending\n"); xen_pcibk_test_and_schedule_op(pdev); diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 581c4e1a8b82..3bbed47da3fa 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, pdev->sh_info = vaddr; - err = bind_interdomain_evtchn_to_irqhandler( + err = bind_interdomain_evtchn_to_irqhandler_lateeoi( pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, 0, DRV_NAME, pdev); if (err < 0) { From 8363670c4d0b302cdf0146ee8203401111568b6d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:29 +0200 Subject: [PATCH 046/636] xen/events: switch user event channels to lateeoi model commit c44b849cee8c3ac587da3b0980e01f77500d158c upstream. Instead of disabling the irq when an event is received and enabling it again when handled by the user process use the lateeoi model. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Tested-by: Stefano Stabellini Reviewed-by: Stefano Stabellini Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/evtchn.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 47c70b826a6a..4b11e60e37a3 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -166,7 +166,6 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) "Interrupt for port %d, but apparently not enabled; per-user %p\n", evtchn->port, u); - disable_irq_nosync(irq); evtchn->enabled = false; spin_lock(&u->ring_prod_lock); @@ -292,7 +291,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, evtchn = find_evtchn(u, port); if (evtchn && !evtchn->enabled) { evtchn->enabled = true; - enable_irq(irq_from_evtchn(port)); + xen_irq_lateeoi(irq_from_evtchn(port), 0); } } @@ -392,8 +391,8 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) if (rc < 0) goto err; - rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0, - u->name, evtchn); + rc = bind_evtchn_to_irqhandler_lateeoi(port, evtchn_interrupt, 0, + u->name, evtchn); if (rc < 0) goto err; From 536075098940120179416dc884e08ced37b2c02a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Sun, 13 Sep 2020 14:23:02 +0200 Subject: [PATCH 047/636] xen/events: use a common cpu hotplug hook for event channels commit 7beb290caa2adb0a399e735a1e175db9aae0523a upstream. Today only fifo event channels have a cpu hotplug callback. In order to prepare for more percpu (de)init work move that callback into events_base.c and add percpu_init() and percpu_deinit() hooks to struct evtchn_ops. This is part of XSA-332. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 25 +++++++++++++++++ drivers/xen/events/events_fifo.c | 40 +++++++++++++--------------- drivers/xen/events/events_internal.h | 3 +++ 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 0c0114800f6c..bbeb1d3aecfd 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #include @@ -1833,6 +1834,26 @@ void xen_callback_vector(void) {} static bool fifo_events = true; module_param(fifo_events, bool, 0); +static int xen_evtchn_cpu_prepare(unsigned int cpu) +{ + int ret = 0; + + if (evtchn_ops->percpu_init) + ret = evtchn_ops->percpu_init(cpu); + + return ret; +} + +static int xen_evtchn_cpu_dead(unsigned int cpu) +{ + int ret = 0; + + if (evtchn_ops->percpu_deinit) + ret = evtchn_ops->percpu_deinit(cpu); + + return ret; +} + void __init xen_init_IRQ(void) { int ret = -EINVAL; @@ -1843,6 +1864,10 @@ void __init xen_init_IRQ(void) if (ret < 0) xen_evtchn_2l_init(); + cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, + "xen/evtchn:prepare", + xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); + evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), sizeof(*evtchn_to_irq), GFP_KERNEL); BUG_ON(!evtchn_to_irq); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 3071256a9413..59e6002c9699 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -385,21 +385,6 @@ static void evtchn_fifo_resume(void) event_array_pages = 0; } -static const struct evtchn_ops evtchn_ops_fifo = { - .max_channels = evtchn_fifo_max_channels, - .nr_channels = evtchn_fifo_nr_channels, - .setup = evtchn_fifo_setup, - .bind_to_cpu = evtchn_fifo_bind_to_cpu, - .clear_pending = evtchn_fifo_clear_pending, - .set_pending = evtchn_fifo_set_pending, - .is_pending = evtchn_fifo_is_pending, - .test_and_set_mask = evtchn_fifo_test_and_set_mask, - .mask = evtchn_fifo_mask, - .unmask = evtchn_fifo_unmask, - .handle_events = evtchn_fifo_handle_events, - .resume = evtchn_fifo_resume, -}; - static int evtchn_fifo_alloc_control_block(unsigned cpu) { void *control_block = NULL; @@ -422,19 +407,36 @@ static int evtchn_fifo_alloc_control_block(unsigned cpu) return ret; } -static int xen_evtchn_cpu_prepare(unsigned int cpu) +static int evtchn_fifo_percpu_init(unsigned int cpu) { if (!per_cpu(cpu_control_block, cpu)) return evtchn_fifo_alloc_control_block(cpu); return 0; } -static int xen_evtchn_cpu_dead(unsigned int cpu) +static int evtchn_fifo_percpu_deinit(unsigned int cpu) { __evtchn_fifo_handle_events(cpu, true); return 0; } +static const struct evtchn_ops evtchn_ops_fifo = { + .max_channels = evtchn_fifo_max_channels, + .nr_channels = evtchn_fifo_nr_channels, + .setup = evtchn_fifo_setup, + .bind_to_cpu = evtchn_fifo_bind_to_cpu, + .clear_pending = evtchn_fifo_clear_pending, + .set_pending = evtchn_fifo_set_pending, + .is_pending = evtchn_fifo_is_pending, + .test_and_set_mask = evtchn_fifo_test_and_set_mask, + .mask = evtchn_fifo_mask, + .unmask = evtchn_fifo_unmask, + .handle_events = evtchn_fifo_handle_events, + .resume = evtchn_fifo_resume, + .percpu_init = evtchn_fifo_percpu_init, + .percpu_deinit = evtchn_fifo_percpu_deinit, +}; + int __init xen_evtchn_fifo_init(void) { int cpu = smp_processor_id(); @@ -448,9 +450,5 @@ int __init xen_evtchn_fifo_init(void) evtchn_ops = &evtchn_ops_fifo; - cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, - "xen/evtchn:prepare", - xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); - return ret; } diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 50c2050a1e32..980a56d51e4c 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -71,6 +71,9 @@ struct evtchn_ops { void (*handle_events)(unsigned cpu); void (*resume)(void); + + int (*percpu_init)(unsigned int cpu); + int (*percpu_deinit)(unsigned int cpu); }; extern const struct evtchn_ops *evtchn_ops; From 17ef715c0c88fe2df485fba68aa7c6c6cb3004f9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 7 Sep 2020 15:47:30 +0200 Subject: [PATCH 048/636] xen/events: defer eoi in case of excessive number of events commit e99502f76271d6bc4e374fe368c50c67a1fd3070 upstream. In case rogue guests are sending events at high frequency it might happen that xen_evtchn_do_upcall() won't stop processing events in dom0. As this is done in irq handling a crash might be the result. In order to avoid that, delay further inter-domain events after some time in xen_evtchn_do_upcall() by forcing eoi processing into a worker on the same cpu, thus inhibiting new events coming in. The time after which eoi processing is to be delayed is configurable via a new module parameter "event_loop_timeout" which specifies the maximum event loop time in jiffies (default: 2, the value was chosen after some tests showing that a value of 2 was the lowest with an only slight drop of dom0 network throughput while multiple guests performed an event storm). How long eoi processing will be delayed can be specified via another parameter "event_eoi_delay" (again in jiffies, default 10, again the value was chosen after testing with different delay values). This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 8 + drivers/xen/events/events_2l.c | 7 +- drivers/xen/events/events_base.c | 189 +++++++++++++++++- drivers/xen/events/events_fifo.c | 30 +-- drivers/xen/events/events_internal.h | 14 +- 5 files changed, 216 insertions(+), 32 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb129272240c..8dbc8d4ec8f0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5270,6 +5270,14 @@ with /sys/devices/system/xen_memory/xen_memory0/scrub_pages. Default value controlled with CONFIG_XEN_SCRUB_PAGES_DEFAULT. + xen.event_eoi_delay= [XEN] + How long to delay EOI handling in case of event + storms (jiffies). Default is 10. + + xen.event_loop_timeout= [XEN] + After which time (jiffies) the event handling loop + should start to delay EOI handling. Default is 2. + xirc2ps_cs= [NET,PCMCIA] Format: ,,,,,[,[,[,]]] diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index e4b75693600e..f026624898e7 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -161,7 +161,7 @@ static inline xen_ulong_t active_evtchns(unsigned int cpu, * a bitset of words which contain pending event bits. The second * level is a bitset of pending events themselves. */ -static void evtchn_2l_handle_events(unsigned cpu) +static void evtchn_2l_handle_events(unsigned cpu, struct evtchn_loop_ctrl *ctrl) { int irq; xen_ulong_t pending_words; @@ -242,10 +242,7 @@ static void evtchn_2l_handle_events(unsigned cpu) /* Process port. */ port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx; - irq = get_evtchn_to_irq(port); - - if (irq != -1) - generic_handle_irq(irq); + handle_irq_for_port(port, ctrl); bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index bbeb1d3aecfd..d565f7ebb8ef 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -34,6 +34,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86 #include @@ -63,6 +65,15 @@ #include "events_internal.h" +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "xen." + +static uint __read_mostly event_loop_timeout = 2; +module_param(event_loop_timeout, uint, 0644); + +static uint __read_mostly event_eoi_delay = 10; +module_param(event_eoi_delay, uint, 0644); + const struct evtchn_ops *evtchn_ops; /* @@ -86,6 +97,7 @@ static DEFINE_RWLOCK(evtchn_rwlock); * irq_mapping_update_lock * evtchn_rwlock * IRQ-desc lock + * percpu eoi_list_lock */ static LIST_HEAD(xen_irq_list_head); @@ -118,6 +130,8 @@ static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); static void disable_dynirq(struct irq_data *data); +static DEFINE_PER_CPU(unsigned int, irq_epoch); + static void clear_evtchn_to_irq_row(unsigned row) { unsigned col; @@ -397,17 +411,120 @@ void notify_remote_via_irq(int irq) } EXPORT_SYMBOL_GPL(notify_remote_via_irq); +struct lateeoi_work { + struct delayed_work delayed; + spinlock_t eoi_list_lock; + struct list_head eoi_list; +}; + +static DEFINE_PER_CPU(struct lateeoi_work, lateeoi); + +static void lateeoi_list_del(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + unsigned long flags; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + list_del_init(&info->eoi_list); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + +static void lateeoi_list_add(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + struct irq_info *elem; + u64 now = get_jiffies_64(); + unsigned long delay; + unsigned long flags; + + if (now < info->eoi_time) + delay = info->eoi_time - now; + else + delay = 1; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + + if (list_empty(&eoi->eoi_list)) { + list_add(&info->eoi_list, &eoi->eoi_list); + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, delay); + } else { + list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) { + if (elem->eoi_time <= info->eoi_time) + break; + } + list_add(&info->eoi_list, &elem->eoi_list); + } + + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + static void xen_irq_lateeoi_locked(struct irq_info *info) { evtchn_port_t evtchn; + unsigned int cpu; evtchn = info->evtchn; - if (!VALID_EVTCHN(evtchn)) + if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) return; + cpu = info->eoi_cpu; + if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) { + lateeoi_list_add(info); + return; + } + + info->eoi_time = 0; unmask_evtchn(evtchn); } +static void xen_irq_lateeoi_worker(struct work_struct *work) +{ + struct lateeoi_work *eoi; + struct irq_info *info; + u64 now = get_jiffies_64(); + unsigned long flags; + + eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); + + read_lock_irqsave(&evtchn_rwlock, flags); + + while (true) { + spin_lock(&eoi->eoi_list_lock); + + info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, + eoi_list); + + if (info == NULL || now < info->eoi_time) { + spin_unlock(&eoi->eoi_list_lock); + break; + } + + list_del_init(&info->eoi_list); + + spin_unlock(&eoi->eoi_list_lock); + + info->eoi_time = 0; + + xen_irq_lateeoi_locked(info); + } + + if (info) + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, info->eoi_time - now); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} + +static void xen_cpu_init_eoi(unsigned int cpu) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu); + + INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker); + spin_lock_init(&eoi->eoi_list_lock); + INIT_LIST_HEAD(&eoi->eoi_list); +} + void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) { struct irq_info *info; @@ -427,6 +544,7 @@ EXPORT_SYMBOL_GPL(xen_irq_lateeoi); static void xen_irq_init(unsigned irq) { struct irq_info *info; + #ifdef CONFIG_SMP /* By default all event channels notify CPU#0. */ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0)); @@ -441,6 +559,7 @@ static void xen_irq_init(unsigned irq) set_info_for_irq(irq, info); + INIT_LIST_HEAD(&info->eoi_list); list_add_tail(&info->list, &xen_irq_list_head); } @@ -496,6 +615,9 @@ static void xen_free_irq(unsigned irq) write_lock_irqsave(&evtchn_rwlock, flags); + if (!list_empty(&info->eoi_list)) + lateeoi_list_del(info); + list_del(&info->list); set_info_for_irq(irq, NULL); @@ -1355,6 +1477,54 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) notify_remote_via_irq(irq); } +struct evtchn_loop_ctrl { + ktime_t timeout; + unsigned count; + bool defer_eoi; +}; + +void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) +{ + int irq; + struct irq_info *info; + + irq = get_evtchn_to_irq(port); + if (irq == -1) + return; + + /* + * Check for timeout every 256 events. + * We are setting the timeout value only after the first 256 + * events in order to not hurt the common case of few loop + * iterations. The 256 is basically an arbitrary value. + * + * In case we are hitting the timeout we need to defer all further + * EOIs in order to ensure to leave the event handling loop rather + * sooner than later. + */ + if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) { + ktime_t kt = ktime_get(); + + if (!ctrl->timeout) { + kt = ktime_add_ms(kt, + jiffies_to_msecs(event_loop_timeout)); + ctrl->timeout = kt; + } else if (kt > ctrl->timeout) { + ctrl->defer_eoi = true; + } + } + + info = info_for_irq(irq); + + if (ctrl->defer_eoi) { + info->eoi_cpu = smp_processor_id(); + info->irq_epoch = __this_cpu_read(irq_epoch); + info->eoi_time = get_jiffies_64() + event_eoi_delay; + } + + generic_handle_irq(irq); +} + static DEFINE_PER_CPU(unsigned, xed_nesting_count); static void __xen_evtchn_do_upcall(void) @@ -1362,6 +1532,7 @@ static void __xen_evtchn_do_upcall(void) struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int cpu = get_cpu(); unsigned count; + struct evtchn_loop_ctrl ctrl = { 0 }; read_lock(&evtchn_rwlock); @@ -1371,7 +1542,7 @@ static void __xen_evtchn_do_upcall(void) if (__this_cpu_inc_return(xed_nesting_count) - 1) goto out; - xen_evtchn_handle_events(cpu); + xen_evtchn_handle_events(cpu, &ctrl); BUG_ON(!irqs_disabled()); @@ -1382,6 +1553,13 @@ static void __xen_evtchn_do_upcall(void) out: read_unlock(&evtchn_rwlock); + /* + * Increment irq_epoch only now to defer EOIs only for + * xen_irq_lateeoi() invocations occurring from inside the loop + * above. + */ + __this_cpu_inc(irq_epoch); + put_cpu(); } @@ -1828,9 +2006,6 @@ void xen_callback_vector(void) void xen_callback_vector(void) {} #endif -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "xen." - static bool fifo_events = true; module_param(fifo_events, bool, 0); @@ -1838,6 +2013,8 @@ static int xen_evtchn_cpu_prepare(unsigned int cpu) { int ret = 0; + xen_cpu_init_eoi(cpu); + if (evtchn_ops->percpu_init) ret = evtchn_ops->percpu_init(cpu); @@ -1864,6 +2041,8 @@ void __init xen_init_IRQ(void) if (ret < 0) xen_evtchn_2l_init(); + xen_cpu_init_eoi(smp_processor_id()); + cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, "xen/evtchn:prepare", xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 59e6002c9699..33462521bfd0 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -275,19 +275,9 @@ static uint32_t clear_linked(volatile event_word_t *word) return w & EVTCHN_FIFO_LINK_MASK; } -static void handle_irq_for_port(unsigned port) -{ - int irq; - - irq = get_evtchn_to_irq(port); - if (irq != -1) - generic_handle_irq(irq); -} - -static void consume_one_event(unsigned cpu, +static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl, struct evtchn_fifo_control_block *control_block, - unsigned priority, unsigned long *ready, - bool drop) + unsigned priority, unsigned long *ready) { struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); uint32_t head; @@ -320,16 +310,17 @@ static void consume_one_event(unsigned cpu, clear_bit(priority, ready); if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) { - if (unlikely(drop)) + if (unlikely(!ctrl)) pr_warn("Dropping pending event for port %u\n", port); else - handle_irq_for_port(port); + handle_irq_for_port(port, ctrl); } q->head[priority] = head; } -static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) +static void __evtchn_fifo_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { struct evtchn_fifo_control_block *control_block; unsigned long ready; @@ -341,14 +332,15 @@ static void __evtchn_fifo_handle_events(unsigned cpu, bool drop) while (ready) { q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES); - consume_one_event(cpu, control_block, q, &ready, drop); + consume_one_event(cpu, ctrl, control_block, q, &ready); ready |= xchg(&control_block->ready, 0); } } -static void evtchn_fifo_handle_events(unsigned cpu) +static void evtchn_fifo_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { - __evtchn_fifo_handle_events(cpu, false); + __evtchn_fifo_handle_events(cpu, ctrl); } static void evtchn_fifo_resume(void) @@ -416,7 +408,7 @@ static int evtchn_fifo_percpu_init(unsigned int cpu) static int evtchn_fifo_percpu_deinit(unsigned int cpu) { - __evtchn_fifo_handle_events(cpu, true); + __evtchn_fifo_handle_events(cpu, NULL); return 0; } diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 980a56d51e4c..2cb9c2d2c5c0 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -32,11 +32,15 @@ enum xen_irq_type { */ struct irq_info { struct list_head list; + struct list_head eoi_list; int refcnt; enum xen_irq_type type; /* type */ unsigned irq; unsigned int evtchn; /* event channel */ unsigned short cpu; /* cpu bound */ + unsigned short eoi_cpu; /* EOI must happen on this cpu */ + unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ + u64 eoi_time; /* Time in jiffies when to EOI. */ union { unsigned short virq; @@ -55,6 +59,8 @@ struct irq_info { #define PIRQ_SHAREABLE (1 << 1) #define PIRQ_MSI_GROUP (1 << 2) +struct evtchn_loop_ctrl; + struct evtchn_ops { unsigned (*max_channels)(void); unsigned (*nr_channels)(void); @@ -69,7 +75,7 @@ struct evtchn_ops { void (*mask)(unsigned port); void (*unmask)(unsigned port); - void (*handle_events)(unsigned cpu); + void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl); void (*resume)(void); int (*percpu_init)(unsigned int cpu); @@ -80,6 +86,7 @@ extern const struct evtchn_ops *evtchn_ops; extern int **evtchn_to_irq; int get_evtchn_to_irq(unsigned int evtchn); +void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl); struct irq_info *info_for_irq(unsigned irq); unsigned cpu_from_irq(unsigned irq); @@ -137,9 +144,10 @@ static inline void unmask_evtchn(unsigned port) return evtchn_ops->unmask(port); } -static inline void xen_evtchn_handle_events(unsigned cpu) +static inline void xen_evtchn_handle_events(unsigned cpu, + struct evtchn_loop_ctrl *ctrl) { - return evtchn_ops->handle_events(cpu); + return evtchn_ops->handle_events(cpu, ctrl); } static inline void xen_evtchn_resume(void) From c1f90a9cd988bae62273c99aa7c58bf24f9bf0c1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 14 Sep 2020 14:01:02 +0200 Subject: [PATCH 049/636] xen/events: block rogue events for some time commit 5f7f77400ab5b357b5fdb7122c3442239672186c upstream. In order to avoid high dom0 load due to rogue guests sending events at high frequency, block those events in case there was no action needed in dom0 to handle the events. This is done by adding a per-event counter, which set to zero in case an EOI without the XEN_EOI_FLAG_SPURIOUS is received from a backend driver, and incremented when this flag has been set. In case the counter is 2 or higher delay the EOI by 1 << (cnt - 2) jiffies, but not more than 1 second. In order not to waste memory shorten the per-event refcnt to two bytes (it should normally never exceed a value of 2). Add an overflow check to evtchn_get() to make sure the 2 bytes really won't overflow. This is part of XSA-332. Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Stefano Stabellini Reviewed-by: Wei Liu Signed-off-by: Greg Kroah-Hartman --- drivers/xen/events/events_base.c | 27 ++++++++++++++++++++++----- drivers/xen/events/events_internal.h | 3 ++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index d565f7ebb8ef..aca845675279 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -459,17 +459,34 @@ static void lateeoi_list_add(struct irq_info *info) spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); } -static void xen_irq_lateeoi_locked(struct irq_info *info) +static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) { evtchn_port_t evtchn; unsigned int cpu; + unsigned int delay = 0; evtchn = info->evtchn; if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) return; + if (spurious) { + if ((1 << info->spurious_cnt) < (HZ << 2)) + info->spurious_cnt++; + if (info->spurious_cnt > 1) { + delay = 1 << (info->spurious_cnt - 2); + if (delay > HZ) + delay = HZ; + if (!info->eoi_time) + info->eoi_cpu = smp_processor_id(); + info->eoi_time = get_jiffies_64() + delay; + } + } else { + info->spurious_cnt = 0; + } + cpu = info->eoi_cpu; - if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) { + if (info->eoi_time && + (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) { lateeoi_list_add(info); return; } @@ -506,7 +523,7 @@ static void xen_irq_lateeoi_worker(struct work_struct *work) info->eoi_time = 0; - xen_irq_lateeoi_locked(info); + xen_irq_lateeoi_locked(info, false); } if (info) @@ -535,7 +552,7 @@ void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) info = info_for_irq(irq); if (info) - xen_irq_lateeoi_locked(info); + xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); read_unlock_irqrestore(&evtchn_rwlock, flags); } @@ -1438,7 +1455,7 @@ int evtchn_get(unsigned int evtchn) goto done; err = -EINVAL; - if (info->refcnt <= 0) + if (info->refcnt <= 0 || info->refcnt == SHRT_MAX) goto done; info->refcnt++; diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 2cb9c2d2c5c0..b9b4f5919893 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -33,7 +33,8 @@ enum xen_irq_type { struct irq_info { struct list_head list; struct list_head eoi_list; - int refcnt; + short refcnt; + short spurious_cnt; enum xen_irq_type type; /* type */ unsigned irq; unsigned int evtchn; /* event channel */ From 5579d7502ff6374d0953691de5bca353e1ba6b30 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 14 Oct 2020 07:30:51 +0200 Subject: [PATCH 050/636] x86/unwind/orc: Fix inactive tasks with stack pointer in %sp on GCC 10 compiled kernels [ Upstream commit f2ac57a4c49d40409c21c82d23b5706df9b438af ] GCC 10 optimizes the scheduler code differently than its predecessors. When CONFIG_DEBUG_SECTION_MISMATCH=y, the Makefile forces GCC not to inline some functions (-fno-inline-functions-called-once). Before GCC 10, "no-inlined" __schedule() starts with the usual prologue: push %bp mov %sp, %bp So the ORC unwinder simply picks stack pointer from %bp and unwinds from __schedule() just perfectly: $ cat /proc/1/stack [<0>] ep_poll+0x3e9/0x450 [<0>] do_epoll_wait+0xaa/0xc0 [<0>] __x64_sys_epoll_wait+0x1a/0x20 [<0>] do_syscall_64+0x33/0x40 [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 But now, with GCC 10, there is no %bp prologue in __schedule(): $ cat /proc/1/stack The ORC entry of the point in __schedule() is: sp:sp+88 bp:last_sp-48 type:call end:0 In this case, nobody subtracts sizeof "struct inactive_task_frame" in __unwind_start(). The struct is put on the stack by __switch_to_asm() and only then __switch_to_asm() stores %sp to task->thread.sp. But we start unwinding from a point in __schedule() (stored in frame->ret_addr by 'call') and not in __switch_to_asm(). So for these example values in __unwind_start(): sp=ffff94b50001fdc8 bp=ffff8e1f41d29340 ip=__schedule+0x1f0 The stack is: ffff94b50001fdc8: ffff8e1f41578000 # struct inactive_task_frame ffff94b50001fdd0: 0000000000000000 ffff94b50001fdd8: ffff8e1f41d29340 ffff94b50001fde0: ffff8e1f41611d40 # ... ffff94b50001fde8: ffffffff93c41920 # bx ffff94b50001fdf0: ffff8e1f41d29340 # bp ffff94b50001fdf8: ffffffff9376cad0 # ret_addr (and end of the struct) 0xffffffff9376cad0 is __schedule+0x1f0 (after the call to __switch_to_asm). Now follow those 88 bytes from the ORC entry (sp+88). The entry is correct, __schedule() really pushes 48 bytes (8*7) + 32 bytes via subq to store some local values (like 4U below). So to unwind, look at the offset 88-sizeof(long) = 0x50 from here: ffff94b50001fe00: ffff8e1f41578618 ffff94b50001fe08: 00000cc000000255 ffff94b50001fe10: 0000000500000004 ffff94b50001fe18: 7793fab6956b2d00 # NOTE (see below) ffff94b50001fe20: ffff8e1f41578000 ffff94b50001fe28: ffff8e1f41578000 ffff94b50001fe30: ffff8e1f41578000 ffff94b50001fe38: ffff8e1f41578000 ffff94b50001fe40: ffff94b50001fed8 ffff94b50001fe48: ffff8e1f41577ff0 ffff94b50001fe50: ffffffff9376cf12 Here ^^^^^^^^^^^^^^^^ is the correct ret addr from __schedule(). It translates to schedule+0x42 (insn after a call to __schedule()). BUT, unwind_next_frame() tries to take the address starting from 0xffff94b50001fdc8. That is exactly from thread.sp+88-sizeof(long) = 0xffff94b50001fdc8+88-8 = 0xffff94b50001fe18, which is garbage marked as NOTE above. So this quits the unwinding as 7793fab6956b2d00 is obviously not a kernel address. There was a fix to skip 'struct inactive_task_frame' in unwind_get_return_address_ptr in the following commit: 187b96db5ca7 ("x86/unwind/orc: Fix unwind_get_return_address_ptr() for inactive tasks") But we need to skip the struct already in the unwinder proper. So subtract the size (increase the stack pointer) of the structure in __unwind_start() directly. This allows for removal of the code added by commit 187b96db5ca7 completely, as the address is now at '(unsigned long *)state->sp - 1', the same as in the generic case. [ mingo: Cleaned up the changelog a bit, for better readability. ] Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") Bug: https://bugzilla.suse.com/show_bug.cgi?id=1176907 Signed-off-by: Jiri Slaby Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20201014053051.24199-1-jslaby@suse.cz Signed-off-by: Sasha Levin --- arch/x86/kernel/unwind_orc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1d264ba1e56d..8fa9ca3c3bd7 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -300,19 +300,12 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) { - struct task_struct *task = state->task; - if (unwind_done(state)) return NULL; if (state->regs) return &state->regs->ip; - if (task != current && state->sp == task->thread.sp) { - struct inactive_task_frame *frame = (void *)task->thread.sp; - return &frame->ret_addr; - } - if (state->sp) return (unsigned long *)state->sp - 1; @@ -634,7 +627,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, } else { struct inactive_task_frame *frame = (void *)task->thread.sp; - state->sp = task->thread.sp; + state->sp = task->thread.sp + sizeof(*frame); state->bp = READ_ONCE_NOCHECK(frame->bp); state->ip = READ_ONCE_NOCHECK(frame->ret_addr); state->signal = (void *)state->ip == ret_from_fork; From 30e91357e3e47c160d51decf4400603ea66b8925 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Sat, 24 Oct 2020 16:37:33 +0300 Subject: [PATCH 051/636] mlxsw: core: Fix use-after-free in mlxsw_emad_trans_finish() [ Upstream commit 0daf2bf5a2dcf33d446b76360908f109816e2e21 ] Each EMAD transaction stores the skb used to issue the EMAD request ('trans->tx_skb') so that the request could be retried in case of a timeout. The skb can be freed when a corresponding response is received or as part of the retry logic (e.g., failed retransmit, exceeded maximum number of retries). The two tasks (i.e., response processing and retransmits) are synchronized by the atomic 'trans->active' field which ensures that responses to inactive transactions are ignored. In case of a failed retransmit the transaction is finished and all of its resources are freed. However, the current code does not mark it as inactive. Syzkaller was able to hit a race condition in which a concurrent response is processed while the transaction's resources are being freed, resulting in a use-after-free [1]. Fix the issue by making sure to mark the transaction as inactive after a failed retransmit and free its resources only if a concurrent task did not already do that. [1] BUG: KASAN: use-after-free in consume_skb+0x30/0x370 net/core/skbuff.c:833 Read of size 4 at addr ffff88804f570494 by task syz-executor.0/1004 CPU: 0 PID: 1004 Comm: syz-executor.0 Not tainted 5.8.0-rc7+ #68 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xf6/0x16e lib/dump_stack.c:118 print_address_description.constprop.0+0x1c/0x250 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 check_memory_region_inline mm/kasan/generic.c:186 [inline] check_memory_region+0x14e/0x1b0 mm/kasan/generic.c:192 instrument_atomic_read include/linux/instrumented.h:56 [inline] atomic_read include/asm-generic/atomic-instrumented.h:27 [inline] refcount_read include/linux/refcount.h:147 [inline] skb_unref include/linux/skbuff.h:1044 [inline] consume_skb+0x30/0x370 net/core/skbuff.c:833 mlxsw_emad_trans_finish+0x64/0x1c0 drivers/net/ethernet/mellanox/mlxsw/core.c:592 mlxsw_emad_process_response drivers/net/ethernet/mellanox/mlxsw/core.c:651 [inline] mlxsw_emad_rx_listener_func+0x5c9/0xac0 drivers/net/ethernet/mellanox/mlxsw/core.c:672 mlxsw_core_skb_receive+0x4df/0x770 drivers/net/ethernet/mellanox/mlxsw/core.c:2063 mlxsw_pci_cqe_rdq_handle drivers/net/ethernet/mellanox/mlxsw/pci.c:595 [inline] mlxsw_pci_cq_tasklet+0x12a6/0x2520 drivers/net/ethernet/mellanox/mlxsw/pci.c:651 tasklet_action_common.isra.0+0x13f/0x3e0 kernel/softirq.c:550 __do_softirq+0x223/0x964 kernel/softirq.c:292 asm_call_on_stack+0x12/0x20 arch/x86/entry/entry_64.S:711 Allocated by task 1006: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc mm/kasan/common.c:494 [inline] __kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:467 slab_post_alloc_hook mm/slab.h:586 [inline] slab_alloc_node mm/slub.c:2824 [inline] slab_alloc mm/slub.c:2832 [inline] kmem_cache_alloc+0xcd/0x2e0 mm/slub.c:2837 __build_skb+0x21/0x60 net/core/skbuff.c:311 __netdev_alloc_skb+0x1e2/0x360 net/core/skbuff.c:464 netdev_alloc_skb include/linux/skbuff.h:2810 [inline] mlxsw_emad_alloc drivers/net/ethernet/mellanox/mlxsw/core.c:756 [inline] mlxsw_emad_reg_access drivers/net/ethernet/mellanox/mlxsw/core.c:787 [inline] mlxsw_core_reg_access_emad+0x1ab/0x1420 drivers/net/ethernet/mellanox/mlxsw/core.c:1817 mlxsw_reg_trans_query+0x39/0x50 drivers/net/ethernet/mellanox/mlxsw/core.c:1831 mlxsw_sp_sb_pm_occ_clear drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c:260 [inline] mlxsw_sp_sb_occ_max_clear+0xbff/0x10a0 drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c:1365 mlxsw_devlink_sb_occ_max_clear+0x76/0xb0 drivers/net/ethernet/mellanox/mlxsw/core.c:1037 devlink_nl_cmd_sb_occ_max_clear_doit+0x1ec/0x280 net/core/devlink.c:1765 genl_family_rcv_msg_doit net/netlink/genetlink.c:669 [inline] genl_family_rcv_msg net/netlink/genetlink.c:714 [inline] genl_rcv_msg+0x617/0x980 net/netlink/genetlink.c:731 netlink_rcv_skb+0x152/0x440 net/netlink/af_netlink.c:2470 genl_rcv+0x24/0x40 net/netlink/genetlink.c:742 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x53a/0x750 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x850/0xd90 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0x150/0x190 net/socket.c:671 ____sys_sendmsg+0x6d8/0x840 net/socket.c:2359 ___sys_sendmsg+0xff/0x170 net/socket.c:2413 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2446 do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:384 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 73: save_stack+0x1b/0x40 mm/kasan/common.c:48 set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0x12c/0x170 mm/kasan/common.c:455 slab_free_hook mm/slub.c:1474 [inline] slab_free_freelist_hook mm/slub.c:1507 [inline] slab_free mm/slub.c:3072 [inline] kmem_cache_free+0xbe/0x380 mm/slub.c:3088 kfree_skbmem net/core/skbuff.c:622 [inline] kfree_skbmem+0xef/0x1b0 net/core/skbuff.c:616 __kfree_skb net/core/skbuff.c:679 [inline] consume_skb net/core/skbuff.c:837 [inline] consume_skb+0xe1/0x370 net/core/skbuff.c:831 mlxsw_emad_trans_finish+0x64/0x1c0 drivers/net/ethernet/mellanox/mlxsw/core.c:592 mlxsw_emad_transmit_retry.isra.0+0x9d/0xc0 drivers/net/ethernet/mellanox/mlxsw/core.c:613 mlxsw_emad_trans_timeout_work+0x43/0x50 drivers/net/ethernet/mellanox/mlxsw/core.c:625 process_one_work+0xa3e/0x17a0 kernel/workqueue.c:2269 worker_thread+0x9e/0x1050 kernel/workqueue.c:2415 kthread+0x355/0x470 kernel/kthread.c:291 ret_from_fork+0x22/0x30 arch/x86/entry/entry_64.S:293 The buggy address belongs to the object at ffff88804f5703c0 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 212 bytes inside of 224-byte region [ffff88804f5703c0, ffff88804f5704a0) The buggy address belongs to the page: page:ffffea00013d5c00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x100000000000200(slab) raw: 0100000000000200 dead000000000100 dead000000000122 ffff88806c625400 raw: 0000000000000000 00000000000c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88804f570380: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff88804f570400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88804f570480: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff88804f570500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88804f570580: 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc Fixes: caf7297e7ab5f ("mlxsw: core: Introduce support for asynchronous EMAD register access") Signed-off-by: Amit Cohen Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlxsw/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 1ae8234053c1..423c3e9925d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -488,6 +488,9 @@ static void mlxsw_emad_transmit_retry(struct mlxsw_core *mlxsw_core, err = mlxsw_emad_transmit(trans->core, trans); if (err == 0) return; + + if (!atomic_dec_and_test(&trans->active)) + return; } else { err = -EIO; } From 2380b57afdcca42c3a529969c7f460cadb44892c Mon Sep 17 00:00:00 2001 From: Alok Prasad Date: Wed, 21 Oct 2020 11:50:08 +0000 Subject: [PATCH 052/636] RDMA/qedr: Fix memory leak in iWARP CM [ Upstream commit a2267f8a52eea9096861affd463f691be0f0e8c9 ] Fixes memory leak in iWARP CM Fixes: e411e0587e0d ("RDMA/qedr: Add iWARP connection management functions") Link: https://lore.kernel.org/r/20201021115008.28138-1-palok@marvell.com Signed-off-by: Michal Kalderon Signed-off-by: Igor Russkikh Signed-off-by: Alok Prasad Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/qedr/qedr_iw_cm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index e908dfbaa137..1f1d6a000e5c 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -677,6 +677,7 @@ int qedr_iw_destroy_listen(struct iw_cm_id *cm_id) listener->qed_handle); cm_id->rem_ref(cm_id); + kfree(listener); return rc; } From b19305641e3c0c4b3758606b7bf6e57256094a1f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 8 May 2020 07:28:19 +0200 Subject: [PATCH 053/636] ata: sata_nv: Fix retrieving of active qcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8e4c309f9f33b76c09daa02b796ef87918eee494 ] ata_qc_complete_multiple() has to be called with the tags physically active, that is the hw tag is at bit 0. ap->qc_active has the same tag at bit ATA_TAG_INTERNAL instead, so call ata_qc_get_active() to fix that up. This is done in the vein of 8385d756e114 ("libata: Fix retrieving of active qcs"). Fixes: 28361c403683 ("libata: add extra internal command") Tested-by: Pali Rohár Signed-off-by: Sascha Hauer Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/ata/sata_nv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 798d549435cc..2248a40631bf 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -2122,7 +2122,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap) pp->dhfis_bits &= ~done_mask; pp->dmafis_bits &= ~done_mask; pp->sdbfis_bits |= done_mask; - ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); + ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask); if (!ap->qc_active) { DPRINTK("over\n"); From ae5aa5685c7cc1def1e07e0ab562087c5b8c78e7 Mon Sep 17 00:00:00 2001 From: Mateusz Nosek Date: Sun, 27 Sep 2020 02:08:58 +0200 Subject: [PATCH 054/636] futex: Fix incorrect should_fail_futex() handling [ Upstream commit 921c7ebd1337d1a46783d7e15a850e12aed2eaa0 ] If should_futex_fail() returns true in futex_wake_pi(), then the 'ret' variable is set to -EFAULT and then immediately overwritten. So the failure injection is non-functional. Fix it by actually leaving the function and returning -EFAULT. The Fixes tag is kinda blury because the initial commit which introduced failure injection was already sloppy, but the below mentioned commit broke it completely. [ tglx: Massaged changelog ] Fixes: 6b4f4bc9cb22 ("locking/futex: Allow low-level atomic operations to return -EAGAIN") Signed-off-by: Mateusz Nosek Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200927000858.24219-1-mateusznosek0@gmail.com Signed-off-by: Sasha Levin --- kernel/futex.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index 920d853a8e9e..eabb9180ffa8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1517,8 +1517,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ */ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - if (unlikely(should_fail_futex(true))) + if (unlikely(should_fail_futex(true))) { ret = -EFAULT; + goto out_unlock; + } ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); if (!ret && (curval != uval)) { From 92c244538056862c19861fc2d3dabb9b0d543126 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 4 Aug 2020 10:54:05 +1000 Subject: [PATCH 055/636] powerpc/powernv/smp: Fix spurious DBG() warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f6bac19cf65c5be21d14a0c9684c8f560f2096dd ] When building with W=1 we get the following warning: arch/powerpc/platforms/powernv/smp.c: In function ‘pnv_smp_cpu_kill_self’: arch/powerpc/platforms/powernv/smp.c:276:16: error: suggest braces around empty body in an ‘if’ statement [-Werror=empty-body] 276 | cpu, srr1); | ^ cc1: all warnings being treated as errors The full context is this block: if (srr1 && !generic_check_cpu_restart(cpu)) DBG("CPU%d Unexpected exit while offline srr1=%lx!\n", cpu, srr1); When building with DEBUG undefined DBG() expands to nothing and GCC emits the warning due to the lack of braces around an empty statement. Signed-off-by: Oliver O'Halloran Reviewed-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200804005410.146094-2-oohall@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 8d49ba370c50..889c3dbec6fb 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -47,7 +47,7 @@ #include #define DBG(fmt...) udbg_printf(fmt) #else -#define DBG(fmt...) +#define DBG(fmt...) do { } while (0) #endif static void pnv_smp_setup_cpu(int cpu) From b664645274e922b3356115be9392a49cf3937ce2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:16 +1000 Subject: [PATCH 056/636] mm: fix exec activate_mm vs TLB shootdown and lazy tlb switching race commit d53c3dfb23c45f7d4f910c3a3ca84bf0a99c6143 upstream. Reading and modifying current->mm and current->active_mm and switching mm should be done with irqs off, to prevent races seeing an intermediate state. This is similar to commit 38cf307c1f20 ("mm: fix kthread_use_mm() vs TLB invalidate"). At exec-time when the new mm is activated, the old one should usually be single-threaded and no longer used, unless something else is holding an mm_users reference (which may be possible). Absent other mm_users, there is also a race with preemption and lazy tlb switching. Consider the kernel_execve case where the current thread is using a lazy tlb active mm: call_usermodehelper() kernel_execve() old_mm = current->mm; active_mm = current->active_mm; *** preempt *** --------------------> schedule() prev->active_mm = NULL; mmdrop(prev active_mm); ... <-------------------- schedule() current->mm = mm; current->active_mm = mm; if (!old_mm) mmdrop(active_mm); If we switch back to the kernel thread from a different mm, there is a double free of the old active_mm, and a missing free of the new one. Closing this race only requires interrupts to be disabled while ->mm and ->active_mm are being switched, but the TLB problem requires also holding interrupts off over activate_mm. Unfortunately not all archs can do that yet, e.g., arm defers the switch if irqs are disabled and expects finish_arch_post_lock_switch() to be called to complete the flush; um takes a blocking lock in activate_mm(). So as a first step, disable interrupts across the mm/active_mm updates to close the lazy tlb preempt race, and provide an arch option to extend that to activate_mm which allows architectures doing IPI based TLB shootdowns to close the second race. This is a bit ugly, but in the interest of fixing the bug and backporting before all architectures are converted this is a compromise. Signed-off-by: Nicholas Piggin Acked-by: Peter Zijlstra (Intel) [mpe: Manual backport to 4.19 due to membarrier_exec_mmap(mm) changes] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-2-npiggin@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 7 +++++++ fs/exec.c | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index a336548487e6..e3a030f7a722 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -366,6 +366,13 @@ config HAVE_RCU_TABLE_FREE config HAVE_RCU_TABLE_INVALIDATE bool +config ARCH_WANT_IRQS_OFF_ACTIVATE_MM + bool + help + Temporary select until all architectures can be converted to have + irqs disabled over activate_mm. Architectures that do IPI based TLB + shootdowns should enable this. + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/fs/exec.c b/fs/exec.c index cece8c14f377..52788644c4af 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1028,10 +1028,23 @@ static int exec_mmap(struct mm_struct *mm) } } task_lock(tsk); + + local_irq_disable(); active_mm = tsk->active_mm; - tsk->mm = mm; tsk->active_mm = mm; + tsk->mm = mm; + /* + * This prevents preemption while active_mm is being loaded and + * it and mm are being updated, which could cause problems for + * lazy tlb mm refcounting when these are updated by context + * switches. Not all architectures can handle irqs off over + * activate_mm yet. + */ + if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); activate_mm(active_mm, mm); + if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); From 19da77b9014170356e30ccf5d319af3f50c8af1e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:17 +1000 Subject: [PATCH 057/636] powerpc: select ARCH_WANT_IRQS_OFF_ACTIVATE_MM [ Upstream commit 66acd46080bd9e5ad2be4b0eb1d498d5145d058e ] powerpc uses IPIs in some situations to switch a kernel thread away from a lazy tlb mm, which is subject to the TLB flushing race described in the changelog introducing ARCH_WANT_IRQS_OFF_ACTIVATE_MM. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-3-npiggin@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/mmu_context.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f38d153d2586..0bc53f0e37c0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -152,6 +152,7 @@ config PPC select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_EXTABLE_SORT diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index ae953958c0f3..d93bdcaa4a46 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -204,7 +204,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); } /* We don't currently use enter_lazy_tlb() for anything */ From 4775def575a57774d320232a076933f195c171bd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 14 Sep 2020 14:52:18 +1000 Subject: [PATCH 058/636] sparc64: remove mm_cpumask clearing to fix kthread_use_mm race [ Upstream commit bafb056ce27940c9994ea905336aa8f27b4f7275 ] The de facto (and apparently uncommented) standard for using an mm had, thanks to this code in sparc if nothing else, been that you must have a reference on mm_users *and that reference must have been obtained with mmget()*, i.e., from a thread with a reference to mm_users that had used the mm. The introduction of mmget_not_zero() in commit d2005e3f41d4 ("userfaultfd: don't pin the user memory in userfaultfd_file_create()") allowed mm_count holders to aoperate on user mappings asynchronously from the actual threads using the mm, but they were not to load those mappings into their TLB (i.e., walking vmas and page tables is okay, kthread_use_mm() is not). io_uring 2b188cc1bb857 ("Add io_uring IO interface") added code which does a kthread_use_mm() from a mmget_not_zero() refcount. The problem with this is code which previously assumed mm == current->mm and mm->mm_users == 1 implies the mm will remain single-threaded at least until this thread creates another mm_users reference, has now broken. arch/sparc/kernel/smp_64.c: if (atomic_read(&mm->mm_users) == 1) { cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); goto local_flush_and_out; } vs fs/io_uring.c if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) || !mmget_not_zero(ctx->sqo_mm))) return -EFAULT; kthread_use_mm(ctx->sqo_mm); mmget_not_zero() could come in right after the mm_users == 1 test, then kthread_use_mm() which sets its CPU in the mm_cpumask. That update could be lost if cpumask_copy() occurs afterward. I propose we fix this by allowing mmget_not_zero() to be a first-class reference, and not have this obscure undocumented and unchecked restriction. The basic fix for sparc64 is to remove its mm_cpumask clearing code. The optimisation could be effectively restored by sending IPIs to mm_cpumask members and having them remove themselves from mm_cpumask. This is more tricky so I leave it as an exercise for someone with a sparc64 SMP. powerpc has a (currently similarly broken) example. Signed-off-by: Nicholas Piggin Acked-by: David S. Miller Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200914045219.3736466-4-npiggin@gmail.com Signed-off-by: Sasha Levin --- arch/sparc/kernel/smp_64.c | 65 ++++++++------------------------------ 1 file changed, 14 insertions(+), 51 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d3ea1f3c06a0..a7d7b7ade42f 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1039,38 +1039,9 @@ void smp_fetch_global_pmu(void) * are flush_tlb_*() routines, and these run after flush_cache_*() * which performs the flushw. * - * The SMP TLB coherency scheme we use works as follows: - * - * 1) mm->cpu_vm_mask is a bit mask of which cpus an address - * space has (potentially) executed on, this is the heuristic - * we use to avoid doing cross calls. - * - * Also, for flushing from kswapd and also for clones, we - * use cpu_vm_mask as the list of cpus to make run the TLB. - * - * 2) TLB context numbers are shared globally across all processors - * in the system, this allows us to play several games to avoid - * cross calls. - * - * One invariant is that when a cpu switches to a process, and - * that processes tsk->active_mm->cpu_vm_mask does not have the - * current cpu's bit set, that tlb context is flushed locally. - * - * If the address space is non-shared (ie. mm->count == 1) we avoid - * cross calls when we want to flush the currently running process's - * tlb state. This is done by clearing all cpu bits except the current - * processor's in current->mm->cpu_vm_mask and performing the - * flush locally only. This will force any subsequent cpus which run - * this task to flush the context from the local tlb if the process - * migrates to another cpu (again). - * - * 3) For shared address spaces (threads) and swapping we bite the - * bullet for most cases and perform the cross call (but only to - * the cpus listed in cpu_vm_mask). - * - * The performance gain from "optimizing" away the cross call for threads is - * questionable (in theory the big win for threads is the massive sharing of - * address space state across processors). + * mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to limit cross calls. */ /* This currently is only used by the hugetlb arch pre-fault @@ -1080,18 +1051,13 @@ void smp_fetch_global_pmu(void) void smp_flush_tlb_mm(struct mm_struct *mm) { u32 ctx = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (atomic_read(&mm->mm_users) == 1) { - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - goto local_flush_and_out; - } + get_cpu(); smp_cross_call_masked(&xcall_flush_tlb_mm, ctx, 0, 0, mm_cpumask(mm)); -local_flush_and_out: __flush_tlb_mm(ctx, SECONDARY_CONTEXT); put_cpu(); @@ -1114,17 +1080,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long { u32 ctx = CTX_HWBITS(mm->context); struct tlb_pending_info info; - int cpu = get_cpu(); + + get_cpu(); info.ctx = ctx; info.nr = nr; info.vaddrs = vaddrs; - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_call_function_many(mm_cpumask(mm), tlb_pending_func, - &info, 1); + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); __flush_tlb_pending(ctx, nr, vaddrs); @@ -1134,14 +1098,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) { unsigned long context = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_cross_call_masked(&xcall_flush_tlb_page, - context, vaddr, 0, - mm_cpumask(mm)); + get_cpu(); + + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, + mm_cpumask(mm)); + __flush_tlb_page(context, vaddr); put_cpu(); From 3c09f4652f7cfa655b723ef94cdcfc74f892da2d Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Mon, 21 Sep 2020 20:45:44 +0800 Subject: [PATCH 059/636] f2fs: add trace exit in exception path [ Upstream commit 9b66482282888d02832b7d90239e1cdb18e4b431 ] Missing the trace exit in f2fs_sync_dirty_inodes Signed-off-by: Zhang Qilong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 388500eec729..d412fc150988 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1003,8 +1003,12 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) get_pages(sbi, is_dir ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); retry: - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { + trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir, + get_pages(sbi, is_dir ? + F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); return -EIO; + } spin_lock(&sbi->inode_lock[type]); From 0ca99ffc6befae0f2d90182a66182b19c0954c94 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 29 Sep 2020 09:22:50 +0800 Subject: [PATCH 060/636] f2fs: fix uninit-value in f2fs_lookup [ Upstream commit 6d7ab88a98c1b7a47c228f8ffb4f44d631eaf284 ] As syzbot reported: Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x21c/0x280 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:122 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:219 f2fs_lookup+0xe05/0x1a80 fs/f2fs/namei.c:503 lookup_open fs/namei.c:3082 [inline] open_last_lookups fs/namei.c:3177 [inline] path_openat+0x2729/0x6a90 fs/namei.c:3365 do_filp_open+0x2b8/0x710 fs/namei.c:3395 do_sys_openat2+0xa88/0x1140 fs/open.c:1168 do_sys_open fs/open.c:1184 [inline] __do_compat_sys_openat fs/open.c:1242 [inline] __se_compat_sys_openat+0x2a4/0x310 fs/open.c:1240 __ia32_compat_sys_openat+0x56/0x70 fs/open.c:1240 do_syscall_32_irqs_on arch/x86/entry/common.c:80 [inline] __do_fast_syscall_32+0x129/0x180 arch/x86/entry/common.c:139 do_fast_syscall_32+0x6a/0xc0 arch/x86/entry/common.c:162 do_SYSENTER_32+0x73/0x90 arch/x86/entry/common.c:205 entry_SYSENTER_compat_after_hwframe+0x4d/0x5c In f2fs_lookup(), @res_page could be used before being initialized, because in __f2fs_find_entry(), once F2FS_I(dir)->i_current_depth was been fuzzed to zero, then @res_page will never be initialized, causing this kmsan warning, relocating @res_page initialization place to fix this bug. Reported-by: syzbot+0eac6f0bbd558fd866d7@syzkaller.appspotmail.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/dir.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index ebe19894884b..2cd85ce3e450 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -208,16 +208,15 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + *res_page = NULL; + if (f2fs_has_inline_dentry(dir)) { - *res_page = NULL; de = f2fs_find_in_inline_dir(dir, fname, res_page); goto out; } - if (npages == 0) { - *res_page = NULL; + if (npages == 0) goto out; - } max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { @@ -229,7 +228,6 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, } for (level = 0; level < max_depth; level++) { - *res_page = NULL; de = find_in_level(dir, level, fname, res_page); if (de || IS_ERR(*res_page)) break; From f552d88c0ddd90963b6ea8546aedb09622a9d855 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 29 Sep 2020 09:23:12 +0800 Subject: [PATCH 061/636] f2fs: fix to check segment boundary during SIT page readahead [ Upstream commit 6a257471fa42c8c9c04a875cd3a2a22db148e0f0 ] As syzbot reported: kernel BUG at fs/f2fs/segment.h:657! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 16220 Comm: syz-executor.0 Not tainted 5.9.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:f2fs_ra_meta_pages+0xa51/0xdc0 fs/f2fs/segment.h:657 Call Trace: build_sit_entries fs/f2fs/segment.c:4195 [inline] f2fs_build_segment_manager+0x4b8a/0xa3c0 fs/f2fs/segment.c:4779 f2fs_fill_super+0x377d/0x6b80 fs/f2fs/super.c:3633 mount_bdev+0x32e/0x3f0 fs/super.c:1417 legacy_get_tree+0x105/0x220 fs/fs_context.c:592 vfs_get_tree+0x89/0x2f0 fs/super.c:1547 do_new_mount fs/namespace.c:2875 [inline] path_mount+0x1387/0x2070 fs/namespace.c:3192 do_mount fs/namespace.c:3205 [inline] __do_sys_mount fs/namespace.c:3413 [inline] __se_sys_mount fs/namespace.c:3390 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3390 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 @blkno in f2fs_ra_meta_pages could exceed max segment count, causing panic in following sanity check in current_sit_addr(), add check condition to avoid this issue. Reported-by: syzbot+3698081bcf0bb2d12174@syzkaller.appspotmail.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/checkpoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index d412fc150988..a563de5ccd21 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -218,6 +218,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: + if (unlikely(blkno >= TOTAL_SEGS(sbi))) + goto out; /* get sit block addr */ fio.new_blkaddr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); From 798f1eb12a4d3a9f38b822d4ea97c5fd24e494a5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 Jun 2020 13:23:17 +0200 Subject: [PATCH 062/636] um: change sigio_spinlock to a mutex [ Upstream commit f2d05059e15af3f70502074f4e3a504530af504a ] Lockdep complains at boot: ============================= [ BUG: Invalid wait context ] 5.7.0-05093-g46d91ecd597b #98 Not tainted ----------------------------- swapper/1 is trying to lock: 0000000060931b98 (&desc[i].request_mutex){+.+.}-{3:3}, at: __setup_irq+0x11d/0x623 other info that might help us debug this: context-{4:4} 1 lock held by swapper/1: #0: 000000006074fed8 (sigio_spinlock){+.+.}-{2:2}, at: sigio_lock+0x1a/0x1c stack backtrace: CPU: 0 PID: 1 Comm: swapper Not tainted 5.7.0-05093-g46d91ecd597b #98 Stack: 7fa4fab0 6028dfd1 0000002a 6008bea5 7fa50700 7fa50040 7fa4fac0 6028e016 7fa4fb50 6007f6da 60959c18 00000000 Call Trace: [<60023a0e>] show_stack+0x13b/0x155 [<6028e016>] dump_stack+0x2a/0x2c [<6007f6da>] __lock_acquire+0x515/0x15f2 [<6007eb50>] lock_acquire+0x245/0x273 [<6050d9f1>] __mutex_lock+0xbd/0x325 [<6050dc76>] mutex_lock_nested+0x1d/0x1f [<6008e27e>] __setup_irq+0x11d/0x623 [<6008e8ed>] request_threaded_irq+0x169/0x1a6 [<60021eb0>] um_request_irq+0x1ee/0x24b [<600234ee>] write_sigio_irq+0x3b/0x76 [<600383ca>] sigio_broken+0x146/0x2e4 [<60020bd8>] do_one_initcall+0xde/0x281 Because we hold sigio_spinlock and then get into requesting an interrupt with a mutex. Change the spinlock to a mutex to avoid that. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/kernel/sigio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index b5e0cbb34382..476ded92affa 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -36,14 +36,14 @@ int write_sigio_irq(int fd) } /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */ -static DEFINE_SPINLOCK(sigio_spinlock); +static DEFINE_MUTEX(sigio_mutex); void sigio_lock(void) { - spin_lock(&sigio_spinlock); + mutex_lock(&sigio_mutex); } void sigio_unlock(void) { - spin_unlock(&sigio_spinlock); + mutex_unlock(&sigio_mutex); } From 2bb93af1822d392e8cce94fb7a470e5f926376dd Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 6 Aug 2020 23:24:35 +0100 Subject: [PATCH 063/636] ARM: 8997/2: hw_breakpoint: Handle inexact watchpoint addresses [ Upstream commit 22c9e58299e5f18274788ce54c03d4fb761e3c5d ] This is commit fdfeff0f9e3d ("arm64: hw_breakpoint: Handle inexact watchpoint addresses") but ported to arm32, which has the same problem. This problem was found by Android CTS tests, notably the "watchpoint_imprecise" test [1]. I tested locally against a copycat (simplified) version of the test though. [1] https://android.googlesource.com/platform/bionic/+/master/tests/sys_ptrace_test.cpp Link: https://lkml.kernel.org/r/20191019111216.1.I82eae759ca6dc28a245b043f485ca490e3015321@changeid Signed-off-by: Douglas Anderson Reviewed-by: Matthias Kaehlcke Acked-by: Will Deacon Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/hw_breakpoint.c | 100 +++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 28 deletions(-) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 8a8470d36c65..97fa9c167757 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -688,6 +688,40 @@ static void disable_single_step(struct perf_event *bp) arch_install_hw_breakpoint(bp); } +/* + * Arm32 hardware does not always report a watchpoint hit address that matches + * one of the watchpoints set. It can also report an address "near" the + * watchpoint if a single instruction access both watched and unwatched + * addresses. There is no straight-forward way, short of disassembling the + * offending instruction, to map that address back to the watchpoint. This + * function computes the distance of the memory access from the watchpoint as a + * heuristic for the likelyhood that a given access triggered the watchpoint. + * + * See this same function in the arm64 platform code, which has the same + * problem. + * + * The function returns the distance of the address from the bytes watched by + * the watchpoint. In case of an exact match, it returns 0. + */ +static u32 get_distance_from_watchpoint(unsigned long addr, u32 val, + struct arch_hw_breakpoint_ctrl *ctrl) +{ + u32 wp_low, wp_high; + u32 lens, lene; + + lens = __ffs(ctrl->len); + lene = __fls(ctrl->len); + + wp_low = val + lens; + wp_high = val + lene; + if (addr < wp_low) + return wp_low - addr; + else if (addr > wp_high) + return addr - wp_high; + else + return 0; +} + static int watchpoint_fault_on_uaccess(struct pt_regs *regs, struct arch_hw_breakpoint *info) { @@ -697,23 +731,25 @@ static int watchpoint_fault_on_uaccess(struct pt_regs *regs, static void watchpoint_handler(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - int i, access; - u32 val, ctrl_reg, alignment_mask; + int i, access, closest_match = 0; + u32 min_dist = -1, dist; + u32 val, ctrl_reg; struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = this_cpu_ptr(wp_on_reg); + /* + * Find all watchpoints that match the reported address. If no exact + * match is found. Attribute the hit to the closest watchpoint. + */ + rcu_read_lock(); for (i = 0; i < core_num_wrps; ++i) { - rcu_read_lock(); - wp = slots[i]; - if (wp == NULL) - goto unlock; + continue; - info = counter_arch_bp(wp); /* * The DFAR is an unknown value on debug architectures prior * to 7.1. Since we only allow a single watchpoint on these @@ -722,33 +758,31 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, */ if (debug_arch < ARM_DEBUG_ARCH_V7_1) { BUG_ON(i > 0); + info = counter_arch_bp(wp); info->trigger = wp->attr.bp_addr; } else { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) - alignment_mask = 0x7; - else - alignment_mask = 0x3; - - /* Check if the watchpoint value matches. */ - val = read_wb_reg(ARM_BASE_WVR + i); - if (val != (addr & ~alignment_mask)) - goto unlock; - - /* Possible match, check the byte address select. */ - ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); - decode_ctrl_reg(ctrl_reg, &ctrl); - if (!((1 << (addr & alignment_mask)) & ctrl.len)) - goto unlock; - /* Check that the access type matches. */ if (debug_exception_updates_fsr()) { access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W : HW_BREAKPOINT_R; if (!(access & hw_breakpoint_type(wp))) - goto unlock; + continue; } + val = read_wb_reg(ARM_BASE_WVR + i); + ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); + decode_ctrl_reg(ctrl_reg, &ctrl); + dist = get_distance_from_watchpoint(addr, val, &ctrl); + if (dist < min_dist) { + min_dist = dist; + closest_match = i; + } + /* Is this an exact match? */ + if (dist != 0) + continue; + /* We have a winner. */ + info = counter_arch_bp(wp); info->trigger = addr; } @@ -770,13 +804,23 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * we can single-step over the watchpoint trigger. */ if (!is_default_overflow_handler(wp)) - goto unlock; - + continue; step: enable_single_step(wp, instruction_pointer(regs)); -unlock: - rcu_read_unlock(); } + + if (min_dist > 0 && min_dist != -1) { + /* No exact match found. */ + wp = slots[closest_match]; + info = counter_arch_bp(wp); + info->trigger = addr; + pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); + perf_bp_event(wp, regs); + if (is_default_overflow_handler(wp)) + enable_single_step(wp, instruction_pointer(regs)); + } + + rcu_read_unlock(); } static void watchpoint_single_step_handler(unsigned long pc) From ae20c9f54daa944cd3f16e5b421049aab83a6a7c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 19 Sep 2020 16:04:14 +0200 Subject: [PATCH 064/636] power: supply: bq27xxx: report "not charging" on all types [ Upstream commit 7bf738ba110722b63e9dc8af760d3fb2aef25593 ] Commit 6f24ff97e323 ("power: supply: bq27xxx_battery: Add the BQ27Z561 Battery monitor") and commit d74534c27775 ("power: bq27xxx_battery: Add support for additional bq27xxx family devices") added support for new device types by copying most of the code and adding necessary quirks. However they did not copy the code in bq27xxx_battery_status() responsible for returning POWER_SUPPLY_STATUS_NOT_CHARGING. Unify the bq27xxx_battery_status() so for all types when charger is supplied, it will return "not charging" status. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/bq27xxx_battery.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index ff02a917556a..93e3d9c747aa 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -1680,8 +1680,6 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di, status = POWER_SUPPLY_STATUS_FULL; else if (di->cache.flags & BQ27000_FLAG_CHGS) status = POWER_SUPPLY_STATUS_CHARGING; - else if (power_supply_am_i_supplied(di->bat) > 0) - status = POWER_SUPPLY_STATUS_NOT_CHARGING; else status = POWER_SUPPLY_STATUS_DISCHARGING; } else { @@ -1693,6 +1691,10 @@ static int bq27xxx_battery_status(struct bq27xxx_device_info *di, status = POWER_SUPPLY_STATUS_CHARGING; } + if ((status == POWER_SUPPLY_STATUS_DISCHARGING) && + (power_supply_am_i_supplied(di->bat) > 0)) + status = POWER_SUPPLY_STATUS_NOT_CHARGING; + val->intval = status; return 0; From bda64b43564ac3eca39202db4f1462487ca4a6bf Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 7 Oct 2020 13:55:16 -0700 Subject: [PATCH 065/636] xfs: fix realtime bitmap/summary file truncation when growing rt volume [ Upstream commit f4c32e87de7d66074d5612567c5eac7325024428 ] The realtime bitmap and summary files are regular files that are hidden away from the directory tree. Since they're regular files, inode inactivation will try to purge what it thinks are speculative preallocations beyond the incore size of the file. Unfortunately, xfs_growfs_rt forgets to update the incore size when it resizes the inodes, with the result that inactivating the rt inodes at unmount time will cause their contents to be truncated. Fix this by updating the incore size when we change the ondisk size as part of updating the superblock. Note that we don't do this when we're allocating blocks to the rt inodes because we actually want those blocks to get purged if the growfs fails. This fixes corruption complaints from the online rtsummary checker when running xfs/233. Since that test requires rmap, one can also trigger this by growing an rt volume, cycling the mount, and creating rt files. Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Signed-off-by: Sasha Levin --- fs/xfs/xfs_rtalloc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 08da48b66235..280965fc9bbd 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -998,10 +998,13 @@ xfs_growfs_rt( xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); /* - * Update the bitmap inode's size. + * Update the bitmap inode's size ondisk and incore. We need + * to update the incore size so that inode inactivation won't + * punch what it thinks are "posteof" blocks. */ mp->m_rbmip->i_d.di_size = nsbp->sb_rbmblocks * nsbp->sb_blocksize; + i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size); xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); /* * Get the summary inode into the transaction. @@ -1009,9 +1012,12 @@ xfs_growfs_rt( xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); /* - * Update the summary inode's size. + * Update the summary inode's size. We need to update the + * incore size so that inode inactivation won't punch what it + * thinks are "posteof" blocks. */ mp->m_rsumip->i_d.di_size = nmp->m_rsumsize; + i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size); xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE); /* * Copy summary data from old to new sizes. From 8c755f49ac7cdae0d704c99e8299018aaa761fbe Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 20 Jul 2020 12:18:45 -0700 Subject: [PATCH 066/636] video: fbdev: pvr2fb: initialize variables [ Upstream commit 8e1ba47c60bcd325fdd097cd76054639155e5d2e ] clang static analysis reports this repesentative error pvr2fb.c:1049:2: warning: 1st function call argument is an uninitialized value [core.CallAndMessage] if (*cable_arg) ^~~~~~~~~~~~~~~ Problem is that cable_arg depends on the input loop to set the cable_arg[0]. If it does not, then some random value from the stack is used. A similar problem exists for output_arg. So initialize cable_arg and output_arg. Signed-off-by: Tom Rix Acked-by: Arnd Bergmann Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200720191845.20115-1-trix@redhat.com Signed-off-by: Sasha Levin --- drivers/video/fbdev/pvr2fb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index 8a53d1de611d..3fd2cb4cdfa9 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -1027,6 +1027,8 @@ static int __init pvr2fb_setup(char *options) if (!options || !*options) return 0; + cable_arg[0] = output_arg[0] = 0; + while ((this_opt = strsep(&options, ","))) { if (!*this_opt) continue; From 1a58018084f8025734c15fad1a31c888608d9e26 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 14 Aug 2020 18:17:08 +0300 Subject: [PATCH 067/636] ath10k: start recovery process when payload length exceeds max htc length for sdio [ Upstream commit 2fd3c8f34d08af0a6236085f9961866ad92ef9ec ] When simulate random transfer fail for sdio write and read, it happened "payload length exceeds max htc length" and recovery later sometimes. Test steps: 1. Add config and update kernel: CONFIG_FAIL_MMC_REQUEST=y CONFIG_FAULT_INJECTION=y CONFIG_FAULT_INJECTION_DEBUG_FS=y 2. Run simulate fail: cd /sys/kernel/debug/mmc1/fail_mmc_request echo 10 > probability echo 10 > times # repeat until hitting issues 3. It happened payload length exceeds max htc length. [ 199.935506] ath10k_sdio mmc1:0001:1: payload length 57005 exceeds max htc length: 4088 .... [ 264.990191] ath10k_sdio mmc1:0001:1: payload length 57005 exceeds max htc length: 4088 4. after some time, such as 60 seconds, it start recovery which triggered by wmi command timeout for periodic scan. [ 269.229232] ieee80211 phy0: Hardware restart was requested [ 269.734693] ath10k_sdio mmc1:0001:1: device successfully recovered The simulate fail of sdio is not a real sdio transter fail, it only set an error status in mmc_should_fail_request after the transfer end, actually the transfer is success, then sdio_io_rw_ext_helper will return error status and stop transfer the left data. For example, the really RX len is 286 bytes, then it will split to 2 blocks in sdio_io_rw_ext_helper, one is 256 bytes, left is 30 bytes, if the first 256 bytes get an error status by mmc_should_fail_request,then the left 30 bytes will not read in this RX operation. Then when the next RX arrive, the left 30 bytes will be considered as the header of the read, the top 4 bytes of the 30 bytes will be considered as lookaheads, but actually the 4 bytes is not the lookaheads, so the len from this lookaheads is not correct, it exceeds max htc length 4088 sometimes. When happened exceeds, the buffer chain is not matched between firmware and ath10k, then it need to start recovery ASAP. Recently then recovery will be started by wmi command timeout, but it will be long time later, for example, it is 60+ seconds later from the periodic scan, if it does not have periodic scan, it will be longer. Start recovery when it happened "payload length exceeds max htc length" will be reasonable. This patch only effect sdio chips. Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00029. Signed-off-by: Wen Gong Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200108031957.22308-3-wgong@codeaurora.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/sdio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 0cdaecb0e28a..28d86da65c05 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -561,6 +561,10 @@ static int ath10k_sdio_mbox_rx_alloc(struct ath10k *ar, le16_to_cpu(htc_hdr->len), ATH10K_HTC_MBOX_MAX_PAYLOAD_LENGTH); ret = -ENOMEM; + + queue_work(ar->workqueue, &ar->restart_work); + ath10k_warn(ar, "exceeds length, start recovery\n"); + goto err; } From 9d8e083bcf6b5540d404a278f9311095a4284fff Mon Sep 17 00:00:00 2001 From: Sathishkumar Muruganandam Date: Fri, 14 Aug 2020 13:46:11 +0530 Subject: [PATCH 068/636] ath10k: fix VHT NSS calculation when STBC is enabled [ Upstream commit 99f41b8e43b8b4b31262adb8ac3e69088fff1289 ] When STBC is enabled, NSTS_SU value need to be accounted for VHT NSS calculation for SU case. Without this fix, 1SS + STBC enabled case was reported wrongly as 2SS in radiotap header on monitor mode capture. Tested-on: QCA9984 10.4-3.10-00047 Signed-off-by: Sathishkumar Muruganandam Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1597392971-3897-1-git-send-email-murugana@codeaurora.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath10k/htt_rx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 7d15f6208b46..68cda1564c77 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -767,6 +767,7 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar, u8 preamble = 0; u8 group_id; u32 info1, info2, info3; + u32 stbc, nsts_su; info1 = __le32_to_cpu(rxd->ppdu_start.info1); info2 = __le32_to_cpu(rxd->ppdu_start.info2); @@ -811,11 +812,16 @@ static void ath10k_htt_rx_h_rates(struct ath10k *ar, */ bw = info2 & 3; sgi = info3 & 1; + stbc = (info2 >> 3) & 1; group_id = (info2 >> 4) & 0x3F; if (GROUP_ID_IS_SU_MIMO(group_id)) { mcs = (info3 >> 4) & 0x0F; - nss = ((info2 >> 10) & 0x07) + 1; + nsts_su = ((info2 >> 10) & 0x07); + if (stbc) + nss = (nsts_su >> 2) + 1; + else + nss = (nsts_su + 1); } else { /* Hardware doesn't decode VHT-SIG-B into Rx descriptor * so it's impossible to decode MCS. Also since From 17c4ba10aec5e2b9d20dbf5f47ad4ca0851cf1b3 Mon Sep 17 00:00:00 2001 From: Nadezda Lutovinova Date: Wed, 19 Aug 2020 17:37:56 +0300 Subject: [PATCH 069/636] drm/brige/megachips: Add checking if ge_b850v3_lvds_init() is working correctly [ Upstream commit f688a345f0d7a6df4dd2aeca8e4f3c05e123a0ee ] If ge_b850v3_lvds_init() does not allocate memory for ge_b850v3_lvds_ptr, then a null pointer dereference is accessed. The patch adds checking of the return value of ge_b850v3_lvds_init(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Nadezda Lutovinova Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200819143756.30626-1-lutovinova@ispras.ru Signed-off-by: Sasha Levin --- .../gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index 2136c97aeb8e..dcf091f9d843 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -306,8 +306,12 @@ static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, const struct i2c_device_id *id) { struct device *dev = &stdp4028_i2c->dev; + int ret; - ge_b850v3_lvds_init(dev); + ret = ge_b850v3_lvds_init(dev); + + if (ret) + return ret; ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c; i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr); @@ -365,8 +369,12 @@ static int stdp2690_ge_b850v3_fw_probe(struct i2c_client *stdp2690_i2c, const struct i2c_device_id *id) { struct device *dev = &stdp2690_i2c->dev; + int ret; - ge_b850v3_lvds_init(dev); + ret = ge_b850v3_lvds_init(dev); + + if (ret) + return ret; ge_b850v3_lvds_ptr->stdp2690_i2c = stdp2690_i2c; i2c_set_clientdata(stdp2690_i2c, ge_b850v3_lvds_ptr); From df661bb764a2d54d7353c39521a4879de5bb2ebf Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 20 Aug 2020 12:47:16 +0200 Subject: [PATCH 070/636] media: videodev2.h: RGB BT2020 and HSV are always full range [ Upstream commit b305dfe2e93434b12d438434461b709641f62af4 ] The default RGB quantization range for BT.2020 is full range (just as for all the other RGB pixel encodings), not limited range. Update the V4L2_MAP_QUANTIZATION_DEFAULT macro and documentation accordingly. Also mention that HSV is always full range and cannot be limited range. When RGB BT2020 was introduced in V4L2 it was not clear whether it should be limited or full range, but full range is the right (and consistent) choice. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- .../media/uapi/v4l/colorspaces-defs.rst | 9 ++++----- .../media/uapi/v4l/colorspaces-details.rst | 5 ++--- include/uapi/linux/videodev2.h | 17 ++++++++--------- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/Documentation/media/uapi/v4l/colorspaces-defs.rst b/Documentation/media/uapi/v4l/colorspaces-defs.rst index f24615544792..16e46bec8093 100644 --- a/Documentation/media/uapi/v4l/colorspaces-defs.rst +++ b/Documentation/media/uapi/v4l/colorspaces-defs.rst @@ -29,8 +29,7 @@ whole range, 0-255, dividing the angular value by 1.41. The enum :c:type:`v4l2_hsv_encoding` specifies which encoding is used. .. note:: The default R'G'B' quantization is full range for all - colorspaces except for BT.2020 which uses limited range R'G'B' - quantization. + colorspaces. HSV formats are always full range. .. tabularcolumns:: |p{6.0cm}|p{11.5cm}| @@ -162,8 +161,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum - Details * - ``V4L2_QUANTIZATION_DEFAULT`` - Use the default quantization encoding as defined by the - colorspace. This is always full range for R'G'B' (except for the - BT.2020 colorspace) and HSV. It is usually limited range for Y'CbCr. + colorspace. This is always full range for R'G'B' and HSV. + It is usually limited range for Y'CbCr. * - ``V4L2_QUANTIZATION_FULL_RANGE`` - Use the full range quantization encoding. I.e. the range [0…1] is mapped to [0…255] (with possible clipping to [1…254] to avoid the @@ -173,4 +172,4 @@ whole range, 0-255, dividing the angular value by 1.41. The enum * - ``V4L2_QUANTIZATION_LIM_RANGE`` - Use the limited range quantization encoding. I.e. the range [0…1] is mapped to [16…235]. Cb and Cr are mapped from [-0.5…0.5] to - [16…240]. + [16…240]. Limited Range cannot be used with HSV. diff --git a/Documentation/media/uapi/v4l/colorspaces-details.rst b/Documentation/media/uapi/v4l/colorspaces-details.rst index 09fabf4cd412..ca7176cae8dd 100644 --- a/Documentation/media/uapi/v4l/colorspaces-details.rst +++ b/Documentation/media/uapi/v4l/colorspaces-details.rst @@ -370,9 +370,8 @@ Colorspace BT.2020 (V4L2_COLORSPACE_BT2020) The :ref:`itu2020` standard defines the colorspace used by Ultra-high definition television (UHDTV). The default transfer function is ``V4L2_XFER_FUNC_709``. The default Y'CbCr encoding is -``V4L2_YCBCR_ENC_BT2020``. The default R'G'B' quantization is limited -range (!), and so is the default Y'CbCr quantization. The chromaticities -of the primary colors and the white reference are: +``V4L2_YCBCR_ENC_BT2020``. The default Y'CbCr quantization is limited range. +The chromaticities of the primary colors and the white reference are: diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 1aae2e4b8f10..b73f4423bc09 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -362,9 +362,9 @@ enum v4l2_hsv_encoding { enum v4l2_quantization { /* - * The default for R'G'B' quantization is always full range, except - * for the BT2020 colorspace. For Y'CbCr the quantization is always - * limited range, except for COLORSPACE_JPEG: this is full range. + * The default for R'G'B' quantization is always full range. + * For Y'CbCr the quantization is always limited range, except + * for COLORSPACE_JPEG: this is full range. */ V4L2_QUANTIZATION_DEFAULT = 0, V4L2_QUANTIZATION_FULL_RANGE = 1, @@ -373,14 +373,13 @@ enum v4l2_quantization { /* * Determine how QUANTIZATION_DEFAULT should map to a proper quantization. - * This depends on whether the image is RGB or not, the colorspace and the - * Y'CbCr encoding. + * This depends on whether the image is RGB or not, the colorspace. + * The Y'CbCr encoding is not used anymore, but is still there for backwards + * compatibility. */ #define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb_or_hsv, colsp, ycbcr_enc) \ - (((is_rgb_or_hsv) && (colsp) == V4L2_COLORSPACE_BT2020) ? \ - V4L2_QUANTIZATION_LIM_RANGE : \ - (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ - V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)) + (((is_rgb_or_hsv) || (colsp) == V4L2_COLORSPACE_JPEG) ? \ + V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE) /* * Deprecated names for opRGB colorspace (IEC 61966-2-5) From e135a0bebb7ae2023b2181a025634187b6bd9281 Mon Sep 17 00:00:00 2001 From: Xia Jiang Date: Fri, 14 Aug 2020 09:11:35 +0200 Subject: [PATCH 071/636] media: platform: Improve queue set up flow for bug fixing [ Upstream commit 5095a6413a0cf896ab468009b6142cb0fe617e66 ] Add checking created buffer size follow in mtk_jpeg_queue_setup(). Reviewed-by: Tomasz Figa Signed-off-by: Xia Jiang Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c index 11429633b2fb..f0bca30a0a80 100644 --- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c +++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c @@ -579,6 +579,13 @@ static int mtk_jpeg_queue_setup(struct vb2_queue *q, if (!q_data) return -EINVAL; + if (*num_planes) { + for (i = 0; i < *num_planes; i++) + if (sizes[i] < q_data->sizeimage[i]) + return -EINVAL; + return 0; + } + *num_planes = q_data->fmt->colplanes; for (i = 0; i < q_data->fmt->colplanes; i++) { sizes[i] = q_data->sizeimage[i]; From 00943757170b126923ade6e705cbf532327e4989 Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Mon, 17 Aug 2020 11:38:27 -0700 Subject: [PATCH 072/636] usb: typec: tcpm: During PR_SWAP, source caps should be sent only after tSwapSourceStart [ Upstream commit 6bbe2a90a0bb4af8dd99c3565e907fe9b5e7fd88 ] The patch addresses the compliance test failures while running TD.PD.CP.E3, TD.PD.CP.E4, TD.PD.CP.E5 of the "Deterministic PD Compliance MOI" test plan published in https://www.usb.org/usbc. For a product to be Type-C compliant, it's expected that these tests are run on usb.org certified Type-C compliance tester as mentioned in https://www.usb.org/usbc. The purpose of the tests TD.PD.CP.E3, TD.PD.CP.E4, TD.PD.CP.E5 is to verify the PR_SWAP response of the device. While doing so, the test asserts that Source Capabilities message is NOT received from the test device within tSwapSourceStart min (20 ms) from the time the last bit of GoodCRC corresponding to the RS_RDY message sent by the UUT was sent. If it does then the test fails. This is in line with the requirements from the USB Power Delivery Specification Revision 3.0, Version 1.2: "6.6.8.1 SwapSourceStartTimer The SwapSourceStartTimer Shall be used by the new Source, after a Power Role Swap or Fast Role Swap, to ensure that it does not send Source_Capabilities Message before the new Sink is ready to receive the Source_Capabilities Message. The new Source Shall Not send the Source_Capabilities Message earlier than tSwapSourceStart after the last bit of the EOP of GoodCRC Message sent in response to the PS_RDY Message sent by the new Source indicating that its power supply is ready." The patch makes sure that TCPM does not send the Source_Capabilities Message within tSwapSourceStart(20ms) by transitioning into SRC_STARTUP only after tSwapSourceStart(20ms). Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200817183828.1895015-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/tcpm.c | 2 +- include/linux/usb/pd.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c index 29d72e9b0f01..7086ebb24532 100644 --- a/drivers/usb/typec/tcpm.c +++ b/drivers/usb/typec/tcpm.c @@ -3486,7 +3486,7 @@ static void run_state_machine(struct tcpm_port *port) */ tcpm_set_pwr_role(port, TYPEC_SOURCE); tcpm_pd_send_control(port, PD_CTRL_PS_RDY); - tcpm_set_state(port, SRC_STARTUP, 0); + tcpm_set_state(port, SRC_STARTUP, PD_T_SWAP_SRC_START); break; case VCONN_SWAP_ACCEPT: diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index f2162e0fe531..bdf4c88d2aa0 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -451,6 +451,7 @@ static inline unsigned int rdo_max_power(u32 rdo) #define PD_T_ERROR_RECOVERY 100 /* minimum 25 is insufficient */ #define PD_T_SRCSWAPSTDBY 625 /* Maximum of 650ms */ #define PD_T_NEWSRC 250 /* Maximum of 275ms */ +#define PD_T_SWAP_SRC_START 20 /* Minimum of 20ms */ #define PD_T_DRP_TRY 100 /* 75 - 150 ms */ #define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */ From 7ed97d8cdad8ef27846b394ba0969c508cbc4cc7 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 10 Aug 2020 21:25:18 +0200 Subject: [PATCH 073/636] media: tw5864: check status of tw5864_frameinterval_get [ Upstream commit 780d815dcc9b34d93ae69385a8465c38d423ff0f ] clang static analysis reports this problem tw5864-video.c:773:32: warning: The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage fintv->stepwise.max.numerator *= std_max_fps; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^ stepwise.max is set with frameinterval, which comes from ret = tw5864_frameinterval_get(input, &frameinterval); fintv->stepwise.step = frameinterval; fintv->stepwise.min = frameinterval; fintv->stepwise.max = frameinterval; fintv->stepwise.max.numerator *= std_max_fps; When tw5864_frameinterval_get() fails, frameinterval is not set. So check the status and fix another similar problem. Signed-off-by: Tom Rix Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/pci/tw5864/tw5864-video.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/media/pci/tw5864/tw5864-video.c b/drivers/media/pci/tw5864/tw5864-video.c index 6c40e60ac993..b0f8d1532b70 100644 --- a/drivers/media/pci/tw5864/tw5864-video.c +++ b/drivers/media/pci/tw5864/tw5864-video.c @@ -776,6 +776,9 @@ static int tw5864_enum_frameintervals(struct file *file, void *priv, fintv->type = V4L2_FRMIVAL_TYPE_STEPWISE; ret = tw5864_frameinterval_get(input, &frameinterval); + if (ret) + return ret; + fintv->stepwise.step = frameinterval; fintv->stepwise.min = frameinterval; fintv->stepwise.max = frameinterval; @@ -794,6 +797,9 @@ static int tw5864_g_parm(struct file *file, void *priv, cp->capability = V4L2_CAP_TIMEPERFRAME; ret = tw5864_frameinterval_get(input, &cp->timeperframe); + if (ret) + return ret; + cp->timeperframe.numerator *= input->frame_interval; cp->capturemode = 0; cp->readbuffers = 2; From 4550a7f409cbc13ce220f606440457f327ba6cd7 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 3 Jul 2020 11:20:32 +0200 Subject: [PATCH 074/636] media: imx274: fix frame interval handling [ Upstream commit 49b20d981d723fae5a93843c617af2b2c23611ec ] 1) the numerator and/or denominator might be 0, in that case fall back to the default frame interval. This is per the spec and this caused a v4l2-compliance failure. 2) the updated frame interval wasn't returned in the s_frame_interval subdev op. Signed-off-by: Hans Verkuil Reviewed-by: Luca Ceresoli Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/i2c/imx274.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/imx274.c b/drivers/media/i2c/imx274.c index 8cc3bdb7f608..0fe8b869245b 100644 --- a/drivers/media/i2c/imx274.c +++ b/drivers/media/i2c/imx274.c @@ -1239,6 +1239,8 @@ static int imx274_s_frame_interval(struct v4l2_subdev *sd, ret = imx274_set_frame_interval(imx274, fi->interval); if (!ret) { + fi->interval = imx274->frame_interval; + /* * exposure time range is decided by frame interval * need to update it after frame interval changes @@ -1760,9 +1762,9 @@ static int imx274_set_frame_interval(struct stimx274 *priv, __func__, frame_interval.numerator, frame_interval.denominator); - if (frame_interval.numerator == 0) { - err = -EINVAL; - goto fail; + if (frame_interval.numerator == 0 || frame_interval.denominator == 0) { + frame_interval.denominator = IMX274_DEF_FRAME_RATE; + frame_interval.numerator = 1; } req_frame_rate = (u32)(frame_interval.denominator From 418121257b0c4761198126c4925bc666d5ed5e8e Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Sat, 22 Aug 2020 11:45:28 +0530 Subject: [PATCH 075/636] mmc: via-sdmmc: Fix data race bug [ Upstream commit 87d7ad089b318b4f319bf57f1daa64eb6d1d10ad ] via_save_pcictrlreg() should be called with host->lock held as it writes to pm_pcictrl_reg, otherwise there can be a race condition between via_sd_suspend() and via_sdc_card_detect(). The same pattern is used in the function via_reset_pcictrl() as well, where via_save_pcictrlreg() is called with host->lock held. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Madhuparna Bhowmik Link: https://lore.kernel.org/r/20200822061528.7035-1-madhuparnabhowmik10@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/via-sdmmc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c index 246dc6255e69..9fdb92729c28 100644 --- a/drivers/mmc/host/via-sdmmc.c +++ b/drivers/mmc/host/via-sdmmc.c @@ -1273,11 +1273,14 @@ static void via_init_sdc_pm(struct via_crdr_mmc_host *host) static int via_sd_suspend(struct pci_dev *pcidev, pm_message_t state) { struct via_crdr_mmc_host *host; + unsigned long flags; host = pci_get_drvdata(pcidev); + spin_lock_irqsave(&host->lock, flags); via_save_pcictrlreg(host); via_save_sdcreg(host); + spin_unlock_irqrestore(&host->lock, flags); pci_save_state(pcidev); pci_enable_wake(pcidev, pci_choose_state(pcidev, state), 0); From 2c7a26926ec656bc8c9adbd515f24fce8dd3f2d6 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Wed, 1 Jul 2020 21:42:34 +0200 Subject: [PATCH 076/636] drm/bridge/synopsys: dsi: add support for non-continuous HS clock [ Upstream commit c6d94e37bdbb6dfe7e581e937a915ab58399b8a5 ] Current code enables the HS clock when video mode is started or to send out a HS command, and disables the HS clock to send out a LP command. This is not what DSI spec specify. Enable HS clock either in command and in video mode. Set automatic HS clock management for panels and devices that support non-continuous HS clock. Signed-off-by: Antonio Borneo Tested-by: Philippe Cornu Reviewed-by: Philippe Cornu Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20200701194234.18123-1-yannick.fertre@st.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index fd7999642cf8..8b5f9241a887 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -326,7 +326,6 @@ static void dw_mipi_message_config(struct dw_mipi_dsi *dsi, if (lpm) val |= CMD_MODE_ALL_LP; - dsi_write(dsi, DSI_LPCLK_CTRL, lpm ? 0 : PHY_TXREQUESTCLKHS); dsi_write(dsi, DSI_CMD_MODE_CFG, val); } @@ -488,16 +487,22 @@ static void dw_mipi_dsi_video_mode_config(struct dw_mipi_dsi *dsi) static void dw_mipi_dsi_set_mode(struct dw_mipi_dsi *dsi, unsigned long mode_flags) { + u32 val; + dsi_write(dsi, DSI_PWR_UP, RESET); if (mode_flags & MIPI_DSI_MODE_VIDEO) { dsi_write(dsi, DSI_MODE_CFG, ENABLE_VIDEO_MODE); dw_mipi_dsi_video_mode_config(dsi); - dsi_write(dsi, DSI_LPCLK_CTRL, PHY_TXREQUESTCLKHS); } else { dsi_write(dsi, DSI_MODE_CFG, ENABLE_CMD_MODE); } + val = PHY_TXREQUESTCLKHS; + if (dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) + val |= AUTO_CLKLANE_CTRL; + dsi_write(dsi, DSI_LPCLK_CTRL, val); + dsi_write(dsi, DSI_PWR_UP, POWERUP); } From f3dc5ac4011eac5a3a8b28fc13fe08b7dfb5af34 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Sat, 29 Aug 2020 14:00:16 +0100 Subject: [PATCH 077/636] arm64: topology: Stop using MPIDR for topology information [ Upstream commit 3102bc0e6ac752cc5df896acb557d779af4d82a1 ] In the absence of ACPI or DT topology data, we fallback to haphazardly decoding *something* out of MPIDR. Sadly, the contents of that register are mostly unusable due to the implementation leniancy and things like Aff0 having to be capped to 15 (despite being encoded on 8 bits). Consider a simple system with a single package of 32 cores, all under the same LLC. We ought to be shoving them in the same core_sibling mask, but MPIDR is going to look like: | CPU | 0 | ... | 15 | 16 | ... | 31 | |------+---+-----+----+----+-----+----+ | Aff0 | 0 | ... | 15 | 0 | ... | 15 | | Aff1 | 0 | ... | 0 | 1 | ... | 1 | | Aff2 | 0 | ... | 0 | 0 | ... | 0 | Which will eventually yield core_sibling(0-15) == 0-15 core_sibling(16-31) == 16-31 NUMA woes ========= If we try to play games with this and set up NUMA boundaries within those groups of 16 cores via e.g. QEMU: # Node0: 0-9; Node1: 10-19 $ qemu-system-aarch64 \ -smp 20 -numa node,cpus=0-9,nodeid=0 -numa node,cpus=10-19,nodeid=1 The scheduler's MC domain (all CPUs with same LLC) is going to be built via arch_topology.c::cpu_coregroup_mask() In there we try to figure out a sensible mask out of the topology information we have. In short, here we'll pick the smallest of NUMA or core sibling mask. node_mask(CPU9) == 0-9 core_sibling(CPU9) == 0-15 MC mask for CPU9 will thus be 0-9, not a problem. node_mask(CPU10) == 10-19 core_sibling(CPU10) == 0-15 MC mask for CPU10 will thus be 10-19, not a problem. node_mask(CPU16) == 10-19 core_sibling(CPU16) == 16-19 MC mask for CPU16 will thus be 16-19... Uh oh. CPUs 16-19 are in two different unique MC spans, and the scheduler has no idea what to make of that. That triggers the WARN_ON() added by commit ccf74128d66c ("sched/topology: Assert non-NUMA topology masks don't (partially) overlap") Fixing MPIDR-derived topology ============================= We could try to come up with some cleverer scheme to figure out which of the available masks to pick, but really if one of those masks resulted from MPIDR then it should be discarded because it's bound to be bogus. I was hoping to give MPIDR a chance for SMT, to figure out which threads are in the same core using Aff1-3 as core ID, but Sudeep and Robin pointed out to me that there are systems out there where *all* cores have non-zero values in their higher affinity fields (e.g. RK3288 has "5" in all of its cores' MPIDR.Aff1), which would expose a bogus core ID to userspace. Stop using MPIDR for topology information. When no other source of topology information is available, mark each CPU as its own core and its NUMA node as its LLC domain. Signed-off-by: Valentin Schneider Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20200829130016.26106-1-valentin.schneider@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/topology.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 6106c49f84bc..655a308af9e3 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -272,21 +272,23 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_UP_BITMASK) return; - /* Create cpu topology mapping based on MPIDR. */ - if (mpidr & MPIDR_MT_BITMASK) { - /* Multiprocessor system : Multi-threads per core */ - cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; - } else { - /* Multiprocessor system : Single-thread per core */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | - MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; - } + /* + * This would be the place to create cpu topology based on MPIDR. + * + * However, it cannot be trusted to depict the actual topology; some + * pieces of the architecture enforce an artificial cap on Aff0 values + * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an + * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up + * having absolutely no relationship to the actual underlying system + * topology, and cannot be reasonably used as core / package ID. + * + * If the MT bit is set, Aff0 *could* be used to define a thread ID, but + * we still wouldn't be able to obtain a sane core ID. This means we + * need to entirely ignore MPIDR for any topology deduction. + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", cpuid, cpuid_topo->package_id, cpuid_topo->core_id, From 040b531929dae903c5b347d4a87ca969a86e6906 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 12 Aug 2020 09:37:22 +0206 Subject: [PATCH 078/636] printk: reduce LOG_BUF_SHIFT range for H8300 [ Upstream commit 550c10d28d21bd82a8bb48debbb27e6ed53262f6 ] The .bss section for the h8300 is relatively small. A value of CONFIG_LOG_BUF_SHIFT that is larger than 19 will create a static printk ringbuffer that is too large. Limit the range appropriately for the H8300. Reported-by: kernel test robot Signed-off-by: John Ogness Reviewed-by: Sergey Senozhatsky Acked-by: Steven Rostedt (VMware) Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20200812073122.25412-1-john.ogness@linutronix.de Signed-off-by: Sasha Levin --- init/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 47035b5a46f6..5eb91dde4018 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -535,7 +535,8 @@ config IKCONFIG_PROC config LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" - range 12 25 + range 12 25 if !H8300 + range 12 19 if H8300 default 17 depends on PRINTK help From d3380de483d55d904fb94a241406b34ed2fada7d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 29 Aug 2020 22:01:09 +0900 Subject: [PATCH 079/636] ia64: kprobes: Use generic kretprobe trampoline handler [ Upstream commit e792ff804f49720ce003b3e4c618b5d996256a18 ] Use the generic kretprobe trampoline handler. Don't use framepointer verification. Signed-off-by: Masami Hiramatsu Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/159870606883.1229682.12331813108378725668.stgit@devnote2 Signed-off-by: Sasha Levin --- arch/ia64/kernel/kprobes.c | 77 +------------------------------------- 1 file changed, 2 insertions(+), 75 deletions(-) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index aa41bd5cf9b7..8207b897b49d 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -409,83 +409,9 @@ static void kretprobe_trampoline(void) { } -/* - * At this point the target function has been tricked into - * returning into our trampoline. Lookup the associated instance - * and then: - * - call the handler function - * - cleanup by marking the instance as unused - * - long jump back to the original return address - */ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - struct kretprobe_instance *ri = NULL; - struct hlist_head *head, empty_rp; - struct hlist_node *tmp; - unsigned long flags, orig_ret_address = 0; - unsigned long trampoline_address = - ((struct fnptr *)kretprobe_trampoline)->ip; - - INIT_HLIST_HEAD(&empty_rp); - kretprobe_hash_lock(current, &head, &flags); - - /* - * It is possible to have multiple instances associated with a given - * task either because an multiple functions in the call path - * have a return probe installed on them, and/or more than one return - * return probe was registered for a target function. - * - * We can handle this because: - * - instances are always inserted at the head of the list - * - when multiple return probes are registered for the same - * function, the first instance's ret_addr will point to the - * real return address, and all the rest will point to - * kretprobe_trampoline - */ - hlist_for_each_entry_safe(ri, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - orig_ret_address = (unsigned long)ri->ret_addr; - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - regs->cr_iip = orig_ret_address; - - hlist_for_each_entry_safe(ri, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - if (ri->rp && ri->rp->handler) - ri->rp->handler(ri, regs); - - orig_ret_address = (unsigned long)ri->ret_addr; - recycle_rp_inst(ri, &empty_rp); - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - kretprobe_assert(ri, orig_ret_address, trampoline_address); - - kretprobe_hash_unlock(current, &flags); - - hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { - hlist_del(&ri->hlist); - kfree(ri); - } + regs->cr_iip = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL); /* * By returning a non-zero value, we are telling * kprobe_handler() that we don't want the post_handler @@ -498,6 +424,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->b0; + ri->fp = NULL; /* Replace the return addr with trampoline addr */ regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; From cbc03dedecb6e5366f2d3f0a328cfc3ef4762a25 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 30 Jun 2020 15:14:38 -0700 Subject: [PATCH 080/636] kgdb: Make "kgdbcon" work properly with "kgdb_earlycon" [ Upstream commit b18b099e04f450cdc77bec72acefcde7042bd1f3 ] On my system the kernel processes the "kgdb_earlycon" parameter before the "kgdbcon" parameter. When we setup "kgdb_earlycon" we'll end up in kgdb_register_callbacks() and "kgdb_use_con" won't have been set yet so we'll never get around to starting "kgdbcon". Let's remedy this by detecting that the IO module was already registered when setting "kgdb_use_con" and registering the console then. As part of this, to avoid pre-declaring things, move the handling of the "kgdbcon" further down in the file. Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20200630151422.1.I4aa062751ff5e281f5116655c976dff545c09a46@changeid Signed-off-by: Daniel Thompson Signed-off-by: Sasha Levin --- kernel/debug/debug_core.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index fbb1bfdd2fa5..8c76141c99c8 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -95,14 +95,6 @@ int dbg_switch_cpu; /* Use kdb or gdbserver mode */ int dbg_kdb_mode = 1; -static int __init opt_kgdb_con(char *str) -{ - kgdb_use_con = 1; - return 0; -} - -early_param("kgdbcon", opt_kgdb_con); - module_param(kgdb_use_con, int, 0644); module_param(kgdbreboot, int, 0644); @@ -820,6 +812,20 @@ static struct console kgdbcons = { .index = -1, }; +static int __init opt_kgdb_con(char *str) +{ + kgdb_use_con = 1; + + if (kgdb_io_module_registered && !kgdb_con_registered) { + register_console(&kgdbcons); + kgdb_con_registered = 1; + } + + return 0; +} + +early_param("kgdbcon", opt_kgdb_con); + #ifdef CONFIG_MAGIC_SYSRQ static void sysrq_handle_dbg(int key) { From 3aff03d9b92c0553f491e36807cda5826c3dc95d Mon Sep 17 00:00:00 2001 From: "Daniel W. S. Almeida" Date: Fri, 7 Aug 2020 10:35:30 +0200 Subject: [PATCH 081/636] media: uvcvideo: Fix dereference of out-of-bound list iterator [ Upstream commit f875bcc375c738bf2f599ff2e1c5b918dbd07c45 ] Fixes the following coccinelle report: drivers/media/usb/uvc/uvc_ctrl.c:1860:5-11: ERROR: invalid reference to the index variable of the iterator on line 1854 by adding a boolean variable to check if the loop has found the Found using - Coccinelle (http://coccinelle.lip6.fr) [Replace cursor variable with bool found] Signed-off-by: Daniel W. S. Almeida Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/uvc/uvc_ctrl.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index abfc49901222..843dc19bada0 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -1853,30 +1853,35 @@ int uvc_xu_ctrl_query(struct uvc_video_chain *chain, { struct uvc_entity *entity; struct uvc_control *ctrl; - unsigned int i, found = 0; + unsigned int i; + bool found; u32 reqflags; u16 size; u8 *data = NULL; int ret; /* Find the extension unit. */ + found = false; list_for_each_entry(entity, &chain->entities, chain) { if (UVC_ENTITY_TYPE(entity) == UVC_VC_EXTENSION_UNIT && - entity->id == xqry->unit) + entity->id == xqry->unit) { + found = true; break; + } } - if (entity->id != xqry->unit) { + if (!found) { uvc_trace(UVC_TRACE_CONTROL, "Extension unit %u not found.\n", xqry->unit); return -ENOENT; } /* Find the control and perform delayed initialization if needed. */ + found = false; for (i = 0; i < entity->ncontrols; ++i) { ctrl = &entity->controls[i]; if (ctrl->index == xqry->selector - 1) { - found = 1; + found = true; break; } } From 5d095e529694a55815a2d2370ca151fe2fe190d6 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 31 Aug 2020 15:33:49 +0800 Subject: [PATCH 082/636] riscv: Define AT_VECTOR_SIZE_ARCH for ARCH_DLINFO [ Upstream commit b5fca7c55f9fbab5ad732c3bce00f31af6ba5cfa ] AT_VECTOR_SIZE_ARCH should be defined with the maximum number of NEW_AUX_ENT entries that ARCH_DLINFO can contain, but it wasn't defined for RISC-V at all even though ARCH_DLINFO will contain one NEW_AUX_ENT for the VDSO address. Signed-off-by: Zong Li Reviewed-by: Palmer Dabbelt Reviewed-by: Pekka Enberg Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/include/uapi/asm/auxvec.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index 1376515547cd..ed7bf7c7add5 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -21,4 +21,7 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +/* entries in ARCH_DLINFO */ +#define AT_VECTOR_SIZE_ARCH 1 + #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ From 2de342e2b36e04dbe64fc3c9c82807956c5e6993 Mon Sep 17 00:00:00 2001 From: Alain Volmat Date: Mon, 31 Aug 2020 08:10:11 +0200 Subject: [PATCH 083/636] cpufreq: sti-cpufreq: add stih418 support [ Upstream commit 01a163c52039e9426c7d3d3ab16ca261ad622597 ] The STiH418 can be controlled the same way as STiH407 & STiH410 regarding cpufreq. Signed-off-by: Alain Volmat Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/sti-cpufreq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c index 47105735df12..6b5d241c30b7 100644 --- a/drivers/cpufreq/sti-cpufreq.c +++ b/drivers/cpufreq/sti-cpufreq.c @@ -144,7 +144,8 @@ static const struct reg_field sti_stih407_dvfs_regfields[DVFS_MAX_REGFIELDS] = { static const struct reg_field *sti_cpufreq_match(void) { if (of_machine_is_compatible("st,stih407") || - of_machine_is_compatible("st,stih410")) + of_machine_is_compatible("st,stih410") || + of_machine_is_compatible("st,stih418")) return sti_stih407_dvfs_regfields; return NULL; @@ -261,7 +262,8 @@ static int sti_cpufreq_init(void) int ret; if ((!of_machine_is_compatible("st,stih407")) && - (!of_machine_is_compatible("st,stih410"))) + (!of_machine_is_compatible("st,stih410")) && + (!of_machine_is_compatible("st,stih418"))) return -ENODEV; ddata.cpu = get_cpu_device(0); From 263be4104fd40762ccad5cc69385be33d4b0b6c0 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 17 Sep 2020 13:26:00 +0200 Subject: [PATCH 084/636] USB: adutux: fix debugging [ Upstream commit c56150c1bc8da5524831b1dac2eec3c67b89f587 ] Handling for removal of the controller was missing at one place. Add it. Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20200917112600.26508-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/misc/adutux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index b8073f36ffdc..62fdfde4ad03 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -209,6 +209,7 @@ static void adu_interrupt_out_callback(struct urb *urb) if (status != 0) { if ((status != -ENOENT) && + (status != -ESHUTDOWN) && (status != -ECONNRESET)) { dev_dbg(&dev->udev->dev, "%s :nonzero status received: %d\n", __func__, From ed23397d1daabd47fe463a3c0d5cfdd495448ccb Mon Sep 17 00:00:00 2001 From: Lang Dai Date: Mon, 14 Sep 2020 11:26:41 +0800 Subject: [PATCH 085/636] uio: free uio id after uio file node is freed [ Upstream commit 8fd0e2a6df262539eaa28b0a2364cca10d1dc662 ] uio_register_device() do two things. 1) get an uio id from a global pool, e.g. the id is 2) create file nodes like /sys/class/uio/uio uio_unregister_device() do two things. 1) free the uio id and return it to the global pool 2) free the file node /sys/class/uio/uio There is a situation is that one worker is calling uio_unregister_device(), and another worker is calling uio_register_device(). If the two workers are X and Y, they go as below sequence, 1) X free the uio id 2) Y get an uio id 3) Y create file node /sys/class/uio/uio 4) X free the file note /sys/class/uio/uio Then it will failed at the 3rd step and cause the phenomenon we saw as it is creating a duplicated file node. Failure reports as follows: sysfs: cannot create duplicate filename '/class/uio/uio10' Call Trace: sysfs_do_create_link_sd.isra.2+0x9e/0xb0 sysfs_create_link+0x25/0x40 device_add+0x2c4/0x640 __uio_register_device+0x1c5/0x576 [uio] adf_uio_init_bundle_dev+0x231/0x280 [intel_qat] adf_uio_register+0x1c0/0x340 [intel_qat] adf_dev_start+0x202/0x370 [intel_qat] adf_dev_start_async+0x40/0xa0 [intel_qat] process_one_work+0x14d/0x410 worker_thread+0x4b/0x460 kthread+0x105/0x140 ? process_one_work+0x410/0x410 ? kthread_bind+0x40/0x40 ret_from_fork+0x1f/0x40 Code: 85 c0 48 89 c3 74 12 b9 00 10 00 00 48 89 c2 31 f6 4c 89 ef e8 ec c4 ff ff 4c 89 e2 48 89 de 48 c7 c7 e8 b4 ee b4 e8 6a d4 d7 ff <0f> 0b 48 89 df e8 20 fa f3 ff 5b 41 5c 41 5d 5d c3 66 0f 1f 84 ---[ end trace a7531c1ed5269e84 ]--- c6xxvf b002:00:00.0: Failed to register UIO devices c6xxvf b002:00:00.0: Failed to register UIO devices Signed-off-by: Lang Dai Link: https://lore.kernel.org/r/1600054002-17722-1-git-send-email-lang.dai@intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/uio/uio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 9c788748bdc6..3926be659147 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -1008,8 +1008,6 @@ void uio_unregister_device(struct uio_info *info) idev = info->uio_dev; - uio_free_minor(idev); - mutex_lock(&idev->info_lock); uio_dev_del_attributes(idev); @@ -1021,6 +1019,8 @@ void uio_unregister_device(struct uio_info *info) device_unregister(&idev->dev); + uio_free_minor(idev); + return; } EXPORT_SYMBOL_GPL(uio_unregister_device); From 37ad7b2cca29201c6e88a8621508fe81c70b31e4 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 18 Sep 2020 16:17:49 +0300 Subject: [PATCH 086/636] usb: xhci: omit duplicate actions when suspending a runtime suspended host. [ Upstream commit 18a367e8947d72dd91b6fc401e88a2952c6363f7 ] If the xhci-plat.c is the platform driver, after the runtime pm is enabled, the xhci_suspend is called if nothing is connected on the port. When the system goes to suspend, it will call xhci_suspend again if USB wakeup is enabled. Since the runtime suspend wakeup setting is not always the same as system suspend wakeup setting, eg, at runtime suspend we always need wakeup if the controller is in low power mode; but at system suspend, we may not need wakeup. So, we move the judgement after changing wakeup setting. [commit message rewording -Mathias] Reviewed-by: Jun Li Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200918131752.16488-8-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6f976c4cccda..0348ea899d06 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -972,12 +972,15 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) xhci->shared_hcd->state != HC_STATE_SUSPENDED) return -EINVAL; - xhci_dbc_suspend(xhci); - /* Clear root port wake on bits if wakeup not allowed. */ if (!do_wakeup) xhci_disable_port_wake_on_bits(xhci); + if (!HCD_HW_ACCESSIBLE(hcd)) + return 0; + + xhci_dbc_suspend(xhci); + /* Don't poll the roothubs on bus suspend. */ xhci_dbg(xhci, "%s: stopping port polling.\n", __func__); clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); From 0a2cb1eae0a70acf701600b510de04c55404bb31 Mon Sep 17 00:00:00 2001 From: Zhengyuan Liu Date: Mon, 21 Sep 2020 10:39:36 +0800 Subject: [PATCH 087/636] arm64/mm: return cpu_all_mask when node is NUMA_NO_NODE [ Upstream commit a194c5f2d2b3a05428805146afcabe5140b5d378 ] The @node passed to cpumask_of_node() can be NUMA_NO_NODE, in that case it will trigger the following WARN_ON(node >= nr_node_ids) due to mismatched data types of @node and @nr_node_ids. Actually we should return cpu_all_mask just like most other architectures do if passed NUMA_NO_NODE. Also add a similar check to the inline cpumask_of_node() in numa.h. Signed-off-by: Zhengyuan Liu Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20200921023936.21846-1-liuzhengyuan@tj.kylinos.cn Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/numa.h | 3 +++ arch/arm64/mm/numa.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 626ad01e83bf..dd870390d639 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -25,6 +25,9 @@ const struct cpumask *cpumask_of_node(int node); /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ static inline const struct cpumask *cpumask_of_node(int node) { + if (node == NUMA_NO_NODE) + return cpu_all_mask; + return node_to_cpumask_map[node]; } #endif diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 54529b4ed513..15eaf1e09d0c 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -58,7 +58,11 @@ EXPORT_SYMBOL(node_to_cpumask_map); */ const struct cpumask *cpumask_of_node(int node) { - if (WARN_ON(node >= nr_node_ids)) + + if (node == NUMA_NO_NODE) + return cpu_all_mask; + + if (WARN_ON(node < 0 || node >= nr_node_ids)) return cpu_none_mask; if (WARN_ON(node_to_cpumask_map[node] == NULL)) From 451a7c74c23b2ec42410ca5a7110c1a85ebae688 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 21 Sep 2020 09:15:08 -0700 Subject: [PATCH 088/636] xfs: don't free rt blocks when we're doing a REMAP bunmapi call [ Upstream commit 8df0fa39bdd86ca81a8d706a6ed9d33cc65ca625 ] When callers pass XFS_BMAPI_REMAP into xfs_bunmapi, they want the extent to be unmapped from the given file fork without the extent being freed. We do this for non-rt files, but we forgot to do this for realtime files. So far this isn't a big deal since nobody makes a bunmapi call to a rt file with the REMAP flag set, but don't leave a logic bomb. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_bmap.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index f35e1801f1c9..fc9950a505e6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4920,20 +4920,25 @@ xfs_bmap_del_extent_real( flags = XFS_ILOG_CORE; if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { - xfs_fsblock_t bno; xfs_filblks_t len; xfs_extlen_t mod; - bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize, - &mod); - ASSERT(mod == 0); len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, &mod); ASSERT(mod == 0); - error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); - if (error) - goto done; + if (!(bflags & XFS_BMAPI_REMAP)) { + xfs_fsblock_t bno; + + bno = div_u64_rem(del->br_startblock, + mp->m_sb.sb_rextsize, &mod); + ASSERT(mod == 0); + + error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); + if (error) + goto done; + } + do_fx = 0; nblks = len * mp->m_sb.sb_rextsize; qfield = XFS_TRANS_DQ_RTBCOUNT; From d2828ab0a92683e56e210cae4769af1f83519197 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Tue, 18 Aug 2020 22:24:25 +0800 Subject: [PATCH 089/636] ACPI: Add out of bounds and numa_off protections to pxm_to_node() [ Upstream commit 8a3decac087aa897df5af04358c2089e52e70ac4 ] The function should check the validity of the pxm value before using it to index the pxm_to_node_map[] array. Whilst hardening this code may be good in general, the main intent here is to enable following patches that use this function to replace acpi_map_pxm_to_node() for non SRAT usecases which should return NO_NUMA_NODE for PXM entries not matching with those in SRAT. Signed-off-by: Jonathan Cameron Reviewed-by: Barry Song Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 0da58f0bf7e5..a28ff3cfbc29 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -46,7 +46,7 @@ int acpi_numa __initdata; int pxm_to_node(int pxm) { - if (pxm < 0) + if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off) return NUMA_NO_NODE; return pxm_to_node_map[pxm]; } From 59e6db9d2798c94d1d32ebdf0ed7a7bd162504a1 Mon Sep 17 00:00:00 2001 From: Xie He Date: Mon, 28 Sep 2020 05:56:43 -0700 Subject: [PATCH 090/636] drivers/net/wan/hdlc_fr: Correctly handle special skb->protocol values [ Upstream commit 8306266c1d51aac9aa7aa907fe99032a58c6382c ] The fr_hard_header function is used to prepend the header to skbs before transmission. It is used in 3 situations: 1) When a control packet is generated internally in this driver; 2) When a user sends an skb on an Ethernet-emulating PVC device; 3) When a user sends an skb on a normal PVC device. These 3 situations need to be handled differently by fr_hard_header. Different headers should be prepended to the skb in different situations. Currently fr_hard_header distinguishes these 3 situations using skb->protocol. For situation 1 and 2, a special skb->protocol value will be assigned before calling fr_hard_header, so that it can recognize these 2 situations. All skb->protocol values other than these special ones are treated by fr_hard_header as situation 3. However, it is possible that in situation 3, the user sends an skb with one of the special skb->protocol values. In this case, fr_hard_header would incorrectly treat it as situation 1 or 2. This patch tries to solve this issue by using skb->dev instead of skb->protocol to distinguish between these 3 situations. For situation 1, skb->dev would be NULL; for situation 2, skb->dev->type would be ARPHRD_ETHER; and for situation 3, skb->dev->type would be ARPHRD_DLCI. This way fr_hard_header would be able to distinguish these 3 situations correctly regardless what skb->protocol value the user tries to use in situation 3. Cc: Krzysztof Halasa Signed-off-by: Xie He Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wan/hdlc_fr.c | 90 ++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 03b5f5cce6f4..96b4ce13f3a5 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -276,63 +276,69 @@ static inline struct net_device **get_dev_p(struct pvc_device *pvc, static int fr_hard_header(struct sk_buff **skb_p, u16 dlci) { - u16 head_len; struct sk_buff *skb = *skb_p; - switch (skb->protocol) { - case cpu_to_be16(NLPID_CCITT_ANSI_LMI): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_CCITT_ANSI_LMI; - break; + if (!skb->dev) { /* Control packets */ + switch (dlci) { + case LMI_CCITT_ANSI_DLCI: + skb_push(skb, 4); + skb->data[3] = NLPID_CCITT_ANSI_LMI; + break; - case cpu_to_be16(NLPID_CISCO_LMI): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_CISCO_LMI; - break; + case LMI_CISCO_DLCI: + skb_push(skb, 4); + skb->data[3] = NLPID_CISCO_LMI; + break; - case cpu_to_be16(ETH_P_IP): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_IP; - break; + default: + return -EINVAL; + } - case cpu_to_be16(ETH_P_IPV6): - head_len = 4; - skb_push(skb, head_len); - skb->data[3] = NLPID_IPV6; - break; + } else if (skb->dev->type == ARPHRD_DLCI) { + switch (skb->protocol) { + case htons(ETH_P_IP): + skb_push(skb, 4); + skb->data[3] = NLPID_IP; + break; - case cpu_to_be16(ETH_P_802_3): - head_len = 10; - if (skb_headroom(skb) < head_len) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, - head_len); + case htons(ETH_P_IPV6): + skb_push(skb, 4); + skb->data[3] = NLPID_IPV6; + break; + + default: + skb_push(skb, 10); + skb->data[3] = FR_PAD; + skb->data[4] = NLPID_SNAP; + /* OUI 00-00-00 indicates an Ethertype follows */ + skb->data[5] = 0x00; + skb->data[6] = 0x00; + skb->data[7] = 0x00; + /* This should be an Ethertype: */ + *(__be16 *)(skb->data + 8) = skb->protocol; + } + + } else if (skb->dev->type == ARPHRD_ETHER) { + if (skb_headroom(skb) < 10) { + struct sk_buff *skb2 = skb_realloc_headroom(skb, 10); if (!skb2) return -ENOBUFS; dev_kfree_skb(skb); skb = *skb_p = skb2; } - skb_push(skb, head_len); + skb_push(skb, 10); skb->data[3] = FR_PAD; skb->data[4] = NLPID_SNAP; - skb->data[5] = FR_PAD; + /* OUI 00-80-C2 stands for the 802.1 organization */ + skb->data[5] = 0x00; skb->data[6] = 0x80; skb->data[7] = 0xC2; + /* PID 00-07 stands for Ethernet frames without FCS */ skb->data[8] = 0x00; - skb->data[9] = 0x07; /* bridged Ethernet frame w/out FCS */ - break; + skb->data[9] = 0x07; - default: - head_len = 10; - skb_push(skb, head_len); - skb->data[3] = FR_PAD; - skb->data[4] = NLPID_SNAP; - skb->data[5] = FR_PAD; - skb->data[6] = FR_PAD; - skb->data[7] = FR_PAD; - *(__be16*)(skb->data + 8) = skb->protocol; + } else { + return -EINVAL; } dlci_to_q922(skb->data, dlci); @@ -428,8 +434,8 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev) skb_put(skb, pad); memset(skb->data + len, 0, pad); } - skb->protocol = cpu_to_be16(ETH_P_802_3); } + skb->dev = dev; if (!fr_hard_header(&skb, pvc->dlci)) { dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; @@ -497,10 +503,8 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) memset(skb->data, 0, len); skb_reserve(skb, 4); if (lmi == LMI_CISCO) { - skb->protocol = cpu_to_be16(NLPID_CISCO_LMI); fr_hard_header(&skb, LMI_CISCO_DLCI); } else { - skb->protocol = cpu_to_be16(NLPID_CCITT_ANSI_LMI); fr_hard_header(&skb, LMI_CCITT_ANSI_DLCI); } data = skb_tail_pointer(skb); From 9c95724b7d1ce6714b0e918bfef6781bbb32096a Mon Sep 17 00:00:00 2001 From: Diana Craciun Date: Tue, 29 Sep 2020 11:54:38 +0300 Subject: [PATCH 091/636] bus/fsl_mc: Do not rely on caller to provide non NULL mc_io [ Upstream commit 5026cf605143e764e1785bbf9158559d17f8d260 ] Before destroying the mc_io, check first that it was allocated. Reviewed-by: Laurentiu Tudor Acked-by: Laurentiu Tudor Signed-off-by: Diana Craciun Link: https://lore.kernel.org/r/20200929085441.17448-11-diana.craciun@oss.nxp.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/bus/fsl-mc/mc-io.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/bus/fsl-mc/mc-io.c b/drivers/bus/fsl-mc/mc-io.c index 7226cfc49b6f..3f806599748a 100644 --- a/drivers/bus/fsl-mc/mc-io.c +++ b/drivers/bus/fsl-mc/mc-io.c @@ -129,7 +129,12 @@ error_destroy_mc_io: */ void fsl_destroy_mc_io(struct fsl_mc_io *mc_io) { - struct fsl_mc_device *dpmcp_dev = mc_io->dpmcp_dev; + struct fsl_mc_device *dpmcp_dev; + + if (!mc_io) + return; + + dpmcp_dev = mc_io->dpmcp_dev; if (dpmcp_dev) fsl_mc_io_unset_dpmcp(mc_io); From 13a1f012bd9c4c7a9ce9f38e0b99dbdc306a1e4e Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 4 Sep 2020 14:09:58 +0800 Subject: [PATCH 092/636] power: supply: test_power: add missing newlines when printing parameters by sysfs [ Upstream commit c07fa6c1631333f02750cf59f22b615d768b4d8f ] When I cat some module parameters by sysfs, it displays as follows. It's better to add a newline for easy reading. root@syzkaller:~# cd /sys/module/test_power/parameters/ root@syzkaller:/sys/module/test_power/parameters# cat ac_online onroot@syzkaller:/sys/module/test_power/parameters# cat battery_present trueroot@syzkaller:/sys/module/test_power/parameters# cat battery_health goodroot@syzkaller:/sys/module/test_power/parameters# cat battery_status dischargingroot@syzkaller:/sys/module/test_power/parameters# cat battery_technology LIONroot@syzkaller:/sys/module/test_power/parameters# cat usb_online onroot@syzkaller:/sys/module/test_power/parameters# Signed-off-by: Xiongfeng Wang Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin --- drivers/power/supply/test_power.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/power/supply/test_power.c b/drivers/power/supply/test_power.c index 57246cdbd042..925abec45380 100644 --- a/drivers/power/supply/test_power.c +++ b/drivers/power/supply/test_power.c @@ -344,6 +344,7 @@ static int param_set_ac_online(const char *key, const struct kernel_param *kp) static int param_get_ac_online(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_ac_online, ac_online, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -357,6 +358,7 @@ static int param_set_usb_online(const char *key, const struct kernel_param *kp) static int param_get_usb_online(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_ac_online, usb_online, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -371,6 +373,7 @@ static int param_set_battery_status(const char *key, static int param_get_battery_status(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_status, battery_status, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -385,6 +388,7 @@ static int param_set_battery_health(const char *key, static int param_get_battery_health(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_health, battery_health, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -400,6 +404,7 @@ static int param_get_battery_present(char *buffer, const struct kernel_param *kp) { strcpy(buffer, map_get_key(map_present, battery_present, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } @@ -417,6 +422,7 @@ static int param_get_battery_technology(char *buffer, { strcpy(buffer, map_get_key(map_technology, battery_technology, "unknown")); + strcat(buffer, "\n"); return strlen(buffer); } From 1c3b4b2d87c15acfcc4ea9861e071d6bca1f51de Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Mon, 21 Sep 2020 17:52:43 -0400 Subject: [PATCH 093/636] drm/amd/display: HDMI remote sink need mode validation for Linux [ Upstream commit 95d620adb48f7728e67d82f56f756e8d451cf8d2 ] [Why] Currently mode validation is bypassed if remote sink exists. That leads to mode set issue when a BW bottle neck exists in the link path, e.g., a DP-to-HDMI converter that only supports HDMI 1.4. Any invalid mode passed to Linux user space will cause the modeset failure due to limitation of Linux user space implementation. [How] Mode validation is skipped only if in edid override. For real remote sink, clock limit check should be done for HDMI remote sink. Have HDMI related remote sink going through mode validation to elimiate modes which pixel clock exceeds BW limitation. Signed-off-by: Fangzhi Zuo Reviewed-by: Hersen Wu Acked-by: Eryk Brol Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 2fb2c683ad54..fa0e6c8e2447 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2009,7 +2009,7 @@ enum dc_status dc_link_validate_mode_timing( /* A hack to avoid failing any modes for EDID override feature on * topology change such as lower quality cable for DP or different dongle */ - if (link->remote_sinks[0]) + if (link->remote_sinks[0] && link->remote_sinks[0]->sink_signal == SIGNAL_TYPE_VIRTUAL) return DC_OK; /* Passive Dongle */ From c9830b1f2b4a7105541cf8796b75e5860805c6fa Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 5 Sep 2020 01:34:22 +0800 Subject: [PATCH 094/636] btrfs: fix replace of seed device [ Upstream commit c6a5d954950c5031444173ad2195efc163afcac9 ] If you replace a seed device in a sprouted fs, it appears to have successfully replaced the seed device, but if you look closely, it didn't. Here is an example. $ mkfs.btrfs /dev/sda $ btrfstune -S1 /dev/sda $ mount /dev/sda /btrfs $ btrfs device add /dev/sdb /btrfs $ umount /btrfs $ btrfs device scan --forget $ mount -o device=/dev/sda /dev/sdb /btrfs $ btrfs replace start -f /dev/sda /dev/sdc /btrfs $ echo $? 0 BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc started BTRFS info (device sdb): dev_replace from /dev/sda (devid 1) to /dev/sdc finished $ btrfs fi show Label: none uuid: ab2c88b7-be81-4a7e-9849-c3666e7f9f4f Total devices 2 FS bytes used 256.00KiB devid 1 size 3.00GiB used 520.00MiB path /dev/sdc devid 2 size 3.00GiB used 896.00MiB path /dev/sdb Label: none uuid: 10bd3202-0415-43af-96a8-d5409f310a7e Total devices 1 FS bytes used 128.00KiB devid 1 size 3.00GiB used 536.00MiB path /dev/sda So as per the replace start command and kernel log replace was successful. Now let's try to clean mount. $ umount /btrfs $ btrfs device scan --forget $ mount -o device=/dev/sdc /dev/sdb /btrfs mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/sdb, missing codepage or helper program, or other error. [ 636.157517] BTRFS error (device sdc): failed to read chunk tree: -2 [ 636.180177] BTRFS error (device sdc): open_ctree failed That's because per dev items it is still looking for the original seed device. $ btrfs inspect-internal dump-tree -d /dev/sdb item 0 key (DEV_ITEMS DEV_ITEM 1) itemoff 16185 itemsize 98 devid 1 total_bytes 3221225472 bytes_used 545259520 io_align 4096 io_width 4096 sector_size 4096 type 0 generation 6 start_offset 0 dev_group 0 seek_speed 0 bandwidth 0 uuid 59368f50-9af2-4b17-91da-8a783cc418d4 <--- seed uuid fsid 10bd3202-0415-43af-96a8-d5409f310a7e <--- seed fsid item 1 key (DEV_ITEMS DEV_ITEM 2) itemoff 16087 itemsize 98 devid 2 total_bytes 3221225472 bytes_used 939524096 io_align 4096 io_width 4096 sector_size 4096 type 0 generation 0 start_offset 0 dev_group 0 seek_speed 0 bandwidth 0 uuid 56a0a6bc-4630-4998-8daf-3c3030c4256a <- sprout uuid fsid ab2c88b7-be81-4a7e-9849-c3666e7f9f4f <- sprout fsid But the replaced target has the following uuid+fsid in its superblock which doesn't match with the expected uuid+fsid in its devitem. $ btrfs in dump-super /dev/sdc | egrep '^generation|dev_item.uuid|dev_item.fsid|devid' generation 20 dev_item.uuid 59368f50-9af2-4b17-91da-8a783cc418d4 dev_item.fsid ab2c88b7-be81-4a7e-9849-c3666e7f9f4f [match] dev_item.devid 1 So if you provide the original seed device the mount shall be successful. Which so long happening in the test case btrfs/163. $ btrfs device scan --forget $ mount -o device=/dev/sda /dev/sdb /btrfs Fix in this patch: If a seed is not sprouted then there is no replacement of it, because of its read-only filesystem with a read-only device. Similarly, in the case of a sprouted filesystem, the seed device is still read only. So, mark it as you can't replace a seed device, you can only add a new device and then delete the seed device. If replace is attempted then returns -EINVAL. Signed-off-by: Anand Jain Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/dev-replace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 1b9c8ffb038f..36c0490156ac 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -190,7 +190,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, int ret = 0; *device_out = NULL; - if (fs_info->fs_devices->seeding) { + if (srcdev->fs_devices->seeding) { btrfs_err(fs_info, "the filesystem is a seed filesystem!"); return -EINVAL; } From 868260482d8ab773afec2d0c6878d02cf80e1efa Mon Sep 17 00:00:00 2001 From: Zhao Heming Date: Tue, 6 Oct 2020 00:00:24 +0800 Subject: [PATCH 095/636] md/bitmap: md_bitmap_get_counter returns wrong blocks [ Upstream commit d837f7277f56e70d82b3a4a037d744854e62f387 ] md_bitmap_get_counter() has code: ``` if (bitmap->bp[page].hijacked || bitmap->bp[page].map == NULL) csize = ((sector_t)1) << (bitmap->chunkshift + PAGE_COUNTER_SHIFT - 1); ``` The minus 1 is wrong, this branch should report 2048 bits of space. With "-1" action, this only report 1024 bit of space. This bug code returns wrong blocks, but it doesn't inflence bitmap logic: 1. Most callers focus this function return value (the counter of offset), not the parameter blocks. 2. The bug is only triggered when hijacked is true or map is NULL. the hijacked true condition is very rare. the "map == null" only true when array is creating or resizing. 3. Even the caller gets wrong blocks, current code makes caller just to call md_bitmap_get_counter() one more time. Signed-off-by: Zhao Heming Signed-off-by: Song Liu Signed-off-by: Sasha Levin --- drivers/md/md-bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index fd8607124bdb..503f5e06fa86 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1371,7 +1371,7 @@ __acquires(bitmap->lock) if (bitmap->bp[page].hijacked || bitmap->bp[page].map == NULL) csize = ((sector_t)1) << (bitmap->chunkshift + - PAGE_COUNTER_SHIFT - 1); + PAGE_COUNTER_SHIFT); else csize = ((sector_t)1) << bitmap->chunkshift; *blocks = csize - (offset & (csize - 1)); From 4896829cafca3bd45b19aa924b66480982433a25 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 12 Oct 2020 05:10:51 -0400 Subject: [PATCH 096/636] bnxt_en: Log unknown link speed appropriately. [ Upstream commit 8eddb3e7ce124dd6375d3664f1aae13873318b0f ] If the VF virtual link is set to always enabled, the speed may be unknown when the physical link is down. The driver currently logs the link speed as 4294967295 Mbps which is SPEED_UNKNOWN. Modify the link up log message as "speed unknown" which makes more sense. Reviewed-by: Vasundhara Volam Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/1602493854-29283-7-git-send-email-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c3f04fb31955..01d28ede1fb2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6326,6 +6326,11 @@ static void bnxt_report_link(struct bnxt *bp) u16 fec; netif_carrier_on(bp->dev); + speed = bnxt_fw_to_ethtool_speed(bp->link_info.link_speed); + if (speed == SPEED_UNKNOWN) { + netdev_info(bp->dev, "NIC Link is Up, speed unknown\n"); + return; + } if (bp->link_info.duplex == BNXT_LINK_DUPLEX_FULL) duplex = "full"; else @@ -6338,7 +6343,6 @@ static void bnxt_report_link(struct bnxt *bp) flow_ctrl = "ON - receive"; else flow_ctrl = "none"; - speed = bnxt_fw_to_ethtool_speed(bp->link_info.link_speed); netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s duplex, Flow control: %s\n", speed, duplex, flow_ctrl); if (bp->flags & BNXT_FLAG_EEE_CAP) From fe5fe1ec36dc00cdd01e9d9cf3044349d676cfb0 Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Wed, 24 Jun 2020 22:15:18 +0530 Subject: [PATCH 097/636] rpmsg: glink: Use complete_all for open states [ Upstream commit 4fcdaf6e28d11e2f3820d54dd23cd12a47ddd44e ] The open_req and open_ack completion variables are the state variables to represet a remote channel as open. Use complete_all so there are no races with waiters and using completion_done. Signed-off-by: Chris Lew Signed-off-by: Arun Kumar Neelakantam Signed-off-by: Deepak Kumar Singh Link: https://lore.kernel.org/r/1593017121-7953-2-git-send-email-deesin@codeaurora.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/rpmsg/qcom_glink_native.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index facc577ab0ac..a755f85686e5 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -970,7 +970,7 @@ static int qcom_glink_rx_open_ack(struct qcom_glink *glink, unsigned int lcid) return -EINVAL; } - complete(&channel->open_ack); + complete_all(&channel->open_ack); return 0; } @@ -1178,7 +1178,7 @@ static int qcom_glink_announce_create(struct rpmsg_device *rpdev) __be32 *val = defaults; int size; - if (glink->intentless) + if (glink->intentless || !completion_done(&channel->open_ack)) return 0; prop = of_find_property(np, "qcom,intents", NULL); @@ -1413,7 +1413,7 @@ static int qcom_glink_rx_open(struct qcom_glink *glink, unsigned int rcid, channel->rcid = ret; spin_unlock_irqrestore(&glink->idr_lock, flags); - complete(&channel->open_req); + complete_all(&channel->open_req); if (create_device) { rpdev = kzalloc(sizeof(*rpdev), GFP_KERNEL); From 1712a5d8dc9105f98bd15b78a1d996c3c113c212 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 7 Sep 2020 11:25:59 +0300 Subject: [PATCH 098/636] clk: ti: clockdomain: fix static checker warning [ Upstream commit b7a7943fe291b983b104bcbd2f16e8e896f56590 ] Fix a memory leak induced by not calling clk_put after doing of_clk_get. Reported-by: Dan Murphy Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20200907082600.454-3-t-kristo@ti.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/ti/clockdomain.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c index 07a805125e98..11d92311e162 100644 --- a/drivers/clk/ti/clockdomain.c +++ b/drivers/clk/ti/clockdomain.c @@ -146,10 +146,12 @@ static void __init of_ti_clockdomain_setup(struct device_node *node) if (clk_hw_get_flags(clk_hw) & CLK_IS_BASIC) { pr_warn("can't setup clkdm for basic clk %s\n", __clk_get_name(clk)); + clk_put(clk); continue; } to_clk_hw_omap(clk_hw)->clkdm_name = clkdm_name; omap2_init_clk_clkdm(clk_hw); + clk_put(clk); } } From 9d5fa0ad46e7882bbf89cf40d57241de3307f380 Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Mon, 12 Oct 2020 09:54:04 +0530 Subject: [PATCH 099/636] net: 9p: initialize sun_server.sun_path to have addr's value only when addr is valid [ Upstream commit 7ca1db21ef8e0e6725b4d25deed1ca196f7efb28 ] In p9_fd_create_unix, checking is performed to see if the addr (passed as an argument) is NULL or not. However, no check is performed to see if addr is a valid address, i.e., it doesn't entirely consist of only 0's. The initialization of sun_server.sun_path to be equal to this faulty addr value leads to an uninitialized variable, as detected by KMSAN. Checking for this (faulty addr) and returning a negative error number appropriately, resolves this issue. Link: http://lkml.kernel.org/r/20201012042404.2508-1-anant.thazhemadam@gmail.com Reported-by: syzbot+75d51fe5bf4ebe988518@syzkaller.appspotmail.com Tested-by: syzbot+75d51fe5bf4ebe988518@syzkaller.appspotmail.com Signed-off-by: Anant Thazhemadam Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin --- net/9p/trans_fd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index b6dcb40fa8a7..9268f808afc0 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1038,7 +1038,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) csocket = NULL; - if (addr == NULL) + if (!addr || !strlen(addr)) return -EINVAL; if (strlen(addr) >= UNIX_PATH_MAX) { From edf786e2e4987d6defd301c62ed926bca77fe306 Mon Sep 17 00:00:00 2001 From: Madhuparna Bhowmik Date: Fri, 7 Aug 2020 16:59:02 +0530 Subject: [PATCH 100/636] drivers: watchdog: rdc321x_wdt: Fix race condition bugs [ Upstream commit 4b2e7f99cdd314263c9d172bc17193b8b6bba463 ] In rdc321x_wdt_probe(), rdc321x_wdt_device.queue is initialized after misc_register(), hence if ioctl is called before its initialization which can call rdc321x_wdt_start() function, it will see an uninitialized value of rdc321x_wdt_device.queue, hence initialize it before misc_register(). Also, rdc321x_wdt_device.default_ticks is accessed in reset() function called from write callback, thus initialize it before misc_register(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Madhuparna Bhowmik Reviewed-by: Guenter Roeck Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20200807112902.28764-1-madhuparnabhowmik10@gmail.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Sasha Levin --- drivers/watchdog/rdc321x_wdt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/rdc321x_wdt.c b/drivers/watchdog/rdc321x_wdt.c index a281aa84bfb1..4c3b4ea4e17f 100644 --- a/drivers/watchdog/rdc321x_wdt.c +++ b/drivers/watchdog/rdc321x_wdt.c @@ -244,6 +244,8 @@ static int rdc321x_wdt_probe(struct platform_device *pdev) rdc321x_wdt_device.sb_pdev = pdata->sb_pdev; rdc321x_wdt_device.base_reg = r->start; + rdc321x_wdt_device.queue = 0; + rdc321x_wdt_device.default_ticks = ticks; err = misc_register(&rdc321x_wdt_misc); if (err < 0) { @@ -258,14 +260,11 @@ static int rdc321x_wdt_probe(struct platform_device *pdev) rdc321x_wdt_device.base_reg, RDC_WDT_RST); init_completion(&rdc321x_wdt_device.stop); - rdc321x_wdt_device.queue = 0; clear_bit(0, &rdc321x_wdt_device.inuse); timer_setup(&rdc321x_wdt_device.timer, rdc321x_wdt_trigger, 0); - rdc321x_wdt_device.default_ticks = ticks; - dev_info(&pdev->dev, "watchdog init success\n"); return 0; From d808c88f0b88697d7df75eb233b50c235db34b24 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 15 Oct 2020 13:03:30 +0200 Subject: [PATCH 101/636] ext4: Detect already used quota file early [ Upstream commit e0770e91424f694b461141cbc99adf6b23006b60 ] When we try to use file already used as a quota file again (for the same or different quota type), strange things can happen. At the very least lockdep annotations may be wrong but also inode flags may be wrongly set / reset. When the file is used for two quota types at once we can even corrupt the file and likely crash the kernel. Catch all these cases by checking whether passed file is already used as quota file and bail early in that case. This fixes occasional generic/219 failure due to lockdep complaint. Reviewed-by: Andreas Dilger Reported-by: Ritesh Harjani Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201015110330.28716-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0c15ff19acbd..16ea7cfd130c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5752,6 +5752,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, /* Quotafile not on the same filesystem? */ if (path->dentry->d_sb != sb) return -EXDEV; + + /* Quota already enabled for this file? */ + if (IS_NOQUOTA(d_inode(path->dentry))) + return -EBUSY; + /* Journaling quota? */ if (EXT4_SB(sb)->s_qf_names[type]) { /* Quotafile not in fs root? */ From 6a253f385a2dc9f6a5a1da828e0e9e07f6f9d9b9 Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Wed, 14 Oct 2020 22:01:09 +0530 Subject: [PATCH 102/636] gfs2: add validation checks for size of superblock [ Upstream commit 0ddc5154b24c96f20e94d653b0a814438de6032b ] In gfs2_check_sb(), no validation checks are performed with regards to the size of the superblock. syzkaller detected a slab-out-of-bounds bug that was primarily caused because the block size for a superblock was set to zero. A valid size for a superblock is a power of 2 between 512 and PAGE_SIZE. Performing validation checks and ensuring that the size of the superblock is valid fixes this bug. Reported-by: syzbot+af90d47a37376844e731@syzkaller.appspotmail.com Tested-by: syzbot+af90d47a37376844e731@syzkaller.appspotmail.com Suggested-by: Andrew Price Signed-off-by: Anant Thazhemadam [Minor code reordering.] Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/ops_fstype.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9448c8461e57..17001f4e9f84 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -161,15 +161,19 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return -EINVAL; } - /* If format numbers match exactly, we're done. */ + if (sb->sb_fs_format != GFS2_FORMAT_FS || + sb->sb_multihost_format != GFS2_FORMAT_MULTI) { + fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); + return -EINVAL; + } - if (sb->sb_fs_format == GFS2_FORMAT_FS && - sb->sb_multihost_format == GFS2_FORMAT_MULTI) - return 0; + if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE || + (sb->sb_bsize & (sb->sb_bsize - 1))) { + pr_warn("Invalid superblock size\n"); + return -EINVAL; + } - fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); - - return -EINVAL; + return 0; } static void end_bio_io_page(struct bio *bio) From 80dc9c4266b5696e797bded1d0f6087446d7cf9d Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 9 Oct 2020 09:32:56 +1000 Subject: [PATCH 103/636] cifs: handle -EINTR in cifs_setattr [ Upstream commit c6cc4c5a72505a0ecefc9b413f16bec512f38078 ] RHBZ: 1848178 Some calls that set attributes, like utimensat(), are not supposed to return -EINTR and thus do not have handlers for this in glibc which causes us to leak -EINTR to the applications which are also unprepared to handle it. For example tar will break if utimensat() return -EINTR and abort unpacking the archive. Other applications may break too. To handle this we add checks, and retry, for -EINTR in cifs_setattr() Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/inode.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4a38f16d944d..d30eb4350656 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2550,13 +2550,18 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb); + int rc, retries = 0; - if (pTcon->unix_ext) - return cifs_setattr_unix(direntry, attrs); - - return cifs_setattr_nounix(direntry, attrs); + do { + if (pTcon->unix_ext) + rc = cifs_setattr_unix(direntry, attrs); + else + rc = cifs_setattr_nounix(direntry, attrs); + retries++; + } while (is_retryable_error(rc) && retries < 2); /* BB: add cifs_setattr_legacy for really old servers */ + return rc; } #if 0 From 06f2cb14aecf9d8a606db271d1723ee49b050c99 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 17 Jul 2020 21:33:21 +0900 Subject: [PATCH 104/636] arm64: dts: renesas: ulcb: add full-pwr-cycle-in-suspend into eMMC nodes [ Upstream commit 992d7a8b88c83c05664b649fc54501ce58e19132 ] Add full-pwr-cycle-in-suspend property to do a graceful shutdown of the eMMC device in system suspend. Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/1594989201-24228-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/ulcb.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index 0ead552d7eae..600adc25eaef 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -430,6 +430,7 @@ bus-width = <8>; mmc-hs200-1_8v; non-removable; + full-pwr-cycle-in-suspend; status = "okay"; }; From 103e159f7b186c364f5c96af8680c3e0edf17d7b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 10 Mar 2020 14:02:48 -0700 Subject: [PATCH 105/636] ARM: dts: omap4: Fix sgx clock rate for 4430 [ Upstream commit 19d3e9a0bdd57b90175f30390edeb06851f5f9f3 ] We currently have a different clock rate for droid4 compared to the stock v3.0.8 based Android Linux kernel: # cat /sys/kernel/debug/clk/dpll_*_m7x2_ck/clk_rate 266666667 307200000 # cat /sys/kernel/debug/clk/l3_gfx_cm:clk:0000:0/clk_rate 307200000 Let's fix this by configuring sgx to use 153.6 MHz instead of 307.2 MHz. Looks like also at least duover needs this change to avoid hangs, so let's apply it for all 4430. This helps a bit with thermal issues that seem to be related to memory corruption when using sgx. It seems that other driver related issues still remain though. Cc: Arthur Demchenkov Cc: Merlijn Wajer Cc: Sebastian Reichel Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap4.dtsi | 2 +- arch/arm/boot/dts/omap443x.dtsi | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 1a96d4317c97..8f907c235b02 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -516,7 +516,7 @@ status = "disabled"; }; - target-module@56000000 { + sgx_module: target-module@56000000 { compatible = "ti,sysc-omap4", "ti,sysc"; ti,hwmods = "gpu"; reg = <0x5601fc00 0x4>, diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi index cbcdcb4e7d1c..86b9caf461df 100644 --- a/arch/arm/boot/dts/omap443x.dtsi +++ b/arch/arm/boot/dts/omap443x.dtsi @@ -74,3 +74,13 @@ }; /include/ "omap443x-clocks.dtsi" + +/* + * Use dpll_per for sgx at 153.6MHz like droid4 stock v3.0.8 Android kernel + */ +&sgx_module { + assigned-clocks = <&l3_gfx_clkctrl OMAP4_GPU_CLKCTRL 24>, + <&dpll_per_m7x2_ck>; + assigned-clock-rates = <0>, <153600000>; + assigned-clock-parents = <&dpll_per_m7x2_ck>; +}; From 69c0f5d0ed8284ce2a9efd5ea9557816186d3f69 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 Aug 2020 14:37:59 +0300 Subject: [PATCH 106/636] memory: emif: Remove bogus debugfs error handling [ Upstream commit fd22781648080cc400772b3c68aa6b059d2d5420 ] Callers are generally not supposed to check the return values from debugfs functions. Debugfs functions never return NULL so this error handling will never trigger. (Historically debugfs functions used to return a mix of NULL and error pointers but it was eventually deemed too complicated for something which wasn't intended to be used in normal situations). Delete all the error handling. Signed-off-by: Dan Carpenter Acked-by: Santosh Shilimkar Link: https://lore.kernel.org/r/20200826113759.GF393664@mwanda Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/memory/emif.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/drivers/memory/emif.c b/drivers/memory/emif.c index 2f214440008c..1c6b2cc6269a 100644 --- a/drivers/memory/emif.c +++ b/drivers/memory/emif.c @@ -165,35 +165,12 @@ static const struct file_operations emif_mr4_fops = { static int __init_or_module emif_debugfs_init(struct emif_data *emif) { - struct dentry *dentry; - int ret; - - dentry = debugfs_create_dir(dev_name(emif->dev), NULL); - if (!dentry) { - ret = -ENOMEM; - goto err0; - } - emif->debugfs_root = dentry; - - dentry = debugfs_create_file("regcache_dump", S_IRUGO, - emif->debugfs_root, emif, &emif_regdump_fops); - if (!dentry) { - ret = -ENOMEM; - goto err1; - } - - dentry = debugfs_create_file("mr4", S_IRUGO, - emif->debugfs_root, emif, &emif_mr4_fops); - if (!dentry) { - ret = -ENOMEM; - goto err1; - } - + emif->debugfs_root = debugfs_create_dir(dev_name(emif->dev), NULL); + debugfs_create_file("regcache_dump", S_IRUGO, emif->debugfs_root, emif, + &emif_regdump_fops); + debugfs_create_file("mr4", S_IRUGO, emif->debugfs_root, emif, + &emif_mr4_fops); return 0; -err1: - debugfs_remove_recursive(emif->debugfs_root); -err0: - return ret; } static void __exit emif_debugfs_exit(struct emif_data *emif) From 1caf36ebc2ace7d851f81b0b081b463889ff2c09 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Sep 2020 18:11:21 +0200 Subject: [PATCH 107/636] ARM: dts: s5pv210: remove DMA controller bus node name to fix dtschema warnings [ Upstream commit ea4e792f3c8931fffec4d700cf6197d84e9f35a6 ] There is no need to keep DMA controller nodes under AMBA bus node. Remove the "amba" node to fix dtschema warnings like: amba: $nodename:0: 'amba' does not match '^([a-z][a-z0-9\\-]+-bus|bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$' Signed-off-by: Krzysztof Kozlowski Tested-by: Jonathan Bakker Link: https://lore.kernel.org/r/20200907161141.31034-6-krzk@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/s5pv210.dtsi | 49 +++++++++++++++------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 67358562a6ea..67f70683a2c4 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -126,35 +126,28 @@ }; }; - amba { - #address-cells = <1>; - #size-cells = <1>; - compatible = "simple-bus"; - ranges; + pdma0: dma@e0900000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0xe0900000 0x1000>; + interrupt-parent = <&vic0>; + interrupts = <19>; + clocks = <&clocks CLK_PDMA0>; + clock-names = "apb_pclk"; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; + }; - pdma0: dma@e0900000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0xe0900000 0x1000>; - interrupt-parent = <&vic0>; - interrupts = <19>; - clocks = <&clocks CLK_PDMA0>; - clock-names = "apb_pclk"; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - }; - - pdma1: dma@e0a00000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0xe0a00000 0x1000>; - interrupt-parent = <&vic0>; - interrupts = <20>; - clocks = <&clocks CLK_PDMA1>; - clock-names = "apb_pclk"; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - }; + pdma1: dma@e0a00000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0xe0a00000 0x1000>; + interrupt-parent = <&vic0>; + interrupts = <20>; + clocks = <&clocks CLK_PDMA1>; + clock-names = "apb_pclk"; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; spi0: spi@e1300000 { From 5addcbe6caa55ddcaa342d0d341f6a2e920f0672 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Sep 2020 18:11:23 +0200 Subject: [PATCH 108/636] ARM: dts: s5pv210: move PMU node out of clock controller [ Upstream commit bb98fff84ad1ea321823759edaba573a16fa02bd ] The Power Management Unit (PMU) is a separate device which has little common with clock controller. Moving it to one level up (from clock controller child to SoC) allows to remove fake simple-bus compatible and dtbs_check warnings like: clock-controller@e0100000: $nodename:0: 'clock-controller@e0100000' does not match '^([a-z][a-z0-9\\-]+-bus|bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$' Signed-off-by: Krzysztof Kozlowski Tested-by: Jonathan Bakker Link: https://lore.kernel.org/r/20200907161141.31034-8-krzk@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/s5pv210.dtsi | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 67f70683a2c4..37d251b1f74a 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -98,19 +98,16 @@ }; clocks: clock-controller@e0100000 { - compatible = "samsung,s5pv210-clock", "simple-bus"; + compatible = "samsung,s5pv210-clock"; reg = <0xe0100000 0x10000>; clock-names = "xxti", "xusbxti"; clocks = <&xxti>, <&xusbxti>; #clock-cells = <1>; - #address-cells = <1>; - #size-cells = <1>; - ranges; + }; - pmu_syscon: syscon@e0108000 { - compatible = "samsung-s5pv210-pmu", "syscon"; - reg = <0xe0108000 0x8000>; - }; + pmu_syscon: syscon@e0108000 { + compatible = "samsung-s5pv210-pmu", "syscon"; + reg = <0xe0108000 0x8000>; }; pinctrl0: pinctrl@e0200000 { From c151f68411cdeac5d70b48a19758d20ab110e4cc Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Sep 2020 18:11:24 +0200 Subject: [PATCH 109/636] ARM: dts: s5pv210: remove dedicated 'audio-subsystem' node [ Upstream commit 6c17a2974abf68a58517f75741b15c4aba42b4b8 ] The 'audio-subsystem' node is an artificial creation, not representing real hardware. The hardware is described by its nodes - AUDSS clock controller and I2S0. Remove the 'audio-subsystem' node along with its undocumented compatible to fix dtbs_check warnings like: audio-subsystem: $nodename:0: 'audio-subsystem' does not match '^([a-z][a-z0-9\\-]+-bus|bus|soc|axi|ahb|apb)(@[0-9a-f]+)?$' Signed-off-by: Krzysztof Kozlowski Tested-by: Jonathan Bakker Link: https://lore.kernel.org/r/20200907161141.31034-9-krzk@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/s5pv210.dtsi | 65 +++++++++++++++------------------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 37d251b1f74a..020a864623ff 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -217,43 +217,36 @@ status = "disabled"; }; - audio-subsystem { - compatible = "samsung,s5pv210-audss", "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - ranges; + clk_audss: clock-controller@eee10000 { + compatible = "samsung,s5pv210-audss-clock"; + reg = <0xeee10000 0x1000>; + clock-names = "hclk", "xxti", + "fout_epll", + "sclk_audio0"; + clocks = <&clocks DOUT_HCLKP>, <&xxti>, + <&clocks FOUT_EPLL>, + <&clocks SCLK_AUDIO0>; + #clock-cells = <1>; + }; - clk_audss: clock-controller@eee10000 { - compatible = "samsung,s5pv210-audss-clock"; - reg = <0xeee10000 0x1000>; - clock-names = "hclk", "xxti", - "fout_epll", - "sclk_audio0"; - clocks = <&clocks DOUT_HCLKP>, <&xxti>, - <&clocks FOUT_EPLL>, - <&clocks SCLK_AUDIO0>; - #clock-cells = <1>; - }; - - i2s0: i2s@eee30000 { - compatible = "samsung,s5pv210-i2s"; - reg = <0xeee30000 0x1000>; - interrupt-parent = <&vic2>; - interrupts = <16>; - dma-names = "rx", "tx", "tx-sec"; - dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; - clock-names = "iis", - "i2s_opclk0", - "i2s_opclk1"; - clocks = <&clk_audss CLK_I2S>, - <&clk_audss CLK_I2S>, - <&clk_audss CLK_DOUT_AUD_BUS>; - samsung,idma-addr = <0xc0010000>; - pinctrl-names = "default"; - pinctrl-0 = <&i2s0_bus>; - #sound-dai-cells = <0>; - status = "disabled"; - }; + i2s0: i2s@eee30000 { + compatible = "samsung,s5pv210-i2s"; + reg = <0xeee30000 0x1000>; + interrupt-parent = <&vic2>; + interrupts = <16>; + dma-names = "rx", "tx", "tx-sec"; + dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; + clock-names = "iis", + "i2s_opclk0", + "i2s_opclk1"; + clocks = <&clk_audss CLK_I2S>, + <&clk_audss CLK_I2S>, + <&clk_audss CLK_DOUT_AUD_BUS>; + samsung,idma-addr = <0xc0010000>; + pinctrl-names = "default"; + pinctrl-0 = <&i2s0_bus>; + #sound-dai-cells = <0>; + status = "disabled"; }; i2s1: i2s@e2100000 { From 2ef6f4bd60411934e3fc2715442c2afe70f84bf3 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 13 Oct 2020 22:45:14 -0400 Subject: [PATCH 110/636] nbd: make the config put is called before the notifying the waiter [ Upstream commit 87aac3a80af5cbad93e63250e8a1e19095ba0d30 ] There has one race case for ceph's rbd-nbd tool. When do mapping it may fail with EBUSY from ioctl(nbd, NBD_DO_IT), but actually the nbd device has already unmaped. It dues to if just after the wake_up(), the recv_work() is scheduled out and defers calling the nbd_config_put(), though the map process has exited the "nbd->recv_task" is not cleared. Signed-off-by: Xiubo Li Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index d7c7232e438c..52e1e71e8124 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -740,9 +740,9 @@ static void recv_work(struct work_struct *work) blk_mq_complete_request(blk_mq_rq_from_pdu(cmd)); } + nbd_config_put(nbd); atomic_dec(&config->recv_threads); wake_up(&config->recv_wq); - nbd_config_put(nbd); kfree(args); } From e6786fd18fe2b91a4844f7d1606c50e09d4cebcf Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Thu, 15 Oct 2020 14:57:35 -0400 Subject: [PATCH 111/636] sgl_alloc_order: fix memory leak [ Upstream commit b2a182a40278bc5849730e66bca01a762188ed86 ] sgl_alloc_order() can fail when 'length' is large on a memory constrained system. When order > 0 it will potentially be making several multi-page allocations with the later ones more likely to fail than the earlier one. So it is important that sgl_alloc_order() frees up any pages it has obtained before returning NULL. In the case when order > 0 it calls the wrong free page function and leaks. In testing the leak was sufficient to bring down my 8 GiB laptop with OOM. Reviewed-by: Bart Van Assche Signed-off-by: Douglas Gilbert Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 60e7eca2f4be..3b859201f84c 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -506,7 +506,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, elem_len = min_t(u64, length, PAGE_SIZE << order); page = alloc_pages(gfp, order); if (!page) { - sgl_free(sgl); + sgl_free_order(sgl, order); return NULL; } From a2631d71e40d1d330729f42b43f6f6f159ef285a Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Mon, 12 Oct 2020 16:10:40 +0800 Subject: [PATCH 112/636] nvme-rdma: fix crash when connect rejected [ Upstream commit 43efdb8e870ee0f58633fd579aa5b5185bf5d39e ] A crash can happened when a connect is rejected. The host establishes the connection after received ConnectReply, and then continues to send the fabrics Connect command. If the controller does not receive the ReadyToUse capsule, host may receive a ConnectReject reply. Call nvme_rdma_destroy_queue_ib after the host received the RDMA_CM_EVENT_REJECTED event. Then when the fabrics Connect command times out, nvme_rdma_timeout calls nvme_rdma_complete_rq to fail the request. A crash happenes due to use after free in nvme_rdma_complete_rq. nvme_rdma_destroy_queue_ib is redundant when handling the RDMA_CM_EVENT_REJECTED event as nvme_rdma_destroy_queue_ib is already called in connection failure handler. Signed-off-by: Chao Leng Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/rdma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 077c67816665..134e14e778f8 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1640,7 +1640,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, complete(&queue->cm_done); return 0; case RDMA_CM_EVENT_REJECTED: - nvme_rdma_destroy_queue_ib(queue); cm_error = nvme_rdma_conn_rejected(queue, ev); break; case RDMA_CM_EVENT_ROUTE_ERROR: From 9922fa4c3b5e2c0d0d33261573f2af912808a285 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 5 Oct 2020 09:35:21 -0700 Subject: [PATCH 113/636] md/raid5: fix oops during stripe resizing commit b44c018cdf748b96b676ba09fdbc5b34fc443ada upstream. KoWei reported crash during raid5 reshape: [ 1032.252932] Oops: 0002 [#1] SMP PTI [...] [ 1032.252943] RIP: 0010:memcpy_erms+0x6/0x10 [...] [ 1032.252947] RSP: 0018:ffffba1ac0c03b78 EFLAGS: 00010286 [ 1032.252949] RAX: 0000784ac0000000 RBX: ffff91bec3d09740 RCX: 0000000000001000 [ 1032.252951] RDX: 0000000000001000 RSI: ffff91be6781c000 RDI: 0000784ac0000000 [ 1032.252953] RBP: ffffba1ac0c03bd8 R08: 0000000000001000 R09: ffffba1ac0c03bf8 [ 1032.252954] R10: 0000000000000000 R11: 0000000000000000 R12: ffffba1ac0c03bf8 [ 1032.252955] R13: 0000000000001000 R14: 0000000000000000 R15: 0000000000000000 [ 1032.252958] FS: 0000000000000000(0000) GS:ffff91becf500000(0000) knlGS:0000000000000000 [ 1032.252959] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1032.252961] CR2: 0000784ac0000000 CR3: 000000031780a002 CR4: 00000000001606e0 [ 1032.252962] Call Trace: [ 1032.252969] ? async_memcpy+0x179/0x1000 [async_memcpy] [ 1032.252977] ? raid5_release_stripe+0x8e/0x110 [raid456] [ 1032.252982] handle_stripe_expansion+0x15a/0x1f0 [raid456] [ 1032.252988] handle_stripe+0x592/0x1270 [raid456] [ 1032.252993] handle_active_stripes.isra.0+0x3cb/0x5a0 [raid456] [ 1032.252999] raid5d+0x35c/0x550 [raid456] [ 1032.253002] ? schedule+0x42/0xb0 [ 1032.253006] ? schedule_timeout+0x10e/0x160 [ 1032.253011] md_thread+0x97/0x160 [ 1032.253015] ? wait_woken+0x80/0x80 [ 1032.253019] kthread+0x104/0x140 [ 1032.253022] ? md_start_sync+0x60/0x60 [ 1032.253024] ? kthread_park+0x90/0x90 [ 1032.253027] ret_from_fork+0x35/0x40 This is because cache_size_mutex was unlocked too early in resize_stripes, which races with grow_one_stripe() that grow_one_stripe() allocates a stripe with wrong pool_size. Fix this issue by unlocking cache_size_mutex after updating pool_size. Cc: # v4.4+ Reported-by: KoWei Sung Signed-off-by: Song Liu Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d91154d65455..c7bda4b0bced 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2417,8 +2417,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) } else err = -ENOMEM; - mutex_unlock(&conf->cache_size_mutex); - conf->slab_cache = sc; conf->active_name = 1-conf->active_name; @@ -2441,6 +2439,8 @@ static int resize_stripes(struct r5conf *conf, int newsize) if (!err) conf->pool_size = newsize; + mutex_unlock(&conf->cache_size_mutex); + return err; } From 5ed9b5e389b6a199136ed82289db6cbeb27175df Mon Sep 17 00:00:00 2001 From: Raul E Rangel Date: Mon, 28 Sep 2020 15:59:20 -0600 Subject: [PATCH 114/636] mmc: sdhci-acpi: AMDI0040: Set SDHCI_QUIRK2_PRESET_VALUE_BROKEN commit f23cc3ba491af77395cea3f9d51204398729f26b upstream. This change fixes HS400 tuning for devices with invalid presets. SDHCI presets are not currently used for eMMC HS/HS200/HS400, but are used for DDR52. The HS400 retuning sequence is: HS400->DDR52->HS->HS200->Perform Tuning->HS->HS400 This means that when HS400 tuning happens, we transition through DDR52 for a very brief period. This causes presets to be enabled unintentionally and stay enabled when transitioning back to HS200 or HS400. Some firmware has invalid presets, so we end up with driver strengths that can cause I/O problems. Fixes: 34597a3f60b1 ("mmc: sdhci-acpi: Add support for ACPI HID of AMD Controller with HS400") Signed-off-by: Raul E Rangel Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200928154718.1.Icc21d4b2f354e83e26e57e270dc952f5fe0b0a40@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-acpi.c | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 145143b6a0e6..6cc187ce3a32 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -546,6 +546,43 @@ static int sdhci_acpi_emmc_amd_probe_slot(struct platform_device *pdev, (host->mmc->caps & MMC_CAP_1_8V_DDR)) host->mmc->caps2 = MMC_CAP2_HS400_1_8V; + /* + * There are two types of presets out in the wild: + * 1) Default/broken presets. + * These presets have two sets of problems: + * a) The clock divisor for SDR12, SDR25, and SDR50 is too small. + * This results in clock frequencies that are 2x higher than + * acceptable. i.e., SDR12 = 25 MHz, SDR25 = 50 MHz, SDR50 = + * 100 MHz.x + * b) The HS200 and HS400 driver strengths don't match. + * By default, the SDR104 preset register has a driver strength of + * A, but the (internal) HS400 preset register has a driver + * strength of B. As part of initializing HS400, HS200 tuning + * needs to be performed. Having different driver strengths + * between tuning and operation is wrong. It results in different + * rise/fall times that lead to incorrect sampling. + * 2) Firmware with properly initialized presets. + * These presets have proper clock divisors. i.e., SDR12 => 12MHz, + * SDR25 => 25 MHz, SDR50 => 50 MHz. Additionally the HS200 and + * HS400 preset driver strengths match. + * + * Enabling presets for HS400 doesn't work for the following reasons: + * 1) sdhci_set_ios has a hard coded list of timings that are used + * to determine if presets should be enabled. + * 2) sdhci_get_preset_value is using a non-standard register to + * read out HS400 presets. The AMD controller doesn't support this + * non-standard register. In fact, it doesn't expose the HS400 + * preset register anywhere in the SDHCI memory map. This results + * in reading a garbage value and using the wrong presets. + * + * Since HS400 and HS200 presets must be identical, we could + * instead use the the SDR104 preset register. + * + * If the above issues are resolved we could remove this quirk for + * firmware that that has valid presets (i.e., SDR12 <= 12 MHz). + */ + host->quirks2 |= SDHCI_QUIRK2_PRESET_VALUE_BROKEN; + host->mmc_host_ops.select_drive_strength = amd_select_drive_strength; host->mmc_host_ops.set_ios = amd_set_ios; return 0; From a1c59cb6a944bd0eab88af1af681a166163ac213 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 8 Sep 2020 16:47:37 -0500 Subject: [PATCH 115/636] perf/x86/amd/ibs: Don't include randomized bits in get_ibs_op_count() commit 680d69635005ba0e58fe3f4c52fc162b8fc743b0 upstream. get_ibs_op_count() adds hardware's current count (IbsOpCurCnt) bits to its count regardless of hardware's valid status. According to the PPR for AMD Family 17h Model 31h B0 55803 Rev 0.54, if the counter rolls over, valid status is set, and the lower 7 bits of IbsOpCurCnt are randomized by hardware. Don't include those bits in the driver's event count. Fixes: 8b1e13638d46 ("perf/x86-ibs: Fix usage of IBS op current count") Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/ibs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 801c50be8e1d..e58a3cfeab39 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -347,11 +347,15 @@ static u64 get_ibs_op_count(u64 config) { u64 count = 0; + /* + * If the internal 27-bit counter rolled over, the count is MaxCnt + * and the lower 7 bits of CurCnt are randomized. + * Otherwise CurCnt has the full 27-bit current counter value. + */ if (config & IBS_OP_VAL) - count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ - - if (ibs_caps & IBS_CAPS_RDWROPCNT) - count += (config & IBS_OP_CUR_CNT) >> 32; + count = (config & IBS_OP_MAX_CNT) << 4; + else if (ibs_caps & IBS_CAPS_RDWROPCNT) + count = (config & IBS_OP_CUR_CNT) >> 32; return count; } From c5a38ae89e3675a49d135aab3b68bd56ee622b93 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 8 Sep 2020 16:47:38 -0500 Subject: [PATCH 116/636] perf/x86/amd/ibs: Fix raw sample data accumulation commit 36e1be8ada994d509538b3b1d0af8b63c351e729 upstream. Neither IbsBrTarget nor OPDATA4 are populated in IBS Fetch mode. Don't accumulate them into raw sample user data in that case. Also, in Fetch mode, add saving the IBS Fetch Control Extended MSR. Technically, there is an ABI change here with respect to the IBS raw sample data format, but I don't see any perf driver version information being included in perf.data file headers, but, existing users can detect whether the size of the sample record has reduced by 8 bytes to determine whether the IBS driver has this fix. Fixes: 904cb3677f3a ("perf/x86/amd/ibs: Update IBS MSRs and feature definitions") Reported-by: Stephane Eranian Signed-off-by: Kim Phillips Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200908214740.18097-6-kim.phillips@amd.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/ibs.c | 26 ++++++++++++++++---------- arch/x86/include/asm/msr-index.h | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index e58a3cfeab39..2410bd4bb48f 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -647,18 +647,24 @@ fail: perf_ibs->offset_max, offset + 1); } while (offset < offset_max); + /* + * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately + * depending on their availability. + * Can't add to offset_max as they are staggered + */ if (event->attr.sample_type & PERF_SAMPLE_RAW) { - /* - * Read IbsBrTarget and IbsOpData4 separately - * depending on their availability. - * Can't add to offset_max as they are staggered - */ - if (ibs_caps & IBS_CAPS_BRNTRGT) { - rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); - size++; + if (perf_ibs == &perf_ibs_op) { + if (ibs_caps & IBS_CAPS_BRNTRGT) { + rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); + size++; + } + if (ibs_caps & IBS_CAPS_OPDATA4) { + rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + size++; + } } - if (ibs_caps & IBS_CAPS_OPDATA4) { - rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) { + rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++); size++; } } diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5bb11a8c245e..892af8ab95d7 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -377,6 +377,7 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL< Date: Fri, 18 Sep 2020 00:32:58 +0200 Subject: [PATCH 117/636] leds: bcm6328, bcm6358: use devres LED registering function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ff5c89d44453e7ad99502b04bf798a3fc32c758b upstream. These two drivers do not provide remove method and use devres for allocation of other resources, yet they use led_classdev_register instead of the devres variant, devm_led_classdev_register. Fix this. Signed-off-by: Marek Behún Cc: Álvaro Fernández Rojas Cc: Kevin Cernekee Cc: Jaedon Shin Signed-off-by: Pavel Machek Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/leds/leds-bcm6328.c | 2 +- drivers/leds/leds-bcm6358.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-bcm6328.c b/drivers/leds/leds-bcm6328.c index 2cfd9389ee96..b944ae828004 100644 --- a/drivers/leds/leds-bcm6328.c +++ b/drivers/leds/leds-bcm6328.c @@ -336,7 +336,7 @@ static int bcm6328_led(struct device *dev, struct device_node *nc, u32 reg, led->cdev.brightness_set = bcm6328_led_set; led->cdev.blink_set = bcm6328_blink_set; - rc = led_classdev_register(dev, &led->cdev); + rc = devm_led_classdev_register(dev, &led->cdev); if (rc < 0) return rc; diff --git a/drivers/leds/leds-bcm6358.c b/drivers/leds/leds-bcm6358.c index b2cc06618abe..a86ab6197a4e 100644 --- a/drivers/leds/leds-bcm6358.c +++ b/drivers/leds/leds-bcm6358.c @@ -141,7 +141,7 @@ static int bcm6358_led(struct device *dev, struct device_node *nc, u32 reg, led->cdev.brightness_set = bcm6358_led_set; - rc = led_classdev_register(dev, &led->cdev); + rc = devm_led_classdev_register(dev, &led->cdev); if (rc < 0) return rc; From 0a77d44cc1bc55550bc714a60c7b0c9d7ebe0b7b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Jul 2020 13:22:08 +0200 Subject: [PATCH 118/636] media: uvcvideo: Fix uvc_ctrl_fixup_xu_info() not having any effect commit 93df48d37c3f03886d84831992926333e7810640 upstream. uvc_ctrl_add_info() calls uvc_ctrl_get_flags() which will override the fixed-up flags set by uvc_ctrl_fixup_xu_info(). uvc_ctrl_init_xu_ctrl() already calls uvc_ctrl_get_flags() before calling uvc_ctrl_add_info(), so the uvc_ctrl_get_flags() call in uvc_ctrl_add_info() is not necessary for xu ctrls. This commit moves the uvc_ctrl_get_flags() call for normal controls from uvc_ctrl_add_info() to uvc_ctrl_init_ctrl(), so that we no longer call uvc_ctrl_get_flags() twice for xu controls and so that we no longer override the fixed-up flags set by uvc_ctrl_fixup_xu_info(). This fixes the xu motor controls not working properly on a Logitech 046d:08cc, and presumably also on the other Logitech models which have a quirk for this in the uvc_ctrl_fixup_xu_info() function. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/uvc/uvc_ctrl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 843dc19bada0..cb6046481aed 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -2038,13 +2038,6 @@ static int uvc_ctrl_add_info(struct uvc_device *dev, struct uvc_control *ctrl, goto done; } - /* - * Retrieve control flags from the device. Ignore errors and work with - * default flag values from the uvc_ctrl array when the device doesn't - * properly implement GET_INFO on standard controls. - */ - uvc_ctrl_get_flags(dev, ctrl, &ctrl->info); - ctrl->initialized = 1; uvc_trace(UVC_TRACE_CONTROL, "Added control %pUl/%u to device %s " @@ -2267,6 +2260,13 @@ static void uvc_ctrl_init_ctrl(struct uvc_device *dev, struct uvc_control *ctrl) if (uvc_entity_match_guid(ctrl->entity, info->entity) && ctrl->index == info->index) { uvc_ctrl_add_info(dev, ctrl, info); + /* + * Retrieve control flags from the device. Ignore errors + * and work with default flag values from the uvc_ctrl + * array when the device doesn't properly implement + * GET_INFO on standard controls. + */ + uvc_ctrl_get_flags(dev, ctrl, &ctrl->info); break; } } From 7ce2b16bad2cbfa3fa7bbc42c4448914f639ca47 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 4 Sep 2020 10:58:51 +0200 Subject: [PATCH 119/636] fs: Don't invalidate page buffers in block_write_full_page() commit 6dbf7bb555981fb5faf7b691e8f6169fc2b2e63b upstream. If block_write_full_page() is called for a page that is beyond current inode size, it will truncate page buffers for the page and return 0. This logic has been added in 2.5.62 in commit 81eb69062588 ("fix ext3 BUG due to race with truncate") in history.git tree to fix a problem with ext3 in data=ordered mode. This particular problem doesn't exist anymore because ext3 is long gone and ext4 handles ordered data differently. Also normally buffers are invalidated by truncate code and there's no need to specially handle this in ->writepage() code. This invalidation of page buffers in block_write_full_page() is causing issues to filesystems (e.g. ext4 or ocfs2) when block device is shrunk under filesystem's hands and metadata buffers get discarded while being tracked by the journalling layer. Although it is obviously "not supported" it can cause kernel crashes like: [ 7986.689400] BUG: unable to handle kernel NULL pointer dereference at +0000000000000008 [ 7986.697197] PGD 0 P4D 0 [ 7986.699724] Oops: 0002 [#1] SMP PTI [ 7986.703200] CPU: 4 PID: 203778 Comm: jbd2/dm-3-8 Kdump: loaded Tainted: G +O --------- - - 4.18.0-147.5.0.5.h126.eulerosv2r9.x86_64 #1 [ 7986.716438] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 1.57 08/11/2015 [ 7986.723462] RIP: 0010:jbd2_journal_grab_journal_head+0x1b/0x40 [jbd2] ... [ 7986.810150] Call Trace: [ 7986.812595] __jbd2_journal_insert_checkpoint+0x23/0x70 [jbd2] [ 7986.818408] jbd2_journal_commit_transaction+0x155f/0x1b60 [jbd2] [ 7986.836467] kjournald2+0xbd/0x270 [jbd2] which is not great. The crash happens because bh->b_private is suddently NULL although BH_JBD flag is still set (this is because block_invalidatepage() cleared BH_Mapped flag and subsequent bh lookup found buffer without BH_Mapped set, called init_page_buffers() which has rewritten bh->b_private). So just remove the invalidation in block_write_full_page(). Note that the buffer cache invalidation when block device changes size is already careful to avoid similar problems by using invalidate_mapping_pages() which skips busy buffers so it was only this odd block_write_full_page() behavior that could tear down bdev buffers under filesystem's hands. Reported-by: Ye Bin Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig CC: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/buffer.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 362a86876459..356e289d19f2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2742,16 +2742,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block, /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ -#if 0 - /* Not really sure about this - do we need this ? */ - if (page->mapping->a_ops->invalidatepage) - page->mapping->a_ops->invalidatepage(page, offset); -#endif unlock_page(page); return 0; /* don't care */ } @@ -2946,12 +2936,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block, /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_SIZE-1); if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ - do_invalidatepage(page, 0, PAGE_SIZE); unlock_page(page); return 0; /* don't care */ } From dd7992a5f7448ba33892151784c26b93fe005cfe Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Mon, 5 Oct 2020 02:22:43 -0700 Subject: [PATCH 120/636] NFS: fix nfs_path in case of a rename retry commit 247db73560bc3e5aef6db50c443c3c0db115bc93 upstream. We are generating incorrect path in case of rename retry because we are restarting from wrong dentry. We should restart from the dentry which was received in the call to nfs_path. CC: stable@vger.kernel.org Signed-off-by: Ashish Sangwan Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/namespace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e5686be67be8..d57d453aecc2 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -30,9 +30,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path - * @dentry - pointer to dentry + * @dentry_in - pointer to dentry * @buffer - result buffer - * @buflen - length of buffer + * @buflen_in - length of buffer * @flags - options (see below) * * Helper function for constructing the server pathname @@ -47,15 +47,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * the original device (export) name * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, - unsigned flags) +char *nfs_path(char **p, struct dentry *dentry_in, char *buffer, + ssize_t buflen_in, unsigned flags) { char *end; int namelen; unsigned seq; const char *base; + struct dentry *dentry; + ssize_t buflen; rename_retry: + buflen = buflen_in; + dentry = dentry_in; end = buffer+buflen; *--end = '\0'; buflen--; From 02e92557b3075b9d0e02dc8551ea14ac5955b996 Mon Sep 17 00:00:00 2001 From: "dmitry.torokhov@gmail.com" Date: Sun, 4 Oct 2020 22:11:25 -0700 Subject: [PATCH 121/636] ACPI: button: fix handling lid state changes when input device closed commit 21988a8e51479ceffe7b0568b170effabb708dfe upstream. The original intent of 84d3f6b76447 was to delay evaluating lid state until all drivers have been loaded, with input device being opened from userspace serving as a signal for this condition. Let's ensure that state updates happen even if userspace closed (or in the future inhibited) input device. Note that if we go through suspend/resume cycle we assume the system has been fully initialized even if LID input device has not been opened yet. This has a side-effect of fixing access to input->users outside of input->mutex protections by the way of eliminating said accesses and using driver private flag. Fixes: 84d3f6b76447 ("ACPI / button: Delay acpi_lid_initialize_state() until first user space open") Signed-off-by: Dmitry Torokhov Reviewed-by: Hans de Goede Cc: 4.15+ # 4.15+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/button.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index d5c19e25ddf5..f43f5adc21b6 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -149,6 +149,7 @@ struct acpi_button { int last_state; ktime_t last_time; bool suspended; + bool lid_state_initialized; }; static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); @@ -404,6 +405,8 @@ static int acpi_lid_update_state(struct acpi_device *device, static void acpi_lid_initialize_state(struct acpi_device *device) { + struct acpi_button *button = acpi_driver_data(device); + switch (lid_init_state) { case ACPI_BUTTON_LID_INIT_OPEN: (void)acpi_lid_notify_state(device, 1); @@ -415,13 +418,14 @@ static void acpi_lid_initialize_state(struct acpi_device *device) default: break; } + + button->lid_state_initialized = true; } static void acpi_button_notify(struct acpi_device *device, u32 event) { struct acpi_button *button = acpi_driver_data(device); struct input_dev *input; - int users; switch (event) { case ACPI_FIXED_HARDWARE_EVENT: @@ -430,10 +434,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event) case ACPI_BUTTON_NOTIFY_STATUS: input = button->input; if (button->type == ACPI_BUTTON_TYPE_LID) { - mutex_lock(&button->input->mutex); - users = button->input->users; - mutex_unlock(&button->input->mutex); - if (users) + if (button->lid_state_initialized) acpi_lid_update_state(device, true); } else { int keycode; @@ -478,7 +479,7 @@ static int acpi_button_resume(struct device *dev) struct acpi_button *button = acpi_driver_data(device); button->suspended = false; - if (button->type == ACPI_BUTTON_TYPE_LID && button->input->users) { + if (button->type == ACPI_BUTTON_TYPE_LID) { button->last_state = !!acpi_lid_evaluate_state(device); button->last_time = ktime_get(); acpi_lid_initialize_state(device); From 87eae0761f4d0f0ebe960b198b3e28af671f03e9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 27 Sep 2020 22:50:42 +0100 Subject: [PATCH 122/636] ACPI / extlog: Check for RDMSR failure commit 7cecb47f55e00282f972a1e0b09136c8cd938221 upstream. extlog_init() uses rdmsrl() to read an MSR, which on older CPUs provokes a error message at boot: unchecked MSR access error: RDMSR from 0x179 at rIP: 0xcd047307 (native_read_msr+0x7/0x40) Use rdmsrl_safe() instead, and return -ENODEV if it fails. Reported-by: jim@photojim.ca References: https://bugs.debian.org/971058 Cc: All applicable Signed-off-by: Ben Hutchings Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_extlog.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 560fdae8cc59..943b1dc2d0b3 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -224,9 +224,9 @@ static int __init extlog_init(void) u64 cap; int rc; - rdmsrl(MSR_IA32_MCG_CAP, cap); - - if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) + if (rdmsrl_safe(MSR_IA32_MCG_CAP, &cap) || + !(cap & MCG_ELOG_P) || + !extlog_get_l1addr()) return -ENODEV; if (edac_get_report_status() == EDAC_REPORTING_FORCE) { From 0b7ef21ba885b5fd0a32a3873936c6bd3bd6c676 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Sun, 13 Sep 2020 16:34:03 -0600 Subject: [PATCH 123/636] ACPI: video: use ACPI backlight for HP 635 Notebook commit b226faab4e7890bbbccdf794e8b94276414f9058 upstream. The default backlight interface is AMD's radeon_bl0 which does not work on this system, so use the ACPI backlight interface on it instead. BugLink: https://bugs.launchpad.net/bugs/1894667 Cc: All applicable Signed-off-by: Alex Hung [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index ab1da5e6e7e3..86ffb4af4afc 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -274,6 +274,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "530U4E/540U4E"), }, }, + /* https://bugs.launchpad.net/bugs/1894667 */ + { + .callback = video_detect_force_video, + .ident = "HP 635 Notebook", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP 635 Notebook PC"), + }, + }, /* Non win8 machines which need native backlight nevertheless */ { From 2d32e60198e266723357034cae66a45c1afa4e3f Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Mon, 12 Oct 2020 14:04:46 +0100 Subject: [PATCH 124/636] ACPI: debug: don't allow debugging when ACPI is disabled commit 0fada277147ffc6d694aa32162f51198d4f10d94 upstream. If ACPI is disabled then loading the acpi_dbg module will result in the following splat when lock debugging is enabled. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 0 PID: 1 at kernel/locking/mutex.c:938 __mutex_lock+0xa10/0x1290 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.9.0-rc8+ #103 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x4d8 show_stack+0x34/0x48 dump_stack+0x174/0x1f8 panic+0x360/0x7a0 __warn+0x244/0x2ec report_bug+0x240/0x398 bug_handler+0x50/0xc0 call_break_hook+0x160/0x1d8 brk_handler+0x30/0xc0 do_debug_exception+0x184/0x340 el1_dbg+0x48/0xb0 el1_sync_handler+0x170/0x1c8 el1_sync+0x80/0x100 __mutex_lock+0xa10/0x1290 mutex_lock_nested+0x6c/0xc0 acpi_register_debugger+0x40/0x88 acpi_aml_init+0xc4/0x114 do_one_initcall+0x24c/0xb10 kernel_init_freeable+0x690/0x728 kernel_init+0x20/0x1e8 ret_from_fork+0x10/0x18 This is because acpi_debugger.lock has not been initialized as acpi_debugger_init() is not called when ACPI is disabled. Fail module loading to avoid this and any subsequent problems that might arise by trying to debug AML when ACPI is disabled. Fixes: 8cfb0cdf07e2 ("ACPI / debugger: Add IO interface to access debugger functionalities") Reviewed-by: Hanjun Guo Signed-off-by: Jamie Iles Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_dbg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/acpi_dbg.c b/drivers/acpi/acpi_dbg.c index f21c99ec46ee..ed19d9822bc0 100644 --- a/drivers/acpi/acpi_dbg.c +++ b/drivers/acpi/acpi_dbg.c @@ -757,6 +757,9 @@ int __init acpi_aml_init(void) goto err_exit; } + if (acpi_disabled) + return -ENODEV; + /* Initialize AML IO interface */ mutex_init(&acpi_aml_io.lock); init_waitqueue_head(&acpi_aml_io.wait); From 57cbc85d3503daa5da52138ba1bc72443df1a207 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Sun, 18 Oct 2020 22:57:41 -0500 Subject: [PATCH 125/636] acpi-cpufreq: Honor _PSD table setting on new AMD CPUs commit 5368512abe08a28525d9b24abbfc2a72493e8dba upstream. acpi-cpufreq has a old quirk that overrides the _PSD table supplied by BIOS on AMD CPUs. However the _PSD table of new AMD CPUs (Family 19h+) now accurately reports the P-state dependency of CPU cores. Hence this quirk needs to be fixed in order to support new CPUs' frequency control. Fixes: acd316248205 ("acpi-cpufreq: Add quirk to disable _PSD usage on all AMD CPUs") Signed-off-by: Wei Huang [ rjw: Subject edit ] Cc: 3.10+ # 3.10+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/acpi-cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index aca30f45172e..9e86404a361f 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -701,7 +701,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) cpumask_copy(policy->cpus, topology_core_cpumask(cpu)); } - if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) { + if (check_amd_hwpstate_cpu(cpu) && boot_cpu_data.x86 < 0x19 && + !acpi_pstate_strict) { cpumask_clear(policy->cpus); cpumask_set_cpu(cpu, policy->cpus); cpumask_copy(data->freqdomain_cpus, From 0bfca7315796f332dd2b441eb38e0355c109177c Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Wed, 30 Sep 2020 10:36:46 +0200 Subject: [PATCH 126/636] w1: mxc_w1: Fix timeout resolution problem leading to bus error commit c9723750a699c3bd465493ac2be8992b72ccb105 upstream. On my platform (i.MX53) bus access sometimes fails with w1_search: max_slave_count 64 reached, will continue next search. The reason is the use of jiffies to implement a 200us timeout in mxc_w1_ds2_touch_bit(). On some platforms the jiffies timer resolution is insufficient for this. Fix by replacing jiffies by ktime_get(). For consistency apply the same change to the other use of jiffies in mxc_w1_ds2_reset_bus(). Fixes: f80b2581a706 ("w1: mxc_w1: Optimize mxc_w1_ds2_touch_bit()") Cc: stable Signed-off-by: Martin Fuzzey Link: https://lore.kernel.org/r/1601455030-6607-1-git-send-email-martin.fuzzey@flowbird.group Signed-off-by: Greg Kroah-Hartman --- drivers/w1/masters/mxc_w1.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c index 50b46c4399ea..075454053e5e 100644 --- a/drivers/w1/masters/mxc_w1.c +++ b/drivers/w1/masters/mxc_w1.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -48,12 +48,12 @@ struct mxc_w1_device { static u8 mxc_w1_ds2_reset_bus(void *data) { struct mxc_w1_device *dev = data; - unsigned long timeout; + ktime_t timeout; writeb(MXC_W1_CONTROL_RPP, dev->regs + MXC_W1_CONTROL); /* Wait for reset sequence 511+512us, use 1500us for sure */ - timeout = jiffies + usecs_to_jiffies(1500); + timeout = ktime_add_us(ktime_get(), 1500); udelay(511 + 512); @@ -63,7 +63,7 @@ static u8 mxc_w1_ds2_reset_bus(void *data) /* PST bit is valid after the RPP bit is self-cleared */ if (!(ctrl & MXC_W1_CONTROL_RPP)) return !(ctrl & MXC_W1_CONTROL_PST); - } while (time_is_after_jiffies(timeout)); + } while (ktime_before(ktime_get(), timeout)); return 1; } @@ -76,12 +76,12 @@ static u8 mxc_w1_ds2_reset_bus(void *data) static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit) { struct mxc_w1_device *dev = data; - unsigned long timeout; + ktime_t timeout; writeb(MXC_W1_CONTROL_WR(bit), dev->regs + MXC_W1_CONTROL); /* Wait for read/write bit (60us, Max 120us), use 200us for sure */ - timeout = jiffies + usecs_to_jiffies(200); + timeout = ktime_add_us(ktime_get(), 200); udelay(60); @@ -91,7 +91,7 @@ static u8 mxc_w1_ds2_touch_bit(void *data, u8 bit) /* RDST bit is valid after the WR1/RD bit is self-cleared */ if (!(ctrl & MXC_W1_CONTROL_WR(bit))) return !!(ctrl & MXC_W1_CONTROL_RDST); - } while (time_is_after_jiffies(timeout)); + } while (ktime_before(ktime_get(), timeout)); return 0; } From 2a922386c160f157e60d945b3edacfd6745f2ba0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 22 Oct 2020 11:00:05 +0200 Subject: [PATCH 127/636] scsi: mptfusion: Fix null pointer dereferences in mptscsih_remove() commit 2f4843b172c2c0360ee7792ad98025fae7baefde upstream. The mptscsih_remove() function triggers a kernel oops if the Scsi_Host pointer (ioc->sh) is NULL, as can be seen in this syslog: ioc0: LSI53C1030 B2: Capabilities={Initiator,Target} Begin: Waiting for root file system ... scsi host2: error handler thread failed to spawn, error = -4 mptspi: ioc0: WARNING - Unable to register controller with SCSI subsystem Backtrace: [<000000001045b7cc>] mptspi_probe+0x248/0x3d0 [mptspi] [<0000000040946470>] pci_device_probe+0x1ac/0x2d8 [<0000000040add668>] really_probe+0x1bc/0x988 [<0000000040ade704>] driver_probe_device+0x160/0x218 [<0000000040adee24>] device_driver_attach+0x160/0x188 [<0000000040adef90>] __driver_attach+0x144/0x320 [<0000000040ad7c78>] bus_for_each_dev+0xd4/0x158 [<0000000040adc138>] driver_attach+0x4c/0x80 [<0000000040adb3ec>] bus_add_driver+0x3e0/0x498 [<0000000040ae0130>] driver_register+0xf4/0x298 [<00000000409450c4>] __pci_register_driver+0x78/0xa8 [<000000000007d248>] mptspi_init+0x18c/0x1c4 [mptspi] This patch adds the necessary NULL-pointer checks. Successfully tested on a HP C8000 parisc workstation with buggy SCSI drives. Link: https://lore.kernel.org/r/20201022090005.GA9000@ls3530.fritz.box Cc: Signed-off-by: Helge Deller Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/message/fusion/mptscsih.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index 2af7ae13449d..cec867c10968 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1174,8 +1174,10 @@ mptscsih_remove(struct pci_dev *pdev) MPT_SCSI_HOST *hd; int sz1; - if((hd = shost_priv(host)) == NULL) - return; + if (host == NULL) + hd = NULL; + else + hd = shost_priv(host); mptscsih_shutdown(pdev); @@ -1191,14 +1193,15 @@ mptscsih_remove(struct pci_dev *pdev) "Free'd ScsiLookup (%d) memory\n", ioc->name, sz1)); - kfree(hd->info_kbuf); + if (hd) + kfree(hd->info_kbuf); /* NULL the Scsi_Host pointer */ ioc->sh = NULL; - scsi_host_put(host); - + if (host) + scsi_host_put(host); mpt_detach(pdev); } From 3216622921a22b8a96c36614e3a8e4b38b7c8173 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 29 Sep 2020 03:21:50 -0700 Subject: [PATCH 128/636] scsi: qla2xxx: Fix crash on session cleanup with unload commit 50457dab670f396557e60c07f086358460876353 upstream. On unload, session cleanup prematurely gave the signal for driver unload path to advance. Link: https://lore.kernel.org/r/20200929102152.32278-6-njavali@marvell.com Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: stable@vger.kernel.org Reviewed-by: Himanshu Madhani Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_target.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 29b79e85fa7f..eb6112eb475e 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1228,14 +1228,15 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) case DSC_DELETE_PEND: return; case DSC_DELETED: - if (tgt && tgt->tgt_stop && (tgt->sess_count == 0)) - wake_up_all(&tgt->waitQ); - if (sess->vha->fcport_count == 0) - wake_up_all(&sess->vha->fcport_waitQ); - if (!sess->plogi_link[QLT_PLOGI_LINK_SAME_WWN] && - !sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]) + !sess->plogi_link[QLT_PLOGI_LINK_CONFLICT]) { + if (tgt && tgt->tgt_stop && tgt->sess_count == 0) + wake_up_all(&tgt->waitQ); + + if (sess->vha->fcport_count == 0) + wake_up_all(&sess->vha->fcport_waitQ); return; + } break; case DSC_UPD_FCPORT: /* From 274a95ae33bc61c90f6fe1b4df55d842085f9b82 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 24 Jul 2020 14:46:09 +0800 Subject: [PATCH 129/636] btrfs: qgroup: fix wrong qgroup metadata reserve for delayed inode commit b4c5d8fdfff3e2b6c4fa4a5043e8946dff500f8c upstream. For delayed inode facility, qgroup metadata is reserved for it, and later freed. However we're freeing more bytes than we reserved. In btrfs_delayed_inode_reserve_metadata(): num_bytes = btrfs_calc_metadata_size(fs_info, 1); ... ret = btrfs_qgroup_reserve_meta_prealloc(root, fs_info->nodesize, true); ... if (!ret) { node->bytes_reserved = num_bytes; But in btrfs_delayed_inode_release_metadata(): if (qgroup_free) btrfs_qgroup_free_meta_prealloc(node->root, node->bytes_reserved); else btrfs_qgroup_convert_reserved_meta(node->root, node->bytes_reserved); This means, we're always releasing more qgroup metadata rsv than we have reserved. This won't trigger selftest warning, as btrfs qgroup metadata rsv has extra protection against cases like quota enabled half-way. But we still need to fix this problem any way. This patch will use the same num_bytes for qgroup metadata rsv so we could handle it correctly. Fixes: f218ea6c4792 ("btrfs: delayed-inode: Remove wrong qgroup meta reservation calls") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delayed-inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 7374fb23381c..14855972dee3 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -620,8 +620,7 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { - ret = btrfs_qgroup_reserve_meta_prealloc(root, - fs_info->nodesize, true); + ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); if (ret < 0) return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, From b38695b8e35f96389fd85b86570167aadc42fcbe Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 3 Sep 2020 21:30:12 +0800 Subject: [PATCH 130/636] btrfs: improve device scanning messages commit 79dae17d8d44b2d15779e332180080af45df5352 upstream. Systems booting without the initramfs seems to scan an unusual kind of device path (/dev/root). And at a later time, the device is updated to the correct path. We generally print the process name and PID of the process scanning the device but we don't capture the same information if the device path is rescanned with a different pathname. The current message is too long, so drop the unnecessary UUID and add process name and PID. While at this also update the duplicate device warning to include the process name and PID so the messages are consistent CC: stable@vger.kernel.org # 4.19+ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=89721 Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 815b655b8f10..0879e3dc39c8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -857,16 +857,18 @@ static noinline struct btrfs_device *device_list_add(const char *path, bdput(path_bdev); mutex_unlock(&fs_devices->device_list_mutex); btrfs_warn_in_rcu(device->fs_info, - "duplicate device fsid:devid for %pU:%llu old:%s new:%s", - disk_super->fsid, devid, - rcu_str_deref(device->name), path); + "duplicate device %s devid %llu generation %llu scanned by %s (%d)", + path, devid, found_transid, + current->comm, + task_pid_nr(current)); return ERR_PTR(-EEXIST); } bdput(path_bdev); btrfs_info_in_rcu(device->fs_info, - "device fsid %pU devid %llu moved old:%s new:%s", - disk_super->fsid, devid, - rcu_str_deref(device->name), path); + "devid %llu device path %s changed to %s scanned by %s (%d)", + devid, rcu_str_deref(device->name), + path, current->comm, + task_pid_nr(current)); } name = rcu_string_strdup(path, GFP_NOFS); From c06d72888d13f00ff4801d7d44aa42f8122edeae Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Sep 2020 15:27:50 +0100 Subject: [PATCH 131/636] btrfs: reschedule if necessary when logging directory items commit bb56f02f26fe23798edb1b2175707419b28c752a upstream. Logging directories with many entries can take a significant amount of time, and in some cases monopolize a cpu/core for a long time if the logging task doesn't happen to block often enough. Johannes and Lu Fengqi reported test case generic/041 triggering a soft lockup when the kernel has CONFIG_SOFTLOCKUP_DETECTOR=y. For this test case we log an inode with 3002 hard links, and because the test removed one hard link before fsyncing the file, the inode logging causes the parent directory do be logged as well, which has 6004 directory items to log (3002 BTRFS_DIR_ITEM_KEY items plus 3002 BTRFS_DIR_INDEX_KEY items), so it can take a significant amount of time and trigger the soft lockup. So just make tree-log.c:log_dir_items() reschedule when necessary, releasing the current search path before doing so and then resume from where it was before the reschedule. The stack trace produced when the soft lockup happens is the following: [10480.277653] watchdog: BUG: soft lockup - CPU#2 stuck for 22s! [xfs_io:28172] [10480.279418] Modules linked in: dm_thin_pool dm_persistent_data (...) [10480.284915] irq event stamp: 29646366 [10480.285987] hardirqs last enabled at (29646365): [] __slab_alloc.constprop.0+0x56/0x60 [10480.288482] hardirqs last disabled at (29646366): [] irqentry_enter+0x1d/0x50 [10480.290856] softirqs last enabled at (4612): [] __do_softirq+0x323/0x56c [10480.293615] softirqs last disabled at (4483): [] asm_call_on_stack+0xf/0x20 [10480.296428] CPU: 2 PID: 28172 Comm: xfs_io Not tainted 5.9.0-rc4-default+ #1248 [10480.298948] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba527-rebuilt.opensuse.org 04/01/2014 [10480.302455] RIP: 0010:__slab_alloc.constprop.0+0x19/0x60 [10480.304151] Code: 86 e8 31 75 21 00 66 66 2e 0f 1f 84 00 00 00 (...) [10480.309558] RSP: 0018:ffffadbe09397a58 EFLAGS: 00000282 [10480.311179] RAX: ffff8a495ab92840 RBX: 0000000000000282 RCX: 0000000000000006 [10480.313242] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff85249b66 [10480.315260] RBP: ffff8a497d04b740 R08: 0000000000000001 R09: 0000000000000001 [10480.317229] R10: ffff8a497d044800 R11: ffff8a495ab93c40 R12: 0000000000000000 [10480.319169] R13: 0000000000000000 R14: 0000000000000c40 R15: ffffffffc01daf70 [10480.321104] FS: 00007fa1dc5c0e40(0000) GS:ffff8a497da00000(0000) knlGS:0000000000000000 [10480.323559] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [10480.325235] CR2: 00007fa1dc5befb8 CR3: 0000000004f8a006 CR4: 0000000000170ea0 [10480.327259] Call Trace: [10480.328286] ? overwrite_item+0x1f0/0x5a0 [btrfs] [10480.329784] __kmalloc+0x831/0xa20 [10480.331009] ? btrfs_get_32+0xb0/0x1d0 [btrfs] [10480.332464] overwrite_item+0x1f0/0x5a0 [btrfs] [10480.333948] log_dir_items+0x2ee/0x570 [btrfs] [10480.335413] log_directory_changes+0x82/0xd0 [btrfs] [10480.336926] btrfs_log_inode+0xc9b/0xda0 [btrfs] [10480.338374] ? init_once+0x20/0x20 [btrfs] [10480.339711] btrfs_log_inode_parent+0x8d3/0xd10 [btrfs] [10480.341257] ? dget_parent+0x97/0x2e0 [10480.342480] btrfs_log_dentry_safe+0x3a/0x50 [btrfs] [10480.343977] btrfs_sync_file+0x24b/0x5e0 [btrfs] [10480.345381] do_fsync+0x38/0x70 [10480.346483] __x64_sys_fsync+0x10/0x20 [10480.347703] do_syscall_64+0x2d/0x70 [10480.348891] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [10480.350444] RIP: 0033:0x7fa1dc80970b [10480.351642] Code: 0f 05 48 3d 00 f0 ff ff 77 45 c3 0f 1f 40 00 48 (...) [10480.356952] RSP: 002b:00007fffb3d081d0 EFLAGS: 00000293 ORIG_RAX: 000000000000004a [10480.359458] RAX: ffffffffffffffda RBX: 0000562d93d45e40 RCX: 00007fa1dc80970b [10480.361426] RDX: 0000562d93d44ab0 RSI: 0000562d93d45e60 RDI: 0000000000000003 [10480.363367] RBP: 0000000000000001 R08: 0000000000000000 R09: 00007fa1dc7b2a40 [10480.365317] R10: 0000562d93d0e366 R11: 0000000000000293 R12: 0000000000000001 [10480.367299] R13: 0000562d93d45290 R14: 0000562d93d45e40 R15: 0000562d93d45e60 Link: https://lore.kernel.org/linux-btrfs/20180713090216.GC575@fnst.localdomain/ Reported-by: Johannes Thumshirn CC: stable@vger.kernel.org # 4.4+ Tested-by: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 3e903e6a3387..7b940264c7b9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3589,6 +3589,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * search and this search we'll not find the key again and can just * bail. */ +search: ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); if (ret != 0) goto done; @@ -3608,6 +3609,13 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, if (min_key.objectid != ino || min_key.type != key_type) goto done; + + if (need_resched()) { + btrfs_release_path(path); + cond_resched(); + goto search; + } + ret = overwrite_item(trans, log, dst_path, src, i, &min_key); if (ret) { From 67dc9c4f0af7a04548dbba8606bee75d20627ea0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 21 Sep 2020 14:13:30 +0100 Subject: [PATCH 132/636] btrfs: send, recompute reference path after orphanization of a directory commit 9c2b4e0347067396ceb3ae929d6888c81d610259 upstream. During an incremental send, when an inode has multiple new references we might end up emitting rename operations for orphanizations that have a source path that is no longer valid due to a previous orphanization of some directory inode. This causes the receiver to fail since it tries to rename a path that does not exists. Example reproducer: $ cat reproducer.sh #!/bin/bash mkfs.btrfs -f /dev/sdi >/dev/null mount /dev/sdi /mnt/sdi touch /mnt/sdi/f1 touch /mnt/sdi/f2 mkdir /mnt/sdi/d1 mkdir /mnt/sdi/d1/d2 # Filesystem looks like: # # . (ino 256) # |----- f1 (ino 257) # |----- f2 (ino 258) # |----- d1/ (ino 259) # |----- d2/ (ino 260) btrfs subvolume snapshot -r /mnt/sdi /mnt/sdi/snap1 btrfs send -f /tmp/snap1.send /mnt/sdi/snap1 # Now do a series of changes such that: # # *) inode 258 has one new hardlink and the previous name changed # # *) both names conflict with the old names of two other inodes: # # 1) the new name "d1" conflicts with the old name of inode 259, # under directory inode 256 (root) # # 2) the new name "d2" conflicts with the old name of inode 260 # under directory inode 259 # # *) inodes 259 and 260 now have the old names of inode 258 # # *) inode 257 is now located under inode 260 - an inode with a number # smaller than the inode (258) for which we created a second hard # link and swapped its names with inodes 259 and 260 # ln /mnt/sdi/f2 /mnt/sdi/d1/f2_link mv /mnt/sdi/f1 /mnt/sdi/d1/d2/f1 # Swap d1 and f2. mv /mnt/sdi/d1 /mnt/sdi/tmp mv /mnt/sdi/f2 /mnt/sdi/d1 mv /mnt/sdi/tmp /mnt/sdi/f2 # Swap d2 and f2_link mv /mnt/sdi/f2/d2 /mnt/sdi/tmp mv /mnt/sdi/f2/f2_link /mnt/sdi/f2/d2 mv /mnt/sdi/tmp /mnt/sdi/f2/f2_link # Filesystem now looks like: # # . (ino 256) # |----- d1 (ino 258) # |----- f2/ (ino 259) # |----- f2_link/ (ino 260) # | |----- f1 (ino 257) # | # |----- d2 (ino 258) btrfs subvolume snapshot -r /mnt/sdi /mnt/sdi/snap2 btrfs send -f /tmp/snap2.send -p /mnt/sdi/snap1 /mnt/sdi/snap2 mkfs.btrfs -f /dev/sdj >/dev/null mount /dev/sdj /mnt/sdj btrfs receive -f /tmp/snap1.send /mnt/sdj btrfs receive -f /tmp/snap2.send /mnt/sdj umount /mnt/sdi umount /mnt/sdj When executed the receive of the incremental stream fails: $ ./reproducer.sh Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap1' At subvol /mnt/sdi/snap1 Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap2' At subvol /mnt/sdi/snap2 At subvol snap1 At snapshot snap2 ERROR: rename d1/d2 -> o260-6-0 failed: No such file or directory This happens because: 1) When processing inode 257 we end up computing the name for inode 259 because it is an ancestor in the send snapshot, and at that point it still has its old name, "d1", from the parent snapshot because inode 259 was not yet processed. We then cache that name, which is valid until we start processing inode 259 (or set the progress to 260 after processing its references); 2) Later we start processing inode 258 and collecting all its new references into the list sctx->new_refs. The first reference in the list happens to be the reference for name "d1" while the reference for name "d2" is next (the last element of the list). We compute the full path "d1/d2" for this second reference and store it in the reference (its ->full_path member). The path used for the new parent directory was "d1" and not "f2" because inode 259, the new parent, was not yet processed; 3) When we start processing the new references at process_recorded_refs() we start with the first reference in the list, for the new name "d1". Because there is a conflicting inode that was not yet processed, which is directory inode 259, we orphanize it, renaming it from "d1" to "o259-6-0"; 4) Then we start processing the new reference for name "d2", and we realize it conflicts with the reference of inode 260 in the parent snapshot. So we issue an orphanization operation for inode 260 by emitting a rename operation with a destination path of "o260-6-0" and a source path of "d1/d2" - this source path is the value we stored in the reference earlier at step 2), corresponding to the ->full_path member of the reference, however that path is no longer valid due to the orphanization of the directory inode 259 in step 3). This makes the receiver fail since the path does not exists, it should have been "o259-6-0/d2". Fix this by recomputing the full path of a reference before emitting an orphanization if we previously orphanized any directory, since that directory could be a parent in the new path. This is a rare scenario so keeping it simple and not checking if that previously orphanized directory is in fact an ancestor of the inode we are trying to orphanize. A test case for fstests follows soon. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 2bc80d0b56db..ec67afed07ca 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -3810,6 +3810,72 @@ static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) return 0; } +/* + * When processing the new references for an inode we may orphanize an existing + * directory inode because its old name conflicts with one of the new references + * of the current inode. Later, when processing another new reference of our + * inode, we might need to orphanize another inode, but the path we have in the + * reference reflects the pre-orphanization name of the directory we previously + * orphanized. For example: + * + * parent snapshot looks like: + * + * . (ino 256) + * |----- f1 (ino 257) + * |----- f2 (ino 258) + * |----- d1/ (ino 259) + * |----- d2/ (ino 260) + * + * send snapshot looks like: + * + * . (ino 256) + * |----- d1 (ino 258) + * |----- f2/ (ino 259) + * |----- f2_link/ (ino 260) + * | |----- f1 (ino 257) + * | + * |----- d2 (ino 258) + * + * When processing inode 257 we compute the name for inode 259 as "d1", and we + * cache it in the name cache. Later when we start processing inode 258, when + * collecting all its new references we set a full path of "d1/d2" for its new + * reference with name "d2". When we start processing the new references we + * start by processing the new reference with name "d1", and this results in + * orphanizing inode 259, since its old reference causes a conflict. Then we + * move on the next new reference, with name "d2", and we find out we must + * orphanize inode 260, as its old reference conflicts with ours - but for the + * orphanization we use a source path corresponding to the path we stored in the + * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the + * receiver fail since the path component "d1/" no longer exists, it was renamed + * to "o259-6-0/" when processing the previous new reference. So in this case we + * must recompute the path in the new reference and use it for the new + * orphanization operation. + */ +static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) +{ + char *name; + int ret; + + name = kmemdup(ref->name, ref->name_len, GFP_KERNEL); + if (!name) + return -ENOMEM; + + fs_path_reset(ref->full_path); + ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path); + if (ret < 0) + goto out; + + ret = fs_path_add(ref->full_path, name, ref->name_len); + if (ret < 0) + goto out; + + /* Update the reference's base name pointer. */ + set_ref_path(ref, ref->full_path); +out: + kfree(name); + return ret; +} + /* * This does all the move/link/unlink/rmdir magic. */ @@ -3940,6 +4006,12 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) struct name_cache_entry *nce; struct waiting_dir_move *wdm; + if (orphanized_dir) { + ret = refresh_ref_path(sctx, cur); + if (ret < 0) + goto out; + } + ret = orphanize_inode(sctx, ow_inode, ow_gen, cur->full_path); if (ret < 0) From 96e4fc79e90d34bfafad353c77e873a1da7a3776 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Mon, 21 Sep 2020 20:03:35 +0300 Subject: [PATCH 133/636] btrfs: use kvzalloc() to allocate clone_roots in btrfs_ioctl_send() commit 8eb2fd00153a3a96a19c62ac9c6d48c2efebe5e8 upstream. btrfs_ioctl_send() used open-coded kvzalloc implementation earlier. The code was accidentally replaced with kzalloc() call [1]. Restore the original code by using kvzalloc() to allocate sctx->clone_roots. [1] https://patchwork.kernel.org/patch/9757891/#20529627 Fixes: 818e010bf9d0 ("btrfs: replace opencoded kvzalloc with the helper") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Denis Efremov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index ec67afed07ca..ed61c0daef41 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6859,7 +6859,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); - sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL); + sctx->clone_roots = kvzalloc(alloc_size, GFP_KERNEL); if (!sctx->clone_roots) { ret = -ENOMEM; goto out; From 8c6990856f1bc58ee1c64b9432cda24428a4bc89 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 29 Sep 2020 08:53:54 -0400 Subject: [PATCH 134/636] btrfs: cleanup cow block on error commit 572c83acdcdafeb04e70aa46be1fa539310be20c upstream. In fstest btrfs/064 a transaction abort in __btrfs_cow_block could lead to a system lockup. It gets stuck trying to write back inodes, and the write back thread was trying to lock an extent buffer: $ cat /proc/2143497/stack [<0>] __btrfs_tree_lock+0x108/0x250 [<0>] lock_extent_buffer_for_io+0x35e/0x3a0 [<0>] btree_write_cache_pages+0x15a/0x3b0 [<0>] do_writepages+0x28/0xb0 [<0>] __writeback_single_inode+0x54/0x5c0 [<0>] writeback_sb_inodes+0x1e8/0x510 [<0>] wb_writeback+0xcc/0x440 [<0>] wb_workfn+0xd7/0x650 [<0>] process_one_work+0x236/0x560 [<0>] worker_thread+0x55/0x3c0 [<0>] kthread+0x13a/0x150 [<0>] ret_from_fork+0x1f/0x30 This is because we got an error while COWing a block, specifically here if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) { ret = btrfs_reloc_cow_block(trans, root, buf, cow); if (ret) { btrfs_abort_transaction(trans, ret); return ret; } } [16402.241552] BTRFS: Transaction aborted (error -2) [16402.242362] WARNING: CPU: 1 PID: 2563188 at fs/btrfs/ctree.c:1074 __btrfs_cow_block+0x376/0x540 [16402.249469] CPU: 1 PID: 2563188 Comm: fsstress Not tainted 5.9.0-rc6+ #8 [16402.249936] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 [16402.250525] RIP: 0010:__btrfs_cow_block+0x376/0x540 [16402.252417] RSP: 0018:ffff9cca40e578b0 EFLAGS: 00010282 [16402.252787] RAX: 0000000000000025 RBX: 0000000000000002 RCX: ffff9132bbd19388 [16402.253278] RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff9132bbd19380 [16402.254063] RBP: ffff9132b41a49c0 R08: 0000000000000000 R09: 0000000000000000 [16402.254887] R10: 0000000000000000 R11: ffff91324758b080 R12: ffff91326ef17ce0 [16402.255694] R13: ffff91325fc0f000 R14: ffff91326ef176b0 R15: ffff9132815e2000 [16402.256321] FS: 00007f542c6d7b80(0000) GS:ffff9132bbd00000(0000) knlGS:0000000000000000 [16402.256973] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [16402.257374] CR2: 00007f127b83f250 CR3: 0000000133480002 CR4: 0000000000370ee0 [16402.257867] Call Trace: [16402.258072] btrfs_cow_block+0x109/0x230 [16402.258356] btrfs_search_slot+0x530/0x9d0 [16402.258655] btrfs_lookup_file_extent+0x37/0x40 [16402.259155] __btrfs_drop_extents+0x13c/0xd60 [16402.259628] ? btrfs_block_rsv_migrate+0x4f/0xb0 [16402.259949] btrfs_replace_file_extents+0x190/0x820 [16402.260873] btrfs_clone+0x9ae/0xc00 [16402.261139] btrfs_extent_same_range+0x66/0x90 [16402.261771] btrfs_remap_file_range+0x353/0x3b1 [16402.262333] vfs_dedupe_file_range_one.part.0+0xd5/0x140 [16402.262821] vfs_dedupe_file_range+0x189/0x220 [16402.263150] do_vfs_ioctl+0x552/0x700 [16402.263662] __x64_sys_ioctl+0x62/0xb0 [16402.264023] do_syscall_64+0x33/0x40 [16402.264364] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [16402.264862] RIP: 0033:0x7f542c7d15cb [16402.266901] RSP: 002b:00007ffd35944ea8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [16402.267627] RAX: ffffffffffffffda RBX: 00000000009d1968 RCX: 00007f542c7d15cb [16402.268298] RDX: 00000000009d2490 RSI: 00000000c0189436 RDI: 0000000000000003 [16402.268958] RBP: 00000000009d2520 R08: 0000000000000036 R09: 00000000009d2e64 [16402.269726] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 [16402.270659] R13: 000000000001f000 R14: 00000000009d1970 R15: 00000000009d2e80 [16402.271498] irq event stamp: 0 [16402.271846] hardirqs last enabled at (0): [<0000000000000000>] 0x0 [16402.272497] hardirqs last disabled at (0): [] copy_process+0x6b9/0x1ba0 [16402.273343] softirqs last enabled at (0): [] copy_process+0x6b9/0x1ba0 [16402.273905] softirqs last disabled at (0): [<0000000000000000>] 0x0 [16402.274338] ---[ end trace 737874a5a41a8236 ]--- [16402.274669] BTRFS: error (device dm-9) in __btrfs_cow_block:1074: errno=-2 No such entry [16402.276179] BTRFS info (device dm-9): forced readonly [16402.277046] BTRFS: error (device dm-9) in btrfs_replace_file_extents:2723: errno=-2 No such entry [16402.278744] BTRFS: error (device dm-9) in __btrfs_cow_block:1074: errno=-2 No such entry [16402.279968] BTRFS: error (device dm-9) in __btrfs_cow_block:1074: errno=-2 No such entry [16402.280582] BTRFS info (device dm-9): balance: ended with status: -30 The problem here is that as soon as we allocate the new block it is locked and marked dirty in the btree inode. This means that we could attempt to writeback this block and need to lock the extent buffer. However we're not unlocking it here and thus we deadlock. Fix this by unlocking the cow block if we have any errors inside of __btrfs_cow_block, and also free it so we do not leak it. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8007b6aacec6..f36b2a386aae 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1110,6 +1110,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } @@ -1117,6 +1119,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { ret = btrfs_reloc_cow_block(trans, root, buf, cow); if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } @@ -1149,6 +1153,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (last_ref) { ret = tree_mod_log_free_eb(buf); if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); btrfs_abort_transaction(trans, ret); return ret; } From 0da7b606dc286edadce490d5970fab3fa39c5883 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 12 Oct 2020 11:55:23 +0100 Subject: [PATCH 135/636] btrfs: fix use-after-free on readahead extent after failure to create it commit 83bc1560e02e25c6439341352024ebe8488f4fbd upstream. If we fail to find suitable zones for a new readahead extent, we end up leaving a stale pointer in the global readahead extents radix tree (fs_info->reada_tree), which can trigger the following trace later on: [13367.696354] BUG: kernel NULL pointer dereference, address: 00000000000000b0 [13367.696802] #PF: supervisor read access in kernel mode [13367.697249] #PF: error_code(0x0000) - not-present page [13367.697721] PGD 0 P4D 0 [13367.698171] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI [13367.698632] CPU: 6 PID: 851214 Comm: btrfs Tainted: G W 5.9.0-rc6-btrfs-next-69 #1 [13367.699100] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [13367.700069] RIP: 0010:__lock_acquire+0x20a/0x3970 [13367.700562] Code: ff 1f 0f b7 c0 48 0f (...) [13367.701609] RSP: 0018:ffffb14448f57790 EFLAGS: 00010046 [13367.702140] RAX: 0000000000000000 RBX: 29b935140c15e8cf RCX: 0000000000000000 [13367.702698] RDX: 0000000000000002 RSI: ffffffffb3d66bd0 RDI: 0000000000000046 [13367.703240] RBP: ffff8a52ba8ac040 R08: 00000c2866ad9288 R09: 0000000000000001 [13367.703783] R10: 0000000000000001 R11: 00000000b66d9b53 R12: ffff8a52ba8ac9b0 [13367.704330] R13: 0000000000000000 R14: ffff8a532b6333e8 R15: 0000000000000000 [13367.704880] FS: 00007fe1df6b5700(0000) GS:ffff8a5376600000(0000) knlGS:0000000000000000 [13367.705438] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [13367.705995] CR2: 00000000000000b0 CR3: 000000022cca8004 CR4: 00000000003706e0 [13367.706565] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [13367.707127] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [13367.707686] Call Trace: [13367.708246] ? ___slab_alloc+0x395/0x740 [13367.708820] ? reada_add_block+0xae/0xee0 [btrfs] [13367.709383] lock_acquire+0xb1/0x480 [13367.709955] ? reada_add_block+0xe0/0xee0 [btrfs] [13367.710537] ? reada_add_block+0xae/0xee0 [btrfs] [13367.711097] ? rcu_read_lock_sched_held+0x5d/0x90 [13367.711659] ? kmem_cache_alloc_trace+0x8d2/0x990 [13367.712221] ? lock_acquired+0x33b/0x470 [13367.712784] _raw_spin_lock+0x34/0x80 [13367.713356] ? reada_add_block+0xe0/0xee0 [btrfs] [13367.713966] reada_add_block+0xe0/0xee0 [btrfs] [13367.714529] ? btrfs_root_node+0x15/0x1f0 [btrfs] [13367.715077] btrfs_reada_add+0x117/0x170 [btrfs] [13367.715620] scrub_stripe+0x21e/0x10d0 [btrfs] [13367.716141] ? kvm_sched_clock_read+0x5/0x10 [13367.716657] ? __lock_acquire+0x41e/0x3970 [13367.717184] ? scrub_chunk+0x60/0x140 [btrfs] [13367.717697] ? find_held_lock+0x32/0x90 [13367.718254] ? scrub_chunk+0x60/0x140 [btrfs] [13367.718773] ? lock_acquired+0x33b/0x470 [13367.719278] ? scrub_chunk+0xcd/0x140 [btrfs] [13367.719786] scrub_chunk+0xcd/0x140 [btrfs] [13367.720291] scrub_enumerate_chunks+0x270/0x5c0 [btrfs] [13367.720787] ? finish_wait+0x90/0x90 [13367.721281] btrfs_scrub_dev+0x1ee/0x620 [btrfs] [13367.721762] ? rcu_read_lock_any_held+0x8e/0xb0 [13367.722235] ? preempt_count_add+0x49/0xa0 [13367.722710] ? __sb_start_write+0x19b/0x290 [13367.723192] btrfs_ioctl+0x7f5/0x36f0 [btrfs] [13367.723660] ? __fget_files+0x101/0x1d0 [13367.724118] ? find_held_lock+0x32/0x90 [13367.724559] ? __fget_files+0x101/0x1d0 [13367.724982] ? __x64_sys_ioctl+0x83/0xb0 [13367.725399] __x64_sys_ioctl+0x83/0xb0 [13367.725802] do_syscall_64+0x33/0x80 [13367.726188] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [13367.726574] RIP: 0033:0x7fe1df7add87 [13367.726948] Code: 00 00 00 48 8b 05 09 91 (...) [13367.727763] RSP: 002b:00007fe1df6b4d48 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [13367.728179] RAX: ffffffffffffffda RBX: 000055ce1fb596a0 RCX: 00007fe1df7add87 [13367.728604] RDX: 000055ce1fb596a0 RSI: 00000000c400941b RDI: 0000000000000003 [13367.729021] RBP: 0000000000000000 R08: 00007fe1df6b5700 R09: 0000000000000000 [13367.729431] R10: 00007fe1df6b5700 R11: 0000000000000246 R12: 00007ffd922b07de [13367.729842] R13: 00007ffd922b07df R14: 00007fe1df6b4e40 R15: 0000000000802000 [13367.730275] Modules linked in: btrfs blake2b_generic xor (...) [13367.732638] CR2: 00000000000000b0 [13367.733166] ---[ end trace d298b6805556acd9 ]--- What happens is the following: 1) At reada_find_extent() we don't find any existing readahead extent for the metadata extent starting at logical address X; 2) So we proceed to create a new one. We then call btrfs_map_block() to get information about which stripes contain extent X; 3) After that we iterate over the stripes and create only one zone for the readahead extent - only one because reada_find_zone() returned NULL for all iterations except for one, either because a memory allocation failed or it couldn't find the block group of the extent (it may have just been deleted); 4) We then add the new readahead extent to the readahead extents radix tree at fs_info->reada_tree; 5) Then we iterate over each zone of the new readahead extent, and find that the device used for that zone no longer exists, because it was removed or it was the source device of a device replace operation. Since this left 'have_zone' set to 0, after finishing the loop we jump to the 'error' label, call kfree() on the new readahead extent and return without removing it from the radix tree at fs_info->reada_tree; 6) Any future call to reada_find_extent() for the logical address X will find the stale pointer in the readahead extents radix tree, increment its reference counter, which can trigger the use-after-free right away or return it to the caller reada_add_block() that results in the use-after-free of the example trace above. So fix this by making sure we delete the readahead extent from the radix tree if we fail to setup zones for it (when 'have_zone = 0'). Fixes: 319450211842ba ("btrfs: reada: bypass adding extent when all zone failed") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/reada.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 4c81ffe12385..368c349c5669 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -442,6 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, } have_zone = 1; } + if (!have_zone) + radix_tree_delete(&fs_info->reada_tree, index); spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_read_unlock(&fs_info->dev_replace); From 9b1f935ae4daefbc6ff8929526875d55b7a53121 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Wed, 28 Oct 2020 22:31:23 +0200 Subject: [PATCH 136/636] usb: xhci: Workaround for S3 issue on AMD SNPS 3.0 xHC commit 2a632815683d2d34df52b701a36fe5ac6654e719 upstream. On some platform of AMD, S3 fails with HCE and SRE errors. To fix this, need to disable a bit which is enable in sparse controller. Cc: stable@vger.kernel.org #v4.19+ Signed-off-by: Sanket Goswami Signed-off-by: Sandeep Singh Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20201028203124.375344-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 17 +++++++++++++++++ drivers/usb/host/xhci.h | 1 + 2 files changed, 18 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 1a6a23e57201..0c6b6f14b169 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -21,6 +21,8 @@ #define SSIC_PORT_CFG2_OFFSET 0x30 #define PROG_DONE (1 << 30) #define SSIC_PORT_UNUSED (1 << 31) +#define SPARSE_DISABLE_BIT 17 +#define SPARSE_CNTL_ENABLE 0xC12C /* Device for a quirk */ #define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73 @@ -141,6 +143,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) (pdev->device == 0x15e0 || pdev->device == 0x15e1)) xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND; + if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x15e5) + xhci->quirks |= XHCI_DISABLE_SPARSE; + if (pdev->vendor == PCI_VENDOR_ID_AMD) xhci->quirks |= XHCI_TRUST_TX_LENGTH; @@ -441,6 +446,15 @@ static void xhci_pme_quirk(struct usb_hcd *hcd) readl(reg); } +static void xhci_sparse_control_quirk(struct usb_hcd *hcd) +{ + u32 reg; + + reg = readl(hcd->regs + SPARSE_CNTL_ENABLE); + reg &= ~BIT(SPARSE_DISABLE_BIT); + writel(reg, hcd->regs + SPARSE_CNTL_ENABLE); +} + static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -460,6 +474,9 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) if (xhci->quirks & XHCI_SSIC_PORT_UNUSED) xhci_ssic_port_unused_quirk(hcd, true); + if (xhci->quirks & XHCI_DISABLE_SPARSE) + xhci_sparse_control_quirk(hcd); + ret = xhci_suspend(xhci, do_wakeup); if (ret && (xhci->quirks & XHCI_SSIC_PORT_UNUSED)) xhci_ssic_port_unused_quirk(hcd, false); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 39efbcf63c11..7a4195f8cd1c 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1872,6 +1872,7 @@ struct xhci_hcd { #define XHCI_ZERO_64B_REGS BIT_ULL(32) #define XHCI_RESET_PLL_ON_DISCONNECT BIT_ULL(34) #define XHCI_SNPS_BROKEN_SUSPEND BIT_ULL(35) +#define XHCI_DISABLE_SPARSE BIT_ULL(38) unsigned int num_active_eps; unsigned int limit_active_eps; From 7ba24610f39fb71f37ecb03b8981e6bc8231cafd Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 24 Sep 2020 01:21:43 -0700 Subject: [PATCH 137/636] usb: dwc3: ep0: Fix ZLP for OUT ep0 requests commit 66706077dc89c66a4777a4c6298273816afb848c upstream. The current ZLP handling for ep0 requests is only for control IN requests. For OUT direction, DWC3 needs to check and setup for MPS alignment. Usually, control OUT requests can indicate its transfer size via the wLength field of the control message. So usb_request->zero is usually not needed for OUT direction. To handle ZLP OUT for control endpoint, make sure the TRB is MPS size. Cc: stable@vger.kernel.org Fixes: c7fcdeb2627c ("usb: dwc3: ep0: simplify EP0 state machine") Fixes: d6e5a549cc4d ("usb: dwc3: simplify ZLP handling") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ep0.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 2fb02f877401..0d95ca804c1e 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -935,12 +935,16 @@ static void dwc3_ep0_xfer_complete(struct dwc3 *dwc, static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, struct dwc3_ep *dep, struct dwc3_request *req) { + unsigned int trb_length = 0; int ret; req->direction = !!dep->number; if (req->request.length == 0) { - dwc3_ep0_prepare_one_trb(dep, dwc->ep0_trb_addr, 0, + if (!req->direction) + trb_length = dep->endpoint.maxpacket; + + dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, trb_length, DWC3_TRBCTL_CONTROL_DATA, false); ret = dwc3_ep0_start_trans(dep); } else if (!IS_ALIGNED(req->request.length, dep->endpoint.maxpacket) @@ -987,9 +991,12 @@ static void __dwc3_ep0_do_control_data(struct dwc3 *dwc, req->trb = &dwc->ep0_trb[dep->trb_enqueue - 1]; + if (!req->direction) + trb_length = dep->endpoint.maxpacket; + /* Now prepare one extra TRB to align transfer size */ dwc3_ep0_prepare_one_trb(dep, dwc->bounce_addr, - 0, DWC3_TRBCTL_CONTROL_DATA, + trb_length, DWC3_TRBCTL_CONTROL_DATA, false); ret = dwc3_ep0_start_trans(dep); } else { From 05dac708d4873058d203d41766370b47cb044234 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 24 Sep 2020 01:21:18 -0700 Subject: [PATCH 138/636] usb: dwc3: gadget: Check MPS of the request length commit ca3df3468eec87f6374662f7de425bc44c3810c1 upstream. When preparing for SG, not all the entries are prepared at once. When resume, don't use the remaining request length to calculate for MPS alignment. Use the entire request->length to do that. Cc: stable@vger.kernel.org Fixes: 5d187c0454ef ("usb: dwc3: gadget: Don't setup more than requested") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 37cc3fd7c3ca..f0d2f0a4e990 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1057,6 +1057,8 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, struct scatterlist *s; int i; unsigned int length = req->request.length; + unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); + unsigned int rem = length % maxp; unsigned int remaining = req->request.num_mapped_sgs - req->num_queued_sgs; @@ -1068,8 +1070,6 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, length -= sg_dma_len(s); for_each_sg(sg, s, remaining, i) { - unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); - unsigned int rem = length % maxp; unsigned int trb_length; unsigned chain = true; From a68cbd8b95e713c07dc38970e8c0bb2bf90bbae4 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Tue, 28 Jul 2020 20:42:41 +0800 Subject: [PATCH 139/636] usb: dwc3: core: add phy cleanup for probe error handling commit 03c1fd622f72c7624c81b64fdba4a567ae5ee9cb upstream. Add the phy cleanup if dwc3 mode init fail, which is the missing part of de-init for dwc3 core init. Fixes: c499ff71ff2a ("usb: dwc3: core: re-factor init and exit paths") Cc: Signed-off-by: Li Jun Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 60d08269ad9a..065c6dee172d 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1507,6 +1507,17 @@ static int dwc3_probe(struct platform_device *pdev) err5: dwc3_event_buffers_cleanup(dwc); + + usb_phy_shutdown(dwc->usb2_phy); + usb_phy_shutdown(dwc->usb3_phy); + phy_exit(dwc->usb2_generic_phy); + phy_exit(dwc->usb3_generic_phy); + + usb_phy_set_suspend(dwc->usb2_phy, 1); + usb_phy_set_suspend(dwc->usb3_phy, 1); + phy_power_off(dwc->usb2_generic_phy); + phy_power_off(dwc->usb3_generic_phy); + dwc3_ulpi_exit(dwc); err4: From f324c9f5ba40ae617c9601743be42e1449e3c305 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Tue, 28 Jul 2020 20:42:40 +0800 Subject: [PATCH 140/636] usb: dwc3: core: don't trigger runtime pm when remove driver commit 266d0493900ac5d6a21cdbe6b1624ed2da94d47a upstream. No need to trigger runtime pm in driver removal, otherwise if user disable auto suspend via sys file, runtime suspend may be entered, which will call dwc3_core_exit() again and there will be clock disable not balance warning: [ 2026.820154] xhci-hcd xhci-hcd.0.auto: remove, state 4 [ 2026.825268] usb usb2: USB disconnect, device number 1 [ 2026.831017] xhci-hcd xhci-hcd.0.auto: USB bus 2 deregistered [ 2026.836806] xhci-hcd xhci-hcd.0.auto: remove, state 4 [ 2026.842029] usb usb1: USB disconnect, device number 1 [ 2026.848029] xhci-hcd xhci-hcd.0.auto: USB bus 1 deregistered [ 2026.865889] ------------[ cut here ]------------ [ 2026.870506] usb2_ctrl_root_clk already disabled [ 2026.875082] WARNING: CPU: 0 PID: 731 at drivers/clk/clk.c:958 clk_core_disable+0xa0/0xa8 [ 2026.883170] Modules linked in: dwc3(-) phy_fsl_imx8mq_usb [last unloaded: dwc3] [ 2026.890488] CPU: 0 PID: 731 Comm: rmmod Not tainted 5.8.0-rc7-00280-g9d08cca-dirty #245 [ 2026.898489] Hardware name: NXP i.MX8MQ EVK (DT) [ 2026.903020] pstate: 20000085 (nzCv daIf -PAN -UAO BTYPE=--) [ 2026.908594] pc : clk_core_disable+0xa0/0xa8 [ 2026.912777] lr : clk_core_disable+0xa0/0xa8 [ 2026.916958] sp : ffff8000121b39a0 [ 2026.920271] x29: ffff8000121b39a0 x28: ffff0000b11f3700 [ 2026.925583] x27: 0000000000000000 x26: ffff0000b539c700 [ 2026.930895] x25: 000001d7e44e1232 x24: ffff0000b76fa800 [ 2026.936208] x23: ffff0000b76fa6f8 x22: ffff800008d01040 [ 2026.941520] x21: ffff0000b539ce00 x20: ffff0000b7105000 [ 2026.946832] x19: ffff0000b7105000 x18: 0000000000000010 [ 2026.952144] x17: 0000000000000001 x16: 0000000000000000 [ 2026.957456] x15: ffff0000b11f3b70 x14: ffffffffffffffff [ 2026.962768] x13: ffff8000921b36f7 x12: ffff8000121b36ff [ 2026.968080] x11: ffff8000119e1000 x10: ffff800011bf26d0 [ 2026.973392] x9 : 0000000000000000 x8 : ffff800011bf3000 [ 2026.978704] x7 : ffff800010695d68 x6 : 0000000000000252 [ 2026.984016] x5 : ffff0000bb9881f0 x4 : 0000000000000000 [ 2026.989327] x3 : 0000000000000027 x2 : 0000000000000023 [ 2026.994639] x1 : ac2fa471aa7cab00 x0 : 0000000000000000 [ 2026.999951] Call trace: [ 2027.002401] clk_core_disable+0xa0/0xa8 [ 2027.006238] clk_core_disable_lock+0x20/0x38 [ 2027.010508] clk_disable+0x1c/0x28 [ 2027.013911] clk_bulk_disable+0x34/0x50 [ 2027.017758] dwc3_core_exit+0xec/0x110 [dwc3] [ 2027.022122] dwc3_suspend_common+0x84/0x188 [dwc3] [ 2027.026919] dwc3_runtime_suspend+0x74/0x9c [dwc3] [ 2027.031712] pm_generic_runtime_suspend+0x28/0x40 [ 2027.036419] genpd_runtime_suspend+0xa0/0x258 [ 2027.040777] __rpm_callback+0x88/0x140 [ 2027.044526] rpm_callback+0x20/0x80 [ 2027.048015] rpm_suspend+0xd0/0x418 [ 2027.051503] __pm_runtime_suspend+0x58/0xa0 [ 2027.055693] dwc3_runtime_idle+0x7c/0x90 [dwc3] [ 2027.060224] __rpm_callback+0x88/0x140 [ 2027.063973] rpm_idle+0x78/0x150 [ 2027.067201] __pm_runtime_idle+0x58/0xa0 [ 2027.071130] dwc3_remove+0x64/0xc0 [dwc3] [ 2027.075140] platform_drv_remove+0x28/0x48 [ 2027.079239] device_release_driver_internal+0xf4/0x1c0 [ 2027.084377] driver_detach+0x4c/0xd8 [ 2027.087954] bus_remove_driver+0x54/0xa8 [ 2027.091877] driver_unregister+0x2c/0x58 [ 2027.095799] platform_driver_unregister+0x10/0x18 [ 2027.100509] dwc3_driver_exit+0x14/0x1408 [dwc3] [ 2027.105129] __arm64_sys_delete_module+0x178/0x218 [ 2027.109922] el0_svc_common.constprop.0+0x68/0x160 [ 2027.114714] do_el0_svc+0x20/0x80 [ 2027.118031] el0_sync_handler+0x88/0x190 [ 2027.121953] el0_sync+0x140/0x180 [ 2027.125267] ---[ end trace 027f4f8189958f1f ]--- [ 2027.129976] ------------[ cut here ]------------ Fixes: fc8bb91bc83e ("usb: dwc3: implement runtime PM") Cc: Signed-off-by: Li Jun Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 065c6dee172d..86b1cfbe48a0 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1556,9 +1556,9 @@ static int dwc3_remove(struct platform_device *pdev) dwc3_core_exit(dwc); dwc3_ulpi_exit(dwc); - pm_runtime_put_sync(&pdev->dev); - pm_runtime_allow(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); dwc3_free_event_buffers(dwc); dwc3_free_scratch_buffers(dwc); From b40d7d3bd43f8333ebe24a62b63affc6a1aac244 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 19 Oct 2020 19:07:02 +0200 Subject: [PATCH 141/636] usb: cdc-acm: fix cooldown mechanism commit 38203b8385bf6283537162bde7d499f830964711 upstream. Commit a4e7279cd1d1 ("cdc-acm: introduce a cool down") is causing regression if there is some USB error, such as -EPROTO. This has been reported on some samples of the Odroid-N2 using the Combee II Zibgee USB dongle. > struct acm *acm = container_of(work, struct acm, work) is incorrect in case of a delayed work and causes warnings, usually from the workqueue: > WARNING: CPU: 0 PID: 0 at kernel/workqueue.c:1474 __queue_work+0x480/0x528. When this happens, USB eventually stops working completely after a while. Also the ACM_ERROR_DELAY bit is never set, so the cooldown mechanism previously introduced cannot be triggered and acm_submit_read_urb() is never called. This changes makes the cdc-acm driver use a single delayed work, fixing the pointer arithmetic in acm_softint() and set the ACM_ERROR_DELAY when the cooldown mechanism appear to be needed. Fixes: a4e7279cd1d1 ("cdc-acm: introduce a cool down") Cc: Oliver Neukum Reported-by: Pascal Vizeli Acked-by: Oliver Neukum Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201019170702.150534-1-jbrunet@baylibre.com Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 12 +++++------- drivers/usb/class/cdc-acm.h | 3 +-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 7376f74a4f04..08751d1a765f 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -508,6 +508,7 @@ static void acm_read_bulk_callback(struct urb *urb) "%s - cooling babbling device\n", __func__); usb_mark_last_busy(acm->dev); set_bit(rb->index, &acm->urbs_in_error_delay); + set_bit(ACM_ERROR_DELAY, &acm->flags); cooldown = true; break; default: @@ -533,7 +534,7 @@ static void acm_read_bulk_callback(struct urb *urb) if (stopped || stalled || cooldown) { if (stalled) - schedule_work(&acm->work); + schedule_delayed_work(&acm->dwork, 0); else if (cooldown) schedule_delayed_work(&acm->dwork, HZ / 2); return; @@ -568,13 +569,13 @@ static void acm_write_bulk(struct urb *urb) acm_write_done(acm, wb); spin_unlock_irqrestore(&acm->write_lock, flags); set_bit(EVENT_TTY_WAKEUP, &acm->flags); - schedule_work(&acm->work); + schedule_delayed_work(&acm->dwork, 0); } static void acm_softint(struct work_struct *work) { int i; - struct acm *acm = container_of(work, struct acm, work); + struct acm *acm = container_of(work, struct acm, dwork.work); if (test_bit(EVENT_RX_STALL, &acm->flags)) { smp_mb(); /* against acm_suspend() */ @@ -590,7 +591,7 @@ static void acm_softint(struct work_struct *work) if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) { for (i = 0; i < acm->rx_buflimit; i++) if (test_and_clear_bit(i, &acm->urbs_in_error_delay)) - acm_submit_read_urb(acm, i, GFP_NOIO); + acm_submit_read_urb(acm, i, GFP_KERNEL); } if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) @@ -1396,7 +1397,6 @@ made_compressed_probe: acm->ctrlsize = ctrlsize; acm->readsize = readsize; acm->rx_buflimit = num_rx_buf; - INIT_WORK(&acm->work, acm_softint); INIT_DELAYED_WORK(&acm->dwork, acm_softint); init_waitqueue_head(&acm->wioctl); spin_lock_init(&acm->write_lock); @@ -1606,7 +1606,6 @@ static void acm_disconnect(struct usb_interface *intf) } acm_kill_urbs(acm); - cancel_work_sync(&acm->work); cancel_delayed_work_sync(&acm->dwork); tty_unregister_device(acm_tty_driver, acm->minor); @@ -1649,7 +1648,6 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) return 0; acm_kill_urbs(acm); - cancel_work_sync(&acm->work); cancel_delayed_work_sync(&acm->dwork); acm->urbs_in_error_delay = 0; diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 30380d28a504..d8f8651425c4 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -111,8 +111,7 @@ struct acm { # define ACM_ERROR_DELAY 3 unsigned long urbs_in_error_delay; /* these need to be restarted after a delay */ struct usb_cdc_line_coding line; /* bits, stop, parity */ - struct work_struct work; /* work queue entry for various purposes*/ - struct delayed_work dwork; /* for cool downs needed in error recovery */ + struct delayed_work dwork; /* work queue entry for various purposes */ unsigned int ctrlin; /* input control lines (DCD, DSR, RI, break, overruns) */ unsigned int ctrlout; /* output control lines (DTR, RTS) */ struct async_icount iocount; /* counters for control line changes */ From 061fae52838ad8e9c59c65859088e3f6f850d5e5 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Mon, 12 Oct 2020 19:03:12 +0800 Subject: [PATCH 142/636] usb: typec: tcpm: reset hard_reset_count for any disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2d9c6442a9c81f4f8dee678d0b3c183173ab1e2d upstream. Current tcpm_detach() only reset hard_reset_count if port->attached is true, this may cause this counter clear is missed if the CC disconnect event is generated after tcpm_port_reset() is done by other events, e.g. VBUS off comes first before CC disconect for a power sink, in that case the first tcpm_detach() will only clear port->attached flag but leave hard_reset_count there because tcpm_port_is_disconnected() is still false, then later tcpm_detach() by CC disconnect will directly return due to port->attached is cleared, finally this will result tcpm will not try hard reset or error recovery for later attach. ChiYuan reported this issue on his platform with below tcpm trace: After power sink session setup after hard reset 2 times, detach from the power source and then attach: [ 4848.046358] VBUS off [ 4848.046384] state change SNK_READY -> SNK_UNATTACHED [ 4848.050908] Setting voltage/current limit 0 mV 0 mA [ 4848.050936] polarity 0 [ 4848.052593] Requesting mux state 0, usb-role 0, orientation 0 [ 4848.053222] Start toggling [ 4848.086500] state change SNK_UNATTACHED -> TOGGLING [ 4848.089983] CC1: 0 -> 0, CC2: 3 -> 3 [state TOGGLING, polarity 0, connected] [ 4848.089993] state change TOGGLING -> SNK_ATTACH_WAIT [ 4848.090031] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @200 ms [ 4848.141162] CC1: 0 -> 0, CC2: 3 -> 0 [state SNK_ATTACH_WAIT, polarity 0, disconnected] [ 4848.141170] state change SNK_ATTACH_WAIT -> SNK_ATTACH_WAIT [ 4848.141184] pending state change SNK_ATTACH_WAIT -> SNK_UNATTACHED @20 ms [ 4848.163156] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [delayed 20 ms] [ 4848.163162] Start toggling [ 4848.216918] CC1: 0 -> 0, CC2: 0 -> 3 [state TOGGLING, polarity 0, connected] [ 4848.216954] state change TOGGLING -> SNK_ATTACH_WAIT [ 4848.217080] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @200 ms [ 4848.231771] CC1: 0 -> 0, CC2: 3 -> 0 [state SNK_ATTACH_WAIT, polarity 0, disconnected] [ 4848.231800] state change SNK_ATTACH_WAIT -> SNK_ATTACH_WAIT [ 4848.231857] pending state change SNK_ATTACH_WAIT -> SNK_UNATTACHED @20 ms [ 4848.256022] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [delayed20 ms] [ 4848.256049] Start toggling [ 4848.871148] VBUS on [ 4848.885324] CC1: 0 -> 0, CC2: 0 -> 3 [state TOGGLING, polarity 0, connected] [ 4848.885372] state change TOGGLING -> SNK_ATTACH_WAIT [ 4848.885548] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @200 ms [ 4849.088240] state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED [delayed200 ms] [ 4849.088284] state change SNK_DEBOUNCED -> SNK_ATTACHED [ 4849.088291] polarity 1 [ 4849.088769] Requesting mux state 1, usb-role 2, orientation 2 [ 4849.088895] state change SNK_ATTACHED -> SNK_STARTUP [ 4849.088907] state change SNK_STARTUP -> SNK_DISCOVERY [ 4849.088915] Setting voltage/current limit 5000 mV 0 mA [ 4849.088927] vbus=0 charge:=1 [ 4849.090505] state change SNK_DISCOVERY -> SNK_WAIT_CAPABILITIES [ 4849.090828] pending state change SNK_WAIT_CAPABILITIES -> SNK_READY @240 ms [ 4849.335878] state change SNK_WAIT_CAPABILITIES -> SNK_READY [delayed240 ms] this patch fix this issue by clear hard_reset_count at any cases of cc disconnect, í.e. don't check port->attached flag. Fixes: 4b4e02c83167 ("typec: tcpm: Move out of staging") Cc: stable@vger.kernel.org Reported-and-tested-by: ChiYuan Huang Reviewed-by: Guenter Roeck Reviewed-by: Heikki Krogerus Signed-off-by: Li Jun Link: https://lore.kernel.org/r/1602500592-3817-1-git-send-email-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c index 7086ebb24532..af41d4dce3ad 100644 --- a/drivers/usb/typec/tcpm.c +++ b/drivers/usb/typec/tcpm.c @@ -2727,12 +2727,12 @@ static void tcpm_reset_port(struct tcpm_port *port) static void tcpm_detach(struct tcpm_port *port) { - if (!port->attached) - return; - if (tcpm_port_is_disconnected(port)) port->hard_reset_count = 0; + if (!port->attached) + return; + tcpm_reset_port(port); } From 51f6f0e8d3929ca0610c7fd69fb62d3a74dda372 Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Sat, 10 Oct 2020 14:03:08 +0800 Subject: [PATCH 143/636] usb: host: fsl-mph-dr-of: check return of dma_set_mask() commit 3cd54a618834430a26a648d880dd83d740f2ae30 upstream. fsl_usb2_device_register() should stop init if dma_set_mask() return error. Fixes: cae058610465 ("drivers/usb/host: fsl: Set DMA_MASK of usb platform device") Reviewed-by: Peter Chen Signed-off-by: Ran Wang Link: https://lore.kernel.org/r/20201010060308.33693-1-ran.wang_1@nxp.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/fsl-mph-dr-of.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index 677f9d592109..de922022b83a 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -94,10 +94,13 @@ static struct platform_device *fsl_usb2_device_register( pdev->dev.coherent_dma_mask = ofdev->dev.coherent_dma_mask; - if (!pdev->dev.dma_mask) + if (!pdev->dev.dma_mask) { pdev->dev.dma_mask = &ofdev->dev.coherent_dma_mask; - else - dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + } else { + retval = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (retval) + goto error; + } retval = platform_device_add_data(pdev, pdata, sizeof(*pdata)); if (retval) From d1dff15c809e93af022da03f4663b8ac93e3d1c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 19 Oct 2020 11:15:23 +0100 Subject: [PATCH 144/636] drm/i915: Force VT'd workarounds when running as a guest OS commit 8195400f7ea95399f721ad21f4d663a62c65036f upstream. If i915.ko is being used as a passthrough device, it does not know if the host is using intel_iommu. Mixing the iommu and gfx causes a few issues (such as scanout overfetch) which we need to workaround inside the driver, so if we detect we are running under a hypervisor, also assume the device access is being virtualised. Reported-by: Stefan Fritsch Suggested-by: Stefan Fritsch Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Joonas Lahtinen Cc: Stefan Fritsch Cc: stable@vger.kernel.org Tested-by: Stefan Fritsch Reviewed-by: Zhenyu Wang Link: https://patchwork.freedesktop.org/patch/msgid/20201019101523.4145-1-chris@chris-wilson.co.uk (cherry picked from commit f566fdcd6cc49a9d5b5d782f56e3e7cb243f01b8) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index db2e9af49ae6..37c80cfecd09 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -33,6 +33,8 @@ #include #include +#include + #include #include #include @@ -2683,7 +2685,9 @@ static inline bool intel_vtd_active(void) if (intel_iommu_gfx_mapped) return true; #endif - return false; + + /* Running as a guest, we assume the host is enforcing VT'd */ + return !hypervisor_is_type(X86_HYPER_NATIVE); } static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) From 20cf3fcdd646735346fca7bbf365b74bf0988034 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 19 Oct 2020 10:55:16 +0200 Subject: [PATCH 145/636] vt: keyboard, simplify vt_kdgkbsent commit 6ca03f90527e499dd5e32d6522909e2ad390896b upstream. Use 'strlen' of the string, add one for NUL terminator and simply do 'copy_to_user' instead of the explicit 'for' loop. This makes the KDGKBSENT case more compact. The only thing we need to take care about is NULL 'func_table[i]'. Use an empty string in that case. The original check for overflow could never trigger as the func_buf strings are always shorter or equal to 'struct kbsentry's. Cc: Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20201019085517.10176-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index a7455f8a4235..56315d23f34e 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -1994,9 +1994,7 @@ out: int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) { struct kbsentry *kbs; - char *p; u_char *q; - u_char __user *up; int sz, fnw_sz; int delta; char *first_free, *fj, *fnw; @@ -2022,23 +2020,15 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) i = kbs->kb_func; switch (cmd) { - case KDGKBSENT: - sz = sizeof(kbs->kb_string) - 1; /* sz should have been - a struct member */ - up = user_kdgkb->kb_string; - p = func_table[i]; - if(p) - for ( ; *p && sz; p++, sz--) - if (put_user(*p, up++)) { - ret = -EFAULT; - goto reterr; - } - if (put_user('\0', up)) { - ret = -EFAULT; - goto reterr; - } - kfree(kbs); - return ((p && *p) ? -EOVERFLOW : 0); + case KDGKBSENT: { + /* size should have been a struct member */ + unsigned char *from = func_table[i] ? : ""; + + ret = copy_to_user(user_kdgkb->kb_string, from, + strlen(from) + 1) ? -EFAULT : 0; + + goto reterr; + } case KDSKBSENT: if (!perm) { ret = -EPERM; From 7e3ba72e5b9376e12839ea347c49fc4108244b0c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 19 Oct 2020 10:55:17 +0200 Subject: [PATCH 146/636] vt: keyboard, extend func_buf_lock to readers commit 82e61c3909db51d91b9d3e2071557b6435018b80 upstream. Both read-side users of func_table/func_buf need locking. Without that, one can easily confuse the code by repeatedly setting altering strings like: while (1) for (a = 0; a < 2; a++) { struct kbsentry kbs = {}; strcpy((char *)kbs.kb_string, a ? ".\n" : "88888\n"); ioctl(fd, KDSKBSENT, &kbs); } When that program runs, one can get unexpected output by holding F1 (note the unxpected period on the last line): . 88888 .8888 So protect all accesses to 'func_table' (and func_buf) by preexisting 'func_buf_lock'. It is easy in 'k_fn' handler as 'puts_queue' is expected not to sleep. On the other hand, KDGKBSENT needs a local (atomic) copy of the string because copy_to_user can sleep. Use already allocated, but unused 'kbs->kb_string' for that purpose. Note that the program above needs at least CAP_SYS_TTY_CONFIG. This depends on the previous patch and on the func_buf_lock lock added in commit 46ca3f735f34 (tty/vt: fix write/write race in ioctl(KDSKBSENT) handler) in 5.2. Likely fixes CVE-2020-25656. Cc: Reported-by: Minh Yuan Signed-off-by: Jiri Slaby Link: https://lore.kernel.org/r/20201019085517.10176-2-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/keyboard.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 56315d23f34e..94cad9f86ff9 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -742,8 +742,13 @@ static void k_fn(struct vc_data *vc, unsigned char value, char up_flag) return; if ((unsigned)value < ARRAY_SIZE(func_table)) { + unsigned long flags; + + spin_lock_irqsave(&func_buf_lock, flags); if (func_table[value]) puts_queue(vc, func_table[value]); + spin_unlock_irqrestore(&func_buf_lock, flags); + } else pr_err("k_fn called with value=%d\n", value); } @@ -1990,7 +1995,7 @@ out: #undef s #undef v -/* FIXME: This one needs untangling and locking */ +/* FIXME: This one needs untangling */ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) { struct kbsentry *kbs; @@ -2022,10 +2027,14 @@ int vt_do_kdgkb_ioctl(int cmd, struct kbsentry __user *user_kdgkb, int perm) switch (cmd) { case KDGKBSENT: { /* size should have been a struct member */ - unsigned char *from = func_table[i] ? : ""; + ssize_t len = sizeof(user_kdgkb->kb_string); - ret = copy_to_user(user_kdgkb->kb_string, from, - strlen(from) + 1) ? -EFAULT : 0; + spin_lock_irqsave(&func_buf_lock, flags); + len = strlcpy(kbs->kb_string, func_table[i] ? : "", len); + spin_unlock_irqrestore(&func_buf_lock, flags); + + ret = copy_to_user(user_kdgkb->kb_string, kbs->kb_string, + len + 1) ? -EFAULT : 0; goto reterr; } From d8a2bd86bf4c8138eec3224d3e70d94b2ab18371 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 23 Sep 2020 13:14:56 -0700 Subject: [PATCH 147/636] HID: wacom: Avoid entering wacom_wac_pen_report for pad / battery commit d9216d753b2b1406b801243b12aaf00a5ce5b861 upstream. It has recently been reported that the "heartbeat" report from devices like the 2nd-gen Intuos Pro (PTH-460, PTH-660, PTH-860) or the 2nd-gen Bluetooth-enabled Intuos tablets (CTL-4100WL, CTL-6100WL) can cause the driver to send a spurious BTN_TOUCH=0 once per second in the middle of drawing. This can result in broken lines while drawing on Chrome OS. The source of the issue has been traced back to a change which modified the driver to only call `wacom_wac_pad_report()` once per report instead of once per collection. As part of this change, pad-handling code was removed from `wacom_wac_collection()` under the assumption that the `WACOM_PEN_FIELD` and `WACOM_TOUCH_FIELD` checks would not be satisfied when a pad or battery collection was being processed. To be clear, the macros `WACOM_PAD_FIELD` and `WACOM_PEN_FIELD` do not currently check exclusive conditions. In fact, most "pad" fields will also appear to be "pen" fields simply due to their presence inside of a Digitizer application collection. Because of this, the removal of the check from `wacom_wac_collection()` just causes pad / battery collections to instead trigger a call to `wacom_wac_pen_report()` instead. The pen report function in turn resets the tip switch state just prior to exiting, resulting in the observed BTN_TOUCH=0 symptom. To correct this, we restore a version of the `WACOM_PAD_FIELD` check in `wacom_wac_collection()` and return early. This effectively prevents pad / battery collections from being reported until the very end of the report as originally intended. Fixes: d4b8efeb46d9 ("HID: wacom: generic: Correct pad syncing") Cc: stable@vger.kernel.org # v4.17+ Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Tested-by: Ping Cheng Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 77bb46948eea..da83884b90d2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2729,7 +2729,9 @@ static int wacom_wac_collection(struct hid_device *hdev, struct hid_report *repo if (report->type != HID_INPUT_REPORT) return -1; - if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input) + if (WACOM_PAD_FIELD(field)) + return 0; + else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input) wacom_wac_pen_report(hdev, report); else if (WACOM_FINGER_FIELD(field) && wacom->wacom_wac.touch_input) wacom_wac_finger_report(hdev, report); From 9bbfd6578e596d3bf480e74d142e49c88102014d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 22 Sep 2020 12:20:14 +0200 Subject: [PATCH 148/636] udf: Fix memory leak when mounting commit a7be300de800e755714c71103ae4a0d205e41e99 upstream. udf_process_sequence() allocates temporary array for processing partition descriptors on volume which it fails to free. Free the array when it is not needed anymore. Fixes: 7b78fd02fb19 ("udf: Fix handling of Partition Descriptors") CC: stable@vger.kernel.org Reported-by: syzbot+128f4dd6e796c98b3760@syzkaller.appspotmail.com Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/udf/super.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index caeb01ca039b..c7f6243f318b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1685,7 +1685,8 @@ static noinline int udf_process_sequence( "Pointers (max %u supported)\n", UDF_MAX_TD_NESTING); brelse(bh); - return -EIO; + ret = -EIO; + goto out; } vdp = (struct volDescPtr *)bh->b_data; @@ -1705,7 +1706,8 @@ static noinline int udf_process_sequence( curr = get_volume_descriptor_record(ident, bh, &data); if (IS_ERR(curr)) { brelse(bh); - return PTR_ERR(curr); + ret = PTR_ERR(curr); + goto out; } /* Descriptor we don't care about? */ if (!curr) @@ -1727,28 +1729,31 @@ static noinline int udf_process_sequence( */ if (!data.vds[VDS_POS_PRIMARY_VOL_DESC].block) { udf_err(sb, "Primary Volume Descriptor not found!\n"); - return -EAGAIN; + ret = -EAGAIN; + goto out; } ret = udf_load_pvoldesc(sb, data.vds[VDS_POS_PRIMARY_VOL_DESC].block); if (ret < 0) - return ret; + goto out; if (data.vds[VDS_POS_LOGICAL_VOL_DESC].block) { ret = udf_load_logicalvol(sb, data.vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset); if (ret < 0) - return ret; + goto out; } /* Now handle prevailing Partition Descriptors */ for (i = 0; i < data.num_part_descs; i++) { ret = udf_load_partdesc(sb, data.part_descs_loc[i].rec.block); if (ret < 0) - return ret; + goto out; } - - return 0; + ret = 0; +out: + kfree(data.part_descs_loc); + return ret; } /* From fd9079bdc25c2d004da5294f91ddaf844818618a Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Sun, 4 Oct 2020 16:03:07 +0200 Subject: [PATCH 149/636] dmaengine: dma-jz4780: Fix race in jz4780_dma_tx_status commit baf6fd97b16ea8f981b8a8b04039596f32fc2972 upstream. The jz4780_dma_tx_status() function would check if a channel's cookie state was set to 'completed', and if not, it would enter the critical section. However, in that time frame, the jz4780_dma_chan_irq() function was able to set the cookie to 'completed', and clear the jzchan->vchan pointer, which was deferenced in the critical section of the first function. Fix this race by checking the channel's cookie state after entering the critical function and not before. Fixes: d894fc6046fe ("dmaengine: jz4780: add driver for the Ingenic JZ4780 DMA controller") Cc: stable@vger.kernel.org # v4.0 Signed-off-by: Paul Cercueil Reported-by: Artur Rojek Tested-by: Artur Rojek Link: https://lore.kernel.org/r/20201004140307.885556-1-paul@crapouillou.net Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dma-jz4780.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index edff93aacad3..0d0850b35e45 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -574,11 +574,11 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, enum dma_status status; unsigned long flags; + spin_lock_irqsave(&jzchan->vchan.lock, flags); + status = dma_cookie_status(chan, cookie, txstate); if ((status == DMA_COMPLETE) || (txstate == NULL)) - return status; - - spin_lock_irqsave(&jzchan->vchan.lock, flags); + goto out_unlock_irqrestore; vdesc = vchan_find_desc(&jzchan->vchan, cookie); if (vdesc) { @@ -595,6 +595,7 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) status = DMA_ERROR; +out_unlock_irqrestore: spin_unlock_irqrestore(&jzchan->vchan.lock, flags); return status; } From edd390613683eba9db783b5ed825af054ef0c62c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:44 +0100 Subject: [PATCH 150/636] iio:light:si1145: Fix timestamp alignment and prevent data leak. commit 0456ecf34d466261970e0ff92b2b9c78a4908637 upstream. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 24 byte array of smaller elements on the stack. As Lars also noted this anti pattern can involve a leak of data to userspace and that indeed can happen here. We close both issues by moving to a suitable array in the iio_priv() data with alignment explicitly requested. This data is allocated with kzalloc so no data can leak appart from previous readings. Depending on the enabled channels, the location of the timestamp can be at various aligned offsets through the buffer. As such we any use of a structure to enforce this alignment would incorrectly suggest a single location for the timestamp. Comments adjusted to express this clearly in the code. Fixes: ac45e57f1590 ("iio: light: Add driver for Silabs si1132, si1141/2/3 and si1145/6/7 ambient light, uv index and proximity sensors") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: Peter Meerwald-Stadler Cc: Link: https://lore.kernel.org/r/20200722155103.979802-9-jic23@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/iio/light/si1145.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iio/light/si1145.c b/drivers/iio/light/si1145.c index 76f16f9c7616..31f78fd7f915 100644 --- a/drivers/iio/light/si1145.c +++ b/drivers/iio/light/si1145.c @@ -172,6 +172,7 @@ struct si1145_part_info { * @part_info: Part information * @trig: Pointer to iio trigger * @meas_rate: Value of MEAS_RATE register. Only set in HW in auto mode + * @buffer: Used to pack data read from sensor. */ struct si1145_data { struct i2c_client *client; @@ -183,6 +184,14 @@ struct si1145_data { bool autonomous; struct iio_trigger *trig; int meas_rate; + /* + * Ensure timestamp will be naturally aligned if present. + * Maximum buffer size (may be only partly used if not all + * channels are enabled): + * 6*2 bytes channels data + 4 bytes alignment + + * 8 bytes timestamp + */ + u8 buffer[24] __aligned(8); }; /** @@ -444,12 +453,6 @@ static irqreturn_t si1145_trigger_handler(int irq, void *private) struct iio_poll_func *pf = private; struct iio_dev *indio_dev = pf->indio_dev; struct si1145_data *data = iio_priv(indio_dev); - /* - * Maximum buffer size: - * 6*2 bytes channels data + 4 bytes alignment + - * 8 bytes timestamp - */ - u8 buffer[24]; int i, j = 0; int ret; u8 irq_status = 0; @@ -482,7 +485,7 @@ static irqreturn_t si1145_trigger_handler(int irq, void *private) ret = i2c_smbus_read_i2c_block_data_or_emulated( data->client, indio_dev->channels[i].address, - sizeof(u16) * run, &buffer[j]); + sizeof(u16) * run, &data->buffer[j]); if (ret < 0) goto done; j += run * sizeof(u16); @@ -497,7 +500,7 @@ static irqreturn_t si1145_trigger_handler(int irq, void *private) goto done; } - iio_push_to_buffers_with_timestamp(indio_dev, buffer, + iio_push_to_buffers_with_timestamp(indio_dev, data->buffer, iio_get_time_ns(indio_dev)); done: From 8a8900553d2c302bba3243d992a601eb3701b898 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:51:00 +0100 Subject: [PATCH 151/636] iio:adc:ti-adc0832 Fix alignment issue with timestamp commit 39e91f3be4cba51c1560bcda3a343ed1f64dc916 upstream. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. We fix this issues by moving to a suitable structure in the iio_priv() data with alignment explicitly requested. This data is allocated with kzalloc so no data can leak apart from previous readings. Note that previously no data could leak 'including' previous readings but I don't think it is an issue to potentially leak them like this now does. In this case the postioning of the timestamp is depends on what other channels are enabled. As such we cannot use a structure to make the alignment explicit as it would be missleading by suggesting only one possible location for the timestamp. Fixes: 815bbc87462a ("iio: ti-adc0832: add triggered buffer support") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: Akinobu Mita Cc: Link: https://lore.kernel.org/r/20200722155103.979802-25-jic23@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-adc0832.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/ti-adc0832.c b/drivers/iio/adc/ti-adc0832.c index 188dae705bf7..a408d97e2d2e 100644 --- a/drivers/iio/adc/ti-adc0832.c +++ b/drivers/iio/adc/ti-adc0832.c @@ -31,6 +31,12 @@ struct adc0832 { struct regulator *reg; struct mutex lock; u8 mux_bits; + /* + * Max size needed: 16x 1 byte ADC data + 8 bytes timestamp + * May be shorter if not all channels are enabled subject + * to the timestamp remaining 8 byte aligned. + */ + u8 data[24] __aligned(8); u8 tx_buf[2] ____cacheline_aligned; u8 rx_buf[2]; @@ -202,7 +208,6 @@ static irqreturn_t adc0832_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct adc0832 *adc = iio_priv(indio_dev); - u8 data[24] = { }; /* 16x 1 byte ADC data + 8 bytes timestamp */ int scan_index; int i = 0; @@ -220,10 +225,10 @@ static irqreturn_t adc0832_trigger_handler(int irq, void *p) goto out; } - data[i] = ret; + adc->data[i] = ret; i++; } - iio_push_to_buffers_with_timestamp(indio_dev, data, + iio_push_to_buffers_with_timestamp(indio_dev, adc->data, iio_get_time_ns(indio_dev)); out: mutex_unlock(&adc->lock); From da58185f7bc08767a438a182c27d29a6d454975e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:51:01 +0100 Subject: [PATCH 152/636] iio:adc:ti-adc12138 Fix alignment issue with timestamp commit 293e809b2e8e608b65a949101aaf7c0bd1224247 upstream. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses an array of smaller elements on the stack. We move to a suitable structure in the iio_priv() data with alignment explicitly requested. This data is allocated with kzalloc so no data can leak apart from previous readings. Note that previously no leak at all could occur, but previous readings should never be a problem. In this case the timestamp location depends on what other channels are enabled. As such we can't use a structure without misleading by suggesting only one possible timestamp location. Fixes: 50a6edb1b6e0 ("iio: adc: add ADC12130/ADC12132/ADC12138 ADC driver") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: Akinobu Mita Cc: Link: https://lore.kernel.org/r/20200722155103.979802-26-jic23@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ti-adc12138.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/ti-adc12138.c b/drivers/iio/adc/ti-adc12138.c index 703d68ae96b7..4517d7742bc3 100644 --- a/drivers/iio/adc/ti-adc12138.c +++ b/drivers/iio/adc/ti-adc12138.c @@ -50,6 +50,12 @@ struct adc12138 { struct completion complete; /* The number of cclk periods for the S/H's acquisition time */ unsigned int acquisition_time; + /* + * Maximum size needed: 16x 2 bytes ADC data + 8 bytes timestamp. + * Less may be need if not all channels are enabled, as long as + * the 8 byte alignment of the timestamp is maintained. + */ + __be16 data[20] __aligned(8); u8 tx_buf[2] ____cacheline_aligned; u8 rx_buf[2]; @@ -332,7 +338,6 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct adc12138 *adc = iio_priv(indio_dev); - __be16 data[20] = { }; /* 16x 2 bytes ADC data + 8 bytes timestamp */ __be16 trash; int ret; int scan_index; @@ -348,7 +353,7 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) reinit_completion(&adc->complete); ret = adc12138_start_and_read_conv(adc, scan_chan, - i ? &data[i - 1] : &trash); + i ? &adc->data[i - 1] : &trash); if (ret) { dev_warn(&adc->spi->dev, "failed to start conversion\n"); @@ -365,7 +370,7 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) } if (i) { - ret = adc12138_read_conv_data(adc, &data[i - 1]); + ret = adc12138_read_conv_data(adc, &adc->data[i - 1]); if (ret) { dev_warn(&adc->spi->dev, "failed to get conversion data\n"); @@ -373,7 +378,7 @@ static irqreturn_t adc12138_trigger_handler(int irq, void *p) } } - iio_push_to_buffers_with_timestamp(indio_dev, data, + iio_push_to_buffers_with_timestamp(indio_dev, adc->data, iio_get_time_ns(indio_dev)); out: mutex_unlock(&adc->lock); From c7975bd7fdb7bd8b5d9547acc456f6050bae02f6 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 22 Jul 2020 16:50:41 +0100 Subject: [PATCH 153/636] iio:gyro:itg3200: Fix timestamp alignment and prevent data leak. commit 10ab7cfd5522f0041028556dac864a003e158556 upstream. One of a class of bugs pointed out by Lars in a recent review. iio_push_to_buffers_with_timestamp assumes the buffer used is aligned to the size of the timestamp (8 bytes). This is not guaranteed in this driver which uses a 16 byte array of smaller elements on the stack. This is fixed by using an explicit c structure. As there are no holes in the structure, there is no possiblity of data leakage in this case. The explicit alignment of ts is not strictly necessary but potentially makes the code slightly less fragile. It also removes the possibility of this being cut and paste into another driver where the alignment isn't already true. Fixes: 36e0371e7764 ("iio:itg3200: Use iio_push_to_buffers_with_timestamp()") Reported-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron Reviewed-by: Andy Shevchenko Cc: Link: https://lore.kernel.org/r/20200722155103.979802-6-jic23@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/iio/gyro/itg3200_buffer.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index 59770e5b6660..b080362a8766 100644 --- a/drivers/iio/gyro/itg3200_buffer.c +++ b/drivers/iio/gyro/itg3200_buffer.c @@ -49,13 +49,20 @@ static irqreturn_t itg3200_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct itg3200 *st = iio_priv(indio_dev); - __be16 buf[ITG3200_SCAN_ELEMENTS + sizeof(s64)/sizeof(u16)]; + /* + * Ensure correct alignment and padding including for the + * timestamp that may be inserted. + */ + struct { + __be16 buf[ITG3200_SCAN_ELEMENTS]; + s64 ts __aligned(8); + } scan; - int ret = itg3200_read_all_channels(st->i2c, buf); + int ret = itg3200_read_all_channels(st->i2c, scan.buf); if (ret < 0) goto error_ret; - iio_push_to_buffers_with_timestamp(indio_dev, buf, pf->timestamp); + iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp); iio_trigger_notify_done(indio_dev->trig); From 77d89366bc9220195e87ba33a3dd8048990631f3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Oct 2020 17:18:33 +0530 Subject: [PATCH 154/636] powerpc/drmem: Make lmb_size 64 bit commit ec72024e35dddb88a81e40071c87ceb18b5ee835 upstream. Similar to commit 89c140bbaeee ("pseries: Fix 64 bit logical memory block panic") make sure different variables tracking lmb_size are updated to be 64 bit. This was found by code audit. Cc: stable@vger.kernel.org Signed-off-by: Aneesh Kumar K.V Acked-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201007114836.282468-2-aneesh.kumar@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/drmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 668d8a121f1a..8b196720e9a0 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -24,7 +24,7 @@ struct drmem_lmb { struct drmem_lmb_info { struct drmem_lmb *lmbs; int n_lmbs; - u32 lmb_size; + u64 lmb_size; }; extern struct drmem_lmb_info *drmem_info; @@ -83,7 +83,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_AI_INVALID 0x00000040 #define DRCONF_MEM_RESERVED 0x00000080 -static inline u32 drmem_lmb_size(void) +static inline u64 drmem_lmb_size(void) { return drmem_info->lmb_size; } From 9b7089ce099a5803b3d5b64a325faff195324680 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 15 Sep 2020 08:53:50 +0200 Subject: [PATCH 155/636] s390/stp: add locking to sysfs functions commit b3bd02495cb339124f13135d51940cf48d83e5cb upstream. The sysfs function might race with stp_work_fn. To prevent that, add the required locking. Another issue is that the sysfs functions are checking the stp_online flag, but this flag just holds the user setting whether STP is enabled. Add a flag to clock_sync_flag whether stp_info holds valid data and use that instead. Cc: stable@vger.kernel.org Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/time.c | 118 +++++++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 33 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 8ea9db599d38..11c32b228f51 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -354,8 +354,9 @@ static DEFINE_PER_CPU(atomic_t, clock_sync_word); static DEFINE_MUTEX(clock_sync_mutex); static unsigned long clock_sync_flags; -#define CLOCK_SYNC_HAS_STP 0 -#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_HAS_STP 0 +#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_STPINFO_VALID 2 /* * The get_clock function for the physical clock. It will get the current @@ -592,6 +593,22 @@ void stp_queue_work(void) queue_work(time_sync_wq, &stp_work); } +static int __store_stpinfo(void) +{ + int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + + if (rc) + clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + else + set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + return rc; +} + +static int stpinfo_valid(void) +{ + return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); +} + static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; @@ -613,8 +630,7 @@ static int stp_sync_clock(void *data) if (rc == 0) { sync->clock_delta = clock_delta; clock_sync_global(clock_delta); - rc = chsc_sstpi(stp_page, &stp_info, - sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc == 0 && stp_info.tmd != 2) rc = -EAGAIN; } @@ -659,7 +675,7 @@ static void stp_work_fn(struct work_struct *work) if (rc) goto out_unlock; - rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc || stp_info.c == 0) goto out_unlock; @@ -696,10 +712,14 @@ static ssize_t stp_ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%016llx\n", - *(unsigned long long *) stp_info.ctnid); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%016llx\n", + *(unsigned long long *) stp_info.ctnid); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); @@ -708,9 +728,13 @@ static ssize_t stp_ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.ctn); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.ctn); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); @@ -719,9 +743,13 @@ static ssize_t stp_dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x2000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x2000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); @@ -730,9 +758,13 @@ static ssize_t stp_leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x8000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x8000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); @@ -741,9 +773,13 @@ static ssize_t stp_stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); @@ -752,9 +788,13 @@ static ssize_t stp_time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x0800)) - return -ENODATA; - return sprintf(buf, "%i\n", (int) stp_info.tto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x0800)) + ret = sprintf(buf, "%i\n", (int) stp_info.tto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); @@ -763,9 +803,13 @@ static ssize_t stp_time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x4000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x4000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(time_zone_offset, 0400, @@ -775,9 +819,13 @@ static ssize_t stp_timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tmd); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tmd); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); @@ -786,9 +834,13 @@ static ssize_t stp_timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tst); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tst); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); From 94e8f0bbc475228c93d28b2e0f7e37303db80ffe Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 20 Aug 2020 14:45:12 +1000 Subject: [PATCH 156/636] powerpc/rtas: Restrict RTAS requests from userspace commit bd59380c5ba4147dcbaad3e582b55ccfd120b764 upstream. A number of userspace utilities depend on making calls to RTAS to retrieve information and update various things. The existing API through which we expose RTAS to userspace exposes more RTAS functionality than we actually need, through the sys_rtas syscall, which allows root (or anyone with CAP_SYS_ADMIN) to make any RTAS call they want with arbitrary arguments. Many RTAS calls take the address of a buffer as an argument, and it's up to the caller to specify the physical address of the buffer as an argument. We allocate a buffer (the "RMO buffer") in the Real Memory Area that RTAS can access, and then expose the physical address and size of this buffer in /proc/powerpc/rtas/rmo_buffer. Userspace is expected to read this address, poke at the buffer using /dev/mem, and pass an address in the RMO buffer to the RTAS call. However, there's nothing stopping the caller from specifying whatever address they want in the RTAS call, and it's easy to construct a series of RTAS calls that can overwrite arbitrary bytes (even without /dev/mem access). Additionally, there are some RTAS calls that do potentially dangerous things and for which there are no legitimate userspace use cases. In the past, this would not have been a particularly big deal as it was assumed that root could modify all system state freely, but with Secure Boot and lockdown we need to care about this. We can't fundamentally change the ABI at this point, however we can address this by implementing a filter that checks RTAS calls against a list of permitted calls and forces the caller to use addresses within the RMO buffer. The list is based off the list of calls that are used by the librtas userspace library, and has been tested with a number of existing userspace RTAS utilities. For compatibility with any applications we are not aware of that require other calls, the filter can be turned off at build time. Cc: stable@vger.kernel.org Reported-by: Daniel Axtens Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200820044512.7543-1-ajd@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Kconfig | 13 ++++ arch/powerpc/kernel/rtas.c | 153 +++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 0bc53f0e37c0..d18ea3c1f4fa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1010,6 +1010,19 @@ config FSL_RIO source "drivers/rapidio/Kconfig" +config PPC_RTAS_FILTER + bool "Enable filtering of RTAS syscalls" + default y + depends on PPC_RTAS + help + The RTAS syscall API has security issues that could be used to + compromise system integrity. This option enforces restrictions on the + RTAS calls and arguments passed by userspace programs to mitigate + these issues. + + Say Y unless you know what you are doing and the filter is causing + problems for you. + endmenu config NONSTATIC_KERNEL diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 95d1264ba795..7e0722b62cae 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1057,6 +1057,147 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, return NULL; } +#ifdef CONFIG_PPC_RTAS_FILTER + +/* + * The sys_rtas syscall, as originally designed, allows root to pass + * arbitrary physical addresses to RTAS calls. A number of RTAS calls + * can be abused to write to arbitrary memory and do other things that + * are potentially harmful to system integrity, and thus should only + * be used inside the kernel and not exposed to userspace. + * + * All known legitimate users of the sys_rtas syscall will only ever + * pass addresses that fall within the RMO buffer, and use a known + * subset of RTAS calls. + * + * Accordingly, we filter RTAS requests to check that the call is + * permitted, and that provided pointers fall within the RMO buffer. + * The rtas_filters list contains an entry for each permitted call, + * with the indexes of the parameters which are expected to contain + * addresses and sizes of buffers allocated inside the RMO buffer. + */ +struct rtas_filter { + const char *name; + int token; + /* Indexes into the args buffer, -1 if not used */ + int buf_idx1; + int size_idx1; + int buf_idx2; + int size_idx2; + + int fixed_size; +}; + +static struct rtas_filter rtas_filters[] __ro_after_init = { + { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, + { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ + { "display-character", -1, -1, -1, -1, -1 }, + { "ibm,display-message", -1, 0, -1, -1, -1 }, + { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, + { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinct", -1, -1, -1, -1, -1 }, + { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, + { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, + { "ibm,get-indices", -1, 2, 3, -1, -1 }, + { "get-power-level", -1, -1, -1, -1, -1 }, + { "get-sensor-state", -1, -1, -1, -1, -1 }, + { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, + { "get-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,get-vpd", -1, 0, -1, 1, 2 }, + { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, + { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, + { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, + { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, + { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, + { "set-indicator", -1, -1, -1, -1, -1 }, + { "set-power-level", -1, -1, -1, -1, -1 }, + { "set-time-for-power-on", -1, -1, -1, -1, -1 }, + { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, + { "set-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,suspend-me", -1, -1, -1, -1, -1 }, + { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, + { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, + { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, +}; + +static bool in_rmo_buf(u32 base, u32 end) +{ + return base >= rtas_rmo_buf && + base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base <= end && + end >= rtas_rmo_buf && + end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); +} + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + struct rtas_filter *f = &rtas_filters[i]; + u32 base, size, end; + + if (token != f->token) + continue; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + + end = base + size - 1; + if (!in_rmo_buf(base, end)) + goto err; + } + + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; + + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (!strcmp(f->name, "ibm,configure-connector") && + base == 0) + return false; + + if (!in_rmo_buf(base, end)) + goto err; + } + + return false; + } + +err: + pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); + pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", + token, nargs, current->comm); + return true; +} + +#else + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + return false; +} + +#endif /* CONFIG_PPC_RTAS_FILTER */ + /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -1094,6 +1235,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); + if (block_rtas_call(token, nargs, &args)) + return -EINVAL; + /* Need to handle ibm,suspend_me call specially */ if (token == ibm_suspend_me_token) { @@ -1155,6 +1299,9 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; +#ifdef CONFIG_PPC_RTAS_FILTER + int i; +#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. @@ -1190,6 +1337,12 @@ void __init rtas_initialize(void) #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); #endif + +#ifdef CONFIG_PPC_RTAS_FILTER + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + rtas_filters[i].token = rtas_token(rtas_filters[i].name); + } +#endif } int __init early_init_dt_scan_rtas(unsigned long node, From b035bb9573ff8d3ee89ee6019a511f2aefbbee06 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 2 Sep 2020 09:30:11 +0930 Subject: [PATCH 157/636] powerpc: Warn about use of smt_snooze_delay commit a02f6d42357acf6e5de6ffc728e6e77faf3ad217 upstream. It's not done anything for a long time. Save the percpu variable, and emit a warning to remind users to not expect it to do anything. This uses pr_warn_once instead of pr_warn_ratelimit as testing 'ppc64_cpu --smt=off' on a 24 core / 4 SMT system showed the warning to be noisy, as the online/offline loop is slow. Fixes: 3fa8cad82b94 ("powerpc/pseries/cpuidle: smt-snooze-delay cleanup.") Cc: stable@vger.kernel.org # v3.14 Signed-off-by: Joel Stanley Acked-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200902000012.3440389-1-joel@jms.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/sysfs.c | 42 +++++++++++++++---------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 755dc98a57ae..6b107de10ffa 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -29,29 +29,27 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); -/* - * SMT snooze delay stuff, 64-bit only for now - */ - #ifdef CONFIG_PPC64 -/* Time in microseconds we delay before sleeping in the idle loop */ -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +/* + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle: + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in + * 2014: + * + * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean + * up the kernel code." + * + * powerpc-utils stopped using it as of 1.3.8. At some point in the future this + * code should be removed. + */ static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - ssize_t ret; - long snooze; - - ret = sscanf(buf, "%ld", &snooze); - if (ret != 1) - return -EINVAL; - - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; + pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n", + current->comm, current->pid); return count; } @@ -59,9 +57,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); + pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n", + current->comm, current->pid); + return sprintf(buf, "100\n"); } static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, @@ -69,16 +67,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, static int __init setup_smt_snooze_delay(char *str) { - unsigned int cpu; - long snooze; - if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - snooze = simple_strtol(str, NULL, 10); - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - + pr_warn("smt-snooze-delay command line option has no effect\n"); return 1; } __setup("smt-snooze-delay=", setup_smt_snooze_delay); From e46550aa5a0708f10d6cd26d8028a7778c6862e5 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 6 Oct 2020 13:02:18 +0530 Subject: [PATCH 158/636] powerpc/powernv/elog: Fix race while processing OPAL error log event. commit aea948bb80b478ddc2448f7359d574387521a52d upstream. Every error log reported by OPAL is exported to userspace through a sysfs interface and notified using kobject_uevent(). The userspace daemon (opal_errd) then reads the error log and acknowledges the error log is saved safely to disk. Once acknowledged the kernel removes the respective sysfs file entry causing respective resources to be released including kobject. However it's possible the userspace daemon may already be scanning elog entries when a new sysfs elog entry is created by the kernel. User daemon may read this new entry and ack it even before kernel can notify userspace about it through kobject_uevent() call. If that happens then we have a potential race between elog_ack_store->kobject_put() and kobject_uevent which can lead to use-after-free of a kernfs object resulting in a kernel crash. eg: BUG: Unable to handle kernel data access on read at 0x6b6b6b6b6b6b6bfb Faulting instruction address: 0xc0000000008ff2a0 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV CPU: 27 PID: 805 Comm: irq/29-opal-elo Not tainted 5.9.0-rc2-gcc-8.2.0-00214-g6f56a67bcbb5-dirty #363 ... NIP kobject_uevent_env+0xa0/0x910 LR elog_event+0x1f4/0x2d0 Call Trace: 0x5deadbeef0000122 (unreliable) elog_event+0x1f4/0x2d0 irq_thread_fn+0x4c/0xc0 irq_thread+0x1c0/0x2b0 kthread+0x1c4/0x1d0 ret_from_kernel_thread+0x5c/0x6c This patch fixes this race by protecting the sysfs file creation/notification by holding a reference count on kobject until we safely send kobject_uevent(). The function create_elog_obj() returns the elog object which if used by caller function will end up in use-after-free problem again. However, the return value of create_elog_obj() function isn't being used today and there is no need as well. Hence change it to return void to make this fix complete. Fixes: 774fea1a38c6 ("powerpc/powernv: Read OPAL error log and export it through sysfs") Cc: stable@vger.kernel.org # v3.15+ Reported-by: Oliver O'Halloran Signed-off-by: Mahesh Salgaonkar Signed-off-by: Aneesh Kumar K.V Reviewed-by: Oliver O'Halloran Reviewed-by: Vasant Hegde [mpe: Rework the logic to use a single return, reword comments, add oops] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201006122051.190176-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/powernv/opal-elog.c | 33 +++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index ba6e437abb4b..398a06631456 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -183,14 +183,14 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) +static void create_elog_obj(uint64_t id, size_t size, uint64_t type) { struct elog_obj *elog; int rc; elog = kzalloc(sizeof(*elog), GFP_KERNEL); if (!elog) - return NULL; + return; elog->kobj.kset = elog_kset; @@ -223,18 +223,37 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); if (rc) { kobject_put(&elog->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this elog is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the elog before kobject_uevent() is called. If that + * happens then there is a potential race between + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&elog->kobj); rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&elog->kobj, KOBJ_ADD); + } else { + /* Drop the reference taken for the bin file */ kobject_put(&elog->kobj); - return NULL; } - kobject_uevent(&elog->kobj, KOBJ_ADD); + /* Drop our reference */ + kobject_put(&elog->kobj); - return elog; + return; } static irqreturn_t elog_event(int irq, void *data) From ed596114fd3ac2b8bb3d8ac2df4217290d1e2a70 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 13 Oct 2020 15:37:40 +1100 Subject: [PATCH 159/636] powerpc: Fix undetected data corruption with P9N DD2.1 VSX CI load emulation commit 1da4a0272c5469169f78cd76cf175ff984f52f06 upstream. __get_user_atomic_128_aligned() stores to kaddr using stvx which is a VMX store instruction, hence kaddr must be 16 byte aligned otherwise the store won't occur as expected. Unfortunately when we call __get_user_atomic_128_aligned() in p9_hmi_special_emu(), the buffer we pass as kaddr (ie. vbuf) isn't guaranteed to be 16B aligned. This means that the write to vbuf in __get_user_atomic_128_aligned() has the bottom bits of the address truncated. This results in other local variables being overwritten. Also vbuf will not contain the correct data which results in the userspace emulation being wrong and hence undetected user data corruption. In the past we've been mostly lucky as vbuf has ended up aligned but this is fragile and isn't always true. CONFIG_STACKPROTECTOR in particular can change the stack arrangement enough that our luck runs out. This issue only occurs on POWER9 Nimbus <= DD2.1 bare metal. The fix is to align vbuf to a 16 byte boundary. Fixes: 5080332c2c89 ("powerpc/64s: Add workaround for P9 vector CI load issue") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201013043741.743413-1-mikey@neuling.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 7781f0168ce8..1b2d84cb373b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -794,7 +794,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) { unsigned int ra, rb, t, i, sel, instr, rc; const void __user *addr; - u8 vbuf[16], *vdst; + u8 vbuf[16] __aligned(16), *vdst; unsigned long ea, msr, msr_mask; bool swap; From 3c040b924988be85e99819dd48c1f3130ae683cc Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 16 Oct 2020 09:25:45 -0400 Subject: [PATCH 160/636] NFSv4.2: support EXCHGID4_FLAG_SUPP_FENCE_OPS 4.2 EXCHANGE_ID flag commit 8c39076c276be0b31982e44654e2c2357473258a upstream. RFC 7862 introduced a new flag that either client or server is allowed to set: EXCHGID4_FLAG_SUPP_FENCE_OPS. Client needs to update its bitmask to allow for this flag value. v2: changed minor version argument to unsigned int Signed-off-by: Olga Kornievskaia CC: Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 9 ++++++--- include/uapi/linux/nfs4.h | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2a2ff3f22a4..fe7b42c277ac 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7600,9 +7600,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or * DS flags set. */ -static int nfs4_check_cl_exchange_flags(u32 flags) +static int nfs4_check_cl_exchange_flags(u32 flags, u32 version) { - if (flags & ~EXCHGID4_FLAG_MASK_R) + if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R)) + goto out_inval; + else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R)) goto out_inval; if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && (flags & EXCHGID4_FLAG_USE_NON_PNFS)) @@ -7997,7 +7999,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (status != 0) goto out; - status = nfs4_check_cl_exchange_flags(resp->flags); + status = nfs4_check_cl_exchange_flags(resp->flags, + clp->cl_mvops->minor_version); if (status != 0) goto out; diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 8572930cf5b0..54a78529c8b3 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -136,6 +136,8 @@ #define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000 #define EXCHGID4_FLAG_CONFIRMED_R 0x80000000 + +#define EXCHGID4_FLAG_SUPP_FENCE_OPS 0x00000004 /* * Since the validity of these bits depends on whether * they're set in the argument or response, have separate @@ -143,6 +145,7 @@ */ #define EXCHGID4_FLAG_MASK_A 0x40070103 #define EXCHGID4_FLAG_MASK_R 0x80070103 +#define EXCHGID4_2_FLAG_MASK_R 0x80070107 #define SEQ4_STATUS_CB_PATH_DOWN 0x00000001 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002 From 88401813fcec150ff179e731364fc19af1e6632f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Oct 2020 18:58:56 -0400 Subject: [PATCH 161/636] NFSD: Add missing NFSv2 .pc_func methods commit 6b3dccd48de8a4c650b01499a0b09d1e2279649e upstream. There's no protection in nfsd_dispatch() against a NULL .pc_func helpers. A malicious NFS client can trigger a crash by invoking the unused/unsupported NFSv2 ROOT or WRITECACHE procedures. The current NFSD dispatcher does not support returning a void reply to a non-NULL procedure, so the reply to both of these is wrong, for the moment. Cc: Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfsproc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 0d20fd161225..01f7ce1ae127 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -118,6 +118,13 @@ done: return nfsd_return_attrs(nfserr, resp); } +/* Obsolete, replaced by MNTPROC_MNT. */ +static __be32 +nfsd_proc_root(struct svc_rqst *rqstp) +{ + return nfs_ok; +} + /* * Look up a path name component * Note: the dentry in the resp->fh may be negative if the file @@ -201,6 +208,13 @@ nfsd_proc_read(struct svc_rqst *rqstp) return fh_getattr(&resp->fh, &resp->stat); } +/* Reserved */ +static __be32 +nfsd_proc_writecache(struct svc_rqst *rqstp) +{ + return nfs_ok; +} + /* * Write data to a file * N.B. After this call resp->fh needs an fh_put @@ -615,6 +629,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_xdrressize = ST+AT, }, [NFSPROC_ROOT] = { + .pc_func = nfsd_proc_root, .pc_decode = nfssvc_decode_void, .pc_encode = nfssvc_encode_void, .pc_argsize = sizeof(struct nfsd_void), @@ -652,6 +667,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, }, [NFSPROC_WRITECACHE] = { + .pc_func = nfsd_proc_writecache, .pc_decode = nfssvc_decode_void, .pc_encode = nfssvc_encode_void, .pc_argsize = sizeof(struct nfsd_void), From b08433b1c5e581c8824c190f3827107bd8db05cf Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 1 Jun 2020 17:10:37 +0800 Subject: [PATCH 162/636] ubifs: dent: Fix some potential memory leaks while iterating entries commit 58f6e78a65f1fcbf732f60a7478ccc99873ff3ba upstream. Fix some potential memory leaks in error handling branches while iterating dent entries. For example, function dbg_check_dir() forgets to free pdent if it exists. Signed-off-by: Zhihao Cheng Cc: Fixes: 1e51764a3c2ac05a2 ("UBIFS: add new flash file system") Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/ubifs/debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 564e330d05b1..24bbecd4752b 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -1129,6 +1129,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) err = PTR_ERR(dent); if (err == -ENOENT) break; + kfree(pdent); return err; } From 753e8a597d54209aa97aa985342f7f98d4240be4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 28 Sep 2020 22:11:35 +0200 Subject: [PATCH 163/636] perf python scripting: Fix printable strings in python3 scripts commit 6fcd5ddc3b1467b3586972ef785d0d926ae4cdf4 upstream. Hagen reported broken strings in python3 tracepoint scripts: make PYTHON=python3 perf record -e sched:sched_switch -a -- sleep 5 perf script --gen-script py perf script -s ./perf-script.py [..] sched__sched_switch 7 563231.759525792 0 swapper prev_comm=bytearray(b'swapper/7\x00\x00\x00\x00\x00\x00\x00'), prev_pid=0, prev_prio=120, prev_state=, next_comm=bytearray(b'mutex-thread-co\x00'), The problem is in the is_printable_array function that does not take the zero byte into account and claim such string as not printable, so the code will create byte array instead of string. Committer testing: After this fix: sched__sched_switch 3 484522.497072626 1158680 kworker/3:0-eve prev_comm=kworker/3:0, prev_pid=1158680, prev_prio=120, prev_state=I, next_comm=swapper/3, next_pid=0, next_prio=120 Sample: {addr=0, cpu=3, datasrc=84410401, datasrc_decode=N/A|SNP N/A|TLB N/A|LCK N/A, ip=18446744071841817196, period=1, phys_addr=0, pid=1158680, tid=1158680, time=484522497072626, transaction=0, values=[(0, 0)], weight=0} sched__sched_switch 4 484522.497085610 1225814 perf prev_comm=perf, prev_pid=1225814, prev_prio=120, prev_state=, next_comm=migration/4, next_pid=30, next_prio=0 Sample: {addr=0, cpu=4, datasrc=84410401, datasrc_decode=N/A|SNP N/A|TLB N/A|LCK N/A, ip=18446744071841817196, period=1, phys_addr=0, pid=1225814, tid=1225814, time=484522497085610, transaction=0, values=[(0, 0)], weight=0} Fixes: 249de6e07458 ("perf script python: Fix string vs byte array resolving") Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Hagen Paul Pfeifer Cc: Alexander Shishkin Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20200928201135.3633850-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/print_binary.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c index 23e367063446..71aeaf6f45cc 100644 --- a/tools/perf/util/print_binary.c +++ b/tools/perf/util/print_binary.c @@ -50,7 +50,7 @@ int is_printable_array(char *p, unsigned int len) len--; - for (i = 0; i < len; i++) { + for (i = 0; i < len && p[i]; i++) { if (!isprint(p[i]) && !isspace(p[i])) return 0; } From 9aa191b69a7392d5db8696c249e3a35ee64fafc7 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 1 Jun 2020 17:12:31 +0800 Subject: [PATCH 164/636] ubi: check kthread_should_stop() after the setting of task state commit d005f8c6588efcfbe88099b6edafc6f58c84a9c1 upstream. A detach hung is possible when a race occurs between the detach process and the ubi background thread. The following sequences outline the race: ubi thread: if (list_empty(&ubi->works)... ubi detach: set_bit(KTHREAD_SHOULD_STOP, &kthread->flags) => by kthread_stop() wake_up_process() => ubi thread is still running, so 0 is returned ubi thread: set_current_state(TASK_INTERRUPTIBLE) schedule() => ubi thread will never be scheduled again ubi detach: wait_for_completion() => hung task! To fix that, we need to check kthread_should_stop() after we set the task state, so the ubi thread will either see the stop bit and exit or the task state is reset to runnable such that it isn't scheduled out indefinitely. Signed-off-by: Zhihao Cheng Cc: Fixes: 801c135ce73d5df1ca ("UBI: Unsorted Block Images") Reported-by: syzbot+853639d0cb16c31c7a14@syzkaller.appspotmail.com Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/wl.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 80d64d7e7a8b..ac336164f625 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1471,6 +1471,19 @@ int ubi_thread(void *u) !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&ubi->wl_lock); + + /* + * Check kthread_should_stop() after we set the task + * state to guarantee that we either see the stop bit + * and exit or the task state is reset to runnable such + * that it's not scheduled out indefinitely and detects + * the stop bit at kthread_should_stop(). + */ + if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + break; + } + schedule(); continue; } From 3aac1f37cefb2f9157e7b93d2114c9fd9f88a404 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 17 Oct 2020 16:13:37 -0700 Subject: [PATCH 165/636] ia64: fix build error with !COREDUMP commit 7404840d87557c4092bf0272bce5e0354c774bf9 upstream. Fix linkage error when CONFIG_BINFMT_ELF is selected but CONFIG_COREDUMP is not: ia64-linux-ld: arch/ia64/kernel/elfcore.o: in function `elf_core_write_extra_phdrs': elfcore.c:(.text+0x172): undefined reference to `dump_emit' ia64-linux-ld: arch/ia64/kernel/elfcore.o: in function `elf_core_write_extra_data': elfcore.c:(.text+0x2b2): undefined reference to `dump_emit' Fixes: 1fcccbac89f5 ("elf coredump: replace ELF_CORE_EXTRA_* macros by functions") Reported-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Signed-off-by: Andrew Morton Cc: Tony Luck Cc: Fenghua Yu Cc: Link: https://lkml.kernel.org/r/20200819064146.12529-1-krzk@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/ia64/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index d0c0ccdd656a..03ee3ff3cefa 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -42,7 +42,7 @@ obj-y += esi_stub.o # must be in kernel proper endif obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o -obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_ELF_CORE) += elfcore.o # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 From b37b5ced4dae58a17bbcd4846838901892eb5a9e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 20 Sep 2020 23:12:38 +0200 Subject: [PATCH 166/636] i2c: imx: Fix external abort on interrupt in exit paths commit e50e4f0b85be308a01b830c5fbdffc657e1a6dd0 upstream. If interrupt comes late, during probe error path or device remove (could be triggered with CONFIG_DEBUG_SHIRQ), the interrupt handler i2c_imx_isr() will access registers with the clock being disabled. This leads to external abort on non-linefetch on Toradex Colibri VF50 module (with Vybrid VF5xx): Unhandled fault: external abort on non-linefetch (0x1008) at 0x8882d003 Internal error: : 1008 [#1] ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 5.7.0 #607 Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree) (i2c_imx_isr) from [<8017009c>] (free_irq+0x25c/0x3b0) (free_irq) from [<805844ec>] (release_nodes+0x178/0x284) (release_nodes) from [<80580030>] (really_probe+0x10c/0x348) (really_probe) from [<80580380>] (driver_probe_device+0x60/0x170) (driver_probe_device) from [<80580630>] (device_driver_attach+0x58/0x60) (device_driver_attach) from [<805806bc>] (__driver_attach+0x84/0xc0) (__driver_attach) from [<8057e228>] (bus_for_each_dev+0x68/0xb4) (bus_for_each_dev) from [<8057f3ec>] (bus_add_driver+0x144/0x1ec) (bus_add_driver) from [<80581320>] (driver_register+0x78/0x110) (driver_register) from [<8010213c>] (do_one_initcall+0xa8/0x2f4) (do_one_initcall) from [<80c0100c>] (kernel_init_freeable+0x178/0x1dc) (kernel_init_freeable) from [<80807048>] (kernel_init+0x8/0x110) (kernel_init) from [<80100114>] (ret_from_fork+0x14/0x20) Additionally, the i2c_imx_isr() could wake up the wait queue (imx_i2c_struct->queue) before its initialization happens. The resource-managed framework should not be used for interrupt handling, because the resource will be released too late - after disabling clocks. The interrupt handler is not prepared for such case. Fixes: 1c4b6c3bcf30 ("i2c: imx: implement bus recovery") Cc: Signed-off-by: Krzysztof Kozlowski Acked-by: Oleksij Rempel Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-imx.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index d4b72e4ffd71..0e7f9bd17a91 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1101,14 +1101,6 @@ static int i2c_imx_probe(struct platform_device *pdev) return ret; } - /* Request IRQ */ - ret = devm_request_irq(&pdev->dev, irq, i2c_imx_isr, IRQF_SHARED, - pdev->name, i2c_imx); - if (ret) { - dev_err(&pdev->dev, "can't claim irq %d\n", irq); - goto clk_disable; - } - /* Init queue */ init_waitqueue_head(&i2c_imx->queue); @@ -1127,6 +1119,14 @@ static int i2c_imx_probe(struct platform_device *pdev) if (ret < 0) goto rpm_disable; + /* Request IRQ */ + ret = request_threaded_irq(irq, i2c_imx_isr, NULL, IRQF_SHARED, + pdev->name, i2c_imx); + if (ret) { + dev_err(&pdev->dev, "can't claim irq %d\n", irq); + goto rpm_disable; + } + /* Set up clock divider */ i2c_imx->bitrate = IMX_I2C_BIT_RATE; ret = of_property_read_u32(pdev->dev.of_node, @@ -1169,13 +1169,12 @@ static int i2c_imx_probe(struct platform_device *pdev) clk_notifier_unregister: clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + free_irq(irq, i2c_imx); rpm_disable: pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - -clk_disable: clk_disable_unprepare(i2c_imx->clk); return ret; } @@ -1183,7 +1182,7 @@ clk_disable: static int i2c_imx_remove(struct platform_device *pdev) { struct imx_i2c_struct *i2c_imx = platform_get_drvdata(pdev); - int ret; + int irq, ret; ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) @@ -1203,6 +1202,9 @@ static int i2c_imx_remove(struct platform_device *pdev) imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR); clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb); + irq = platform_get_irq(pdev, 0); + if (irq >= 0) + free_irq(irq, i2c_imx); clk_disable_unprepare(i2c_imx->clk); pm_runtime_put_noidle(&pdev->dev); From ebc8d835c23eb2365f3b9fd4d31ee5807cb8c7e0 Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Fri, 16 Oct 2020 18:03:07 +0530 Subject: [PATCH 167/636] drm/amdgpu: don't map BO in reserved region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c4aa8dff6091cc9536aeb255e544b0b4ba29faf4 upstream. 2MB area is reserved at top inside VM. Suggested-by: Christian König Signed-off-by: Madhav Chauhan Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 71792d820ae0..0db05ff4a652 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -563,6 +563,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct ww_acquire_ctx ticket; struct list_head list, duplicates; uint64_t va_flags; + uint64_t vm_size; int r = 0; if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { @@ -583,6 +584,15 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->va_address &= AMDGPU_VA_HOLE_MASK; + vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; + vm_size -= AMDGPU_VA_RESERVED_SIZE; + if (args->va_address + args->map_size > vm_size) { + dev_dbg(&dev->pdev->dev, + "va_address 0x%llx is in top reserved area 0x%llx\n", + args->va_address + args->map_size, vm_size); + return -EINVAL; + } + if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n", args->flags); From 0ac2a7bcff3717da75acec69fff4728a22bea383 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 23 Oct 2020 09:46:55 +0200 Subject: [PATCH 168/636] drm/amd/display: Don't invoke kgdb_breakpoint() unconditionally commit 8b7dc1fe1a5c1093551f6cd7dfbb941bd9081c2e upstream. ASSERT_CRITICAL() invokes kgdb_breakpoint() whenever either CONFIG_KGDB or CONFIG_HAVE_KGDB is set. This, however, may lead to a kernel panic when no kdb stuff is attached, since the kgdb_breakpoint() call issues INT3. It's nothing but a surprise for normal end-users. For avoiding the pitfall, make the kgdb_breakpoint() call only when CONFIG_DEBUG_KERNEL_DC is set. https://bugzilla.opensuse.org/show_bug.cgi?id=1177973 Cc: Acked-by: Alex Deucher Reviewed-by: Nicholas Kazlauskas Signed-off-by: Takashi Iwai Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/os_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index a407892905af..d4cb7db89192 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -57,7 +57,7 @@ * general debug capabilities * */ -#if defined(CONFIG_HAVE_KGDB) || defined(CONFIG_KGDB) +#if defined(CONFIG_DEBUG_KERNEL_DC) && (defined(CONFIG_HAVE_KGDB) || defined(CONFIG_KGDB)) #define ASSERT_CRITICAL(expr) do { \ if (WARN_ON(!(expr))) { \ kgdb_breakpoint(); \ From 4f3b78e25a073115c52b064c9f35e6a9aa39e566 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 4 Oct 2020 19:04:24 +0100 Subject: [PATCH 169/636] ceph: promote to unsigned long long before shifting commit c403c3a2fbe24d4ed33e10cabad048583ebd4edf upstream. On 32-bit systems, this shift will overflow for files larger than 4GB. Cc: stable@vger.kernel.org Fixes: 61f68816211e ("ceph: check caps in filemap_fault and page_mkwrite") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 476728bdae8c..e59b2f53a81f 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1437,7 +1437,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_info *fi = vma->vm_file->private_data; struct page *pinned_page = NULL; - loff_t off = vmf->pgoff << PAGE_SHIFT; + loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT; int want, got, err; sigset_t oldset; vm_fault_t ret = VM_FAULT_SIGBUS; From 416ea9783571eedcabdc9247653518e1005eca73 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 7 Oct 2020 20:06:48 +0200 Subject: [PATCH 170/636] libceph: clear con->out_msg on Policy::stateful_server faults commit 28e1581c3b4ea5f98530064a103c6217bedeea73 upstream. con->out_msg must be cleared on Policy::stateful_server (!CEPH_MSG_CONNECT_LOSSY) faults. Not doing so botches the reconnection attempt, because after writing the banner the messenger moves on to writing the data section of that message (either from where it got interrupted by the connection reset or from the beginning) instead of writing struct ceph_msg_connect. This results in a bizarre error message because the server sends CEPH_MSGR_TAG_BADPROTOVER but we think we wrote struct ceph_msg_connect: libceph: mds0 (1)172.21.15.45:6828 socket error on write ceph: mds0 reconnect start libceph: mds0 (1)172.21.15.45:6829 socket closed (con state OPEN) libceph: mds0 (1)172.21.15.45:6829 protocol version mismatch, my 32 != server's 32 libceph: mds0 (1)172.21.15.45:6829 protocol version mismatch AFAICT this bug goes back to the dawn of the kernel client. The reason it survived for so long is that only MDS sessions are stateful and only two MDS messages have a data section: CEPH_MSG_CLIENT_RECONNECT (always, but reconnecting is rare) and CEPH_MSG_CLIENT_REQUEST (only when xattrs are involved). The connection has to get reset precisely when such message is being sent -- in this case it was the former. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/47723 Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f7d7f32ac673..21bd37ec5511 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -3037,6 +3037,11 @@ static void con_fault(struct ceph_connection *con) ceph_msg_put(con->in_msg); con->in_msg = NULL; } + if (con->out_msg) { + BUG_ON(con->out_msg->con != con); + ceph_msg_put(con->out_msg); + con->out_msg = NULL; + } /* Requeue anything that hasn't been acked */ list_splice_init(&con->out_sent, &con->out_queue); From f870895527ae8dde12309583ebe3e5b0a3ecb8d2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 4 Oct 2020 19:04:22 +0100 Subject: [PATCH 171/636] 9P: Cast to loff_t before multiplying commit f5f7ab168b9a60e12a4b8f2bb6fcc91321dc23c1 upstream. On 32-bit systems, this multiplication will overflow for files larger than 4GB. Link: http://lkml.kernel.org/r/20201004180428.14494-2-willy@infradead.org Cc: stable@vger.kernel.org Fixes: fb89b45cdfdc ("9P: introduction of a new cache=mmap model.") Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Dominique Martinet Signed-off-by: Greg Kroah-Hartman --- fs/9p/vfs_file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 550d0b169d7c..61e0c552083f 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -624,9 +624,9 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = WB_SYNC_ALL, - .range_start = vma->vm_pgoff * PAGE_SIZE, + .range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE, /* absolute end, byte at end included */ - .range_end = vma->vm_pgoff * PAGE_SIZE + + .range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start - 1), }; From 57ebe9102928e835d72ffec43c7995723cfea132 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Mon, 19 Oct 2020 22:22:42 +0800 Subject: [PATCH 172/636] ring-buffer: Return 0 on success from ring_buffer_resize() commit 0a1754b2a97efa644aa6e84d1db5b17c42251483 upstream. We don't need to check the new buffer size, and the return value had confused resize_buffer_duplicate_size(). ... ret = ring_buffer_resize(trace_buf->buffer, per_cpu_ptr(size_buf->data,cpu_id)->entries, cpu_id); if (ret == 0) per_cpu_ptr(trace_buf->data, cpu_id)->entries = per_cpu_ptr(size_buf->data, cpu_id)->entries; ... Link: https://lkml.kernel.org/r/20201019142242.11560-1-hqjagain@gmail.com Cc: stable@vger.kernel.org Fixes: d60da506cbeb3 ("tracing: Add a resize function to make one buffer equivalent to another buffer") Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 564d22691dd7..eef05eb3b284 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1692,18 +1692,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, { struct ring_buffer_per_cpu *cpu_buffer; unsigned long nr_pages; - int cpu, err = 0; + int cpu, err; /* * Always succeed at resizing a non-existent buffer: */ if (!buffer) - return size; + return 0; /* Make sure the requested buffer exists */ if (cpu_id != RING_BUFFER_ALL_CPUS && !cpumask_test_cpu(cpu_id, buffer->cpumask)) - return size; + return 0; nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); @@ -1843,7 +1843,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, } mutex_unlock(&buffer->mutex); - return size; + return 0; out_err: for_each_buffer_cpu(buffer, cpu) { From 5b14ac3b9404f18591deb81913ab708642b2a4bd Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 8 Oct 2020 22:42:56 +0200 Subject: [PATCH 173/636] vringh: fix __vringh_iov() when riov and wiov are different commit 5745bcfbbf89b158416075374254d3c013488f21 upstream. If riov and wiov are both defined and they point to different objects, only riov is initialized. If the wiov is not initialized by the caller, the function fails returning -EINVAL and printing "Readable desc 0x... after writable" error message. This issue happens when descriptors have both readable and writable buffers (eg. virtio-blk devices has virtio_blk_outhdr in the readable buffer and status as last byte of writable buffer) and we call __vringh_iov() to get both type of buffers in two different iovecs. Let's replace the 'else if' clause with 'if' to initialize both riov and wiov if they are not NULL. As checkpatch pointed out, we also avoid crashing the kernel when riov and wiov are both NULL, replacing BUG() with WARN_ON() and returning -EINVAL. Fixes: f87d0fbb5798 ("vringh: host-side implementation of virtio rings.") Cc: stable@vger.kernel.org Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201008204256.162292-1-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vringh.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index a94d700a4503..59c61744dcc1 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -273,13 +273,14 @@ __vringh_iov(struct vringh *vrh, u16 i, desc_max = vrh->vring.num; up_next = -1; + /* You must want something! */ + if (WARN_ON(!riov && !wiov)) + return -EINVAL; + if (riov) riov->i = riov->used = 0; - else if (wiov) + if (wiov) wiov->i = wiov->used = 0; - else - /* You must want something! */ - BUG(); for (;;) { void *addr; From 314b5a46c65726750fa1fa3e665df9485d09551a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 22 Sep 2020 09:24:56 -0700 Subject: [PATCH 174/636] ext4: fix leaking sysfs kobject after failed mount commit cb8d53d2c97369029cc638c9274ac7be0a316c75 upstream. ext4_unregister_sysfs() only deletes the kobject. The reference to it needs to be put separately, like ext4_put_super() does. This addresses the syzbot report "memory leak in kobject_set_name_vargs (3)" (https://syzkaller.appspot.com/bug?extid=9f864abad79fae7c17e1). Reported-by: syzbot+9f864abad79fae7c17e1@syzkaller.appspotmail.com Fixes: 72ba74508b28 ("ext4: release sysfs kobject when failing to enable quotas on mount") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20200922162456.93657-1-ebiggers@kernel.org Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 16ea7cfd130c..6338ca95d8b3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4594,6 +4594,7 @@ cantfind_ext4: failed_mount8: ext4_unregister_sysfs(sb); + kobject_put(&sbi->s_kobj); failed_mount7: ext4_unregister_li_request(sb); failed_mount6: From 6ad22dc99cb627fa13812bc0e27a6595bba62f5e Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 29 Aug 2020 10:54:02 +0800 Subject: [PATCH 175/636] ext4: fix error handling code in add_new_gdb commit c9e87161cc621cbdcfc472fa0b2d81c63780c8f5 upstream. When ext4_journal_get_write_access() fails, we should terminate the execution flow and release n_group_desc, iloc.bh, dind and gdb_bh. Cc: stable@kernel.org Signed-off-by: Dinghao Liu Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/20200829025403.3139-1-dinghao.liu@zju.edu.cn Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/resize.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index ef552d93708e..8098255c2801 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -861,8 +861,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, BUFFER_TRACE(dind, "get_write_access"); err = ext4_journal_get_write_access(handle, dind); - if (unlikely(err)) + if (unlikely(err)) { ext4_std_error(sb, err); + goto errout; + } /* ext4_reserve_inode_write() gets a reference on the iloc */ err = ext4_reserve_inode_write(handle, inode, &iloc); From 7b97149296ef752a1576d0939f629dd30e83e03d Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Tue, 20 Oct 2020 09:36:31 +0800 Subject: [PATCH 176/636] ext4: fix invalid inode checksum commit 1322181170bb01bce3c228b82ae3d5c6b793164f upstream. During the stability test, there are some errors: ext4_lookup:1590: inode #6967: comm fsstress: iget: checksum invalid. If the inode->i_iblocks too big and doesn't set huge file flag, checksum will not be recalculated when update the inode information to it's buffer. If other inode marks the buffer dirty, then the inconsistent inode will be flushed to disk. Fix this problem by checking i_blocks in advance. Cc: stable@kernel.org Signed-off-by: Luo Meng Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20201020013631.3796673-1-luomeng12@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 52be4c965024..cc092386ac6d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5271,6 +5271,12 @@ static int ext4_do_update_inode(handle_t *handle, if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + err = ext4_inode_blocks_set(handle, raw_inode, ei); + if (err) { + spin_unlock(&ei->i_raw_lock); + goto out_brelse; + } + raw_inode->i_mode = cpu_to_le16(inode->i_mode); i_uid = i_uid_read(inode); i_gid = i_gid_read(inode); @@ -5304,11 +5310,6 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); - err = ext4_inode_blocks_set(handle, raw_inode, ei); - if (err) { - spin_unlock(&ei->i_raw_lock); - goto out_brelse; - } raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) From 36beb0e4d553671273710890e9357aaebc996f36 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 20 Oct 2020 08:22:53 +1000 Subject: [PATCH 177/636] drm/ttm: fix eviction valuable range check. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fea456d82c19d201c21313864105876deabe148b upstream. This was adding size to start, but pfn and start are in pages, so it should be using num_pages. Not sure this fixes anything in the real world, just noticed it during refactoring. Signed-off-by: Dave Airlie Reviewed-by: Christian König Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20201019222257.1684769-2-airlied@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 268f5a3b3122..81e076662c7a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -671,7 +671,7 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, /* Don't evict this BO if it's outside of the * requested placement range */ - if (place->fpfn >= (bo->mem.start + bo->mem.size) || + if (place->fpfn >= (bo->mem.start + bo->mem.num_pages) || (place->lpfn && place->lpfn <= bo->mem.start)) return false; From 90553a7466d0bf67ed67d84c5ea01bf6fc9c821a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 14 Sep 2020 17:45:48 +0200 Subject: [PATCH 178/636] rtc: rx8010: don't modify the global rtc ops commit d3b14296da69adb7825022f3224ac6137eb30abf upstream. The way the driver is implemented is buggy for the (admittedly unlikely) use case where there are two RTCs with one having an interrupt configured and the second not. This is caused by the fact that we use a global rtc_class_ops struct which we modify depending on whether the irq number is present or not. Fix it by using two const ops structs with and without alarm operations. While at it: not being able to request a configured interrupt is an error so don't ignore it and bail out of probe(). Fixes: ed13d89b08e3 ("rtc: Add Epson RX8010SJ RTC driver") Signed-off-by: Bartosz Golaszewski Signed-off-by: Alexandre Belloni Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200914154601.32245-2-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- drivers/rtc/rtc-rx8010.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c index 7ddc22eb5b0f..f4db80f9c1b1 100644 --- a/drivers/rtc/rtc-rx8010.c +++ b/drivers/rtc/rtc-rx8010.c @@ -428,16 +428,26 @@ static int rx8010_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) } } -static struct rtc_class_ops rx8010_rtc_ops = { +static const struct rtc_class_ops rx8010_rtc_ops_default = { .read_time = rx8010_get_time, .set_time = rx8010_set_time, .ioctl = rx8010_ioctl, }; +static const struct rtc_class_ops rx8010_rtc_ops_alarm = { + .read_time = rx8010_get_time, + .set_time = rx8010_set_time, + .ioctl = rx8010_ioctl, + .read_alarm = rx8010_read_alarm, + .set_alarm = rx8010_set_alarm, + .alarm_irq_enable = rx8010_alarm_irq_enable, +}; + static int rx8010_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent); + const struct rtc_class_ops *rtc_ops; struct rx8010_data *rx8010; int err = 0; @@ -468,16 +478,16 @@ static int rx8010_probe(struct i2c_client *client, if (err) { dev_err(&client->dev, "unable to request IRQ\n"); - client->irq = 0; - } else { - rx8010_rtc_ops.read_alarm = rx8010_read_alarm; - rx8010_rtc_ops.set_alarm = rx8010_set_alarm; - rx8010_rtc_ops.alarm_irq_enable = rx8010_alarm_irq_enable; + return err; } + + rtc_ops = &rx8010_rtc_ops_alarm; + } else { + rtc_ops = &rx8010_rtc_ops_default; } rx8010->rtc = devm_rtc_device_register(&client->dev, client->name, - &rx8010_rtc_ops, THIS_MODULE); + rtc_ops, THIS_MODULE); if (IS_ERR(rx8010->rtc)) { dev_err(&client->dev, "unable to register the class device\n"); From 619e366268e0430687d07b24b48f7382fc088c9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 26 Oct 2020 13:15:23 -0700 Subject: [PATCH 179/636] tty: make FONTX ioctl use the tty pointer they were actually passed commit 90bfdeef83f1d6c696039b6a917190dcbbad3220 upstream. Some of the font tty ioctl's always used the current foreground VC for their operations. Don't do that then. This fixes a data race on fg_console. Side note: both Michael Ellerman and Jiri Slaby point out that all these ioctls are deprecated, and should probably have been removed long ago, and everything seems to be using the KDFONTOP ioctl instead. In fact, Michael points out that it looks like busybox's loadfont program seems to have switched over to using KDFONTOP exactly _because_ of this bug (ahem.. 12 years ago ;-). Reported-by: Minh Yuan Acked-by: Michael Ellerman Acked-by: Jiri Slaby Cc: Greg KH Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 6a82030cf1ef..2e959563af53 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -244,7 +244,7 @@ int vt_waitactive(int n) static inline int -do_fontx_ioctl(int cmd, struct consolefontdesc __user *user_cfd, int perm, struct console_font_op *op) +do_fontx_ioctl(struct vc_data *vc, int cmd, struct consolefontdesc __user *user_cfd, int perm, struct console_font_op *op) { struct consolefontdesc cfdarg; int i; @@ -262,15 +262,16 @@ do_fontx_ioctl(int cmd, struct consolefontdesc __user *user_cfd, int perm, struc op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = cfdarg.chardata; - return con_font_op(vc_cons[fg_console].d, op); - case GIO_FONTX: { + return con_font_op(vc, op); + + case GIO_FONTX: op->op = KD_FONT_OP_GET; op->flags = KD_FONT_FLAG_OLD; op->width = 8; op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = cfdarg.chardata; - i = con_font_op(vc_cons[fg_console].d, op); + i = con_font_op(vc, op); if (i) return i; cfdarg.charheight = op->height; @@ -278,7 +279,6 @@ do_fontx_ioctl(int cmd, struct consolefontdesc __user *user_cfd, int perm, struc if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc))) return -EFAULT; return 0; - } } return -EINVAL; } @@ -924,7 +924,7 @@ int vt_ioctl(struct tty_struct *tty, op.height = 0; op.charcount = 256; op.data = up; - ret = con_font_op(vc_cons[fg_console].d, &op); + ret = con_font_op(vc, &op); break; } @@ -935,7 +935,7 @@ int vt_ioctl(struct tty_struct *tty, op.height = 32; op.charcount = 256; op.data = up; - ret = con_font_op(vc_cons[fg_console].d, &op); + ret = con_font_op(vc, &op); break; } @@ -952,7 +952,7 @@ int vt_ioctl(struct tty_struct *tty, case PIO_FONTX: case GIO_FONTX: - ret = do_fontx_ioctl(cmd, up, perm, &op); + ret = do_fontx_ioctl(vc, cmd, up, perm, &op); break; case PIO_FONTRESET: @@ -969,11 +969,11 @@ int vt_ioctl(struct tty_struct *tty, { op.op = KD_FONT_OP_SET_DEFAULT; op.data = NULL; - ret = con_font_op(vc_cons[fg_console].d, &op); + ret = con_font_op(vc, &op); if (ret) break; console_lock(); - con_set_default_unimap(vc_cons[fg_console].d); + con_set_default_unimap(vc); console_unlock(); break; } @@ -1100,8 +1100,9 @@ struct compat_consolefontdesc { }; static inline int -compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, - int perm, struct console_font_op *op) +compat_fontx_ioctl(struct vc_data *vc, int cmd, + struct compat_consolefontdesc __user *user_cfd, + int perm, struct console_font_op *op) { struct compat_consolefontdesc cfdarg; int i; @@ -1119,7 +1120,8 @@ compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = compat_ptr(cfdarg.chardata); - return con_font_op(vc_cons[fg_console].d, op); + return con_font_op(vc, op); + case GIO_FONTX: op->op = KD_FONT_OP_GET; op->flags = KD_FONT_FLAG_OLD; @@ -1127,7 +1129,7 @@ compat_fontx_ioctl(int cmd, struct compat_consolefontdesc __user *user_cfd, op->height = cfdarg.charheight; op->charcount = cfdarg.charcount; op->data = compat_ptr(cfdarg.chardata); - i = con_font_op(vc_cons[fg_console].d, op); + i = con_font_op(vc, op); if (i) return i; cfdarg.charheight = op->height; @@ -1218,7 +1220,7 @@ long vt_compat_ioctl(struct tty_struct *tty, */ case PIO_FONTX: case GIO_FONTX: - ret = compat_fontx_ioctl(cmd, up, perm, &op); + ret = compat_fontx_ioctl(vc, cmd, up, perm, &op); break; case KDFONTOP: From 87ae7881958d090f179856a4d3fcbfbe37284724 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 9 Oct 2020 15:08:31 +0800 Subject: [PATCH 180/636] arm64: berlin: Select DW_APB_TIMER_OF commit b0fc70ce1f028e14a37c186d9f7a55e51439b83a upstream. Berlin SoCs always contain some DW APB timers which can be used as an always-on broadcast timer. Link: https://lore.kernel.org/r/20201009150536.214181fb@xhacker.debian Cc: # v3.14+ Signed-off-by: Jisheng Zhang Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig.platforms | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 393d2b524284..91c7ffad8541 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -46,6 +46,7 @@ config ARCH_BCM_IPROC config ARCH_BERLIN bool "Marvell Berlin SoC Family" select DW_APB_ICTL + select DW_APB_TIMER_OF select GPIOLIB select PINCTRL help From 9f14e20c0e63e92e91607740e21b7de897dc0399 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 26 Oct 2020 09:12:10 +0000 Subject: [PATCH 181/636] cachefiles: Handle readpage error correctly commit 9480b4e75b7108ee68ecf5bc6b4bd68e8031c521 upstream. If ->readpage returns an error, it has already unlocked the page. Fixes: 5e929b33c393 ("CacheFiles: Handle truncate unlocking the page we're reading") Cc: stable@vger.kernel.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/rdwr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index f822ac9e3cb0..f5bf10729a87 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -125,7 +125,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, _debug("reissue read"); ret = bmapping->a_ops->readpage(NULL, backpage); if (ret < 0) - goto unlock_discard; + goto discard; } /* but the page may have been read before the monitor was installed, so @@ -142,6 +142,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, unlock_discard: unlock_page(backpage); +discard: spin_lock_irq(&object->work_lock); list_del(&monitor->op_link); spin_unlock_irq(&object->work_lock); From 286d11dbf7005e3f348e95f64261ceb285d23fa0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 19 Oct 2020 16:57:50 +0200 Subject: [PATCH 182/636] hil/parisc: Disable HIL driver when it gets stuck commit 879bc2d27904354b98ca295b6168718e045c4aa2 upstream. When starting a HP machine with HIL driver but without an HIL keyboard or HIL mouse attached, it may happen that data written to the HIL loop gets stuck (e.g. because the transaction queue is full). Usually one will then have to reboot the machine because all you see is and endless output of: Transaction add failed: transaction already queued? In the higher layers hp_sdc_enqueue_transaction() is called to queued up a HIL packet. This function returns an error code, and this patch adds the necessary checks for this return code and disables the HIL driver if further packets can't be sent. Tested on a HP 730 and a HP 715/64 machine. Signed-off-by: Helge Deller Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/hil_mlc.c | 21 ++++++++++++++++++--- drivers/input/serio/hp_sdc_mlc.c | 8 ++++---- include/linux/hil_mlc.h | 2 +- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/input/serio/hil_mlc.c b/drivers/input/serio/hil_mlc.c index e1423f7648d6..4c039e4125d9 100644 --- a/drivers/input/serio/hil_mlc.c +++ b/drivers/input/serio/hil_mlc.c @@ -74,7 +74,7 @@ EXPORT_SYMBOL(hil_mlc_unregister); static LIST_HEAD(hil_mlcs); static DEFINE_RWLOCK(hil_mlcs_lock); static struct timer_list hil_mlcs_kicker; -static int hil_mlcs_probe; +static int hil_mlcs_probe, hil_mlc_stop; static void hil_mlcs_process(unsigned long unused); static DECLARE_TASKLET_DISABLED(hil_mlcs_tasklet, hil_mlcs_process, 0); @@ -702,9 +702,13 @@ static int hilse_donode(hil_mlc *mlc) if (!mlc->ostarted) { mlc->ostarted = 1; mlc->opacket = pack; - mlc->out(mlc); + rc = mlc->out(mlc); nextidx = HILSEN_DOZE; write_unlock_irqrestore(&mlc->lock, flags); + if (rc) { + hil_mlc_stop = 1; + return 1; + } break; } mlc->ostarted = 0; @@ -715,8 +719,13 @@ static int hilse_donode(hil_mlc *mlc) case HILSE_CTS: write_lock_irqsave(&mlc->lock, flags); - nextidx = mlc->cts(mlc) ? node->bad : node->good; + rc = mlc->cts(mlc); + nextidx = rc ? node->bad : node->good; write_unlock_irqrestore(&mlc->lock, flags); + if (rc) { + hil_mlc_stop = 1; + return 1; + } break; default: @@ -780,6 +789,12 @@ static void hil_mlcs_process(unsigned long unused) static void hil_mlcs_timer(struct timer_list *unused) { + if (hil_mlc_stop) { + /* could not send packet - stop immediately. */ + pr_warn(PREFIX "HIL seems stuck - Disabling HIL MLC.\n"); + return; + } + hil_mlcs_probe = 1; tasklet_schedule(&hil_mlcs_tasklet); /* Re-insert the periodic task. */ diff --git a/drivers/input/serio/hp_sdc_mlc.c b/drivers/input/serio/hp_sdc_mlc.c index 232d30c825bd..3e85e9039374 100644 --- a/drivers/input/serio/hp_sdc_mlc.c +++ b/drivers/input/serio/hp_sdc_mlc.c @@ -210,7 +210,7 @@ static int hp_sdc_mlc_cts(hil_mlc *mlc) priv->tseq[2] = 1; priv->tseq[3] = 0; priv->tseq[4] = 0; - __hp_sdc_enqueue_transaction(&priv->trans); + return __hp_sdc_enqueue_transaction(&priv->trans); busy: return 1; done: @@ -219,7 +219,7 @@ static int hp_sdc_mlc_cts(hil_mlc *mlc) return 0; } -static void hp_sdc_mlc_out(hil_mlc *mlc) +static int hp_sdc_mlc_out(hil_mlc *mlc) { struct hp_sdc_mlc_priv_s *priv; @@ -234,7 +234,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) do_data: if (priv->emtestmode) { up(&mlc->osem); - return; + return 0; } /* Shouldn't be sending commands when loop may be busy */ BUG_ON(down_trylock(&mlc->csem)); @@ -296,7 +296,7 @@ static void hp_sdc_mlc_out(hil_mlc *mlc) BUG_ON(down_trylock(&mlc->csem)); } enqueue: - hp_sdc_enqueue_transaction(&priv->trans); + return hp_sdc_enqueue_transaction(&priv->trans); } static int __init hp_sdc_mlc_init(void) diff --git a/include/linux/hil_mlc.h b/include/linux/hil_mlc.h index 774f7d3b8f6a..369221fd5518 100644 --- a/include/linux/hil_mlc.h +++ b/include/linux/hil_mlc.h @@ -103,7 +103,7 @@ struct hilse_node { /* Methods for back-end drivers, e.g. hp_sdc_mlc */ typedef int (hil_mlc_cts) (hil_mlc *mlc); -typedef void (hil_mlc_out) (hil_mlc *mlc); +typedef int (hil_mlc_out) (hil_mlc *mlc); typedef int (hil_mlc_in) (hil_mlc *mlc, suseconds_t timeout); struct hil_mlc_devinfo { From 99a1adfae331427df2c84f4be556cdf2fdd6e98d Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Mon, 7 Sep 2020 09:05:17 +0200 Subject: [PATCH 183/636] arm: dts: mt7623: add missing pause for switchport commit 36f0a5fc5284838c544218666c63ee8cfa46a9c3 upstream. port6 of mt7530 switch (= cpu port 0) on bananapi-r2 misses pause option which causes rx drops on running iperf. Fixes: f4ff257cd160 ("arm: dts: mt7623: add support for Bananapi R2 (BPI-R2) board") Signed-off-by: Frank Wunderlich Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200907070517.51715-1-linux@fw-web.de Signed-off-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts index 2b760f90f38c..5375c6699843 100644 --- a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts +++ b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts @@ -192,6 +192,7 @@ fixed-link { speed = <1000>; full-duplex; + pause; }; }; }; From 8ad5b2cf6cd9db71048314d893465afd7dfdd9d4 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 10 Sep 2020 17:41:49 +0200 Subject: [PATCH 184/636] ARM: samsung: fix PM debug build with DEBUG_LL but !MMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7be0d19c751b02db778ca95e3274d5ea7f31891c upstream. Selecting CONFIG_SAMSUNG_PM_DEBUG (depending on CONFIG_DEBUG_LL) but without CONFIG_MMU leads to build errors: arch/arm/plat-samsung/pm-debug.c: In function ‘s3c_pm_uart_base’: arch/arm/plat-samsung/pm-debug.c:57:2: error: implicit declaration of function ‘debug_ll_addr’ [-Werror=implicit-function-declaration] Fixes: 99b2fc2b8b40 ("ARM: SAMSUNG: Use debug_ll_addr() to get UART base address") Reported-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Cc: Link: https://lore.kernel.org/r/20200910154150.3318-1-krzk@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm/plat-samsung/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig index 377ff9cda667..c83baa51289f 100644 --- a/arch/arm/plat-samsung/Kconfig +++ b/arch/arm/plat-samsung/Kconfig @@ -240,6 +240,7 @@ config SAMSUNG_PM_DEBUG bool "Samsung PM Suspend debug" depends on PM && DEBUG_KERNEL depends on DEBUG_EXYNOS_UART || DEBUG_S3C24XX_UART || DEBUG_S3C2410_UART + depends on DEBUG_LL && MMU help Say Y here if you want verbose debugging from the PM Suspend and Resume code. See From 87e1447dc4dea61b88a2599161f926c826a4ebcf Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 4 Aug 2020 21:26:49 +0200 Subject: [PATCH 185/636] ARM: s3c24xx: fix missing system reset commit f6d7cde84f6c5551586c8b9b68d70f8e6dc9a000 upstream. Commit f6361c6b3880 ("ARM: S3C24XX: remove separate restart code") removed usage of the watchdog reset platform code in favor of the Samsung SoC watchdog driver. However the latter was not selected thus S3C24xx platforms lost reset abilities. Cc: Fixes: f6361c6b3880 ("ARM: S3C24XX: remove separate restart code") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- arch/arm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e2f7c50dbace..1877da816f65 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -592,8 +592,10 @@ config ARCH_S3C24XX select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_S3C_RTC if RTC_CLASS select NEED_MACH_IO_H + select S3C2410_WATCHDOG select SAMSUNG_ATAGS select USE_OF + select WATCHDOG help Samsung S3C2410, S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 SoCs based systems, such as the Simtec Electronics BAST From 76b712488dcfc71c158ba2e2785c23aca3ed2924 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 22 Oct 2020 21:40:59 +0300 Subject: [PATCH 186/636] device property: Keep secondary firmware node secondary by type commit d5dcce0c414fcbfe4c2037b66ac69ea5f9b3f75c upstream. Behind primary and secondary we understand the type of the nodes which might define their ordering. However, if primary node gone, we can't maintain the ordering by definition of the linked list. Thus, by ordering secondary node becomes first in the list. But in this case the meaning of it is still secondary (or auxiliary). The type of the node is maintained by the secondary pointer in it: secondary pointer Meaning NULL or valid primary node ERR_PTR(-ENODEV) secondary node So, if by some reason we do the following sequence of calls set_primary_fwnode(dev, NULL); set_primary_fwnode(dev, primary); we should preserve secondary node. This concept is supported by the description of set_primary_fwnode() along with implementation of set_secondary_fwnode(). Hence, fix the commit c15e1bdda436 to follow this as well. Fixes: c15e1bdda436 ("device property: Fix the secondary firmware node handling in set_primary_fwnode()") Cc: Ferry Toth Signed-off-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Tested-by: Ferry Toth Cc: 5.9+ # 5.9+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index b911c38ad18c..b2e2f086f586 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3347,7 +3347,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; - fn->secondary = NULL; + fn->secondary = ERR_PTR(-ENODEV); } else { dev->fwnode = NULL; } From 9169e2f12323994af8244dd64c036f1d974ea51d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 22 Oct 2020 21:41:00 +0300 Subject: [PATCH 187/636] device property: Don't clear secondary pointer for shared primary firmware node commit 99aed9227073fb34ce2880cbc7063e04185a65e1 upstream. It appears that firmware nodes can be shared between devices. In such case when a (child) device is about to be deleted, its firmware node may be shared and ACPI_COMPANION_SET(..., NULL) call for it breaks the secondary link of the shared primary firmware node. In order to prevent that, check, if the device has a parent and parent's firmware node is shared with its child, and avoid crashing the link. Fixes: c15e1bdda436 ("device property: Fix the secondary firmware node handling in set_primary_fwnode()") Reported-by: Ferry Toth Signed-off-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Tested-by: Ferry Toth Cc: 5.9+ # 5.9+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index b2e2f086f586..f0cdf38ed31c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -3333,6 +3333,7 @@ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { + struct device *parent = dev->parent; struct fwnode_handle *fn = dev->fwnode; if (fwnode) { @@ -3347,7 +3348,8 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; - fn->secondary = ERR_PTR(-ENODEV); + if (!(parent && fn == parent->fwnode)) + fn->secondary = ERR_PTR(-ENODEV); } else { dev->fwnode = NULL; } From 98fe5332fa4126d66248f24db5e1078e77856976 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 29 Oct 2020 17:24:09 +0000 Subject: [PATCH 188/636] KVM: arm64: Fix AArch32 handling of DBGD{CCINT,SCRext} and DBGVCR commit 4a1c2c7f63c52ccb11770b5ae25920a6b79d3548 upstream. The DBGD{CCINT,SCRext} and DBGVCR register entries in the cp14 array are missing their target register, resulting in all accesses being targetted at the guard sysreg (indexed by __INVALID_SYSREG__). Point the emulation code at the actual register entries. Fixes: bdfb4b389c8d ("arm64: KVM: add trap handlers for AArch32 debug registers") Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201029172409.2768336-1-maz@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/sys_regs.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5e720742d647..c67cae9d5229 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -192,6 +192,7 @@ enum vcpu_sysreg { #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) +#define cp14_DBGVCR (DBGVCR32_EL2 * 2) #define NR_COPRO_REGS (NR_SYS_REGS * 2) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0c073f3ca122..b53d0ebb87fc 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1555,9 +1555,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1572,7 +1572,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), From 28f7c57a3ce95ca17cff98cc5af52fe7f9226f3f Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Wed, 21 Oct 2020 13:21:42 +0100 Subject: [PATCH 189/636] staging: comedi: cb_pcidas: Allow 2-channel commands for AO subdevice commit 647a6002cb41d358d9ac5de101a8a6dc74748a59 upstream. The "cb_pcidas" driver supports asynchronous commands on the analog output (AO) subdevice for those boards that have an AO FIFO. The code (in `cb_pcidas_ao_check_chanlist()` and `cb_pcidas_ao_cmd()`) to validate and set up the command supports output to a single channel or to two channels simultaneously (the boards have two AO channels). However, the code in `cb_pcidas_auto_attach()` that initializes the subdevices neglects to initialize the AO subdevice's `len_chanlist` member, leaving it set to 0, but the Comedi core will "correct" it to 1 if the driver neglected to set it. This limits commands to use a single channel (either channel 0 or 1), but the limit should be two channels. Set the AO subdevice's `len_chanlist` member to be the same value as the `n_chan` member, which will be 2. Cc: Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20201021122142.81628-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/cb_pcidas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/comedi/drivers/cb_pcidas.c b/drivers/staging/comedi/drivers/cb_pcidas.c index 8429d57087fd..9b716c696477 100644 --- a/drivers/staging/comedi/drivers/cb_pcidas.c +++ b/drivers/staging/comedi/drivers/cb_pcidas.c @@ -1342,6 +1342,7 @@ static int cb_pcidas_auto_attach(struct comedi_device *dev, if (dev->irq && board->has_ao_fifo) { dev->write_subdev = s; s->subdev_flags |= SDF_CMD_WRITE; + s->len_chanlist = s->n_chan; s->do_cmdtest = cb_pcidas_ao_cmdtest; s->do_cmd = cb_pcidas_ao_cmd; s->cancel = cb_pcidas_ao_cancel; From e2f30d7ff0f1efb86600cc8e388f00db16629322 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 16 Oct 2020 12:18:57 +0200 Subject: [PATCH 190/636] staging: octeon: repair "fixed-link" support commit 179f5dc36b0a1aa31538d7d8823deb65c39847b3 upstream. The PHYs must be registered once in device probe function, not in device open callback because it's only possible to register them once. Fixes: a25e278020bf ("staging: octeon: support fixed-link phys") Signed-off-by: Alexander Sverdlin Cc: stable Link: https://lore.kernel.org/r/20201016101858.11374-1-alexander.sverdlin@nokia.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-mdio.c | 6 ------ drivers/staging/octeon/ethernet.c | 9 +++++++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index f67f95043887..5761a31e2318 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -152,12 +152,6 @@ int cvm_oct_phy_setup_device(struct net_device *dev) phy_node = of_parse_phandle(priv->of_node, "phy-handle", 0); if (!phy_node && of_phy_is_fixed_link(priv->of_node)) { - int rc; - - rc = of_phy_register_fixed_link(priv->of_node); - if (rc) - return rc; - phy_node = of_node_get(priv->of_node); } if (!phy_node) diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index 9b15c9ed844b..b680e5785ae3 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -875,6 +876,14 @@ static int cvm_oct_probe(struct platform_device *pdev) break; } + if (priv->of_node && of_phy_is_fixed_link(priv->of_node)) { + if (of_phy_register_fixed_link(priv->of_node)) { + netdev_err(dev, "Failed to register fixed link for interface %d, port %d\n", + interface, priv->port); + dev->netdev_ops = NULL; + } + } + if (!dev->netdev_ops) { free_netdev(dev); } else if (register_netdev(dev) < 0) { From 906fa9ac4360e3cb2703289684bc973de5f1424d Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Fri, 16 Oct 2020 16:56:30 +0200 Subject: [PATCH 191/636] staging: octeon: Drop on uncorrectable alignment or FCS error commit 49d28ebdf1e30d806410eefc7de0a7a1ca5d747c upstream. Currently in case of alignment or FCS error if the packet cannot be corrected it's still not dropped. Report the error properly and drop the packet while making the code around a little bit more readable. Fixes: 80ff0fd3ab64 ("Staging: Add octeon-ethernet driver files.") Signed-off-by: Alexander Sverdlin Cc: stable Link: https://lore.kernel.org/r/20201016145630.41852-1-alexander.sverdlin@nokia.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-rx.c | 34 ++++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c index 5e271245273c..6c644ef6f3ff 100644 --- a/drivers/staging/octeon/ethernet-rx.c +++ b/drivers/staging/octeon/ethernet-rx.c @@ -80,15 +80,17 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) else port = work->word1.cn38xx.ipprt; - if ((work->word2.snoip.err_code == 10) && (work->word1.len <= 64)) { + if ((work->word2.snoip.err_code == 10) && (work->word1.len <= 64)) /* * Ignore length errors on min size packets. Some * equipment incorrectly pads packets to 64+4FCS * instead of 60+4FCS. Note these packets still get * counted as frame errors. */ - } else if (work->word2.snoip.err_code == 5 || - work->word2.snoip.err_code == 7) { + return 0; + + if (work->word2.snoip.err_code == 5 || + work->word2.snoip.err_code == 7) { /* * We received a packet with either an alignment error * or a FCS error. This may be signalling that we are @@ -119,7 +121,10 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) /* Port received 0xd5 preamble */ work->packet_ptr.s.addr += i + 1; work->word1.len -= i + 5; - } else if ((*ptr & 0xf) == 0xd) { + return 0; + } + + if ((*ptr & 0xf) == 0xd) { /* Port received 0xd preamble */ work->packet_ptr.s.addr += i; work->word1.len -= i + 4; @@ -129,21 +134,20 @@ static inline int cvm_oct_check_rcv_error(cvmx_wqe_t *work) ((*(ptr + 1) & 0xf) << 4); ptr++; } - } else { - printk_ratelimited("Port %d unknown preamble, packet dropped\n", - port); - cvm_oct_free_work(work); - return 1; + return 0; } + + printk_ratelimited("Port %d unknown preamble, packet dropped\n", + port); + cvm_oct_free_work(work); + return 1; } - } else { - printk_ratelimited("Port %d receive error code %d, packet dropped\n", - port, work->word2.snoip.err_code); - cvm_oct_free_work(work); - return 1; } - return 0; + printk_ratelimited("Port %d receive error code %d, packet dropped\n", + port, work->word2.snoip.err_code); + cvm_oct_free_work(work); + return 1; } static void copy_segments_to_skb(cvmx_wqe_t *work, struct sk_buff *skb) From b94de4d19498b454645b72d08a05d32fa9074fb5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 5 Nov 2020 11:08:56 +0100 Subject: [PATCH 192/636] Linux 4.19.155 Tested-by: Pavel Machek (CIP) Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20201103203232.656475008@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a7056e26947c..9fc16d34e1bb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 154 +SUBLEVEL = 155 EXTRAVERSION = NAME = "People's Front" From c0be21f5047fede8040cc240df602b42b8f367c5 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Thu, 5 Nov 2020 08:49:49 -0800 Subject: [PATCH 193/636] ANDROID: Temporarily disable XFRM_USER_COMPAT filtering Before 5.10-rc1, the upstream kernel blocked any compat calls into XFRM code with EOPNOTSUPP, however Android kernels had been patching this check out and made userspace match the 64-bit kernel netlink format instead. When the new XFRM_USER_COMPAT feature landed, it added a similar check in two places which returns EOPNOTSUPP only if the XFRM_USER_COMPAT feature is disabled, however that is currently always the case for Android kernels and we do not want to filter these callers. While we work to remove the userspace compatibility mess, disable the filtering of compat calls when XFRM_USER_COMPAT is disabled. If the XFRM_USER_COMPAT feature is enabled, nothing changes. Bug: 163141236 Bug: 172541864 Signed-off-by: Alistair Delva Change-Id: Ifbea109070650dfcb4f93a3cc692c18a8d11ab44 --- net/xfrm/xfrm_state.c | 21 ++++++++++++--------- net/xfrm/xfrm_user.c | 25 ++++++++++++++----------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 263f7adf7865..c42df9dcad6e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2199,17 +2199,20 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen if (IS_ERR(data)) return PTR_ERR(data); - if (in_compat_syscall()) { - struct xfrm_translator *xtr = xfrm_get_translator(); + /* Use the 64-bit / untranslated format on Android, even for compat */ + if (!IS_ENABLED(CONFIG_ANDROID) || IS_ENABLED(CONFIG_XFRM_USER_COMPAT)) { + if (in_compat_syscall()) { + struct xfrm_translator *xtr = xfrm_get_translator(); - if (!xtr) - return -EOPNOTSUPP; + if (!xtr) + return -EOPNOTSUPP; - err = xtr->xlate_user_policy_sockptr(&data, optlen); - xfrm_put_translator(xtr); - if (err) { - kfree(data); - return err; + err = xtr->xlate_user_policy_sockptr(&data, optlen); + xfrm_put_translator(xtr); + if (err) { + kfree(data); + return err; + } } } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8cdc63254c63..fe7ff38f2374 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2700,19 +2700,22 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; - if (in_compat_syscall()) { - struct xfrm_translator *xtr = xfrm_get_translator(); + /* Use the 64-bit / untranslated format on Android, even for compat */ + if (!IS_ENABLED(CONFIG_ANDROID) || IS_ENABLED(CONFIG_XFRM_USER_COMPAT)) { + if (in_compat_syscall()) { + struct xfrm_translator *xtr = xfrm_get_translator(); - if (!xtr) - return -EOPNOTSUPP; + if (!xtr) + return -EOPNOTSUPP; - nlh64 = xtr->rcv_msg_compat(nlh, link->nla_max, - link->nla_pol, extack); - xfrm_put_translator(xtr); - if (IS_ERR(nlh64)) - return PTR_ERR(nlh64); - if (nlh64) - nlh = nlh64; + nlh64 = xtr->rcv_msg_compat(nlh, link->nla_max, + link->nla_pol, extack); + xfrm_put_translator(xtr); + if (IS_ERR(nlh64)) + return PTR_ERR(nlh64); + if (nlh64) + nlh = nlh64; + } } if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || From 54b41388f58f1d2e640b5339e3745ee3e5f9affe Mon Sep 17 00:00:00 2001 From: Alessio Balsini Date: Tue, 27 Oct 2020 13:24:03 +0000 Subject: [PATCH 194/636] ANDROID: vfs: add d_canonical_path for stacked filesystem support Inotify does not currently know when a filesystem is acting as a wrapper around another fs. This means that inotify watchers will miss any modifications to the base file, as well as any made in a separate stacked fs that points to the same file. d_canonical_path solves this problem by allowing the fs to map a dentry to a path in the lower fs. Inotify can use it to find the appropriate place to watch to be informed of all changes to a file. Test: Pixel 4.19 Bug: 171780975 Change-Id: I09563baffad1711a045e45c1bd0bd8713c2cc0b6 Signed-off-by: Daniel Rosenberg [astrachan: Folded 34df4102216e ("ANDROID: fsnotify: Notify lower fs of open") into this patch] Signed-off-by: Alistair Strachan Signed-off-by: Yongqin Liu Signed-off-by: Alessio Balsini --- fs/notify/inotify/inotify_user.c | 16 ++++++++++++++-- include/linux/dcache.h | 3 ++- include/linux/fsnotify.h | 9 +++++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 97a51690338e..9d220c72014a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -702,6 +702,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, struct fsnotify_group *group; struct inode *inode; struct path path; + struct path alteredpath; + struct path *canonical_path = &path; struct fd f; int ret; unsigned flags = 0; @@ -747,13 +749,23 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, if (ret) goto fput_and_out; + /* support stacked filesystems */ + if (path.dentry && path.dentry->d_op) { + if (path.dentry->d_op->d_canonical_path) { + path.dentry->d_op->d_canonical_path(&path, + &alteredpath); + canonical_path = &alteredpath; + path_put(&path); + } + } + /* inode held in place by reference to path; group by fget on fd */ - inode = path.dentry->d_inode; + inode = canonical_path->dentry->d_inode; group = f.file->private_data; /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); - path_put(&path); + path_put(canonical_path); fput_and_out: fdput(f); return ret; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1c6bb25a8501..ee2d177dc0e8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -151,7 +151,8 @@ struct dentry_operations { int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, const struct inode *); - ANDROID_KABI_RESERVE(1); + ANDROID_KABI_USE(1, void (*d_canonical_path)(const struct path *, + struct path *)); ANDROID_KABI_RESERVE(2); ANDROID_KABI_RESERVE(3); ANDROID_KABI_RESERVE(4); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index fd1ce10553bf..f3eb2b2e05f4 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -211,11 +211,20 @@ static inline void fsnotify_open(struct file *file) { const struct path *path = &file->f_path; struct inode *inode = file_inode(file); + struct path lower_path; __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; + if (path->dentry->d_op && path->dentry->d_op->d_canonical_path) { + path->dentry->d_op->d_canonical_path(path, &lower_path); + fsnotify_parent(&lower_path, NULL, mask); + fsnotify(lower_path.dentry->d_inode, mask, &lower_path, + FSNOTIFY_EVENT_PATH, NULL, 0); + path_put(&lower_path); + } + fsnotify_parent(path, NULL, mask); fsnotify(inode, mask, path, FSNOTIFY_EVENT_PATH, NULL, 0); } From fa199896a3e2c8d46e9c52accec8544490c69b08 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 22 Apr 2016 00:00:48 -0700 Subject: [PATCH 195/636] ANDROID: fuse: Add support for d_canonical_path Allows FUSE to report to inotify that it is acting as a layered filesystem. The userspace component returns a string representing the location of the underlying file. If the string cannot be resolved into a path, the top level path is returned instead. Bug: 23904372 Bug: 171780975 Test: Pixel 4.19 Change-Id: Iabdca0bbedfbff59e9c820c58636a68ef9683d9f Signed-off-by: Daniel Rosenberg Signed-off-by: Alessio Balsini --- fs/fuse/dev.c | 9 +++++++++ fs/fuse/dir.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/fuse/fuse_i.h | 6 ++++++ include/uapi/linux/fuse.h | 1 + 4 files changed, 55 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 1ff5a6b21db0..9861204da06f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -573,6 +574,7 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) req->out.numargs = args->out.numargs; memcpy(req->out.args, args->out.args, args->out.numargs * sizeof(struct fuse_arg)); + req->out.canonical_path = args->out.canonical_path; fuse_request_send(fc, req); ret = req->out.h.error; if (!ret && args->out.argvar) { @@ -1932,6 +1934,13 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, err = copy_out_args(cs, &req->out, nbytes); fuse_copy_finish(cs); + if (!err && req->in.h.opcode == FUSE_CANONICAL_PATH) { + char *path = (char *)req->out.args[0].value; + + path[req->out.args[0].size - 1] = 0; + req->out.h.error = kern_path(path, 0, req->out.canonical_path); + } + spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6244345a5745..90bd6c0d18cc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -281,10 +281,49 @@ static void fuse_dentry_release(struct dentry *dentry) kfree_rcu(fd, rcu); } +/* + * Get the canonical path. Since we must translate to a path, this must be done + * in the context of the userspace daemon, however, the userspace daemon cannot + * look up paths on its own. Instead, we handle the lookup as a special case + * inside of the write request. + */ +static void fuse_dentry_canonical_path(const struct path *path, + struct path *canonical_path) +{ + struct inode *inode = d_inode(path->dentry); + struct fuse_conn *fc = get_fuse_conn(inode); + FUSE_ARGS(args); + char *path_name; + int err; + + path_name = (char *)__get_free_page(GFP_KERNEL); + if (!path_name) + goto default_path; + + args.in.h.opcode = FUSE_CANONICAL_PATH; + args.in.h.nodeid = get_node_id(inode); + args.in.numargs = 0; + args.out.numargs = 1; + args.out.args[0].size = PATH_MAX; + args.out.args[0].value = path_name; + args.out.argvar = 1; + args.out.canonical_path = canonical_path; + + err = fuse_simple_request(fc, &args); + free_page((unsigned long)path_name); + if (err > 0) + return; +default_path: + canonical_path->dentry = path->dentry; + canonical_path->mnt = path->mnt; + path_get(canonical_path); +} + const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, + .d_canonical_path = fuse_dentry_canonical_path, }; const struct dentry_operations fuse_root_dentry_operations = { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index dbfc35efbefb..123f87804f51 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -213,6 +213,9 @@ struct fuse_out { /** Array of arguments */ struct fuse_arg args[2]; + + /* Path used for completing d_canonical_path */ + struct path *canonical_path; }; /** FUSE page descriptor */ @@ -235,6 +238,9 @@ struct fuse_args { unsigned argvar:1; unsigned numargs; struct fuse_arg args[2]; + + /* Path used for completing d_canonical_path */ + struct path *canonical_path; } out; }; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 2170e58a2a97..24af4edfc98c 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -383,6 +383,7 @@ enum fuse_opcode { FUSE_READDIRPLUS = 44, FUSE_RENAME2 = 45, FUSE_LSEEK = 46, + FUSE_CANONICAL_PATH= 2016, /* CUSE specific operations */ CUSE_INIT = 4096, From 4141168704e40509b376d0c32822eccc29e00627 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Sep 2020 10:00:58 +0100 Subject: [PATCH 196/636] drm/i915: Break up error capture compression loops with cond_resched() commit 7d5553147613b50149238ac1385c60e5c7cacb34 upstream. As the error capture will compress user buffers as directed to by the user, it can take an arbitrary amount of time and space. Break up the compression loops with a call to cond_resched(), that will allow other processes to schedule (avoiding the soft lockups) and also serve as a warning should we try to make this loop atomic in the future. Testcase: igt/gem_exec_capture/many-* Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200916090059.3189-2-chris@chris-wilson.co.uk (cherry picked from commit 293f43c80c0027ff9299036c24218ac705ce584e) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a262a64f5625..ba24ac698e8b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -268,6 +268,8 @@ static int compress_page(struct compress *c, if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; + + cond_resched(); } while (zstream->avail_in); /* Fallback to uncompressed if we increase size? */ @@ -347,6 +349,7 @@ static int compress_page(struct compress *c, if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE)) memcpy(ptr, src, PAGE_SIZE); dst->pages[dst->page_count++] = ptr; + cond_resched(); return 0; } From 604ac10d9d1e9a85787d53acbba541f420b9cc9e Mon Sep 17 00:00:00 2001 From: Hoang Huu Le Date: Thu, 27 Aug 2020 09:56:51 +0700 Subject: [PATCH 197/636] tipc: fix use-after-free in tipc_bcast_get_mode commit fdeba99b1e58ecd18c2940c453e19e4ef20ff591 upstream. Syzbot has reported those issues as: ================================================================== BUG: KASAN: use-after-free in tipc_bcast_get_mode+0x3ab/0x400 net/tipc/bcast.c:759 Read of size 1 at addr ffff88805e6b3571 by task kworker/0:6/3850 CPU: 0 PID: 3850 Comm: kworker/0:6 Not tainted 5.8.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events tipc_net_finalize_work Thread 1's call trace: [...] kfree+0x103/0x2c0 mm/slab.c:3757 <- bcbase releasing tipc_bcast_stop+0x1b0/0x2f0 net/tipc/bcast.c:721 tipc_exit_net+0x24/0x270 net/tipc/core.c:112 [...] Thread 2's call trace: [...] tipc_bcast_get_mode+0x3ab/0x400 net/tipc/bcast.c:759 <- bcbase has already been freed by Thread 1 tipc_node_broadcast+0x9e/0xcc0 net/tipc/node.c:1744 tipc_nametbl_publish+0x60b/0x970 net/tipc/name_table.c:752 tipc_net_finalize net/tipc/net.c:141 [inline] tipc_net_finalize+0x1fa/0x310 net/tipc/net.c:131 tipc_net_finalize_work+0x55/0x80 net/tipc/net.c:150 [...] ================================================================== BUG: KASAN: use-after-free in tipc_named_reinit+0xef/0x290 net/tipc/name_distr.c:344 Read of size 8 at addr ffff888052ab2000 by task kworker/0:13/30628 CPU: 0 PID: 30628 Comm: kworker/0:13 Not tainted 5.8.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events tipc_net_finalize_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1f0/0x31e lib/dump_stack.c:118 print_address_description+0x66/0x5a0 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report+0x132/0x1d0 mm/kasan/report.c:530 tipc_named_reinit+0xef/0x290 net/tipc/name_distr.c:344 tipc_net_finalize+0x85/0xe0 net/tipc/net.c:138 tipc_net_finalize_work+0x50/0x70 net/tipc/net.c:150 process_one_work+0x789/0xfc0 kernel/workqueue.c:2269 worker_thread+0xaa4/0x1460 kernel/workqueue.c:2415 kthread+0x37e/0x3a0 drivers/block/aoe/aoecmd.c:1234 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 [...] Freed by task 14058: save_stack mm/kasan/common.c:48 [inline] set_track mm/kasan/common.c:56 [inline] kasan_set_free_info mm/kasan/common.c:316 [inline] __kasan_slab_free+0x114/0x170 mm/kasan/common.c:455 __cache_free mm/slab.c:3426 [inline] kfree+0x10a/0x220 mm/slab.c:3757 tipc_exit_net+0x29/0x50 net/tipc/core.c:113 ops_exit_list net/core/net_namespace.c:186 [inline] cleanup_net+0x708/0xba0 net/core/net_namespace.c:603 process_one_work+0x789/0xfc0 kernel/workqueue.c:2269 worker_thread+0xaa4/0x1460 kernel/workqueue.c:2415 kthread+0x37e/0x3a0 drivers/block/aoe/aoecmd.c:1234 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 Fix it by calling flush_scheduled_work() to make sure the tipc_net_finalize_work() stopped before releasing bcbase object. Reported-by: syzbot+6ea1f7a8df64596ef4d7@syzkaller.appspotmail.com Reported-by: syzbot+e9cc557752ab126c1b99@syzkaller.appspotmail.com Acked-by: Jon Maloy Signed-off-by: Hoang Huu Le Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/core.c b/net/tipc/core.c index 49bb9fc0aa06..ce0f067d0faf 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -93,6 +93,11 @@ out_sk_rht: static void __net_exit tipc_exit_net(struct net *net) { tipc_net_stop(net); + + /* Make sure the tipc_net_finalize_work stopped + * before releasing the resources. + */ + flush_scheduled_work(); tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); From caf8f9c19a93ec501d0ffca5e6767a969961baea Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 1 Nov 2020 17:07:44 -0800 Subject: [PATCH 198/636] ptrace: fix task_join_group_stop() for the case when current is traced commit 7b3c36fc4c231ca532120bbc0df67a12f09c1d96 upstream. This testcase #include #include #include #include #include #include #include void *tf(void *arg) { return NULL; } int main(void) { int pid = fork(); if (!pid) { kill(getpid(), SIGSTOP); pthread_t th; pthread_create(&th, NULL, tf, NULL); return 0; } waitpid(pid, NULL, WSTOPPED); ptrace(PTRACE_SEIZE, pid, 0, PTRACE_O_TRACECLONE); waitpid(pid, NULL, 0); ptrace(PTRACE_CONT, pid, 0,0); waitpid(pid, NULL, 0); int status; int thread = waitpid(-1, &status, 0); assert(thread > 0 && thread != pid); assert(status == 0x80137f); return 0; } fails and triggers WARN_ON_ONCE(!signr) in do_jobctl_trap(). This is because task_join_group_stop() has 2 problems when current is traced: 1. We can't rely on the "JOBCTL_STOP_PENDING" check, a stopped tracee can be woken up by debugger and it can clone another thread which should join the group-stop. We need to check group_stop_count || SIGNAL_STOP_STOPPED. 2. If SIGNAL_STOP_STOPPED is already set, we should not increment sig->group_stop_count and add JOBCTL_STOP_CONSUME. The new thread should stop without another do_notify_parent_cldstop() report. To clarify, the problem is very old and we should blame ptrace_init_task(). But now that we have task_join_group_stop() it makes more sense to fix this helper to avoid the code duplication. Reported-by: syzbot+3485e3773f7da290eecc@syzkaller.appspotmail.com Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Cc: Jens Axboe Cc: Christian Brauner Cc: "Eric W . Biederman" Cc: Zhiqiang Liu Cc: Tejun Heo Cc: Link: https://lkml.kernel.org/r/20201019134237.GA18810@redhat.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/signal.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/kernel/signal.c b/kernel/signal.c index 6a5692118139..a02a25acf205 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -385,16 +385,17 @@ static bool task_participate_group_stop(struct task_struct *task) void task_join_group_stop(struct task_struct *task) { + unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK; + struct signal_struct *sig = current->signal; + + if (sig->group_stop_count) { + sig->group_stop_count++; + mask |= JOBCTL_STOP_CONSUME; + } else if (!(sig->flags & SIGNAL_STOP_STOPPED)) + return; + /* Have the new thread join an on-going signal group stop */ - unsigned long jobctl = current->jobctl; - if (jobctl & JOBCTL_STOP_PENDING) { - struct signal_struct *sig = current->signal; - unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK; - unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME; - if (task_set_jobctl_pending(task, signr | gstop)) { - sig->group_stop_count++; - } - } + task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); } /* From d67543112dc5f39b41f9bd309e47da25be2bb14c Mon Sep 17 00:00:00 2001 From: Mark Deneen Date: Fri, 30 Oct 2020 15:58:14 +0000 Subject: [PATCH 199/636] cadence: force nonlinear buffers to be cloned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 403dc16796f5516acf23d94a1cd9eba564d03210 ] In my test setup, I had a SAMA5D27 device configured with ip forwarding, and second device with usb ethernet (r8152) sending ICMP packets.  If the packet was larger than about 220 bytes, the SAMA5 device would "oops" with the following trace: kernel BUG at net/core/skbuff.c:1863! Internal error: Oops - BUG: 0 [#1] ARM Modules linked in: xt_MASQUERADE ppp_async ppp_generic slhc iptable_nat xt_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 can_raw can bridge stp llc ipt_REJECT nf_reject_ipv4 sd_mod cdc_ether usbnet usb_storage r8152 scsi_mod mii o ption usb_wwan usbserial micrel macb at91_sama5d2_adc phylink gpio_sama5d2_piobu m_can_platform m_can industrialio_triggered_buffer kfifo_buf of_mdio can_dev fixed_phy sdhci_of_at91 sdhci_pltfm libphy sdhci mmc_core ohci_at91 ehci_atmel o hci_hcd iio_rescale industrialio sch_fq_codel spidev prox2_hal(O) CPU: 0 PID: 0 Comm: swapper Tainted: G           O      5.9.1-prox2+ #1 Hardware name: Atmel SAMA5 PC is at skb_put+0x3c/0x50 LR is at macb_start_xmit+0x134/0xad0 [macb] pc : []    lr : []    psr: 20070113 sp : c0d01a60  ip : c07232c0  fp : c4250000 r10: c0d03cc8  r9 : 00000000  r8 : c0d038c0 r7 : 00000000  r6 : 00000008  r5 : c59b66c0  r4 : 0000002a r3 : 8f659eff  r2 : c59e9eea  r1 : 00000001  r0 : c59b66c0 Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none Control: 10c53c7d  Table: 2640c059  DAC: 00000051 Process swapper (pid: 0, stack limit = 0x75002d81) [] (skb_put) from [] (macb_start_xmit+0x134/0xad0 [macb]) [] (macb_start_xmit [macb]) from [] (dev_hard_start_xmit+0x90/0x11c) [] (dev_hard_start_xmit) from [] (sch_direct_xmit+0x124/0x260) [] (sch_direct_xmit) from [] (__dev_queue_xmit+0x4b0/0x6d0) [] (__dev_queue_xmit) from [] (ip_finish_output2+0x350/0x580) [] (ip_finish_output2) from [] (ip_output+0xb4/0x13c) [] (ip_output) from [] (ip_forward+0x474/0x500) [] (ip_forward) from [] (ip_sublist_rcv_finish+0x3c/0x50) [] (ip_sublist_rcv_finish) from [] (ip_sublist_rcv+0x11c/0x188) [] (ip_sublist_rcv) from [] (ip_list_rcv+0xf8/0x124) [] (ip_list_rcv) from [] (__netif_receive_skb_list_core+0x1a0/0x20c) [] (__netif_receive_skb_list_core) from [] (netif_receive_skb_list_internal+0x194/0x230) [] (netif_receive_skb_list_internal) from [] (gro_normal_list.part.0+0x14/0x28) [] (gro_normal_list.part.0) from [] (napi_complete_done+0x16c/0x210) [] (napi_complete_done) from [] (r8152_poll+0x684/0x708 [r8152]) [] (r8152_poll [r8152]) from [] (net_rx_action+0x100/0x328) [] (net_rx_action) from [] (__do_softirq+0xec/0x274) [] (__do_softirq) from [] (irq_exit+0xcc/0xd0) [] (irq_exit) from [] (__handle_domain_irq+0x58/0xa4) [] (__handle_domain_irq) from [] (__irq_svc+0x6c/0x90) Exception stack(0xc0d01ef0 to 0xc0d01f38) 1ee0:                                     00000000 0000003d 0c31f383 c0d0fa00 1f00: c0d2eb80 00000000 c0d2e630 4dad8c49 4da967b0 0000003d 0000003d 00000000 1f20: fffffff5 c0d01f40 c04e0f88 c04e0f8c 30070013 ffffffff [] (__irq_svc) from [] (cpuidle_enter_state+0x7c/0x378) [] (cpuidle_enter_state) from [] (cpuidle_enter+0x28/0x38) [] (cpuidle_enter) from [] (do_idle+0x194/0x214) [] (do_idle) from [] (cpu_startup_entry+0xc/0x14) [] (cpu_startup_entry) from [] (start_kernel+0x46c/0x4a0) Code: e580c054 8a000002 e1a00002 e8bd8070 (e7f001f2) ---[ end trace 146c8a334115490c ]--- The solution was to force nonlinear buffers to be cloned.  This was previously reported by Klaus Doth (https://www.spinics.net/lists/netdev/msg556937.html) but never formally submitted as a patch. This is the third revision, hopefully the formatting is correct this time! Suggested-by: Klaus Doth Fixes: 653e92a9175e ("net: macb: add support for padding and fcs computation") Signed-off-by: Mark Deneen Link: https://lore.kernel.org/r/20201030155814.622831-1-mdeneen@saucontech.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index acae87f548a1..0374a1ba1010 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1704,7 +1704,8 @@ static inline int macb_clear_csum(struct sk_buff *skb) static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) { - bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb); + bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) || + skb_is_nonlinear(*skb); int padlen = ETH_ZLEN - (*skb)->len; int headroom = skb_headroom(*skb); int tailroom = skb_tailroom(*skb); From fe478a589404287513bf4cbb074df80805db9f6f Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Mon, 2 Nov 2020 23:06:51 +0530 Subject: [PATCH 200/636] chelsio/chtls: fix memory leaks caused by a race [ Upstream commit 8080b462b6aa856ae05ea010441a702599e579f2 ] race between user context and softirq causing memleak, consider the call sequence scenario chtls_setkey() //user context chtls_peer_close() chtls_abort_req_rss() chtls_setkey() //user context work request skb queued in chtls_setkey() won't be freed because resources are already cleaned for this connection, fix it by not queuing work request while socket is closing. v1->v2: - fix W=1 warning. v2->v3: - separate it out from another memleak fix. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201102173650.24754-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_hw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c b/drivers/crypto/chelsio/chtls/chtls_hw.c index 64d24823c65a..2294fb06bef3 100644 --- a/drivers/crypto/chelsio/chtls/chtls_hw.c +++ b/drivers/crypto/chelsio/chtls/chtls_hw.c @@ -368,6 +368,9 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname) if (ret) goto out_notcb; + if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) + goto out_notcb; + set_wr_txq(skb, CPL_PRIORITY_DATA, csk->tlshws.txqid); csk->wr_credits -= DIV_ROUND_UP(len, 16); csk->wr_unacked += DIV_ROUND_UP(len, 16); From 75b0b05ace856333c2a94c2173c92973dd027297 Mon Sep 17 00:00:00 2001 From: Vinay Kumar Yadav Date: Mon, 2 Nov 2020 23:09:10 +0530 Subject: [PATCH 201/636] chelsio/chtls: fix always leaking ctrl_skb [ Upstream commit dbfe394dad33f99cf8458be50483ec40a5d29c34 ] Correct skb refcount in alloc_ctrl_skb(), causing skb memleak when chtls_send_abort() called with NULL skb. it was always leaking the skb, correct it by incrementing skb refs by one. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Link: https://lore.kernel.org/r/20201102173909.24826-1-vinay.yadav@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/chelsio/chtls/chtls_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index f7334c42ebd9..35116cdb5e38 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -175,7 +175,7 @@ static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len) { if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) { __skb_trim(skb, 0); - refcount_add(2, &skb->users); + refcount_inc(&skb->users); } else { skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL); } From 63a22c7cb9d9a422e959bd7a01d4af07cfa00298 Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Thu, 29 Oct 2020 10:10:56 +0200 Subject: [PATCH 202/636] gianfar: Replace skb_realloc_headroom with skb_cow_head for PTP [ Upstream commit d145c9031325fed963a887851d9fa42516efd52b ] When PTP timestamping is enabled on Tx, the controller inserts the Tx timestamp at the beginning of the frame buffer, between SFD and the L2 frame header. This means that the skb provided by the stack is required to have enough headroom otherwise a new skb needs to be created by the driver to accommodate the timestamp inserted by h/w. Up until now the driver was relying on skb_realloc_headroom() to create new skbs to accommodate PTP frames. Turns out that this method is not reliable in this context at least, as skb_realloc_headroom() for PTP frames can cause random crashes, mostly in subsequent skb_*() calls, when multiple concurrent TCP streams are run at the same time with the PTP flow on the same device (as seen in James' report). I also noticed that when the system is loaded by sending multiple TCP streams, the driver receives cloned skbs in large numbers. skb_cow_head() instead proves to be stable in this scenario, and not only handles cloned skbs too but it's also more efficient and widely used in other drivers. The commit introducing skb_realloc_headroom in the driver goes back to 2009, commit 93c1285c5d92 ("gianfar: reallocate skb when headroom is not enough for fcb"). For practical purposes I'm referencing a newer commit (from 2012) that brings the code to its current structure (and fixes the PTP case). Fixes: 9c4886e5e63b ("gianfar: Fix invalid TX frames returned on error queue when time stamping") Reported-by: James Jurack Suggested-by: Jakub Kicinski Signed-off-by: Claudiu Manoil Link: https://lore.kernel.org/r/20201029081057.8506-1-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/gianfar.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 8243501c3757..2e7988e1c0ab 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2370,20 +2370,12 @@ static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) fcb_len = GMAC_FCB_LEN + GMAC_TXPAL_LEN; /* make space for additional header when fcb is needed */ - if (fcb_len && unlikely(skb_headroom(skb) < fcb_len)) { - struct sk_buff *skb_new; - - skb_new = skb_realloc_headroom(skb, fcb_len); - if (!skb_new) { + if (fcb_len) { + if (unlikely(skb_cow_head(skb, fcb_len))) { dev->stats.tx_errors++; dev_kfree_skb_any(skb); return NETDEV_TX_OK; } - - if (skb->sk) - skb_set_owner_w(skb_new, skb->sk); - dev_consume_skb_any(skb); - skb = skb_new; } /* total number of fragments in the SKB */ From 3a40f713276b2a03609749c6ef2e9efe4acdfa1a Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Tue, 20 Oct 2020 20:36:05 +0300 Subject: [PATCH 203/636] gianfar: Account for Tx PTP timestamp in the skb headroom [ Upstream commit d6a076d68c6b5d6a5800f3990a513facb7016dea ] When PTP timestamping is enabled on Tx, the controller inserts the Tx timestamp at the beginning of the frame buffer, between SFD and the L2 frame header. This means that the skb provided by the stack is required to have enough headroom otherwise a new skb needs to be created by the driver to accommodate the timestamp inserted by h/w. Up until now the driver was relying on the second option, using skb_realloc_headroom() to create a new skb to accommodate PTP frames. Turns out that this method is not reliable, as reallocation of skbs for PTP frames along with the required overhead (skb_set_owner_w, consume_skb) is causing random crashes in subsequent skb_*() calls, when multiple concurrent TCP streams are run at the same time on the same device (as seen in James' report). Note that these crashes don't occur with a single TCP stream, nor with multiple concurrent UDP streams, but only when multiple TCP streams are run concurrently with the PTP packet flow (doing skb reallocation). This patch enforces the first method, by requesting enough headroom from the stack to accommodate PTP frames, and so avoiding skb_realloc_headroom() & co, and the crashes no longer occur. There's no reason not to set needed_headroom to a large enough value to accommodate PTP frames, so in this regard this patch is a fix. Reported-by: James Jurack Fixes: bee9e58c9e98 ("gianfar:don't add FCB length to hard_header_len") Signed-off-by: Claudiu Manoil Link: https://lore.kernel.org/r/20201020173605.1173-1-claudiu.manoil@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/gianfar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 2e7988e1c0ab..8db0924ec681 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1388,7 +1388,7 @@ static int gfar_probe(struct platform_device *ofdev) if (dev->features & NETIF_F_IP_CSUM || priv->device_flags & FSL_GIANFAR_DEV_HAS_TIMER) - dev->needed_headroom = GMAC_FCB_LEN; + dev->needed_headroom = GMAC_FCB_LEN + GMAC_TXPAL_LEN; /* Initializing some of the rx/tx queue level parameters */ for (i = 0; i < priv->num_tx_queues; i++) { From 1a9118379cc7b66bfd2ac030da4af3a712a9e9e2 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Mon, 2 Nov 2020 12:01:08 +0100 Subject: [PATCH 204/636] net: usb: qmi_wwan: add Telit LE910Cx 0x1230 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5fd8477ed8ca77e64b93d44a6dae4aa70c191396 ] Add support for Telit LE910Cx 0x1230 composition: 0x1230: tty, adb, rmnet, audio, tty, tty, tty, tty Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20201102110108.17244-1-dnlplm@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d2612b69257e..6e0b3dc14aa4 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1268,6 +1268,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x1230, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ From 64752f5cfda61aa7ca12d23ca1ecc7d36e996f93 Mon Sep 17 00:00:00 2001 From: Petr Malat Date: Fri, 30 Oct 2020 14:26:33 +0100 Subject: [PATCH 205/636] sctp: Fix COMM_LOST/CANT_STR_ASSOC err reporting on big-endian platforms [ Upstream commit b6df8c81412190fbd5eaa3cec7f642142d9c16cd ] Commit 978aa0474115 ("sctp: fix some type cast warnings introduced since very beginning")' broke err reading from sctp_arg, because it reads the value as 32-bit integer, although the value is stored as 16-bit integer. Later this value is passed to the userspace in 16-bit variable, thus the user always gets 0 on big-endian platforms. Fix it by reading the __u16 field of sctp_arg union, as reading err field would produce a sparse warning. Fixes: 978aa0474115 ("sctp: fix some type cast warnings introduced since very beginning") Signed-off-by: Petr Malat Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/20201030132633.7045-1-oss@malat.biz Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_sideeffect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 567517e44811..bb4f1d0da153 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1615,12 +1615,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; case SCTP_CMD_INIT_FAILED: - sctp_cmd_init_failed(commands, asoc, cmd->obj.u32); + sctp_cmd_init_failed(commands, asoc, cmd->obj.u16); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, - subtype, chunk, cmd->obj.u32); + subtype, chunk, cmd->obj.u16); break; case SCTP_CMD_INIT_COUNTER_INC: From 4ff840604a45673bb02dfaaa5f30cd9bc40011e5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 31 Oct 2020 11:10:53 +0800 Subject: [PATCH 206/636] sfp: Fix error handing in sfp_probe() [ Upstream commit 9621618130bf7e83635367c13b9a6ee53935bb37 ] gpiod_to_irq() never return 0, but returns negative in case of error, check it and set gpio_irq to 0. Fixes: 73970055450e ("sfp: add SFP module support") Signed-off-by: YueHaibing Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20201031031053.25264-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/sfp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 998d08ae7431..47d518e6d5d4 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1886,7 +1886,8 @@ static int sfp_probe(struct platform_device *pdev) continue; irq = gpiod_to_irq(sfp->gpio[i]); - if (!irq) { + if (irq < 0) { + irq = 0; poll = true; continue; } From 8a78b4c0d6292d32d76b4268b5a33ae089a5d791 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 19 Jun 2020 20:47:28 +0000 Subject: [PATCH 207/636] blktrace: fix debugfs use after free commit bad8e64fb19d3a0de5e564d9a7271c31bd684369 upstream. On commit 6ac93117ab00 ("blktrace: use existing disk debugfs directory") merged on v4.12 Omar fixed the original blktrace code for request-based drivers (multiqueue). This however left in place a possible crash, if you happen to abuse blktrace while racing to remove / add a device. We used to use asynchronous removal of the request_queue, and with that the issue was easier to reproduce. Now that we have reverted to synchronous removal of the request_queue, the issue is still possible to reproduce, its however just a bit more difficult. We essentially run two instances of break-blktrace which add/remove a loop device, and setup a blktrace and just never tear the blktrace down. We do this twice in parallel. This is easily reproduced with the script run_0004.sh from break-blktrace [0]. We can end up with two types of panics each reflecting where we race, one a failed blktrace setup: [ 252.426751] debugfs: Directory 'loop0' with parent 'block' already present! [ 252.432265] BUG: kernel NULL pointer dereference, address: 00000000000000a0 [ 252.436592] #PF: supervisor write access in kernel mode [ 252.439822] #PF: error_code(0x0002) - not-present page [ 252.442967] PGD 0 P4D 0 [ 252.444656] Oops: 0002 [#1] SMP NOPTI [ 252.446972] CPU: 10 PID: 1153 Comm: break-blktrace Tainted: G E 5.7.0-rc2-next-20200420+ #164 [ 252.452673] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 [ 252.456343] RIP: 0010:down_write+0x15/0x40 [ 252.458146] Code: eb ca e8 ae 22 8d ff cc cc cc cc cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 55 48 89 fd e8 52 db ff ff 31 c0 ba 01 00 00 00 48 0f b1 55 00 75 0f 48 8b 04 25 c0 8b 01 00 48 89 45 08 5d [ 252.463638] RSP: 0018:ffffa626415abcc8 EFLAGS: 00010246 [ 252.464950] RAX: 0000000000000000 RBX: ffff958c25f0f5c0 RCX: ffffff8100000000 [ 252.466727] RDX: 0000000000000001 RSI: ffffff8100000000 RDI: 00000000000000a0 [ 252.468482] RBP: 00000000000000a0 R08: 0000000000000000 R09: 0000000000000001 [ 252.470014] R10: 0000000000000000 R11: ffff958d1f9227ff R12: 0000000000000000 [ 252.471473] R13: ffff958c25ea5380 R14: ffffffff8cce15f1 R15: 00000000000000a0 [ 252.473346] FS: 00007f2e69dee540(0000) GS:ffff958c2fc80000(0000) knlGS:0000000000000000 [ 252.475225] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 252.476267] CR2: 00000000000000a0 CR3: 0000000427d10004 CR4: 0000000000360ee0 [ 252.477526] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 252.478776] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 252.479866] Call Trace: [ 252.480322] simple_recursive_removal+0x4e/0x2e0 [ 252.481078] ? debugfs_remove+0x60/0x60 [ 252.481725] ? relay_destroy_buf+0x77/0xb0 [ 252.482662] debugfs_remove+0x40/0x60 [ 252.483518] blk_remove_buf_file_callback+0x5/0x10 [ 252.484328] relay_close_buf+0x2e/0x60 [ 252.484930] relay_open+0x1ce/0x2c0 [ 252.485520] do_blk_trace_setup+0x14f/0x2b0 [ 252.486187] __blk_trace_setup+0x54/0xb0 [ 252.486803] blk_trace_ioctl+0x90/0x140 [ 252.487423] ? do_sys_openat2+0x1ab/0x2d0 [ 252.488053] blkdev_ioctl+0x4d/0x260 [ 252.488636] block_ioctl+0x39/0x40 [ 252.489139] ksys_ioctl+0x87/0xc0 [ 252.489675] __x64_sys_ioctl+0x16/0x20 [ 252.490380] do_syscall_64+0x52/0x180 [ 252.491032] entry_SYSCALL_64_after_hwframe+0x44/0xa9 And the other on the device removal: [ 128.528940] debugfs: Directory 'loop0' with parent 'block' already present! [ 128.615325] BUG: kernel NULL pointer dereference, address: 00000000000000a0 [ 128.619537] #PF: supervisor write access in kernel mode [ 128.622700] #PF: error_code(0x0002) - not-present page [ 128.625842] PGD 0 P4D 0 [ 128.627585] Oops: 0002 [#1] SMP NOPTI [ 128.629871] CPU: 12 PID: 544 Comm: break-blktrace Tainted: G E 5.7.0-rc2-next-20200420+ #164 [ 128.635595] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 [ 128.640471] RIP: 0010:down_write+0x15/0x40 [ 128.643041] Code: eb ca e8 ae 22 8d ff cc cc cc cc cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 55 48 89 fd e8 52 db ff ff 31 c0 ba 01 00 00 00 48 0f b1 55 00 75 0f 65 48 8b 04 25 c0 8b 01 00 48 89 45 08 5d [ 128.650180] RSP: 0018:ffffa9c3c05ebd78 EFLAGS: 00010246 [ 128.651820] RAX: 0000000000000000 RBX: ffff8ae9a6370240 RCX: ffffff8100000000 [ 128.653942] RDX: 0000000000000001 RSI: ffffff8100000000 RDI: 00000000000000a0 [ 128.655720] RBP: 00000000000000a0 R08: 0000000000000002 R09: ffff8ae9afd2d3d0 [ 128.657400] R10: 0000000000000056 R11: 0000000000000000 R12: 0000000000000000 [ 128.659099] R13: 0000000000000000 R14: 0000000000000003 R15: 00000000000000a0 [ 128.660500] FS: 00007febfd995540(0000) GS:ffff8ae9afd00000(0000) knlGS:0000000000000000 [ 128.662204] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 128.663426] CR2: 00000000000000a0 CR3: 0000000420042003 CR4: 0000000000360ee0 [ 128.664776] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 128.666022] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 128.667282] Call Trace: [ 128.667801] simple_recursive_removal+0x4e/0x2e0 [ 128.668663] ? debugfs_remove+0x60/0x60 [ 128.669368] debugfs_remove+0x40/0x60 [ 128.669985] blk_trace_free+0xd/0x50 [ 128.670593] __blk_trace_remove+0x27/0x40 [ 128.671274] blk_trace_shutdown+0x30/0x40 [ 128.671935] blk_release_queue+0x95/0xf0 [ 128.672589] kobject_put+0xa5/0x1b0 [ 128.673188] disk_release+0xa2/0xc0 [ 128.673786] device_release+0x28/0x80 [ 128.674376] kobject_put+0xa5/0x1b0 [ 128.674915] loop_remove+0x39/0x50 [loop] [ 128.675511] loop_control_ioctl+0x113/0x130 [loop] [ 128.676199] ksys_ioctl+0x87/0xc0 [ 128.676708] __x64_sys_ioctl+0x16/0x20 [ 128.677274] do_syscall_64+0x52/0x180 [ 128.677823] entry_SYSCALL_64_after_hwframe+0x44/0xa9 The common theme here is: debugfs: Directory 'loop0' with parent 'block' already present This crash happens because of how blktrace uses the debugfs directory where it places its files. Upon init we always create the same directory which would be needed by blktrace but we only do this for make_request drivers (multiqueue) block drivers. When you race a removal of these devices with a blktrace setup you end up in a situation where the make_request recursive debugfs removal will sweep away the blktrace files and then later blktrace will also try to remove individual dentries which are already NULL. The inverse is also possible and hence the two types of use after frees. We don't create the block debugfs directory on init for these types of block devices: * request-based block driver block devices * every possible partition * scsi-generic And so, this race should in theory only be possible with make_request drivers. We can fix the UAF by simply re-using the debugfs directory for make_request drivers (multiqueue) and only creating the ephemeral directory for the other type of block devices. The new clarifications on relying on the q->blk_trace_mutex *and* also checking for q->blk_trace *prior* to processing a blktrace ensures the debugfs directories are only created if no possible directory name clashes are possible. This goes tested with: o nvme partitions o ISCSI with tgt, and blktracing against scsi-generic with: o block o tape o cdrom o media changer o blktests This patch is part of the work which disputes the severity of CVE-2019-19770 which shows this issue is not a core debugfs issue, but a misuse of debugfs within blktace. Fixes: 6ac93117ab00 ("blktrace: use existing disk debugfs directory") Reported-by: syzbot+603294af2d01acfdd6da@syzkaller.appspotmail.com Signed-off-by: Luis Chamberlain Reviewed-by: Christoph Hellwig Cc: Bart Van Assche Cc: Omar Sandoval Cc: Hannes Reinecke Cc: Nicolai Stange Cc: Greg Kroah-Hartman Cc: Michal Hocko Cc: "Martin K. Petersen" Cc: "James E.J. Bottomley" Cc: yu kuai Signed-off-by: Jens Axboe [bwh: Backported to 4.19: open-code queue_is_mq()] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- kernel/trace/blktrace.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 1442f6152abc..645048bb1e86 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -521,10 +521,18 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->msg_data) goto err; - ret = -ENOENT; - - dir = debugfs_lookup(buts->name, blk_debugfs_root); - if (!dir) +#ifdef CONFIG_BLK_DEBUG_FS + /* + * When tracing whole make_request drivers (multiqueue) block devices, + * reuse the existing debugfs directory created by the block layer on + * init. For request-based block devices, all partitions block devices, + * and scsi-generic block devices we create a temporary new debugfs + * directory that will be removed once the trace ends. + */ + if (q->mq_ops && bdev && bdev == bdev->bd_contains) + dir = q->debugfs_dir; + else +#endif bt->dir = dir = debugfs_create_dir(buts->name, blk_debugfs_root); if (!dir) goto err; @@ -583,8 +591,6 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ret = 0; err: - if (dir && !bt->dir) - dput(dir); if (ret) blk_trace_free(bt); return ret; From 7f2b9e8d42bd957332dc2af0ba6a77684fe32b1c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 25 Jan 2019 13:09:15 +0800 Subject: [PATCH 208/636] btrfs: extent_io: Kill the forward declaration of flush_write_bio commit bb58eb9e167d087cc518f7a71c3c00f1671958da upstream. There is no need to forward declare flush_write_bio(), as it only depends on submit_one_bio(). Both of them are pretty small, just move them to kill the forward declaration. Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Signed-off-by: David Sterba [bwh: Cherry-picked for 4.19 to ease backporting later fixes] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 66 +++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 82d597b16152..128da03721d0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -138,7 +138,38 @@ static int add_extent_changeset(struct extent_state *state, unsigned bits, return ret; } -static void flush_write_bio(struct extent_page_data *epd); +static int __must_check submit_one_bio(struct bio *bio, int mirror_num, + unsigned long bio_flags) +{ + blk_status_t ret = 0; + struct bio_vec *bvec = bio_last_bvec_all(bio); + struct page *page = bvec->bv_page; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + + start = page_offset(page) + bvec->bv_offset; + + bio->bi_private = NULL; + + if (tree->ops) + ret = tree->ops->submit_bio_hook(tree->private_data, bio, + mirror_num, bio_flags, start); + else + btrfsic_submit_bio(bio); + + return blk_status_to_errno(ret); +} + +static void flush_write_bio(struct extent_page_data *epd) +{ + if (epd->bio) { + int ret; + + ret = submit_one_bio(epd->bio, 0, 0); + BUG_ON(ret < 0); /* -ENOMEM */ + epd->bio = NULL; + } +} int __init extent_io_init(void) { @@ -2710,28 +2741,6 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size) return bio; } -static int __must_check submit_one_bio(struct bio *bio, int mirror_num, - unsigned long bio_flags) -{ - blk_status_t ret = 0; - struct bio_vec *bvec = bio_last_bvec_all(bio); - struct page *page = bvec->bv_page; - struct extent_io_tree *tree = bio->bi_private; - u64 start; - - start = page_offset(page) + bvec->bv_offset; - - bio->bi_private = NULL; - - if (tree->ops) - ret = tree->ops->submit_bio_hook(tree->private_data, bio, - mirror_num, bio_flags, start); - else - btrfsic_submit_bio(bio); - - return blk_status_to_errno(ret); -} - /* * @opf: bio REQ_OP_* and REQ_* flags as one value * @tree: tree so we can call our merge_bio hook @@ -4033,17 +4042,6 @@ retry: return ret; } -static void flush_write_bio(struct extent_page_data *epd) -{ - if (epd->bio) { - int ret; - - ret = submit_one_bio(epd->bio, 0, 0); - BUG_ON(ret < 0); /* -ENOMEM */ - epd->bio = NULL; - } -} - int extent_write_full_page(struct page *page, struct writeback_control *wbc) { int ret; From 63ece3bb01489e3942bc15e4aa976afcb3cd864f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:41 +0800 Subject: [PATCH 209/636] btrfs: extent_io: Move the BUG_ON() in flush_write_bio() one level up commit f4340622e02261fae599e3da936ff4808b418173 upstream. We have a BUG_ON() in flush_write_bio() to handle the return value of submit_one_bio(). Move the BUG_ON() one level up to all its callers. This patch will introduce temporary variable, @flush_ret to keep code change minimal in this patch. That variable will be cleaned up when enhancing the error handling later. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Cherry-picked for 4.19 to ease backporting later fixes] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 55 +++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 128da03721d0..e6dab32e37f4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -160,15 +160,28 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, return blk_status_to_errno(ret); } -static void flush_write_bio(struct extent_page_data *epd) +/* + * Submit bio from extent page data via submit_one_bio + * + * Return 0 if everything is OK. + * Return <0 for error. + */ +static int __must_check flush_write_bio(struct extent_page_data *epd) { - if (epd->bio) { - int ret; + int ret = 0; + if (epd->bio) { ret = submit_one_bio(epd->bio, 0, 0); - BUG_ON(ret < 0); /* -ENOMEM */ + /* + * Clean up of epd->bio is handled by its endio function. + * And endio is either triggered by successful bio execution + * or the error handler of submit bio hook. + * So at this point, no matter what happened, we don't need + * to clean up epd->bio. + */ epd->bio = NULL; } + return ret; } int __init extent_io_init(void) @@ -3538,7 +3551,8 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!btrfs_try_tree_write_lock(eb)) { flush = 1; - flush_write_bio(epd); + ret = flush_write_bio(epd); + BUG_ON(ret < 0); btrfs_tree_lock(eb); } @@ -3547,7 +3561,8 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!epd->sync_io) return 0; if (!flush) { - flush_write_bio(epd); + ret = flush_write_bio(epd); + BUG_ON(ret < 0); flush = 1; } while (1) { @@ -3588,7 +3603,8 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!trylock_page(p)) { if (!flush) { - flush_write_bio(epd); + ret = flush_write_bio(epd); + BUG_ON(ret < 0); flush = 1; } lock_page(p); @@ -3779,6 +3795,7 @@ int btree_write_cache_pages(struct address_space *mapping, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; int ret = 0; + int flush_ret; int done = 0; int nr_to_write_done = 0; struct pagevec pvec; @@ -3878,7 +3895,8 @@ retry: index = 0; goto retry; } - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } @@ -3975,7 +3993,8 @@ retry: * tmpfs file mapping */ if (!trylock_page(page)) { - flush_write_bio(epd); + ret = flush_write_bio(epd); + BUG_ON(ret < 0); lock_page(page); } @@ -3985,8 +4004,10 @@ retry: } if (wbc->sync_mode != WB_SYNC_NONE) { - if (PageWriteback(page)) - flush_write_bio(epd); + if (PageWriteback(page)) { + ret = flush_write_bio(epd); + BUG_ON(ret < 0); + } wait_on_page_writeback(page); } @@ -4045,6 +4066,7 @@ retry: int extent_write_full_page(struct page *page, struct writeback_control *wbc) { int ret; + int flush_ret; struct extent_page_data epd = { .bio = NULL, .tree = &BTRFS_I(page->mapping->host)->io_tree, @@ -4054,7 +4076,8 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc) ret = __extent_writepage(page, wbc, &epd); - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } @@ -4062,6 +4085,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, int mode) { int ret = 0; + int flush_ret; struct address_space *mapping = inode->i_mapping; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; struct page *page; @@ -4096,7 +4120,8 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end, start += PAGE_SIZE; } - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } @@ -4104,6 +4129,7 @@ int extent_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret = 0; + int flush_ret; struct extent_page_data epd = { .bio = NULL, .tree = &BTRFS_I(mapping->host)->io_tree, @@ -4112,7 +4138,8 @@ int extent_writepages(struct address_space *mapping, }; ret = extent_write_cache_pages(mapping, wbc, &epd); - flush_write_bio(&epd); + flush_ret = flush_write_bio(&epd); + BUG_ON(flush_ret < 0); return ret; } From aa38097b44153d7470ce8956ec4de8f1372390c6 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 12 Oct 2020 23:18:11 +0100 Subject: [PATCH 210/636] Revert "btrfs: flush write bio if we loop in extent_write_cache_pages" This reverts commit 860473714cbe7fbedcf92bfe3eb6d69fae8c74ff. That has an incorrect upstream commit reference, and was modified in a way that conflicts with some older fixes. We can cleanly cherry-pick the upstream commit *after* those fixes. Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e6dab32e37f4..fa8bccca8f8c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4045,14 +4045,6 @@ retry: */ scanned = 1; index = 0; - - /* - * If we're looping we could run into a page that is locked by a - * writer and that writer could be waiting on writeback for a - * page in our current bio, and thus deadlock, so flush the - * write bio here. - */ - flush_write_bio(epd); goto retry; } From 169ae603400293e0182469f3adfd135d04a38470 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 23 Jan 2020 15:33:02 -0500 Subject: [PATCH 211/636] btrfs: flush write bio if we loop in extent_write_cache_pages commit 42ffb0bf584ae5b6b38f72259af1e0ee417ac77f upstream. There exists a deadlock with range_cyclic that has existed forever. If we loop around with a bio already built we could deadlock with a writer who has the page locked that we're attempting to write but is waiting on a page in our bio to be written out. The task traces are as follows PID: 1329874 TASK: ffff889ebcdf3800 CPU: 33 COMMAND: "kworker/u113:5" #0 [ffffc900297bb658] __schedule at ffffffff81a4c33f #1 [ffffc900297bb6e0] schedule at ffffffff81a4c6e3 #2 [ffffc900297bb6f8] io_schedule at ffffffff81a4ca42 #3 [ffffc900297bb708] __lock_page at ffffffff811f145b #4 [ffffc900297bb798] __process_pages_contig at ffffffff814bc502 #5 [ffffc900297bb8c8] lock_delalloc_pages at ffffffff814bc684 #6 [ffffc900297bb900] find_lock_delalloc_range at ffffffff814be9ff #7 [ffffc900297bb9a0] writepage_delalloc at ffffffff814bebd0 #8 [ffffc900297bba18] __extent_writepage at ffffffff814bfbf2 #9 [ffffc900297bba98] extent_write_cache_pages at ffffffff814bffbd PID: 2167901 TASK: ffff889dc6a59c00 CPU: 14 COMMAND: "aio-dio-invalid" #0 [ffffc9003b50bb18] __schedule at ffffffff81a4c33f #1 [ffffc9003b50bba0] schedule at ffffffff81a4c6e3 #2 [ffffc9003b50bbb8] io_schedule at ffffffff81a4ca42 #3 [ffffc9003b50bbc8] wait_on_page_bit at ffffffff811f24d6 #4 [ffffc9003b50bc60] prepare_pages at ffffffff814b05a7 #5 [ffffc9003b50bcd8] btrfs_buffered_write at ffffffff814b1359 #6 [ffffc9003b50bdb0] btrfs_file_write_iter at ffffffff814b5933 #7 [ffffc9003b50be38] new_sync_write at ffffffff8128f6a8 #8 [ffffc9003b50bec8] vfs_write at ffffffff81292b9d #9 [ffffc9003b50bf00] ksys_pwrite64 at ffffffff81293032 I used drgn to find the respective pages we were stuck on page_entry.page 0xffffea00fbfc7500 index 8148 bit 15 pid 2167901 page_entry.page 0xffffea00f9bb7400 index 7680 bit 0 pid 1329874 As you can see the kworker is waiting for bit 0 (PG_locked) on index 7680, and aio-dio-invalid is waiting for bit 15 (PG_writeback) on index 8148. aio-dio-invalid has 7680, and the kworker epd looks like the following crash> struct extent_page_data ffffc900297bbbb0 struct extent_page_data { bio = 0xffff889f747ed830, tree = 0xffff889eed6ba448, extent_locked = 0, sync_io = 0 } Probably worth mentioning as well that it waits for writeback of the page to complete while holding a lock on it (at prepare_pages()). Using drgn I walked the bio pages looking for page 0xffffea00fbfc7500 which is the one we're waiting for writeback on bio = Object(prog, 'struct bio', address=0xffff889f747ed830) for i in range(0, bio.bi_vcnt.value_()): bv = bio.bi_io_vec[i] if bv.bv_page.value_() == 0xffffea00fbfc7500: print("FOUND IT") which validated what I suspected. The fix for this is simple, flush the epd before we loop back around to the beginning of the file during writeout. Fixes: b293f02e1423 ("Btrfs: Add writepages support") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fa8bccca8f8c..af41f407dee5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4045,7 +4045,16 @@ retry: */ scanned = 1; index = 0; - goto retry; + + /* + * If we're looping we could run into a page that is locked by a + * writer and that writer could be waiting on writeback for a + * page in our current bio, and thus deadlock, so flush the + * write bio here. + */ + ret = flush_write_bio(epd); + if (!ret) + goto retry; } if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole)) From eeda803b77d6cc80c9f9dffcfc86cddc5a52c0fe Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:42 +0800 Subject: [PATCH 212/636] btrfs: extent_io: Handle errors better in extent_write_full_page() commit 3065976b045f77a910809fa7699f99a1e7c0dbbb upstream. Since now flush_write_bio() could return error, kill the BUG_ON() first. Then don't call flush_write_bio() unconditionally, instead we check the return value from __extent_writepage() first. If __extent_writepage() fails, we do cleanup, and return error without submitting the possible corrupted or half-baked bio. If __extent_writepage() successes, then we call flush_write_bio() and return the result. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index af41f407dee5..0aa7fbafa105 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -160,6 +160,16 @@ static int __must_check submit_one_bio(struct bio *bio, int mirror_num, return blk_status_to_errno(ret); } +/* Cleanup unsubmitted bios */ +static void end_write_bio(struct extent_page_data *epd, int ret) +{ + if (epd->bio) { + epd->bio->bi_status = errno_to_blk_status(ret); + bio_endio(epd->bio); + epd->bio = NULL; + } +} + /* * Submit bio from extent page data via submit_one_bio * @@ -3461,6 +3471,9 @@ done: * records are inserted to lock ranges in the tree, and as dirty areas * are found, they are marked writeback. Then the lock bits are removed * and the end_io handler clears the writeback ranges + * + * Return 0 if everything goes well. + * Return <0 for error. */ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct extent_page_data *epd) @@ -3528,6 +3541,7 @@ done: end_extent_writepage(page, ret, start, page_end); } unlock_page(page); + ASSERT(ret <= 0); return ret; done_unlocked: @@ -4067,7 +4081,6 @@ retry: int extent_write_full_page(struct page *page, struct writeback_control *wbc) { int ret; - int flush_ret; struct extent_page_data epd = { .bio = NULL, .tree = &BTRFS_I(page->mapping->host)->io_tree, @@ -4076,9 +4089,14 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc) }; ret = __extent_writepage(page, wbc, &epd); + ASSERT(ret <= 0); + if (ret < 0) { + end_write_bio(&epd, ret); + return ret; + } - flush_ret = flush_write_bio(&epd); - BUG_ON(flush_ret < 0); + ret = flush_write_bio(&epd); + ASSERT(ret <= 0); return ret; } From 22bb77c13fbeeef2e0d0a8506c4efdde41598d32 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:43 +0800 Subject: [PATCH 213/636] btrfs: extent_io: Handle errors better in btree_write_cache_pages() commit 2b952eea813b1f7e7d4b9782271acd91625b9bb9 upstream. In btree_write_cache_pages(), we can only get @ret <= 0. Add an ASSERT() for it just in case. Then instead of submitting the write bio even we got some error, check the return value first. If we have already hit some error, just clean up the corrupted or half-baked bio, and return error. If there is no error so far, then call flush_write_bio() and return the result. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0aa7fbafa105..822515856ab0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3809,7 +3809,6 @@ int btree_write_cache_pages(struct address_space *mapping, .sync_io = wbc->sync_mode == WB_SYNC_ALL, }; int ret = 0; - int flush_ret; int done = 0; int nr_to_write_done = 0; struct pagevec pvec; @@ -3909,8 +3908,12 @@ retry: index = 0; goto retry; } - flush_ret = flush_write_bio(&epd); - BUG_ON(flush_ret < 0); + ASSERT(ret <= 0); + if (ret < 0) { + end_write_bio(&epd, ret); + return ret; + } + ret = flush_write_bio(&epd); return ret; } From 2311ea7ea04fbc15f51760d3b91f6a8381b06c9a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 14:27:46 +0800 Subject: [PATCH 214/636] btrfs: extent_io: add proper error handling to lock_extent_buffer_for_io() commit 2e3c25136adfb293d517e17f761d3b8a43a8fc22 upstream. This function needs some extra checks on locked pages and eb. For error handling we need to unlock locked pages and the eb. There is a rare >0 return value branch, where all pages get locked while write bio is not flushed. Thankfully it's handled by the only caller, btree_write_cache_pages(), as later write_one_eb() call will trigger submit_one_bio(). So there shouldn't be any problem. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 822515856ab0..944ac072081d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3554,19 +3554,27 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb) TASK_UNINTERRUPTIBLE); } +/* + * Lock eb pages and flush the bio if we can't the locks + * + * Return 0 if nothing went wrong + * Return >0 is same as 0, except bio is not submitted + * Return <0 if something went wrong, no page is locked + */ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb, struct btrfs_fs_info *fs_info, struct extent_page_data *epd) { - int i, num_pages; + int i, num_pages, failed_page_nr; int flush = 0; int ret = 0; if (!btrfs_try_tree_write_lock(eb)) { - flush = 1; ret = flush_write_bio(epd); - BUG_ON(ret < 0); + if (ret < 0) + return ret; + flush = 1; btrfs_tree_lock(eb); } @@ -3576,7 +3584,8 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, return 0; if (!flush) { ret = flush_write_bio(epd); - BUG_ON(ret < 0); + if (ret < 0) + return ret; flush = 1; } while (1) { @@ -3618,7 +3627,10 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!trylock_page(p)) { if (!flush) { ret = flush_write_bio(epd); - BUG_ON(ret < 0); + if (ret < 0) { + failed_page_nr = i; + goto err_unlock; + } flush = 1; } lock_page(p); @@ -3626,6 +3638,11 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, } return ret; +err_unlock: + /* Unlock already locked pages */ + for (i = 0; i < failed_page_nr; i++) + unlock_page(eb->pages[i]); + return ret; } static void end_extent_buffer_writeback(struct extent_buffer *eb) From 432bdb0d931b3a6e8c8d34ffa29a218479b5f9be Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 11 Sep 2019 17:42:00 +0100 Subject: [PATCH 215/636] Btrfs: fix unwritten extent buffers and hangs on future writeback attempts commit 18dfa7117a3f379862dcd3f67cadd678013bb9dd upstream. The lock_extent_buffer_io() returns 1 to the caller to tell it everything went fine and the callers needs to start writeback for the extent buffer (submit a bio, etc), 0 to tell the caller everything went fine but it does not need to start writeback for the extent buffer, and a negative value if some error happened. When it's about to return 1 it tries to lock all pages, and if a try lock on a page fails, and we didn't flush any existing bio in our "epd", it calls flush_write_bio(epd) and overwrites the return value of 1 to 0 or an error. The page might have been locked elsewhere, not with the goal of starting writeback of the extent buffer, and even by some code other than btrfs, like page migration for example, so it does not mean the writeback of the extent buffer was already started by some other task, so returning a 0 tells the caller (btree_write_cache_pages()) to not start writeback for the extent buffer. Note that epd might currently have either no bio, so flush_write_bio() returns 0 (success) or it might have a bio for another extent buffer with a lower index (logical address). Since we return 0 with the EXTENT_BUFFER_WRITEBACK bit set on the extent buffer and writeback is never started for the extent buffer, future attempts to writeback the extent buffer will hang forever waiting on that bit to be cleared, since it can only be cleared after writeback completes. Such hang is reported with a trace like the following: [49887.347053] INFO: task btrfs-transacti:1752 blocked for more than 122 seconds. [49887.347059] Not tainted 5.2.13-gentoo #2 [49887.347060] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [49887.347062] btrfs-transacti D 0 1752 2 0x80004000 [49887.347064] Call Trace: [49887.347069] ? __schedule+0x265/0x830 [49887.347071] ? bit_wait+0x50/0x50 [49887.347072] ? bit_wait+0x50/0x50 [49887.347074] schedule+0x24/0x90 [49887.347075] io_schedule+0x3c/0x60 [49887.347077] bit_wait_io+0x8/0x50 [49887.347079] __wait_on_bit+0x6c/0x80 [49887.347081] ? __lock_release.isra.29+0x155/0x2d0 [49887.347083] out_of_line_wait_on_bit+0x7b/0x80 [49887.347084] ? var_wake_function+0x20/0x20 [49887.347087] lock_extent_buffer_for_io+0x28c/0x390 [49887.347089] btree_write_cache_pages+0x18e/0x340 [49887.347091] do_writepages+0x29/0xb0 [49887.347093] ? kmem_cache_free+0x132/0x160 [49887.347095] ? convert_extent_bit+0x544/0x680 [49887.347097] filemap_fdatawrite_range+0x70/0x90 [49887.347099] btrfs_write_marked_extents+0x53/0x120 [49887.347100] btrfs_write_and_wait_transaction.isra.4+0x38/0xa0 [49887.347102] btrfs_commit_transaction+0x6bb/0x990 [49887.347103] ? start_transaction+0x33e/0x500 [49887.347105] transaction_kthread+0x139/0x15c So fix this by not overwriting the return value (ret) with the result from flush_write_bio(). We also need to clear the EXTENT_BUFFER_WRITEBACK bit in case flush_write_bio() returns an error, otherwise it will hang any future attempts to writeback the extent buffer, and undo all work done before (set back EXTENT_BUFFER_DIRTY, etc). This is a regression introduced in the 5.2 kernel. Fixes: 2e3c25136adfb ("btrfs: extent_io: add proper error handling to lock_extent_buffer_for_io()") Fixes: f4340622e0226 ("btrfs: extent_io: Move the BUG_ON() in flush_write_bio() one level up") Reported-by: Zdenek Sojka Link: https://lore.kernel.org/linux-btrfs/GpO.2yos.3WGDOLpx6t%7D.1TUDYM@seznam.cz/T/#u Reported-by: Stefan Priebe - Profihost AG Link: https://lore.kernel.org/linux-btrfs/5c4688ac-10a7-fb07-70e8-c5d31a3fbb38@profihost.ag/T/#t Reported-by: Drazen Kacar Link: https://lore.kernel.org/linux-btrfs/DB8PR03MB562876ECE2319B3E579590F799C80@DB8PR03MB5628.eurprd03.prod.outlook.com/ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204377 Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 944ac072081d..88ada4a19213 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3554,6 +3554,13 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb) TASK_UNINTERRUPTIBLE); } +static void end_extent_buffer_writeback(struct extent_buffer *eb) +{ + clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); + smp_mb__after_atomic(); + wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); +} + /* * Lock eb pages and flush the bio if we can't the locks * @@ -3626,8 +3633,11 @@ lock_extent_buffer_for_io(struct extent_buffer *eb, if (!trylock_page(p)) { if (!flush) { - ret = flush_write_bio(epd); - if (ret < 0) { + int err; + + err = flush_write_bio(epd); + if (err < 0) { + ret = err; failed_page_nr = i; goto err_unlock; } @@ -3642,16 +3652,23 @@ err_unlock: /* Unlock already locked pages */ for (i = 0; i < failed_page_nr; i++) unlock_page(eb->pages[i]); + /* + * Clear EXTENT_BUFFER_WRITEBACK and wake up anyone waiting on it. + * Also set back EXTENT_BUFFER_DIRTY so future attempts to this eb can + * be made and undo everything done before. + */ + btrfs_tree_lock(eb); + spin_lock(&eb->refs_lock); + set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); + end_extent_buffer_writeback(eb); + spin_unlock(&eb->refs_lock); + percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len, + fs_info->dirty_metadata_batch); + btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + btrfs_tree_unlock(eb); return ret; } -static void end_extent_buffer_writeback(struct extent_buffer *eb) -{ - clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); - smp_mb__after_atomic(); - wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); -} - static void set_btree_ioerr(struct page *page) { struct extent_buffer *eb = (struct extent_buffer *)page->private; From 1527c0e0229d2dd1c8ae1e73b1579bd8d5866b5b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 12 Feb 2020 14:12:44 +0800 Subject: [PATCH 216/636] btrfs: Don't submit any btree write bio if the fs has errors commit b3ff8f1d380e65dddd772542aa9bff6c86bf715a upstream. [BUG] There is a fuzzed image which could cause KASAN report at unmount time. BUG: KASAN: use-after-free in btrfs_queue_work+0x2c1/0x390 Read of size 8 at addr ffff888067cf6848 by task umount/1922 CPU: 0 PID: 1922 Comm: umount Tainted: G W 5.0.21 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: dump_stack+0x5b/0x8b print_address_description+0x70/0x280 kasan_report+0x13a/0x19b btrfs_queue_work+0x2c1/0x390 btrfs_wq_submit_bio+0x1cd/0x240 btree_submit_bio_hook+0x18c/0x2a0 submit_one_bio+0x1be/0x320 flush_write_bio.isra.41+0x2c/0x70 btree_write_cache_pages+0x3bb/0x7f0 do_writepages+0x5c/0x130 __writeback_single_inode+0xa3/0x9a0 writeback_single_inode+0x23d/0x390 write_inode_now+0x1b5/0x280 iput+0x2ef/0x600 close_ctree+0x341/0x750 generic_shutdown_super+0x126/0x370 kill_anon_super+0x31/0x50 btrfs_kill_super+0x36/0x2b0 deactivate_locked_super+0x80/0xc0 deactivate_super+0x13c/0x150 cleanup_mnt+0x9a/0x130 task_work_run+0x11a/0x1b0 exit_to_usermode_loop+0x107/0x130 do_syscall_64+0x1e5/0x280 entry_SYSCALL_64_after_hwframe+0x44/0xa9 [CAUSE] The fuzzed image has a completely screwd up extent tree: leaf 29421568 gen 8 total ptrs 6 free space 3587 owner EXTENT_TREE refs 2 lock (w:0 r:0 bw:0 br:0 sw:0 sr:0) lock_owner 0 current 5938 item 0 key (12587008 168 4096) itemoff 3942 itemsize 53 extent refs 1 gen 9 flags 1 ref#0: extent data backref root 5 objectid 259 offset 0 count 1 item 1 key (12591104 168 8192) itemoff 3889 itemsize 53 extent refs 1 gen 9 flags 1 ref#0: extent data backref root 5 objectid 271 offset 0 count 1 item 2 key (12599296 168 4096) itemoff 3836 itemsize 53 extent refs 1 gen 9 flags 1 ref#0: extent data backref root 5 objectid 259 offset 4096 count 1 item 3 key (29360128 169 0) itemoff 3803 itemsize 33 extent refs 1 gen 9 flags 2 ref#0: tree block backref root 5 item 4 key (29368320 169 1) itemoff 3770 itemsize 33 extent refs 1 gen 9 flags 2 ref#0: tree block backref root 5 item 5 key (29372416 169 0) itemoff 3737 itemsize 33 extent refs 1 gen 9 flags 2 ref#0: tree block backref root 5 Note that leaf 29421568 doesn't have its backref in the extent tree. Thus extent allocator can re-allocate leaf 29421568 for other trees. In short, the bug is caused by: - Existing tree block gets allocated to log tree This got its generation bumped. - Log tree balance cleaned dirty bit of offending tree block It will not be written back to disk, thus no WRITTEN flag. - Original owner of the tree block gets COWed Since the tree block has higher transid, no WRITTEN flag, it's reused, and not traced by transaction::dirty_pages. - Transaction aborted Tree blocks get cleaned according to transaction::dirty_pages. But the offending tree block is not recorded at all. - Filesystem unmount All pages are assumed to be are clean, destroying all workqueue, then call iput(btree_inode). But offending tree block is still dirty, which triggers writeback, and causes use-after-free bug. The detailed sequence looks like this: - Initial status eb: 29421568, header=WRITTEN bflags_dirty=0, page_dirty=0, gen=8, not traced by any dirty extent_iot_tree. - New tree block is allocated Since there is no backref for 29421568, it's re-allocated as new tree block. Keep in mind that tree block 29421568 is still referred by extent tree. - Tree block 29421568 is filled for log tree eb: 29421568, header=0 bflags_dirty=1, page_dirty=1, gen=9 << (gen bumped) traced by btrfs_root::dirty_log_pages - Some log tree operations Since the fs is using node size 4096, the log tree can easily go a level higher. - Log tree needs balance Tree block 29421568 gets all its content pushed to right, thus now it is empty, and we don't need it. btrfs_clean_tree_block() from __push_leaf_right() get called. eb: 29421568, header=0 bflags_dirty=0, page_dirty=0, gen=9 traced by btrfs_root::dirty_log_pages - Log tree write back btree_write_cache_pages() goes through dirty pages ranges, but since page of tree block 29421568 gets cleaned already, it's not written back to disk. Thus it doesn't have WRITTEN bit set. But ranges in dirty_log_pages are cleared. eb: 29421568, header=0 bflags_dirty=0, page_dirty=0, gen=9 not traced by any dirty extent_iot_tree. - Extent tree update when committing transaction Since tree block 29421568 has transid equal to running trans, and has no WRITTEN bit, should_cow_block() will use it directly without adding it to btrfs_transaction::dirty_pages. eb: 29421568, header=0 bflags_dirty=1, page_dirty=1, gen=9 not traced by any dirty extent_iot_tree. At this stage, we're doomed. We have a dirty eb not tracked by any extent io tree. - Transaction gets aborted due to corrupted extent tree Btrfs cleans up dirty pages according to transaction::dirty_pages and btrfs_root::dirty_log_pages. But since tree block 29421568 is not tracked by neither of them, it's still dirty. eb: 29421568, header=0 bflags_dirty=1, page_dirty=1, gen=9 not traced by any dirty extent_iot_tree. - Filesystem unmount Since all cleanup is assumed to be done, all workqueus are destroyed. Then iput(btree_inode) is called, expecting no dirty pages. But tree 29421568 is still dirty, thus triggering writeback. Since all workqueues are already freed, we cause use-after-free. This shows us that, log tree blocks + bad extent tree can cause wild dirty pages. [FIX] To fix the problem, don't submit any btree write bio if the filesytem has any error. This is the last safe net, just in case other cleanup haven't caught catch it. Link: https://github.com/bobfuzzer/CVE/tree/master/CVE-2019-19377 CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Josef Bacik Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Backported to 4.19: fs_info variable already exists in btree_write_cache_pages()] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent_io.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 88ada4a19213..301111922a1a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3947,7 +3947,39 @@ retry: end_write_bio(&epd, ret); return ret; } - ret = flush_write_bio(&epd); + /* + * If something went wrong, don't allow any metadata write bio to be + * submitted. + * + * This would prevent use-after-free if we had dirty pages not + * cleaned up, which can still happen by fuzzed images. + * + * - Bad extent tree + * Allowing existing tree block to be allocated for other trees. + * + * - Log tree operations + * Exiting tree blocks get allocated to log tree, bumps its + * generation, then get cleaned in tree re-balance. + * Such tree block will not be written back, since it's clean, + * thus no WRITTEN flag set. + * And after log writes back, this tree block is not traced by + * any dirty extent_io_tree. + * + * - Offending tree block gets re-dirtied from its original owner + * Since it has bumped generation, no WRITTEN flag, it can be + * reused without COWing. This tree block will not be traced + * by btrfs_transaction::dirty_pages. + * + * Now such dirty tree block will not be cleaned by any dirty + * extent io tree. Thus we don't want to submit such wild eb + * if the fs already has error. + */ + if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + ret = flush_write_bio(&epd); + } else { + ret = -EUCLEAN; + end_write_bio(&epd, ret); + } return ret; } From 5d123c6335e82db8de1e2fe1c15563e003245438 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 13:16:42 +0800 Subject: [PATCH 217/636] btrfs: Move btrfs_check_chunk_valid() to tree-check.[ch] and export it commit 82fc28fbedbb59642f05215db3b0ef4eb91aa31d upstream. By function, chunk item verification is more suitable to be done inside tree-checker. So move btrfs_check_chunk_valid() to tree-checker.c and export it. And since it's now moved to tree-checker, also add a better comment for what this function is doing. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Cherry-picked for 4.19 to ease backporting later fixes] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 97 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-checker.h | 4 ++ fs/btrfs/volumes.c | 94 +-------------------------------------- 3 files changed, 102 insertions(+), 93 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index d98ec885b72a..9f81f4e3f263 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -448,6 +448,103 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, return 0; } +/* + * The common chunk check which could also work on super block sys chunk array. + * + * Return -EIO if anything is corrupted. + * Return 0 if everything is OK. + */ +int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical) +{ + u64 length; + u64 stripe_len; + u16 num_stripes; + u16 sub_stripes; + u64 type; + u64 features; + bool mixed = false; + + length = btrfs_chunk_length(leaf, chunk); + stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); + type = btrfs_chunk_type(leaf, chunk); + + if (!num_stripes) { + btrfs_err(fs_info, "invalid chunk num_stripes: %u", + num_stripes); + return -EIO; + } + if (!IS_ALIGNED(logical, fs_info->sectorsize)) { + btrfs_err(fs_info, "invalid chunk logical %llu", logical); + return -EIO; + } + if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { + btrfs_err(fs_info, "invalid chunk sectorsize %u", + btrfs_chunk_sector_size(leaf, chunk)); + return -EIO; + } + if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { + btrfs_err(fs_info, "invalid chunk length %llu", length); + return -EIO; + } + if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { + btrfs_err(fs_info, "invalid chunk stripe length: %llu", + stripe_len); + return -EIO; + } + if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & + type) { + btrfs_err(fs_info, "unrecognized chunk type: %llu", + ~(BTRFS_BLOCK_GROUP_TYPE_MASK | + BTRFS_BLOCK_GROUP_PROFILE_MASK) & + btrfs_chunk_type(leaf, chunk)); + return -EIO; + } + + if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { + btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); + return -EIO; + } + + if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && + (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { + btrfs_err(fs_info, + "system chunk with data or metadata type: 0x%llx", type); + return -EIO; + } + + features = btrfs_super_incompat_flags(fs_info->super_copy); + if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = true; + + if (!mixed) { + if ((type & BTRFS_BLOCK_GROUP_METADATA) && + (type & BTRFS_BLOCK_GROUP_DATA)) { + btrfs_err(fs_info, + "mixed chunk type in non-mixed mode: 0x%llx", type); + return -EIO; + } + } + + if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) || + (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || + (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || + (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || + ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) { + btrfs_err(fs_info, + "invalid num_stripes:sub_stripes %u:%u for profile %llu", + num_stripes, sub_stripes, + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EIO; + } + + return 0; +} + /* * Common point to switch the item-specific validation. */ diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index ff043275b784..4df45e8a6659 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -25,4 +25,8 @@ int btrfs_check_leaf_relaxed(struct btrfs_fs_info *fs_info, struct extent_buffer *leaf); int btrfs_check_node(struct btrfs_fs_info *fs_info, struct extent_buffer *node); +int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_chunk *chunk, u64 logical); + #endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0879e3dc39c8..3050a93ae6f5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -28,6 +28,7 @@ #include "math.h" #include "dev-replace.h" #include "sysfs.h" +#include "tree-checker.h" const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { [BTRFS_RAID_RAID10] = { @@ -6370,99 +6371,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, return dev; } -/* Return -EIO if any error, otherwise return 0. */ -static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, - struct extent_buffer *leaf, - struct btrfs_chunk *chunk, u64 logical) -{ - u64 length; - u64 stripe_len; - u16 num_stripes; - u16 sub_stripes; - u64 type; - u64 features; - bool mixed = false; - - length = btrfs_chunk_length(leaf, chunk); - stripe_len = btrfs_chunk_stripe_len(leaf, chunk); - num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); - type = btrfs_chunk_type(leaf, chunk); - - if (!num_stripes) { - btrfs_err(fs_info, "invalid chunk num_stripes: %u", - num_stripes); - return -EIO; - } - if (!IS_ALIGNED(logical, fs_info->sectorsize)) { - btrfs_err(fs_info, "invalid chunk logical %llu", logical); - return -EIO; - } - if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { - btrfs_err(fs_info, "invalid chunk sectorsize %u", - btrfs_chunk_sector_size(leaf, chunk)); - return -EIO; - } - if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { - btrfs_err(fs_info, "invalid chunk length %llu", length); - return -EIO; - } - if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { - btrfs_err(fs_info, "invalid chunk stripe length: %llu", - stripe_len); - return -EIO; - } - if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & - type) { - btrfs_err(fs_info, "unrecognized chunk type: %llu", - ~(BTRFS_BLOCK_GROUP_TYPE_MASK | - BTRFS_BLOCK_GROUP_PROFILE_MASK) & - btrfs_chunk_type(leaf, chunk)); - return -EIO; - } - - if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { - btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); - return -EIO; - } - - if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && - (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { - btrfs_err(fs_info, - "system chunk with data or metadata type: 0x%llx", type); - return -EIO; - } - - features = btrfs_super_incompat_flags(fs_info->super_copy); - if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) - mixed = true; - - if (!mixed) { - if ((type & BTRFS_BLOCK_GROUP_METADATA) && - (type & BTRFS_BLOCK_GROUP_DATA)) { - btrfs_err(fs_info, - "mixed chunk type in non-mixed mode: 0x%llx", type); - return -EIO; - } - } - - if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) || - (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) || - (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) || - (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || - (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || - ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && - num_stripes != 1)) { - btrfs_err(fs_info, - "invalid num_stripes:sub_stripes %u:%u for profile %llu", - num_stripes, sub_stripes, - type & BTRFS_BLOCK_GROUP_PROFILE_MASK); - return -EIO; - } - - return 0; -} - static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid, u8 *uuid, bool error) { From c24ed5ea52165bf44f405aca10d9af0ad593ae86 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 13:36:06 +0800 Subject: [PATCH 218/636] btrfs: tree-checker: Make chunk item checker messages more readable commit f114024376bceb1c0f61a7bad4a72a0f978767af upstream. Old error message would be something like: BTRFS error (device dm-3): invalid chunk num_stipres: 0 New error message would be: Btrfs critical (device dm-3): corrupt superblock syschunk array: chunk_start=2097152, invalid chunk num_stripes: 0 Or Btrfs critical (device dm-3): corrupt leaf: root=3 block=8388608 slot=3 chunk_start=2097152, invalid chunk num_stripes: 0 And for certain error message, also output expected value. The error message levels are changed from error to critical. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Cherry-picked for 4.19 to ease backporting later fixes] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 81 ++++++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 9f81f4e3f263..3f7fb0ddce8a 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -448,6 +448,51 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info, return 0; } +__printf(5, 6) +__cold +static void chunk_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *leaf, + const struct btrfs_chunk *chunk, u64 logical, + const char *fmt, ...) +{ + bool is_sb; + struct va_format vaf; + va_list args; + int i; + int slot = -1; + + /* Only superblock eb is able to have such small offset */ + is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET); + + if (!is_sb) { + /* + * Get the slot number by iterating through all slots, this + * would provide better readability. + */ + for (i = 0; i < btrfs_header_nritems(leaf); i++) { + if (btrfs_item_ptr_offset(leaf, i) == + (unsigned long)chunk) { + slot = i; + break; + } + } + } + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (is_sb) + btrfs_crit(fs_info, + "corrupt superblock syschunk array: chunk_start=%llu, %pV", + logical, &vaf); + else + btrfs_crit(fs_info, + "corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV", + BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot, + logical, &vaf); + va_end(args); +} + /* * The common chunk check which could also work on super block sys chunk array. * @@ -473,31 +518,38 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, type = btrfs_chunk_type(leaf, chunk); if (!num_stripes) { - btrfs_err(fs_info, "invalid chunk num_stripes: %u", - num_stripes); + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk num_stripes, have %u", num_stripes); return -EIO; } if (!IS_ALIGNED(logical, fs_info->sectorsize)) { - btrfs_err(fs_info, "invalid chunk logical %llu", logical); + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk logical, have %llu should aligned to %u", + logical, fs_info->sectorsize); return -EIO; } if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { - btrfs_err(fs_info, "invalid chunk sectorsize %u", - btrfs_chunk_sector_size(leaf, chunk)); + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk sectorsize, have %u expect %u", + btrfs_chunk_sector_size(leaf, chunk), + fs_info->sectorsize); return -EIO; } if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { - btrfs_err(fs_info, "invalid chunk length %llu", length); + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk length, have %llu", length); return -EIO; } if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { - btrfs_err(fs_info, "invalid chunk stripe length: %llu", + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk stripe length: %llu", stripe_len); return -EIO; } if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & type) { - btrfs_err(fs_info, "unrecognized chunk type: %llu", + chunk_err(fs_info, leaf, chunk, logical, + "unrecognized chunk type: 0x%llx", ~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & btrfs_chunk_type(leaf, chunk)); @@ -505,14 +557,17 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, } if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { - btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type); + chunk_err(fs_info, leaf, chunk, logical, + "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", + type, BTRFS_BLOCK_GROUP_TYPE_MASK); return -EIO; } if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) { - btrfs_err(fs_info, - "system chunk with data or metadata type: 0x%llx", type); + chunk_err(fs_info, leaf, chunk, logical, + "system chunk with data or metadata type: 0x%llx", + type); return -EIO; } @@ -523,7 +578,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, if (!mixed) { if ((type & BTRFS_BLOCK_GROUP_METADATA) && (type & BTRFS_BLOCK_GROUP_DATA)) { - btrfs_err(fs_info, + chunk_err(fs_info, leaf, chunk, logical, "mixed chunk type in non-mixed mode: 0x%llx", type); return -EIO; } @@ -535,7 +590,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) || (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) || ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) { - btrfs_err(fs_info, + chunk_err(fs_info, leaf, chunk, logical, "invalid num_stripes:sub_stripes %u:%u for profile %llu", num_stripes, sub_stripes, type & BTRFS_BLOCK_GROUP_PROFILE_MASK); From 782aa87c8f1d4472fc45697fdddbe8d47b56ebff Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 13:39:14 +0800 Subject: [PATCH 219/636] btrfs: tree-checker: Make btrfs_check_chunk_valid() return EUCLEAN instead of EIO commit bf871c3b43b1dcc3f2a076ff39a8f1ce7959d958 upstream. To follow the standard behavior of tree-checker. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Cherry-picked for 4.19 to ease backporting later fixes] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 3f7fb0ddce8a..7b59a3dfb5a8 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -496,7 +496,7 @@ static void chunk_err(const struct btrfs_fs_info *fs_info, /* * The common chunk check which could also work on super block sys chunk array. * - * Return -EIO if anything is corrupted. + * Return -EUCLEAN if anything is corrupted. * Return 0 if everything is OK. */ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, @@ -520,31 +520,31 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, if (!num_stripes) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk num_stripes, have %u", num_stripes); - return -EIO; + return -EUCLEAN; } if (!IS_ALIGNED(logical, fs_info->sectorsize)) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk logical, have %llu should aligned to %u", logical, fs_info->sectorsize); - return -EIO; + return -EUCLEAN; } if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk sectorsize, have %u expect %u", btrfs_chunk_sector_size(leaf, chunk), fs_info->sectorsize); - return -EIO; + return -EUCLEAN; } if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk length, have %llu", length); - return -EIO; + return -EUCLEAN; } if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { chunk_err(fs_info, leaf, chunk, logical, "invalid chunk stripe length: %llu", stripe_len); - return -EIO; + return -EUCLEAN; } if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & type) { @@ -553,14 +553,14 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, ~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) & btrfs_chunk_type(leaf, chunk)); - return -EIO; + return -EUCLEAN; } if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { chunk_err(fs_info, leaf, chunk, logical, "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", type, BTRFS_BLOCK_GROUP_TYPE_MASK); - return -EIO; + return -EUCLEAN; } if ((type & BTRFS_BLOCK_GROUP_SYSTEM) && @@ -568,7 +568,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, chunk_err(fs_info, leaf, chunk, logical, "system chunk with data or metadata type: 0x%llx", type); - return -EIO; + return -EUCLEAN; } features = btrfs_super_incompat_flags(fs_info->super_copy); @@ -580,7 +580,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, (type & BTRFS_BLOCK_GROUP_DATA)) { chunk_err(fs_info, leaf, chunk, logical, "mixed chunk type in non-mixed mode: 0x%llx", type); - return -EIO; + return -EUCLEAN; } } @@ -594,7 +594,7 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, "invalid num_stripes:sub_stripes %u:%u for profile %llu", num_stripes, sub_stripes, type & BTRFS_BLOCK_GROUP_PROFILE_MASK); - return -EIO; + return -EUCLEAN; } return 0; From 3c06c86cd08fdabe2ed20cbc3ea5010dcd7d4b29 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 20 Mar 2019 13:42:33 +0800 Subject: [PATCH 220/636] btrfs: tree-checker: Check chunk item at tree block read time commit 075cb3c78fe7976c9f29ca1fa23f9728634ecefc upstream. Since we have btrfs_check_chunk_valid() in tree-checker, let's do chunk item verification in tree-checker too. Since the tree-checker is run at endio time, if one chunk leaf fails chunk verification, we can still retry the other copy, making btrfs more robust to fuzzed image as we may still get a good chunk item. Also since we have done chunk verification in tree block read time, skip the btrfs_check_chunk_valid() call in read_one_chunk() if we're reading chunk items from leaf. Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 6 ++++++ fs/btrfs/volumes.c | 12 +++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 7b59a3dfb5a8..e7099300d3e0 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -608,6 +608,7 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, struct btrfs_key *key, int slot) { int ret = 0; + struct btrfs_chunk *chunk; switch (key->type) { case BTRFS_EXTENT_DATA_KEY: @@ -624,6 +625,11 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, case BTRFS_BLOCK_GROUP_ITEM_KEY: ret = check_block_group_item(fs_info, leaf, key, slot); break; + case BTRFS_CHUNK_ITEM_KEY: + chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); + ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, + key->offset); + break; } return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3050a93ae6f5..8e9d855dceba 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6401,9 +6401,15 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, length = btrfs_chunk_length(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); - ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); - if (ret) - return ret; + /* + * Only need to verify chunk item if we're reading from sys chunk array, + * as chunk item in tree block is already verified by tree-checker. + */ + if (leaf->start == BTRFS_SUPER_INFO_OFFSET) { + ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); + if (ret) + return ret; + } read_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); From e23e5d2594307d1088e1e1b3621d785add2a57b8 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 8 Mar 2019 14:20:03 +0800 Subject: [PATCH 221/636] btrfs: tree-checker: Verify dev item commit ab4ba2e133463c702b37242560d7fabedd2dc750 upstream. [BUG] For fuzzed image whose DEV_ITEM has invalid total_bytes as 0, then kernel will just panic: BUG: unable to handle kernel NULL pointer dereference at 0000000000000098 #PF error: [normal kernel read fault] PGD 800000022b2bd067 P4D 800000022b2bd067 PUD 22b2bc067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 1106 Comm: mount Not tainted 5.0.0-rc8+ #9 RIP: 0010:btrfs_verify_dev_extents+0x2a5/0x5a0 Call Trace: open_ctree+0x160d/0x2149 btrfs_mount_root+0x5b2/0x680 [CAUSE] If device extent verification finds a deivce with 0 total_bytes, then it assumes it's a seed dummy, then search for seed devices. But in this case, there is no seed device at all, causing NULL pointer. [FIX] Since this is caused by fuzzed image, let's go the tree-check way, just add a new verification for device item. Reported-by: Yoon Jungyeon Link: https://bugzilla.kernel.org/show_bug.cgi?id=202691 Reviewed-by: Nikolay Borisov Signed-off-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 74 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.c | 9 ----- fs/btrfs/volumes.h | 9 +++++ 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index e7099300d3e0..b5e7caeb28a2 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -600,6 +600,77 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, return 0; } +__printf(4, 5) +__cold +static void dev_item_err(const struct btrfs_fs_info *fs_info, + const struct extent_buffer *eb, int slot, + const char *fmt, ...) +{ + struct btrfs_key key; + struct va_format vaf; + va_list args; + + btrfs_item_key_to_cpu(eb, &key, slot); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + btrfs_crit(fs_info, + "corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV", + btrfs_header_level(eb) == 0 ? "leaf" : "node", + btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, + key.objectid, &vaf); + va_end(args); +} + +static int check_dev_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_dev_item *ditem; + u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK); + + if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { + dev_item_err(fs_info, leaf, slot, + "invalid objectid: has=%llu expect=%llu", + key->objectid, BTRFS_DEV_ITEMS_OBJECTID); + return -EUCLEAN; + } + if (key->offset > max_devid) { + dev_item_err(fs_info, leaf, slot, + "invalid devid: has=%llu expect=[0, %llu]", + key->offset, max_devid); + return -EUCLEAN; + } + ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); + if (btrfs_device_id(leaf, ditem) != key->offset) { + dev_item_err(fs_info, leaf, slot, + "devid mismatch: key has=%llu item has=%llu", + key->offset, btrfs_device_id(leaf, ditem)); + return -EUCLEAN; + } + + /* + * For device total_bytes, we don't have reliable way to check it, as + * it can be 0 for device removal. Device size check can only be done + * by dev extents check. + */ + if (btrfs_device_bytes_used(leaf, ditem) > + btrfs_device_total_bytes(leaf, ditem)) { + dev_item_err(fs_info, leaf, slot, + "invalid bytes used: have %llu expect [0, %llu]", + btrfs_device_bytes_used(leaf, ditem), + btrfs_device_total_bytes(leaf, ditem)); + return -EUCLEAN; + } + /* + * Remaining members like io_align/type/gen/dev_group aren't really + * utilized. Skip them to make later usage of them easier. + */ + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -630,6 +701,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, key->offset); break; + case BTRFS_DEV_ITEM_KEY: + ret = check_dev_item(fs_info, leaf, key, slot); + break; } return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8e9d855dceba..05daa2b816c3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4606,15 +4606,6 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) btrfs_set_fs_incompat(info, RAID56); } -#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ - - sizeof(struct btrfs_chunk)) \ - / sizeof(struct btrfs_stripe) + 1) - -#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ - - 2 * sizeof(struct btrfs_disk_key) \ - - 2 * sizeof(struct btrfs_chunk)) \ - / sizeof(struct btrfs_stripe) + 1) - static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 start, u64 type) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 8e8bf3246de1..65cd023b097c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -257,6 +257,15 @@ struct btrfs_fs_devices { #define BTRFS_BIO_INLINE_CSUM_SIZE 64 +#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \ + - sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + +#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ + - 2 * sizeof(struct btrfs_disk_key) \ + - 2 * sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + /* * we need the mirror number and stripe index to be passed around * the call chain while we are processing end_io (especially errors). From b5b2a94d846a599a0b843895d3dbd90246d8d1fc Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 28 Aug 2019 10:33:13 +0800 Subject: [PATCH 222/636] btrfs: tree-checker: Fix wrong check on max devid commit 8bb177d18f114358a57d8ae7e206861b48b8b4de upstream. [BUG] The following script will cause false alert on devid check. #!/bin/bash dev1=/dev/test/test dev2=/dev/test/scratch1 mnt=/mnt/btrfs umount $dev1 &> /dev/null umount $dev2 &> /dev/null umount $mnt &> /dev/null mkfs.btrfs -f $dev1 mount $dev1 $mnt _fail() { echo "!!! FAILED !!!" exit 1 } for ((i = 0; i < 4096; i++)); do btrfs dev add -f $dev2 $mnt || _fail btrfs dev del $dev1 $mnt || _fail dev_tmp=$dev1 dev1=$dev2 dev2=$dev_tmp done [CAUSE] Tree-checker uses BTRFS_MAX_DEVS() and BTRFS_MAX_DEVS_SYS_CHUNK() as upper limit for devid. But we can have devid holes just like above script. So the check for devid is incorrect and could cause false alert. [FIX] Just remove the whole devid check. We don't have any hard requirement for devid assignment. Furthermore, even devid could get corrupted by a bitflip, we still have dev extents verification at mount time, so corrupted data won't sneak in. This fixes fstests btrfs/194. Reported-by: Anand Jain Fixes: ab4ba2e13346 ("btrfs: tree-checker: Verify dev item") CC: stable@vger.kernel.org # 5.2+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba [bwh: Backported to 4.19: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index b5e7caeb28a2..a0222e6a11c5 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -629,7 +629,6 @@ static int check_dev_item(struct btrfs_fs_info *fs_info, struct btrfs_key *key, int slot) { struct btrfs_dev_item *ditem; - u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK); if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { dev_item_err(fs_info, leaf, slot, @@ -637,12 +636,6 @@ static int check_dev_item(struct btrfs_fs_info *fs_info, key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; } - if (key->offset > max_devid) { - dev_item_err(fs_info, leaf, slot, - "invalid devid: has=%llu expect=[0, %llu]", - key->offset, max_devid); - return -EUCLEAN; - } ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (btrfs_device_id(leaf, ditem) != key->offset) { dev_item_err(fs_info, leaf, slot, From bedd9974c106fe96b518da667d49b0bfe6353590 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 13 Mar 2019 12:17:50 +0800 Subject: [PATCH 223/636] btrfs: tree-checker: Enhance chunk checker to validate chunk profile commit 80e46cf22ba0bcb57b39c7c3b52961ab3a0fd5f2 upstream. Btrfs-progs already have a comprehensive type checker, to ensure there is only 0 (SINGLE profile) or 1 (DUP/RAID0/1/5/6/10) bit set for chunk profile bits. Do the same work for kernel. Reported-by: Yoon Jungyeon Link: https://bugzilla.kernel.org/show_bug.cgi?id=202765 Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index a0222e6a11c5..ab1553781e9c 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -556,6 +556,13 @@ int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, return -EUCLEAN; } + if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && + (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) { + chunk_err(fs_info, leaf, chunk, logical, + "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set", + type & BTRFS_BLOCK_GROUP_PROFILE_MASK); + return -EUCLEAN; + } if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) { chunk_err(fs_info, leaf, chunk, logical, "missing chunk type flag, have 0x%llx one bit must be set in 0x%llx", From 3384e8d72567dc6827d53c66071da61f57c517f7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 13 Mar 2019 14:31:35 +0800 Subject: [PATCH 224/636] btrfs: tree-checker: Verify inode item commit 496245cac57e26d8b738d85c7a29cf9a47610f3f upstream. There is a report in kernel bugzilla about mismatch file type in dir item and inode item. This inspires us to check inode mode in inode item. This patch will check the following members: - inode key objectid Should be ROOT_DIR_DIR or [256, (u64)-256] or FREE_INO. - inode key offset Should be 0 - inode item generation - inode item transid No newer than sb generation + 1. The +1 is for log tree. - inode item mode No unknown bits. No invalid S_IF* bit. NOTE: S_IFMT check is not enough, need to check every know type. - inode item nlink Dir should have no more link than 1. - inode item flags Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h | 15 +++++++ fs/btrfs/tree-checker.c | 94 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 554727d82d43..4d1c12faada8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1459,6 +1459,21 @@ do { \ #define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31) +#define BTRFS_INODE_FLAG_MASK \ + (BTRFS_INODE_NODATASUM | \ + BTRFS_INODE_NODATACOW | \ + BTRFS_INODE_READONLY | \ + BTRFS_INODE_NOCOMPRESS | \ + BTRFS_INODE_PREALLOC | \ + BTRFS_INODE_SYNC | \ + BTRFS_INODE_IMMUTABLE | \ + BTRFS_INODE_APPEND | \ + BTRFS_INODE_NODUMP | \ + BTRFS_INODE_NOATIME | \ + BTRFS_INODE_DIRSYNC | \ + BTRFS_INODE_COMPRESS | \ + BTRFS_INODE_ROOT_ITEM_INIT) + struct btrfs_map_token { const struct extent_buffer *eb; char *kaddr; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index ab1553781e9c..e77c25bc6860 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -671,6 +671,97 @@ static int check_dev_item(struct btrfs_fs_info *fs_info, return 0; } +/* Inode item error output has the same format as dir_item_err() */ +#define inode_item_err(fs_info, eb, slot, fmt, ...) \ + dir_item_err(fs_info, eb, slot, fmt, __VA_ARGS__) + +static int check_inode_item(struct btrfs_fs_info *fs_info, + struct extent_buffer *leaf, + struct btrfs_key *key, int slot) +{ + struct btrfs_inode_item *iitem; + u64 super_gen = btrfs_super_generation(fs_info->super_copy); + u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); + u32 mode; + + if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID || + key->objectid > BTRFS_LAST_FREE_OBJECTID) && + key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID && + key->objectid != BTRFS_FREE_INO_OBJECTID) { + generic_err(fs_info, leaf, slot, + "invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu", + key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID, + BTRFS_FIRST_FREE_OBJECTID, + BTRFS_LAST_FREE_OBJECTID, + BTRFS_FREE_INO_OBJECTID); + return -EUCLEAN; + } + if (key->offset != 0) { + inode_item_err(fs_info, leaf, slot, + "invalid key offset: has %llu expect 0", + key->offset); + return -EUCLEAN; + } + iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); + + /* Here we use super block generation + 1 to handle log tree */ + if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) { + inode_item_err(fs_info, leaf, slot, + "invalid inode generation: has %llu expect (0, %llu]", + btrfs_inode_generation(leaf, iitem), + super_gen + 1); + return -EUCLEAN; + } + /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */ + if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) { + inode_item_err(fs_info, leaf, slot, + "invalid inode generation: has %llu expect [0, %llu]", + btrfs_inode_transid(leaf, iitem), super_gen + 1); + return -EUCLEAN; + } + + /* + * For size and nbytes it's better not to be too strict, as for dir + * item its size/nbytes can easily get wrong, but doesn't affect + * anything in the fs. So here we skip the check. + */ + mode = btrfs_inode_mode(leaf, iitem); + if (mode & ~valid_mask) { + inode_item_err(fs_info, leaf, slot, + "unknown mode bit detected: 0x%x", + mode & ~valid_mask); + return -EUCLEAN; + } + + /* + * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2, + * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG. + * Only needs to check BLK, LNK and SOCKS + */ + if (!is_power_of_2(mode & S_IFMT)) { + if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) { + inode_item_err(fs_info, leaf, slot, + "invalid mode: has 0%o expect valid S_IF* bit(s)", + mode & S_IFMT); + return -EUCLEAN; + } + } + if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) { + inode_item_err(fs_info, leaf, slot, + "invalid nlink: has %u expect no more than 1 for dir", + btrfs_inode_nlink(leaf, iitem)); + return -EUCLEAN; + } + if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) { + inode_item_err(fs_info, leaf, slot, + "unknown flags detected: 0x%llx", + btrfs_inode_flags(leaf, iitem) & + ~BTRFS_INODE_FLAG_MASK); + return -EUCLEAN; + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -704,6 +795,9 @@ static int check_leaf_item(struct btrfs_fs_info *fs_info, case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(fs_info, leaf, key, slot); break; + case BTRFS_INODE_ITEM_KEY: + ret = check_inode_item(fs_info, leaf, key, slot); + break; } return ret; } From cdf69f3b132c95aa255383b2d7907da5ba63221a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 25 Aug 2020 21:42:51 +0800 Subject: [PATCH 225/636] btrfs: tree-checker: fix the error message for transid error commit f96d6960abbc52e26ad124e69e6815283d3e1674 upstream. The error message for inode transid is the same as for inode generation, which makes us unable to detect the real problem. Reported-by: Tyler Richmond Fixes: 496245cac57e ("btrfs: tree-checker: Verify inode item") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Marcos Paulo de Souza Signed-off-by: Qu Wenruo Signed-off-by: David Sterba [bwh: Backported to 4.19: adjust context] Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-checker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index e77c25bc6860..9023e6b46396 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -715,7 +715,7 @@ static int check_inode_item(struct btrfs_fs_info *fs_info, /* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */ if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) { inode_item_err(fs_info, leaf, slot, - "invalid inode generation: has %llu expect [0, %llu]", + "invalid inode transid: has %llu expect [0, %llu]", btrfs_inode_transid(leaf, iitem), super_gen + 1); return -EUCLEAN; } From 11af858054baadff13eb92eb347443f93768fde7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 2 Nov 2020 13:32:42 -0500 Subject: [PATCH 226/636] Fonts: Replace discarded const qualifier commit 9522750c66c689b739e151fcdf895420dc81efc0 upstream. Commit 6735b4632def ("Fonts: Support FONT_EXTRA_WORDS macros for built-in fonts") introduced the following error when building rpc_defconfig (only this build appears to be affected): `acorndata_8x8' referenced in section `.text' of arch/arm/boot/compressed/ll_char_wr.o: defined in discarded section `.data' of arch/arm/boot/compressed/font.o `acorndata_8x8' referenced in section `.data.rel.ro' of arch/arm/boot/compressed/font.o: defined in discarded section `.data' of arch/arm/boot/compressed/font.o make[3]: *** [/scratch/linux/arch/arm/boot/compressed/Makefile:191: arch/arm/boot/compressed/vmlinux] Error 1 make[2]: *** [/scratch/linux/arch/arm/boot/Makefile:61: arch/arm/boot/compressed/vmlinux] Error 2 make[1]: *** [/scratch/linux/arch/arm/Makefile:317: zImage] Error 2 The .data section is discarded at link time. Reinstating acorndata_8x8 as const ensures it is still available after linking. Do the same for the other 12 built-in fonts as well, for consistency purposes. Cc: Cc: Russell King Reviewed-by: Greg Kroah-Hartman Fixes: 6735b4632def ("Fonts: Support FONT_EXTRA_WORDS macros for built-in fonts") Signed-off-by: Lee Jones Co-developed-by: Peilin Ye Signed-off-by: Peilin Ye Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201102183242.2031659-1-yepeilin.cs@gmail.com Signed-off-by: Greg Kroah-Hartman --- lib/fonts/font_10x18.c | 2 +- lib/fonts/font_6x10.c | 2 +- lib/fonts/font_6x11.c | 2 +- lib/fonts/font_7x14.c | 2 +- lib/fonts/font_8x16.c | 2 +- lib/fonts/font_8x8.c | 2 +- lib/fonts/font_acorn_8x8.c | 2 +- lib/fonts/font_mini_4x6.c | 2 +- lib/fonts/font_pearl_8x8.c | 2 +- lib/fonts/font_sun12x22.c | 2 +- lib/fonts/font_sun8x16.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/fonts/font_10x18.c b/lib/fonts/font_10x18.c index 0e2deac97da0..e02f9df24d1e 100644 --- a/lib/fonts/font_10x18.c +++ b/lib/fonts/font_10x18.c @@ -8,7 +8,7 @@ #define FONTDATAMAX 9216 -static struct font_data fontdata_10x18 = { +static const struct font_data fontdata_10x18 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 0000000000 */ diff --git a/lib/fonts/font_6x10.c b/lib/fonts/font_6x10.c index 87da8acd07db..6e3c4b7691c8 100644 --- a/lib/fonts/font_6x10.c +++ b/lib/fonts/font_6x10.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 2560 -static struct font_data fontdata_6x10 = { +static const struct font_data fontdata_6x10 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_6x11.c b/lib/fonts/font_6x11.c index 5e975dfa10a5..2d22a24e816f 100644 --- a/lib/fonts/font_6x11.c +++ b/lib/fonts/font_6x11.c @@ -9,7 +9,7 @@ #define FONTDATAMAX (11*256) -static struct font_data fontdata_6x11 = { +static const struct font_data fontdata_6x11 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_7x14.c b/lib/fonts/font_7x14.c index 86d298f38505..9cc7ae2e03f7 100644 --- a/lib/fonts/font_7x14.c +++ b/lib/fonts/font_7x14.c @@ -8,7 +8,7 @@ #define FONTDATAMAX 3584 -static struct font_data fontdata_7x14 = { +static const struct font_data fontdata_7x14 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 0000000 */ diff --git a/lib/fonts/font_8x16.c b/lib/fonts/font_8x16.c index 37cedd36ca5e..bab25dc59e8d 100644 --- a/lib/fonts/font_8x16.c +++ b/lib/fonts/font_8x16.c @@ -10,7 +10,7 @@ #define FONTDATAMAX 4096 -static struct font_data fontdata_8x16 = { +static const struct font_data fontdata_8x16 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_8x8.c b/lib/fonts/font_8x8.c index 8ab695538395..109d0572368f 100644 --- a/lib/fonts/font_8x8.c +++ b/lib/fonts/font_8x8.c @@ -9,7 +9,7 @@ #define FONTDATAMAX 2048 -static struct font_data fontdata_8x8 = { +static const struct font_data fontdata_8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_acorn_8x8.c b/lib/fonts/font_acorn_8x8.c index 069b3e80c434..fb395f0d4031 100644 --- a/lib/fonts/font_acorn_8x8.c +++ b/lib/fonts/font_acorn_8x8.c @@ -5,7 +5,7 @@ #define FONTDATAMAX 2048 -static struct font_data acorndata_8x8 = { +static const struct font_data acorndata_8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ^@ */ /* 01 */ 0x7e, 0x81, 0xa5, 0x81, 0xbd, 0x99, 0x81, 0x7e, /* ^A */ diff --git a/lib/fonts/font_mini_4x6.c b/lib/fonts/font_mini_4x6.c index 1449876c6a27..592774a90917 100644 --- a/lib/fonts/font_mini_4x6.c +++ b/lib/fonts/font_mini_4x6.c @@ -43,7 +43,7 @@ __END__; #define FONTDATAMAX 1536 -static struct font_data fontdata_mini_4x6 = { +static const struct font_data fontdata_mini_4x6 = { { 0, 0, FONTDATAMAX, 0 }, { /*{*/ /* Char 0: ' ' */ diff --git a/lib/fonts/font_pearl_8x8.c b/lib/fonts/font_pearl_8x8.c index 32d65551e7ed..a6f95ebce950 100644 --- a/lib/fonts/font_pearl_8x8.c +++ b/lib/fonts/font_pearl_8x8.c @@ -14,7 +14,7 @@ #define FONTDATAMAX 2048 -static struct font_data fontdata_pearl8x8 = { +static const struct font_data fontdata_pearl8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_sun12x22.c b/lib/fonts/font_sun12x22.c index 641a6b4dca42..a5b65bd49604 100644 --- a/lib/fonts/font_sun12x22.c +++ b/lib/fonts/font_sun12x22.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 11264 -static struct font_data fontdata_sun12x22 = { +static const struct font_data fontdata_sun12x22 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 000000000000 */ diff --git a/lib/fonts/font_sun8x16.c b/lib/fonts/font_sun8x16.c index 193fe6d988e0..e577e76a6a7c 100644 --- a/lib/fonts/font_sun8x16.c +++ b/lib/fonts/font_sun8x16.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 4096 -static struct font_data fontdata_sun8x16 = { +static const struct font_data fontdata_sun8x16 = { { 0, 0, FONTDATAMAX, 0 }, { /* */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* */ 0x00,0x00,0x7e,0x81,0xa5,0x81,0x81,0xbd,0x99,0x81,0x81,0x7e,0x00,0x00,0x00,0x00, From 0b443d1aff6d6f3706e48c93d9fe1212a569af4b Mon Sep 17 00:00:00 2001 From: Keith Winstein Date: Sun, 25 Oct 2020 22:05:47 -0700 Subject: [PATCH 227/636] ALSA: usb-audio: Add implicit feedback quirk for Zoom UAC-2 commit f15cfca818d756dd1c9492530091dfd583359db3 upstream. The Zoom UAC-2 USB audio interface provides an async playback endpoint ("1 OUT (ASYNC)") and capture endpoint ("2 IN (ASYNC)"), both with 2-channel S32_LE in 44.1, 48, 88.2, 96, 176.4, or 192 kilosamples/s. The device provides explicit feedback to adjust the host's playback rate, but the feedback appears unstable and biased relative to the device's capture rate. "alsaloop -t 1000" experiences playback underruns and tries to resample the captured audio to match the varying playback rate. Forcing the kernel to use implicit feedback appears to produce more stable results. This causes the host to transmit one playback sample for each capture sample received. (Zoom North America has been notified of this change.) Signed-off-by: Keith Winstein Tested-by: Keith Winstein Cc: BugLink: https://lore.kernel.org/r/20201027071841.GA164525@trolley.csail.mit.edu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 7743e7bc6bf2..038ce38986e2 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -350,6 +350,10 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ep = 0x81; ifnum = 2; goto add_sync_ep_from_ifnum; + case USB_ID(0x1686, 0xf029): /* Zoom UAC-2 */ + ep = 0x82; + ifnum = 2; + goto add_sync_ep_from_ifnum; case USB_ID(0x1397, 0x0001): /* Behringer UFX1604 */ case USB_ID(0x1397, 0x0002): /* Behringer UFX1204 */ ep = 0x81; From 69a7cae8656039d4420e8613448387d0ebf494f0 Mon Sep 17 00:00:00 2001 From: Artem Lapkin Date: Tue, 3 Nov 2020 18:08:09 +0800 Subject: [PATCH 228/636] ALSA: usb-audio: add usb vendor id as DSD-capable for Khadas devices commit 07815a2b3501adeaae6384a25b9c4a9c81dae59f upstream. Khadas audio devices ( USB_ID_VENDOR 0x3353 ) have DSD-capable implementations from XMOS need add new usb vendor id for recognition Signed-off-by: Artem Lapkin Cc: Link: https://lore.kernel.org/r/20201103103311.5435-1-art@khadas.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e6dea1c7112b..3effe2e86197 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1466,6 +1466,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case 0x278b: /* Rotel? */ case 0x292b: /* Gustard/Ess based devices */ case 0x2ab6: /* T+A devices */ + case 0x3353: /* Khadas devices */ case 0x3842: /* EVGA */ case 0xc502: /* HiBy devices */ if (fp->dsd_raw) From 9a83c225d4b118e65957abbc75bb844f88e5162b Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Wed, 4 Nov 2020 22:27:17 +1030 Subject: [PATCH 229/636] ALSA: usb-audio: Add implicit feedback quirk for Qu-16 commit 0938ecae432e7ac8b01080c35dd81d50a1e43033 upstream. This patch fixes audio distortion on playback for the Allen&Heath Qu-16. Signed-off-by: Geoffrey D. Bennett Cc: Link: https://lore.kernel.org/r/20201104115717.GA19046@b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 038ce38986e2..5a591bace2bd 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -334,6 +334,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, switch (subs->stream->chip->usb_id) { case USB_ID(0x0763, 0x2030): /* M-Audio Fast Track C400 */ case USB_ID(0x0763, 0x2031): /* M-Audio Fast Track C600 */ + case USB_ID(0x22f0, 0x0006): /* Allen&Heath Qu-16 */ ep = 0x81; ifnum = 3; goto add_sync_ep_from_ifnum; From df0486a4faee7277932136f2526721f07443ec95 Mon Sep 17 00:00:00 2001 From: "Geoffrey D. Bennett" Date: Wed, 4 Nov 2020 22:37:05 +1030 Subject: [PATCH 230/636] ALSA: usb-audio: Add implicit feedback quirk for MODX commit 26201ddc1373c99b2a67c5774da2f0eecd749b93 upstream. This patch fixes audio distortion on playback for the Yamaha MODX. Signed-off-by: Geoffrey D. Bennett Tested-by: Frank Slotta Cc: Link: https://lore.kernel.org/r/20201104120705.GA19126@b4.vu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 5a591bace2bd..e4d2fcc89c30 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -344,6 +344,7 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ifnum = 2; goto add_sync_ep_from_ifnum; case USB_ID(0x2466, 0x8003): /* Fractal Audio Axe-Fx II */ + case USB_ID(0x0499, 0x172a): /* Yamaha MODX */ ep = 0x86; ifnum = 2; goto add_sync_ep_from_ifnum; From d4fea27ffcb01bbf8ca8ae54f60479945394b83e Mon Sep 17 00:00:00 2001 From: Shijie Luo Date: Sun, 1 Nov 2020 17:07:40 -0800 Subject: [PATCH 231/636] mm: mempolicy: fix potential pte_unmap_unlock pte error commit 3f08842098e842c51e3b97d0dcdebf810b32558e upstream. When flags in queue_pages_pte_range don't have MPOL_MF_MOVE or MPOL_MF_MOVE_ALL bits, code breaks and passing origin pte - 1 to pte_unmap_unlock seems like not a good idea. queue_pages_pte_range can run in MPOL_MF_MOVE_ALL mode which doesn't migrate misplaced pages but returns with EIO when encountering such a page. Since commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") and early break on the first pte in the range results in pte_unmap_unlock on an underflow pte. This can lead to lockups later on when somebody tries to lock the pte resp. page_table_lock again.. Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Shijie Luo Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Miaohe Lin Cc: Feilong Lin Cc: Shijie Luo Cc: Link: https://lkml.kernel.org/r/20201019074853.50856-1-luoshijie1@huawei.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/mempolicy.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 68c46da82aac..3cd27c1c729f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -496,7 +496,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long flags = qp->flags; int ret; bool has_unmovable = false; - pte_t *pte; + pte_t *pte, *mapped_pte; spinlock_t *ptl; ptl = pmd_trans_huge_lock(pmd, vma); @@ -510,7 +510,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, if (pmd_trans_unstable(pmd)) return 0; - pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { if (!pte_present(*pte)) continue; @@ -542,7 +542,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, } else break; } - pte_unmap_unlock(pte - 1, ptl); + pte_unmap_unlock(mapped_pte, ptl); cond_resched(); if (has_unmovable) From 1471e96263b6e68929e7afed28f80ab3b6bd1066 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 1 Nov 2020 17:07:47 -0800 Subject: [PATCH 232/636] lib/crc32test: remove extra local_irq_disable/enable commit aa4e460f0976351fddd2f5ac6e08b74320c277a1 upstream. Commit 4d004099a668 ("lockdep: Fix lockdep recursion") uncovered the following issue in lib/crc32test reported on s390: BUG: using __this_cpu_read() in preemptible [00000000] code: swapper/0/1 caller is lockdep_hardirqs_on_prepare+0x48/0x270 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.9.0-next-20201015-15164-g03d992bd2de6 #19 Hardware name: IBM 3906 M04 704 (LPAR) Call Trace: lockdep_hardirqs_on_prepare+0x48/0x270 trace_hardirqs_on+0x9c/0x1b8 crc32_test.isra.0+0x170/0x1c0 crc32test_init+0x1c/0x40 do_one_initcall+0x40/0x130 do_initcalls+0x126/0x150 kernel_init_freeable+0x1f6/0x230 kernel_init+0x22/0x150 ret_from_fork+0x24/0x2c no locks held by swapper/0/1. Remove extra local_irq_disable/local_irq_enable helpers calls. Fixes: 5fb7f87408f1 ("lib: add module support to crc32 tests") Signed-off-by: Vasily Gorbik Signed-off-by: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/patch.git-4369da00c06e.your-ad-here.call-01602859837-ext-1679@work.hours Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/crc32test.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/crc32test.c b/lib/crc32test.c index 97d6a57cefcc..61ddce2cff77 100644 --- a/lib/crc32test.c +++ b/lib/crc32test.c @@ -683,7 +683,6 @@ static int __init crc32c_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -694,7 +693,6 @@ static int __init crc32c_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); @@ -768,7 +766,6 @@ static int __init crc32_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -783,7 +780,6 @@ static int __init crc32_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", CRC_LE_BITS, CRC_BE_BITS); From 68e8b8ed787a8acafa3e987d52f6d415c5949ec6 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Sun, 1 Nov 2020 17:07:53 -0800 Subject: [PATCH 233/636] kthread_worker: prevent queuing delayed work from timer_fn when it is being canceled commit 6993d0fdbee0eb38bfac350aa016f65ad11ed3b1 upstream. There is a small race window when a delayed work is being canceled and the work still might be queued from the timer_fn: CPU0 CPU1 kthread_cancel_delayed_work_sync() __kthread_cancel_work_sync() __kthread_cancel_work() work->canceling++; kthread_delayed_work_timer_fn() kthread_insert_work(); BUG: kthread_insert_work() should not get called when work->canceling is set. Signed-off-by: Zqiang Signed-off-by: Andrew Morton Reviewed-by: Petr Mladek Acked-by: Tejun Heo Cc: Link: https://lkml.kernel.org/r/20201014083030.16895-1-qiang.zhang@windriver.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/kthread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index b786eda90bb5..2eed853ab9cc 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -863,7 +863,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t) /* Move the work from worker->delayed_work_list. */ WARN_ON_ONCE(list_empty(&work->node)); list_del_init(&work->node); - kthread_insert_work(worker, work, &worker->work_list); + if (!work->canceling) + kthread_insert_work(worker, work, &worker->work_list); spin_unlock(&worker->lock); } From 361d82eb7c301e7473459cb7f1e0d7733cf5e7ec Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 1 Nov 2020 17:08:00 -0800 Subject: [PATCH 234/636] mm: always have io_remap_pfn_range() set pgprot_decrypted() commit f8f6ae5d077a9bdaf5cbf2ac960a5d1a04b47482 upstream. The purpose of io_remap_pfn_range() is to map IO memory, such as a memory mapped IO exposed through a PCI BAR. IO devices do not understand encryption, so this memory must always be decrypted. Automatically call pgprot_decrypted() as part of the generic implementation. This fixes a bug where enabling AMD SME causes subsystems, such as RDMA, using io_remap_pfn_range() to expose BAR pages to user space to fail. The CPU will encrypt access to those BAR pages instead of passing unencrypted IO directly to the device. Places not mapping IO should use remap_pfn_range(). Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption") Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Cc: Arnd Bergmann Cc: Tom Lendacky Cc: Thomas Gleixner Cc: Andrey Ryabinin Cc: Borislav Petkov Cc: Brijesh Singh Cc: Jonathan Corbet Cc: Dmitry Vyukov Cc: "Dave Young" Cc: Alexander Potapenko Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Larry Woodman Cc: Matt Fleming Cc: Ingo Molnar Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Rik van Riel Cc: Toshimitsu Kani Cc: Link: https://lkml.kernel.org/r/0-v1-025d64bdf6c4+e-amd_sme_fix_jgg@nvidia.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/pgtable.h | 4 ---- include/linux/mm.h | 9 +++++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 15fd0277ffa6..5901b0005929 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1115,10 +1115,6 @@ static inline bool arch_has_pfn_modify_check(void) #endif /* !__ASSEMBLY__ */ -#ifndef io_remap_pfn_range -#define io_remap_pfn_range remap_pfn_range -#endif - #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 diff --git a/include/linux/mm.h b/include/linux/mm.h index 83828c118b6b..f6ecf41aea83 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2561,6 +2561,15 @@ static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +#ifndef io_remap_pfn_range +static inline int io_remap_pfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot)); +} +#endif + static inline vm_fault_t vmf_error(int err) { if (err == -ENOMEM) From fe0af0efa7b1ecb8aebb5e8f44d00db07f564c6c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 26 Oct 2020 10:52:29 -0400 Subject: [PATCH 235/636] gfs2: Wake up when sd_glock_disposal becomes zero commit da7d554f7c62d0c17c1ac3cc2586473c2d99f0bd upstream. Commit fc0e38dae645 ("GFS2: Fix glock deallocation race") fixed a sd_glock_disposal accounting bug by adding a missing atomic_dec statement, but it failed to wake up sd_glock_wait when that decrement causes sd_glock_disposal to reach zero. As a consequence, gfs2_gl_hash_clear can now run into a 10-minute timeout instead of being woken up. Add the missing wakeup. Fixes: fc0e38dae645 ("GFS2: Fix glock deallocation race") Cc: stable@vger.kernel.org # v2.6.39+ Signed-off-by: Alexander Aring Signed-off-by: Andreas Gruenbacher Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/glock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ccdd8c821abd..c20d71d86812 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -870,7 +870,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, out_free: kfree(gl->gl_lksb.sb_lvbptr); kmem_cache_free(cachep, gl); - atomic_dec(&sdp->sd_glock_disposal); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); out: return ret; From b410d07e965a039dc67073889dab9ff01cee8402 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 2 Nov 2020 15:31:27 -0500 Subject: [PATCH 236/636] ring-buffer: Fix recursion protection transitions between interrupt context commit b02414c8f045ab3b9afc816c3735bc98c5c3d262 upstream. The recursion protection of the ring buffer depends on preempt_count() to be correct. But it is possible that the ring buffer gets called after an interrupt comes in but before it updates the preempt_count(). This will trigger a false positive in the recursion code. Use the same trick from the ftrace function callback recursion code which uses a "transition" bit that gets set, to allow for a single recursion for to handle transitions between contexts. Cc: stable@vger.kernel.org Fixes: 567cd4da54ff4 ("ring-buffer: User context bit recursion checking") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 58 ++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index eef05eb3b284..87ce9736043d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -444,14 +444,16 @@ struct rb_event_info { /* * Used for which event context the event is in. - * NMI = 0 - * IRQ = 1 - * SOFTIRQ = 2 - * NORMAL = 3 + * TRANSITION = 0 + * NMI = 1 + * IRQ = 2 + * SOFTIRQ = 3 + * NORMAL = 4 * * See trace_recursive_lock() comment below for more details. */ enum { + RB_CTX_TRANSITION, RB_CTX_NMI, RB_CTX_IRQ, RB_CTX_SOFTIRQ, @@ -2620,10 +2622,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * a bit of overhead in something as critical as function tracing, * we use a bitmask trick. * - * bit 0 = NMI context - * bit 1 = IRQ context - * bit 2 = SoftIRQ context - * bit 3 = normal context. + * bit 1 = NMI context + * bit 2 = IRQ context + * bit 3 = SoftIRQ context + * bit 4 = normal context. * * This works because this is the order of contexts that can * preempt other contexts. A SoftIRQ never preempts an IRQ @@ -2646,6 +2648,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * The least significant bit can be cleared this way, and it * just so happens that it is the same bit corresponding to * the current context. + * + * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit + * is set when a recursion is detected at the current context, and if + * the TRANSITION bit is already set, it will fail the recursion. + * This is needed because there's a lag between the changing of + * interrupt context and updating the preempt count. In this case, + * a false positive will be found. To handle this, one extra recursion + * is allowed, and this is done by the TRANSITION bit. If the TRANSITION + * bit is already set, then it is considered a recursion and the function + * ends. Otherwise, the TRANSITION bit is set, and that bit is returned. + * + * On the trace_recursive_unlock(), the TRANSITION bit will be the first + * to be cleared. Even if it wasn't the context that set it. That is, + * if an interrupt comes in while NORMAL bit is set and the ring buffer + * is called before preempt_count() is updated, since the check will + * be on the NORMAL bit, the TRANSITION bit will then be set. If an + * NMI then comes in, it will set the NMI bit, but when the NMI code + * does the trace_recursive_unlock() it will clear the TRANSTION bit + * and leave the NMI bit set. But this is fine, because the interrupt + * code that set the TRANSITION bit will then clear the NMI bit when it + * calls trace_recursive_unlock(). If another NMI comes in, it will + * set the TRANSITION bit and continue. + * + * Note: The TRANSITION bit only handles a single transition between context. */ static __always_inline int @@ -2661,8 +2687,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) bit = pc & NMI_MASK ? RB_CTX_NMI : pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; - if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) - return 1; + if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) { + /* + * It is possible that this was called by transitioning + * between interrupt context, and preempt_count() has not + * been updated yet. In this case, use the TRANSITION bit. + */ + bit = RB_CTX_TRANSITION; + if (val & (1 << (bit + cpu_buffer->nest))) + return 1; + } val |= (1 << (bit + cpu_buffer->nest)); cpu_buffer->current_context = val; @@ -2677,8 +2711,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->current_context - (1 << cpu_buffer->nest); } -/* The recursive locking above uses 4 bits */ -#define NESTED_BITS 4 +/* The recursive locking above uses 5 bits */ +#define NESTED_BITS 5 /** * ring_buffer_nest_start - Allow to trace while nested From ee2b95c08515633da67c048beb164966224ffe9e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 29 Oct 2020 17:31:45 -0400 Subject: [PATCH 237/636] ftrace: Fix recursion check for NMI test commit ee11b93f95eabdf8198edd4668bf9102e7248270 upstream. The code that checks recursion will work to only do the recursion check once if there's nested checks. The top one will do the check, the other nested checks will see recursion was already checked and return zero for its "bit". On the return side, nothing will be done if the "bit" is zero. The problem is that zero is returned for the "good" bit when in NMI context. This will set the bit for NMIs making it look like *all* NMI tracing is recursing, and prevent tracing of anything in NMI context! The simple fix is to return "bit + 1" and subtract that bit on the end to get the real bit. Cc: stable@vger.kernel.org Fixes: edc15cafcbfa3 ("tracing: Avoid unnecessary multiple recursion checks") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ee0c6a313ed1..cb75f93fd254 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -595,7 +595,7 @@ static __always_inline int trace_test_and_set_recursion(int start, int max) current->trace_recursion = val; barrier(); - return bit; + return bit + 1; } static __always_inline void trace_clear_recursion(int bit) @@ -605,6 +605,7 @@ static __always_inline void trace_clear_recursion(int bit) if (!bit) return; + bit--; bit = 1 << bit; val &= ~bit; From 2de780dfbe1ad05c3f15f8d6f2d22dae9cf95267 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 29 Oct 2020 19:35:08 -0400 Subject: [PATCH 238/636] ftrace: Handle tracing when switching between context commit 726b3d3f141fba6f841d715fc4d8a4a84f02c02a upstream. When an interrupt or NMI comes in and switches the context, there's a delay from when the preempt_count() shows the update. As the preempt_count() is used to detect recursion having each context have its own bit get set when tracing starts, and if that bit is already set, it is considered a recursion and the function exits. But if this happens in that section where context has changed but preempt_count() has not been updated, this will be incorrectly flagged as a recursion. To handle this case, create another bit call TRANSITION and test it if the current context bit is already set. Flag the call as a recursion if the TRANSITION bit is already set, and if not, set it and continue. The TRANSITION bit will be cleared normally on the return of the function that set it, or if the current context bit is clear, set it and clear the TRANSITION bit to allow for another transition between the current context and an even higher one. Cc: stable@vger.kernel.org Fixes: edc15cafcbfa3 ("tracing: Avoid unnecessary multiple recursion checks") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.h | 23 +++++++++++++++++++++-- kernel/trace/trace_selftest.c | 9 +++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cb75f93fd254..01475411fd1b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -534,6 +534,12 @@ enum { TRACE_GRAPH_DEPTH_START_BIT, TRACE_GRAPH_DEPTH_END_BIT, + + /* + * When transitioning between context, the preempt_count() may + * not be correct. Allow for a single recursion to cover this case. + */ + TRACE_TRANSITION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -588,8 +594,21 @@ static __always_inline int trace_test_and_set_recursion(int start, int max) return 0; bit = trace_get_context_bit() + start; - if (unlikely(val & (1 << bit))) - return -1; + if (unlikely(val & (1 << bit))) { + /* + * It could be that preempt_count has not been updated during + * a switch between contexts. Allow for a single recursion. + */ + bit = TRACE_TRANSITION_BIT; + if (trace_recursion_test(bit)) + return -1; + trace_recursion_set(bit); + barrier(); + return bit + 1; + } + + /* Normal check passed, clear the transition to allow it again */ + trace_recursion_clear(TRACE_TRANSITION_BIT); val |= 1 << bit; current->trace_recursion = val; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 11e9daa4a568..330ba2b081ed 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -492,8 +492,13 @@ trace_selftest_function_recursion(void) unregister_ftrace_function(&test_rec_probe); ret = -1; - if (trace_selftest_recursion_cnt != 1) { - pr_cont("*callback not called once (%d)* ", + /* + * Recursion allows for transitions between context, + * and may call the callback twice. + */ + if (trace_selftest_recursion_cnt != 1 && + trace_selftest_recursion_cnt != 2) { + pr_cont("*callback not called once (or twice) (%d)* ", trace_selftest_recursion_cnt); goto out; } From 7e4eeff7da1df5602e9af340d01b2ce1b4662d3e Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 30 Oct 2020 00:19:05 +0800 Subject: [PATCH 239/636] tracing: Fix out of bounds write in get_trace_buf commit c1acb4ac1a892cf08d27efcb964ad281728b0545 upstream. The nesting count of trace_printk allows for 4 levels of nesting. The nesting counter starts at zero and is incremented before being used to retrieve the current context's buffer. But the index to the buffer uses the nesting counter after it was incremented, and not its original number, which in needs to do. Link: https://lkml.kernel.org/r/20201029161905.4269-1-hqjagain@gmail.com Cc: stable@vger.kernel.org Fixes: 3d9622c12c887 ("tracing: Add barrier to trace_printk() buffer nesting modification") Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 6bf617ff0369..c3cc6aaa6f79 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2819,7 +2819,7 @@ static char *get_trace_buf(void) /* Interrupts must see nesting incremented before we use the buffer */ barrier(); - return &buffer->buffer[buffer->nesting][0]; + return &buffer->buffer[buffer->nesting - 1][0]; } static void put_trace_buf(void) From c096a3d44ead54b53a3b65d9d210022099bdc7ef Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 4 Nov 2020 16:12:44 +0100 Subject: [PATCH 240/636] futex: Handle transient "ownerless" rtmutex state correctly commit 9f5d1c336a10c0d24e83e40b4c1b9539f7dba627 upstream. Gratian managed to trigger the BUG_ON(!newowner) in fixup_pi_state_owner(). This is one possible chain of events leading to this: Task Prio Operation T1 120 lock(F) T2 120 lock(F) -> blocks (top waiter) T3 50 (RT) lock(F) -> boosts T1 and blocks (new top waiter) XX timeout/ -> wakes T2 signal T1 50 unlock(F) -> wakes T3 (rtmutex->owner == NULL, waiter bit is set) T2 120 cleanup -> try_to_take_mutex() fails because T3 is the top waiter and the lower priority T2 cannot steal the lock. -> fixup_pi_state_owner() sees newowner == NULL -> BUG_ON() The comment states that this is invalid and rt_mutex_real_owner() must return a non NULL owner when the trylock failed, but in case of a queued and woken up waiter rt_mutex_real_owner() == NULL is a valid transient state. The higher priority waiter has simply not yet managed to take over the rtmutex. The BUG_ON() is therefore wrong and this is just another retry condition in fixup_pi_state_owner(). Drop the locks, so that T3 can make progress, and then try the fixup again. Gratian provided a great analysis, traces and a reproducer. The analysis is to the point, but it confused the hell out of that tglx dude who had to page in all the futex horrors again. Condensed version is above. [ tglx: Wrote comment and changelog ] Fixes: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex") Reported-by: Gratian Crisan Signed-off-by: Mike Galbraith Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87a6w6x7bb.fsf@ni.com Link: https://lore.kernel.org/r/87sg9pkvf7.fsf@nanos.tec.linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index eabb9180ffa8..52f641c00a65 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2417,10 +2417,22 @@ retry: } /* - * Since we just failed the trylock; there must be an owner. + * The trylock just failed, so either there is an owner or + * there is a higher priority waiter than this one. */ newowner = rt_mutex_owner(&pi_state->pi_mutex); - BUG_ON(!newowner); + /* + * If the higher priority waiter has not yet taken over the + * rtmutex then newowner is NULL. We can't return here with + * that state because it's inconsistent vs. the user space + * state. So drop the locks and try again. It's a valid + * situation and not any different from the other retry + * conditions. + */ + if (unlikely(!newowner)) { + err = -EAGAIN; + goto handle_err; + } } else { WARN_ON_ONCE(argowner != current); if (oldowner == current) { From dffce0f0e7f1c2c421ef488642764d81ff62da0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20P=C3=A9ron?= Date: Sat, 3 Oct 2020 12:03:32 +0200 Subject: [PATCH 241/636] ARM: dts: sun4i-a10: fix cpu_alert temperature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit dea252fa41cd8ce332d148444e4799235a8a03ec ] When running dtbs_check thermal_zone warn about the temperature declared. thermal-zones: cpu-thermal:trips:cpu-alert0:temperature:0:0: 850000 is greater than the maximum of 200000 It's indeed wrong the real value is 85°C and not 850°C. Signed-off-by: Clément Péron Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201003100332.431178-1-peron.clem@gmail.com Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun4i-a10.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 5d46bb0139fa..707ad5074878 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -143,7 +143,7 @@ trips { cpu_alert0: cpu-alert0 { /* milliCelsius */ - temperature = <850000>; + temperature = <85000>; hysteresis = <2000>; type = "passive"; }; From f81794c3789dcacf5dc5abe7da557f33e0c60ecf Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 14 Oct 2020 17:24:28 +0800 Subject: [PATCH 242/636] x86/kexec: Use up-to-dated screen_info copy to fill boot params [ Upstream commit afc18069a2cb7ead5f86623a5f3d4ad6e21f940d ] kexec_file_load() currently reuses the old boot_params.screen_info, but if drivers have change the hardware state, boot_param.screen_info could contain invalid info. For example, the video type might be no longer VGA, or the frame buffer address might be changed. If the kexec kernel keeps using the old screen_info, kexec'ed kernel may attempt to write to an invalid framebuffer memory region. There are two screen_info instances globally available, boot_params.screen_info and screen_info. Later one is a copy, and is updated by drivers. So let kexec_file_load use the updated copy. [ mingo: Tidied up the changelog. ] Signed-off-by: Kairui Song Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20201014092429.1415040-2-kasong@redhat.com Signed-off-by: Sasha Levin --- arch/x86/kernel/kexec-bzimage64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 9490a2845f14..273687986a26 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -211,8 +211,7 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; /* Copying screen_info will do? */ - memcpy(¶ms->screen_info, &boot_params.screen_info, - sizeof(struct screen_info)); + memcpy(¶ms->screen_info, &screen_info, sizeof(struct screen_info)); /* Fill in memsize later */ params->screen_info.ext_mem_k = 0; From b8f922aa95aa07759623e2adab65bdba8811e808 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Wed, 21 Oct 2020 11:53:59 +0200 Subject: [PATCH 243/636] of: Fix reserved-memory overlap detection [ Upstream commit ca05f33316559a04867295dd49f85aeedbfd6bfd ] The reserved-memory overlap detection code fails to detect overlaps if either of the regions starts at address 0x0. The code explicitly checks for and ignores such regions, apparently in order to ignore dynamically allocated regions which have an address of 0x0 at this point. These dynamically allocated regions also have a size of 0x0 at this point, so fix this by removing the check and sorting the dynamically allocated regions ahead of any static regions at address 0x0. For example, there are two overlaps in this case but they are not currently reported: foo@0 { reg = <0x0 0x2000>; }; bar@0 { reg = <0x0 0x1000>; }; baz@1000 { reg = <0x1000 0x1000>; }; quux { size = <0x1000>; }; but they are after this patch: OF: reserved mem: OVERLAP DETECTED! bar@0 (0x00000000--0x00001000) overlaps with foo@0 (0x00000000--0x00002000) OF: reserved mem: OVERLAP DETECTED! foo@0 (0x00000000--0x00002000) overlaps with baz@1000 (0x00001000--0x00002000) Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/ded6fd6b47b58741aabdcc6967f73eca6a3f311e.1603273666.git-series.vincent.whitchurch@axis.com Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/of_reserved_mem.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 895c83e0c7b6..19f95552da4d 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -218,6 +218,16 @@ static int __init __rmem_cmp(const void *a, const void *b) if (ra->base > rb->base) return 1; + /* + * Put the dynamic allocations (address == 0, size == 0) before static + * allocations at address 0x0 so that overlap detection works + * correctly. + */ + if (ra->size < rb->size) + return -1; + if (ra->size > rb->size) + return 1; + return 0; } @@ -235,8 +245,7 @@ static void __init __rmem_check_for_overlap(void) this = &reserved_mem[i]; next = &reserved_mem[i + 1]; - if (!(this->base && next->base)) - continue; + if (this->base + this->size > next->base) { phys_addr_t this_end, next_end; From 6dba51828dfd31e326e6642727ede24c054a86fe Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 22 Oct 2020 16:58:41 -0400 Subject: [PATCH 244/636] blk-cgroup: Fix memleak on error path [ Upstream commit 52abfcbd57eefdd54737fc8c2dc79d8f46d4a3e5 ] If new_blkg allocation raced with blk_policy change and blkg_lookup_check fails, new_blkg is leaked. Acked-by: Tejun Heo Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index a06547fe6f6b..51fc803c999d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -882,6 +882,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, blkg = blkg_lookup_check(pos, pol, q); if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); + blkg_free(new_blkg); goto fail_unlock; } From 239aed5d2ecbfd381ca642f0a6bcb272c98408df Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 22 Oct 2020 16:58:42 -0400 Subject: [PATCH 245/636] blk-cgroup: Pre-allocate tree node on blkg_conf_prep [ Upstream commit f255c19b3ab46d3cad3b1b2e1036f4c926cb1d0c ] Similarly to commit 457e490f2b741 ("blkcg: allocate struct blkcg_gq outside request queue spinlock"), blkg_create can also trigger occasional -ENOMEM failures at the radix insertion because any allocation inside blkg_create has to be non-blocking, making it more likely to fail. This causes trouble for userspace tools trying to configure io weights who need to deal with this condition. This patch reduces the occurrence of -ENOMEMs on this path by preloading the radix tree element on a GFP_KERNEL context, such that we guarantee the later non-blocking insertion won't fail. A similar solution exists in blkcg_init_queue for the same situation. Acked-by: Tejun Heo Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-cgroup.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 51fc803c999d..85bd46e0a745 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -876,6 +876,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto fail; } + if (radix_tree_preload(GFP_KERNEL)) { + blkg_free(new_blkg); + ret = -ENOMEM; + goto fail; + } + rcu_read_lock(); spin_lock_irq(q->queue_lock); @@ -883,7 +889,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); blkg_free(new_blkg); - goto fail_unlock; + goto fail_preloaded; } if (blkg) { @@ -892,10 +898,12 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, blkg = blkg_create(pos, q, new_blkg); if (unlikely(IS_ERR(blkg))) { ret = PTR_ERR(blkg); - goto fail_unlock; + goto fail_preloaded; } } + radix_tree_preload_end(); + if (pos == blkcg) goto success; } @@ -905,6 +913,8 @@ success: ctx->body = body; return 0; +fail_preloaded: + radix_tree_preload_end(); fail_unlock: spin_unlock_irq(q->queue_lock); rcu_read_unlock(); From 6280db912e03089b15a3e2c2882d573d3b09bd0c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 10 Oct 2020 11:25:39 +0800 Subject: [PATCH 246/636] scsi: core: Don't start concurrent async scan on same host [ Upstream commit 831e3405c2a344018a18fcc2665acc5a38c3a707 ] The current scanning mechanism is supposed to fall back to a synchronous host scan if an asynchronous scan is in progress. However, this rule isn't strictly respected, scsi_prep_async_scan() doesn't hold scan_mutex when checking shost->async_scan. When scsi_scan_host() is called concurrently, two async scans on same host can be started and a hang in do_scan_async() is observed. Fixes this issue by checking & setting shost->async_scan atomically with shost->scan_mutex. Link: https://lore.kernel.org/r/20201010032539.426615-1-ming.lei@redhat.com Cc: Christoph Hellwig Cc: Ewan D. Milne Cc: Hannes Reinecke Cc: Bart Van Assche Reviewed-by: Lee Duncan Reviewed-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_scan.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 9a7e3a3bd5ce..009a5b2aa3d0 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1722,15 +1722,16 @@ static void scsi_sysfs_add_devices(struct Scsi_Host *shost) */ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) { - struct async_scan_data *data; + struct async_scan_data *data = NULL; unsigned long flags; if (strncmp(scsi_scan_type, "sync", 4) == 0) return NULL; + mutex_lock(&shost->scan_mutex); if (shost->async_scan) { shost_printk(KERN_DEBUG, shost, "%s called twice\n", __func__); - return NULL; + goto err; } data = kmalloc(sizeof(*data), GFP_KERNEL); @@ -1741,7 +1742,6 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) goto err; init_completion(&data->prev_finished); - mutex_lock(&shost->scan_mutex); spin_lock_irqsave(shost->host_lock, flags); shost->async_scan = 1; spin_unlock_irqrestore(shost->host_lock, flags); @@ -1756,6 +1756,7 @@ static struct async_scan_data *scsi_prep_async_scan(struct Scsi_Host *shost) return data; err: + mutex_unlock(&shost->scan_mutex); kfree(data); return NULL; } From 1aa82721dd74c9f08337ea0a22689eb2a94634ae Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Fri, 23 Oct 2020 16:37:57 +0200 Subject: [PATCH 247/636] vsock: use ns_capable_noaudit() on socket create [ Upstream commit af545bb5ee53f5261db631db2ac4cde54038bdaf ] During __vsock_create() CAP_NET_ADMIN is used to determine if the vsock_sock->trusted should be set to true. This value is used later for determing if a remote connection should be allowed to connect to a restricted VM. Unfortunately, if the caller doesn't have CAP_NET_ADMIN, an audit message such as an selinux denial is generated even if the caller does not want a trusted socket. Logging errors on success is confusing. To avoid this, switch the capable(CAP_NET_ADMIN) check to the noaudit version. Reported-by: Roman Kiryanov https://android-review.googlesource.com/c/device/generic/goldfish/+/1468545/ Signed-off-by: Jeff Vander Stoep Reviewed-by: James Morris Link: https://lore.kernel.org/r/20201023143757.377574-1-jeffv@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/vmw_vsock/af_vsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index c88dc8ee3144..02374459c417 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -629,7 +629,7 @@ struct sock *__vsock_create(struct net *net, vsk->owner = get_cred(psk->owner); vsk->connect_timeout = psk->connect_timeout; } else { - vsk->trusted = capable(CAP_NET_ADMIN); + vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN); vsk->owner = get_current_cred(); vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT; } From 6eecfcbcde431904e5837d285e9e99b5a5eac02c Mon Sep 17 00:00:00 2001 From: Hoegeun Kwon Date: Tue, 27 Oct 2020 13:14:42 +0900 Subject: [PATCH 248/636] drm/vc4: drv: Add error handding for bind [ Upstream commit 9ce0af3e9573fb84c4c807183d13ea2a68271e4b ] There is a problem that if vc4_drm bind fails, a memory leak occurs on the drm_property_create side. Add error handding for drm_mode_config. Signed-off-by: Hoegeun Kwon Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20201027041442.30352-2-hoegeun.kwon@samsung.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 04270a14fcaa..868dd1ef3b69 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -312,6 +312,7 @@ unbind_all: component_unbind_all(dev, drm); gem_destroy: vc4_gem_destroy(drm); + drm_mode_config_cleanup(drm); vc4_bo_cache_destroy(drm); dev_put: drm_dev_put(drm); From a52cdf61125b2189a8e1d85d1e61d654f7fe5d4d Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 27 Oct 2020 21:49:01 +0800 Subject: [PATCH 249/636] ACPI: NFIT: Fix comparison to '-ENXIO' [ Upstream commit 85f971b65a692b68181438e099b946cc06ed499b ] Initial value of rc is '-ENXIO', and we should use the initial value to check it. Signed-off-by: Zhang Qilong Reviewed-by: Pankaj Gupta Reviewed-by: Vishal Verma [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/nfit/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index dd4c7289610e..cb88f3b43a94 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1535,7 +1535,7 @@ static ssize_t format1_show(struct device *dev, le16_to_cpu(nfit_dcr->dcr->code)); break; } - if (rc != ENXIO) + if (rc != -ENXIO) break; } mutex_unlock(&acpi_desc->init_mutex); From 6612b754ac0c85ca8b1181b5d3ea4461a8c1bbcb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 8 Nov 2020 16:38:06 +0100 Subject: [PATCH 250/636] vt: Disable KD_FONT_OP_COPY commit 3c4e0dff2095c579b142d5a0693257f1c58b4804 upstream. It's buggy: On Fri, Nov 06, 2020 at 10:30:08PM +0800, Minh Yuan wrote: > We recently discovered a slab-out-of-bounds read in fbcon in the latest > kernel ( v5.10-rc2 for now ). The root cause of this vulnerability is that > "fbcon_do_set_font" did not handle "vc->vc_font.data" and > "vc->vc_font.height" correctly, and the patch > for VT_RESIZEX can't handle this > issue. > > Specifically, we use KD_FONT_OP_SET to set a small font.data for tty6, and > use KD_FONT_OP_SET again to set a large font.height for tty1. After that, > we use KD_FONT_OP_COPY to assign tty6's vc_font.data to tty1's vc_font.data > in "fbcon_do_set_font", while tty1 retains the original larger > height. Obviously, this will cause an out-of-bounds read, because we can > access a smaller vc_font.data with a larger vc_font.height. Further there was only one user ever. - Android's loadfont, busybox and console-tools only ever use OP_GET and OP_SET - fbset documentation only mentions the kernel cmdline font: option, not anything else. - systemd used OP_COPY before release 232 published in Nov 2016 Now unfortunately the crucial report seems to have gone down with gmane, and the commit message doesn't say much. But the pull request hints at OP_COPY being broken https://github.com/systemd/systemd/pull/3651 So in other words, this never worked, and the only project which foolishly every tried to use it, realized that rather quickly too. Instead of trying to fix security issues here on dead code by adding missing checks, fix the entire thing by removing the functionality. Note that systemd code using the OP_COPY function ignored the return value, so it doesn't matter what we're doing here really - just in case a lone server somewhere happens to be extremely unlucky and running an affected old version of systemd. The relevant code from font_copy_to_all_vcs() in systemd was: /* copy font from active VT, where the font was uploaded to */ cfo.op = KD_FONT_OP_COPY; cfo.height = vcs.v_active-1; /* tty1 == index 0 */ (void) ioctl(vcfd, KDFONTOP, &cfo); Note this just disables the ioctl, garbage collecting the now unused callbacks is left for -next. v2: Tetsuo found the old mail, which allowed me to find it on another archive. Add the link too. Acked-by: Peilin Ye Reported-by: Minh Yuan References: https://lists.freedesktop.org/archives/systemd-devel/2016-June/036935.html References: https://github.com/systemd/systemd/pull/3651 Cc: Greg KH Cc: Peilin Ye Cc: Tetsuo Handa Signed-off-by: Daniel Vetter Link: https://lore.kernel.org/r/20201108153806.3140315-1-daniel.vetter@ffwll.ch Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 758f522f331e..13ea0579f104 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -4574,27 +4574,6 @@ static int con_font_default(struct vc_data *vc, struct console_font_op *op) return rc; } -static int con_font_copy(struct vc_data *vc, struct console_font_op *op) -{ - int con = op->height; - int rc; - - - console_lock(); - if (vc->vc_mode != KD_TEXT) - rc = -EINVAL; - else if (!vc->vc_sw->con_font_copy) - rc = -ENOSYS; - else if (con < 0 || !vc_cons_allocated(con)) - rc = -ENOTTY; - else if (con == vc->vc_num) /* nothing to do */ - rc = 0; - else - rc = vc->vc_sw->con_font_copy(vc, con); - console_unlock(); - return rc; -} - int con_font_op(struct vc_data *vc, struct console_font_op *op) { switch (op->op) { @@ -4605,7 +4584,8 @@ int con_font_op(struct vc_data *vc, struct console_font_op *op) case KD_FONT_OP_SET_DEFAULT: return con_font_default(vc, op); case KD_FONT_OP_COPY: - return con_font_copy(vc, op); + /* was buggy and never really used */ + return -EINVAL; } return -ENOSYS; } From b177d2d915cea2d0a590f0034a20299dd1ee3ef2 Mon Sep 17 00:00:00 2001 From: Eddy Wu Date: Sat, 7 Nov 2020 14:47:22 +0800 Subject: [PATCH 251/636] fork: fix copy_process(CLONE_PARENT) race with the exiting ->real_parent commit b4e00444cab4c3f3fec876dc0cccc8cbb0d1a948 upstream. current->group_leader->exit_signal may change during copy_process() if current->real_parent exits. Move the assignment inside tasklist_lock to avoid the race. Signed-off-by: Eddy Wu Acked-by: Oleg Nesterov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/fork.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 3ed29bf8eb29..f2c92c100194 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1965,14 +1965,9 @@ static __latent_entropy struct task_struct *copy_process( /* ok, now we should be set up.. */ p->pid = pid_nr(pid); if (clone_flags & CLONE_THREAD) { - p->exit_signal = -1; p->group_leader = current->group_leader; p->tgid = current->tgid; } else { - if (clone_flags & CLONE_PARENT) - p->exit_signal = current->group_leader->exit_signal; - else - p->exit_signal = (clone_flags & CSIGNAL); p->group_leader = p; p->tgid = p->pid; } @@ -2017,9 +2012,14 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; + if (clone_flags & CLONE_THREAD) + p->exit_signal = -1; + else + p->exit_signal = current->group_leader->exit_signal; } else { p->real_parent = current; p->parent_exec_id = current->self_exec_id; + p->exit_signal = (clone_flags & CSIGNAL); } klp_copy_process(p); From 18f5757d7fd3ae455c2fcfd60efd5242ea9836f6 Mon Sep 17 00:00:00 2001 From: Claire Chang Date: Mon, 2 Nov 2020 20:07:49 +0800 Subject: [PATCH 252/636] serial: 8250_mtk: Fix uart_get_baud_rate warning commit 912ab37c798770f21b182d656937072b58553378 upstream. Mediatek 8250 port supports speed higher than uartclk / 16. If the baud rates in both the new and the old termios setting are higher than uartclk / 16, the WARN_ON in uart_get_baud_rate() will be triggered. Passing NULL as the old termios so uart_get_baud_rate() will use uartclk / 16 - 1 as the new baud rate which will be replaced by the original baud rate later by tty_termios_encode_baud_rate() in mtk8250_set_termios(). Fixes: 551e553f0d4a ("serial: 8250_mtk: Fix high-speed baud rates clamping") Signed-off-by: Claire Chang Link: https://lore.kernel.org/r/20201102120749.374458-1-tientzu@chromium.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index 159169a48deb..cc4f0c5e5ddf 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -47,7 +47,7 @@ mtk8250_set_termios(struct uart_port *port, struct ktermios *termios, */ baud = tty_termios_baud_rate(termios); - serial8250_do_set_termios(port, termios, old); + serial8250_do_set_termios(port, termios, NULL); tty_termios_encode_baud_rate(termios, baud, baud); From 288609c2bae2b08a21d1c794053c7e761620a946 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Tue, 3 Nov 2020 16:49:42 +0800 Subject: [PATCH 253/636] serial: txx9: add missing platform_driver_unregister() on error in serial_txx9_init commit 0c5fc92622ed5531ff324b20f014e9e3092f0187 upstream. Add the missing platform_driver_unregister() before return from serial_txx9_init in the error handling case when failed to register serial_txx9_pci_driver with macro ENABLE_SERIAL_TXX9_PCI defined. Fixes: ab4382d27412 ("tty: move drivers/serial/ to drivers/tty/serial/") Signed-off-by: Qinglang Miao Link: https://lore.kernel.org/r/20201103084942.109076-1-miaoqinglang@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_txx9.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c index 1b4008d022bf..2f7ef64536a0 100644 --- a/drivers/tty/serial/serial_txx9.c +++ b/drivers/tty/serial/serial_txx9.c @@ -1284,6 +1284,9 @@ static int __init serial_txx9_init(void) #ifdef ENABLE_SERIAL_TXX9_PCI ret = pci_register_driver(&serial_txx9_pci_driver); + if (ret) { + platform_driver_unregister(&serial_txx9_plat_driver); + } #endif if (ret == 0) goto out; From 202dae99e649d49355133d2bb9150244fd7695e5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 26 Oct 2020 09:25:48 +0100 Subject: [PATCH 254/636] USB: serial: cyberjack: fix write-URB completion race commit 985616f0457d9f555fff417d0da56174f70cc14f upstream. The write-URB busy flag was being cleared before the completion handler was done with the URB, something which could lead to corrupt transfers due to a racing write request if the URB is resubmitted. Fixes: 507ca9bc0476 ("[PATCH] USB: add ability for usb-serial drivers to determine if their write urb is currently being used.") Cc: stable # 2.6.13 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cyberjack.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index ebd76ab07b72..36dd688b5795 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -357,11 +357,12 @@ static void cyberjack_write_bulk_callback(struct urb *urb) struct device *dev = &port->dev; int status = urb->status; unsigned long flags; + bool resubmitted = false; - set_bit(0, &port->write_urbs_free); if (status) { dev_dbg(dev, "%s - nonzero write bulk status received: %d\n", __func__, status); + set_bit(0, &port->write_urbs_free); return; } @@ -394,6 +395,8 @@ static void cyberjack_write_bulk_callback(struct urb *urb) goto exit; } + resubmitted = true; + dev_dbg(dev, "%s - priv->wrsent=%d\n", __func__, priv->wrsent); dev_dbg(dev, "%s - priv->wrfilled=%d\n", __func__, priv->wrfilled); @@ -410,6 +413,8 @@ static void cyberjack_write_bulk_callback(struct urb *urb) exit: spin_unlock_irqrestore(&priv->lock, flags); + if (!resubmitted) + set_bit(0, &port->write_urbs_free); usb_serial_port_softint(port); } From 3411d382f62132df82fb697c79ef3c4a7b9c741b Mon Sep 17 00:00:00 2001 From: Ziyi Cao Date: Tue, 20 Oct 2020 00:08:06 +0800 Subject: [PATCH 255/636] USB: serial: option: add Quectel EC200T module support commit a46b973bced1ba57420752bf38426acd9f6cbfa6 upstream. Add usb product id of the Quectel EC200T module. Signed-off-by: Ziyi Cao Link: https://lore.kernel.org/r/17f8a2a3-ce0f-4be7-8544-8fdf286907d0@www.fastmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c773db129bf9..57f57ddd7af5 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -250,6 +250,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 +#define QUECTEL_PRODUCT_EC200T 0x6026 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1117,6 +1118,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), .driver_info = ZLP }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, From 6ea688db659c49334256d2dc69dc45f1e93f3594 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Sat, 31 Oct 2020 23:54:58 +0100 Subject: [PATCH 256/636] USB: serial: option: add LE910Cx compositions 0x1203, 0x1230, 0x1231 commit 489979b4aab490b6b917c11dc02d81b4b742784a upstream. Add following Telit LE910Cx compositions: 0x1203: rndis, tty, adb, tty, tty, tty, tty 0x1230: tty, adb, rmnet, audio, tty, tty, tty, tty 0x1231: rndis, tty, adb, audio, tty, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20201031225458.10512-1-dnlplm@gmail.com [ johan: add comments after entries ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 57f57ddd7af5..0311c333a9b3 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1203,6 +1203,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1203, 0xff), /* Telit LE910Cx (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), @@ -1217,6 +1219,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1230, 0xff), /* Telit LE910Cx (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1260), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x1261), From 59187e8bba015683ac00a1e10895a18443249cee Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Tue, 3 Nov 2020 13:44:25 +0100 Subject: [PATCH 257/636] USB: serial: option: add Telit FN980 composition 0x1055 commit db0362eeb22992502764e825c79b922d7467e0eb upstream. Add the following Telit FN980 composition: 0x1055: tty, adb, tty, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20201103124425.12940-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0311c333a9b3..f28344f03141 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1191,6 +1191,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1054, 0xff), /* Telit FT980-KS */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1055, 0xff), /* Telit FN980 (PCIe) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), From 327a8935379f2e9b73b1c0f7345df321d6b42c88 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 2 Nov 2020 09:58:21 -0500 Subject: [PATCH 258/636] USB: Add NO_LPM quirk for Kingston flash drive commit afaa2e745a246c5ab95103a65b1ed00101e1bc63 upstream. In Bugzilla #208257, Julien Humbert reports that a 32-GB Kingston flash drive spontaneously disconnects and reconnects, over and over. Testing revealed that disabling Link Power Management for the drive fixed the problem. This patch adds a quirk entry for that drive to turn off LPM permanently. CC: Hans de Goede CC: Reported-and-tested-by: Julien Humbert Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20201102145821.GA1478741@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 4ee810531098..5ad14cdd9762 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -378,6 +378,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, + /* Kingston DataTraveler 3.0 */ + { USB_DEVICE(0x0951, 0x1666), .driver_info = USB_QUIRK_NO_LPM }, + /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */ { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF }, From 83a282f990ba94103eb49be1ad69a8e4b2de4fbd Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Fri, 6 Nov 2020 13:54:29 +0800 Subject: [PATCH 259/636] usb: mtu3: fix panic in mtu3_gadget_stop() commit 20914919ad31849ee2b9cfe0428f4a20335c9e2a upstream. This patch fixes a possible issue when mtu3_gadget_stop() already assigned NULL to mtu->gadget_driver during mtu_gadget_disconnect(). [] notifier_call_chain+0xa4/0x128 [] __atomic_notifier_call_chain+0x84/0x138 [] notify_die+0xb0/0x120 [] die+0x1f8/0x5d0 [] __do_kernel_fault+0x19c/0x280 [] do_bad_area+0x44/0x140 [] do_translation_fault+0x4c/0x90 [] do_mem_abort+0xb8/0x258 [] el1_da+0x24/0x3c [] mtu3_gadget_disconnect+0xac/0x128 [] mtu3_irq+0x34c/0xc18 [] __handle_irq_event_percpu+0x2ac/0xcd0 [] handle_irq_event_percpu+0x80/0x138 [] handle_irq_event+0xac/0x148 [] handle_fasteoi_irq+0x234/0x568 [] generic_handle_irq+0x48/0x68 [] __handle_domain_irq+0x264/0x1740 [] gic_handle_irq+0x14c/0x250 [] el1_irq+0xec/0x194 [] dma_pool_alloc+0x6e4/0xae0 [] cmdq_mbox_pool_alloc_impl+0xb0/0x238 [] cmdq_pkt_alloc_buf+0x2dc/0x7c0 [] cmdq_pkt_add_cmd_buffer+0x178/0x270 [] cmdq_pkt_perf_begin+0x108/0x148 [] cmdq_pkt_create+0x178/0x1f0 [] mtk_crtc_config_default_path+0x328/0x7a0 [] mtk_drm_idlemgr_kick+0xa6c/0x1460 [] mtk_drm_crtc_atomic_begin+0x1a4/0x1a68 [] drm_atomic_helper_commit_planes+0x154/0x878 [] mtk_atomic_complete.isra.16+0xe80/0x19c8 [] mtk_atomic_commit+0x258/0x898 [] drm_atomic_commit+0xcc/0x108 [] drm_mode_atomic_ioctl+0x1c20/0x2580 [] drm_ioctl_kernel+0x118/0x1b0 [] drm_ioctl+0x5c0/0x920 [] do_vfs_ioctl+0x188/0x1820 [] SyS_ioctl+0x8c/0xa0 Fixes: df2069acb005 ("usb: Add MediaTek USB3 DRD driver") Signed-off-by: Macpaul Lin Acked-by: Chunfeng Yun Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1604642069-20961-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index bbcd3332471d..3828ed6d38a2 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -573,6 +573,7 @@ static int mtu3_gadget_stop(struct usb_gadget *g) spin_unlock_irqrestore(&mtu->lock, flags); + synchronize_irq(mtu->irq); return 0; } From a7443bebdecaa474900ee5eec5da69de63686c24 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 27 Oct 2020 15:01:17 -0700 Subject: [PATCH 260/636] ARC: stack unwinding: avoid indefinite looping commit 328d2168ca524d501fc4b133d6be076142bd305c upstream. Currently stack unwinder is a while(1) loop which relies on the dwarf unwinder to signal termination, which in turn relies on dwarf info to do so. This in theory could cause an infinite loop if the dwarf info was somehow messed up or the register contents were etc. This fix thus detects the excessive looping and breaks the loop. | Mem: 26184K used, 1009136K free, 0K shrd, 0K buff, 14416K cached | CPU: 0.0% usr 72.8% sys 0.0% nic 27.1% idle 0.0% io 0.0% irq 0.0% sirq | Load average: 4.33 2.60 1.11 2/74 139 | PID PPID USER STAT VSZ %VSZ CPU %CPU COMMAND | 133 2 root SWN 0 0.0 3 22.9 [rcu_torture_rea] | 132 2 root SWN 0 0.0 0 22.0 [rcu_torture_rea] | 131 2 root SWN 0 0.0 3 21.5 [rcu_torture_rea] | 126 2 root RW 0 0.0 2 5.4 [rcu_torture_wri] | 129 2 root SWN 0 0.0 0 0.2 [rcu_torture_fak] | 137 2 root SW 0 0.0 0 0.2 [rcu_torture_cbf] | 127 2 root SWN 0 0.0 0 0.1 [rcu_torture_fak] | 138 115 root R 1464 0.1 2 0.1 top | 130 2 root SWN 0 0.0 0 0.1 [rcu_torture_fak] | 128 2 root SWN 0 0.0 0 0.1 [rcu_torture_fak] | 115 1 root S 1472 0.1 1 0.0 -/bin/sh | 104 1 root S 1464 0.1 0 0.0 inetd | 1 0 root S 1456 0.1 2 0.0 init | 78 1 root S 1456 0.1 0 0.0 syslogd -O /var/log/messages | 134 2 root SW 0 0.0 2 0.0 [rcu_torture_sta] | 10 2 root IW 0 0.0 1 0.0 [rcu_preempt] | 88 2 root IW 0 0.0 1 0.0 [kworker/1:1-eve] | 66 2 root IW 0 0.0 2 0.0 [kworker/2:2-eve] | 39 2 root IW 0 0.0 2 0.0 [kworker/2:1-eve] | unwinder looping too long, aborting ! Cc: Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/stacktrace.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index bf40e06f3fb8..0fed32b95923 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -115,7 +115,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, int (*consumer_fn) (unsigned int, void *), void *arg) { #ifdef CONFIG_ARC_DW2_UNWIND - int ret = 0; + int ret = 0, cnt = 0; unsigned int address; struct unwind_frame_info frame_info; @@ -135,6 +135,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, break; frame_info.regs.r63 = frame_info.regs.r31; + + if (cnt++ > 128) { + printk("unwinder looping too long, aborting !\n"); + return 0; + } } return address; /* return the last address it saw */ From 2590411220774522fe00c598e089cc11baebe26e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 19 Oct 2020 19:19:57 -0700 Subject: [PATCH 261/636] Revert "ARC: entry: fix potential EFA clobber when TIF_SYSCALL_TRACE" This reverts commit 00fdec98d9881bf5173af09aebd353ab3b9ac729. (but only from 5.2 and prior kernels) The original commit was a preventive fix based on code-review and was auto-picked for stable back-port (for better or worse). It was OK for v5.3+ kernels, but turned up needing an implicit change 68e5c6f073bcf70 "(ARC: entry: EV_Trap expects r10 (vs. r9) to have exception cause)" merged in v5.3 which itself was not backported. So to summarize the stable backport of this patch for v5.2 and prior kernels is busted and it won't boot. The obvious solution is backport 68e5c6f073bcf70 but that is a pain as it doesn't revert cleanly and each of affected kernels (so far v4.19, v4.14, v4.9, v4.4) needs a slightly different massaged varaint. So the easier fix is to simply revert the backport from 5.2 and prior. The issue was not a big deal as it would cause strace to sporadically not work correctly. Waldemar Brodkorb first reported this when running ARC uClibc regressions on latest stable kernels (with offending backport). Once he bisected it, the analysis was trivial, so thx to him for this. Reported-by: Waldemar Brodkorb Bisected-by: Waldemar Brodkorb Cc: stable # 5.2 and prior Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/entry.S | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 705a68208423..85d9ea4a0acc 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -156,6 +156,7 @@ END(EV_Extension) tracesys: ; save EFA in case tracer wants the PC of traced task ; using ERET won't work since next-PC has already committed + lr r12, [efa] GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address @@ -198,9 +199,15 @@ tracesys_exit: ; Breakpoint TRAP ; --------------------------------------------- trap_with_param: - mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc + + ; stop_pc info by gdb needs this info + lr r0, [efa] mov r1, sp + ; Now that we have read EFA, it is safe to do "fake" rtie + ; and get out of CPU exception mode + FAKE_RET_FROM_EXCPN + ; Save callee regs in case gdb wants to have a look ; SP will grow up by size of CALLEE Reg-File ; NOTE: clobbers r12 @@ -227,10 +234,6 @@ ENTRY(EV_Trap) EXCEPTION_PROLOGUE - lr r12, [efa] - - FAKE_RET_FROM_EXCPN - ;============ TRAP 1 :breakpoints ; Check ECR for trap with arg (PROLOGUE ensures r9 has ECR) bmsk.f 0, r9, 7 @@ -238,6 +241,9 @@ ENTRY(EV_Trap) ;============ TRAP (no param): syscall top level + ; First return from Exception to pure K mode (Exception/IRQs renabled) + FAKE_RET_FROM_EXCPN + ; If syscall tracing ongoing, invoke pre-post-hooks GET_CURR_THR_INFO_FLAGS r10 btst r10, TIF_SYSCALL_TRACE From 292e5700f4db841593141e9a3b1210e94bc8f74f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Oct 2020 17:38:22 +0200 Subject: [PATCH 262/636] PM: runtime: Resume the device earlier in __device_release_driver() commit 9226c504e364158a17a68ff1fe9d67d266922f50 upstream. Since the device is resumed from runtime-suspend in __device_release_driver() anyway, it is better to do that before looking for busy managed device links from it to consumers, because if there are any, device_links_unbind_consumers() will be called and it will cause the consumer devices' drivers to unbind, so the consumer devices will be runtime-resumed. In turn, resuming each consumer device will cause the supplier to be resumed and when the runtime PM references from the given consumer to it are dropped, it may be suspended. Then, the runtime-resume of the next consumer will cause the supplier to resume again and so on. Update the code accordingly. Signed-off-by: Rafael J. Wysocki Fixes: 9ed9895370ae ("driver core: Functional dependencies tracking support") Cc: All applicable # All applicable Tested-by: Xiang Chen Reviewed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index b3c569412f4e..4ba9231a6be8 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -931,6 +931,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv = dev->driver; if (drv) { + pm_runtime_get_sync(dev); + while (device_links_busy(dev)) { device_unlock(dev); if (parent && dev->bus->need_parent_lock) @@ -946,11 +948,12 @@ static void __device_release_driver(struct device *dev, struct device *parent) * have released the driver successfully while this one * was waiting, so check for that. */ - if (dev->driver != drv) + if (dev->driver != drv) { + pm_runtime_put(dev); return; + } } - pm_runtime_get_sync(dev); pm_runtime_clean_up_links(dev); driver_sysfs_remove(dev); From 29a975bcc107d68e379a55048813ddf3e7b120b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kiyin=28=E5=B0=B9=E4=BA=AE=29?= Date: Wed, 4 Nov 2020 08:23:22 +0300 Subject: [PATCH 263/636] perf/core: Fix a memory leak in perf_event_parse_addr_filter() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7bdb157cdebbf95a1cd94ed2e01b338714075d00 upstream. As shown through runtime testing, the "filename" allocation is not always freed in perf_event_parse_addr_filter(). There are three possible ways that this could happen: - It could be allocated twice on subsequent iterations through the loop, - or leaked on the success path, - or on the failure path. Clean up the code flow to make it obvious that 'filename' is always freed in the reallocation path and in the two return paths as well. We rely on the fact that kfree(NULL) is NOP and filename is initialized with NULL. This fixes the leak. No other side effects expected. [ Dan Carpenter: cleaned up the code flow & added a changelog. ] [ Ingo Molnar: updated the changelog some more. ] Fixes: 375637bc5249 ("perf/core: Introduce address range filtering") Signed-off-by: "kiyin(尹亮)" Signed-off-by: Dan Carpenter Signed-off-by: Ingo Molnar Cc: "Srivatsa S. Bhat" Cc: Anthony Liguori -- kernel/events/core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a35d742b0ba8..8b94eb6437c1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9047,6 +9047,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) { int fpos = token == IF_SRC_FILE ? 2 : 1; + kfree(filename); filename = match_strdup(&args[fpos]); if (!filename) { ret = -ENOMEM; @@ -9093,16 +9094,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, */ ret = -EOPNOTSUPP; if (!event->ctx->task) - goto fail_free_name; + goto fail; /* look up the path and grab its inode */ ret = kern_path(filename, LOOKUP_FOLLOW, &filter->path); if (ret) - goto fail_free_name; - - kfree(filename); - filename = NULL; + goto fail; ret = -EINVAL; if (!filter->path.dentry || @@ -9122,13 +9120,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, if (state != IF_STATE_ACTION) goto fail; + kfree(filename); kfree(orig); return 0; -fail_free_name: - kfree(filename); fail: + kfree(filename); free_filters_list(filters); kfree(orig); From 3ab5872df9211447be82d0b01a2ec5731aa20621 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 7 Nov 2020 16:31:24 -0800 Subject: [PATCH 264/636] tools: perf: Fix build error in v4.19.y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf may fail to build in v4.19.y with the following error. util/evsel.c: In function ‘perf_evsel__exit’: util/util.h:25:28: error: passing argument 1 of ‘free’ discards ‘const’ qualifier from pointer target type This is observed (at least) with gcc v6.5.0. The underlying problem is the following statement. zfree(&evsel->pmu_name); evsel->pmu_name is decared 'const *'. zfree in turn is defined as #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) and thus passes the const * to free(). The problem is not seen in the upstream kernel since zfree() has been rewritten there. The problem has been introduced into v4.19.y with the backport of upstream commit d4953f7ef1a2 (perf parse-events: Fix 3 use after frees found with clang ASAN). One possible fix of this problem would be to not declare pmu_name as const. This patch chooses to typecast the parameter of zfree() to void *, following the guidance from the upstream kernel which does the same since commit 7f7c536f23e6a ("tools lib: Adopt zalloc()/zfree() from tools/perf") Fixes: a0100a363098 ("perf parse-events: Fix 3 use after frees found with clang ASAN") Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index dc58254a2b69..8c01b2cfdb1a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -22,7 +22,7 @@ static inline void *zalloc(size_t size) return calloc(1, size); } -#define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) +#define zfree(ptr) ({ free((void *)*ptr); *ptr = NULL; }) struct dirent; struct nsinfo; From 8069a08d35b72e16486a6a81eb06f964b2e1432b Mon Sep 17 00:00:00 2001 From: Xiaofei Shen Date: Fri, 29 Mar 2019 11:04:58 +0530 Subject: [PATCH 265/636] net: dsa: read mac address from DT for slave device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a2c7023f7075ca9b80f944d3f20f60e6574538e2 upstream. Before creating a slave netdevice, get the mac address from DTS and apply in case it is valid. Signed-off-by: Xiaofei Shen Signed-off-by: Vinod Koul Signed-off-by: David S. Miller Cc: Pali Rohár Signed-off-by: Greg Kroah-Hartman --- include/net/dsa.h | 1 + net/dsa/dsa2.c | 1 + net/dsa/slave.c | 5 ++++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 461e8a7661b7..d28df7adf948 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -196,6 +196,7 @@ struct dsa_port { unsigned int index; const char *name; const struct dsa_port *cpu_dp; + const char *mac; struct device_node *dn; unsigned int ageing_time; u8 stp_state; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index b036c55f5cb1..7c10bc4dacd3 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -261,6 +261,7 @@ static int dsa_port_setup(struct dsa_port *dp) int err = 0; memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); + dp->mac = of_get_mac_address(dp->dn); if (dp->type != DSA_PORT_TYPE_UNUSED) err = devlink_port_register(ds->devlink, &dp->devlink_port, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8ee28b6016d8..d03c67e761df 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1313,7 +1313,10 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->features = master->vlan_features | NETIF_F_HW_TC; slave_dev->hw_features |= NETIF_F_HW_TC; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; - eth_hw_addr_inherit(slave_dev, master); + if (port->mac && is_valid_ether_addr(port->mac)) + ether_addr_copy(slave_dev->dev_addr, port->mac); + else + eth_hw_addr_inherit(slave_dev, master); slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; From 779d3e303977edbe43c4f0017d1553b4306d8d9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 7 Sep 2020 13:27:17 +0200 Subject: [PATCH 266/636] arm64: dts: marvell: espressobin: Add ethernet switch aliases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b64d814257b027e29a474bcd660f6372490138c7 upstream. Espressobin boards have 3 ethernet ports and some of them got assigned more then one MAC address. MAC addresses are stored in U-Boot environment. Since commit a2c7023f7075c ("net: dsa: read mac address from DT for slave device") kernel can use MAC addresses from DT for particular DSA port. Currently Espressobin DTS file contains alias just for ethernet0. This patch defines additional ethernet aliases in Espressobin DTS files, so bootloader can fill correct MAC address for DSA switch ports if more MAC addresses were specified. DT alias ethernet1 is used for wan port, DT aliases ethernet2 and ethernet3 are used for lan ports for both Espressobin revisions (V5 and V7). Fixes: 5253cb8c00a6f ("arm64: dts: marvell: espressobin: add ethernet alias") Cc: # a2c7023f7075c: dsa: read mac address Signed-off-by: Pali Rohár Reviewed-by: Andrew Lunn Reviewed-by: Andre Heider Signed-off-by: Gregory CLEMENT [pali: Backported Espressobin rev V5 changes to 5.4 and 4.19 versions] Signed-off-by: Greg Kroah-Hartman --- .../boot/dts/marvell/armada-3720-espressobin.dts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts index 6cbdd66921aa..1a3e6e3b04eb 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts @@ -21,6 +21,10 @@ aliases { ethernet0 = ð0; + /* for dsa slave device */ + ethernet1 = &switch0port1; + ethernet2 = &switch0port2; + ethernet3 = &switch0port3; serial0 = &uart0; serial1 = &uart1; }; @@ -136,25 +140,25 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { + switch0port0: port@0 { reg = <0>; label = "cpu"; ethernet = <ð0>; }; - port@1 { + switch0port1: port@1 { reg = <1>; label = "wan"; phy-handle = <&switch0phy0>; }; - port@2 { + switch0port2: port@2 { reg = <2>; label = "lan0"; phy-handle = <&switch0phy1>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "lan1"; phy-handle = <&switch0phy2>; From 53fff24aaf01dcb09cbfabbe060f42db8e61ab01 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 10 Nov 2020 12:36:02 +0100 Subject: [PATCH 267/636] Linux 4.19.156 Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Guenter Roeck Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/20201109125019.906191744@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9fc16d34e1bb..82891b34e19e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 155 +SUBLEVEL = 156 EXTRAVERSION = NAME = "People's Front" From be047631defd955980ea239e1f690e1f733692fd Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 10 Nov 2020 11:45:35 -0800 Subject: [PATCH 268/636] Revert "ANDROID: Kbuild, LLVMLinux: allow overriding clang target triple" This reverts commit 4c451dba25d2829a6a0c532196cce83e2e65b47e. AOSP's distribution of GNU binutils always had a curious target triple prefix on the binaries. Now that GNU binutils is deprecated for Android Common Kernels, we can now remove this out of tree workaround. Now building Android kernels with LLVM matches upstream (see Documentation/kbuild/llvm.rst). Bug: 118439987 Bug: 120440614 Bug: 141693040 Signed-off-by: Nick Desaulniers Change-Id: Iecaa3264a440f795f2f3a44bdf74fe28ad4ed1cc --- Makefile | 6 +----- build.config.aarch64 | 5 ++--- build.config.allmodconfig | 2 +- build.config.arm | 3 +-- build.config.x86_64 | 3 +-- scripts/clang-android.sh | 4 ---- 6 files changed, 6 insertions(+), 17 deletions(-) delete mode 100755 scripts/clang-android.sh diff --git a/Makefile b/Makefile index 766b40b81746..f72ed4f02bfe 100644 --- a/Makefile +++ b/Makefile @@ -501,11 +501,7 @@ endif ifeq ($(cc-name),clang) ifneq ($(CROSS_COMPILE),) -CLANG_TRIPLE ?= $(CROSS_COMPILE) -CLANG_FLAGS += --target=$(notdir $(CLANG_TRIPLE:%-=%)) -ifeq ($(shell $(srctree)/scripts/clang-android.sh $(CC) $(CLANG_FLAGS)), y) -$(error "Clang with Android --target detected. Did you specify CLANG_TRIPLE?") -endif +CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) diff --git a/build.config.aarch64 b/build.config.aarch64 index adc881d0cc30..569b9747661d 100644 --- a/build.config.aarch64 +++ b/build.config.aarch64 @@ -1,8 +1,7 @@ ARCH=arm64 -CLANG_TRIPLE=aarch64-linux-gnu- -CROSS_COMPILE=aarch64-linux-androidkernel- -CROSS_COMPILE_COMPAT=arm-linux-androidkernel- +CROSS_COMPILE=aarch64-linux-gnu- +CROSS_COMPILE_COMPAT=arm-linux-gnueabi- LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gas/linux-x86 LINUX_GCC_CROSS_COMPILE_COMPAT_PREBUILTS_BIN=prebuilts/gas/linux-x86 diff --git a/build.config.allmodconfig b/build.config.allmodconfig index 4ce14e5a7c5a..103d1cdf4edc 100644 --- a/build.config.allmodconfig +++ b/build.config.allmodconfig @@ -10,5 +10,5 @@ function update_config() { -e UNWINDER_FRAME_POINTER \ (cd ${OUT_DIR} && \ - make O=${OUT_DIR} $archsubarch CLANG_TRIPLE=${CLANG_TRIPLE} CROSS_COMPILE=${CROSS_COMPILE} "${TOOL_ARGS[@]}" ${MAKE_ARGS} olddefconfig) + make O=${OUT_DIR} $archsubarch CROSS_COMPILE=${CROSS_COMPILE} "${TOOL_ARGS[@]}" ${MAKE_ARGS} olddefconfig) } diff --git a/build.config.arm b/build.config.arm index 35defb811b98..a9e9a6c2a418 100644 --- a/build.config.arm +++ b/build.config.arm @@ -1,7 +1,6 @@ ARCH=arm -CLANG_TRIPLE=arm-linux-gnueabi- -CROSS_COMPILE=arm-linux-androidkernel- +CROSS_COMPILE=arm-linux-gnueabi- LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gas/linux-x86 FILES=" diff --git a/build.config.x86_64 b/build.config.x86_64 index 73405452fe83..b16ac8a68b25 100644 --- a/build.config.x86_64 +++ b/build.config.x86_64 @@ -1,7 +1,6 @@ ARCH=x86_64 -CLANG_TRIPLE=x86_64-linux-gnu- -CROSS_COMPILE=x86_64-linux-androidkernel- +CROSS_COMPILE=x86_64-linux-gnu- LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gas/linux-x86 FILES=" diff --git a/scripts/clang-android.sh b/scripts/clang-android.sh deleted file mode 100755 index 9186c4f48576..000000000000 --- a/scripts/clang-android.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -$* -dM -E - &1 | grep -q __ANDROID__ && echo "y" From 900281e167f45e0c0e5df6e59fa00334b5e38133 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 10 Nov 2020 13:00:00 -0800 Subject: [PATCH 269/636] powercap: restrict energy meter to root access commit 949dd0104c496fa7c14991a23c03c62e44637e71 upstream. Remove non-privileged user access to power data contained in /sys/class/powercap/intel-rapl*/*/energy_uj Non-privileged users currently have read access to power data and can use this data to form a security attack. Some privileged drivers/applications need read access to this data, but don't expose it to non-privileged users. For example, thermald uses this data to ensure that power management works correctly. Thus removing non-privileged access is preferred over completely disabling this power reporting capability with CONFIG_INTEL_RAPL=n. Fixes: 95677a9a3847 ("PowerCap: Fix mode for energy counter") Signed-off-by: Len Brown Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/powercap/powercap_sys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 9e2f274bd44f..60c8375c3c81 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -379,9 +379,9 @@ static void create_power_zone_common_attributes( &dev_attr_max_energy_range_uj.attr; if (power_zone->ops->get_energy_uj) { if (power_zone->ops->reset_energy_uj) - dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUGO; + dev_attr_energy_uj.attr.mode = S_IWUSR | S_IRUSR; else - dev_attr_energy_uj.attr.mode = S_IRUGO; + dev_attr_energy_uj.attr.mode = S_IRUSR; power_zone->zone_dev_attrs[count++] = &dev_attr_energy_uj.attr; } From 31acccdc877486a649a86d37725a15175fcd5ed6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 10 Nov 2020 21:11:27 +0100 Subject: [PATCH 270/636] Linux 4.19.157 Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 82891b34e19e..245bcd8dd7b7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 156 +SUBLEVEL = 157 EXTRAVERSION = NAME = "People's Front" From ef728f3b3524ed5c4812d674ef908ff201eca0f2 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Mon, 28 Sep 2020 10:35:13 -0600 Subject: [PATCH 271/636] UPSTREAM: coresight: etm4x: Fix save and restore of TRCVMIDCCTLR1 register In commit f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states"), mistakenly TRCVMIDCCTLR1 register value was saved in trcvmidcctlr0 state variable which is used to store TRCVMIDCCTLR0 register value in etm4x_cpu_save() and then same value is written back to both TRCVMIDCCTLR0 and TRCVMIDCCTLR1 in etm4x_cpu_restore(). There is already a trcvmidcctlr1 state variable available for TRCVMIDCCTLR1, so use it. Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Reviewed-by: Suzuki K Poulose Signed-off-by: Sai Prakash Ranjan Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200928163513.70169-26-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3477326277451000bc667dfcc4fd0774c039184c) Signed-off-by: Greg Kroah-Hartman Change-Id: I1a5951b58f2c412d81e1ee1ec52393726d61c9e5 --- drivers/hwtracing/coresight/coresight-etm4x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 3e22cfc0bc48..a2531cc8a14e 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1199,7 +1199,7 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1); state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0); - state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1); + state->trcvmidcctlr1 = readl(drvdata->base + TRCVMIDCCTLR1); state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR); @@ -1309,7 +1309,7 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1); writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0); - writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1); + writel_relaxed(state->trcvmidcctlr1, drvdata->base + TRCVMIDCCTLR1); writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET); From 1391761dc4799e3658a8b1063d9ae2f68205002b Mon Sep 17 00:00:00 2001 From: Tingwei Zhang Date: Mon, 28 Sep 2020 10:34:56 -0600 Subject: [PATCH 272/636] UPSTREAM: coresight: etm: perf: Fix warning caused by etm_setup_aux failure When coresight_build_path() fails on all the cpus, etm_setup_aux calls etm_free_aux() to free allocated event_data. WARN_ON(cpumask_empty(mask) will be triggered since cpu mask is empty. Check event_data->snk_config is not NULL first to avoid this warning. Fixes: f5200aa9831f38 ("coresight: perf: Refactor function free_event_data()") Reviewed-by: Mike Leach Reviewed-by: Suzuki K Poulose Signed-off-by: Tingwei Zhang Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200928163513.70169-9-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 716f5652a13122364a65e694386b9b26f5e98c51) Signed-off-by: Greg Kroah-Hartman Change-Id: Ie470e2e0989abfb82250546a9e3b313d8a5d5ef9 --- drivers/hwtracing/coresight/coresight-etm-perf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 84f1dcb69827..fd0352b678cf 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -126,10 +126,10 @@ static void free_sink_buffer(struct etm_event_data *event_data) cpumask_t *mask = &event_data->mask; struct coresight_device *sink; - if (WARN_ON(cpumask_empty(mask))) + if (!event_data->snk_config) return; - if (!event_data->snk_config) + if (WARN_ON(cpumask_empty(mask))) return; cpu = cpumask_first(mask); From 0dda2fbad380fc6a58d7910d9534da1a3f911657 Mon Sep 17 00:00:00 2001 From: Jonathan Zhou Date: Wed, 16 Sep 2020 13:17:32 -0600 Subject: [PATCH 273/636] UPSTREAM: coresight: etm4x: Fix issues on trcseqevr access The TRCSEQEVR(3) is reserved, using '@nrseqstate - 1' instead to avoid accessing the reserved register. Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Cc: Mathieu Poirier Cc: Suzuki K Poulose Cc: Mike Leach Cc: Shaokun Zhang Signed-off-by: Jonathan Zhou [Fixed capital letter in title] Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200916191737.4001561-12-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4cd83037cd957ad97756055355ab4ee63f259380) Signed-off-by: Greg Kroah-Hartman Change-Id: Ida0a7d780f2d7b61a70324e4eed592d56743ee68 --- drivers/hwtracing/coresight/coresight-etm4x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index a2531cc8a14e..272b660ecb5c 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1155,7 +1155,7 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR); state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR); - for (i = 0; i < drvdata->nrseqstate; i++) + for (i = 0; i < drvdata->nrseqstate - 1; i++) state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i)); state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR); @@ -1260,7 +1260,7 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR); writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR); - for (i = 0; i < drvdata->nrseqstate; i++) + for (i = 0; i < drvdata->nrseqstate - 1; i++) writel_relaxed(state->trcseqevr[i], drvdata->base + TRCSEQEVRn(i)); From 83bbb2655ad85146af605bede0a7cf5bc5674117 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 16 Sep 2020 13:17:31 -0600 Subject: [PATCH 274/636] UPSTREAM: coresight: etm4x: Handle unreachable sink in perf mode If the specified/hinted sink is not reachable from a subset of the CPUs, we could end up unable to trace the event on those CPUs. This is the best effort we could do until we support 1:1 configurations. Fail gracefully in such cases avoiding a WARN_ON, which can be easily triggered by the user on certain platforms (Arm N1SDP), with the following trace paths : CPU0 \ -- Funnel0 --> ETF0 --> / \ CPU1 \ MainFunnel CPU2 / \ / -- Funnel1 --> ETF1 --> / CPU1 $ perf record --per-thread -e cs_etm/@ETF0/u -- could trigger the following WARNING, when the event is scheduled on CPU2. [10919.513250] ------------[ cut here ]------------ [10919.517861] WARNING: CPU: 2 PID: 24021 at drivers/hwtracing/coresight/coresight-etm-perf.c:316 etm_event_start+0xf8/0x100 ... [10919.564403] CPU: 2 PID: 24021 Comm: perf Not tainted 5.8.0+ #24 [10919.570308] pstate: 80400089 (Nzcv daIf +PAN -UAO BTYPE=--) [10919.575865] pc : etm_event_start+0xf8/0x100 [10919.580034] lr : etm_event_start+0x80/0x100 [10919.584202] sp : fffffe001932f940 [10919.587502] x29: fffffe001932f940 x28: fffffc834995f800 [10919.592799] x27: 0000000000000000 x26: fffffe0011f3ced0 [10919.598095] x25: fffffc837fce244c x24: fffffc837fce2448 [10919.603391] x23: 0000000000000002 x22: fffffc8353529c00 [10919.608688] x21: fffffc835bb31000 x20: 0000000000000000 [10919.613984] x19: fffffc837fcdcc70 x18: 0000000000000000 [10919.619281] x17: 0000000000000000 x16: 0000000000000000 [10919.624577] x15: 0000000000000000 x14: 00000000000009f8 [10919.629874] x13: 00000000000009f8 x12: 0000000000000018 [10919.635170] x11: 0000000000000000 x10: 0000000000000000 [10919.640467] x9 : fffffe00108cd168 x8 : 0000000000000000 [10919.645763] x7 : 0000000000000020 x6 : 0000000000000001 [10919.651059] x5 : 0000000000000002 x4 : 0000000000000001 [10919.656356] x3 : 0000000000000000 x2 : 0000000000000000 [10919.661652] x1 : fffffe836eb40000 x0 : 0000000000000000 [10919.666949] Call trace: [10919.669382] etm_event_start+0xf8/0x100 [10919.673203] etm_event_add+0x40/0x60 [10919.676765] event_sched_in.isra.134+0xcc/0x210 [10919.681281] merge_sched_in+0xb0/0x2a8 [10919.685017] visit_groups_merge.constprop.140+0x15c/0x4b8 [10919.690400] ctx_sched_in+0x15c/0x170 [10919.694048] perf_event_sched_in+0x6c/0xa0 [10919.698130] ctx_resched+0x60/0xa0 [10919.701517] perf_event_exec+0x288/0x2f0 [10919.705425] begin_new_exec+0x4c8/0xf58 [10919.709247] load_elf_binary+0x66c/0xf30 [10919.713155] exec_binprm+0x15c/0x450 [10919.716716] __do_execve_file+0x508/0x748 [10919.720711] __arm64_sys_execve+0x40/0x50 [10919.724707] do_el0_svc+0xf4/0x1b8 [10919.728095] el0_sync_handler+0xf8/0x124 [10919.732003] el0_sync+0x140/0x180 Even though we don't support using separate sinks for the ETMs yet (e.g, for 1:1 configurations), we should at least honor the user's choice and handle the limitations gracefully, by simply skipping the tracing on ETMs which can't reach the requested sink. Fixes: f9d81a657bb8 ("coresight: perf: Allow tracing on hotplugged CPUs") Cc: Mathieu Poirier Cc: Mike Leach Reported-by: Jeremy Linton Tested-by: Jeremy Linton Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200916191737.4001561-11-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 859d510e58dac94f0b204b7b5cccafbc130d2291) Signed-off-by: Greg Kroah-Hartman Change-Id: I924ae31b1b5f3bd87543527549a504f1607d3a0e --- drivers/hwtracing/coresight/coresight-etm-perf.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index fd0352b678cf..9b0c5d719232 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -310,6 +310,16 @@ static void etm_event_start(struct perf_event *event, int flags) if (!event_data) goto fail; + /* + * Check if this ETM is allowed to trace, as decided + * at etm_setup_aux(). This could be due to an unreachable + * sink from this ETM. We can't do much in this case if + * the sink was specified or hinted to the driver. For + * now, simply don't record anything on this ETM. + */ + if (!cpumask_test_cpu(cpu, &event_data->mask)) + goto fail_end_stop; + path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ sink = coresight_get_sink(path); From a666e0b1a5ed06ba05584df5d4733e361c7310ca Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 16 Jul 2020 11:57:46 -0600 Subject: [PATCH 275/636] UPSTREAM: coresight: etm4x: Fix save/restore during cpu idle The ETM state save/restore incorrectly reads/writes some of the 64bit registers (e.g, address comparators, vmid/cid comparators etc.) using 32bit accesses. Ensure we use the appropriate width accessors for the registers. Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Cc: Mathieu Poirier Cc: Mike Leach Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200716175746.3338735-18-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 342c8a1d1d9e418d32fa02d635cf96989f9a986e) Signed-off-by: Greg Kroah-Hartman Change-Id: I5b7931ebe277472ff30c18f03968b92e7decbf89 --- drivers/hwtracing/coresight/coresight-etm4x.c | 16 ++++++++-------- drivers/hwtracing/coresight/coresight-etm4x.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index 272b660ecb5c..f4b0083a44ab 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1178,8 +1178,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) } for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { - state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i)); - state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i)); + state->trcacvr[i] = readq(drvdata->base + TRCACVRn(i)); + state->trcacatr[i] = readq(drvdata->base + TRCACATRn(i)); } /* @@ -1190,10 +1190,10 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) */ for (i = 0; i < drvdata->numcidc; i++) - state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i)); + state->trccidcvr[i] = readq(drvdata->base + TRCCIDCVRn(i)); for (i = 0; i < drvdata->numvmidc; i++) - state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i)); + state->trcvmidcvr[i] = readq(drvdata->base + TRCVMIDCVRn(i)); state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0); state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1); @@ -1291,18 +1291,18 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) } for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { - writel_relaxed(state->trcacvr[i], + writeq_relaxed(state->trcacvr[i], drvdata->base + TRCACVRn(i)); - writel_relaxed(state->trcacatr[i], + writeq_relaxed(state->trcacatr[i], drvdata->base + TRCACATRn(i)); } for (i = 0; i < drvdata->numcidc; i++) - writel_relaxed(state->trccidcvr[i], + writeq_relaxed(state->trccidcvr[i], drvdata->base + TRCCIDCVRn(i)); for (i = 0; i < drvdata->numvmidc; i++) - writel_relaxed(state->trcvmidcvr[i], + writeq_relaxed(state->trcvmidcvr[i], drvdata->base + TRCVMIDCVRn(i)); writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 570baf4e15dc..a0a953ba335d 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -325,7 +325,7 @@ struct etmv4_save_state { u64 trcacvr[ETM_MAX_SINGLE_ADDR_CMP]; u64 trcacatr[ETM_MAX_SINGLE_ADDR_CMP]; u64 trccidcvr[ETMv4_MAX_CTXID_CMP]; - u32 trcvmidcvr[ETM_MAX_VMID_CMP]; + u64 trcvmidcvr[ETM_MAX_VMID_CMP]; u32 trccidcctlr0; u32 trccidcctlr1; u32 trcvmidcctlr0; From 5938fe1b71fdfb4f071123fb78dceefb05006cd8 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 18 May 2020 12:02:40 -0600 Subject: [PATCH 276/636] UPSTREAM: coresight: etm4x: Fix use-after-free of per-cpu etm drvdata etm probe could be deferred due to the dependency in the trace path chain and may be retried. We need to clear the per-cpu etmdrvdata entry for the etm in case of a failure to avoid use-after-free cases as reported below: KASAN use-after-free bug in etm4_cpu_pm_notify(): [ 8.574566] coresight etm0: CPU0: ETM v4.2 initialized [ 8.581920] BUG: KASAN: use-after-free in etm4_cpu_pm_notify+0x580/0x2024 [ 8.581925] Read of size 8 at addr ffffff813304f8c8 by task swapper/3/0 [ 8.581927] [ 8.581934] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G S W 5.4.28 #314 [ 8.587775] coresight etm1: CPU1: ETM v4.2 initialized [ 8.594195] Call trace: [ 8.594205] dump_backtrace+0x0/0x188 [ 8.594209] show_stack+0x20/0x2c [ 8.594216] dump_stack+0xdc/0x144 [ 8.594227] print_address_description+0x3c/0x494 [ 8.594232] __kasan_report+0x144/0x168 [ 8.601598] coresight etm2: CPU2: ETM v4.2 initialized [ 8.602563] kasan_report+0x10/0x18 [ 8.602568] check_memory_region+0x1a4/0x1b4 [ 8.602572] __kasan_check_read+0x18/0x24 [ 8.602577] etm4_cpu_pm_notify+0x580/0x2024 [ 8.665945] notifier_call_chain+0x5c/0x90 [ 8.670166] __atomic_notifier_call_chain+0x90/0xf8 [ 8.675182] cpu_pm_notify+0x40/0x6c [ 8.678858] cpu_pm_enter+0x38/0x80 [ 8.682451] psci_enter_idle_state+0x34/0x70 [ 8.686844] cpuidle_enter_state+0xb8/0x20c [ 8.691143] cpuidle_enter+0x38/0x4c [ 8.694820] call_cpuidle+0x3c/0x68 [ 8.698408] do_idle+0x1a0/0x280 [ 8.701729] cpu_startup_entry+0x24/0x28 [ 8.705768] secondary_start_kernel+0x15c/0x170 [ 8.710423] [ 8.711972] Allocated by task 242: [ 8.715473] __kasan_kmalloc+0xf0/0x1ac [ 8.719426] kasan_slab_alloc+0x14/0x1c [ 8.723375] __kmalloc_track_caller+0x23c/0x388 [ 8.728040] devm_kmalloc+0x38/0x94 [ 8.731632] etm4_probe+0x48/0x3c8 [ 8.735140] amba_probe+0xbc/0x158 [ 8.738645] really_probe+0x144/0x408 [ 8.742412] driver_probe_device+0x70/0x140 [ 8.746716] __device_attach_driver+0x9c/0x110 [ 8.751287] bus_for_each_drv+0x90/0xd8 [ 8.755236] __device_attach+0xb4/0x164 [ 8.759188] device_initial_probe+0x20/0x2c [ 8.763490] bus_probe_device+0x34/0x94 [ 8.767436] device_add+0x34c/0x3e0 [ 8.771029] amba_device_try_add+0x68/0x440 [ 8.775332] amba_deferred_retry_func+0x48/0xc8 [ 8.779997] process_one_work+0x344/0x648 [ 8.784127] worker_thread+0x2ac/0x47c [ 8.787987] kthread+0x128/0x138 [ 8.791313] ret_from_fork+0x10/0x18 [ 8.794993] [ 8.796532] Freed by task 242: [ 8.799684] __kasan_slab_free+0x15c/0x22c [ 8.803897] kasan_slab_free+0x10/0x1c [ 8.807761] kfree+0x25c/0x4bc [ 8.810913] release_nodes+0x240/0x2b0 [ 8.814767] devres_release_all+0x3c/0x54 [ 8.818887] really_probe+0x178/0x408 [ 8.822661] driver_probe_device+0x70/0x140 [ 8.826963] __device_attach_driver+0x9c/0x110 [ 8.831539] bus_for_each_drv+0x90/0xd8 [ 8.835487] __device_attach+0xb4/0x164 [ 8.839431] device_initial_probe+0x20/0x2c [ 8.843732] bus_probe_device+0x34/0x94 [ 8.847678] device_add+0x34c/0x3e0 [ 8.851274] amba_device_try_add+0x68/0x440 [ 8.855576] amba_deferred_retry_func+0x48/0xc8 [ 8.860240] process_one_work+0x344/0x648 [ 8.864366] worker_thread+0x2ac/0x47c [ 8.868228] kthread+0x128/0x138 [ 8.871557] ret_from_fork+0x10/0x18 [ 8.875231] [ 8.876782] The buggy address belongs to the object at ffffff813304f800 [ 8.876782] which belongs to the cache kmalloc-1k of size 1024 [ 8.889632] The buggy address is located 200 bytes inside of [ 8.889632] 1024-byte region [ffffff813304f800, ffffff813304fc00) [ 8.901761] The buggy address belongs to the page: [ 8.906695] page:ffffffff04ac1200 refcount:1 mapcount:0 mapping:ffffff8146c03800 index:0x0 compound_mapcount: 0 [ 8.917047] flags: 0x4000000000010200(slab|head) [ 8.921799] raw: 4000000000010200 dead000000000100 dead000000000122 ffffff8146c03800 [ 8.929753] raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 [ 8.937703] page dumped because: kasan: bad access detected [ 8.943433] [ 8.944974] Memory state around the buggy address: [ 8.949903] ffffff813304f780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 8.957320] ffffff813304f800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 8.964742] >ffffff813304f880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 8.972157] ^ [ 8.977886] ffffff813304f900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 8.985298] ffffff813304f980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 8.992713] ================================================================== Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Reported-by: Sai Prakash Ranjan Tested-by: Sai Prakash Ranjan Cc: Mathieu Poirier Cc: Mike Leach Signed-off-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20200518180242.7916-22-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3f4943d422c5febbb3c764670011a00eb2a86238) Signed-off-by: Greg Kroah-Hartman Change-Id: I6a94a2d3ad18ac582020195e9e671fd22e157953 --- drivers/hwtracing/coresight/coresight-etm4x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index f4b0083a44ab..ce59872234a1 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1494,6 +1494,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) return 0; err_arch_supported: + etmdrvdata[drvdata->cpu] = NULL; if (--etm4_count == 0) { etm4_cpu_pm_unregister(); From 3ad19e8175334c786c4850a12072ddbc8c152621 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Dec 2019 15:31:07 -0700 Subject: [PATCH 277/636] UPSTREAM: coresight: etm4x: Fix unused function warning Some of the newly added code in the etm4x driver is inside of an #ifdef, and some other code is outside of it, leading to a harmless warning when CONFIG_CPU_PM is disabled: drivers/hwtracing/coresight/coresight-etm4x.c:68:13: error: 'etm4_os_lock' defined but not used [-Werror=unused-function] static void etm4_os_lock(struct etmv4_drvdata *drvdata) ^~~~~~~~~~~~ To avoid the warning and simplify the the #ifdef checks, use IS_ENABLED() instead, so the compiler can drop the unused functions without complaining. Fixes: f188b5e76aae ("coresight: etm4x: Save/restore state across CPU low power states") Signed-off-by: Arnd Bergmann [Fixed capital 'f' in title] Signed-off-by: Mathieu Poirier Link: https://lore.kernel.org/r/20191213223107.1484-2-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 500589d8bd73cc4c1fc8dc433b675cea5fe79e86) Signed-off-by: Greg Kroah-Hartman Change-Id: I307db5366f27a8e67e13bba2f8614d21d8152c99 --- drivers/hwtracing/coresight/coresight-etm4x.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index ce59872234a1..66a9c6c9dd30 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -1102,7 +1102,6 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata) drvdata->trcid = coresight_get_trace_id(drvdata->cpu); } -#ifdef CONFIG_CPU_PM static int etm4_cpu_save(struct etmv4_drvdata *drvdata) { int i, ret = 0; @@ -1372,17 +1371,17 @@ static struct notifier_block etm4_cpu_pm_nb = { static int etm4_cpu_pm_register(void) { - return cpu_pm_register_notifier(&etm4_cpu_pm_nb); + if (IS_ENABLED(CONFIG_CPU_PM)) + return cpu_pm_register_notifier(&etm4_cpu_pm_nb); + + return 0; } static void etm4_cpu_pm_unregister(void) { - cpu_pm_unregister_notifier(&etm4_cpu_pm_nb); + if (IS_ENABLED(CONFIG_CPU_PM)) + cpu_pm_unregister_notifier(&etm4_cpu_pm_nb); } -#else -static int etm4_cpu_pm_register(void) { return 0; } -static void etm4_cpu_pm_unregister(void) { } -#endif static int etm4_probe(struct amba_device *adev, const struct amba_id *id) { From b8e72f3c71f6e6322b91c30ec905f49ef31344ef Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 30 Nov 2018 11:43:02 -0700 Subject: [PATCH 278/636] UPSTREAM: coresight: tmc: Fix bad register address for CLAIM Commit 4d3ebd3658d8 ("coreisght: tmc: Claim device before use") uses CLAIM tag to validate if the device is available, it needs to pass the device base address to access related registers. In the function tmc_etb_disable_hw() it wrongly passes the driver data pointer as register base address, thus it's easily to produce the kernel warning info like below: [ 83.579898] WARNING: CPU: 4 PID: 2970 at drivers/hwtracing/coresight/coresight.c:207 coresight_disclaim_device_unlocked+0x44/0x80 [ 83.591448] Modules linked in: [ 83.594485] CPU: 4 PID: 2970 Comm: uname Not tainted 4.19.0-rc6-00417-g721b509 #110 [ 83.602067] Hardware name: ARM Juno development board (r2) (DT) [ 83.607932] pstate: 80000085 (Nzcv daIf -PAN -UAO) [ 83.612681] pc : coresight_disclaim_device_unlocked+0x44/0x80 [ 83.618375] lr : coresight_disclaim_device_unlocked+0x44/0x80 [ 83.624064] sp : ffff00000fe3ba20 [ 83.627347] x29: ffff00000fe3ba20 x28: ffff80002d430dc0 [ 83.632618] x27: ffff800033177c00 x26: ffff80002eb44480 [ 83.637889] x25: 0000000000000001 x24: ffff800033c72600 [ 83.643160] x23: ffff0000099b11f8 x22: ffff0000099b11c8 [ 83.648430] x21: 0000000000000002 x20: ffff800033a90418 [ 83.653701] x19: ffff0000099b11c8 x18: 0000000000000000 [ 83.658971] x17: 0000000000000000 x16: 0000000000000000 [ 83.664241] x15: 0000000000000000 x14: 0000000000000000 [ 83.669511] x13: 0000000000000000 x12: 0000000000000000 [ 83.674782] x11: 0000000000000000 x10: 0000000000000000 [ 83.680052] x9 : 0000000000000000 x8 : 0000000000000001 [ 83.685322] x7 : 0000000000010000 x6 : ffff800033ebab18 [ 83.690593] x5 : ffff800033ebab18 x4 : ffff800033e6c698 [ 83.695862] x3 : 0000000000000001 x2 : 0000000000000000 [ 83.701133] x1 : 0000000000000000 x0 : 0000000000000001 [ 83.706404] Call trace: [ 83.708830] coresight_disclaim_device_unlocked+0x44/0x80 [ 83.714180] coresight_disclaim_device+0x34/0x48 [ 83.718756] tmc_disable_etf_sink+0xc4/0xf0 [ 83.722902] coresight_disable_path_from+0xc8/0x240 [ 83.727735] coresight_disable_path+0x24/0x30 [ 83.732053] etm_event_stop+0x130/0x170 [ 83.735854] etm_event_del+0x24/0x30 [ 83.739399] event_sched_out.isra.51+0xcc/0x1e8 [ 83.743887] group_sched_out.part.53+0x44/0xb0 [ 83.748291] ctx_sched_out+0x298/0x2b8 [ 83.752005] task_ctx_sched_out+0x74/0xa8 [ 83.755980] perf_event_exit_task+0x140/0x418 [ 83.760298] do_exit+0x3f4/0xcf0 [ 83.763497] do_group_exit+0x5c/0xc0 [ 83.767041] __arm64_sys_exit_group+0x24/0x28 [ 83.771359] el0_svc_common+0x110/0x178 [ 83.775160] el0_svc_handler+0x94/0xe8 [ 83.778875] el0_svc+0x8/0xc [ 83.781728] ---[ end trace 02d8d8eac46db9e5 ]--- This patch is to fix this bug by using 'drvdata->base' as the register base address for CLAIM related operation. Fixes: 4d3ebd3658d8 ("coreisght: tmc: Claim device before use") Cc: Suzuki Poulose Cc: Mathieu Poirier Cc: Mike Leach Cc: Robert Walker Signed-off-by: Leo Yan Reviewed-by: Suzuki K Poulose Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 323ed1e0f60b35df55763356d4973a18d5eaea15) Signed-off-by: Greg Kroah-Hartman Change-Id: I74572ff89386b2be714e58ee5fd2daa3d9f363e0 --- drivers/hwtracing/coresight/coresight-tmc-etf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index a16273d997a4..8c819deb2bbc 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -90,7 +90,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata) static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) { - coresight_disclaim_device(drvdata); + coresight_disclaim_device(drvdata->base); __tmc_etb_disable_hw(drvdata); } From a5143eb6abfe96cfcb9051bec3f0933481bdf76e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Aug 2020 13:38:41 -0700 Subject: [PATCH 279/636] UPSTREAM: fscrypt: restrict IV_INO_LBLK_32 to ino_bits <= 32 When an encryption policy has the IV_INO_LBLK_32 flag set, the IV generation method involves hashing the inode number. This is different from fscrypt's other IV generation methods, where the inode number is either not used at all or is included directly in the IVs. Therefore, in principle IV_INO_LBLK_32 can work with any length inode number. However, currently fscrypt gets the inode number from inode::i_ino, which is 'unsigned long'. So currently the implementation limit is actually 32 bits (like IV_INO_LBLK_64), since longer inode numbers will have been truncated by the VFS on 32-bit platforms. Fix fscrypt_supported_v2_policy() to enforce the correct limit. This doesn't actually matter currently, since only ext4 and f2fs support IV_INO_LBLK_32, and they both only support 32-bit inode numbers. But we might as well fix it in case it matters in the future. Ideally inode::i_ino would instead be made 64-bit, but for now it's not needed. (Note, this limit does *not* prevent filesystems with 64-bit inode numbers from adding fscrypt support, since IV_INO_LBLK_* support is optional and is useful only on certain hardware.) Fixes: e3b1078bedd3 ("fscrypt: add support for IV_INO_LBLK_32 policies") Reported-by: Jeff Layton Link: https://lore.kernel.org/r/20200824203841.1707847-1-ebiggers@kernel.org Signed-off-by: Eric Biggers (cherry picked from commit 5e895bd4d5233cb054447d0491d4e63c8496d419) Signed-off-by: Greg Kroah-Hartman Change-Id: I0aa264c7e919cf2055b539953bef63a99d347d8d --- fs/crypto/policy.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index d23ff162c78b..0b32c64eb405 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -178,10 +178,15 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, 32, 32)) return false; + /* + * IV_INO_LBLK_32 hashes the inode number, so in principle it can + * support any ino_bits. However, currently the inode number is gotten + * from inode::i_ino which is 'unsigned long'. So for now the + * implementation limit is 32 bits. + */ if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) && - /* This uses hashed inode numbers, so ino_bits doesn't matter. */ !supported_iv_ino_lblk_policy(policy, inode, "IV_INO_LBLK_32", - INT_MAX, 32)) + 32, 32)) return false; if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) { From 11018ba76525c5994ed664c17cc6e7132f640eb4 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 17 Aug 2020 18:49:50 -0700 Subject: [PATCH 280/636] UPSTREAM: ARM64: vdso32: Install vdso32 from vdso_install Add the 32-bit vdso Makefile to the vdso_install rule so that 'make vdso_install' installs the 32-bit compat vdso when it is compiled. Fixes: a7f71a2c8903 ("arm64: compat: Add vDSO") Signed-off-by: Stephen Boyd Reviewed-by: Vincenzo Frascino Acked-by: Will Deacon Cc: Vincenzo Frascino Link: https://lore.kernel.org/r/20200818014950.42492-1-swboyd@chromium.org Signed-off-by: Catalin Marinas (cherry picked from commit 8d75785a814241587802655cc33e384230744f0c) Signed-off-by: Greg Kroah-Hartman Change-Id: Ib716ae8fc3923dd427ba8b987b7e69fcb0ad55d1 --- arch/arm64/Makefile | 1 + arch/arm64/kernel/vdso32/Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b981ef79e206..fb471be0418d 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -155,6 +155,7 @@ dtbs_install: PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@ # We use MRPROPER_FILES and CLEAN_FILES now archclean: diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 8fddb291c0e2..d6ade5f80fd0 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -202,7 +202,7 @@ quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ # Install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ +quiet_cmd_vdso_install = INSTALL32 $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so vdso.so: $(obj)/vdso.so.dbg From 1091b1b93cc0225bbadef70e065a684baec3b667 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 6 Aug 2020 23:25:54 -0700 Subject: [PATCH 281/636] UPSTREAM: mm/page_alloc: silence a KASAN false positive kernel_init_free_pages() will use memset() on s390 to clear all pages from kmalloc_order() which will override KASAN redzones because a redzone was setup from the end of the allocation size to the end of the last page. Silence it by not reporting it there. An example of the report is, BUG: KASAN: slab-out-of-bounds in __free_pages_ok Write of size 4096 at addr 000000014beaa000 Call Trace: show_stack+0x152/0x210 dump_stack+0x1f8/0x248 print_address_description.isra.13+0x5e/0x4d0 kasan_report+0x130/0x178 check_memory_region+0x190/0x218 memset+0x34/0x60 __free_pages_ok+0x894/0x12f0 kfree+0x4f2/0x5e0 unpack_to_rootfs+0x60e/0x650 populate_rootfs+0x56/0x358 do_one_initcall+0x1f4/0xa20 kernel_init_freeable+0x758/0x7e8 kernel_init+0x1c/0x170 ret_from_fork+0x24/0x28 Memory state around the buggy address: 000000014bea9f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000000014bea9f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >000000014beaa000: 03 fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe ^ 000000014beaa080: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe 000000014beaa100: fe fe fe fe fe fe fe fe fe fe fe fe fe fe Fixes: 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options") Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Tested-by: Vasily Gorbik Acked-by: Vasily Gorbik Cc: Dmitry Vyukov Cc: Christian Borntraeger Cc: Alexander Potapenko Cc: Kees Cook Cc: Heiko Carstens Link: http://lkml.kernel.org/r/20200610052154.5180-1-cai@lca.pw Signed-off-by: Linus Torvalds (cherry picked from commit 9e15afa5a87a3bf969a3b33c3dadfb8b46df42c0) Signed-off-by: Greg Kroah-Hartman Change-Id: I9375f117c1aa08c0d5d0e881bb5a917458f97c99 --- mm/page_alloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6f33013bedea..afc41fad3be5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1072,8 +1072,11 @@ static void kernel_init_free_pages(struct page *page, int numpages) { int i; + /* s390's use of memset() could override KASAN redzones. */ + kasan_disable_current(); for (i = 0; i < numpages; i++) clear_highpage(page + i); + kasan_enable_current(); } static __always_inline bool free_pages_prepare(struct page *page, From 7a41c5760fd56d8c629296e4bc0efe12e174a0eb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Jul 2020 15:59:17 -0700 Subject: [PATCH 282/636] UPSTREAM: fscrypt: use smp_load_acquire() for fscrypt_prepared_key Normally smp_store_release() or cmpxchg_release() is paired with smp_load_acquire(). Sometimes smp_load_acquire() can be replaced with the more lightweight READ_ONCE(). However, for this to be safe, all the published memory must only be accessed in a way that involves the pointer itself. This may not be the case if allocating the object also involves initializing a static or global variable, for example. fscrypt_prepared_key includes a pointer to a crypto_skcipher object, which is internal to and is allocated by the crypto subsystem. By using READ_ONCE() for it, we're relying on internal implementation details of the crypto subsystem. Remove this fragile assumption by using smp_load_acquire() instead. (Note: I haven't seen any real-world problems here. This change is just fixing the code to be guaranteed correct and less fragile.) Fixes: 5fee36095cda ("fscrypt: add inline encryption support") Cc: Satya Tangirala Link: https://lore.kernel.org/r/20200721225920.114347-3-ebiggers@kernel.org Signed-off-by: Eric Biggers (cherry picked from commit 97c6327f7192d7312eabc033b26a41ad6816496c) Signed-off-by: Greg Kroah-Hartman Change-Id: I08747eb4b756d1352d4cd385aff232c2a0a2b405 --- fs/crypto/fscrypt_private.h | 15 +++++++++------ fs/crypto/inline_crypt.c | 6 ++++-- fs/crypto/keysetup.c | 6 ++++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index cca57e177a18..beb3a61d0dd9 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -362,13 +362,16 @@ fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, const struct fscrypt_info *ci) { /* - * The READ_ONCE() here pairs with the smp_store_release() in - * fscrypt_prepare_key(). (This only matters for the per-mode keys, - * which are shared by multiple inodes.) + * The two smp_load_acquire()'s here pair with the smp_store_release()'s + * in fscrypt_prepare_inline_crypt_key() and fscrypt_prepare_key(). + * I.e., in some cases (namely, if this prep_key is a per-mode + * encryption key) another task can publish blk_key or tfm concurrently, + * executing a RELEASE barrier. We need to use smp_load_acquire() here + * to safely ACQUIRE the memory the other task published. */ if (fscrypt_using_inline_encryption(ci)) - return READ_ONCE(prep_key->blk_key) != NULL; - return READ_ONCE(prep_key->tfm) != NULL; + return smp_load_acquire(&prep_key->blk_key) != NULL; + return smp_load_acquire(&prep_key->tfm) != NULL; } #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ @@ -415,7 +418,7 @@ static inline bool fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, const struct fscrypt_info *ci) { - return READ_ONCE(prep_key->tfm) != NULL; + return smp_load_acquire(&prep_key->tfm) != NULL; } #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 6504f053ae57..3b3a684c19d5 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -202,8 +202,10 @@ int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, } } /* - * Pairs with READ_ONCE() in fscrypt_is_key_prepared(). (Only matters - * for the per-mode keys, which are shared by multiple inodes.) + * Pairs with the smp_load_acquire() in fscrypt_is_key_prepared(). + * I.e., here we publish ->blk_key with a RELEASE barrier so that + * concurrent tasks can ACQUIRE it. Note that this concurrency is only + * possible for per-mode keys, not for per-file keys. */ smp_store_release(&prep_key->blk_key, blk_key); return 0; diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 6a198f9be10a..e894f1939a98 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -134,8 +134,10 @@ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, if (IS_ERR(tfm)) return PTR_ERR(tfm); /* - * Pairs with READ_ONCE() in fscrypt_is_key_prepared(). (Only matters - * for the per-mode keys, which are shared by multiple inodes.) + * Pairs with the smp_load_acquire() in fscrypt_is_key_prepared(). + * I.e., here we publish ->tfm with a RELEASE barrier so that + * concurrent tasks can ACQUIRE it. Note that this concurrency is only + * possible for per-mode keys, not for per-file keys. */ smp_store_release(&prep_key->tfm, tfm); return 0; From 73b321a1d271f6a9c5f81697f2fe72fcc246c82a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Jul 2020 11:10:12 -0700 Subject: [PATCH 283/636] UPSTREAM: fscrypt: restrict IV_INO_LBLK_* to AES-256-XTS IV_INO_LBLK_* exist only because of hardware limitations, and currently the only known use case for them involves AES-256-XTS. Therefore, for now only allow them in combination with AES-256-XTS. This way we don't have to worry about them being combined with other encryption modes. (To be clear, combining IV_INO_LBLK_* with other encryption modes *should* work just fine. It's just not being tested, so we can't be 100% sure it works. So with no known use case, it's best to disallow it for now, just like we don't allow other weird combinations like AES-256-XTS contents encryption with Adiantum filenames encryption.) This can be relaxed later if a use case for other combinations arises. Fixes: b103fb7653ff ("fscrypt: add support for IV_INO_LBLK_64 policies") Fixes: e3b1078bedd3 ("fscrypt: add support for IV_INO_LBLK_32 policies") Link: https://lore.kernel.org/r/20200721181012.39308-1-ebiggers@kernel.org Signed-off-by: Eric Biggers (cherry picked from commit f000223c981a7c75f6f3ab7288f0be7b571c3644) Signed-off-by: Greg Kroah-Hartman Change-Id: I243e2c3de549e322a311610fa565ffe87990c8e9 --- fs/crypto/policy.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 0b32c64eb405..026dbb2fa36f 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -77,6 +77,20 @@ static bool supported_iv_ino_lblk_policy(const struct fscrypt_policy_v2 *policy, struct super_block *sb = inode->i_sb; int ino_bits = 64, lblk_bits = 64; + /* + * IV_INO_LBLK_* exist only because of hardware limitations, and + * currently the only known use case for them involves AES-256-XTS. + * That's also all we test currently. For these reasons, for now only + * allow AES-256-XTS here. This can be relaxed later if a use case for + * IV_INO_LBLK_* with other encryption modes arises. + */ + if (policy->contents_encryption_mode != FSCRYPT_MODE_AES_256_XTS) { + fscrypt_warn(inode, + "Can't use %s policy with contents mode other than AES-256-XTS", + type); + return false; + } + /* * It's unsafe to include inode numbers in the IVs if the filesystem can * potentially renumber inodes, e.g. via filesystem shrinking. From 301a95087dc81a9d389e300cef4b00834091387b Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Jul 2020 12:42:57 -0700 Subject: [PATCH 284/636] UPSTREAM: driver core: Don't do deferred probe in parallel with kernel_init thread The current deferred probe implementation can mess up suspend/resume ordering if deferred probe thread is kicked off in parallel with the main initcall thread (kernel_init thread) [1]. For example: Say device-B is a consumer of device-A. Initcall thread Deferred probe thread =============== ===================== 1. device-A is added. 2. device-B is added. 3. dpm_list is now [device-A, device-B]. 4. driver-A defers probe of device-A. 5. device-A is moved to end of dpm_list 6. dpm_list is now [device-B, device-A] 7. driver-B is registereed and probes device-B. 8. dpm_list stays as [device-B, device-A]. The reverse order of dpm_list is used for suspend. So in this case device-A would incorrectly get suspended before device-B. Commit 716a7a259690 ("driver core: fw_devlink: Add support for batching fwnode parsing") kicked off the deferred probe thread early during boot to run in parallel with the initcall thread and caused suspend/resume regressions. This patch removes the parallel run of the deferred probe thread to avoid the suspend/resume regressions. [1] - https://lore.kernel.org/lkml/CAGETcx8W96KAw-d_siTX4qHB_-7ddk0miYRDQeHE6E0_8qx-6Q@mail.gmail.com/ Fixes: 716a7a259690 ("driver core: fw_devlink: Add support for batching fwnode parsing") Signed-off-by: Saravana Kannan Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200701194259.3337652-2-saravanak@google.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit cec72f3efc6272420c2c2c699607f03d09b93e41) Signed-off-by: Greg Kroah-Hartman Change-Id: Ib4c56f6100d4d39f187b75d9e6680b6fb3dfbe52 --- drivers/base/base.h | 1 - drivers/base/core.c | 1 - drivers/base/dd.c | 5 ----- 3 files changed, 7 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 8555552f55ae..559b047de9f7 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -134,7 +134,6 @@ extern char *make_class_name(const char *name, struct kobject *kobj); extern int devres_release_all(struct device *dev); extern void device_block_probing(void); extern void device_unblock_probing(void); -extern void driver_deferred_probe_force_trigger(void); /* /sys/devices directory */ extern struct kset *devices_kset; diff --git a/drivers/base/core.c b/drivers/base/core.c index 766f126f12df..39aee721d4db 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1272,7 +1272,6 @@ void fw_devlink_resume(void) goto out; device_link_add_missing_supplier_links(); - driver_deferred_probe_force_trigger(); out: mutex_unlock(&defer_fw_devlink_lock); } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 244f9f7bcab6..4ba9231a6be8 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -160,11 +160,6 @@ static void driver_deferred_probe_trigger(void) if (!driver_deferred_probe_enable) return; - driver_deferred_probe_force_trigger(); -} - -void driver_deferred_probe_force_trigger(void) -{ /* * A successful probe means that all the devices in the pending list * should be triggered to be reprobed. Move all the deferred devices From 79b3f50b945bfbc1e5c53ebc28cc1124f66063a0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 26 Dec 2019 22:32:04 -0800 Subject: [PATCH 285/636] UPSTREAM: dma-buf: free dmabuf->name in dma_buf_release() dma-buf name can be set via DMA_BUF_SET_NAME ioctl, but once set it never gets freed. Free it in dma_buf_release(). Fixes: bb2bb9030425 ("dma-buf: add DMA_BUF_SET_NAME ioctls") Reported-by: syzbot+b2098bc44728a4efb3e9@syzkaller.appspotmail.com Cc: Greg Hackmann Cc: Chenbo Feng Cc: Sumit Semwal Signed-off-by: Cong Wang Acked-by: Chenbo Feng Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20191227063204.5813-1-xiyou.wangcong@gmail.com (cherry picked from commit d1f37226431f5d9657aa144a40f2383adbcf27e1) Signed-off-by: Greg Kroah-Hartman Change-Id: Iad4b1c7124ec4ee65c55c949559d34e47e42a060 --- drivers/dma-buf/dma-buf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 033c3fad7a23..aaeb4b918e04 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -125,6 +125,7 @@ static int dma_buf_release(struct inode *inode, struct file *file) reservation_object_fini(dmabuf->resv); module_put(dmabuf->owner); + kfree(dmabuf->name); kfree(dmabuf); return 0; } From 2c597ce50e30a4aeaf09d9e36f991416af72e725 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 18 Jun 2020 17:17:19 +0200 Subject: [PATCH 286/636] UPSTREAM: s390/setup: init jump labels before command line parsing Command line parameters might set static keys. This is true for s390 at least since commit 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options"). To avoid the following WARN: static_key_enable_cpuslocked(): static key 'init_on_alloc+0x0/0x40' used before call to jump_label_init() call jump_label_init() just before parse_early_param(). jump_label_init() is safe to call multiple times (x86 does that), doesn't do any memory allocations and hence should be safe to call that early. Fixes: 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options") Cc: # 5.3: d6df52e9996d: s390/maccess: add no DAT mode to kernel_write Cc: # 5.3 Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens (cherry picked from commit 95e61b1b5d6394b53d147c0fcbe2ae70fbe09446) Signed-off-by: Greg Kroah-Hartman Change-Id: I3a7d1aefc3820733f1c73e699652e2446962354e --- arch/s390/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4bda9055daef..81cd304764b4 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -907,6 +907,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) nospec_auto_detect(); + jump_label_init(); parse_early_param(); #ifdef CONFIG_CRASH_DUMP /* Deactivate elfcorehdr= kernel parameter */ From afabf0a99c059a09cbc643d16d2fe6bf18b5a955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Mon, 20 Apr 2020 11:25:07 -0700 Subject: [PATCH 287/636] UPSTREAM: ipv6: ndisc: RFC-ietf-6man-ra-pref64-09 is now published as RFC8781 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See: https://www.rfc-editor.org/authors/rfc8781.txt Cc: Erik Kline Cc: Jen Linkova Cc: Lorenzo Colitti Cc: Michael Haro Signed-off-by: Maciej Żenczykowski Fixes: c24a77edc9a7 ("ipv6: ndisc: add support for 'PREF64' dns64 prefix identifier") Signed-off-by: David S. Miller (cherry picked from commit 9175d3f38816835b0801bacbf4f6aff1a1672b71) Signed-off-by: Greg Kroah-Hartman Change-Id: Id940cd32f7295d598a4d1bf1c073b7cee48cff0a --- include/net/ndisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 35d7c887a5b9..ce42654425ab 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -39,7 +39,7 @@ enum { ND_OPT_DNSSL = 31, /* RFC6106 */ ND_OPT_6CO = 34, /* RFC6775 */ ND_OPT_CAPTIVE_PORTAL = 37, /* RFC7710 */ - ND_OPT_PREF64 = 38, /* RFC-ietf-6man-ra-pref64-09 */ + ND_OPT_PREF64 = 38, /* RFC8781 */ __ND_OPT_MAX }; From 4f8388b191470515b3e776e64ef8aeeda59cea92 Mon Sep 17 00:00:00 2001 From: Naoki Kiryu Date: Wed, 22 Apr 2020 16:43:45 +0200 Subject: [PATCH 288/636] UPSTREAM: usb: typec: altmode: Fix typec_altmode_get_partner sometimes returning an invalid pointer Before this commit, typec_altmode_get_partner would return a const struct typec_altmode * pointing to address 0x08 when to_altmode(adev)->partner was NULL. Add a check for to_altmode(adev)->partner being NULL to fix this. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206365 BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1785972 Fixes: 5f54a85db5df ("usb: typec: Make sure an alt mode exist before getting its partner") Cc: stable@vger.kernel.org Signed-off-by: Naoki Kiryu Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200422144345.43262-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0df9433fcae02215c8fd79690c134d535c7bb905) Signed-off-by: Greg Kroah-Hartman Change-Id: I3b5b69f6d31433e31d886c1dfcf852ce9cc11f4b --- drivers/usb/typec/bus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c index 74cb3c2ecb34..c950171556d8 100644 --- a/drivers/usb/typec/bus.c +++ b/drivers/usb/typec/bus.c @@ -192,7 +192,10 @@ EXPORT_SYMBOL_GPL(typec_altmode_vdm); const struct typec_altmode * typec_altmode_get_partner(struct typec_altmode *adev) { - return adev ? &to_altmode(adev)->partner->adev : NULL; + if (!adev || !to_altmode(adev)->partner) + return NULL; + + return &to_altmode(adev)->partner->adev; } EXPORT_SYMBOL_GPL(typec_altmode_get_partner); From 9c5527b8232760acc147fd89d72282fa23d06ee1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 14 Apr 2020 11:42:48 +0100 Subject: [PATCH 289/636] UPSTREAM: arm64: vdso: don't free unallocated pages The aarch32_vdso_pages[] array never has entries allocated in the C_VVAR or C_VDSO slots, and as the array is zero initialized these contain NULL. However in __aarch32_alloc_vdso_pages() when aarch32_alloc_kuser_vdso_page() fails we attempt to free the page whose struct page is at NULL, which is obviously nonsensical. This patch removes the erroneous page freeing. Fixes: 7c1deeeb0130 ("arm64: compat: VDSO setup for compat layer") Cc: # 5.3.x- Cc: Vincenzo Frascino Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas (cherry picked from commit 9cc3d0c6915aee5140f8335d41bbc3ff1b79aa4e) Signed-off-by: Greg Kroah-Hartman Change-Id: I90fe8e2fb8818bf2d42090f269ce198fdcf1dc7b --- arch/arm64/kernel/vdso.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index ad3a81b2c7ce..3c90b381ac21 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -271,18 +271,7 @@ static int __aarch32_alloc_vdso_pages(void) if (ret) return ret; - ret = aarch32_alloc_kuser_vdso_page(); - if (ret) { - unsigned long c_vvar = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]); - unsigned long c_vdso = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]); - - free_page(c_vvar); - free_page(c_vdso); - } - - return ret; + return aarch32_alloc_kuser_vdso_page(); } #else static int __aarch32_alloc_vdso_pages(void) From e60888994733348b23be0f5c59f3d1287b66087a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Apr 2020 21:04:40 -0700 Subject: [PATCH 290/636] UPSTREAM: mm/filemap.c: don't bother dropping mmap_sem for zero size readahead When handling a page fault, we drop mmap_sem to start async readahead so that we don't block on IO submission with mmap_sem held. However there's no point to drop mmap_sem in case readahead is disabled. Handle that case to avoid pointless dropping of mmap_sem and retrying the fault. This was actually reported to block mlockall(MCL_CURRENT) indefinitely. Fixes: 6b4c9f446981 ("filemap: drop the mmap_sem for all blocking operations") Reported-by: Minchan Kim Reported-by: Robert Stupp Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Reviewed-by: Josef Bacik Reviewed-by: Minchan Kim Link: http://lkml.kernel.org/r/20200212101356.30759-1-jack@suse.cz Signed-off-by: Linus Torvalds (cherry picked from commit 5c72feee3e45b40a3c96c7145ec422899d0e8964) Signed-off-by: Greg Kroah-Hartman Change-Id: Iecc3a1972853a74d2faf70203fa34d912235be0b --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 16645266fabb..9bf5437449c6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2547,7 +2547,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf, pgoff_t offset = vmf->pgoff; /* If we don't want any read-ahead, don't bother */ - if (vmf->vma->vm_flags & VM_RAND_READ) + if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages) return fpin; if (ra->mmap_miss > 0) ra->mmap_miss--; From 597dda48629c18b15d53fff8acc1dfeb4208dd45 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 25 Mar 2020 10:57:54 -0700 Subject: [PATCH 291/636] UPSTREAM: Input: fix stale timestamp on key autorepeat events We need to refresh timestamp when emitting key autorepeat events, otherwise they will carry timestamp of the original key press event. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206929 Fixes: 3b51c44bd693 ("Input: allow drivers specify timestamp for input events") Cc: stable@vger.kernel.org Reported-by: teika kazura Tested-by: teika kazura Signed-off-by: Dmitry Torokhov (cherry picked from commit 4134252ab7e2c339a54302b88496cb5a89cdbaec) Signed-off-by: Greg Kroah-Hartman Change-Id: I7062d933a35b122f0ffc1c06d0dd7e60561ef97c --- drivers/input/input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/input.c b/drivers/input/input.c index 57072d5a6ae0..f96c40aede85 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -193,6 +193,7 @@ static void input_repeat_key(struct timer_list *t) input_value_sync }; + input_set_timestamp(dev, ktime_get()); input_pass_values(dev, vals, ARRAY_SIZE(vals)); if (dev->rep[REP_PERIOD]) From 2420269444b61ae584b0208f16ff19c0a127f479 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Mon, 23 Mar 2020 10:38:51 +0530 Subject: [PATCH 292/636] UPSTREAM: PM: sleep: wakeup: Skip wakeup_source_sysfs_remove() if device is not there Skip wakeup_source_sysfs_remove() to fix a NULL pinter dereference via ws->dev, if the wakeup source is unregistered before registering the wakeup class from device_add(). Fixes: 2ca3d1ecb8c4 ("PM / wakeup: Register wakeup class kobj after device is added") Signed-off-by: Neeraj Upadhyay Cc: 5.4+ # 5.4+ [ rjw: Subject & changelog, white space ] Signed-off-by: Rafael J. Wysocki (cherry picked from commit 87de6594dc45dbf6819f3e0ef92f9331c5a9444c) Signed-off-by: Greg Kroah-Hartman Change-Id: Icaae4b9b5075180aea9941e56c14406b874bd4ec --- drivers/base/power/wakeup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index c248412f1e34..0e8cad8ec4c3 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -244,7 +244,9 @@ void wakeup_source_unregister(struct wakeup_source *ws) { if (ws) { wakeup_source_remove(ws); - wakeup_source_sysfs_remove(ws); + if (ws->dev) + wakeup_source_sysfs_remove(ws); + wakeup_source_destroy(ws); } } From be7cb4739a04ece9c7c3a07c209908fe41a8ea08 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 23 Mar 2020 12:41:09 +0000 Subject: [PATCH 293/636] UPSTREAM: um: Fix header inclusion User Mode Linux is a flavor of x86 that from the vDSO prospective always falls back on system calls. This implies that it does not require any of the unified vDSO definitions and their inclusion causes side effects like this: In file included from include/vdso/processor.h:10:0, from include/vdso/datapage.h:17, from arch/x86/include/asm/vgtod.h:7, from arch/x86/um/../kernel/sys_ia32.c:49: >> arch/x86/include/asm/vdso/processor.h:11:29: error: redefinition of 'rep_nop' static __always_inline void rep_nop(void) ^~~~~~~ In file included from include/linux/rcupdate.h:30:0, from include/linux/rculist.h:11, from include/linux/pid.h:5, from include/linux/sched.h:14, from arch/x86/um/../kernel/sys_ia32.c:25: arch/x86/um/asm/processor.h:24:20: note: previous definition of 'rep_nop' was here static inline void rep_nop(void) Make sure that the unnecessary headers are not included when um is built to address the problem. Fixes: abc22418db02 ("x86/vdso: Enable x86 to use common headers") Reported-by: kbuild test robot Signed-off-by: Vincenzo Frascino Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20200323124109.7104-1-vincenzo.frascino@arm.com (cherry picked from commit 1c1a18b00d7e25d1bed3507880de2da07be704a2) Signed-off-by: Greg Kroah-Hartman Change-Id: Ifc94c3b01a2021a293759dedb44e8ff70c6cebf3 --- arch/x86/include/asm/vgtod.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 44ec25be6bb5..0782d30238b9 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -2,6 +2,11 @@ #ifndef _ASM_X86_VGTOD_H #define _ASM_X86_VGTOD_H +/* + * This check is required to prevent ARCH=um to include + * unwanted headers. + */ +#ifdef CONFIG_GENERIC_GETTIMEOFDAY #include #include #include @@ -12,6 +17,7 @@ typedef u64 gtod_long_t; #else typedef unsigned long gtod_long_t; #endif +#endif /* CONFIG_GENERIC_GETTIMEOFDAY */ extern int vclocks_used; static inline bool vclock_was_used(int vclock) From d2437b10baaa250d8aee2c27511f81ceacf8b7ea Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 11 Mar 2020 11:53:09 +0100 Subject: [PATCH 294/636] UPSTREAM: binderfs: use refcount for binder control devices too Binderfs binder-control devices are cleaned up via binderfs_evict_inode too() which will use refcount_dec_and_test(). However, we missed to set the refcount for binderfs binder-control devices and so we underflowed when the binderfs instance got unmounted. Pretty obvious oversight and should have been part of the more general UAF fix. The good news is that having test cases (suprisingly) helps. Technically, we could detect that we're about to cleanup the binder-control dentry in binderfs_evict_inode() and then simply clean it up. But that makes the assumption that the binder driver itself will never make use of a binderfs binder-control device after the binderfs instance it belongs to has been unmounted and the superblock for it been destroyed. While it is unlikely to ever come to this let's be on the safe side. Performance-wise this also really doesn't matter since the binder-control device is only every really when creating the binderfs filesystem or creating additional binder devices. Both operations are pretty rare. Fixes: f0fe2c0f050d ("binder: prevent UAF for binderfs devices II") Link: https://lore.kernel.org/r/CA+G9fYusdfg7PMfC9Xce-xLT7NiyKSbgojpK35GOm=Pf9jXXrA@mail.gmail.com Reported-by: Naresh Kamboju Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20200311105309.1742827-1-christian.brauner@ubuntu.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 211b64e4b5b6bd5fdc19cd525c2cc9a90e6b0ec9) Signed-off-by: Greg Kroah-Hartman Change-Id: Idb5e9e8945fa95506f3ff4f15bde3efaf65d7c9f --- drivers/android/binderfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 110e41f920c2..f303106b3362 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -448,6 +448,7 @@ static int binderfs_binder_ctl_create(struct super_block *sb) inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; + refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->miscdev.minor = minor; From 8b6a361be6d456d98e46d0726e2775b45fd1e883 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 3 Feb 2020 15:55:09 -0600 Subject: [PATCH 295/636] UPSTREAM: ARM: socfpga_defconfig: Add back DEBUG_FS Commit 0e4a459f56c3 ("tracing: Remove unnecessary DEBUG_FS dependency") removed select for DEBUG_FS but we still need it for development purposes. Fixes: 0e4a459f56c3 ("tracing: Remove unnecessary DEBUG_FS dependency") Signed-off-by: Dinh Nguyen (cherry picked from commit 41bae0caf5dc216c7b43712b8c2b6a0d63b4d655) Signed-off-by: Greg Kroah-Hartman Change-Id: I63d93ae1d42ee6e9d5c8146a355fd0b4f4caf0d4 --- arch/arm/configs/socfpga_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig index 371fca4e1ab7..e5e45314af2a 100644 --- a/arch/arm/configs/socfpga_defconfig +++ b/arch/arm/configs/socfpga_defconfig @@ -159,6 +159,7 @@ CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_FUNCTION_TRACER=y From 4c4abbc052ce21c30f5e87d11d6b21f0dc50e1b8 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 23 Feb 2020 14:02:56 +0100 Subject: [PATCH 296/636] UPSTREAM: ARM: bcm2835_defconfig: Explicitly restore CONFIG_DEBUG_FS The commit 0e4a459f56c3 ("tracing: Remove unnecessary DEBUG_FS dependency") accidentally dropped the DEBUG FS support in bcm2835_defconfig. So restore the config as before the commit. Reported-by: Marek Szyprowski Fixes: 0e4a459f56c3 ("tracing: Remove unnecessary DEBUG_FS dependency") Signed-off-by: Stefan Wahren Signed-off-by: Florian Fainelli (cherry picked from commit 1bba60808404b873defa0f3560497eb2e8fe86b8) Signed-off-by: Greg Kroah-Hartman Change-Id: I48e3487e6dd08252b3ab5925be217a5bc8dc47ee --- arch/arm/configs/bcm2835_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index e9bc88937b1e..0f6d1fe1f1c2 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -167,6 +167,7 @@ CONFIG_SCHED_TRACER=y CONFIG_STACK_TRACER=y CONFIG_FUNCTION_PROFILER=y CONFIG_TEST_KSTRTOX=y +CONFIG_DEBUG_FS=y CONFIG_KGDB=y CONFIG_KGDB_KDB=y CONFIG_STRICT_DEVMEM=y From b8dc99462411a8ca136170e7ac403e3c74b21f45 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 16 Jan 2020 12:09:34 +0100 Subject: [PATCH 297/636] UPSTREAM: PM: hibernate: fix crashes with init_on_free=1 Upon resuming from hibernation, free pages may contain stale data from the kernel that initiated the resume. This breaks the invariant inflicted by init_on_free=1 that freed pages must be zeroed. To deal with this problem, make clear_free_pages() also clear the free pages when init_on_free is enabled. Fixes: 6471384af2a6 ("mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options") Reported-by: Johannes Stezenbach Signed-off-by: Alexander Potapenko Cc: 5.3+ # 5.3+ Signed-off-by: Rafael J. Wysocki (cherry picked from commit 18451f9f9e5810b8bd1245c5ae166f257e0e2b9d) Signed-off-by: Greg Kroah-Hartman Change-Id: I32c22a60082bb3b49eae7de3c7d9d5ae2b965dc5 --- kernel/power/snapshot.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f2635fc751d9..2505d25f89a1 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1145,24 +1145,24 @@ void free_basic_memory_bitmaps(void) void clear_free_pages(void) { -#ifdef CONFIG_PAGE_POISONING_ZERO struct memory_bitmap *bm = free_pages_map; unsigned long pfn; if (WARN_ON(!(free_pages_map))) return; - memory_bm_position_reset(bm); - pfn = memory_bm_next_pfn(bm); - while (pfn != BM_END_OF_MAP) { - if (pfn_valid(pfn)) - clear_highpage(pfn_to_page(pfn)); - + if (IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) || want_init_on_free()) { + memory_bm_position_reset(bm); pfn = memory_bm_next_pfn(bm); + while (pfn != BM_END_OF_MAP) { + if (pfn_valid(pfn)) + clear_highpage(pfn_to_page(pfn)); + + pfn = memory_bm_next_pfn(bm); + } + memory_bm_position_reset(bm); + pr_info("free pages cleared after restore\n"); } - memory_bm_position_reset(bm); - pr_info("free pages cleared after restore\n"); -#endif /* PAGE_POISONING_ZERO */ } /** From a3d2e129b708d2aa1edbb1670e734b266d6395f7 Mon Sep 17 00:00:00 2001 From: Zhengyuan Liu Date: Fri, 20 Dec 2019 10:21:26 +0800 Subject: [PATCH 298/636] UPSTREAM: raid6/test: fix a compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compilation error is redeclaration showed as following: In file included from ../../../include/linux/limits.h:6, from /usr/include/x86_64-linux-gnu/bits/local_lim.h:38, from /usr/include/x86_64-linux-gnu/bits/posix1_lim.h:161, from /usr/include/limits.h:183, from /usr/lib/gcc/x86_64-linux-gnu/8/include-fixed/limits.h:194, from /usr/lib/gcc/x86_64-linux-gnu/8/include-fixed/syslimits.h:7, from /usr/lib/gcc/x86_64-linux-gnu/8/include-fixed/limits.h:34, from ../../../include/linux/raid/pq.h:30, from algos.c:14: ../../../include/linux/types.h:114:15: error: conflicting types for ‘int64_t’ typedef s64 int64_t; ^~~~~~~ In file included from /usr/include/stdint.h:34, from /usr/lib/gcc/x86_64-linux-gnu/8/include/stdint.h:9, from /usr/include/inttypes.h:27, from ../../../include/linux/raid/pq.h:29, from algos.c:14: /usr/include/x86_64-linux-gnu/bits/stdint-intn.h:27:19: note: previous \ declaration of ‘int64_t’ was here typedef __int64_t int64_t; Fixes: 54d50897d544 ("linux/kernel.h: split *_MAX and *_MIN macros into ") Signed-off-by: Zhengyuan Liu Signed-off-by: Song Liu (cherry picked from commit 6b8651aac1dca6140dd7fb4c9fec2736ed3f6223) Signed-off-by: Greg Kroah-Hartman Change-Id: I3e2955373c6c09fc31464055df40662344d2d347 --- include/linux/raid/pq.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index ea8505204fdf..9f3c17506674 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -32,7 +32,6 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; #include #include -#include #include #include #include From cf84a06a47ee5c97c245c311cfceea06d325e038 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 23 Dec 2019 16:38:36 -0500 Subject: [PATCH 299/636] UPSTREAM: selinux: ensure the policy has been loaded before reading the sidtab stats Check to make sure we have loaded a policy before we query the sidtab's hash stats. Failure to do so could result in a kernel panic/oops due to a dereferenced NULL pointer. Fixes: 66f8e2f03c02 ("selinux: sidtab reverse lookup hash table") Reported-by: kernel test robot Signed-off-by: Paul Moore (cherry picked from commit 15b590a81fcdd44ddcb4810f2a6334df8b6ca512) Signed-off-by: Greg Kroah-Hartman Change-Id: I7e18ac17ee6c89d2a63caf02efcfb597f21f2c54 --- security/selinux/ss/services.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 2279dc77d96b..c9c48fc2da7e 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1276,6 +1276,12 @@ int security_sidtab_hash_stats(struct selinux_state *state, char *page) { int rc; + if (!state->initialized) { + pr_err("SELinux: %s: called before initial load_policy\n", + __func__); + return -EINVAL; + } + read_lock(&state->ss->policy_rwlock); rc = sidtab_hash_stats(state->ss->sidtab, page); read_unlock(&state->ss->policy_rwlock); From 641b04730a564aab348f0cd1f6c011ed78f7248b Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 9 Dec 2019 11:31:19 -0800 Subject: [PATCH 300/636] UPSTREAM: of/platform: Unconditionally pause/resume sync state during kernel init Commit 5e6669387e22 ("of/platform: Pause/resume sync state during init and of_platform_populate()") paused/resumed sync state during init only if Linux had parsed and populated a devicetree. However, the check for that (of_have_populated_dt()) can change after of_platform_default_populate_init() executes. One example of this is when devicetree unittests are enabled. This causes an unmatched pause/resume of sync state. To avoid this, just unconditionally pause/resume sync state during init. Fixes: 5e6669387e22 ("of/platform: Pause/resume sync state during init and of_platform_populate()") Reported-by: kernel test robot Signed-off-by: Saravana Kannan Reviewed-by: Frank Rowand Signed-off-by: Rob Herring (cherry picked from commit ee9b280e17dce51c44e1d04d11eb0a4acd0ee1a9) Signed-off-by: Greg Kroah-Hartman Change-Id: I55cd3767daf785f7c46591fcb9c1f9acbb824540 --- drivers/of/platform.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 379544697aa3..1dcf0e863f62 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -526,10 +526,11 @@ static int __init of_platform_default_populate_init(void) { struct device_node *node; + device_links_supplier_sync_state_pause(); + if (!of_have_populated_dt()) return -ENODEV; - device_links_supplier_sync_state_pause(); /* * Handle certain compatibles explicitly, since we don't want to create * platform_devices for every node in /reserved-memory with a @@ -555,8 +556,7 @@ arch_initcall_sync(of_platform_default_populate_init); static int __init of_platform_sync_state_init(void) { - if (of_have_populated_dt()) - device_links_supplier_sync_state_resume(); + device_links_supplier_sync_state_resume(); return 0; } late_initcall_sync(of_platform_sync_state_init); From 34a3a3948ed1dede6d00cab9600fac2476083a80 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 21 Nov 2019 12:26:45 +0000 Subject: [PATCH 301/636] UPSTREAM: virt_wifi: fix use-after-free in virt_wifi_newlink() When virt_wifi interface is created, virt_wifi_newlink() is called and it calls register_netdevice(). if register_netdevice() fails, it internally would call ->priv_destructor(), which is virt_wifi_net_device_destructor() and it frees netdev. but virt_wifi_newlink() still use netdev. So, use-after-free would occur in virt_wifi_newlink(). Test commands: ip link add dummy0 type dummy modprobe bonding ip link add bonding_masters link dummy0 type virt_wifi Splat looks like: [ 202.220554] BUG: KASAN: use-after-free in virt_wifi_newlink+0x88b/0x9a0 [virt_wifi] [ 202.221659] Read of size 8 at addr ffff888061629cb8 by task ip/852 [ 202.222896] CPU: 1 PID: 852 Comm: ip Not tainted 5.4.0-rc5 #3 [ 202.223765] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 202.225073] Call Trace: [ 202.225532] dump_stack+0x7c/0xbb [ 202.226869] print_address_description.constprop.5+0x1be/0x360 [ 202.229362] __kasan_report+0x12a/0x16f [ 202.230714] kasan_report+0xe/0x20 [ 202.232595] virt_wifi_newlink+0x88b/0x9a0 [virt_wifi] [ 202.233370] __rtnl_newlink+0xb9f/0x11b0 [ 202.244909] rtnl_newlink+0x65/0x90 [ ... ] Cc: stable@vger.kernel.org Fixes: c7cdba31ed8b ("mac80211-next: rtnetlink wifi simulation device") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20191121122645.9355-1-ap420073@gmail.com [trim stack dump a bit] Signed-off-by: Johannes Berg (cherry picked from commit bc71d8b580ba81b55b6e15b1c0320632515b4bac) Signed-off-by: Greg Kroah-Hartman Change-Id: I81a9d1f186fa1b24f8d82241998715386526407e --- drivers/net/wireless/virt_wifi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index c9baf4eab960..ba4e7cfbdef1 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -478,7 +478,6 @@ static void virt_wifi_net_device_destructor(struct net_device *dev) */ kfree(dev->ieee80211_ptr); dev->ieee80211_ptr = NULL; - free_netdev(dev); } /* No lock interaction. */ @@ -486,7 +485,7 @@ static void virt_wifi_setup(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &virt_wifi_ops; - dev->priv_destructor = virt_wifi_net_device_destructor; + dev->needs_free_netdev = true; } /* Called in a RCU read critical section from netif_receive_skb */ @@ -572,6 +571,7 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, goto unregister_netdev; } + dev->priv_destructor = virt_wifi_net_device_destructor; priv->being_deleted = false; priv->is_connected = false; priv->is_up = false; From 58b98784f1424b07066be777bd8f740492a06822 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 20 Nov 2019 01:33:20 +0100 Subject: [PATCH 302/636] UPSTREAM: fork: fix pidfd_poll()'s return type pidfd_poll() is defined as returning 'unsigned int' but the .poll method is declared as returning '__poll_t', a bitwise type. Fix this by using the proper return type and using the EPOLL constants instead of the POLL ones, as required for __poll_t. Fixes: b53b0b9d9a61 ("pidfd: add polling support") Cc: Joel Fernandes (Google) Cc: stable@vger.kernel.org # 5.3 Signed-off-by: Luc Van Oostenryck Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20191120003320.31138-1-luc.vanoostenryck@gmail.com Signed-off-by: Christian Brauner (cherry picked from commit 9e77716a75bc6cf54965e5ec069ba7c02b32251c) Signed-off-by: Greg Kroah-Hartman Change-Id: I4cb4824929500d28f8f3165289a59eb4122e04ab --- kernel/fork.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 0bc34542101c..03bee168c64d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1685,11 +1685,11 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) /* * Poll support for process exit notification. */ -static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts) +static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct task_struct *task; struct pid *pid = file->private_data; - int poll_flags = 0; + __poll_t poll_flags = 0; poll_wait(file, &pid->wait_pidfd, pts); @@ -1701,7 +1701,7 @@ static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts) * group, then poll(2) should block, similar to the wait(2) family. */ if (!task || (task->exit_state && thread_group_empty(task))) - poll_flags = POLLIN | POLLRDNORM; + poll_flags = EPOLLIN | EPOLLRDNORM; rcu_read_unlock(); return poll_flags; From ea3411af06c9978df68f061002608224be718bcb Mon Sep 17 00:00:00 2001 From: Ilie Halip Date: Tue, 12 Nov 2019 15:45:20 +0200 Subject: [PATCH 303/636] UPSTREAM: scripts/tools-support-relr.sh: un-quote variables When the CC variable contains quotes, e.g. when using ccache (make CC="ccache "), this script always fails, so CONFIG_RELR is never enabled, even when the toolchain supports this feature. Removing the /dev/null redirect and invoking the script manually shows the issue: $ CC='/usr/bin/ccache clang' ./scripts/tools-support-relr.sh ./scripts/tools-support-relr.sh: 7: ./scripts/tools-support-relr.sh: /usr/bin/ccache clang: not found Fix this by un-quoting the variables. Before: $ make ARCH=arm64 CC='/usr/bin/ccache clang' LD=ld.lld \ NM=llvm-nm OBJCOPY=llvm-objcopy defconfig $ grep RELR .config CONFIG_ARCH_HAS_RELR=y With this change: $ make ARCH=arm64 CC='/usr/bin/ccache clang' LD=ld.lld \ NM=llvm-nm OBJCOPY=llvm-objcopy defconfig $ grep RELR .config CONFIG_TOOLS_SUPPORT_RELR=y CONFIG_ARCH_HAS_RELR=y CONFIG_RELR=y Fixes: 5cf896fb6be3 ("arm64: Add support for relocating the kernel with RELR relocations") Reported-by: Dmitry Golovin Reviewed-by: Nathan Chancellor Reviewed-by: Masahiro Yamada Link: https://github.com/ClangBuiltLinux/linux/issues/769 Cc: Peter Collingbourne Signed-off-by: Ilie Halip Signed-off-by: Will Deacon (cherry picked from commit 65e1f38d9a2f07d4b81f369864c105880e47bd5a) Signed-off-by: Greg Kroah-Hartman Change-Id: I7a8856cd27479cf9c2e1d6a5f812bf5878cc80f3 --- scripts/tools-support-relr.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh index 97a2c844a95e..45e8aa360b45 100755 --- a/scripts/tools-support-relr.sh +++ b/scripts/tools-support-relr.sh @@ -4,13 +4,13 @@ tmp_file=$(mktemp) trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT -cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1 +cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1 void *p = &p; END -"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file +$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file # Despite printing an error message, GNU nm still exits with exit code 0 if it # sees a relr section. So we need to check that nothing is printed to stderr. -test -z "$("$NM" $tmp_file 2>&1 >/dev/null)" +test -z "$($NM $tmp_file 2>&1 >/dev/null)" -"$OBJCOPY" -O binary $tmp_file $tmp_file.bin +$OBJCOPY -O binary $tmp_file $tmp_file.bin From 2ffafc7ac59c6307b2fcc1b27b0062f852c04bc4 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 23 Oct 2019 16:37:45 +0100 Subject: [PATCH 304/636] UPSTREAM: sched/topology: Allow sched_asym_cpucapacity to be disabled While the static key is correctly initialized as being disabled, it will remain forever enabled once turned on. This means that if we start with an asymmetric system and hotplug out enough CPUs to end up with an SMP system, the static key will remain set - which is obviously wrong. We should detect this and turn off things like misfit migration and capacity aware wakeups. As Quentin pointed out, having separate root domains makes this slightly trickier. We could have exclusive cpusets that create an SMP island - IOW, the domains within this root domain will not see any asymmetry. This means we can't just disable the key on domain destruction, we need to count how many asymmetric root domains we have. Consider the following example using Juno r0 which is 2+4 big.LITTLE, where two identical cpusets are created: they both span both big and LITTLE CPUs: asym0 asym1 [ ][ ] L L B L L B $ cgcreate -g cpuset:asym0 $ cgset -r cpuset.cpus=0,1,3 asym0 $ cgset -r cpuset.mems=0 asym0 $ cgset -r cpuset.cpu_exclusive=1 asym0 $ cgcreate -g cpuset:asym1 $ cgset -r cpuset.cpus=2,4,5 asym1 $ cgset -r cpuset.mems=0 asym1 $ cgset -r cpuset.cpu_exclusive=1 asym1 $ cgset -r cpuset.sched_load_balance=0 . (the CPU numbering may look odd because on the Juno LITTLEs are CPUs 0,3-5 and bigs are CPUs 1-2) If we make one of those SMP (IOW remove asymmetry) by e.g. hotplugging its big core, we would end up with an SMP cpuset and an asymmetric cpuset - the static key must remain set, because we still have one asymmetric root domain. With the above example, this could be done with: $ echo 0 > /sys/devices/system/cpu/cpu2/online Which would result in: asym0 asym1 [ ][ ] L L B L L When both SMP and asymmetric cpusets are present, all CPUs will observe sched_asym_cpucapacity being set (it is system-wide), but not all CPUs observe asymmetry in their sched domain hierarchy: per_cpu(sd_asym_cpucapacity, ) == per_cpu(sd_asym_cpucapacity, ) == NULL Change the simple key enablement to an increment, and decrement the key counter when destroying domains that cover asymmetric CPUs. Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dietmar Eggemann Cc: Dietmar.Eggemann@arm.com Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hannes@cmpxchg.org Cc: lizefan@huawei.com Cc: morten.rasmussen@arm.com Cc: qperret@google.com Cc: tj@kernel.org Cc: vincent.guittot@linaro.org Fixes: df054e8445a4 ("sched/topology: Add static_key for asymmetric CPU capacity optimizations") Link: https://lkml.kernel.org/r/20191023153745.19515-3-valentin.schneider@arm.com Signed-off-by: Ingo Molnar (cherry picked from commit e284df705cf1eeedb5ec3a66ed82d17a64659150) Signed-off-by: Greg Kroah-Hartman Change-Id: I40c72d879144df86ca58d4fe83e5d41c4fe292a0 --- kernel/sched/topology.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 6d249207baf7..d4a4a8053400 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -2007,7 +2007,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att rcu_read_unlock(); if (has_asym) - static_branch_enable_cpuslocked(&sched_asym_cpucapacity); + static_branch_inc_cpuslocked(&sched_asym_cpucapacity); ret = 0; error: @@ -2098,8 +2098,12 @@ int sched_init_domains(const struct cpumask *cpu_map) */ static void detach_destroy_domains(const struct cpumask *cpu_map) { + unsigned int cpu = cpumask_any(cpu_map); int i; + if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu))) + static_branch_dec_cpuslocked(&sched_asym_cpucapacity); + rcu_read_lock(); for_each_cpu(i, cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); From 69b6b5ba33338c465a7ec5776c5b40997ac99768 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 21 Oct 2019 18:47:59 +0000 Subject: [PATCH 305/636] UPSTREAM: virt_wifi: fix refcnt leak in module exit routine virt_wifi_newlink() calls netdev_upper_dev_link() and it internally holds reference count of lower interface. Current code does not release a reference count of the lower interface when the lower interface is being deleted. So, reference count leaks occur. Test commands: ip link add dummy0 type dummy ip link add vw1 link dummy0 type virt_wifi ip link del dummy0 Splat looks like: [ 133.787526][ T788] WARNING: CPU: 1 PID: 788 at net/core/dev.c:8274 rollback_registered_many+0x835/0xc80 [ 133.788355][ T788] Modules linked in: virt_wifi cfg80211 dummy team af_packet sch_fq_codel ip_tables x_tables unix [ 133.789377][ T788] CPU: 1 PID: 788 Comm: ip Not tainted 5.4.0-rc3+ #96 [ 133.790069][ T788] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 133.791167][ T788] RIP: 0010:rollback_registered_many+0x835/0xc80 [ 133.791906][ T788] Code: 00 4d 85 ff 0f 84 b5 fd ff ff ba c0 0c 00 00 48 89 de 4c 89 ff e8 9b 58 04 00 48 89 df e8 30 [ 133.794317][ T788] RSP: 0018:ffff88805ba3f338 EFLAGS: 00010202 [ 133.795080][ T788] RAX: ffff88805e57e801 RBX: ffff88805ba34000 RCX: ffffffffa9294723 [ 133.796045][ T788] RDX: 1ffff1100b746816 RSI: 0000000000000008 RDI: ffffffffabcc4240 [ 133.797006][ T788] RBP: ffff88805ba3f4c0 R08: fffffbfff5798849 R09: fffffbfff5798849 [ 133.797993][ T788] R10: 0000000000000001 R11: fffffbfff5798848 R12: dffffc0000000000 [ 133.802514][ T788] R13: ffff88805ba3f440 R14: ffff88805ba3f400 R15: ffff88805ed622c0 [ 133.803237][ T788] FS: 00007f2e9608c0c0(0000) GS:ffff88806cc00000(0000) knlGS:0000000000000000 [ 133.804002][ T788] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 133.804664][ T788] CR2: 00007f2e95610603 CR3: 000000005f68c004 CR4: 00000000000606e0 [ 133.805363][ T788] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 133.806073][ T788] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 133.806787][ T788] Call Trace: [ 133.807069][ T788] ? generic_xdp_install+0x310/0x310 [ 133.807612][ T788] ? lock_acquire+0x164/0x3b0 [ 133.808077][ T788] ? is_bpf_text_address+0x5/0xf0 [ 133.808640][ T788] ? deref_stack_reg+0x9c/0xd0 [ 133.809138][ T788] ? __nla_validate_parse+0x98/0x1ab0 [ 133.809944][ T788] unregister_netdevice_many.part.122+0x13/0x1b0 [ 133.810599][ T788] rtnl_delete_link+0xbc/0x100 [ 133.811073][ T788] ? rtnl_af_register+0xc0/0xc0 [ 133.811672][ T788] rtnl_dellink+0x30e/0x8a0 [ 133.812205][ T788] ? is_bpf_text_address+0x5/0xf0 [ ... ] [ 144.110530][ T788] unregister_netdevice: waiting for dummy0 to become free. Usage count = 1 This patch adds notifier routine to delete upper interface before deleting lower interface. Fixes: c7cdba31ed8b ("mac80211-next: rtnetlink wifi simulation device") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller (cherry picked from commit 1962f86b42ed06ea6af9ff09390243b99d9eb83a) Signed-off-by: Greg Kroah-Hartman Change-Id: I6cd61c66c92363b7f59615ceb753b91bc3965287 --- drivers/net/wireless/virt_wifi.c | 54 ++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index ba4e7cfbdef1..075e25d65634 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -576,6 +576,7 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, priv->is_connected = false; priv->is_up = false; INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete); + __module_get(THIS_MODULE); return 0; unregister_netdev: @@ -606,6 +607,7 @@ static void virt_wifi_dellink(struct net_device *dev, netdev_upper_dev_unlink(priv->lowerdev, dev); unregister_netdevice_queue(dev, head); + module_put(THIS_MODULE); /* Deleting the wiphy is handled in the module destructor. */ } @@ -618,6 +620,42 @@ static struct rtnl_link_ops virt_wifi_link_ops = { .priv_size = sizeof(struct virt_wifi_netdev_priv), }; +static bool netif_is_virt_wifi_dev(const struct net_device *dev) +{ + return rcu_access_pointer(dev->rx_handler) == virt_wifi_rx_handler; +} + +static int virt_wifi_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *lower_dev = netdev_notifier_info_to_dev(ptr); + struct virt_wifi_netdev_priv *priv; + struct net_device *upper_dev; + LIST_HEAD(list_kill); + + if (!netif_is_virt_wifi_dev(lower_dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UNREGISTER: + priv = rtnl_dereference(lower_dev->rx_handler_data); + if (!priv) + return NOTIFY_DONE; + + upper_dev = priv->upperdev; + + upper_dev->rtnl_link_ops->dellink(upper_dev, &list_kill); + unregister_netdevice_many(&list_kill); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block virt_wifi_notifier = { + .notifier_call = virt_wifi_event, +}; + /* Acquires and releases the rtnl lock. */ static int __init virt_wifi_init_module(void) { @@ -626,14 +664,25 @@ static int __init virt_wifi_init_module(void) /* Guaranteed to be locallly-administered and not multicast. */ eth_random_addr(fake_router_bssid); + err = register_netdevice_notifier(&virt_wifi_notifier); + if (err) + return err; + + err = -ENOMEM; common_wiphy = virt_wifi_make_wiphy(); if (!common_wiphy) - return -ENOMEM; + goto notifier; err = rtnl_link_register(&virt_wifi_link_ops); if (err) - virt_wifi_destroy_wiphy(common_wiphy); + goto destroy_wiphy; + return 0; + +destroy_wiphy: + virt_wifi_destroy_wiphy(common_wiphy); +notifier: + unregister_netdevice_notifier(&virt_wifi_notifier); return err; } @@ -643,6 +692,7 @@ static void __exit virt_wifi_cleanup_module(void) /* Will delete any devices that depend on the wiphy. */ rtnl_link_unregister(&virt_wifi_link_ops); virt_wifi_destroy_wiphy(common_wiphy); + unregister_netdevice_notifier(&virt_wifi_notifier); } int virt_wifi_register_network_simulation From 650d5657c27efd76975157b9fbbd4b705606df60 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 2 Oct 2019 17:46:36 +0000 Subject: [PATCH 306/636] UPSTREAM: MIPS: VDSO: Fix build for binutils < 2.25 Versions of binutils prior to 2.25 are unable to link our VDSO due to an unsupported R_MIPS_PC32 relocation generated by the ".word _start - ." line of the inline asm in get_vdso_base(). As such, the intent is that when building with binutils older than 2.25 we don't build code for gettimeofday() & friends in the VDSO that rely upon get_vdso_base(). Commit 24640f233b46 ("mips: Add support for generic vDSO") converted us to using generic VDSO infrastructure, and as part of that the gettimeofday() functionality moved to a new vgettimeofday.c file. The check for binutils < 2.25 wasn't updated to handle this new filename, and so it continues trying to remove the old unused filename from the build. The end result is that we try to include the gettimeofday() code in builds that will fail to link. Fix this by updating the binutils < 2.25 case to remove vgettimeofday.c from obj-vdso-y, rather than gettimeofday.c. Signed-off-by: Paul Burton Fixes: 24640f233b46 ("mips: Add support for generic vDSO") Cc: Vincenzo Frascino Cc: linux-mips@vger.kernel.org (cherry picked from commit 8919975b617197ebe39290d379caa02caf6bf3e3) Signed-off-by: Greg Kroah-Hartman Change-Id: Ief56256c0558bc4e6eebb414264467ec9ac9af51 --- arch/mips/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 920ec7caee96..5c00f8d3d3d6 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -60,7 +60,7 @@ CFLAGS_REMOVE_vgettimeofday.o = -pg ifndef CONFIG_CPU_MIPSR6 ifeq ($(call ld-ifversion, -lt, 225000000, y),y) $(warning MIPS VDSO requires binutils >= 2.25) - obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y)) + obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) ccflags-vdso += -DDISABLE_MIPS_VDSO endif endif From 2d44a033d4559eddb86489b20fa3ab0c8145b607 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 20 Sep 2019 21:54:18 +0200 Subject: [PATCH 307/636] UPSTREAM: cfg80211: validate SSID/MBSSID element ordering assumption The code copying the data assumes that the SSID element is before the MBSSID element, but since the data is untrusted from the AP, this cannot be guaranteed. Validate that this is indeed the case and ignore the MBSSID otherwise, to avoid having to deal with both cases for the copy of data that should be between them. Cc: stable@vger.kernel.org Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Link: https://lore.kernel.org/r/1569009255-I1673911f5eae02964e21bdc11b2bf58e5e207e59@changeid Signed-off-by: Johannes Berg (cherry picked from commit 242b0931c1918c56cd1dc5563fd250a3c39b996d) Signed-off-by: Greg Kroah-Hartman Change-Id: I14b367dd9a100ec688dd1d718d1f6429f83e2a6c --- net/wireless/scan.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index aa60d99242d2..c81d3250047e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1555,7 +1555,12 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, return; new_ie_len -= trans_ssid[1]; mbssid = cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen); - if (!mbssid) + /* + * It's not valid to have the MBSSID element before SSID + * ignore if that happens - the code below assumes it is + * after (while copying things inbetween). + */ + if (!mbssid || mbssid < trans_ssid) return; new_ie_len -= mbssid[1]; rcu_read_lock(); From a06fa34d73bacf36e991b02623d9746f835d2fd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Sep 2019 12:29:34 -0700 Subject: [PATCH 308/636] UPSTREAM: kcm: disable preemption in kcm_parse_func_strparser() After commit a2c11b034142 ("kcm: use BPF_PROG_RUN") syzbot easily triggers the warning in cant_sleep(). As explained in commit 6cab5e90ab2b ("bpf: run bpf programs with preemption disabled") we need to disable preemption before running bpf programs. BUG: assuming atomic context at net/kcm/kcmsock.c:382 in_atomic(): 0, irqs_disabled(): 0, pid: 7, name: kworker/u4:0 3 locks held by kworker/u4:0/7: #0: ffff888216726128 ((wq_completion)kstrp){+.+.}, at: __write_once_size include/linux/compiler.h:226 [inline] #0: ffff888216726128 ((wq_completion)kstrp){+.+.}, at: arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline] #0: ffff888216726128 ((wq_completion)kstrp){+.+.}, at: atomic64_set include/asm-generic/atomic-instrumented.h:855 [inline] #0: ffff888216726128 ((wq_completion)kstrp){+.+.}, at: atomic_long_set include/asm-generic/atomic-long.h:40 [inline] #0: ffff888216726128 ((wq_completion)kstrp){+.+.}, at: set_work_data kernel/workqueue.c:620 [inline] #0: ffff888216726128 ((wq_completion)kstrp){+.+.}, at: set_work_pool_and_clear_pending kernel/workqueue.c:647 [inline] #0: ffff888216726128 ((wq_completion)kstrp){+.+.}, at: process_one_work+0x88b/0x1740 kernel/workqueue.c:2240 #1: ffff8880a989fdc0 ((work_completion)(&strp->work)){+.+.}, at: process_one_work+0x8c1/0x1740 kernel/workqueue.c:2244 #2: ffff888098998d10 (sk_lock-AF_INET){+.+.}, at: lock_sock include/net/sock.h:1522 [inline] #2: ffff888098998d10 (sk_lock-AF_INET){+.+.}, at: strp_sock_lock+0x2e/0x40 net/strparser/strparser.c:440 CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 5.3.0+ #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: kstrp strp_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 __cant_sleep kernel/sched/core.c:6826 [inline] __cant_sleep.cold+0xa4/0xbc kernel/sched/core.c:6803 kcm_parse_func_strparser+0x54/0x200 net/kcm/kcmsock.c:382 __strp_recv+0x5dc/0x1b20 net/strparser/strparser.c:221 strp_recv+0xcf/0x10b net/strparser/strparser.c:343 tcp_read_sock+0x285/0xa00 net/ipv4/tcp.c:1639 strp_read_sock+0x14d/0x200 net/strparser/strparser.c:366 do_strp_work net/strparser/strparser.c:414 [inline] strp_work+0xe3/0x130 net/strparser/strparser.c:423 process_one_work+0x9af/0x1740 kernel/workqueue.c:2269 Fixes: a2c11b034142 ("kcm: use BPF_PROG_RUN") Fixes: 6cab5e90ab2b ("bpf: run bpf programs with preemption disabled") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller (cherry picked from commit 0355d6c1d591b8f9e281783ec0cf95fbed893194) Signed-off-by: Greg Kroah-Hartman Change-Id: Ie65823d4101c1178182a1995f698cf3bfd2b2b70 --- net/kcm/kcmsock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 8cc0e5a3b467..a700cfa533b7 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -381,8 +381,12 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) { struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); struct bpf_prog *prog = psock->bpf_prog; + int res; - return BPF_PROG_RUN(prog, skb); + preempt_disable(); + res = BPF_PROG_RUN(prog, skb); + preempt_enable(); + return res; } static int kcm_read_sock_done(struct strparser *strp, int err) From f8fc36851cb6751e59331eef1433167d00ce4040 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Aug 2019 13:00:15 +0200 Subject: [PATCH 309/636] UPSTREAM: timekeeping/vsyscall: Prevent math overflow in BOOTTIME update The VDSO update for CLOCK_BOOTTIME has a overflow issue as it shifts the nanoseconds based boot time offset left by the clocksource shift. That overflows once the boot time offset becomes large enough. As a consequence CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to misbehave. Fix it by storing a timespec64 representation of the offset when boot time is adjusted and add that to the MONOTONIC base time value in the vdso data page. Using the timespec64 representation avoids a 64bit division in the update code. Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation") Reported-by: Chris Clayton Signed-off-by: Thomas Gleixner Tested-by: Chris Clayton Tested-by: Vincenzo Frascino Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908221257580.1983@nanos.tec.linutronix.de (cherry picked from commit b99328a60a482108f5195b4d611f90992ca016ba) Signed-off-by: Greg Kroah-Hartman Change-Id: I61f5da6821a95b4cb855f737ef0dc94acbb1ccfe --- include/linux/timekeeper_internal.h | 5 +++++ kernel/time/timekeeping.c | 5 +++++ kernel/time/vsyscall.c | 22 +++++++++++++--------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 7acb953298a7..84ff2844df2a 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -57,6 +57,7 @@ struct tk_read_base { * @cs_was_changed_seq: The sequence number of clocksource change events * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP * interval. @@ -84,6 +85,9 @@ struct tk_read_base { * * wall_to_monotonic is no longer the boot time, getboottime must be * used instead. + * + * @monotonic_to_boottime is a timespec64 representation of @offs_boot to + * accelerate the VDSO update for CLOCK_BOOTTIME. */ struct timekeeper { struct tk_read_base tkr_mono; @@ -99,6 +103,7 @@ struct timekeeper { u8 cs_was_changed_seq; ktime_t next_leap_ktime; u64 raw_sec; + struct timespec64 monotonic_to_boot; /* The following members are for timekeeping internal use */ u64 cycle_interval; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c66fd11d94bc..aa798afcb089 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -150,6 +150,11 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm) static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) { tk->offs_boot = ktime_add(tk->offs_boot, delta); + /* + * Timespec representation for VDSO update to avoid 64bit division + * on every update. + */ + tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot); } /* diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 565ab06917a7..15882e4b5c83 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -17,7 +17,7 @@ static inline void update_vdso_data(struct vdso_data *vdata, struct timekeeper *tk) { struct vdso_timestamp *vdso_ts; - u64 nsec; + u64 nsec, sec; vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask; @@ -45,23 +45,27 @@ static inline void update_vdso_data(struct vdso_data *vdata, } vdso_ts->nsec = nsec; - /* CLOCK_MONOTONIC_RAW */ - vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; - vdso_ts->sec = tk->raw_sec; - vdso_ts->nsec = tk->tkr_raw.xtime_nsec; + /* Copy MONOTONIC time for BOOTTIME */ + sec = vdso_ts->sec; + /* Add the boot offset */ + sec += tk->monotonic_to_boot.tv_sec; + nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift; /* CLOCK_BOOTTIME */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME]; - vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec; - nsec += ((u64)(tk->wall_to_monotonic.tv_nsec + - ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift); + vdso_ts->sec = sec; + while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift); vdso_ts->sec++; } vdso_ts->nsec = nsec; + /* CLOCK_MONOTONIC_RAW */ + vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; + vdso_ts->sec = tk->raw_sec; + vdso_ts->nsec = tk->tkr_raw.xtime_nsec; + /* CLOCK_TAI */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI]; vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset; From 7783c5c8ec0d6781dc180be5fd075f06f90e9cb5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Aug 2019 21:49:02 -0700 Subject: [PATCH 310/636] UPSTREAM: page flags: prioritize kasan bits over last-cpuid ARM64 randdconfig builds regularly run into a build error, especially when NUMA_BALANCING and SPARSEMEM are enabled but not SPARSEMEM_VMEMMAP: #error "KASAN: not enough bits in page flags for tag" The last-cpuid bits are already contitional on the available space, so the result of the calculation is a bit random on whether they were already left out or not. Adding the kasan tag bits before last-cpuid makes it much more likely to end up with a successful build here, and should be reliable for randconfig at least, as long as that does not randomize NR_CPUS or NODES_SHIFT but uses the defaults. In order for the modified check to not trigger in the x86 vdso32 code where all constants are wrong (building with -m32), enclose all the definitions with an #ifdef. [arnd@arndb.de: build fix] Link: http://lkml.kernel.org/r/CAK8P3a3Mno1SWTcuAOT0Wa9VS15pdU6EfnkxLbDpyS55yO04+g@mail.gmail.com Link: http://lkml.kernel.org/r/20190722115520.3743282-1-arnd@arndb.de Link: https://lore.kernel.org/lkml/20190618095347.3850490-1-arnd@arndb.de/ Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc") Signed-off-by: Arnd Bergmann Signed-off-by: Arnd Bergmann Reviewed-by: Andrey Konovalov Reviewed-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Will Deacon Cc: Christoph Lameter Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit ee38d94a0ad89890b770f6c876263cf9fcbfde84) Signed-off-by: Greg Kroah-Hartman Change-Id: I71d6f45d73cea80456a0e0b256221ba62216c86c --- arch/mips/include/asm/vdso/vdso.h | 1 + include/linux/page-flags-layout.h | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/mips/include/asm/vdso/vdso.h b/arch/mips/include/asm/vdso/vdso.h index 048d12fbb925..e45beb525f29 100644 --- a/arch/mips/include/asm/vdso/vdso.h +++ b/arch/mips/include/asm/vdso/vdso.h @@ -13,6 +13,7 @@ #if _MIPS_SIM != _MIPS_SIM_ABI64 && defined(CONFIG_64BIT) /* Building 32-bit VDSO for the 64-bit kernel. Fake a 32-bit Kconfig. */ +#define BUILD_VDSO32_64 #undef CONFIG_64BIT #define CONFIG_32BIT 1 #ifndef __ASSEMBLY__ diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 1dda31825ec4..71283739ffd2 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -32,6 +32,7 @@ #endif /* CONFIG_SPARSEMEM */ +#ifndef BUILD_VDSO32_64 /* * page->flags layout: * @@ -76,20 +77,22 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#ifdef CONFIG_KASAN_SW_TAGS +#define KASAN_TAG_WIDTH 8 +#else +#define KASAN_TAG_WIDTH 0 +#endif + +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 #endif -#ifdef CONFIG_KASAN_SW_TAGS -#define KASAN_TAG_WIDTH 8 #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \ > BITS_PER_LONG - NR_PAGEFLAGS -#error "KASAN: not enough bits in page flags for tag" -#endif -#else -#define KASAN_TAG_WIDTH 0 +#error "Not enough bits in page flags" #endif /* @@ -104,4 +107,5 @@ #define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif +#endif #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ From fdf14d23dd7909a4c9f76b18b61a8564b979c8e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Aug 2019 21:48:54 -0700 Subject: [PATCH 311/636] UPSTREAM: kasan: remove clang version check for KASAN_STACK asan-stack mode still uses dangerously large kernel stacks of tens of kilobytes in some drivers, and it does not seem that anyone is working on the clang bug. Turn it off for all clang versions to prevent users from accidentally enabling it once they update to clang-9, and to help automated build testing with clang-9. Link: https://bugs.llvm.org/show_bug.cgi?id=38809 Link: http://lkml.kernel.org/r/20190719200347.2596375-1-arnd@arndb.de Fixes: 6baec880d7a5 ("kasan: turn off asan-stack for clang-8 and earlier") Signed-off-by: Arnd Bergmann Acked-by: Nick Desaulniers Reviewed-by: Mark Brown Reviewed-by: Andrey Ryabinin Cc: Qian Cai Cc: Andrey Konovalov Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit ebb6d35a74ce21ce1673b8f404c1039d5a1e7e2d) Signed-off-by: Greg Kroah-Hartman Change-Id: I402d4d41933caa118effce21dba82c895812200d --- lib/Kconfig.kasan | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 9defbcac7621..2a049c66dc0e 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -105,7 +105,6 @@ endchoice config KASAN_STACK_ENABLE bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST - default !(CLANG_VERSION < 90000) depends on KASAN help The LLVM stack address sanitizer has a know problem that @@ -114,11 +113,11 @@ config KASAN_STACK_ENABLE Disabling asan-stack makes it safe to run kernels build with clang-8 with KASAN enabled, though it loses some of the functionality. - This feature is always disabled when compile-testing with clang-8 - or earlier to avoid cluttering the output in stack overflow - warnings, but clang-8 users can still enable it for builds without - CONFIG_COMPILE_TEST. On gcc and later clang versions it is - assumed to always be safe to use and enabled by default. + This feature is always disabled when compile-testing with clang + to avoid cluttering the output in stack overflow warnings, + but clang users can still enable it for builds without + CONFIG_COMPILE_TEST. On gcc it is assumed to always be safe + to use and enabled by default. config KASAN_STACK int From 4f973dff4012e050b7769a2ecd85f31b0da910e5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 10 Jul 2019 15:01:53 +0200 Subject: [PATCH 312/636] UPSTREAM: timekeeping/vsyscall: Use __iter_div_u64_rem() On 32-bit x86 when building with clang-9, the 'division' loop gets turned back into an inefficient division that causes a link error: kernel/time/vsyscall.o: In function `update_vsyscall': vsyscall.c:(.text+0xe3): undefined reference to `__udivdi3' Use the existing __iter_div_u64_rem() function which is used to address the same issue in other places. Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://lkml.kernel.org/r/20190710130206.1670830-1-arnd@arndb.de (cherry picked from commit 0df1c9868c3a1916198ee09c323ca5932a0b8a11) Signed-off-by: Greg Kroah-Hartman Change-Id: Ia4f87df23b07dd0835ca5f1a6eb49ad2f757aefd --- kernel/time/vsyscall.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 15882e4b5c83..5ee0f7709410 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -108,11 +108,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; nsec = nsec + tk->wall_to_monotonic.tv_nsec; - while (nsec >= NSEC_PER_SEC) { - nsec = nsec - NSEC_PER_SEC; - vdso_ts->sec++; - } - vdso_ts->nsec = nsec; + vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec); update_vdso_data(vdata, tk); From afb8552257c4ad744e370f016017adc496a161b1 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 24 Jun 2019 15:00:19 +0100 Subject: [PATCH 313/636] UPSTREAM: arm64: compat: No need for pre-ARMv7 barriers on an ARMv8 system Remove the deprecated (pre-ARMv7) compat barriers as they would not be used on an ARMv8 system. Fixes: a7f71a2c8903 ("arm64: compat: Add vDSO") Signed-off-by: Catalin Marinas Signed-off-by: Thomas Gleixner Cc: Vincenzo Frascino Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Cc: Shijith Thotton Cc: Andre Przywara Link: https://lkml.kernel.org/r/20190624140018.GD29120@arrakis.emea.arm.com (cherry picked from commit 6a5b78b32d10cd7901f639870eca304b270769f9) Signed-off-by: Greg Kroah-Hartman Change-Id: Ifbe5bca4fad81d309fffd6e5e73defe254781b90 --- arch/arm64/include/asm/vdso/compat_barrier.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index d44313a5d033..3fd8fd6d8fc2 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -18,14 +18,7 @@ #undef dmb #endif -#if __LINUX_ARM_ARCH__ >= 7 #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") -#elif __LINUX_ARM_ARCH__ == 6 -#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ - : : "r" (0) : "memory") -#else -#define dmb(x) __asm__ __volatile__ ("" : : : "memory") -#endif #if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD) #define aarch32_smp_mb() dmb(ish) From 3c5de8a1b548623ab3f9d0389e1423157828de94 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 24 Jun 2019 14:56:24 +0100 Subject: [PATCH 314/636] UPSTREAM: vdso: Remove superfluous #ifdef __KERNEL__ in vdso/datapage.h With the move to UAPI headers, such #ifdefs are no longer necessary. Fixes: 361f8aee9b09 ("vdso: Define standardized vdso_datapage") Signed-off-by: Catalin Marinas Signed-off-by: Thomas Gleixner Cc: Vincenzo Frascino Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: Will Deacon Cc: Arnd Bergmann Cc: Russell King Cc: Ralf Baechle Cc: Paul Burton Cc: Daniel Lezcano Cc: Mark Salyzyn Cc: Peter Collingbourne Cc: Shuah Khan Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Rasmus Villemoes Cc: Huw Davies Cc: Shijith Thotton Cc: Andre Przywara Link: https://lkml.kernel.org/r/20190624135624.GB29120@arrakis.emea.arm.com (cherry picked from commit ed75e8f60bb1d41d751ccad470e15bc2a57adee6) Signed-off-by: Greg Kroah-Hartman Change-Id: I4e51879a0b05da1694c6cb41b0a75d52b7dbbd94 --- include/vdso/datapage.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 925c2acab6a6..96442d540116 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -2,8 +2,6 @@ #ifndef __VDSO_DATAPAGE_H #define __VDSO_DATAPAGE_H -#ifdef __KERNEL__ - #ifndef __ASSEMBLY__ #include @@ -116,6 +114,4 @@ extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden") #endif /* !__ASSEMBLY__ */ -#endif /* __KERNEL__ */ - #endif /* __VDSO_DATAPAGE_H */ From e0ceb101369276b53931b3efcf154c4b4f17f95a Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 23 Jun 2019 14:27:17 +0300 Subject: [PATCH 315/636] UPSTREAM: fork: don't check parent_tidptr with CLONE_PIDFD Give userspace a cheap and reliable way to tell whether CLONE_PIDFD is supported by the kernel or not. The easiest way is to pass an invalid file descriptor value in parent_tidptr, perform the syscall and verify that parent_tidptr has been changed to a valid file descriptor value. CLONE_PIDFD uses parent_tidptr to return pidfds. CLONE_PARENT_SETTID will use parent_tidptr to return the tid of the parent. The two flags cannot be used together. Old kernels that only support CLONE_PARENT_SETTID will not verify the value pointed to by parent_tidptr. This behavior is unchanged even with the introduction of CLONE_PIDFD. However, if CLONE_PIDFD is specified the kernel will currently check the value pointed to by parent_tidptr before placing the pidfd in the memory pointed to. EINVAL will be returned if the value in parent_tidptr is not 0. If CLONE_PIDFD is supported and fd 0 is closed, then the returned pidfd can and likely will be 0 and parent_tidptr will be unchanged. This means userspace must either check CLONE_PIDFD support beforehand or check that fd 0 is not closed when invoking CLONE_PIDFD. The check for pidfd == 0 was introduced during the v5.2 merge window by commit b3e583825266 ("clone: add CLONE_PIDFD") to ensure that CLONE_PIDFD could be potentially extended by passing in flags through the return argument. However, that extension would look horrible, and with the upcoming introduction of the clone3 syscall in v5.3 there is no need to extend legacy clone syscall this way. (Even if it would need to be extended, CLONE_DETACHED can be reused with CLONE_PIDFD.) So remove the pidfd == 0 check. Userspace that needs to be portable to kernels without CLONE_PIDFD support can then be advised to initialize pidfd to -1 and check the pidfd value returned by CLONE_PIDFD. Fixes: b3e583825266 ("clone: add CLONE_PIDFD") Signed-off-by: Dmitry V. Levin Signed-off-by: Christian Brauner (cherry picked from commit 9014143bab2f3bc0b9e5db3bc8d00e2a43e50fbd) Signed-off-by: Greg Kroah-Hartman Change-Id: Ifc531d94c3b9d3b08a6570d5f7779858dd1d6976 --- kernel/fork.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 03bee168c64d..a2f2ad3ee9f9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1829,8 +1829,6 @@ static __latent_entropy struct task_struct *copy_process( } if (clone_flags & CLONE_PIDFD) { - int reserved; - /* * - CLONE_PARENT_SETTID is useless for pidfds and also * parent_tidptr is used to return pidfds. @@ -1841,16 +1839,6 @@ static __latent_entropy struct task_struct *copy_process( if (clone_flags & (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) return ERR_PTR(-EINVAL); - - /* - * Verify that parent_tidptr is sane so we can potentially - * reuse it later. - */ - if (get_user(reserved, parent_tidptr)) - return ERR_PTR(-EFAULT); - - if (reserved != 0) - return ERR_PTR(-EINVAL); } /* From 472105fcb7f108cc7a16a7189611ec7abcb6cf32 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Jun 2019 10:12:45 -0700 Subject: [PATCH 316/636] UPSTREAM: cgroup: Move cgroup_parse_float() implementation out of CONFIG_SYSFS a5e112e6424a ("cgroup: add cgroup_parse_float()") accidentally added cgroup_parse_float() inside CONFIG_SYSFS block. Move it outside so that it doesn't cause failures on !CONFIG_SYSFS builds. Signed-off-by: Tejun Heo Fixes: a5e112e6424a ("cgroup: add cgroup_parse_float()") (cherry picked from commit 38cf3a687f5827fcfc81cbc433ef5822693a49c1) Signed-off-by: Greg Kroah-Hartman Change-Id: Icbd35d6b73b2f7d05a23ad84880c447a6035fa11 --- kernel/cgroup/cgroup.c | 84 +++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e6bad6b3f604..68154bffea45 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6156,6 +6156,48 @@ struct cgroup *cgroup_get_from_fd(int fd) } EXPORT_SYMBOL_GPL(cgroup_get_from_fd); +static u64 power_of_ten(int power) +{ + u64 v = 1; + while (power--) + v *= 10; + return v; +} + +/** + * cgroup_parse_float - parse a floating number + * @input: input string + * @dec_shift: number of decimal digits to shift + * @v: output + * + * Parse a decimal floating point number in @input and store the result in + * @v with decimal point right shifted @dec_shift times. For example, if + * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345. + * Returns 0 on success, -errno otherwise. + * + * There's nothing cgroup specific about this function except that it's + * currently the only user. + */ +int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) +{ + s64 whole, frac = 0; + int fstart = 0, fend = 0, flen; + + if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend)) + return -EINVAL; + if (frac < 0) + return -EINVAL; + + flen = fend > fstart ? fend - fstart : 0; + if (flen < dec_shift) + frac *= power_of_ten(dec_shift - flen); + else + frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift)); + + *v = whole * power_of_ten(dec_shift) + frac; + return 0; +} + /* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. @@ -6332,46 +6374,4 @@ static int __init cgroup_sysfs_init(void) } subsys_initcall(cgroup_sysfs_init); -static u64 power_of_ten(int power) -{ - u64 v = 1; - while (power--) - v *= 10; - return v; -} - -/** - * cgroup_parse_float - parse a floating number - * @input: input string - * @dec_shift: number of decimal digits to shift - * @v: output - * - * Parse a decimal floating point number in @input and store the result in - * @v with decimal point right shifted @dec_shift times. For example, if - * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345. - * Returns 0 on success, -errno otherwise. - * - * There's nothing cgroup specific about this function except that it's - * currently the only user. - */ -int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) -{ - s64 whole, frac = 0; - int fstart = 0, fend = 0, flen; - - if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend)) - return -EINVAL; - if (frac < 0) - return -EINVAL; - - flen = fend > fstart ? fend - fstart : 0; - if (flen < dec_shift) - frac *= power_of_ten(dec_shift - flen); - else - frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift)); - - *v = whole * power_of_ten(dec_shift) + frac; - return 0; -} - #endif /* CONFIG_SYSFS */ From 36960edcc32ce26bcf470e6129c4dc26a44e97ca Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 9 Apr 2019 21:52:35 +0200 Subject: [PATCH 317/636] UPSTREAM: parisc: Switch from DISCONTIGMEM to SPARSEMEM The commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") breaks memory management on a parisc c8000 workstation with this memory layout: 0) Start 0x0000000000000000 End 0x000000003fffffff Size 1024 MB 1) Start 0x0000000100000000 End 0x00000001bfdfffff Size 3070 MB 2) Start 0x0000004040000000 End 0x00000040ffffffff Size 3072 MB With the patch 1c30844d2dfe, the kernel will incorrectly reclaim the first zone when it fills up, ignoring the fact that there are two completely free zones. Basiscally, it limits cache size to 1GiB. The parisc kernel is currently using the DISCONTIGMEM implementation, but isn't NUMA. Avoid this issue or strange work-arounds by switching to the more commonly used SPARSEMEM implementation. Reported-by: Mikulas Patocka Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs") Signed-off-by: Helge Deller (cherry picked from commit dbdf0760990583649bfaca75fd98f76afd5f3905) Signed-off-by: Greg Kroah-Hartman Change-Id: Icea3d5728ab2a218c4f6aa14b2c89984bba484dd --- arch/parisc/Kconfig | 12 ++-- arch/parisc/include/asm/mmzone.h | 58 +--------------- arch/parisc/include/asm/page.h | 4 +- arch/parisc/include/asm/sparsemem.h | 14 ++++ arch/parisc/kernel/parisc_ksyms.c | 6 -- arch/parisc/mm/init.c | 102 +++++++++++++--------------- 6 files changed, 68 insertions(+), 128 deletions(-) create mode 100644 arch/parisc/include/asm/sparsemem.h diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 8e6d83f79e72..7644b6518b6a 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -35,6 +35,7 @@ config PARISC select GENERIC_STRNCPY_FROM_USER select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_EXCEPTION_TRACE + select ARCH_DISCARD_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC select VIRT_TO_BUS select MODULES_USE_ELF_RELA @@ -311,21 +312,16 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on 64BIT -config ARCH_DISCONTIGMEM_ENABLE +config ARCH_SPARSEMEM_ENABLE def_bool y depends on 64BIT config ARCH_FLATMEM_ENABLE def_bool y -config ARCH_DISCONTIGMEM_DEFAULT +config ARCH_SPARSEMEM_DEFAULT def_bool y - depends on ARCH_DISCONTIGMEM_ENABLE - -config NODES_SHIFT - int - default "3" - depends on NEED_MULTIPLE_NODES + depends on ARCH_SPARSEMEM_ENABLE source "kernel/Kconfig.hz" diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h index fafa3893fd70..8d390406d862 100644 --- a/arch/parisc/include/asm/mmzone.h +++ b/arch/parisc/include/asm/mmzone.h @@ -2,62 +2,6 @@ #ifndef _PARISC_MMZONE_H #define _PARISC_MMZONE_H -#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */ +#define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */ -#ifdef CONFIG_DISCONTIGMEM - -extern int npmem_ranges; - -struct node_map_data { - pg_data_t pg_data; -}; - -extern struct node_map_data node_data[]; - -#define NODE_DATA(nid) (&node_data[nid].pg_data) - -/* We have these possible memory map layouts: - * Astro: 0-3.75, 67.75-68, 4-64 - * zx1: 0-1, 257-260, 4-256 - * Stretch (N-class): 0-2, 4-32, 34-xxx - */ - -/* Since each 1GB can only belong to one region (node), we can create - * an index table for pfn to nid lookup; each entry in pfnnid_map - * represents 1GB, and contains the node that the memory belongs to. */ - -#define PFNNID_SHIFT (30 - PAGE_SHIFT) -#define PFNNID_MAP_MAX 512 /* support 512GB */ -extern signed char pfnnid_map[PFNNID_MAP_MAX]; - -#ifndef CONFIG_64BIT -#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT)) -#else -/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */ -#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT)) -#endif - -static inline int pfn_to_nid(unsigned long pfn) -{ - unsigned int i; - - if (unlikely(pfn_is_io(pfn))) - return 0; - - i = pfn >> PFNNID_SHIFT; - BUG_ON(i >= ARRAY_SIZE(pfnnid_map)); - - return pfnnid_map[i]; -} - -static inline int pfn_valid(int pfn) -{ - int nid = pfn_to_nid(pfn); - - if (nid >= 0) - return (pfn < node_end_pfn(nid)); - return 0; -} - -#endif #endif /* _PARISC_MMZONE_H */ diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index af00fe9bf846..936d43999b63 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -145,9 +145,9 @@ extern int npmem_ranges; #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM #define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* CONFIG_DISCONTIGMEM */ +#endif #ifdef CONFIG_HUGETLB_PAGE #define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */ diff --git a/arch/parisc/include/asm/sparsemem.h b/arch/parisc/include/asm/sparsemem.h new file mode 100644 index 000000000000..b5c3a79045b4 --- /dev/null +++ b/arch/parisc/include/asm/sparsemem.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_PARISC_SPARSEMEM_H +#define ASM_PARISC_SPARSEMEM_H + +/* We have these possible memory map layouts: + * Astro: 0-3.75, 67.75-68, 4-64 + * zx1: 0-1, 257-260, 4-256 + * Stretch (N-class): 0-2, 4-32, 34-xxx + */ + +#define MAX_PHYSMEM_BITS 39 /* 512 GB */ +#define SECTION_SIZE_BITS 27 /* 128 MB */ + +#endif diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 7baa2265d439..174213b1716e 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -138,12 +138,6 @@ extern void $$dyncall(void); EXPORT_SYMBOL($$dyncall); #endif -#ifdef CONFIG_DISCONTIGMEM -#include -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(pfnnid_map); -#endif - #ifdef CONFIG_FUNCTION_TRACER extern void _mcount(void); EXPORT_SYMBOL(_mcount); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 10a52664e29f..45ba4ac504ab 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -33,6 +33,7 @@ #include #include #include +#include extern int data_start; extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ @@ -49,11 +50,6 @@ pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE))); pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE))); -#ifdef CONFIG_DISCONTIGMEM -struct node_map_data node_data[MAX_NUMNODES] __read_mostly; -signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; -#endif - static struct resource data_resource = { .name = "Kernel data", .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, @@ -77,8 +73,8 @@ static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly; * information retrieved in kernel/inventory.c. */ -physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly; -int npmem_ranges __read_mostly; +physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata; +int npmem_ranges __initdata; /* * get_memblock() allocates pages via memblock. @@ -111,7 +107,7 @@ static void * __init get_memblock(unsigned long size) } #ifdef CONFIG_64BIT -#define MAX_MEM (~0UL) +#define MAX_MEM (1UL << MAX_PHYSMEM_BITS) #else /* !CONFIG_64BIT */ #define MAX_MEM (3584U*1024U*1024U) #endif /* !CONFIG_64BIT */ @@ -150,7 +146,7 @@ static void __init mem_limit_func(void) static void __init setup_bootmem(void) { unsigned long mem_max; -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1]; int npmem_holes; #endif @@ -168,23 +164,20 @@ static void __init setup_bootmem(void) int j; for (j = i; j > 0; j--) { - unsigned long tmp; + physmem_range_t tmp; if (pmem_ranges[j-1].start_pfn < pmem_ranges[j].start_pfn) { break; } - tmp = pmem_ranges[j-1].start_pfn; - pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn; - pmem_ranges[j].start_pfn = tmp; - tmp = pmem_ranges[j-1].pages; - pmem_ranges[j-1].pages = pmem_ranges[j].pages; - pmem_ranges[j].pages = tmp; + tmp = pmem_ranges[j-1]; + pmem_ranges[j-1] = pmem_ranges[j]; + pmem_ranges[j] = tmp; } } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* * Throw out ranges that are too far apart (controlled by * MAX_GAP). @@ -196,7 +189,7 @@ static void __init setup_bootmem(void) pmem_ranges[i-1].pages) > MAX_GAP) { npmem_ranges = i; printk("Large gap in memory detected (%ld pages). " - "Consider turning on CONFIG_DISCONTIGMEM\n", + "Consider turning on CONFIG_SPARSEMEM\n", pmem_ranges[i].start_pfn - (pmem_ranges[i-1].start_pfn + pmem_ranges[i-1].pages)); @@ -261,9 +254,8 @@ static void __init setup_bootmem(void) printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* Merge the ranges, keeping track of the holes */ - { unsigned long end_pfn; unsigned long hole_pages; @@ -286,18 +278,6 @@ static void __init setup_bootmem(void) } #endif -#ifdef CONFIG_DISCONTIGMEM - for (i = 0; i < MAX_PHYSMEM_RANGES; i++) { - memset(NODE_DATA(i), 0, sizeof(pg_data_t)); - } - memset(pfnnid_map, 0xff, sizeof(pfnnid_map)); - - for (i = 0; i < npmem_ranges; i++) { - node_set_state(i, N_NORMAL_MEMORY); - node_set_online(i); - } -#endif - /* * Initialize and free the full range of memory in each range. */ @@ -338,7 +318,7 @@ static void __init setup_bootmem(void) memblock_reserve(__pa(KERNEL_BINARY_TEXT_START), (unsigned long)(_end - KERNEL_BINARY_TEXT_START)); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* reserve the holes */ @@ -384,6 +364,9 @@ static void __init setup_bootmem(void) /* Initialize Page Deallocation Table (PDT) and check for bad memory. */ pdc_pdt_init(); + + memblock_allow_resize(); + memblock_dump_all(); } static int __init parisc_text_address(unsigned long vaddr) @@ -711,37 +694,46 @@ static void __init gateway_init(void) PAGE_SIZE, PAGE_GATEWAY, 1); } -void __init paging_init(void) +static void __init parisc_bootmem_free(void) { + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; + unsigned long holes_size[MAX_NR_ZONES] = { 0, }; + unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0; int i; + for (i = 0; i < npmem_ranges; i++) { + unsigned long start = pmem_ranges[i].start_pfn; + unsigned long size = pmem_ranges[i].pages; + unsigned long end = start + size; + + if (mem_start_pfn > start) + mem_start_pfn = start; + if (mem_end_pfn < end) + mem_end_pfn = end; + mem_size_pfn += size; + } + + zones_size[0] = mem_end_pfn - mem_start_pfn; + holes_size[0] = zones_size[0] - mem_size_pfn; + + free_area_init_node(0, zones_size, mem_start_pfn, holes_size); +} + +void __init paging_init(void) +{ setup_bootmem(); pagetable_init(); gateway_init(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); - for (i = 0; i < npmem_ranges; i++) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; - - zones_size[ZONE_NORMAL] = pmem_ranges[i].pages; - -#ifdef CONFIG_DISCONTIGMEM - /* Need to initialize the pfnnid_map before we can initialize - the zone */ - { - int j; - for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT); - j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT); - j++) { - pfnnid_map[j] = i; - } - } -#endif - - free_area_init_node(i, zones_size, - pmem_ranges[i].start_pfn, NULL); - } + /* + * Mark all memblocks as present for sparsemem using + * memory_present() and then initialize sparsemem. + */ + memblocks_present(); + sparse_init(); + parisc_bootmem_free(); } #ifdef CONFIG_PA20 From bd9a94a360d0d8bc5e9c6353e647069e85075620 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 18 Apr 2019 17:49:55 -0700 Subject: [PATCH 318/636] UPSTREAM: slab: store tagged freelist for off-slab slabmgmt Commit 51dedad06b5f ("kasan, slab: make freelist stored without tags") calls kasan_reset_tag() for off-slab slab management object leading to freelist being stored non-tagged. However, cache_grow_begin() calls alloc_slabmgmt() which calls kmem_cache_alloc_node() assigns a tag for the address and stores it in the shadow address. As the result, it causes endless errors below during boot due to drain_freelist() -> slab_destroy() -> kasan_slab_free() which compares already untagged freelist against the stored tag in the shadow address. Since off-slab slab management object freelist is such a special case, just store it tagged. Non-off-slab management object freelist is still stored untagged which has not been assigned a tag and should not cause any other troubles with this inconsistency. BUG: KASAN: double-free or invalid-free in slab_destroy+0x84/0x88 Pointer tag: [ff], memory tag: [99] CPU: 0 PID: 1376 Comm: kworker/0:4 Tainted: G W 5.1.0-rc3+ #8 Hardware name: HPE Apollo 70 /C01_APACHE_MB , BIOS L50_5.13_1.0.6 07/10/2018 Workqueue: cgroup_destroy css_killed_work_fn Call trace: print_address_description+0x74/0x2a4 kasan_report_invalid_free+0x80/0xc0 __kasan_slab_free+0x204/0x208 kasan_slab_free+0xc/0x18 kmem_cache_free+0xe4/0x254 slab_destroy+0x84/0x88 drain_freelist+0xd0/0x104 __kmem_cache_shrink+0x1ac/0x224 __kmemcg_cache_deactivate+0x1c/0x28 memcg_deactivate_kmem_caches+0xa0/0xe8 memcg_offline_kmem+0x8c/0x3d4 mem_cgroup_css_offline+0x24c/0x290 css_killed_work_fn+0x154/0x618 process_one_work+0x9cc/0x183c worker_thread+0x9b0/0xe38 kthread+0x374/0x390 ret_from_fork+0x10/0x18 Allocated by task 1625: __kasan_kmalloc+0x168/0x240 kasan_slab_alloc+0x18/0x20 kmem_cache_alloc_node+0x1f8/0x3a0 cache_grow_begin+0x4fc/0xa24 cache_alloc_refill+0x2f8/0x3e8 kmem_cache_alloc+0x1bc/0x3bc sock_alloc_inode+0x58/0x334 alloc_inode+0xb8/0x164 new_inode_pseudo+0x20/0xec sock_alloc+0x74/0x284 __sock_create+0xb0/0x58c sock_create+0x98/0xb8 __sys_socket+0x60/0x138 __arm64_sys_socket+0xa4/0x110 el0_svc_handler+0x2c0/0x47c el0_svc+0x8/0xc Freed by task 1625: __kasan_slab_free+0x114/0x208 kasan_slab_free+0xc/0x18 kfree+0x1a8/0x1e0 single_release+0x7c/0x9c close_pdeo+0x13c/0x43c proc_reg_release+0xec/0x108 __fput+0x2f8/0x784 ____fput+0x1c/0x28 task_work_run+0xc0/0x1b0 do_notify_resume+0xb44/0x1278 work_pending+0x8/0x10 The buggy address belongs to the object at ffff809681b89e00 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 0 bytes inside of 128-byte region [ffff809681b89e00, ffff809681b89e80) The buggy address belongs to the page: page:ffff7fe025a06e00 count:1 mapcount:0 mapping:01ff80082000fb00 index:0xffff809681b8fe04 flags: 0x17ffffffc000200(slab) raw: 017ffffffc000200 ffff7fe025a06d08 ffff7fe022ef7b88 01ff80082000fb00 raw: ffff809681b8fe04 ffff809681b80000 00000001000000e0 0000000000000000 page dumped because: kasan: bad access detected page allocated via order 0, migratetype Unmovable, gfp_mask 0x2420c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_COMP|__GFP_THISNODE) prep_new_page+0x4e0/0x5e0 get_page_from_freelist+0x4ce8/0x50d4 __alloc_pages_nodemask+0x738/0x38b8 cache_grow_begin+0xd8/0xa24 ____cache_alloc_node+0x14c/0x268 __kmalloc+0x1c8/0x3fc ftrace_free_mem+0x408/0x1284 ftrace_free_init_mem+0x20/0x28 kernel_init+0x24/0x548 ret_from_fork+0x10/0x18 Memory state around the buggy address: ffff809681b89c00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe ffff809681b89d00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe >ffff809681b89e00: 99 99 99 99 99 99 99 99 fe fe fe fe fe fe fe fe ^ ffff809681b89f00: 43 43 43 43 43 fe fe fe fe fe fe fe fe fe fe fe ffff809681b8a000: 6d fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe Link: http://lkml.kernel.org/r/20190403022858.97584-1-cai@lca.pw Fixes: 51dedad06b5f ("kasan, slab: make freelist stored without tags") Signed-off-by: Qian Cai Reviewed-by: Andrey Konovalov Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 1a62b18d51e5c5ecc0345c85bb9fef870ab721ed) Signed-off-by: Greg Kroah-Hartman Change-Id: I99046a0e1cb893062167a376795fee3a59f56b36 --- mm/slab.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index fa53bb09495d..4e2be129e568 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2379,7 +2379,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, /* Slab management obj is off-slab. */ freelist = kmem_cache_alloc_node(cachep->freelist_cache, local_flags, nodeid); - freelist = kasan_reset_tag(freelist); if (!freelist) return NULL; } else { From b5f2b9e2042ea44faa88830ec93eb0fbbf3d1d27 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Mar 2019 22:12:45 -0700 Subject: [PATCH 319/636] UPSTREAM: crypto: chacha-generic - fix use as arm64 no-NEON fallback The arm64 implementations of ChaCha and XChaCha are failing the extra crypto self-tests following my patches to test the !may_use_simd() code paths, which previously were untested. The problem is as follows: When !may_use_simd(), the arm64 NEON implementations fall back to the generic implementation, which uses the skcipher_walk API to iterate through the src/dst scatterlists. Due to how the skcipher_walk API works, walk.stride is set from the skcipher_alg actually being used, which in this case is the arm64 NEON algorithm. Thus walk.stride is 5*CHACHA_BLOCK_SIZE, not CHACHA_BLOCK_SIZE. This unnecessarily large stride shouldn't cause an actual problem. However, the generic implementation computes round_down(nbytes, walk.stride). round_down() assumes the round amount is a power of 2, which 5*CHACHA_BLOCK_SIZE is not, so it gives the wrong result. This causes the following case in skcipher_walk_done() to be hit, causing a WARN() and failing the encryption operation: if (WARN_ON(err)) { /* unexpected case; didn't process all bytes */ err = -EINVAL; goto finish; } Fix it by rounding down to CHACHA_BLOCK_SIZE instead of walk.stride. (Or we could replace round_down() with rounddown(), but that would add a slow division operation every time, which I think we should avoid.) Fixes: 2fe55987b262 ("crypto: arm64/chacha - use combined SIMD/ALU routine for more speed") Cc: # v5.0+ Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel Signed-off-by: Herbert Xu (cherry picked from commit 7aceaaef04eaaf6019ca159bc354d800559bba1d) Signed-off-by: Greg Kroah-Hartman Change-Id: Idb028928e0a9ea82a6329aa9148fc2d82d1c037d --- crypto/chacha_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c index daf03beba9de..b42198de8b1a 100644 --- a/crypto/chacha_generic.c +++ b/crypto/chacha_generic.c @@ -31,7 +31,7 @@ static int chacha_stream_xor(struct skcipher_request *req, unsigned int nbytes = walk.nbytes; if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); + nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE); chacha_crypt_generic(state, walk.dst.virt.addr, walk.src.virt.addr, nbytes, ctx->nrounds); From 78d5eb1bf04c1c651b227a6925de4ce98c5259ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Wed, 21 Nov 2018 19:02:15 +0100 Subject: [PATCH 320/636] UPSTREAM: drm/prime: Fix drm_gem_prime_mmap() stack use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/drm_prime.c: In function 'drm_gem_prime_mmap': >> drivers/gpu/drm/drm_prime.c:688:1: warning: the frame size of 1592 bytes is larger than 1024 bytes [-Wframe-larger-than=] Fix by allocating on the heap. Fixes: 7698799f9554 ("drm/prime: Add drm_gem_prime_mmap()") Reported-by: kbuild test robot Cc: Daniel Vetter Cc: Christian König Signed-off-by: Noralf Trønnes Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181121180215.13881-1-noralf@tronnes.org (cherry picked from commit 10fdb7d2ad4244c668f33706f98d019795fc63c8) Signed-off-by: Greg Kroah-Hartman Change-Id: If62398a10b65d2fdeb36a2e8eb3a33e942ce8e60 --- drivers/gpu/drm/drm_prime.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 2f09f3473375..0183b85bc1b2 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -715,24 +715,33 @@ EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); */ int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { - /* Used by drm_gem_mmap() to lookup the GEM object */ - struct drm_file priv = { - .minor = obj->dev->primary, - }; - struct file fil = { - .private_data = &priv, - }; + struct drm_file *priv; + struct file *fil; int ret; - ret = drm_vma_node_allow(&obj->vma_node, &priv); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + fil = kzalloc(sizeof(*fil), GFP_KERNEL); + if (!priv || !fil) { + ret = -ENOMEM; + goto out; + } + + /* Used by drm_gem_mmap() to lookup the GEM object */ + priv->minor = obj->dev->primary; + fil->private_data = priv; + + ret = drm_vma_node_allow(&obj->vma_node, priv); if (ret) - return ret; + goto out; vma->vm_pgoff += drm_vma_node_start(&obj->vma_node); - ret = obj->dev->driver->fops->mmap(&fil, vma); + ret = obj->dev->driver->fops->mmap(fil, vma); - drm_vma_node_revoke(&obj->vma_node, &priv); + drm_vma_node_revoke(&obj->vma_node, priv); +out: + kfree(priv); + kfree(fil); return ret; } From 318d38951e9526a647376c1b3acaad8197317b24 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 13 Nov 2018 21:44:33 -0500 Subject: [PATCH 321/636] UPSTREAM: selinux: fix non-MLS handling in mls_context_to_sid() Commit 95ffe194204a ("selinux: refactor mls_context_to_sid() and make it stricter") inadvertently changed how we handle labels that did not contain MLS information. This patch restores the proper behavior in mls_context_to_sid() and adds a comment explaining the proper behavior to help ensure this doesn't happen again. Fixes: 95ffe194204a ("selinux: refactor mls_context_to_sid() and make it stricter") Reported-by: Stephen Smalley Signed-off-by: Paul Moore (cherry picked from commit 877181a8d9dc663f7a73f77f50af714d7888ec3b) Signed-off-by: Greg Kroah-Hartman Change-Id: I503260e72b6b05b1d4f2be7bc8b04e4538ca5c5f --- security/selinux/ss/mls.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 18ba0c2328fb..5e05f5b902d7 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -245,9 +245,13 @@ int mls_context_to_sid(struct policydb *pol, char *rangep[2]; if (!pol->mls_enabled) { - if ((def_sid != SECSID_NULL && oldc) || (*scontext) == '\0') - return 0; - return -EINVAL; + /* + * With no MLS, only return -EINVAL if there is a MLS field + * and it did not come from an xattr. + */ + if (oldc && def_sid == SECSID_NULL) + return -EINVAL; + return 0; } /* From 1f9f196c54b8bd71dbef4c39d4c5fd6cf150e602 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 24 Oct 2018 14:24:30 -0400 Subject: [PATCH 322/636] UPSTREAM: drm: Fix doc warning in drm_connector_attach_edid_property() Fixes the following warnings: ../drivers/gpu/drm/drm_connector.c:305: warning: Excess function parameter 'dev' description in 'drm_connector_attach_edid_property' ../drivers/gpu/drm/drm_connector.c:306: warning: Excess function parameter 'dev' description in 'drm_connector_attach_edid_property' ../drivers/gpu/drm/drm_connector.c:305: warning: Excess function parameter 'dev' description in 'drm_connector_attach_edid_property' ../drivers/gpu/drm/drm_connector.c:305: warning: Excess function parameter 'dev' description in 'drm_connector_attach_edid_property' ../drivers/gpu/drm/drm_connector.c:305: warning: Excess function parameter 'dev' description in 'drm_connector_attach_edid_property' Fixes: 6b7e2d5c3032 ("drm: add drm_connector_attach_edid_property()") Cc: Gerd Hoffmann Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Reviewed-by: Alex Deucher Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20181024182442.206411-1-sean@poorly.run (cherry picked from commit 9d32bdbb5b5f8bcd5de241ec73208542b872136e) Signed-off-by: Greg Kroah-Hartman Change-Id: Ie0ced7695277c361be7ba48aa92d3339f2360283 --- drivers/gpu/drm/drm_connector.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 42111d7bb18e..47794819e41f 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -290,7 +290,6 @@ EXPORT_SYMBOL(drm_connector_init); /** * drm_connector_attach_edid_property - attach edid property. - * @dev: DRM device * @connector: the connector * * Some connector types like DRM_MODE_CONNECTOR_VIRTUAL do not get a From db9a0291da1dce1e13ff94c4b2561aa07ce16795 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 20 Apr 2020 14:01:02 +0200 Subject: [PATCH 323/636] UPSTREAM: of: property: Do not link to disabled devices When creating a consumer/supplier relationship between two devices, make sure the supplier node is actually active. Otherwise this will create a link relationship that will never be fulfilled. This, in the worst case scenario, will hang the system during boot. Note that, in practice, the fact that a device-tree represented consumer/supplier relationship isn't fulfilled will not prevent devices from successfully probing. Fixes: a3e1d1a7f5fc ("of: property: Add functional dependency link from DT bindings") Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Saravana Kannan Signed-off-by: Rob Herring (cherry picked from commit 7456427af9def0fec5508dd8b861556038ee96a8) Signed-off-by: Greg Kroah-Hartman Change-Id: If9e3e8702dfd7ce955c3167e38cdec4276683bbe --- drivers/of/property.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 6bc0f61596b4..0a5211387347 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1031,8 +1031,20 @@ static int of_link_to_phandle(struct device *dev, struct device_node *sup_np, * Find the device node that contains the supplier phandle. It may be * @sup_np or it may be an ancestor of @sup_np. */ - while (sup_np && !of_find_property(sup_np, "compatible", NULL)) + while (sup_np) { + + /* Don't allow linking to a disabled supplier */ + if (!of_device_is_available(sup_np)) { + of_node_put(sup_np); + sup_np = NULL; + } + + if (of_find_property(sup_np, "compatible", NULL)) + break; + sup_np = of_get_next_parent(sup_np); + } + if (!sup_np) { dev_dbg(dev, "Not linking to %pOFP - No device\n", tmp_np); return -ENODEV; From f821ce6b3c153ae7a981cccb2180f87c5c5af07b Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 20 Apr 2020 14:01:01 +0200 Subject: [PATCH 324/636] UPSTREAM: of: property: Fix create device links for all child-supplier dependencies Upon adding a new device from a DT node, we scan its properties and its children's properties in order to create a consumer/supplier relationship between the device and the property provider. That said, it's possible for some of the node's children to be disabled, which will create links that'll never be fulfilled. To get around this, use the for_each_available_child_of_node() function instead of for_each_available_node() when iterating over the node's children. Fixes: d4387cd11741 ("of: property: Create device links for all child-supplier depencencies") Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Saravana Kannan Signed-off-by: Rob Herring (cherry picked from commit ed3655729182a59b9bef1b564c6fc2dcbbbe954e) Signed-off-by: Greg Kroah-Hartman Change-Id: I7160622405e61a1f852c65b3cbd5866e303c46e7 --- drivers/of/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 0a5211387347..28be835613ce 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1339,7 +1339,7 @@ static int of_link_to_suppliers(struct device *dev, if (of_link_property(dev, con_np, p->name)) ret = -ENODEV; - for_each_child_of_node(con_np, child) + for_each_available_child_of_node(con_np, child) if (of_link_to_suppliers(dev, child) && !ret) ret = -EAGAIN; From 730887909d429cee30d2c147c1364079f1283225 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 29 Apr 2020 16:10:50 +0100 Subject: [PATCH 325/636] UPSTREAM: arm64: vdso: Add -fasynchronous-unwind-tables to cflags On arm64 linux gcc uses -fasynchronous-unwind-tables -funwind-tables by default since gcc-8, so now the de facto platform ABI is to allow unwinding from async signal handlers. However on bare metal targets (aarch64-none-elf), and on old gcc, async and sync unwind tables are not enabled by default to avoid runtime memory costs. This means if linux is built with a baremetal toolchain the vdso.so may not have unwind tables which breaks the gcc platform ABI guarantee in userspace. Add -fasynchronous-unwind-tables explicitly to the vgettimeofday.o cflags to address the ABI change. Fixes: 28b1a824a4f4 ("arm64: vdso: Substitute gettimeofday() with C implementation") Cc: Will Deacon Reported-by: Szabolcs Nagy Signed-off-by: Vincenzo Frascino Signed-off-by: Catalin Marinas (cherry picked from commit 1578e5d03112e3e9d37e1c4d95b6dfb734c73955) Signed-off-by: Greg Kroah-Hartman Change-Id: I5dbd078b47b250e1bcc09538ec52f1a2ceaa1714 --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 7155a6ff320d..5fb656a0b7d7 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -33,7 +33,7 @@ UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y KCOV_INSTRUMENT := n -CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) From 4a1753307d32ac3ef2c202f215a5cec80373fed1 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 11 Nov 2020 21:28:52 -0800 Subject: [PATCH 326/636] ANDROID: Add dependencies of dm-user.ko Leaf changes summary: 11 artifacts changed (377 filtered out) Changed leaf types summary: 1 (280 filtered out) leaf types changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed (92 filtered out), 10 Added functions Removed/Changed/Added variables summary: 0 Removed, 0 Changed (5 filtered out), 0 Added variable 10 Added functions: [A] 'function unsigned long int _copy_from_iter(void*, unsigned long int, iov_iter*)' [A] 'function void bio_advance(bio*, unsigned int)' [A] 'function void bio_endio(bio*)' [A] 'function void bio_put(bio*)' [A] 'function unsigned long int copy_page_from_iter(page*, unsigned long int, unsigned long int, iov_iter*)' [A] 'function unsigned long int copy_page_to_iter(page*, unsigned long int, unsigned long int, iov_iter*)' [A] 'function int dm_register_target(target_type*)' [A] 'function void dm_unregister_target(target_type*)' [A] 'function void mempool_exit(mempool_s*)' [A] 'function int mempool_init(mempool_s*, int, void* (unsigned int, void*)*, void (void*, void*)*, void*)' 'struct cfg80211_bitrate_mask at cfg80211.h:893:1' changed: type size hasn't changed there are data member changes: '__anonymous_struct__1 cfg80211_bitrate_mask::control[3]' has *some* difference - please report as a bug one impacted interface Bug: 171749628 Signed-off-by: Palmer Dabbelt Change-Id: I5ab039467ea5c57ac1ded16bd76f27c0fbcbc05b --- android/abi_gki_aarch64 | 36 +- android/abi_gki_aarch64.xml | 3084 +++++++++++++++++++++-------------- 2 files changed, 1881 insertions(+), 1239 deletions(-) diff --git a/android/abi_gki_aarch64 b/android/abi_gki_aarch64 index 7bb7e004c3de..31b7d495cb9b 100644 --- a/android/abi_gki_aarch64 +++ b/android/abi_gki_aarch64 @@ -10,6 +10,9 @@ dummy_dma_ops find_next_bit find_next_zero_bit + finish_wait + init_wait_entry + __init_waitqueue_head kfree __kmalloc kmalloc_caches @@ -26,16 +29,19 @@ of_property_read_variable_u32_array platform_get_irq platform_get_resource + prepare_to_wait_event printk __put_task_struct ___ratelimit _raw_spin_lock_irqsave _raw_spin_unlock_irqrestore + schedule snprintf __stack_chk_fail __stack_chk_guard strcmp __udelay + __wake_up # required by arm-smmu.ko alloc_io_pgtable_ops @@ -80,6 +86,30 @@ platform_driver_unregister put_device +# required by dm-user.ko + bio_advance + bio_endio + bio_put + __check_object_size + _copy_from_iter + copy_page_from_iter + copy_page_to_iter + _copy_to_iter + dm_register_target + dm_unregister_target + kasprintf + __list_del_entry_valid + __ll_sc_atomic_add + mempool_alloc + mempool_exit + mempool_free + mempool_init + mempool_kfree + mempool_kmalloc + misc_deregister + misc_register + no_llseek + # required by ufshcd-core.ko __alloc_workqueue_key async_schedule @@ -121,13 +151,10 @@ down_write event_triggers_call find_last_bit - finish_wait flush_work free_irq __init_rwsem init_timer_key - init_wait_entry - __init_waitqueue_head jiffies jiffies_to_usecs keyslot_manager_create @@ -150,7 +177,6 @@ __pm_runtime_idle __pm_runtime_resume preempt_schedule_notrace - prepare_to_wait_event print_hex_dump queue_delayed_work_on queue_work_on @@ -162,7 +188,6 @@ regulator_set_load regulator_set_voltage request_threaded_irq - schedule schedule_timeout __scsi_add_device scsi_add_host_with_dma @@ -208,7 +233,6 @@ usleep_range utf16s_to_utf8s wait_for_completion_timeout - __wake_up __warn_printk # required by ufshcd-pltfrm.ko diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index e7b884328a84..e7bd92444d33 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -182,6 +182,7 @@ + @@ -251,9 +252,12 @@ + + + @@ -414,6 +418,8 @@ + + @@ -633,6 +639,8 @@ + + @@ -1397,7 +1405,9 @@ + + @@ -2813,7 +2823,7 @@ - + @@ -3127,14 +3137,6 @@ - - - - - - - - @@ -3312,7 +3314,7 @@ - + @@ -3565,7 +3567,7 @@ - + @@ -5059,7 +5061,7 @@ - + @@ -5211,6 +5213,14 @@ + + + + + + + + @@ -5273,7 +5283,7 @@ - + @@ -5818,7 +5828,7 @@ - + @@ -7100,7 +7110,7 @@ - + @@ -7493,7 +7503,7 @@ - + @@ -7540,7 +7550,7 @@ - + @@ -7589,7 +7599,7 @@ - + @@ -8058,6 +8068,11 @@ + + + + + @@ -8523,7 +8538,7 @@ - + @@ -8679,7 +8694,7 @@ - + @@ -9387,7 +9402,6 @@ - @@ -10374,7 +10388,7 @@ - + @@ -10655,7 +10669,7 @@ - + @@ -11035,7 +11049,7 @@ - + @@ -11154,7 +11168,7 @@ - + @@ -11716,7 +11730,7 @@ - + @@ -12302,7 +12316,7 @@ - + @@ -12422,7 +12436,7 @@ - + @@ -12431,7 +12445,7 @@ - + @@ -14537,6 +14551,17 @@ + + + + + + + + + + + @@ -15620,6 +15645,7 @@ + @@ -16611,7 +16637,7 @@ - + @@ -17872,7 +17898,7 @@ - + @@ -17927,7 +17953,7 @@ - + @@ -18646,6 +18672,10 @@ + + + + @@ -19668,7 +19698,7 @@ - + @@ -19760,7 +19790,7 @@ - + @@ -20078,7 +20108,7 @@ - + @@ -22065,7 +22095,7 @@ - + @@ -22228,7 +22258,7 @@ - + @@ -22879,7 +22909,6 @@ - @@ -22929,7 +22958,6 @@ - @@ -23117,7 +23145,7 @@ - + @@ -23878,7 +23906,7 @@ - + @@ -23921,7 +23949,7 @@ - + @@ -23990,7 +24018,7 @@ - + @@ -24129,7 +24157,7 @@ - + @@ -25235,7 +25263,7 @@ - + @@ -25392,14 +25420,14 @@ - + - + @@ -25419,7 +25447,7 @@ - + @@ -25495,7 +25523,7 @@ - + @@ -25528,7 +25556,7 @@ - + @@ -25571,7 +25599,7 @@ - + @@ -25685,13 +25713,13 @@ - + - + @@ -25725,7 +25753,7 @@ - + @@ -26023,7 +26051,7 @@ - + @@ -26174,7 +26202,7 @@ - + @@ -26286,7 +26314,7 @@ - + @@ -26566,7 +26594,7 @@ - + @@ -26575,12 +26603,12 @@ - + - + @@ -26618,7 +26646,7 @@ - + @@ -26643,7 +26671,7 @@ - + @@ -26878,11 +26906,11 @@ - + - + @@ -27053,7 +27081,7 @@ - + @@ -27158,23 +27186,23 @@ - + - + - + - + @@ -27458,6 +27486,7 @@ + @@ -28048,6 +28077,7 @@ + @@ -28181,7 +28211,7 @@ - + @@ -28248,7 +28278,7 @@ - + @@ -28432,7 +28462,7 @@ - + @@ -28452,7 +28482,7 @@ - + @@ -28475,25 +28505,6 @@ - - - - - - - - - - - - - - - - - - - @@ -28515,7 +28526,7 @@ - + @@ -28524,7 +28535,7 @@ - + @@ -28619,17 +28630,6 @@ - - - - - - - - - - - @@ -28720,24 +28720,41 @@ - + - + - + - + - + - - + + + + - - + + + + + + + + + + + + + + + + + @@ -28781,27 +28798,18 @@ - + - + - + - - + + - - - - - - - - - - - + + @@ -28924,7 +28932,7 @@ - + @@ -28995,7 +29003,10 @@ + + + @@ -29018,14 +29029,14 @@ - + - + @@ -29036,14 +29047,14 @@ - + - + @@ -29064,44 +29075,44 @@ - - + + - + - + - + - + - + - + - + @@ -29113,27 +29124,27 @@ - + - - + + - + - + - + @@ -29144,11 +29155,11 @@ - + - + @@ -29172,7 +29183,7 @@ - + @@ -29271,7 +29282,7 @@ - + @@ -29381,7 +29392,7 @@ - + @@ -29439,7 +29450,9 @@ + + @@ -30156,7 +30169,7 @@ - + @@ -30186,7 +30199,7 @@ - + @@ -30195,7 +30208,7 @@ - + @@ -30360,7 +30373,7 @@ - + @@ -30697,7 +30710,7 @@ - + @@ -30875,13 +30888,13 @@ - + - + @@ -31242,7 +31255,7 @@ - + @@ -31268,7 +31281,7 @@ - + @@ -31281,7 +31294,7 @@ - + @@ -31944,7 +31957,7 @@ - + @@ -32001,7 +32014,7 @@ - + @@ -32027,7 +32040,7 @@ - + @@ -32036,7 +32049,7 @@ - + @@ -32060,7 +32073,7 @@ - + @@ -32121,7 +32134,7 @@ - + @@ -32320,6 +32333,7 @@ + @@ -32331,6 +32345,8 @@ + + @@ -32357,7 +32373,18 @@ - + + + + + + + + + + + + @@ -32395,9 +32422,10 @@ - + + @@ -33002,7 +33030,7 @@ - + @@ -33575,7 +33603,7 @@ - + @@ -34651,7 +34679,7 @@ - + @@ -34948,7 +34976,7 @@ - + @@ -35118,7 +35146,7 @@ - + @@ -35145,7 +35173,7 @@ - + @@ -35406,7 +35434,7 @@ - + @@ -35931,11 +35959,236 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -35950,10 +36203,18 @@ + + + + + + + + @@ -35975,6 +36236,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -36048,22 +36413,10 @@ - + - - - - - - - - - - - - @@ -36074,6 +36427,7 @@ + @@ -36375,7 +36729,7 @@ - + @@ -36574,11 +36928,6 @@ - - - - - @@ -37095,26 +37444,26 @@ - - + + - + - + - + - + - + @@ -37152,105 +37501,105 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37337,13 +37686,13 @@ - - + + - + - + @@ -37357,82 +37706,82 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37444,7 +37793,7 @@ - + @@ -37488,75 +37837,75 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37627,7 +37976,7 @@ - + @@ -37725,18 +38074,18 @@ - + - + - + - + - + @@ -37862,69 +38211,69 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37947,7 +38296,7 @@ - + @@ -37985,7 +38334,7 @@ - + @@ -38081,7 +38430,7 @@ - + @@ -38091,7 +38440,7 @@ - + @@ -38455,289 +38804,289 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -38749,150 +39098,150 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -38908,586 +39257,586 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -40396,6 +40745,11 @@ + + + + + @@ -40558,17 +40912,6 @@ - - - - - - - - - - - @@ -40777,8 +41120,8 @@ - - + + @@ -40793,7 +41136,7 @@ - + @@ -40801,7 +41144,7 @@ - + @@ -41008,7 +41351,7 @@ - + @@ -41038,7 +41381,7 @@ - + @@ -41148,107 +41491,107 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -41436,7 +41779,7 @@ - + @@ -41449,6 +41792,17 @@ + + + + + + + + + + + @@ -41732,7 +42086,7 @@ - + @@ -41972,8 +42326,8 @@ - - + + @@ -41982,7 +42336,7 @@ - + @@ -42276,6 +42630,17 @@ + + + + + + + + + + + @@ -42421,6 +42786,7 @@ + @@ -44492,7 +44858,7 @@ - + @@ -44517,7 +44883,7 @@ - + @@ -45036,7 +45402,7 @@ - + @@ -45303,7 +45669,7 @@ - + @@ -47038,7 +47404,7 @@ - + @@ -47047,7 +47413,7 @@ - + @@ -47121,7 +47487,7 @@ - + @@ -47129,7 +47495,7 @@ - + @@ -48492,6 +48858,7 @@ + @@ -48589,6 +48956,7 @@ + @@ -49390,8 +49758,8 @@ - - + + @@ -49569,7 +49937,7 @@ - + @@ -49688,7 +50056,7 @@ - + @@ -50099,7 +50467,7 @@ - + @@ -50169,7 +50537,7 @@ - + @@ -50535,7 +50903,7 @@ - + @@ -50610,7 +50978,7 @@ - + @@ -51253,7 +51621,7 @@ - + @@ -51278,7 +51646,7 @@ - + @@ -51589,6 +51957,17 @@ + + + + + + + + + + + @@ -51614,6 +51993,7 @@ + @@ -52161,8 +52541,19 @@ + + + + + + + + + + + - + @@ -52292,6 +52683,7 @@ + @@ -53021,6 +53413,14 @@ + + + + + + + + @@ -53121,14 +53521,6 @@ - - - - - - - - @@ -53147,8 +53539,8 @@ - - + + @@ -54113,7 +54505,7 @@ - + @@ -54132,7 +54524,7 @@ - + @@ -54159,7 +54551,7 @@ - + @@ -54170,34 +54562,34 @@ - + - + - + - + - + - + - + - + - + - + @@ -54558,7 +54950,7 @@ - + @@ -55092,12 +55484,12 @@ - + - + @@ -55107,7 +55499,7 @@ - + @@ -55193,7 +55585,7 @@ - + @@ -55438,7 +55830,7 @@ - + @@ -55584,7 +55976,7 @@ - + @@ -56533,7 +56925,7 @@ - + @@ -57085,7 +57477,7 @@ - + @@ -57095,7 +57487,7 @@ - + @@ -57134,7 +57526,7 @@ - + @@ -57155,7 +57547,7 @@ - + @@ -57188,12 +57580,12 @@ - + - + @@ -57282,7 +57674,7 @@ - + @@ -57294,7 +57686,7 @@ - + @@ -58257,7 +58649,7 @@ - + @@ -58383,7 +58775,7 @@ - + @@ -58391,7 +58783,7 @@ - + @@ -58434,7 +58826,7 @@ - + @@ -58460,7 +58852,7 @@ - + @@ -58468,7 +58860,7 @@ - + @@ -59358,6 +59750,17 @@ + + + + + + + + + + + @@ -59477,17 +59880,6 @@ - - - - - - - - - - - @@ -60419,7 +60811,7 @@ - + @@ -60456,7 +60848,7 @@ - + @@ -60643,7 +61035,7 @@ - + @@ -60689,7 +61081,7 @@ - + @@ -61727,7 +62119,7 @@ - + @@ -61739,7 +62131,7 @@ - + @@ -62278,7 +62670,7 @@ - + @@ -62296,7 +62688,7 @@ - + @@ -62627,7 +63019,7 @@ - + @@ -62650,15 +63042,15 @@ - + - + - + - + @@ -62825,67 +63217,67 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -63135,7 +63527,7 @@ - + @@ -63509,6 +63901,17 @@ + + + + + + + + + + + @@ -63626,17 +64029,17 @@ - + - + - + @@ -63647,7 +64050,7 @@ - + @@ -63804,6 +64207,7 @@ + @@ -63813,7 +64217,7 @@ - + @@ -63941,30 +64345,30 @@ - + - + - + - + - + @@ -64032,13 +64436,13 @@ - + - + @@ -64379,7 +64783,7 @@ - + @@ -65464,6 +65868,17 @@ + + + + + + + + + + + @@ -65497,7 +65912,7 @@ - + @@ -65513,7 +65928,7 @@ - + @@ -65537,7 +65952,7 @@ - + @@ -65579,6 +65994,7 @@ + @@ -65627,11 +66043,23 @@ + + + + + + + + + + + + @@ -65648,24 +66076,24 @@ - + - + - + - - - - + + + + - + @@ -65684,7 +66112,7 @@ - + @@ -65772,6 +66200,14 @@ + + + + + + + + @@ -65898,14 +66334,6 @@ - - - - - - - - @@ -65929,6 +66357,17 @@ + + + + + + + + + + + @@ -65978,6 +66417,7 @@ + @@ -66164,6 +66604,17 @@ + + + + + + + + + + + @@ -66217,7 +66668,7 @@ - + @@ -66233,7 +66684,7 @@ - + @@ -66337,6 +66788,7 @@ + @@ -66649,7 +67101,7 @@ - + @@ -66670,27 +67122,31 @@ - + - + - + + + + + - + - + - + @@ -66699,6 +67155,15 @@ + + + + + + + + + @@ -66709,14 +67174,34 @@ + + + + + + + + + + + + + + + + + + + - + - + + @@ -66732,7 +67217,7 @@ - + @@ -66869,7 +67354,7 @@ - + @@ -67144,6 +67629,17 @@ + + + + + + + + + + + @@ -67213,6 +67709,7 @@ + @@ -67416,7 +67913,7 @@ - + @@ -67426,7 +67923,7 @@ - + @@ -67467,6 +67964,17 @@ + + + + + + + + + + + @@ -67685,6 +68193,7 @@ + @@ -67861,7 +68370,7 @@ - + @@ -68146,6 +68655,10 @@ + + + + @@ -68162,12 +68675,24 @@ + + + + + + + + + + + + @@ -68256,6 +68781,17 @@ + + + + + + + + + + + @@ -68268,6 +68804,7 @@ + @@ -68280,6 +68817,7 @@ + @@ -68325,7 +68863,7 @@ - + @@ -68353,7 +68891,7 @@ - + @@ -68899,10 +69437,10 @@ - - - - + + + + @@ -69487,32 +70025,32 @@ - - - - + + + + - - + + - - - - + + + + - - + + - - + + - - + + @@ -69888,6 +70426,14 @@ + + + + + + + + @@ -69939,7 +70485,7 @@ - + @@ -70230,75 +70776,75 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -71350,7 +71896,7 @@ - + @@ -72443,7 +72989,7 @@ - + @@ -72483,7 +73029,7 @@ - + @@ -74309,7 +74855,7 @@ - + @@ -74396,10 +74942,10 @@ - + - + @@ -74408,7 +74954,7 @@ - + @@ -74543,7 +75089,7 @@ - + @@ -74633,7 +75179,7 @@ - + @@ -75392,7 +75938,7 @@ - + @@ -76259,7 +76805,7 @@ - + @@ -76274,7 +76820,7 @@ - + @@ -76418,7 +76964,7 @@ - + @@ -76521,7 +77067,7 @@ - + @@ -76695,7 +77241,7 @@ - + @@ -76826,7 +77372,7 @@ - + @@ -76958,7 +77504,7 @@ - + @@ -77198,7 +77744,7 @@ - + @@ -77465,7 +78011,7 @@ - + @@ -77791,7 +78337,7 @@ - + @@ -77826,7 +78372,7 @@ - + @@ -78054,7 +78600,7 @@ - + @@ -78128,7 +78674,7 @@ - + @@ -78352,7 +78898,7 @@ - + @@ -78360,7 +78906,7 @@ - + @@ -78501,7 +79047,7 @@ - + @@ -78871,7 +79417,7 @@ - + @@ -79723,7 +80269,7 @@ - + @@ -80120,7 +80666,7 @@ - + @@ -80177,18 +80723,29 @@ - + - + - + - + + + + + + + + + + + + @@ -80259,7 +80816,7 @@ - + @@ -80435,7 +80992,7 @@ - + @@ -80512,7 +81069,7 @@ - + @@ -80534,7 +81091,7 @@ - + @@ -80556,7 +81113,7 @@ - + @@ -80564,7 +81121,7 @@ - + @@ -80573,7 +81130,7 @@ - + @@ -80602,7 +81159,7 @@ - + @@ -80666,7 +81223,7 @@ - + @@ -80719,8 +81276,8 @@ - - + + @@ -81176,6 +81733,7 @@ + @@ -81265,8 +81823,8 @@ - - + + @@ -81429,17 +81987,17 @@ - + - + - + @@ -81971,7 +82529,7 @@ - + @@ -82233,7 +82791,7 @@ - + @@ -82341,7 +82899,7 @@ - + @@ -82420,11 +82978,11 @@ - + - + @@ -82436,6 +82994,11 @@ + + + + + @@ -82799,7 +83362,7 @@ - + @@ -82815,7 +83378,7 @@ - + @@ -82823,7 +83386,7 @@ - + @@ -82874,7 +83437,7 @@ - + @@ -82906,7 +83469,7 @@ - + @@ -82955,7 +83518,7 @@ - + @@ -82963,7 +83526,8 @@ - + + @@ -83045,10 +83609,10 @@ - - - - + + + + @@ -83357,6 +83921,17 @@ + + + + + + + + + + + @@ -83378,12 +83953,12 @@ - + - + @@ -83448,7 +84023,7 @@ - + @@ -83477,7 +84052,7 @@ - + @@ -83487,7 +84062,7 @@ - + @@ -83659,17 +84234,6 @@ - - - - - - - - - - - @@ -83740,7 +84304,6 @@ - @@ -83790,7 +84353,7 @@ - + @@ -83816,7 +84379,7 @@ - + @@ -84149,46 +84712,46 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -85268,6 +85831,13 @@ + + + + + + + @@ -85313,12 +85883,6 @@ - - - - - - @@ -85358,7 +85922,7 @@ - + @@ -85503,6 +86067,15 @@ + + + + + + + + + @@ -85837,7 +86410,7 @@ - + @@ -85887,7 +86460,7 @@ - + @@ -86262,7 +86835,7 @@ - + @@ -86674,7 +87247,7 @@ - + @@ -86822,7 +87395,7 @@ - + @@ -86838,7 +87411,7 @@ - + @@ -88084,7 +88657,7 @@ - + @@ -88466,76 +89039,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -88776,6 +89279,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -88996,6 +89577,10 @@ + + + + @@ -89346,14 +89931,6 @@ - - - - - - - - @@ -89450,7 +90027,7 @@ - + @@ -90545,7 +91122,7 @@ - + @@ -90825,7 +91402,7 @@ - + @@ -90858,7 +91435,7 @@ - + @@ -90981,7 +91558,7 @@ - + @@ -91027,7 +91604,7 @@ - + @@ -91164,7 +91741,7 @@ - + @@ -91204,7 +91781,7 @@ - + @@ -91223,7 +91800,7 @@ - + @@ -91450,27 +92027,27 @@ - + - + - + - + @@ -91623,6 +92200,14 @@ + + + + + + + + @@ -92073,7 +92658,7 @@ - + @@ -92120,7 +92705,7 @@ - + @@ -92134,7 +92719,7 @@ - + @@ -92153,7 +92738,7 @@ - + @@ -92162,7 +92747,7 @@ - + @@ -92177,7 +92762,7 @@ - + @@ -92269,7 +92854,7 @@ - + @@ -92739,7 +93324,7 @@ - + @@ -93028,7 +93613,7 @@ - + @@ -93042,7 +93627,7 @@ - + @@ -93050,7 +93635,7 @@ - + @@ -93115,7 +93700,7 @@ - + @@ -93216,7 +93801,7 @@ - + @@ -93235,7 +93820,7 @@ - + @@ -93265,7 +93850,7 @@ - + @@ -93411,7 +93996,7 @@ - + @@ -93461,7 +94046,7 @@ - + @@ -94318,7 +94903,7 @@ - + @@ -94403,7 +94988,27 @@ - + + + + + + + + + + + + + + + + + + + + + @@ -94411,7 +95016,7 @@ - + @@ -94423,7 +95028,7 @@ - + @@ -94569,7 +95174,7 @@ - + @@ -95016,7 +95621,7 @@ - + @@ -95037,7 +95642,7 @@ - + @@ -95069,7 +95674,7 @@ - + @@ -95557,7 +96162,7 @@ - + @@ -95578,8 +96183,8 @@ - - + + @@ -95646,7 +96251,7 @@ - + @@ -95703,6 +96308,17 @@ + + + + + + + + + + + @@ -95762,16 +96378,6 @@ - - - - - - - - - - @@ -96443,14 +97049,14 @@ - + - + - + @@ -97319,13 +97925,13 @@ - - + + - + @@ -97335,8 +97941,8 @@ - - + + @@ -97381,7 +97987,7 @@ - + @@ -97626,6 +98232,17 @@ + + + + + + + + + + + @@ -97721,7 +98338,7 @@ - + @@ -97807,6 +98424,7 @@ + @@ -97817,27 +98435,27 @@ - - + + - + - + - + - + @@ -97845,7 +98463,7 @@ - + @@ -97853,14 +98471,14 @@ - + - + @@ -97868,7 +98486,7 @@ - + @@ -97877,7 +98495,7 @@ - + @@ -97886,11 +98504,11 @@ - + - + @@ -98244,14 +98862,14 @@ - + - + @@ -98468,7 +99086,7 @@ - + @@ -99050,7 +99668,7 @@ - + @@ -99090,7 +99708,7 @@ - + @@ -99098,7 +99716,7 @@ - + @@ -99129,13 +99747,13 @@ - + - + @@ -101225,10 +101843,10 @@ - - - - + + + + @@ -101278,6 +101896,6 @@ From e028389dab575bf316e1f864cc91a96d7faec4a9 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Mon, 16 Nov 2020 14:21:34 -0800 Subject: [PATCH 327/636] ANDROID: arm64: Fix off-by-one vdso trampoline return value The VDSO_compat_* symbols added by vdso.lds.S for sigreturn32 are not being tagged in the symbol table as FUNC symbols by ld.lld. As a result, I see a SIGBUS error when running the LTP test syscalls.rt_sigaction01. This was fixed in https://reviews.llvm.org/D86263. Until we can update to the latest version of llvm, let's drop the bottom bit for these symbols in the VDSO_SYMBOL() macro so that the `vdso_trampoline + thumb` functions properly. Test: LTP test syscalls.rt_sigaction01 Bug: 172307050 Fixes: 71ca95b7e949 ("FROMLIST: arm64: vdso32: Allow ld.lld to properly link the VDSO") Signed-off-by: Will McVicker Change-Id: I24cf58ae62616a80c82653a7fc7dea93b33a3d0d --- arch/arm64/include/asm/vdso.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 9b197e5ea759..a08cc3cb0f8e 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -32,9 +32,10 @@ #include #endif -#define VDSO_SYMBOL(base, name) \ -({ \ - (void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \ +#define VDSO_SYMBOL(base, name) \ +({ \ + (void *)((vdso_offset_##name & ~1UL) - VDSO_LBASE + \ + (unsigned long)(base)); \ }) #endif /* !__ASSEMBLY__ */ From 7f0c07bea50da99ebb98f126b9537225268f176e Mon Sep 17 00:00:00 2001 From: Chris Ye Date: Thu, 29 Oct 2020 12:38:00 -0700 Subject: [PATCH 328/636] FROMGIT: Input: Add devices for HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE Kernel 5.4 introduces HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE, devices need to be set explicitly with this flag. Bug: 170072925 Bug: 170126889 Tests: atest android.hardware.input.cts.tests (cherry picked from commit f59ee399de4a8ca4d7d19cdcabb4b63e94867f09 https://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git for-5.10/upstream-fixes) Signed-off-by: Chris Ye Change-Id: I65742e96babc59adde0ab32656850ef4f164ac3e --- drivers/hid/hid-ids.h | 4 ++++ drivers/hid/hid-quirks.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 2c055e60e590..2e63951d7e72 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -442,6 +442,10 @@ #define USB_VENDOR_ID_FRUCTEL 0x25B6 #define USB_DEVICE_ID_GAMETEL_MT_MODE 0x0002 +#define USB_VENDOR_ID_GAMEVICE 0x27F8 +#define USB_DEVICE_ID_GAMEVICE_GV186 0x0BBE +#define USB_DEVICE_ID_GAMEVICE_KISHI 0x0BBF + #define USB_VENDOR_ID_GAMERON 0x0810 #define USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR 0x0001 #define USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR 0x0002 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 2d8d20a7f457..9aed59a3f2e6 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -86,6 +86,10 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD), HID_QUIRK_MULTI_INPUT }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_GV186), + HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, + { HID_USB_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_KISHI), + HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FLYING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, From 7d222dce964ddb0be42ae4f2f45beb1beaee19bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 2 Nov 2020 22:27:27 +0100 Subject: [PATCH 329/636] regulator: defer probe when trying to get voltage from unresolved supply MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cf1ad559a20d1930aa7b47a52f54e1f8718de301 ] regulator_get_voltage_rdev() is called in regulator probe() when applying machine constraints. The "fixed" commit exposed the problem that non-bypassed regulators can forward the request to its parent (like bypassed ones) supply. Return -EPROBE_DEFER when the supply is expected but not resolved yet. Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator") Cc: stable@vger.kernel.org Signed-off-by: Michał Mirosław Reported-by: Ondřej Jirman Reported-by: Corentin Labbe Tested-by: Ondřej Jirman Link: https://lore.kernel.org/r/a9041d68b4d35e4a2dd71629c8a6422662acb5ee.1604351936.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index c290c8942131..ad5235ca8e4e 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3405,6 +3405,8 @@ static int _regulator_get_voltage(struct regulator_dev *rdev) ret = rdev->desc->fixed_uV; } else if (rdev->supply) { ret = _regulator_get_voltage(rdev->supply->rdev); + } else if (rdev->supply_name) { + return -EPROBE_DEFER; } else { return -EINVAL; } From 68e51bf3761736359110b198c42e6f78056e3719 Mon Sep 17 00:00:00 2001 From: Zeng Tao Date: Tue, 1 Sep 2020 17:30:13 +0800 Subject: [PATCH 330/636] time: Prevent undefined behaviour in timespec64_to_ns() [ Upstream commit cb47755725da7b90fecbb2aa82ac3b24a7adb89b ] UBSAN reports: Undefined behaviour in ./include/linux/time64.h:127:27 signed integer overflow: 17179869187 * 1000000000 cannot be represented in type 'long long int' Call Trace: timespec64_to_ns include/linux/time64.h:127 [inline] set_cpu_itimer+0x65c/0x880 kernel/time/itimer.c:180 do_setitimer+0x8e/0x740 kernel/time/itimer.c:245 __x64_sys_setitimer+0x14c/0x2c0 kernel/time/itimer.c:336 do_syscall_64+0xa1/0x540 arch/x86/entry/common.c:295 Commit bd40a175769d ("y2038: itimer: change implementation to timespec64") replaced the original conversion which handled time clamping correctly with timespec64_to_ns() which has no overflow protection. Fix it in timespec64_to_ns() as this is not necessarily limited to the usage in itimers. [ tglx: Added comment and adjusted the fixes tag ] Fixes: 361a3bf00582 ("time64: Add time64.h header and define struct timespec64") Signed-off-by: Zeng Tao Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1598952616-6416-1-git-send-email-prime.zeng@hisilicon.com Signed-off-by: Sasha Levin --- include/linux/time64.h | 4 ++++ kernel/time/itimer.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/time64.h b/include/linux/time64.h index 4a45aea0f96e..8dbdf6cae3e8 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -138,6 +138,10 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts) */ static inline s64 timespec64_to_ns(const struct timespec64 *ts) { + /* Prevent multiplication overflow */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return KTIME_MAX; + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; } diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 9a65713c8309..2e2b335ef101 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -154,10 +154,6 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, u64 oval, nval, ointerval, ninterval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; - /* - * Use the to_ktime conversion because that clamps the maximum - * value to KTIME_MAX and avoid multiplication overflows. - */ nval = ktime_to_ns(timeval_to_ktime(value->it_value)); ninterval = ktime_to_ns(timeval_to_ktime(value->it_interval)); From 65d733cd9d19e8e96a8e28bf9665bf43e6869748 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 28 Oct 2020 15:24:34 +0800 Subject: [PATCH 331/636] nbd: don't update block size after device is started [ Upstream commit b40813ddcd6bf9f01d020804e4cb8febc480b9e4 ] Mounted NBD device can be resized, one use case is rbd-nbd. Fix the issue by setting up default block size, then not touch it in nbd_size_update() any more. This kind of usage is aligned with loop which has same use case too. Cc: stable@vger.kernel.org Fixes: c8a83a6b54d0 ("nbd: Use set_blocksize() to set device blocksize") Reported-by: lining Signed-off-by: Ming Lei Cc: Josef Bacik Cc: Jan Kara Tested-by: lining Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 52e1e71e8124..706115ecd9be 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -276,7 +276,7 @@ static void nbd_size_clear(struct nbd_device *nbd) } } -static void nbd_size_update(struct nbd_device *nbd) +static void nbd_size_update(struct nbd_device *nbd, bool start) { struct nbd_config *config = nbd->config; struct block_device *bdev = bdget_disk(nbd->disk, 0); @@ -292,7 +292,8 @@ static void nbd_size_update(struct nbd_device *nbd) if (bdev) { if (bdev->bd_disk) { bd_set_size(bdev, config->bytesize); - set_blocksize(bdev, config->blksize); + if (start) + set_blocksize(bdev, config->blksize); } else bdev->bd_invalidated = 1; bdput(bdev); @@ -307,7 +308,7 @@ static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, config->blksize = blocksize; config->bytesize = blocksize * nr_blocks; if (nbd->task_recv != NULL) - nbd_size_update(nbd); + nbd_size_update(nbd, false); } static void nbd_complete_rq(struct request *req) @@ -1244,7 +1245,7 @@ static int nbd_start_device(struct nbd_device *nbd) args->index = i; queue_work(nbd->recv_workq, &args->work); } - nbd_size_update(nbd); + nbd_size_update(nbd, true); return error; } From b77df211071a8881b01d28d5cf52d240db018bf7 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 31 Mar 2020 01:40:42 -0700 Subject: [PATCH 332/636] usb: dwc3: gadget: Continue to process pending requests [ Upstream commit d9feef974e0d8cb6842533c92476a1b32a41ba31 ] If there are still pending requests because no TRB was available, prepare more when started requests are completed. Introduce dwc3_gadget_ep_should_continue() to check for incomplete and pending requests to resume updating new TRBs to the controller's TRB cache. Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f0d2f0a4e990..f24cfb3a6907 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2403,10 +2403,8 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, req->request.actual = req->request.length - req->remaining; - if (!dwc3_gadget_ep_request_completed(req)) { - __dwc3_gadget_kick_transfer(dep); + if (!dwc3_gadget_ep_request_completed(req)) goto out; - } dwc3_gadget_giveback(dep, req, status); @@ -2430,6 +2428,24 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, } } +static bool dwc3_gadget_ep_should_continue(struct dwc3_ep *dep) +{ + struct dwc3_request *req; + + if (!list_empty(&dep->pending_list)) + return true; + + /* + * We only need to check the first entry of the started list. We can + * assume the completed requests are removed from the started list. + */ + req = next_request(&dep->started_list); + if (!req) + return false; + + return !dwc3_gadget_ep_request_completed(req); +} + static void dwc3_gadget_endpoint_frame_from_event(struct dwc3_ep *dep, const struct dwc3_event_depevt *event) { @@ -2459,6 +2475,8 @@ static void dwc3_gadget_endpoint_transfer_in_progress(struct dwc3_ep *dep, if (stop) dwc3_stop_active_transfer(dep, true, true); + else if (dwc3_gadget_ep_should_continue(dep)) + __dwc3_gadget_kick_transfer(dep); /* * WORKAROUND: This is the 2nd half of U1/U2 -> U0 workaround. From c140bebb066509710a8c83c9cec2ff866410db11 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 24 Sep 2020 01:21:24 -0700 Subject: [PATCH 333/636] usb: dwc3: gadget: Reclaim extra TRBs after request completion [ Upstream commit 690e5c2dc29f8891fcfd30da67e0d5837c2c9df5 ] An SG request may be partially completed (due to no available TRBs). Don't reclaim extra TRBs and clear the needs_extra_trb flag until the request is fully completed. Otherwise, the driver will reclaim the wrong TRB. Cc: stable@vger.kernel.org Fixes: 1f512119a08c ("usb: dwc3: gadget: add remaining sg entries to ring") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/gadget.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f24cfb3a6907..6ab5c48f5d87 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2386,6 +2386,11 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, ret = dwc3_gadget_ep_reclaim_trb_linear(dep, req, event, status); + req->request.actual = req->request.length - req->remaining; + + if (!dwc3_gadget_ep_request_completed(req)) + goto out; + if (req->needs_extra_trb) { unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); @@ -2401,11 +2406,6 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep, req->needs_extra_trb = false; } - req->request.actual = req->request.length - req->remaining; - - if (!dwc3_gadget_ep_request_completed(req)) - goto out; - dwc3_gadget_giveback(dep, req, status); out: From 2f154092ea3672b78dff6d97c78bededcc128e66 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 1 Sep 2020 08:09:01 -0400 Subject: [PATCH 334/636] btrfs: sysfs: init devices outside of the chunk_mutex [ Upstream commit ca10845a56856fff4de3804c85e6424d0f6d0cde ] While running btrfs/061, btrfs/073, btrfs/078, or btrfs/178 we hit the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 5.9.0-rc3+ #4 Not tainted ------------------------------------------------------ kswapd0/100 is trying to acquire lock: ffff96ecc22ef4a0 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node.part.0+0x3f/0x330 but task is already holding lock: ffffffff8dd74700 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (fs_reclaim){+.+.}-{0:0}: fs_reclaim_acquire+0x65/0x80 slab_pre_alloc_hook.constprop.0+0x20/0x200 kmem_cache_alloc+0x37/0x270 alloc_inode+0x82/0xb0 iget_locked+0x10d/0x2c0 kernfs_get_inode+0x1b/0x130 kernfs_get_tree+0x136/0x240 sysfs_get_tree+0x16/0x40 vfs_get_tree+0x28/0xc0 path_mount+0x434/0xc00 __x64_sys_mount+0xe3/0x120 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #2 (kernfs_mutex){+.+.}-{3:3}: __mutex_lock+0x7e/0x7e0 kernfs_add_one+0x23/0x150 kernfs_create_link+0x63/0xa0 sysfs_do_create_link_sd+0x5e/0xd0 btrfs_sysfs_add_devices_dir+0x81/0x130 btrfs_init_new_device+0x67f/0x1250 btrfs_ioctl+0x1ef/0x2e20 __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #1 (&fs_info->chunk_mutex){+.+.}-{3:3}: __mutex_lock+0x7e/0x7e0 btrfs_chunk_alloc+0x125/0x3a0 find_free_extent+0xdf6/0x1210 btrfs_reserve_extent+0xb3/0x1b0 btrfs_alloc_tree_block+0xb0/0x310 alloc_tree_block_no_bg_flush+0x4a/0x60 __btrfs_cow_block+0x11a/0x530 btrfs_cow_block+0x104/0x220 btrfs_search_slot+0x52e/0x9d0 btrfs_insert_empty_items+0x64/0xb0 btrfs_insert_delayed_items+0x90/0x4f0 btrfs_commit_inode_delayed_items+0x93/0x140 btrfs_log_inode+0x5de/0x2020 btrfs_log_inode_parent+0x429/0xc90 btrfs_log_new_name+0x95/0x9b btrfs_rename2+0xbb9/0x1800 vfs_rename+0x64f/0x9f0 do_renameat2+0x320/0x4e0 __x64_sys_rename+0x1f/0x30 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #0 (&delayed_node->mutex){+.+.}-{3:3}: __lock_acquire+0x119c/0x1fc0 lock_acquire+0xa7/0x3d0 __mutex_lock+0x7e/0x7e0 __btrfs_release_delayed_node.part.0+0x3f/0x330 btrfs_evict_inode+0x24c/0x500 evict+0xcf/0x1f0 dispose_list+0x48/0x70 prune_icache_sb+0x44/0x50 super_cache_scan+0x161/0x1e0 do_shrink_slab+0x178/0x3c0 shrink_slab+0x17c/0x290 shrink_node+0x2b2/0x6d0 balance_pgdat+0x30a/0x670 kswapd+0x213/0x4c0 kthread+0x138/0x160 ret_from_fork+0x1f/0x30 other info that might help us debug this: Chain exists of: &delayed_node->mutex --> kernfs_mutex --> fs_reclaim Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(kernfs_mutex); lock(fs_reclaim); lock(&delayed_node->mutex); *** DEADLOCK *** 3 locks held by kswapd0/100: #0: ffffffff8dd74700 (fs_reclaim){+.+.}-{0:0}, at: __fs_reclaim_acquire+0x5/0x30 #1: ffffffff8dd65c50 (shrinker_rwsem){++++}-{3:3}, at: shrink_slab+0x115/0x290 #2: ffff96ed2ade30e0 (&type->s_umount_key#36){++++}-{3:3}, at: super_cache_scan+0x38/0x1e0 stack backtrace: CPU: 0 PID: 100 Comm: kswapd0 Not tainted 5.9.0-rc3+ #4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack+0x8b/0xb8 check_noncircular+0x12d/0x150 __lock_acquire+0x119c/0x1fc0 lock_acquire+0xa7/0x3d0 ? __btrfs_release_delayed_node.part.0+0x3f/0x330 __mutex_lock+0x7e/0x7e0 ? __btrfs_release_delayed_node.part.0+0x3f/0x330 ? __btrfs_release_delayed_node.part.0+0x3f/0x330 ? lock_acquire+0xa7/0x3d0 ? find_held_lock+0x2b/0x80 __btrfs_release_delayed_node.part.0+0x3f/0x330 btrfs_evict_inode+0x24c/0x500 evict+0xcf/0x1f0 dispose_list+0x48/0x70 prune_icache_sb+0x44/0x50 super_cache_scan+0x161/0x1e0 do_shrink_slab+0x178/0x3c0 shrink_slab+0x17c/0x290 shrink_node+0x2b2/0x6d0 balance_pgdat+0x30a/0x670 kswapd+0x213/0x4c0 ? _raw_spin_unlock_irqrestore+0x41/0x50 ? add_wait_queue_exclusive+0x70/0x70 ? balance_pgdat+0x670/0x670 kthread+0x138/0x160 ? kthread_create_worker_on_cpu+0x40/0x40 ret_from_fork+0x1f/0x30 This happens because we are holding the chunk_mutex at the time of adding in a new device. However we only need to hold the device_list_mutex, as we're going to iterate over the fs_devices devices. Move the sysfs init stuff outside of the chunk_mutex to get rid of this lockdep splat. CC: stable@vger.kernel.org # 4.4.x: f3cd2c58110dad14e: btrfs: sysfs, rename device_link add/remove functions CC: stable@vger.kernel.org # 4.4.x Reported-by: David Sterba Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/volumes.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 05daa2b816c3..4067d0196556 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2459,9 +2459,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_set_super_num_devices(fs_info->super_copy, orig_super_num_devices + 1); - /* add sysfs device entry */ - btrfs_sysfs_add_device_link(fs_devices, device); - /* * we've got more storage, clear any full flags on the space * infos @@ -2469,6 +2466,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_clear_space_info_full(fs_info); mutex_unlock(&fs_info->chunk_mutex); + + /* Add sysfs device entry */ + btrfs_sysfs_add_device_link(fs_devices, device); + mutex_unlock(&fs_devices->device_list_mutex); if (seeding_dev) { From 937870d1b282594900dfd9159668049eac0c4a88 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 22 Sep 2020 17:27:29 +0900 Subject: [PATCH 335/636] btrfs: reschedule when cloning lots of extents [ Upstream commit 6b613cc97f0ace77f92f7bc112b8f6ad3f52baf8 ] We have several occurrences of a soft lockup from fstest's generic/175 testcase, which look more or less like this one: watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [xfs_io:10030] Kernel panic - not syncing: softlockup: hung tasks CPU: 0 PID: 10030 Comm: xfs_io Tainted: G L 5.9.0-rc5+ #768 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4-rebuilt.opensuse.org 04/01/2014 Call Trace: dump_stack+0x77/0xa0 panic+0xfa/0x2cb watchdog_timer_fn.cold+0x85/0xa5 ? lockup_detector_update_enable+0x50/0x50 __hrtimer_run_queues+0x99/0x4c0 ? recalibrate_cpu_khz+0x10/0x10 hrtimer_run_queues+0x9f/0xb0 update_process_times+0x28/0x80 tick_handle_periodic+0x1b/0x60 __sysvec_apic_timer_interrupt+0x76/0x210 asm_call_on_stack+0x12/0x20 sysvec_apic_timer_interrupt+0x7f/0x90 asm_sysvec_apic_timer_interrupt+0x12/0x20 RIP: 0010:btrfs_tree_unlock+0x91/0x1a0 [btrfs] RSP: 0018:ffffc90007123a58 EFLAGS: 00000282 RAX: ffff8881cea2fbe0 RBX: ffff8881cea2fbe0 RCX: 0000000000000000 RDX: ffff8881d23fd200 RSI: ffffffff82045220 RDI: ffff8881cea2fba0 RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000032 R10: 0000160000000000 R11: 0000000000001000 R12: 0000000000001000 R13: ffff8882357fd5b0 R14: ffff88816fa76e70 R15: ffff8881cea2fad0 ? btrfs_tree_unlock+0x15b/0x1a0 [btrfs] btrfs_release_path+0x67/0x80 [btrfs] btrfs_insert_replace_extent+0x177/0x2c0 [btrfs] btrfs_replace_file_extents+0x472/0x7c0 [btrfs] btrfs_clone+0x9ba/0xbd0 [btrfs] btrfs_clone_files.isra.0+0xeb/0x140 [btrfs] ? file_update_time+0xcd/0x120 btrfs_remap_file_range+0x322/0x3b0 [btrfs] do_clone_file_range+0xb7/0x1e0 vfs_clone_file_range+0x30/0xa0 ioctl_file_clone+0x8a/0xc0 do_vfs_ioctl+0x5b2/0x6f0 __x64_sys_ioctl+0x37/0xa0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f87977fc247 RSP: 002b:00007ffd51a2f6d8 EFLAGS: 00000206 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f87977fc247 RDX: 00007ffd51a2f710 RSI: 000000004020940d RDI: 0000000000000003 RBP: 0000000000000004 R08: 00007ffd51a79080 R09: 0000000000000000 R10: 00005621f11352f2 R11: 0000000000000206 R12: 0000000000000000 R13: 0000000000000000 R14: 00005621f128b958 R15: 0000000080000000 Kernel Offset: disabled ---[ end Kernel panic - not syncing: softlockup: hung tasks ]--- All of these lockup reports have the call chain btrfs_clone_files() -> btrfs_clone() in common. btrfs_clone_files() calls btrfs_clone() with both source and destination extents locked and loops over the source extent to create the clones. Conditionally reschedule in the btrfs_clone() loop, to give some time back to other processes. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 01a90fa03c24..f0e71aa05d22 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4217,6 +4217,8 @@ process_slot: ret = -EINTR; goto out; } + + cond_resched(); } ret = 0; From 9d9c830b50e371eae7147764235baa8b5fbcd17b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 15 Oct 2020 21:41:44 +0100 Subject: [PATCH 336/636] genirq: Let GENERIC_IRQ_IPI select IRQ_DOMAIN_HIERARCHY [ Upstream commit 151a535171be6ff824a0a3875553ea38570f4c05 ] kernel/irq/ipi.c otherwise fails to compile if nothing else selects it. Fixes: 379b656446a3 ("genirq: Add GENERIC_IRQ_IPI Kconfig symbol") Reported-by: Pavel Machek Tested-by: Pavel Machek Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201015101222.GA32747@amd Signed-off-by: Sasha Levin --- kernel/irq/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5f3e2baefca9..d532bf0c5a67 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -80,6 +80,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS # Generic IRQ IPI support config GENERIC_IRQ_IPI bool + select IRQ_DOMAIN_HIERARCHY # Generic MSI interrupt support config GENERIC_MSI_IRQ From 36f3a81b75b1cff6f886aeaaa69f93bcb974fa3e Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 8 Oct 2020 09:12:15 +0200 Subject: [PATCH 337/636] hv_balloon: disable warning when floor reached [ Upstream commit 2c3bd2a5c86fe744e8377733c5e511a5ca1e14f5 ] It is not an error if the host requests to balloon down, but the VM refuses to do so. Without this change a warning is logged in dmesg every five minutes. Fixes: b3bb97b8a49f3 ("Drivers: hv: balloon: Add logging for dynamic memory operations") Signed-off-by: Olaf Hering Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20201008071216.16554-1-olaf@aepfle.de Signed-off-by: Wei Liu Signed-off-by: Sasha Levin --- drivers/hv/hv_balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 9ca0706a9d40..e5fc719a34e7 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1275,7 +1275,7 @@ static void balloon_up(struct work_struct *dummy) /* Refuse to balloon below the floor. */ if (avail_pages < num_pages || avail_pages - num_pages < floor) { - pr_warn("Balloon request will be partially fulfilled. %s\n", + pr_info("Balloon request will be partially fulfilled. %s\n", avail_pages < num_pages ? "Not enough memory." : "Balloon floor reached."); From 243a16469c3431affdfdcc26674bd266fbfd5f8e Mon Sep 17 00:00:00 2001 From: zhuoliang zhang Date: Fri, 23 Oct 2020 09:05:35 +0200 Subject: [PATCH 338/636] net: xfrm: fix a race condition during allocing spi [ Upstream commit a779d91314ca7208b7feb3ad817b62904397c56d ] we found that the following race condition exists in xfrm_alloc_userspi flow: user thread state_hash_work thread ---- ---- xfrm_alloc_userspi() __find_acq_core() /*alloc new xfrm_state:x*/ xfrm_state_alloc() /*schedule state_hash_work thread*/ xfrm_hash_grow_check() xfrm_hash_resize() xfrm_alloc_spi /*hold lock*/ x->id.spi = htonl(spi) spin_lock_bh(&net->xfrm.xfrm_state_lock) /*waiting lock release*/ xfrm_hash_transfer() spin_lock_bh(&net->xfrm.xfrm_state_lock) /*add x into hlist:net->xfrm.state_byspi*/ hlist_add_head_rcu(&x->byspi) spin_unlock_bh(&net->xfrm.xfrm_state_lock) /*add x into hlist:net->xfrm.state_byspi 2 times*/ hlist_add_head_rcu(&x->byspi) 1. a new state x is alloced in xfrm_state_alloc() and added into the bydst hlist in __find_acq_core() on the LHS; 2. on the RHS, state_hash_work thread travels the old bydst and tranfers every xfrm_state (include x) into the new bydst hlist and new byspi hlist; 3. user thread on the LHS gets the lock and adds x into the new byspi hlist again. So the same xfrm_state (x) is added into the same list_hash (net->xfrm.state_byspi) 2 times that makes the list_hash become an inifite loop. To fix the race, x->id.spi = htonl(spi) in the xfrm_alloc_spi() is moved to the back of spin_lock_bh, sothat state_hash_work thread no longer add x which id.spi is zero into the hash_list. Fixes: f034b5d4efdf ("[XFRM]: Dynamic xfrm_state hash table sizing.") Signed-off-by: zhuoliang zhang Acked-by: Herbert Xu Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_state.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a649d7c2f48c..84dea0ad1666 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1825,6 +1825,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) int err = -ENOENT; __be32 minspi = htonl(low); __be32 maxspi = htonl(high); + __be32 newspi = 0; u32 mark = x->mark.v & x->mark.m; spin_lock_bh(&x->lock); @@ -1843,21 +1844,22 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) xfrm_state_put(x0); goto unlock; } - x->id.spi = minspi; + newspi = minspi; } else { u32 spi = 0; for (h = 0; h < high-low+1; h++) { spi = low + prandom_u32()%(high-low+1); x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { - x->id.spi = htonl(spi); + newspi = htonl(spi); break; } xfrm_state_put(x0); } } - if (x->id.spi) { + if (newspi) { spin_lock_bh(&net->xfrm.xfrm_state_lock); + x->id.spi = newspi; h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); spin_unlock_bh(&net->xfrm.xfrm_state_lock); From bbec3ef4913e0b3918c55d4ac23ca23aa880ffbe Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 26 Oct 2020 15:19:38 -0700 Subject: [PATCH 339/636] xfs: set xefi_discard when creating a deferred agfl free log intent item [ Upstream commit 2c334e12f957cd8c6bb66b4aa3f79848b7c33cab ] Make sure that we actually initialize xefi_discard when we're scheduling a deferred free of an AGFL block. This was (eventually) found by the UBSAN while I was banging on realtime rmap problems, but it exists in the upstream codebase. While we're at it, rearrange the structure to reduce the struct size from 64 to 56 bytes. Fixes: fcb762f5de2e ("xfs: add bmapi nodiscard flag") Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_alloc.c | 1 + fs/xfs/libxfs/xfs_bmap.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 1eb7933dac83..b3a9043b0c9e 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2213,6 +2213,7 @@ xfs_defer_agfl_block( new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); new->xefi_blockcount = 1; new->xefi_oinfo = *oinfo; + new->xefi_skip_discard = false; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 488dc8860fd7..50242ba3cdb7 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -52,9 +52,9 @@ struct xfs_extent_free_item { xfs_fsblock_t xefi_startblock;/* starting fs block number */ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ + bool xefi_skip_discard; struct list_head xefi_list; struct xfs_owner_info xefi_oinfo; /* extent owner */ - bool xefi_skip_discard; }; #define XFS_BMAP_MAX_NMAP 4 From a79b767a92242b2801bb2f4433501a1b78bcb4e3 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 29 Oct 2020 16:39:46 +0100 Subject: [PATCH 340/636] netfilter: ipset: Update byte and packet counters regardless of whether they match [ Upstream commit 7d10e62c2ff8e084c136c94d32d9a94de4d31248 ] In ip_set_match_extensions(), for sets with counters, we take care of updating counters themselves by calling ip_set_update_counter(), and of checking if the given comparison and values match, by calling ip_set_match_counter() if needed. However, if a given comparison on counters doesn't match the configured values, that doesn't mean the set entry itself isn't matching. This fix restores the behaviour we had before commit 4750005a85f7 ("netfilter: ipset: Fix "don't update counters" mode when counters used at the matching"), without reintroducing the issue fixed there: back then, mtype_data_match() first updated counters in any case, and then took care of matching on counters. Now, if the IPSET_FLAG_SKIP_COUNTER_UPDATE flag is set, ip_set_update_counter() will anyway skip counter updates if desired. The issue observed is illustrated by this reproducer: ipset create c hash:ip counters ipset add c 192.0.2.1 iptables -I INPUT -m set --match-set c src --bytes-gt 800 -j DROP if we now send packets from 192.0.2.1, bytes and packets counters for the entry as shown by 'ipset list' are always zero, and, no matter how many bytes we send, the rule will never match, because counters themselves are not updated. Reported-by: Mithil Mhatre Fixes: 4750005a85f7 ("netfilter: ipset: Fix "don't update counters" mode when counters used at the matching") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 36ebc40a4313..0427e66bc478 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -488,13 +488,14 @@ ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) { struct ip_set_counter *counter = ext_counter(data, set); + ip_set_update_counter(counter, ext, flags); + if (flags & IPSET_FLAG_MATCH_COUNTERS && !(ip_set_match_counter(ip_set_get_packets(counter), mext->packets, mext->packets_op) && ip_set_match_counter(ip_set_get_bytes(counter), mext->bytes, mext->bytes_op))) return false; - ip_set_update_counter(counter, ext, flags); } if (SET_WITH_SKBINFO(set)) ip_set_get_skbinfo(ext_skbinfo(data, set), From 5e6d8c982529d72c307a2597178e6639b02b26c4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Nov 2020 00:31:03 +0100 Subject: [PATCH 341/636] perf tools: Add missing swap for ino_generation [ Upstream commit fe01adb72356a4e2f8735e4128af85921ca98fa1 ] We are missing swap for ino_generation field. Fixes: 5c5e854bc760 ("perf tools: Add attr->mmap2 support") Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20201101233103.3537427-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/session.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f016d1b330e5..6a2037b52098 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -488,6 +488,7 @@ static void perf_event__mmap2_swap(union perf_event *event, event->mmap2.maj = bswap_32(event->mmap2.maj); event->mmap2.min = bswap_32(event->mmap2.min); event->mmap2.ino = bswap_64(event->mmap2.ino); + event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); if (sample_id_all) { void *data = &event->mmap2.filename; From 248470f36b8fd123096a477e5cd9b4073a0d62ed Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Nov 2020 13:18:07 +0300 Subject: [PATCH 342/636] ALSA: hda: prevent undefined shift in snd_hdac_ext_bus_get_link() [ Upstream commit 158e1886b6262c1d1c96a18c85fac5219b8bf804 ] This is harmless, but the "addr" comes from the user and it could lead to a negative shift or to shift wrapping if it's too high. Fixes: 0b00a5615dc4 ("ALSA: hdac_ext: add hdac extended controller") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20201103101807.GC1127762@mwanda Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/hda/ext/hdac_ext_controller.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index 84b44cdae28a..b96abebcfd1a 100644 --- a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -156,6 +156,8 @@ struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_bus *bus, return NULL; if (bus->idx != bus_idx) return NULL; + if (addr < 0 || addr > 31) + return NULL; list_for_each_entry(hlink, &bus->hlink_list, list) { for (i = 0; i < HDA_MAX_CODECS; i++) { From 79b986c759b118d8632a41d4cf49ce46c5446287 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 18 Jun 2020 12:47:06 +0200 Subject: [PATCH 343/636] can: rx-offload: don't call kfree_skb() from IRQ context [ Upstream commit 2ddd6bfe7bdbb6c661835c3ff9cab8e0769940a6 ] A CAN driver, using the rx-offload infrastructure, is reading CAN frames (usually in IRQ context) from the hardware and placing it into the rx-offload queue to be delivered to the networking stack via NAPI. In case the rx-offload queue is full, trying to add more skbs results in the skbs being dropped using kfree_skb(). If done from hard-IRQ context this results in the following warning: [ 682.552693] ------------[ cut here ]------------ [ 682.557360] WARNING: CPU: 0 PID: 3057 at net/core/skbuff.c:650 skb_release_head_state+0x74/0x84 [ 682.566075] Modules linked in: can_raw can coda_vpu flexcan dw_hdmi_ahb_audio v4l2_jpeg imx_vdoa can_dev [ 682.575597] CPU: 0 PID: 3057 Comm: cansend Tainted: G W 5.7.0+ #18 [ 682.583098] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 682.589657] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 682.597423] [] (show_stack) from [] (dump_stack+0xe0/0x114) [ 682.604759] [] (dump_stack) from [] (__warn+0xc0/0x10c) [ 682.611742] [] (__warn) from [] (warn_slowpath_fmt+0x5c/0xc0) [ 682.619248] [] (warn_slowpath_fmt) from [] (skb_release_head_state+0x74/0x84) [ 682.628143] [] (skb_release_head_state) from [] (skb_release_all+0xc/0x24) [ 682.636774] [] (skb_release_all) from [] (kfree_skb+0x74/0x1c8) [ 682.644479] [] (kfree_skb) from [] (can_rx_offload_queue_sorted+0xe0/0xe8 [can_dev]) [ 682.654051] [] (can_rx_offload_queue_sorted [can_dev]) from [] (can_rx_offload_get_echo_skb+0x48/0x94 [can_dev]) [ 682.666007] [] (can_rx_offload_get_echo_skb [can_dev]) from [] (flexcan_irq+0x194/0x5dc [flexcan]) [ 682.676734] [] (flexcan_irq [flexcan]) from [] (__handle_irq_event_percpu+0x4c/0x3ec) [ 682.686322] [] (__handle_irq_event_percpu) from [] (handle_irq_event_percpu+0x2c/0x88) [ 682.695993] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x38/0x5c) [ 682.704887] [] (handle_irq_event) from [] (handle_fasteoi_irq+0xc8/0x180) [ 682.713432] [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x30/0x44) [ 682.722063] [] (generic_handle_irq) from [] (__handle_domain_irq+0x64/0xdc) [ 682.730783] [] (__handle_domain_irq) from [] (gic_handle_irq+0x48/0x9c) [ 682.739158] [] (gic_handle_irq) from [] (__irq_svc+0x70/0x98) [ 682.746656] Exception stack(0xe80e9dd8 to 0xe80e9e20) [ 682.751725] 9dc0: 00000001 e80e8000 [ 682.759922] 9de0: e820cf80 00000000 ffffe000 00000000 eaf08fe4 00000000 600d0013 00000000 [ 682.768117] 9e00: c1732e3c c16093a8 e820d4c0 e80e9e28 c018a57c c018b870 600d0013 ffffffff [ 682.776315] [] (__irq_svc) from [] (lock_acquire+0x108/0x4e8) [ 682.783821] [] (lock_acquire) from [] (down_write+0x48/0xa8) [ 682.791242] [] (down_write) from [] (unlink_file_vma+0x24/0x40) [ 682.798922] [] (unlink_file_vma) from [] (free_pgtables+0x34/0xb8) [ 682.806858] [] (free_pgtables) from [] (exit_mmap+0xe4/0x170) [ 682.814361] [] (exit_mmap) from [] (mmput+0x5c/0x110) [ 682.821171] [] (mmput) from [] (do_exit+0x374/0xbe4) [ 682.827892] [] (do_exit) from [] (do_group_exit+0x38/0xb4) [ 682.835132] [] (do_group_exit) from [] (__wake_up_parent+0x0/0x14) [ 682.843063] irq event stamp: 1936 [ 682.846399] hardirqs last enabled at (1935): [] rmqueue+0xf4/0xc64 [ 682.853553] hardirqs last disabled at (1936): [] __irq_svc+0x60/0x98 [ 682.860799] softirqs last enabled at (1878): [] raw_release+0x108/0x1f0 [can_raw] [ 682.869256] softirqs last disabled at (1876): [] release_sock+0x18/0x98 [ 682.876753] ---[ end trace 7bca4751ce44c444 ]--- This patch fixes the problem by replacing the kfree_skb() by dev_kfree_skb_any(), as rx-offload might be called from threaded IRQ handlers as well. Fixes: ca913f1ac024 ("can: rx-offload: can_rx_offload_queue_sorted(): fix error handling, avoid skb mem leak") Fixes: 6caf8a6d6586 ("can: rx-offload: can_rx_offload_queue_tail(): fix error handling, avoid skb mem leak") Link: http://lore.kernel.org/r/20201019190524.1285319-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/rx-offload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index 5f7e97d54733..5cf4171df1f4 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -281,7 +281,7 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, if (skb_queue_len(&offload->skb_queue) > offload->skb_queue_len_max) { - kfree_skb(skb); + dev_kfree_skb_any(skb); return -ENOBUFS; } @@ -326,7 +326,7 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload, { if (skb_queue_len(&offload->skb_queue) > offload->skb_queue_len_max) { - kfree_skb(skb); + dev_kfree_skb_any(skb); return -ENOBUFS; } From 7e4cf2ec0ca236c3e5f904239cec6efe1f3baf22 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Sat, 3 Oct 2020 00:41:45 +0900 Subject: [PATCH 344/636] can: dev: can_get_echo_skb(): prevent call to kfree_skb() in hard IRQ context [ Upstream commit 2283f79b22684d2812e5c76fc2280aae00390365 ] If a driver calls can_get_echo_skb() during a hardware IRQ (which is often, but not always, the case), the 'WARN_ON(in_irq)' in net/core/skbuff.c#skb_release_head_state() might be triggered, under network congestion circumstances, together with the potential risk of a NULL pointer dereference. The root cause of this issue is the call to kfree_skb() instead of dev_kfree_skb_irq() in net/core/dev.c#enqueue_to_backlog(). This patch prevents the skb to be freed within the call to netif_rx() by incrementing its reference count with skb_get(). The skb is finally freed by one of the in-irq-context safe functions: dev_consume_skb_any() or dev_kfree_skb_any(). The "any" version is used because some drivers might call can_get_echo_skb() in a normal context. The reason for this issue to occur is that initially, in the core network stack, loopback skb were not supposed to be received in hardware IRQ context. The CAN stack is an exeption. This bug was previously reported back in 2017 in [1] but the proposed patch never got accepted. While [1] directly modifies net/core/dev.c, we try to propose here a smoother modification local to CAN network stack (the assumption behind is that only CAN devices are affected by this issue). [1] http://lore.kernel.org/r/57a3ffb6-3309-3ad5-5a34-e93c3fe3614d@cetitec.com Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/r/20201002154219.4887-2-mailhol.vincent@wanadoo.fr Fixes: 39549eef3587 ("can: CAN Network device driver and Netlink interface") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 1545f2b299d0..be532eb64baa 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -520,7 +520,11 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) if (!skb) return 0; - netif_rx(skb); + skb_get(skb); + if (netif_rx(skb) == NET_RX_SUCCESS) + dev_consume_skb_any(skb); + else + dev_kfree_skb_any(skb); return len; } From 7a63a403d3fae2726e062d81fe6d15417529b25c Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 20 Oct 2020 08:44:43 +0200 Subject: [PATCH 345/636] can: dev: __can_get_echo_skb(): fix real payload length return value for RTR frames [ Upstream commit ed3320cec279407a86bc4c72edc4a39eb49165ec ] The can_get_echo_skb() function returns the number of received bytes to be used for netdev statistics. In the case of RTR frames we get a valid (potential non-zero) data length value which has to be passed for further operations. But on the wire RTR frames have no payload length. Therefore the value to be used in the statistics has to be zero for RTR frames. Reported-by: Vincent Mailhol Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201020064443.80164-1-socketcan@hartkopp.net Fixes: cf5046b309b3 ("can: dev: let can_get_echo_skb() return dlc of CAN frame") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index be532eb64baa..1950b13f22df 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -493,9 +493,13 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 */ struct sk_buff *skb = priv->echo_skb[idx]; struct canfd_frame *cf = (struct canfd_frame *)skb->data; - u8 len = cf->len; - *len_ptr = len; + /* get the real payload length for netdev statistics */ + if (cf->can_id & CAN_RTR_FLAG) + *len_ptr = 0; + else + *len_ptr = cf->len; + priv->echo_skb[idx] = NULL; return skb; From ef02687fc78099ef3fd2eb8a150c2823fe2d9061 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 18 Dec 2019 09:39:02 +0100 Subject: [PATCH 346/636] can: can_create_echo_skb(): fix echo skb generation: always use skb_clone() [ Upstream commit 286228d382ba6320f04fa2e7c6fc8d4d92e428f4 ] All user space generated SKBs are owned by a socket (unless injected into the key via AF_PACKET). If a socket is closed, all associated skbs will be cleaned up. This leads to a problem when a CAN driver calls can_put_echo_skb() on a unshared SKB. If the socket is closed prior to the TX complete handler, can_get_echo_skb() and the subsequent delivering of the echo SKB to all registered callbacks, a SKB with a refcount of 0 is delivered. To avoid the problem, in can_get_echo_skb() the original SKB is now always cloned, regardless of shared SKB or not. If the process exists it can now safely discard its SKBs, without disturbing the delivery of the echo SKB. The problem shows up in the j1939 stack, when it clones the incoming skb, which detects the already 0 refcount. We can easily reproduce this with following example: testj1939 -B -r can0: & cansend can0 1823ff40#0123 WARNING: CPU: 0 PID: 293 at lib/refcount.c:25 refcount_warn_saturate+0x108/0x174 refcount_t: addition on 0; use-after-free. Modules linked in: coda_vpu imx_vdoa videobuf2_vmalloc dw_hdmi_ahb_audio vcan CPU: 0 PID: 293 Comm: cansend Not tainted 5.5.0-rc6-00376-g9e20dcb7040d #1 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x8c/0xa0) [] (dump_stack) from [] (__warn+0xe0/0x108) [] (__warn) from [] (warn_slowpath_fmt+0xa8/0xcc) [] (warn_slowpath_fmt) from [] (refcount_warn_saturate+0x108/0x174) [] (refcount_warn_saturate) from [] (j1939_can_recv+0x20c/0x210) [] (j1939_can_recv) from [] (can_rcv_filter+0xb4/0x268) [] (can_rcv_filter) from [] (can_receive+0xb0/0xe4) [] (can_receive) from [] (can_rcv+0x48/0x98) [] (can_rcv) from [] (__netif_receive_skb_one_core+0x64/0x88) [] (__netif_receive_skb_one_core) from [] (__netif_receive_skb+0x38/0x94) [] (__netif_receive_skb) from [] (netif_receive_skb_internal+0x64/0xf8) [] (netif_receive_skb_internal) from [] (netif_receive_skb+0x34/0x19c) [] (netif_receive_skb) from [] (can_rx_offload_napi_poll+0x58/0xb4) Fixes: 0ae89beb283a ("can: add destructor for self generated skbs") Signed-off-by: Oleksij Rempel Link: http://lore.kernel.org/r/20200124132656.22156-1-o.rempel@pengutronix.de Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- include/linux/can/skb.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b3379a97245c..a34694e675c9 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -61,21 +61,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) */ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) { - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *nskb; - if (likely(nskb)) { - can_skb_set_owner(nskb, skb->sk); - consume_skb(skb); - return nskb; - } else { - kfree_skb(skb); - return NULL; - } + nskb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!nskb)) { + kfree_skb(skb); + return NULL; } - /* we can assume to have an unshared skb with proper owner */ - return skb; + can_skb_set_owner(nskb, skb->sk); + consume_skb(skb); + return nskb; } #endif /* !_CAN_SKB_H */ From 8b3913fab07286cce3bd6e88233b204502950df9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Aug 2020 17:06:04 +0300 Subject: [PATCH 347/636] can: peak_usb: add range checking in decode operations [ Upstream commit a6921dd524fe31d1f460c161d3526a407533b6db ] These values come from skb->data so Smatch considers them untrusted. I believe Smatch is correct but I don't have a way to test this. The usb_if->dev[] array has 2 elements but the index is in the 0-15 range without checks. The cfd->len can be up to 255 but the maximum valid size is CANFD_MAX_DLEN (64) so that could lead to memory corruption. Fixes: 0a25e1f4f185 ("can: peak_usb: add support for PEAK new CANFD USB adapters") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200813140604.GA456946@mwanda Acked-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 48 +++++++++++++++++----- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 41988358f63c..19600d35aac5 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -476,12 +476,18 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pucan_rx_msg *rm = (struct pucan_rx_msg *)rx_msg; - struct peak_usb_device *dev = usb_if->dev[pucan_msg_get_channel(rm)]; - struct net_device *netdev = dev->netdev; + struct peak_usb_device *dev; + struct net_device *netdev; struct canfd_frame *cfd; struct sk_buff *skb; const u16 rx_msg_flags = le16_to_cpu(rm->flags); + if (pucan_msg_get_channel(rm) >= ARRAY_SIZE(usb_if->dev)) + return -ENOMEM; + + dev = usb_if->dev[pucan_msg_get_channel(rm)]; + netdev = dev->netdev; + if (rx_msg_flags & PUCAN_MSG_EXT_DATA_LEN) { /* CANFD frame case */ skb = alloc_canfd_skb(netdev, &cfd); @@ -527,15 +533,21 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pucan_status_msg *sm = (struct pucan_status_msg *)rx_msg; - struct peak_usb_device *dev = usb_if->dev[pucan_stmsg_get_channel(sm)]; - struct pcan_usb_fd_device *pdev = - container_of(dev, struct pcan_usb_fd_device, dev); + struct pcan_usb_fd_device *pdev; enum can_state new_state = CAN_STATE_ERROR_ACTIVE; enum can_state rx_state, tx_state; - struct net_device *netdev = dev->netdev; + struct peak_usb_device *dev; + struct net_device *netdev; struct can_frame *cf; struct sk_buff *skb; + if (pucan_stmsg_get_channel(sm) >= ARRAY_SIZE(usb_if->dev)) + return -ENOMEM; + + dev = usb_if->dev[pucan_stmsg_get_channel(sm)]; + pdev = container_of(dev, struct pcan_usb_fd_device, dev); + netdev = dev->netdev; + /* nothing should be sent while in BUS_OFF state */ if (dev->can.state == CAN_STATE_BUS_OFF) return 0; @@ -587,9 +599,14 @@ static int pcan_usb_fd_decode_error(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pucan_error_msg *er = (struct pucan_error_msg *)rx_msg; - struct peak_usb_device *dev = usb_if->dev[pucan_ermsg_get_channel(er)]; - struct pcan_usb_fd_device *pdev = - container_of(dev, struct pcan_usb_fd_device, dev); + struct pcan_usb_fd_device *pdev; + struct peak_usb_device *dev; + + if (pucan_ermsg_get_channel(er) >= ARRAY_SIZE(usb_if->dev)) + return -EINVAL; + + dev = usb_if->dev[pucan_ermsg_get_channel(er)]; + pdev = container_of(dev, struct pcan_usb_fd_device, dev); /* keep a trace of tx and rx error counters for later use */ pdev->bec.txerr = er->tx_err_cnt; @@ -603,11 +620,17 @@ static int pcan_usb_fd_decode_overrun(struct pcan_usb_fd_if *usb_if, struct pucan_msg *rx_msg) { struct pcan_ufd_ovr_msg *ov = (struct pcan_ufd_ovr_msg *)rx_msg; - struct peak_usb_device *dev = usb_if->dev[pufd_omsg_get_channel(ov)]; - struct net_device *netdev = dev->netdev; + struct peak_usb_device *dev; + struct net_device *netdev; struct can_frame *cf; struct sk_buff *skb; + if (pufd_omsg_get_channel(ov) >= ARRAY_SIZE(usb_if->dev)) + return -EINVAL; + + dev = usb_if->dev[pufd_omsg_get_channel(ov)]; + netdev = dev->netdev; + /* allocate an skb to store the error frame */ skb = alloc_can_err_skb(netdev, &cf); if (!skb) @@ -724,6 +747,9 @@ static int pcan_usb_fd_encode_msg(struct peak_usb_device *dev, u16 tx_msg_size, tx_msg_flags; u8 can_dlc; + if (cfd->len > CANFD_MAX_DLEN) + return -EINVAL; + tx_msg_size = ALIGN(sizeof(struct pucan_tx_msg) + cfd->len, 4); tx_msg->size = cpu_to_le16(tx_msg_size); tx_msg->type = cpu_to_le16(PUCAN_MSG_CAN_TX); From 6186db959f7bfca5330f063273a42b3bb49154d3 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Wed, 14 Oct 2020 10:56:31 +0200 Subject: [PATCH 348/636] can: peak_usb: peak_usb_get_ts_time(): fix timestamp wrapping [ Upstream commit ecc7b4187dd388549544195fb13a11b4ea8e6a84 ] Fabian Inostroza has discovered a potential problem in the hardware timestamp reporting from the PCAN-USB USB CAN interface (only), related to the fact that a timestamp of an event may precede the timestamp used for synchronization when both records are part of the same USB packet. However, this case was used to detect the wrapping of the time counter. This patch details and fixes the two identified cases where this problem can occur. Reported-by: Fabian Inostroza Signed-off-by: Stephane Grosjean Link: https://lore.kernel.org/r/20201014085631.15128-1-s.grosjean@peak-system.com Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 51 ++++++++++++++++++-- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index afc8d978124e..db156a11e6db 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -138,14 +138,55 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time) /* protect from getting time before setting now */ if (ktime_to_ns(time_ref->tv_host)) { u64 delta_us; + s64 delta_ts = 0; - delta_us = ts - time_ref->ts_dev_2; - if (ts < time_ref->ts_dev_2) - delta_us &= (1 << time_ref->adapter->ts_used_bits) - 1; + /* General case: dev_ts_1 < dev_ts_2 < ts, with: + * + * - dev_ts_1 = previous sync timestamp + * - dev_ts_2 = last sync timestamp + * - ts = event timestamp + * - ts_period = known sync period (theoretical) + * ~ dev_ts2 - dev_ts1 + * *but*: + * + * - time counters wrap (see adapter->ts_used_bits) + * - sometimes, dev_ts_1 < ts < dev_ts2 + * + * "normal" case (sync time counters increase): + * must take into account case when ts wraps (tsw) + * + * < ts_period > < > + * | | | + * ---+--------+----+-------0-+--+--> + * ts_dev_1 | ts_dev_2 | + * ts tsw + */ + if (time_ref->ts_dev_1 < time_ref->ts_dev_2) { + /* case when event time (tsw) wraps */ + if (ts < time_ref->ts_dev_1) + delta_ts = 1 << time_ref->adapter->ts_used_bits; - delta_us += time_ref->ts_total; + /* Otherwise, sync time counter (ts_dev_2) has wrapped: + * handle case when event time (tsn) hasn't. + * + * < ts_period > < > + * | | | + * ---+--------+--0-+---------+--+--> + * ts_dev_1 | ts_dev_2 | + * tsn ts + */ + } else if (time_ref->ts_dev_1 < ts) { + delta_ts = -(1 << time_ref->adapter->ts_used_bits); + } - delta_us *= time_ref->adapter->us_per_ts_scale; + /* add delay between last sync and event timestamps */ + delta_ts += (signed int)(ts - time_ref->ts_dev_2); + + /* add time from beginning to last sync */ + delta_ts += time_ref->ts_total; + + /* convert ticks number into microseconds */ + delta_us = delta_ts * time_ref->adapter->us_per_ts_scale; delta_us >>= time_ref->adapter->us_per_ts_shift; *time = ktime_add_us(time_ref->tv_host_0, delta_us); From 27eaaeedf71658779764d3d746baeb3852a3f493 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Tue, 13 Oct 2020 17:39:47 +0200 Subject: [PATCH 349/636] can: peak_canfd: pucan_handle_can_rx(): fix echo management when loopback is on [ Upstream commit 93ef65e5a6357cc7381f85fcec9283fe29970045 ] Echo management is driven by PUCAN_MSG_LOOPED_BACK bit, while loopback frames are identified with PUCAN_MSG_SELF_RECEIVE bit. Those bits are set for each outgoing frame written to the IP core so that a copy of each one will be placed into the rx path. Thus, - when PUCAN_MSG_LOOPED_BACK is set then the rx frame is an echo of a previously sent frame, - when PUCAN_MSG_LOOPED_BACK+PUCAN_MSG_SELF_RECEIVE are set, then the rx frame is an echo AND a loopback frame. Therefore, this frame must be put into the socket rx path too. This patch fixes how CAN frames are handled when these are sent while the can interface is configured in "loopback on" mode. Signed-off-by: Stephane Grosjean Link: https://lore.kernel.org/r/20201013153947.28012-1-s.grosjean@peak-system.com Fixes: 8ac8321e4a79 ("can: peak: add support for PEAK PCAN-PCIe FD CAN-FD boards") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/peak_canfd/peak_canfd.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 5696d7e80751..4bc5d522c74b 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -256,8 +256,7 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, cf_len = get_can_dlc(pucan_msg_get_dlc(msg)); /* if this frame is an echo, */ - if ((rx_msg_flags & PUCAN_MSG_LOOPED_BACK) && - !(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE)) { + if (rx_msg_flags & PUCAN_MSG_LOOPED_BACK) { unsigned long flags; spin_lock_irqsave(&priv->echo_lock, flags); @@ -271,7 +270,13 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, netif_wake_queue(priv->ndev); spin_unlock_irqrestore(&priv->echo_lock, flags); - return 0; + + /* if this frame is only an echo, stop here. Otherwise, + * continue to push this application self-received frame into + * its own rx queue. + */ + if (!(rx_msg_flags & PUCAN_MSG_SELF_RECEIVE)) + return 0; } /* otherwise, it should be pushed into rx fifo */ From 66bc0b219fe70327596a7b00aab3440bae8c304c Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Tue, 20 Oct 2020 23:53:55 +0800 Subject: [PATCH 350/636] can: flexcan: remove FLEXCAN_QUIRK_DISABLE_MECR quirk for LS1021A [ Upstream commit 018799649071a1638c0c130526af36747df4355a ] After double check with Layerscape CAN owner (Pankaj Bansal), confirm that LS1021A doesn't support ECC feature, so remove FLEXCAN_QUIRK_DISABLE_MECR quirk. Fixes: 99b7668c04b27 ("can: flexcan: adding platform specific details for LS1021A") Cc: Pankaj Bansal Signed-off-by: Joakim Zhang Link: https://lore.kernel.org/r/20201020155402.30318-4-qiangqing.zhang@nxp.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/flexcan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 0be8db6ab319..92fe345e48ab 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -301,8 +301,7 @@ static const struct flexcan_devtype_data fsl_vf610_devtype_data = { static const struct flexcan_devtype_data fsl_ls1021a_r2_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | - FLEXCAN_QUIRK_DISABLE_MECR | FLEXCAN_QUIRK_BROKEN_PERR_STATE | - FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, + FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP, }; static const struct can_bittiming_const flexcan_bittiming_const = { From 955113c89ca9d49b8eef9f46f248397d1a1e55b1 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Thu, 29 Oct 2020 14:30:48 -0700 Subject: [PATCH 351/636] xfs: flush new eof page on truncate to avoid post-eof corruption [ Upstream commit 869ae85dae64b5540e4362d7fe4cd520e10ec05c ] It is possible to expose non-zeroed post-EOF data in XFS if the new EOF page is dirty, backed by an unwritten block and the truncate happens to race with writeback. iomap_truncate_page() will not zero the post-EOF portion of the page if the underlying block is unwritten. The subsequent call to truncate_setsize() will, but doesn't dirty the page. Therefore, if writeback happens to complete after iomap_truncate_page() (so it still sees the unwritten block) but before truncate_setsize(), the cached page becomes inconsistent with the on-disk block. A mapped read after the associated page is reclaimed or invalidated exposes non-zero post-EOF data. For example, consider the following sequence when run on a kernel modified to explicitly flush the new EOF page within the race window: $ xfs_io -fc "falloc 0 4k" -c fsync /mnt/file $ xfs_io -c "pwrite 0 4k" -c "truncate 1k" /mnt/file ... $ xfs_io -c "mmap 0 4k" -c "mread -v 1k 8" /mnt/file 00000400: 00 00 00 00 00 00 00 00 ........ $ umount /mnt/; mount /mnt/ $ xfs_io -c "mmap 0 4k" -c "mread -v 1k 8" /mnt/file 00000400: cd cd cd cd cd cd cd cd ........ Update xfs_setattr_size() to explicitly flush the new EOF page prior to the page truncate to ensure iomap has the latest state of the underlying block. Fixes: 68a9f5e7007c ("xfs: implement iomap based buffered write path") Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/xfs/xfs_iops.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index e427ad097e2e..948ac1290121 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -895,6 +895,16 @@ xfs_setattr_size( error = iomap_zero_range(inode, oldsize, newsize - oldsize, &did_zeroing, &xfs_iomap_ops); } else { + /* + * iomap won't detect a dirty page over an unwritten block (or a + * cow block over a hole) and subsequently skips zeroing the + * newly post-EOF portion of the page. Flush the new EOF to + * convert the block before the pagecache truncate. + */ + error = filemap_write_and_wait_range(inode->i_mapping, newsize, + newsize); + if (error) + return error; error = iomap_truncate_page(inode, newsize, &did_zeroing, &xfs_iomap_ops); } From cba36f708a8a23a6e51dd2ddf70ab8449c692c15 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 2 Nov 2020 17:14:07 -0800 Subject: [PATCH 352/636] xfs: fix scrub flagging rtinherit even if there is no rt device [ Upstream commit c1f6b1ac00756a7108e5fcb849a2f8230c0b62a5 ] The kernel has always allowed directories to have the rtinherit flag set, even if there is no rt device, so this check is wrong. Fixes: 80e4e1268802 ("xfs: scrub inodes") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin --- fs/xfs/scrub/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index e386c9b0b4ab..8d45d60832db 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -131,8 +131,7 @@ xchk_inode_flags( goto bad; /* rt flags require rt device */ - if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && - !mp->m_rtdev_targp) + if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) goto bad; /* new rt bitmap flag only valid for rbmino */ From 0982aa54d054bf11fe202522e29e4da9b288653b Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Wed, 28 Oct 2020 10:41:02 -0500 Subject: [PATCH 353/636] tpm: efi: Don't create binary_bios_measurements file for an empty log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8ffd778aff45be760292225049e0141255d4ad6e ] Mimic the pre-existing ACPI and Device Tree event log behavior by not creating the binary_bios_measurements file when the EFI TPM event log is empty. This fixes the following NULL pointer dereference that can occur when reading /sys/kernel/security/tpm0/binary_bios_measurements after the kernel received an empty event log from the firmware: BUG: kernel NULL pointer dereference, address: 000000000000002c #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 3932 Comm: fwupdtpmevlog Not tainted 5.9.0-00003-g629990edad62 #17 Hardware name: LENOVO 20LCS03L00/20LCS03L00, BIOS N27ET38W (1.24 ) 11/28/2019 RIP: 0010:tpm2_bios_measurements_start+0x3a/0x550 Code: 54 53 48 83 ec 68 48 8b 57 70 48 8b 1e 65 48 8b 04 25 28 00 00 00 48 89 45 d0 31 c0 48 8b 82 c0 06 00 00 48 8b 8a c8 06 00 00 <44> 8b 60 1c 48 89 4d a0 4c 89 e2 49 83 c4 20 48 83 fb 00 75 2a 49 RSP: 0018:ffffa9c901203db0 EFLAGS: 00010246 RAX: 0000000000000010 RBX: 0000000000000000 RCX: 0000000000000010 RDX: ffff8ba1eb99c000 RSI: ffff8ba1e4ce8280 RDI: ffff8ba1e4ce8258 RBP: ffffa9c901203e40 R08: ffffa9c901203dd8 R09: ffff8ba1ec443300 R10: ffffa9c901203e50 R11: 0000000000000000 R12: ffff8ba1e4ce8280 R13: ffffa9c901203ef0 R14: ffffa9c901203ef0 R15: ffff8ba1e4ce8258 FS: 00007f6595460880(0000) GS:ffff8ba1ef880000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000002c CR3: 00000007d8d18003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __kmalloc_node+0x113/0x320 ? kvmalloc_node+0x31/0x80 seq_read+0x94/0x420 vfs_read+0xa7/0x190 ksys_read+0xa7/0xe0 __x64_sys_read+0x1a/0x20 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 In this situation, the bios_event_log pointer in the tpm_bios_log struct was not NULL but was equal to the ZERO_SIZE_PTR (0x10) value. This was due to the following kmemdup() in tpm_read_log_efi(): int tpm_read_log_efi(struct tpm_chip *chip) { ... /* malloc EventLog space */ log->bios_event_log = kmemdup(log_tbl->log, log_size, GFP_KERNEL); if (!log->bios_event_log) { ret = -ENOMEM; goto out; } ... } When log_size is zero, due to an empty event log from firmware, ZERO_SIZE_PTR is returned from kmemdup(). Upon a read of the binary_bios_measurements file, the tpm2_bios_measurements_start() function does not perform a ZERO_OR_NULL_PTR() check on the bios_event_log pointer before dereferencing it. Rather than add a ZERO_OR_NULL_PTR() check in functions that make use of the bios_event_log pointer, simply avoid creating the binary_bios_measurements_file as is done in other event log retrieval backends. Explicitly ignore all of the events in the final event log when the main event log is empty. The list of events in the final event log cannot be accurately parsed without referring to the first event in the main event log (the event log header) so the final event log is useless in such a situation. Fixes: 58cc1e4faf10 ("tpm: parse TPM event logs based on EFI table") Link: https://lore.kernel.org/linux-integrity/E1FDCCCB-CA51-4AEE-AC83-9CDE995EAE52@canonical.com/ Reported-by: Kai-Heng Feng Reported-by: Kenneth R. Crudup Reported-by: Mimi Zohar Cc: Thiébaud Weksteen Cc: Ard Biesheuvel Signed-off-by: Tyler Hicks Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- drivers/char/tpm/eventlog/efi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/char/tpm/eventlog/efi.c b/drivers/char/tpm/eventlog/efi.c index 3e673ab22cb4..abd3beeb5158 100644 --- a/drivers/char/tpm/eventlog/efi.c +++ b/drivers/char/tpm/eventlog/efi.c @@ -43,6 +43,11 @@ int tpm_read_log_efi(struct tpm_chip *chip) log_size = log_tbl->size; memunmap(log_tbl); + if (!log_size) { + pr_warn("UEFI TPM log area empty\n"); + return -EIO; + } + log_tbl = memremap(efi.tpm_log, sizeof(*log_tbl) + log_size, MEMREMAP_WB); if (!log_tbl) { From b98f2b8fb91bbe82b5372ce782ce619da57eff1c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 11 Sep 2019 17:42:28 +0100 Subject: [PATCH 354/636] Btrfs: fix missing error return if writeback for extent buffer never started [ Upstream commit 0607eb1d452d45c5ac4c745a9e9e0d95152ea9d0 ] If lock_extent_buffer_for_io() fails, it returns a negative value, but its caller btree_write_cache_pages() ignores such error. This means that a call to flush_write_bio(), from lock_extent_buffer_for_io(), might have failed. We should make btree_write_cache_pages() notice such error values and stop immediatelly, making sure filemap_fdatawrite_range() returns an error to the transaction commit path. A failure from flush_write_bio() should also result in the endio callback end_bio_extent_buffer_writepage() being invoked, which sets the BTRFS_FS_*_ERR bits appropriately, so that there's no risk a transaction or log commit doesn't catch a writeback failure. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent_io.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 301111922a1a..dabf153843e9 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3913,6 +3913,10 @@ retry: if (!ret) { free_extent_buffer(eb); continue; + } else if (ret < 0) { + done = 1; + free_extent_buffer(eb); + break; } ret = write_one_eb(eb, fs_info, wbc, &epd); From 7eef8b2c424430bc81701a5fd0445bd2b9b243cb Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Sun, 9 Aug 2020 08:32:58 +0900 Subject: [PATCH 355/636] ath9k_htc: Use appropriate rs_datalen type commit 5024f21c159f8c1668f581fff37140741c0b1ba9 upstream. kernel test robot says: drivers/net/wireless/ath/ath9k/htc_drv_txrx.c:987:20: sparse: warning: incorrect type in assignment (different base types) drivers/net/wireless/ath/ath9k/htc_drv_txrx.c:987:20: sparse: expected restricted __be16 [usertype] rs_datalen drivers/net/wireless/ath/ath9k/htc_drv_txrx.c:987:20: sparse: got unsigned short [usertype] drivers/net/wireless/ath/ath9k/htc_drv_txrx.c:988:13: sparse: warning: restricted __be16 degrades to integer drivers/net/wireless/ath/ath9k/htc_drv_txrx.c:1001:13: sparse: warning: restricted __be16 degrades to integer Indeed rs_datalen has host byte order, so modify it's own type. Reported-by: kernel test robot Fixes: cd486e627e67 ("ath9k_htc: Discard undersized packets") Signed-off-by: Masashi Honma Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200808233258.4596-1-masashi.honma@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index f19393e584dc..d567fbe79cff 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -973,7 +973,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, struct ath_htc_rx_status *rxstatus; struct ath_rx_status rx_stats; bool decrypt_error = false; - __be16 rs_datalen; + u16 rs_datalen; bool is_phyerr; if (skb->len < HTC_RX_FRAME_HEADER_SIZE) { From 580a117919f3ae1390be6b4111253ee9595938f5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 29 Oct 2020 03:56:06 +0100 Subject: [PATCH 356/636] netfilter: use actual socket sk rather than skb sk when routing harder commit 46d6c5ae953cc0be38efd0e469284df7c4328cf8 upstream. If netfilter changes the packet mark when mangling, the packet is rerouted using the route_me_harder set of functions. Prior to this commit, there's one big difference between route_me_harder and the ordinary initial routing functions, described in the comment above __ip_queue_xmit(): /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, That function goes on to correctly make use of sk->sk_bound_dev_if, rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a tunnel will receive a packet in ndo_start_xmit with an initial skb->sk. It will make some transformations to that packet, and then it will send the encapsulated packet out of a *new* socket. That new socket will basically always have a different sk_bound_dev_if (otherwise there'd be a routing loop). So for the purposes of routing the encapsulated packet, the routing information as it pertains to the socket should come from that socket's sk, rather than the packet's original skb->sk. For that reason __ip_queue_xmit() and related functions all do the right thing. One might argue that all tunnels should just call skb_orphan(skb) before transmitting the encapsulated packet into the new socket. But tunnels do *not* do this -- and this is wisely avoided in skb_scrub_packet() too -- because features like TSQ rely on skb->destructor() being called when that buffer space is truely available again. Calling skb_orphan(skb) too early would result in buffers filling up unnecessarily and accounting info being all wrong. Instead, additional routing must take into account the new sk, just as __ip_queue_xmit() notes. So, this commit addresses the problem by fishing the correct sk out of state->sk -- it's already set properly in the call to nf_hook() in __ip_local_out(), which receives the sk as part of its normal functionality. So we make sure to plumb state->sk through the various route_me_harder functions, and then make correct use of it following the example of __ip_queue_xmit(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. Donenfeld Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin [Jason: backported to 4.19 from Sasha's 5.4 backport] Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter_ipv4.h | 2 +- include/linux/netfilter_ipv6.h | 2 +- net/ipv4/netfilter.c | 12 +++++++----- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_reject_ipv4.c | 2 +- net/ipv4/netfilter/nft_chain_route_ipv4.c | 2 +- net/ipv6/netfilter.c | 6 +++--- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 2 +- net/ipv6/netfilter/nft_chain_route_ipv6.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 4 ++-- 13 files changed, 22 insertions(+), 20 deletions(-) diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 95ab5cc64422..45ff1330b339 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -16,7 +16,7 @@ struct ip_rt_info { u_int32_t mark; }; -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type); struct nf_queue_entry; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index c0dc4dd78887..47a2de582f57 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -36,7 +36,7 @@ struct nf_ipv6_ops { }; #ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 8d2e5dc9a827..3d670d5aea34 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,17 +17,19 @@ #include /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type) +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - const struct sock *sk = skb_to_full_sk(skb); - __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0; + __u8 flags; struct net_device *dev = skb_dst(skb)->dev; unsigned int hh_len; + sk = sk_to_full_sk(sk); + flags = sk ? inet_sk_flowi_flags(sk) : 0; + if (addr_type == RTN_UNSPEC) addr_type = inet_addr_type_dev_table(net, dev, saddr); if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) @@ -91,8 +93,8 @@ int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(entry->state.net, skb, - RTN_UNSPEC); + return ip_route_me_harder(entry->state.net, entry->state.sk, + skb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 690b17ef6a44..d64b1ef43c10 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -54,7 +54,7 @@ synproxy_send_tcp(struct net *net, skb_dst_set_noref(nskb, skb_dst(skb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb; if (nfct) { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index dea138ca8925..0829f46ddfdd 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -65,7 +65,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 6115bf1ff6f0..6a27766b7d0f 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -329,7 +329,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 5cd06ba3535d..4996db1f64a1 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -129,7 +129,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) ip4_dst_hoplimit(skb_dst(nskb))); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb; niph = ip_hdr(nskb); diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 7d82934c46f4..61003768e52b 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -50,7 +50,7 @@ static unsigned int nf_route_table_hook(void *priv, iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6d0b1f3e927b..5679fa3f696a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -17,10 +17,10 @@ #include #include -int ip6_route_me_harder(struct net *net, struct sk_buff *skb) +int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct sock *sk = sk_to_full_sk(skb->sk); + struct sock *sk = sk_to_full_sk(sk_partial); unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & @@ -81,7 +81,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(entry->state.net, skb); + return ip6_route_me_harder(entry->state.net, entry->state.sk, skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index b0524b18c4fb..acba3757ff60 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -60,7 +60,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index ca6d38698b1a..2b6a3b27f670 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -352,7 +352,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index da3f1f8cb325..afe79cb46e63 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -52,7 +52,7 @@ static unsigned int nf_route_table_hook(void *priv, skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d5e4329579e2..acaeeaf81441 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -725,12 +725,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, struct dst_entry *dst = skb_dst(skb); if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && - ip6_route_me_harder(ipvs->net, skb) != 0) + ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0) return 1; } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0) + ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0) return 1; return 0; From fe4d386d6c2a497ec0020164647497e23f10bfdd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 23 Aug 2018 17:48:45 +0100 Subject: [PATCH 357/636] crypto: arm64/aes-modes - get rid of literal load of addend vector commit ed6ed11830a9ded520db31a6e2b69b6b0a1eb0e2 upstream. Replace the literal load of the addend vector with a sequence that performs each add individually. This sequence is only 2 instructions longer than the original, and 2% faster on Cortex-A53. This is an improvement by itself, but also works around a Clang issue, whose integrated assembler does not implement the GNU ARM asm syntax completely, and does not support the =literal notation for FP registers (more info at https://bugs.llvm.org/show_bug.cgi?id=38642) Cc: Nick Desaulniers Signed-off-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- arch/arm64/crypto/aes-modes.S | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 483a7130cf0e..496c243de4ac 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -232,17 +232,19 @@ AES_ENTRY(aes_ctr_encrypt) bmi .Lctr1x cmn w6, #4 /* 32 bit overflow? */ bcs .Lctr1x - ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ - dup v7.4s, w6 + add w7, w6, #1 mov v0.16b, v4.16b - add v7.4s, v7.4s, v8.4s + add w8, w6, #2 mov v1.16b, v4.16b - rev32 v8.16b, v7.16b + add w9, w6, #3 mov v2.16b, v4.16b + rev w7, w7 mov v3.16b, v4.16b - mov v1.s[3], v8.s[0] - mov v2.s[3], v8.s[1] - mov v3.s[3], v8.s[2] + rev w8, w8 + mov v1.s[3], w7 + rev w9, w9 + mov v2.s[3], w8 + mov v3.s[3], w9 ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */ bl aes_encrypt_block4x eor v0.16b, v5.16b, v0.16b From 5cfb8377e8af998ea6bd092cce32510fbae29e9c Mon Sep 17 00:00:00 2001 From: Evgeny Novikov Date: Fri, 2 Oct 2020 18:01:55 +0300 Subject: [PATCH 358/636] usb: gadget: goku_udc: fix potential crashes in probe [ Upstream commit 0d66e04875c5aae876cf3d4f4be7978fa2b00523 ] goku_probe() goes to error label "err" and invokes goku_remove() in case of failures of pci_enable_device(), pci_resource_start() and ioremap(). goku_remove() gets a device from pci_get_drvdata(pdev) and works with it without any checks, in particular it dereferences a corresponding pointer. But goku_probe() did not set this device yet. So, one can expect various crashes. The patch moves setting the device just after allocation of memory for it. Found by Linux Driver Verification project (linuxtesting.org). Reported-by: Pavel Andrianov Signed-off-by: Evgeny Novikov Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/goku_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c index c3721225b61e..b706ad3034bc 100644 --- a/drivers/usb/gadget/udc/goku_udc.c +++ b/drivers/usb/gadget/udc/goku_udc.c @@ -1757,6 +1757,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err; } + pci_set_drvdata(pdev, dev); spin_lock_init(&dev->lock); dev->pdev = pdev; dev->gadget.ops = &goku_ops; @@ -1790,7 +1791,6 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) } dev->regs = (struct goku_udc_regs __iomem *) base; - pci_set_drvdata(pdev, dev); INFO(dev, "%s\n", driver_desc); INFO(dev, "version: " DRIVER_VERSION " %s\n", dmastr()); INFO(dev, "irq %d, pci mem %p\n", pdev->irq, base); From 236c85d6c65ed59944361620aab597a2369086af Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 27 Oct 2020 10:10:01 -0500 Subject: [PATCH 359/636] gfs2: Free rd_bits later in gfs2_clear_rgrpd to fix use-after-free [ Upstream commit d0f17d3883f1e3f085d38572c2ea8edbd5150172 ] Function gfs2_clear_rgrpd calls kfree(rgd->rd_bits) before calling return_all_reservations, but return_all_reservations still dereferences rgd->rd_bits in __rs_deltree. Fix that by moving the call to kfree below the call to return_all_reservations. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c94c4ac1ae78..1686a40099f2 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -739,9 +739,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) } gfs2_free_clones(rgd); + return_all_reservations(rgd); kfree(rgd->rd_bits); rgd->rd_bits = NULL; - return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } } From 7b0310b573d2d27846d077a6607088bd484e7ad2 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 27 Oct 2020 10:10:02 -0500 Subject: [PATCH 360/636] gfs2: Add missing truncate_inode_pages_final for sd_aspace [ Upstream commit a9dd945ccef07a904e412f208f8de708a3d7159e ] Gfs2 creates an address space for its rgrps called sd_aspace, but it never called truncate_inode_pages_final on it. This confused vfs greatly which tried to reference the address space after gfs2 had freed the superblock that contained it. This patch adds a call to truncate_inode_pages_final for sd_aspace, thus avoiding the use-after-free. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index a971862b186e..22cd68bd8c9b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -934,6 +934,7 @@ restart: gfs2_jindex_free(sdp); /* Take apart glock structures and buffer lists */ gfs2_gl_hash_clear(sdp); + truncate_inode_pages_final(&sdp->sd_aspace); gfs2_delete_debugfs_file(sdp); /* Unmount the locking protocol */ gfs2_lm_unmount(sdp); From 210387f1af1af2db0f70d8e3af0caadb05be52c6 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 28 Oct 2020 13:42:18 -0500 Subject: [PATCH 361/636] gfs2: check for live vs. read-only file system in gfs2_fitrim [ Upstream commit c5c68724696e7d2f8db58a5fce3673208d35c485 ] Before this patch, gfs2_fitrim was not properly checking for a "live" file system. If the file system had something to trim and the file system was read-only (or spectator) it would start the trim, but when it starts the transaction, gfs2_trans_begin returns -EROFS (read-only file system) and it errors out. However, if the file system was already trimmed so there's no work to do, it never called gfs2_trans_begin. That code is bypassed so it never returns the error. Instead, it returns a good return code with 0 work. All this makes for inconsistent behavior: The same fstrim command can return -EROFS in one case and 0 in another. This tripped up xfstests generic/537 which reports the error as: +fstrim with unrecovered metadata just ate your filesystem This patch adds a check for a "live" (iow, active journal, iow, RW) file system, and if not, returns the error properly. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin --- fs/gfs2/rgrp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 1686a40099f2..de9b561b1c38 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1387,6 +1387,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + return -EROFS; + if (!blk_queue_discard(q)) return -EOPNOTSUPP; From 6563e00fe8ee5664f3affb47abb93237df15de49 Mon Sep 17 00:00:00 2001 From: Keita Suzuki Date: Tue, 27 Oct 2020 07:31:24 +0000 Subject: [PATCH 362/636] scsi: hpsa: Fix memory leak in hpsa_init_one() [ Upstream commit af61bc1e33d2c0ec22612b46050f5b58ac56a962 ] When hpsa_scsi_add_host() fails, h->lastlogicals is leaked since it is missing a free() in the error handler. Fix this by adding free() when hpsa_scsi_add_host() fails. Link: https://lore.kernel.org/r/20201027073125.14229-1-keitasuzuki.park@sslab.ics.keio.ac.jp Tested-by: Don Brace Acked-by: Don Brace Signed-off-by: Keita Suzuki Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hpsa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 11de2198bb87..0fe21cbdf0ca 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -8781,7 +8781,7 @@ reinit_after_soft_reset: /* hook into SCSI subsystem */ rc = hpsa_scsi_add_host(h); if (rc) - goto clean7; /* perf, sg, cmd, irq, shost, pci, lu, aer/h */ + goto clean8; /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */ /* Monitor the controller for firmware lockups */ h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL; @@ -8796,6 +8796,8 @@ reinit_after_soft_reset: HPSA_EVENT_MONITOR_INTERVAL); return 0; +clean8: /* lastlogicals, perf, sg, cmd, irq, shost, pci, lu, aer/h */ + kfree(h->lastlogicals); clean7: /* perf, sg, cmd, irq, shost, pci, lu, aer/h */ hpsa_free_performant_mode(h); h->access.set_intr_mask(h, HPSA_INTR_OFF); From c1814f330a2dece1f4c5c294118ea81a429f52bf Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 28 Oct 2020 15:29:59 +0800 Subject: [PATCH 363/636] drm/amdgpu: perform srbm soft reset always on SDMA resume [ Upstream commit 253475c455eb5f8da34faa1af92709e7bb414624 ] This can address the random SDMA hang after pci config reset seen on Hawaii. Signed-off-by: Evan Quan Tested-by: Sandeep Raghuraman Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index d0fa2aac2388..ca66c2f79758 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1086,22 +1086,19 @@ static int cik_sdma_soft_reset(void *handle) { u32 srbm_soft_reset = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 tmp = RREG32(mmSRBM_STATUS2); + u32 tmp; - if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) { - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; - } - if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) { - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; - } + /* sdma0 */ + tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); + tmp |= SDMA0_F32_CNTL__HALT_MASK; + WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); + srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; + + /* sdma1 */ + tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); + tmp |= SDMA0_F32_CNTL__HALT_MASK; + WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); + srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; if (srbm_soft_reset) { tmp = RREG32(mmSRBM_SOFT_RESET); From 9dc8a120f440b6882a61c31ccd70253f80bc7435 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 16 Oct 2020 10:45:26 +0800 Subject: [PATCH 364/636] drm/amd/pm: perform SMC reset on suspend/hibernation [ Upstream commit 277b080f98803cb73a83fb234f0be83a10e63958 ] So that the succeeding resume can be performed based on a clean state. Signed-off-by: Evan Quan Tested-by: Sandeep Raghuraman Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 4 ++++ drivers/gpu/drm/amd/powerplay/inc/hwmgr.h | 1 + drivers/gpu/drm/amd/powerplay/inc/smumgr.h | 2 ++ .../gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 24 +++++++++++++++++++ drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c | 8 +++++++ 5 files changed, 39 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 058898b321b8..d8e624d64ae3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1531,6 +1531,10 @@ int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((tmp_result == 0), "Failed to reset to default!", result = tmp_result); + tmp_result = smum_stop_smc(hwmgr); + PP_ASSERT_WITH_CODE((tmp_result == 0), + "Failed to stop smc!", result = tmp_result); + tmp_result = smu7_force_switch_to_arbf0(hwmgr); PP_ASSERT_WITH_CODE((tmp_result == 0), "Failed to force to switch arbf0!", result = tmp_result); diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 6ee864455a12..f59e1e737735 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -216,6 +216,7 @@ struct pp_smumgr_func { bool (*is_hw_avfs_present)(struct pp_hwmgr *hwmgr); int (*update_dpm_settings)(struct pp_hwmgr *hwmgr, void *profile_setting); int (*smc_table_manager)(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t table_id, bool rw); /*rw: true for read, false for write */ + int (*stop_smc)(struct pp_hwmgr *hwmgr); }; struct pp_hwmgr_func { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index 82550a8a3a3f..ef4f2392e2e7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -113,4 +113,6 @@ extern int smum_update_dpm_settings(struct pp_hwmgr *hwmgr, void *profile_settin extern int smum_smc_table_manager(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t table_id, bool rw); +extern int smum_stop_smc(struct pp_hwmgr *hwmgr); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index db87cb8930d2..0d4dd607e85c 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2934,6 +2934,29 @@ static int ci_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) return 0; } +static void ci_reset_smc(struct pp_hwmgr *hwmgr) +{ + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_RESET_CNTL, + rst_reg, 1); +} + + +static void ci_stop_smc_clock(struct pp_hwmgr *hwmgr) +{ + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, + SMC_SYSCON_CLOCK_CNTL_0, + ck_disable, 1); +} + +static int ci_stop_smc(struct pp_hwmgr *hwmgr) +{ + ci_reset_smc(hwmgr); + ci_stop_smc_clock(hwmgr); + + return 0; +} + const struct pp_smumgr_func ci_smu_funcs = { .smu_init = ci_smu_init, .smu_fini = ci_smu_fini, @@ -2957,4 +2980,5 @@ const struct pp_smumgr_func ci_smu_funcs = { .is_dpm_running = ci_is_dpm_running, .update_dpm_settings = ci_update_dpm_settings, .update_smc_table = ci_update_smc_table, + .stop_smc = ci_stop_smc, }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index a6edd5df33b0..20ecf994d47f 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -213,3 +213,11 @@ int smum_smc_table_manager(struct pp_hwmgr *hwmgr, uint8_t *table, uint16_t tabl return -EINVAL; } + +int smum_stop_smc(struct pp_hwmgr *hwmgr) +{ + if (hwmgr->smumgr_funcs->stop_smc) + return hwmgr->smumgr_funcs->stop_smc(hwmgr); + + return 0; +} From efb8c4d7bef928ad002831f6f65173458088a3ae Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 27 Oct 2020 10:24:18 +0800 Subject: [PATCH 365/636] drm/amd/pm: do not use ixFEATURE_STATUS for checking smc running [ Upstream commit 786436b453001dafe81025389f96bf9dac1e9690 ] This reverts commit f87812284172a9809820d10143b573d833cd3f75 ("drm/amdgpu: Fix bug where DPM is not enabled after hibernate and resume"). It was intended to fix Hawaii S4(hibernation) issue but break S3. As ixFEATURE_STATUS is filled with garbage data on resume which can be only cleared by reloading smc firmware(but that will involve many changes). So, we will revert this S4 fix and seek a new way. Signed-off-by: Evan Quan Tested-by: Sandeep Raghuraman Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 0d4dd607e85c..c05bec5effb2 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -2723,10 +2723,7 @@ static int ci_initialize_mc_reg_table(struct pp_hwmgr *hwmgr) static bool ci_is_dpm_running(struct pp_hwmgr *hwmgr) { - return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device, - CGS_IND_REG__SMC, FEATURE_STATUS, - VOLTAGE_CONTROLLER_ON)) - ? true : false; + return ci_is_smc_ram_running(hwmgr); } static int ci_smu_init(struct pp_hwmgr *hwmgr) From 79e25eab99ae35f208447e2bd570185459c99025 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Oct 2020 13:25:41 +0200 Subject: [PATCH 366/636] mac80211: fix use of skb payload instead of header [ Upstream commit 14f46c1e5108696ec1e5a129e838ecedf108c7bf ] When ieee80211_skb_resize() is called from ieee80211_build_hdr() the skb has no 802.11 header yet, in fact it consist only of the payload as the ethernet frame is removed. As such, we're using the payload data for ieee80211_is_mgmt(), which is of course completely wrong. This didn't really hurt us because these are always data frames, so we could only have added more tailroom than we needed if we determined it was a management frame and sdata->crypto_tx_tailroom_needed_cnt was false. However, syzbot found that of course there need not be any payload, so we're using at best uninitialized memory for the check. Fix this to pass explicitly the kind of frame that we have instead of checking there, by replacing the "bool may_encrypt" argument with an argument that can carry the three possible states - it's not going to be encrypted, it's a management frame, or it's a data frame (and then we check sdata->crypto_tx_tailroom_needed_cnt). Reported-by: syzbot+32fd1a1bfe355e93f1e2@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20201009132538.e1fd7f802947.I799b288466ea2815f9d4c84349fae697dca2f189@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/tx.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3160ffd93a15..98d048630ad2 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1908,19 +1908,24 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, /* device xmit handlers */ +enum ieee80211_encrypt { + ENCRYPT_NO, + ENCRYPT_MGMT, + ENCRYPT_DATA, +}; + static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - int head_need, bool may_encrypt) + int head_need, + enum ieee80211_encrypt encrypt) { struct ieee80211_local *local = sdata->local; - struct ieee80211_hdr *hdr; bool enc_tailroom; int tail_need = 0; - hdr = (struct ieee80211_hdr *) skb->data; - enc_tailroom = may_encrypt && - (sdata->crypto_tx_tailroom_needed_cnt || - ieee80211_is_mgmt(hdr->frame_control)); + enc_tailroom = encrypt == ENCRYPT_MGMT || + (encrypt == ENCRYPT_DATA && + sdata->crypto_tx_tailroom_needed_cnt); if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; @@ -1952,23 +1957,29 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; int headroom; - bool may_encrypt; + enum ieee80211_encrypt encrypt; - may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT); + if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT) + encrypt = ENCRYPT_NO; + else if (ieee80211_is_mgmt(hdr->frame_control)) + encrypt = ENCRYPT_MGMT; + else + encrypt = ENCRYPT_DATA; headroom = local->tx_headroom; - if (may_encrypt) + if (encrypt != ENCRYPT_NO) headroom += sdata->encrypt_headroom; headroom -= skb_headroom(skb); headroom = max_t(int, 0, headroom); - if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) { + if (ieee80211_skb_resize(sdata, skb, headroom, encrypt)) { ieee80211_free_txskb(&local->hw, skb); return; } + /* reload after potential resize */ hdr = (struct ieee80211_hdr *) skb->data; info->control.vif = &sdata->vif; @@ -2751,7 +2762,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, head_need += sdata->encrypt_headroom; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); - if (ieee80211_skb_resize(sdata, skb, head_need, true)) { + if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) { ieee80211_free_txskb(&local->hw, skb); skb = NULL; return ERR_PTR(-ENOMEM); @@ -3414,7 +3425,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (unlikely(ieee80211_skb_resize(sdata, skb, max_t(int, extra_head + hw_headroom - skb_headroom(skb), 0), - false))) { + ENCRYPT_NO))) { kfree_skb(skb); return true; } From 319261de5a616ee5ad326c891db24e7028d74eef Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 9 Oct 2020 15:02:15 +0800 Subject: [PATCH 367/636] cfg80211: regulatory: Fix inconsistent format argument [ Upstream commit db18d20d1cb0fde16d518fb5ccd38679f174bc04 ] Fix follow warning: [net/wireless/reg.c:3619]: (warning) %d in format string (no. 2) requires 'int' but the argument type is 'unsigned int'. Reported-by: Hulk Robot Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20201009070215.63695-1-yebin10@huawei.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 935aebf15010..c7825b951f72 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3374,7 +3374,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) power_rule = ®_rule->power_rule; if (reg_rule->flags & NL80211_RRF_AUTO_BW) - snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO", + snprintf(bw, sizeof(bw), "%d KHz, %u KHz AUTO", freq_range->max_bandwidth_khz, reg_get_max_bandwidth(rd, reg_rule)); else From 509e215d161ead66ea3cc39aca61a5b1845cc048 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 24 Sep 2020 12:45:59 +0200 Subject: [PATCH 368/636] scsi: scsi_dh_alua: Avoid crash during alua_bus_detach() [ Upstream commit 5faf50e9e9fdc2117c61ff7e20da49cd6a29e0ca ] alua_bus_detach() might be running concurrently with alua_rtpg_work(), so we might trip over h->sdev == NULL and call BUG_ON(). The correct way of handling it is to not set h->sdev to NULL in alua_bus_detach(), and call rcu_synchronize() before the final delete to ensure that all concurrent threads have left the critical section. Then we can get rid of the BUG_ON() and replace it with a simple if condition. Link: https://lore.kernel.org/r/1600167537-12509-1-git-send-email-jitendra.khasdev@oracle.com Link: https://lore.kernel.org/r/20200924104559.26753-1-hare@suse.de Cc: Brian Bunker Acked-by: Brian Bunker Tested-by: Jitendra Khasdev Reviewed-by: Jitendra Khasdev Signed-off-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/device_handler/scsi_dh_alua.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index c95c782b93a5..60c48dc5d945 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -672,8 +672,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) rcu_read_lock(); list_for_each_entry_rcu(h, &tmp_pg->dh_list, node) { - /* h->sdev should always be valid */ - BUG_ON(!h->sdev); + if (!h->sdev) + continue; h->sdev->access_state = desc[0]; } rcu_read_unlock(); @@ -719,7 +719,8 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) pg->expiry = 0; rcu_read_lock(); list_for_each_entry_rcu(h, &pg->dh_list, node) { - BUG_ON(!h->sdev); + if (!h->sdev) + continue; h->sdev->access_state = (pg->state & SCSI_ACCESS_STATE_MASK); if (pg->pref) @@ -1160,7 +1161,6 @@ static void alua_bus_detach(struct scsi_device *sdev) spin_lock(&h->pg_lock); pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); rcu_assign_pointer(h->pg, NULL); - h->sdev = NULL; spin_unlock(&h->pg_lock); if (pg) { spin_lock_irq(&pg->lock); @@ -1169,6 +1169,7 @@ static void alua_bus_detach(struct scsi_device *sdev) kref_put(&pg->kref, release_port_group); } sdev->handler_data = NULL; + synchronize_rcu(); kfree(h); } From f440ef2a292e74081d5f4352057c4ca13a4b78fe Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 15 Oct 2020 02:50:02 +0000 Subject: [PATCH 369/636] iommu/amd: Increase interrupt remapping table limit to 512 entries [ Upstream commit 73db2fc595f358460ce32bcaa3be1f0cce4a2db1 ] Certain device drivers allocate IO queues on a per-cpu basis. On AMD EPYC platform, which can support up-to 256 cpu threads, this can exceed the current MAX_IRQ_PER_TABLE limit of 256, and result in the error message: AMD-Vi: Failed to allocate IRTE This has been observed with certain NVME devices. AMD IOMMU hardware can actually support upto 512 interrupt remapping table entries. Therefore, update the driver to match the hardware limit. Please note that this also increases the size of interrupt remapping table to 8KB per device when using the 128-bit IRTE format. Signed-off-by: Suravee Suthikulpanit Link: https://lore.kernel.org/r/20201015025002.87997-1-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd_iommu_types.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 859b06424e5c..df6f3cc958e5 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -410,7 +410,11 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; -#define MAX_IRQS_PER_TABLE 256 +/* + * AMD IOMMU hardware only support 512 IRTEs despite + * the architectural limitation of 2048 entries. + */ +#define MAX_IRQS_PER_TABLE 512 #define IRQ_TABLE_ALIGNMENT 128 struct irq_remap_table { From 2343665ea3627826e316674f4b80ee102b9010a7 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 28 Oct 2020 14:27:42 -0400 Subject: [PATCH 370/636] s390/smp: move rcu_cpu_starting() earlier [ Upstream commit de5d9dae150ca1c1b5c7676711a9ca139d1a8dec ] The call to rcu_cpu_starting() in smp_init_secondary() is not early enough in the CPU-hotplug onlining process, which results in lockdep splats as follows: WARNING: suspicious RCU usage ----------------------------- kernel/locking/lockdep.c:3497 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. Call Trace: show_stack+0x158/0x1f0 dump_stack+0x1f2/0x238 __lock_acquire+0x2640/0x4dd0 lock_acquire+0x3a8/0xd08 _raw_spin_lock_irqsave+0xc0/0xf0 clockevents_register_device+0xa8/0x528 init_cpu_timer+0x33e/0x468 smp_init_secondary+0x11a/0x328 smp_start_secondary+0x82/0x88 This is avoided by moving the call to rcu_cpu_starting up near the beginning of the smp_init_secondary() function. Note that the raw_smp_processor_id() is required in order to avoid calling into lockdep before RCU has declared the CPU to be watched for readers. Link: https://lore.kernel.org/lkml/160223032121.7002.1269740091547117869.tip-bot2@tip-bot2/ Signed-off-by: Qian Cai Acked-by: Paul E. McKenney Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8e31dfd85de3..888f247c9261 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -831,7 +831,7 @@ void __init smp_detect_cpus(void) */ static void smp_start_secondary(void *cpuvoid) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); S390_lowcore.last_update_clock = get_tod_clock(); S390_lowcore.restart_stack = (unsigned long) restart_stack; @@ -844,6 +844,7 @@ static void smp_start_secondary(void *cpuvoid) set_cpu_flag(CIF_ASCE_PRIMARY); set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); + rcu_cpu_starting(cpu); preempt_disable(); init_cpu_timer(); vtime_init(); From a945da61809ebb8349e93f4356983363083937e0 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 31 Oct 2020 11:03:53 +0800 Subject: [PATCH 371/636] vfio: platform: fix reference leak in vfio_platform_open [ Upstream commit bb742ad01961a3b9d1f9d19375487b879668b6b2 ] pm_runtime_get_sync() will increment pm usage counter even it failed. Forgetting to call pm_runtime_put will result in reference leak in vfio_platform_open, so we should fix it. Signed-off-by: Zhang Qilong Acked-by: Eric Auger Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin --- drivers/vfio/platform/vfio_platform_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index c0cd824be2b7..460760d0becf 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -273,7 +273,7 @@ static int vfio_platform_open(void *device_data) ret = pm_runtime_get_sync(vdev->device); if (ret < 0) - goto err_pm; + goto err_rst; ret = vfio_platform_call_reset(vdev, &extra_dbg); if (ret && vdev->reset_required) { @@ -290,7 +290,6 @@ static int vfio_platform_open(void *device_data) err_rst: pm_runtime_put(vdev->device); -err_pm: vfio_platform_irq_cleanup(vdev); err_irq: vfio_platform_regions_cleanup(vdev); From cb23120e8a7a569ba18631c38691aa38a9bd1e9b Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 8 Oct 2020 15:26:30 +0300 Subject: [PATCH 372/636] selftests: proc: fix warning: _GNU_SOURCE redefined [ Upstream commit f3ae6c6e8a3ea49076d826c64e63ea78fbf9db43 ] Makefile already contains -D_GNU_SOURCE, so we can remove it from the *.c files. Signed-off-by: Tommi Rantala Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- tools/testing/selftests/proc/proc-loadavg-001.c | 1 - tools/testing/selftests/proc/proc-self-syscall.c | 1 - tools/testing/selftests/proc/proc-uptime-002.c | 1 - 3 files changed, 3 deletions(-) diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index fcff7047000d..8edaafc2b92f 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 5ab5f4810e43..7b9018fad092 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include #include #include diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b7849089..e7ceabed7f51 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -15,7 +15,6 @@ */ // Test that values in /proc/uptime increment monotonically // while shifting across CPUs. -#define _GNU_SOURCE #undef NDEBUG #include #include From d3b4018b8eb32bb0d7341e7c1b4d10c3665fdb00 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Thu, 15 Oct 2020 14:44:30 -0700 Subject: [PATCH 373/636] tpm_tis: Disable interrupts on ThinkPad T490s [ Upstream commit b154ce11ead925de6a94feb3b0317fafeefa0ebc ] There is a misconfiguration in the bios of the gpio pin used for the interrupt in the T490s. When interrupts are enabled in the tpm_tis driver code this results in an interrupt storm. This was initially reported when we attempted to enable the interrupt code in the tpm_tis driver, which previously wasn't setting a flag to enable it. Due to the reports of the interrupt storm that code was reverted and we went back to polling instead of using interrupts. Now that we know the T490s problem is a firmware issue, add code to check if the system is a T490s and disable interrupts if that is the case. This will allow us to enable interrupts for everyone else. If the user has a fixed bios they can force the enabling of interrupts with tpm_tis.interrupts=1 on the kernel command line. Cc: Peter Huewe Cc: Jason Gunthorpe Cc: Hans de Goede Signed-off-by: Jerry Snitselaar Reviewed-by: James Bottomley Reviewed-by: Hans de Goede Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm_tis.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index f08949a5f678..5a3a4f095391 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "tpm.h" #include "tpm_tis_core.h" @@ -53,8 +54,8 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da return container_of(data, struct tpm_tis_tcg_phy, priv); } -static bool interrupts = true; -module_param(interrupts, bool, 0444); +static int interrupts = -1; +module_param(interrupts, int, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); static bool itpm; @@ -67,6 +68,28 @@ module_param(force, bool, 0444); MODULE_PARM_DESC(force, "Force device probe rather than using ACPI entry"); #endif +static int tpm_tis_disable_irq(const struct dmi_system_id *d) +{ + if (interrupts == -1) { + pr_notice("tpm_tis: %s detected: disabling interrupts.\n", d->ident); + interrupts = 0; + } + + return 0; +} + +static const struct dmi_system_id tpm_tis_dmi_table[] = { + { + .callback = tpm_tis_disable_irq, + .ident = "ThinkPad T490s", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T490s"), + }, + }, + {} +}; + #if defined(CONFIG_PNP) && defined(CONFIG_ACPI) static int has_hid(struct acpi_device *dev, const char *hid) { @@ -196,6 +219,8 @@ static int tpm_tis_init(struct device *dev, struct tpm_info *tpm_info) int irq = -1; int rc; + dmi_check_system(tpm_tis_dmi_table); + rc = check_acpi_tpm2(dev); if (rc) return rc; From 880d94c7811ebe89ab7edc7e8839ccdf0eedcd91 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 10 Jan 2020 16:39:02 +0800 Subject: [PATCH 374/636] tick/common: Touch watchdog in tick_unfreeze() on all CPUs commit 5167c506d62dd9ffab73eba23c79b0a8845c9fe1 upstream. Suspend to IDLE invokes tick_unfreeze() on resume. tick_unfreeze() on the first resuming CPU resumes timekeeping, which also has the side effect of resetting the softlockup watchdog on this CPU. But on the secondary CPUs the watchdog is not reset in the resume / unfreeze() path, which can result in false softlockup warnings on those CPUs depending on the time spent in suspend. Prevent this by clearing the softlock watchdog in the unfreeze path also on the secondary resuming CPUs. [ tglx: Massaged changelog ] Signed-off-by: Chunyan Zhang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20200110083902.27276-1-chunyan.zhang@unisoc.com Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a02e0f6b287c..0a3cc37e4b83 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -520,6 +521,7 @@ void tick_unfreeze(void) trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); } else { + touch_softlockup_watchdog(); tick_resume_local(); } From c5ea311b161c0b25e74d20d994bde0cc61079fe3 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 18 Aug 2020 11:41:58 +0800 Subject: [PATCH 375/636] mfd: sprd: Add wakeup capability for PMIC IRQ commit a75bfc824a2d33f57ebdc003bfe6b7a9e11e9cb9 upstream. When changing to use suspend-to-idle to save power, the PMIC irq can not wakeup the system due to lack of wakeup capability, which will cause the sub-irqs (such as power key) of the PMIC can not wake up the system. Thus we can add the wakeup capability for PMIC irq to solve this issue, as well as removing the IRQF_NO_SUSPEND flag to allow PMIC irq to be a wakeup source. Reported-by: Chunyan Zhang Signed-off-by: Baolin Wang Tested-by: Chunyan Zhang Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/sprd-sc27xx-spi.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/mfd/sprd-sc27xx-spi.c b/drivers/mfd/sprd-sc27xx-spi.c index 69df27769c21..3ba8cfa4b3b7 100644 --- a/drivers/mfd/sprd-sc27xx-spi.c +++ b/drivers/mfd/sprd-sc27xx-spi.c @@ -212,7 +212,7 @@ static int sprd_pmic_probe(struct spi_device *spi) } ret = devm_regmap_add_irq_chip(&spi->dev, ddata->regmap, ddata->irq, - IRQF_ONESHOT | IRQF_NO_SUSPEND, 0, + IRQF_ONESHOT, 0, &ddata->irq_chip, &ddata->irq_data); if (ret) { dev_err(&spi->dev, "Failed to add PMIC irq chip %d\n", ret); @@ -228,9 +228,34 @@ static int sprd_pmic_probe(struct spi_device *spi) return ret; } + device_init_wakeup(&spi->dev, true); return 0; } +#ifdef CONFIG_PM_SLEEP +static int sprd_pmic_suspend(struct device *dev) +{ + struct sprd_pmic *ddata = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + enable_irq_wake(ddata->irq); + + return 0; +} + +static int sprd_pmic_resume(struct device *dev) +{ + struct sprd_pmic *ddata = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + disable_irq_wake(ddata->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(sprd_pmic_pm_ops, sprd_pmic_suspend, sprd_pmic_resume); + static const struct of_device_id sprd_pmic_match[] = { { .compatible = "sprd,sc2731", .data = &sc2731_data }, {}, @@ -242,6 +267,7 @@ static struct spi_driver sprd_pmic_driver = { .name = "sc27xx-pmic", .bus = &spi_bus_type, .of_match_table = sprd_pmic_match, + .pm = &sprd_pmic_pm_ops, }, .probe = sprd_pmic_probe, }; From bbe2d3e7ec8f66aa8cfe62fa3cd45fccba63ce4e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 14 Oct 2020 13:46:38 +0300 Subject: [PATCH 376/636] pinctrl: intel: Set default bias in case no particular value given [ Upstream commit f3c75e7a9349d1d33eb53ddc1b31640994969f73 ] When GPIO library asks pin control to set the bias, it doesn't pass any value of it and argument is considered boolean (and this is true for ACPI GpioIo() / GpioInt() resources, by the way). Thus, individual drivers must behave well, when they got the resistance value of 1 Ohm, i.e. transforming it to sane default. In case of Intel pin control hardware the 5 kOhm sounds plausible because on one hand it's a minimum of resistors present in all hardware generations and at the same time it's high enough to minimize leakage current (will be only 200 uA with the above choice). Fixes: e57725eabf87 ("pinctrl: intel: Add support for hardware debouncer") Reported-by: Jamie McClymont Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Sasha Levin --- drivers/pinctrl/intel/pinctrl-intel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 89ff2795a8b5..5e0adb00b430 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -621,6 +621,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned pin, value |= PADCFG1_TERM_UP; + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 5000; + switch (arg) { case 20000: value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT; @@ -643,6 +647,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned pin, case PIN_CONFIG_BIAS_PULL_DOWN: value &= ~(PADCFG1_TERM_UP | PADCFG1_TERM_MASK); + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 5000; + switch (arg) { case 20000: value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT; From 0ef358bc8788046d31f536d4c99405a3e4970b01 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Thu, 22 Oct 2020 01:43:59 +0100 Subject: [PATCH 377/636] ARM: 9019/1: kprobes: Avoid fortify_panic() when copying optprobe template [ Upstream commit 9fa2e7af3d53a4b769136eccc32c02e128a4ee51 ] Setting both CONFIG_KPROBES=y and CONFIG_FORTIFY_SOURCE=y on ARM leads to a panic in memcpy() when injecting a kprobe despite the fixes found in commit e46daee53bb5 ("ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE") and commit 0ac569bf6a79 ("ARM: 8834/1: Fix: kprobes: optimized kprobes illegal instruction"). arch/arm/include/asm/kprobes.h effectively declares the target type of the optprobe_template_entry assembly label as a u32 which leads memcpy()'s __builtin_object_size() call to determine that the pointed-to object is of size four. However, the symbol is used as a handle for the optimised probe assembly template that is at least 96 bytes in size. The symbol's use despite its type blows up the memcpy() in ARM's arch_prepare_optimized_kprobe() with a false-positive fortify_panic() when it should instead copy the optimised probe template into place: ``` $ sudo perf probe -a aspeed_g6_pinctrl_probe [ 158.457252] detected buffer overflow in memcpy [ 158.458069] ------------[ cut here ]------------ [ 158.458283] kernel BUG at lib/string.c:1153! [ 158.458436] Internal error: Oops - BUG: 0 [#1] SMP ARM [ 158.458768] Modules linked in: [ 158.459043] CPU: 1 PID: 99 Comm: perf Not tainted 5.9.0-rc7-00038-gc53ebf8167e9 #158 [ 158.459296] Hardware name: Generic DT based system [ 158.459529] PC is at fortify_panic+0x18/0x20 [ 158.459658] LR is at __irq_work_queue_local+0x3c/0x74 [ 158.459831] pc : [<8047451c>] lr : [<8020ecd4>] psr: 60000013 [ 158.460032] sp : be2d1d50 ip : be2d1c58 fp : be2d1d5c [ 158.460174] r10: 00000006 r9 : 00000000 r8 : 00000060 [ 158.460348] r7 : 8011e434 r6 : b9e0b800 r5 : 7f000000 r4 : b9fe4f0c [ 158.460557] r3 : 80c04cc8 r2 : 00000000 r1 : be7c03cc r0 : 00000022 [ 158.460801] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none [ 158.461037] Control: 10c5387d Table: b9cd806a DAC: 00000051 [ 158.461251] Process perf (pid: 99, stack limit = 0x81c71a69) [ 158.461472] Stack: (0xbe2d1d50 to 0xbe2d2000) [ 158.461757] 1d40: be2d1d84 be2d1d60 8011e724 80474510 [ 158.462104] 1d60: b9e0b800 b9fe4f0c 00000000 b9fe4f14 80c8ec80 be235000 be2d1d9c be2d1d88 [ 158.462436] 1d80: 801cee44 8011e57c b9fe4f0c 00000000 be2d1dc4 be2d1da0 801d0ad0 801cedec [ 158.462742] 1da0: 00000000 00000000 b9fe4f00 ffffffea 00000000 be235000 be2d1de4 be2d1dc8 [ 158.463087] 1dc0: 80204604 801d0738 00000000 00000000 b9fe4004 ffffffea be2d1e94 be2d1de8 [ 158.463428] 1de0: 80205434 80204570 00385c00 00000000 00000000 00000000 be2d1e14 be2d1e08 [ 158.463880] 1e00: 802ba014 b9fe4f00 b9e718c0 b9fe4f84 b9e71ec8 be2d1e24 00000000 00385c00 [ 158.464365] 1e20: 00000000 626f7270 00000065 802b905c be2d1e94 0000002e 00000000 802b9914 [ 158.464829] 1e40: be2d1e84 be2d1e50 802b9914 8028ff78 804629d0 b9e71ec0 0000002e b9e71ec0 [ 158.465141] 1e60: be2d1ea8 80c04cc8 00000cc0 b9e713c4 00000002 80205834 80205834 0000002e [ 158.465488] 1e80: be235000 be235000 be2d1ea4 be2d1e98 80205854 80204e94 be2d1ecc be2d1ea8 [ 158.465806] 1ea0: 801ee4a0 80205840 00000002 80c04cc8 00000000 0000002e 0000002e 00000000 [ 158.466110] 1ec0: be2d1f0c be2d1ed0 801ee5c8 801ee428 00000000 be2d0000 006b1fd0 00000051 [ 158.466398] 1ee0: 00000000 b9eedf00 0000002e 80204410 006b1fd0 be2d1f60 00000000 00000004 [ 158.466763] 1f00: be2d1f24 be2d1f10 8020442c 801ee4c4 80205834 802c613c be2d1f5c be2d1f28 [ 158.467102] 1f20: 802c60ac 8020441c be2d1fac be2d1f38 8010c764 802e9888 be2d1f5c b9eedf00 [ 158.467447] 1f40: b9eedf00 006b1fd0 0000002e 00000000 be2d1f94 be2d1f60 802c634c 802c5fec [ 158.467812] 1f60: 00000000 00000000 00000000 80c04cc8 006b1fd0 00000003 76f7a610 00000004 [ 158.468155] 1f80: 80100284 be2d0000 be2d1fa4 be2d1f98 802c63ec 802c62e8 00000000 be2d1fa8 [ 158.468508] 1fa0: 80100080 802c63e0 006b1fd0 00000003 00000003 006b1fd0 0000002e 00000000 [ 158.468858] 1fc0: 006b1fd0 00000003 76f7a610 00000004 006b1fb0 0026d348 00000017 7ef2738c [ 158.469202] 1fe0: 76f3431c 7ef272d8 0014ec50 76f34338 60000010 00000003 00000000 00000000 [ 158.469461] Backtrace: [ 158.469683] [<80474504>] (fortify_panic) from [<8011e724>] (arch_prepare_optimized_kprobe+0x1b4/0x1f8) [ 158.470021] [<8011e570>] (arch_prepare_optimized_kprobe) from [<801cee44>] (alloc_aggr_kprobe+0x64/0x70) [ 158.470287] r9:be235000 r8:80c8ec80 r7:b9fe4f14 r6:00000000 r5:b9fe4f0c r4:b9e0b800 [ 158.470478] [<801cede0>] (alloc_aggr_kprobe) from [<801d0ad0>] (register_kprobe+0x3a4/0x5a0) [ 158.470685] r5:00000000 r4:b9fe4f0c [ 158.470790] [<801d072c>] (register_kprobe) from [<80204604>] (__register_trace_kprobe+0xa0/0xa4) [ 158.471001] r9:be235000 r8:00000000 r7:ffffffea r6:b9fe4f00 r5:00000000 r4:00000000 [ 158.471188] [<80204564>] (__register_trace_kprobe) from [<80205434>] (trace_kprobe_create+0x5ac/0x9ac) [ 158.471408] r7:ffffffea r6:b9fe4004 r5:00000000 r4:00000000 [ 158.471553] [<80204e88>] (trace_kprobe_create) from [<80205854>] (create_or_delete_trace_kprobe+0x20/0x3c) [ 158.471766] r10:be235000 r9:be235000 r8:0000002e r7:80205834 r6:80205834 r5:00000002 [ 158.471949] r4:b9e713c4 [ 158.472027] [<80205834>] (create_or_delete_trace_kprobe) from [<801ee4a0>] (trace_run_command+0x84/0x9c) [ 158.472255] [<801ee41c>] (trace_run_command) from [<801ee5c8>] (trace_parse_run_command+0x110/0x1f8) [ 158.472471] r6:00000000 r5:0000002e r4:0000002e [ 158.472594] [<801ee4b8>] (trace_parse_run_command) from [<8020442c>] (probes_write+0x1c/0x28) [ 158.472800] r10:00000004 r9:00000000 r8:be2d1f60 r7:006b1fd0 r6:80204410 r5:0000002e [ 158.472968] r4:b9eedf00 [ 158.473046] [<80204410>] (probes_write) from [<802c60ac>] (vfs_write+0xcc/0x1e8) [ 158.473226] [<802c5fe0>] (vfs_write) from [<802c634c>] (ksys_write+0x70/0xf8) [ 158.473400] r8:00000000 r7:0000002e r6:006b1fd0 r5:b9eedf00 r4:b9eedf00 [ 158.473567] [<802c62dc>] (ksys_write) from [<802c63ec>] (sys_write+0x18/0x1c) [ 158.473745] r9:be2d0000 r8:80100284 r7:00000004 r6:76f7a610 r5:00000003 r4:006b1fd0 [ 158.473932] [<802c63d4>] (sys_write) from [<80100080>] (ret_fast_syscall+0x0/0x54) [ 158.474126] Exception stack(0xbe2d1fa8 to 0xbe2d1ff0) [ 158.474305] 1fa0: 006b1fd0 00000003 00000003 006b1fd0 0000002e 00000000 [ 158.474573] 1fc0: 006b1fd0 00000003 76f7a610 00000004 006b1fb0 0026d348 00000017 7ef2738c [ 158.474811] 1fe0: 76f3431c 7ef272d8 0014ec50 76f34338 [ 158.475171] Code: e24cb004 e1a01000 e59f0004 ebf40dd3 (e7f001f2) [ 158.475847] ---[ end trace 55a5b31c08a29f00 ]--- [ 158.476088] Kernel panic - not syncing: Fatal exception [ 158.476375] CPU0: stopping [ 158.476709] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G D 5.9.0-rc7-00038-gc53ebf8167e9 #158 [ 158.477176] Hardware name: Generic DT based system [ 158.477411] Backtrace: [ 158.477604] [<8010dd28>] (dump_backtrace) from [<8010dfd4>] (show_stack+0x20/0x24) [ 158.477990] r7:00000000 r6:60000193 r5:00000000 r4:80c2f634 [ 158.478323] [<8010dfb4>] (show_stack) from [<8046390c>] (dump_stack+0xcc/0xe8) [ 158.478686] [<80463840>] (dump_stack) from [<80110750>] (handle_IPI+0x334/0x3a0) [ 158.479063] r7:00000000 r6:00000004 r5:80b65cc8 r4:80c78278 [ 158.479352] [<8011041c>] (handle_IPI) from [<801013f8>] (gic_handle_irq+0x88/0x94) [ 158.479757] r10:10c5387d r9:80c01ed8 r8:00000000 r7:c0802000 r6:80c0537c r5:000003ff [ 158.480146] r4:c080200c r3:fffffff4 [ 158.480364] [<80101370>] (gic_handle_irq) from [<80100b6c>] (__irq_svc+0x6c/0x90) [ 158.480748] Exception stack(0x80c01ed8 to 0x80c01f20) [ 158.481031] 1ec0: 000128bc 00000000 [ 158.481499] 1ee0: be7b8174 8011d3a0 80c00000 00000000 80c04cec 80c04d28 80c5d7c2 80a026d4 [ 158.482091] 1f00: 10c5387d 80c01f34 80c01f38 80c01f28 80109554 80109558 60000013 ffffffff [ 158.482621] r9:80c00000 r8:80c5d7c2 r7:80c01f0c r6:ffffffff r5:60000013 r4:80109558 [ 158.482983] [<80109518>] (arch_cpu_idle) from [<80818780>] (default_idle_call+0x38/0x120) [ 158.483360] [<80818748>] (default_idle_call) from [<801585a8>] (do_idle+0xd4/0x158) [ 158.483945] r5:00000000 r4:80c00000 [ 158.484237] [<801584d4>] (do_idle) from [<801588f4>] (cpu_startup_entry+0x28/0x2c) [ 158.484784] r9:80c78000 r8:00000000 r7:80c78000 r6:80c78040 r5:80c04cc0 r4:000000d6 [ 158.485328] [<801588cc>] (cpu_startup_entry) from [<80810a78>] (rest_init+0x9c/0xbc) [ 158.485930] [<808109dc>] (rest_init) from [<80b00ae4>] (arch_call_rest_init+0x18/0x1c) [ 158.486503] r5:80c04cc0 r4:00000001 [ 158.486857] [<80b00acc>] (arch_call_rest_init) from [<80b00fcc>] (start_kernel+0x46c/0x548) [ 158.487589] [<80b00b60>] (start_kernel) from [<00000000>] (0x0) ``` Fixes: e46daee53bb5 ("ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE") Fixes: 0ac569bf6a79 ("ARM: 8834/1: Fix: kprobes: optimized kprobes illegal instruction") Suggested-by: Kees Cook Signed-off-by: Andrew Jeffery Tested-by: Luka Oreskovic Tested-by: Joel Stanley Reviewed-by: Joel Stanley Acked-by: Masami Hiramatsu Cc: Luka Oreskovic Cc: Juraj Vijtiuk Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/include/asm/kprobes.h | 22 +++++++++++----------- arch/arm/probes/kprobes/opt-arm.c | 18 +++++++++--------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 82290f212d8e..e1eb662e0f9e 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -52,20 +52,20 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); /* optinsn template addresses */ -extern __visible kprobe_opcode_t optprobe_template_entry; -extern __visible kprobe_opcode_t optprobe_template_val; -extern __visible kprobe_opcode_t optprobe_template_call; -extern __visible kprobe_opcode_t optprobe_template_end; -extern __visible kprobe_opcode_t optprobe_template_sub_sp; -extern __visible kprobe_opcode_t optprobe_template_add_sp; -extern __visible kprobe_opcode_t optprobe_template_restore_begin; -extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn; -extern __visible kprobe_opcode_t optprobe_template_restore_end; +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; +extern __visible kprobe_opcode_t optprobe_template_sub_sp[]; +extern __visible kprobe_opcode_t optprobe_template_add_sp[]; +extern __visible kprobe_opcode_t optprobe_template_restore_begin[]; +extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[]; +extern __visible kprobe_opcode_t optprobe_template_restore_end[]; #define MAX_OPTIMIZED_LENGTH 4 #define MAX_OPTINSN_SIZE \ - ((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + ((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) #define RELATIVEJUMP_SIZE 4 struct arch_optimized_insn { diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 0dc23fc227ed..cf08cb726767 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -98,21 +98,21 @@ asm ( "optprobe_template_end:\n"); #define TMPL_VAL_IDX \ - ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry) #define TMPL_END_IDX \ - ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry) #define TMPL_ADD_SP \ - ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry) #define TMPL_SUB_SP \ - ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_BEGIN \ - ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_ORIGN_INSN \ - ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_END \ - ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry) /* * ARM can always optimize an instruction when using ARM ISA, except @@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, (unsigned long *)&optprobe_template_entry, + memcpy(code, (unsigned long *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ From 5818925f87d33b9a77e3f4d78a47f0f823f221cc Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Fri, 30 Oct 2020 13:54:50 +0800 Subject: [PATCH 378/636] pinctrl: aspeed: Fix GPI only function problem. [ Upstream commit 9b92f5c51e9a41352d665f6f956bd95085a56a83 ] Some gpio pin at aspeed soc is input only and the prefix name of these pin is "GPI" only. This patch fine-tune the condition of GPIO check from "GPIO" to "GPI" and it will fix the usage error of banks D and E in the AST2400/AST2500 and banks T and U in the AST2600. Fixes: 4d3d0e4272d8 ("pinctrl: Add core support for Aspeed SoCs") Signed-off-by: Billy Tsai Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20201030055450.29613-1-billy_tsai@aspeedtech.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index aefe3c33dffd..8dec302dc067 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -458,13 +458,14 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, static bool aspeed_expr_is_gpio(const struct aspeed_sig_expr *expr) { /* - * The signal type is GPIO if the signal name has "GPIO" as a prefix. + * The signal type is GPIO if the signal name has "GPI" as a prefix. * strncmp (rather than strcmp) is used to implement the prefix * requirement. * - * expr->signal might look like "GPIOT3" in the GPIO case. + * expr->signal might look like "GPIOB1" in the GPIO case. + * expr->signal might look like "GPIT0" in the GPI case. */ - return strncmp(expr->signal, "GPIO", 4) == 0; + return strncmp(expr->signal, "GPI", 3) == 0; } static bool aspeed_gpio_in_exprs(const struct aspeed_sig_expr **exprs) From d9f4534a9a286877ae29e15b5f9ba8ecb7370924 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Nov 2020 18:30:59 +0100 Subject: [PATCH 379/636] nbd: fix a block_device refcount leak in nbd_release [ Upstream commit 2bd645b2d3f0bacadaa6037f067538e1cd4e42ef ] bdget_disk needs to be paired with bdput to not leak a reference on the block device inode. Fixes: 08ba91ee6e2c ("nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag.") Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 706115ecd9be..517318bb350c 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1448,6 +1448,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode) if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && bdev->bd_openers == 0) nbd_disconnect_and_put(nbd); + bdput(bdev); nbd_config_put(nbd); nbd_put(nbd); From 83e65294aff8635877dfd0a019dbb9cd1abcab8a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:43 -0800 Subject: [PATCH 380/636] xfs: fix flags argument to rmap lookup when converting shared file rmaps [ Upstream commit ea8439899c0b15a176664df62aff928010fad276 ] Pass the same oldext argument (which contains the existing rmapping's unwritten state) to xfs_rmap_lookup_le_range at the start of xfs_rmap_convert_shared. At this point in the code, flags is zero, which means that we perform lookups using the wrong key. Fixes: 3f165b334e51 ("xfs: convert unwritten status of reverse mappings for shared files") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_rmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 245af452840e..ab3e72e702f0 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -1387,7 +1387,7 @@ xfs_rmap_convert_shared( * record for our insertion point. This will also give us the record for * start block contiguity tests. */ - error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags, + error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext, &PREV, &i); if (error) goto done; From 5fda0976e88dd4addc401af6b2ab53b2842e47a6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:43 -0800 Subject: [PATCH 381/636] xfs: set the unwritten bit in rmap lookup flags in xchk_bmap_get_rmapextents [ Upstream commit 5dda3897fd90783358c4c6115ef86047d8c8f503 ] When the bmbt scrubber is looking up rmap extents, we need to set the extent flags from the bmbt record fully. This will matter once we fix the rmap btree comparison functions to check those flags correctly. Fixes: d852657ccfc0 ("xfs: cross-reference reverse-mapping btree") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin --- fs/xfs/scrub/bmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index f84a58e523bc..b05d65fd360b 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -120,6 +120,8 @@ xchk_bmap_get_rmap( if (info->whichfork == XFS_ATTR_FORK) rflags |= XFS_RMAP_ATTR_FORK; + if (irec->br_state == XFS_EXT_UNWRITTEN) + rflags |= XFS_RMAP_UNWRITTEN; /* * CoW staging extents are owned (on disk) by the refcountbt, so From b7fec5a9a726e9e2a095d53b6de4e62bf179b481 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:44 -0800 Subject: [PATCH 382/636] xfs: fix rmap key and record comparison functions [ Upstream commit 6ff646b2ceb0eec916101877f38da0b73e3a5b7f ] Keys for extent interval records in the reverse mapping btree are supposed to be computed as follows: (physical block, owner, fork, is_btree, is_unwritten, offset) This provides users the ability to look up a reverse mapping from a bmbt record -- start with the physical block; then if there are multiple records for the same block, move on to the owner; then the inode fork type; and so on to the file offset. However, the key comparison functions incorrectly remove the fork/btree/unwritten information that's encoded in the on-disk offset. This means that lookup comparisons are only done with: (physical block, owner, offset) This means that queries can return incorrect results. On consistent filesystems this hasn't been an issue because blocks are never shared between forks or with bmbt blocks; and are never unwritten. However, this bug means that online repair cannot always detect corruption in the key information in internal rmapbt nodes. Found by fuzzing keys[1].attrfork = ones on xfs/371. Fixes: 4b8ed67794fe ("xfs: add rmap btree operations") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_rmap_btree.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index f79cf040d745..77528f413286 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -247,8 +247,8 @@ xfs_rmapbt_key_diff( else if (y > x) return -1; - x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset)); - y = rec->rm_offset; + x = be64_to_cpu(kp->rm_offset); + y = xfs_rmap_irec_offset_pack(rec); if (x > y) return 1; else if (y > x) @@ -279,8 +279,8 @@ xfs_rmapbt_diff_two_keys( else if (y > x) return -1; - x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset)); - y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset)); + x = be64_to_cpu(kp1->rm_offset); + y = be64_to_cpu(kp2->rm_offset); if (x > y) return 1; else if (y > x) @@ -393,8 +393,8 @@ xfs_rmapbt_keys_inorder( return 1; else if (a > b) return 0; - a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)); - b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)); + a = be64_to_cpu(k1->rmap.rm_offset); + b = be64_to_cpu(k2->rmap.rm_offset); if (a <= b) return 1; return 0; @@ -423,8 +423,8 @@ xfs_rmapbt_recs_inorder( return 1; else if (a > b) return 0; - a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)); - b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)); + a = be64_to_cpu(r1->rmap.rm_offset); + b = be64_to_cpu(r2->rmap.rm_offset); if (a <= b) return 1; return 0; From 964e25377fab8e9071c07d8cdf1c9a4fb079285e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:42 -0800 Subject: [PATCH 383/636] xfs: fix brainos in the refcount scrubber's rmap fragment processor [ Upstream commit 54e9b09e153842ab5adb8a460b891e11b39e9c3d ] Fix some serious WTF in the reference count scrubber's rmap fragment processing. The code comment says that this loop is supposed to move all fragment records starting at or before bno onto the worklist, but there's no obvious reason why nr (the number of items added) should increment starting from 1, and breaking the loop when we've added the target number seems dubious since we could have more rmap fragments that should have been added to the worklist. This seems to manifest in xfs/411 when adding one to the refcount field. Fixes: dbde19da9637 ("xfs: cross-reference the rmapbt data with the refcountbt") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin --- fs/xfs/scrub/refcount.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index e8c82b026083..76e4f16a9fab 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -180,7 +180,6 @@ xchk_refcountbt_process_rmap_fragments( */ INIT_LIST_HEAD(&worklist); rbno = NULLAGBLOCK; - nr = 1; /* Make sure the fragments actually /are/ in agbno order. */ bno = 0; @@ -194,15 +193,14 @@ xchk_refcountbt_process_rmap_fragments( * Find all the rmaps that start at or before the refc extent, * and put them on the worklist. */ + nr = 0; list_for_each_entry_safe(frag, n, &refchk->fragments, list) { - if (frag->rm.rm_startblock > refchk->bno) - goto done; + if (frag->rm.rm_startblock > refchk->bno || nr > target_nr) + break; bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; if (bno < rbno) rbno = bno; list_move_tail(&frag->list, &worklist); - if (nr == target_nr) - break; nr++; } From 2b07f571617bc6ed2560c26b534c372150197da2 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Mon, 9 Nov 2020 15:38:28 -0500 Subject: [PATCH 384/636] lan743x: fix "BUG: invalid wait context" when setting rx mode [ Upstream commit 2b52a4b65bc8f14520fe6e996ea7fb3f7e400761 ] In the net core, the struct net_device_ops -> ndo_set_rx_mode() callback is called with the dev->addr_list_lock spinlock held. However, this driver's ndo_set_rx_mode callback eventually calls lan743x_dp_write(), which acquires a mutex. Mutex acquisition may sleep, and this is not allowed when holding a spinlock. Fix by removing the dp_lock mutex entirely. Its purpose is to prevent concurrent accesses to the data port. No concurrent accesses are possible, because the dev->addr_list_lock spinlock in the core only lets through one thread at a time. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Sven Van Asbroeck Link: https://lore.kernel.org/r/20201109203828.5115-1-TheSven73@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan743x_main.c | 12 +++--------- drivers/net/ethernet/microchip/lan743x_main.h | 3 --- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 208341541087..085fdceb3821 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -672,14 +672,12 @@ clean_up: static int lan743x_dp_write(struct lan743x_adapter *adapter, u32 select, u32 addr, u32 length, u32 *buf) { - int ret = -EIO; u32 dp_sel; int i; - mutex_lock(&adapter->dp_lock); if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, 1, 40, 100, 100)) - goto unlock; + return -EIO; dp_sel = lan743x_csr_read(adapter, DP_SEL); dp_sel &= ~DP_SEL_MASK_; dp_sel |= select; @@ -691,13 +689,10 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_); if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, 1, 40, 100, 100)) - goto unlock; + return -EIO; } - ret = 0; -unlock: - mutex_unlock(&adapter->dp_lock); - return ret; + return 0; } static u32 lan743x_mac_mii_access(u16 id, u16 index, int read) @@ -2679,7 +2674,6 @@ static int lan743x_hardware_init(struct lan743x_adapter *adapter, adapter->intr.irq = adapter->pdev->irq; lan743x_csr_write(adapter, INT_EN_CLR, 0xFFFFFFFF); - mutex_init(&adapter->dp_lock); ret = lan743x_gpio_init(adapter); if (ret) diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 2d6eea18973e..77273be2d1ee 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -702,9 +702,6 @@ struct lan743x_adapter { struct lan743x_csr csr; struct lan743x_intr intr; - /* lock, used to prevent concurrent access to data port */ - struct mutex dp_lock; - struct lan743x_gpio gpio; struct lan743x_ptp ptp; From 05e0bb210209126f495401051996389831f7ba48 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Nov 2020 08:07:37 -0800 Subject: [PATCH 385/636] xfs: fix a missing unlock on error in xfs_fs_map_blocks [ Upstream commit 2bd3fa793aaa7e98b74e3653fdcc72fa753913b5 ] We also need to drop the iolock when invalidate_inode_pages2 fails, not only on all other error or successful cases. Fixes: 527851124d10 ("xfs: implement pNFS export operations") Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Sasha Levin --- fs/xfs/xfs_pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index f44c3599527d..1c9bced3e860 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -141,7 +141,7 @@ xfs_fs_map_blocks( goto out_unlock; error = invalidate_inode_pages2(inode->i_mapping); if (WARN_ON_ONCE(error)) - return error; + goto out_unlock; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length); offset_fsb = XFS_B_TO_FSBT(mp, offset); From c51725f8d2411ef79843635a5fd692c15f6a8cc5 Mon Sep 17 00:00:00 2001 From: Evan Nimmo Date: Tue, 10 Nov 2020 15:28:25 +1300 Subject: [PATCH 386/636] of/address: Fix of_node memory leak in of_dma_is_coherent [ Upstream commit a5bea04fcc0b3c0aec71ee1fd58fd4ff7ee36177 ] Commit dabf6b36b83a ("of: Add OF_DMA_DEFAULT_COHERENT & select it on powerpc") added a check to of_dma_is_coherent which returns early if OF_DMA_DEFAULT_COHERENT is enabled. This results in the of_node_put() being skipped causing a memory leak. Moved the of_node_get() below this check so we now we only get the node if OF_DMA_DEFAULT_COHERENT is not enabled. Fixes: dabf6b36b83a ("of: Add OF_DMA_DEFAULT_COHERENT & select it on powerpc") Signed-off-by: Evan Nimmo Link: https://lore.kernel.org/r/20201110022825.30895-1-evan.nimmo@alliedtelesis.co.nz Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/address.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index c42aebba35ab..30806dd35735 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -975,11 +975,13 @@ EXPORT_SYMBOL_GPL(of_dma_get_range); */ bool of_dma_is_coherent(struct device_node *np) { - struct device_node *node = of_node_get(np); + struct device_node *node; if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT)) return true; + node = of_node_get(np); + while (node) { if (of_property_read_bool(node, "dma-coherent")) { of_node_put(node); From d302e78731143479d203a05fff648270502ab770 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 10 Nov 2020 22:46:14 +0800 Subject: [PATCH 387/636] cosa: Add missing kfree in error path of cosa_write [ Upstream commit 52755b66ddcef2e897778fac5656df18817b59ab ] If memory allocation for 'kbuf' succeed, cosa_write() doesn't have a corresponding kfree() in exception handling. Thus add kfree() for this function implementation. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Wang Hai Acked-by: Jan "Yenya" Kasprzak Link: https://lore.kernel.org/r/20201110144614.43194-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wan/cosa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index f6b000ddcd15..b7bfc0caa5dc 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -902,6 +902,7 @@ static ssize_t cosa_write(struct file *file, chan->tx_status = 1; spin_unlock_irqrestore(&cosa->lock, flags); up(&chan->wsem); + kfree(kbuf); return -ERESTARTSYS; } } From 0f4eb125c56b524776a2f396bb7a7c119bb2e0a1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:49:45 +0100 Subject: [PATCH 388/636] perf: Fix get_recursion_context() [ Upstream commit ce0f17fc93f63ee91428af10b7b2ddef38cd19e5 ] One should use in_serving_softirq() to detect SoftIRQ context. Fixes: 96f6d4444302 ("perf_counter: avoid recursion") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151955.120572175@infradead.org Signed-off-by: Sasha Levin --- kernel/events/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 6dc725a7e7bc..8fc0ddc38cb6 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -209,7 +209,7 @@ static inline int get_recursion_context(int *recursion) rctx = 3; else if (in_irq()) rctx = 2; - else if (in_softirq()) + else if (in_serving_softirq()) rctx = 1; else rctx = 0; From f0bb30a1fc8fe0d9a3068c7cf1644302c8196cfc Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 29 Oct 2020 23:46:36 +0800 Subject: [PATCH 389/636] ext4: correctly report "not supported" for {usr,grp}jquota when !CONFIG_QUOTA commit 174fe5ba2d1ea0d6c5ab2a7d4aa058d6d497ae4d upstream. The macro MOPT_Q is used to indicates the mount option is related to quota stuff and is defined to be MOPT_NOSUPPORT when CONFIG_QUOTA is disabled. Normally the quota options are handled explicitly, so it didn't matter that the MOPT_STRING flag was missing, even though the usrjquota and grpjquota mount options take a string argument. It's important that's present in the !CONFIG_QUOTA case, since without MOPT_STRING, the mount option matcher will match usrjquota= followed by an integer, and will otherwise skip the table entry, and so "mount option not supported" error message is never reported. [ Fixed up the commit description to better explain why the fix works. --TYT ] Fixes: 26092bf52478 ("ext4: use a table-driven handler for mount options") Signed-off-by: Kaixu Xia Link: https://lore.kernel.org/r/1603986396-28917-1-git-send-email-kaixuxia@tencent.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6338ca95d8b3..ee96f504ed78 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1748,8 +1748,8 @@ static const struct mount_opts { {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA), MOPT_CLEAR | MOPT_Q}, - {Opt_usrjquota, 0, MOPT_Q}, - {Opt_grpjquota, 0, MOPT_Q}, + {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING}, + {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING}, {Opt_offusrjquota, 0, MOPT_Q}, {Opt_offgrpjquota, 0, MOPT_Q}, {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, From 9d9ad220bb414da6e8b6ae0e35f9e4db976a97ef Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 3 Nov 2020 10:29:02 +0800 Subject: [PATCH 390/636] ext4: unlock xattr_sem properly in ext4_inline_data_truncate() commit 7067b2619017d51e71686ca9756b454de0e5826a upstream. It takes xattr_sem to check inline data again but without unlock it in case not have. So unlock it before return. Fixes: aef1c8513c1f ("ext4: let ext4_truncate handle inline data correctly") Reported-by: Dan Carpenter Cc: Tao Ma Signed-off-by: Joseph Qi Reviewed-by: Andreas Dilger Link: https://lore.kernel.org/r/1604370542-124630-1-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 4572cb057951..c95246187659 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1921,6 +1921,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline) ext4_write_lock_xattr(inode, &no_expand); if (!ext4_has_inline_data(inode)) { + ext4_write_unlock_xattr(inode, &no_expand); *has_inline = 0; ext4_journal_stop(handle); return 0; From f136ce570b384e32b66f06898f04bedb25b5df88 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Wed, 21 Oct 2020 13:36:55 +0800 Subject: [PATCH 391/636] btrfs: ref-verify: fix memory leak in btrfs_ref_tree_mod commit 468600c6ec28613b756193c5f780aac062f1acdf upstream. There is one error handling path that does not free ref, which may cause a minor memory leak. CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Josef Bacik Signed-off-by: Dinghao Liu Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ref-verify.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 5dec52bd2897..b26739d0e991 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -854,6 +854,7 @@ int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, "dropping a ref for a root that doesn't have a ref on the block"); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); + kfree(ref); kfree(ra); goto out_unlock; } From da9de75cdee257242edfd3aae0f604f190f23506 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 30 Oct 2020 06:53:56 +0800 Subject: [PATCH 392/636] btrfs: dev-replace: fail mount if we don't have replace item with target device commit cf89af146b7e62af55470cf5f3ec3c56ec144a5e upstream. If there is a device BTRFS_DEV_REPLACE_DEVID without the device replace item, then it means the filesystem is inconsistent state. This is either corruption or a crafted image. Fail the mount as this needs a closer look what is actually wrong. As of now if BTRFS_DEV_REPLACE_DEVID is present without the replace item, in __btrfs_free_extra_devids() we determine that there is an extra device, and free those extra devices but continue to mount the device. However, we were wrong in keeping tack of the rw_devices so the syzbot testcase failed: WARNING: CPU: 1 PID: 3612 at fs/btrfs/volumes.c:1166 close_fs_devices.part.0+0x607/0x800 fs/btrfs/volumes.c:1166 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 3612 Comm: syz-executor.2 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 panic+0x347/0x7c0 kernel/panic.c:231 __warn.cold+0x20/0x46 kernel/panic.c:600 report_bug+0x1bd/0x210 lib/bug.c:198 handle_bug+0x38/0x90 arch/x86/kernel/traps.c:234 exc_invalid_op+0x14/0x40 arch/x86/kernel/traps.c:254 asm_exc_invalid_op+0x12/0x20 arch/x86/include/asm/idtentry.h:536 RIP: 0010:close_fs_devices.part.0+0x607/0x800 fs/btrfs/volumes.c:1166 RSP: 0018:ffffc900091777e0 EFLAGS: 00010246 RAX: 0000000000040000 RBX: ffffffffffffffff RCX: ffffc9000c8b7000 RDX: 0000000000040000 RSI: ffffffff83097f47 RDI: 0000000000000007 RBP: dffffc0000000000 R08: 0000000000000001 R09: ffff8880988a187f R10: 0000000000000000 R11: 0000000000000001 R12: ffff88809593a130 R13: ffff88809593a1ec R14: ffff8880988a1908 R15: ffff88809593a050 close_fs_devices fs/btrfs/volumes.c:1193 [inline] btrfs_close_devices+0x95/0x1f0 fs/btrfs/volumes.c:1179 open_ctree+0x4984/0x4a2d fs/btrfs/disk-io.c:3434 btrfs_fill_super fs/btrfs/super.c:1316 [inline] btrfs_mount_root.cold+0x14/0x165 fs/btrfs/super.c:1672 The fix here is, when we determine that there isn't a replace item then fail the mount if there is a replace target device (devid 0). CC: stable@vger.kernel.org # 4.19+ Reported-by: syzbot+4cfe71a4da060be47502@syzkaller.appspotmail.com Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/dev-replace.c | 26 ++++++++++++++++++++++++-- fs/btrfs/volumes.c | 26 +++++++------------------- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 36c0490156ac..4d1d2657d70c 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -54,6 +54,17 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { no_valid_dev_replace_entry_found: + /* + * We don't have a replace item or it's corrupted. If there is + * a replace target, fail the mount. + */ + if (btrfs_find_device(fs_info->fs_devices, + BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) { + btrfs_err(fs_info, + "found replace target device without a valid replace item"); + ret = -EUCLEAN; + goto out; + } ret = 0; dev_replace->replace_state = BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED; @@ -107,8 +118,19 @@ no_valid_dev_replace_entry_found: case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: - dev_replace->srcdev = NULL; - dev_replace->tgtdev = NULL; + /* + * We don't have an active replace item but if there is a + * replace target, fail the mount. + */ + if (btrfs_find_device(fs_info->fs_devices, + BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) { + btrfs_err(fs_info, + "replace devid present without an active replace item"); + ret = -EUCLEAN; + } else { + dev_replace->srcdev = NULL; + dev_replace->tgtdev = NULL; + } break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4067d0196556..7e000d061813 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -974,22 +974,13 @@ again: continue; } - if (device->devid == BTRFS_DEV_REPLACE_DEVID) { - /* - * In the first step, keep the device which has - * the correct fsid and the devid that is used - * for the dev_replace procedure. - * In the second step, the dev_replace state is - * read from the device tree and it is known - * whether the procedure is really active or - * not, which means whether this device is - * used or whether it should be removed. - */ - if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT, - &device->dev_state)) { - continue; - } - } + /* + * We have already validated the presence of BTRFS_DEV_REPLACE_DEVID, + * in btrfs_init_dev_replace() so just continue. + */ + if (device->devid == BTRFS_DEV_REPLACE_DEVID) + continue; + if (device->bdev) { blkdev_put(device->bdev, device->mode); device->bdev = NULL; @@ -998,9 +989,6 @@ again: if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { list_del_init(&device->dev_alloc_list); clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); - if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, - &device->dev_state)) - fs_devices->rw_devices--; } list_del_init(&device->dev_list); fs_devices->num_devices--; From 111fd1676e45cdc3c497c7fcaa28d8024eafb265 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 7 Oct 2020 17:06:17 +0300 Subject: [PATCH 393/636] thunderbolt: Fix memory leak if ida_simple_get() fails in enumerate_services() commit a663e0df4a374b8537562a44d1cecafb472cd65b upstream. The svc->key field is not released as it should be if ida_simple_get() fails so fix that. Fixes: 9aabb68568b4 ("thunderbolt: Fix to check return value of ida_simple_get") Cc: stable@vger.kernel.org Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/xdomain.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index befe75490697..4eb51a123a6f 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -774,6 +774,7 @@ static void enumerate_services(struct tb_xdomain *xd) id = ida_simple_get(&xd->service_ids, 0, 0, GFP_KERNEL); if (id < 0) { + kfree(svc->key); kfree(svc); break; } From 2df599fa6cdc92276180d95b0eda02463161818f Mon Sep 17 00:00:00 2001 From: Jing Xiangfeng Date: Thu, 15 Oct 2020 16:40:53 +0800 Subject: [PATCH 394/636] thunderbolt: Add the missed ida_simple_remove() in ring_request_msix() commit 7342ca34d931a357d408aaa25fadd031e46af137 upstream. ring_request_msix() misses to call ida_simple_remove() in an error path. Add a label 'err_ida_remove' and jump to it. Fixes: 046bee1f9ab8 ("thunderbolt: Add MSI-X support") Cc: stable@vger.kernel.org Signed-off-by: Jing Xiangfeng Reviewed-by: Andy Shevchenko Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/nhi.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index d436a1534fc2..384623c49cfe 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -408,12 +408,23 @@ static int ring_request_msix(struct tb_ring *ring, bool no_suspend) ring->vector = ret; - ring->irq = pci_irq_vector(ring->nhi->pdev, ring->vector); - if (ring->irq < 0) - return ring->irq; + ret = pci_irq_vector(ring->nhi->pdev, ring->vector); + if (ret < 0) + goto err_ida_remove; + + ring->irq = ret; irqflags = no_suspend ? IRQF_NO_SUSPEND : 0; - return request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring); + ret = request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring); + if (ret) + goto err_ida_remove; + + return 0; + +err_ida_remove: + ida_simple_remove(&nhi->msix_ida, ring->vector); + + return ret; } static void ring_release_msix(struct tb_ring *ring) From ebfe3b94c746b9c9b1b7fa2cfc842380a85d7c40 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 2 Nov 2020 21:28:19 +0900 Subject: [PATCH 395/636] uio: Fix use-after-free in uio_unregister_device() commit 092561f06702dd4fdd7fb74dd3a838f1818529b7 upstream. Commit 8fd0e2a6df26 ("uio: free uio id after uio file node is freed") triggered KASAN use-after-free failure at deletion of TCM-user backstores [1]. In uio_unregister_device(), struct uio_device *idev is passed to uio_free_minor() to refer idev->minor. However, before uio_free_minor() call, idev is already freed by uio_device_release() during call to device_unregister(). To avoid reference to idev->minor after idev free, keep idev->minor value in a local variable. Also modify uio_free_minor() argument to receive the value. [1] BUG: KASAN: use-after-free in uio_unregister_device+0x166/0x190 Read of size 4 at addr ffff888105196508 by task targetcli/49158 CPU: 3 PID: 49158 Comm: targetcli Not tainted 5.10.0-rc1 #1 Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 Call Trace: dump_stack+0xae/0xe5 ? uio_unregister_device+0x166/0x190 print_address_description.constprop.0+0x1c/0x210 ? uio_unregister_device+0x166/0x190 ? uio_unregister_device+0x166/0x190 kasan_report.cold+0x37/0x7c ? kobject_put+0x80/0x410 ? uio_unregister_device+0x166/0x190 uio_unregister_device+0x166/0x190 tcmu_destroy_device+0x1c4/0x280 [target_core_user] ? tcmu_release+0x90/0x90 [target_core_user] ? __mutex_unlock_slowpath+0xd6/0x5d0 target_free_device+0xf3/0x2e0 [target_core_mod] config_item_cleanup+0xea/0x210 configfs_rmdir+0x651/0x860 ? detach_groups.isra.0+0x380/0x380 vfs_rmdir.part.0+0xec/0x3a0 ? __lookup_hash+0x20/0x150 do_rmdir+0x252/0x320 ? do_file_open_root+0x420/0x420 ? strncpy_from_user+0xbc/0x2f0 ? getname_flags.part.0+0x8e/0x450 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f9e2bfc91fb Code: 73 01 c3 48 8b 0d 9d ec 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 54 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 6d ec 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffdd2baafe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000054 RAX: ffffffffffffffda RBX: 00007f9e2beb44a0 RCX: 00007f9e2bfc91fb RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007f9e1c20be90 RBP: 00007ffdd2bab000 R08: 0000000000000000 R09: 00007f9e2bdf2440 R10: 00007ffdd2baaf37 R11: 0000000000000246 R12: 00000000ffffff9c R13: 000055f9abb7e390 R14: 000055f9abcf9558 R15: 00007f9e2be7a780 Allocated by task 34735: kasan_save_stack+0x1b/0x40 __kasan_kmalloc.constprop.0+0xc2/0xd0 __uio_register_device+0xeb/0xd40 tcmu_configure_device+0x5a0/0xbc0 [target_core_user] target_configure_device+0x12f/0x760 [target_core_mod] target_dev_enable_store+0x32/0x50 [target_core_mod] configfs_write_file+0x2bb/0x450 vfs_write+0x1ce/0x610 ksys_write+0xe9/0x1b0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 49158: kasan_save_stack+0x1b/0x40 kasan_set_track+0x1c/0x30 kasan_set_free_info+0x1b/0x30 __kasan_slab_free+0x110/0x150 slab_free_freelist_hook+0x5a/0x170 kfree+0xc6/0x560 device_release+0x9b/0x210 kobject_put+0x13e/0x410 uio_unregister_device+0xf9/0x190 tcmu_destroy_device+0x1c4/0x280 [target_core_user] target_free_device+0xf3/0x2e0 [target_core_mod] config_item_cleanup+0xea/0x210 configfs_rmdir+0x651/0x860 vfs_rmdir.part.0+0xec/0x3a0 do_rmdir+0x252/0x320 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff888105196000 which belongs to the cache kmalloc-2k of size 2048 The buggy address is located 1288 bytes inside of 2048-byte region [ffff888105196000, ffff888105196800) The buggy address belongs to the page: page:0000000098e6ca81 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x105190 head:0000000098e6ca81 order:3 compound_mapcount:0 compound_pincount:0 flags: 0x17ffffc0010200(slab|head) raw: 0017ffffc0010200 dead000000000100 dead000000000122 ffff888100043040 raw: 0000000000000000 0000000000080008 00000001ffffffff ffff88810eb55c01 page dumped because: kasan: bad access detected page->mem_cgroup:ffff88810eb55c01 Memory state around the buggy address: ffff888105196400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888105196480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888105196500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888105196580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888105196600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 8fd0e2a6df26 ("uio: free uio id after uio file node is freed") Cc: stable Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20201102122819.2346270-1-shinichiro.kawasaki@wdc.com Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 3926be659147..0e3e16c51d3a 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -413,10 +413,10 @@ static int uio_get_minor(struct uio_device *idev) return retval; } -static void uio_free_minor(struct uio_device *idev) +static void uio_free_minor(unsigned long minor) { mutex_lock(&minor_lock); - idr_remove(&uio_idr, idev->minor); + idr_remove(&uio_idr, minor); mutex_unlock(&minor_lock); } @@ -988,7 +988,7 @@ err_request_irq: err_uio_dev_add_attributes: device_del(&idev->dev); err_device_create: - uio_free_minor(idev); + uio_free_minor(idev->minor); put_device(&idev->dev); return ret; } @@ -1002,11 +1002,13 @@ EXPORT_SYMBOL_GPL(__uio_register_device); void uio_unregister_device(struct uio_info *info) { struct uio_device *idev; + unsigned long minor; if (!info || !info->uio_dev) return; idev = info->uio_dev; + minor = idev->minor; mutex_lock(&idev->info_lock); uio_dev_del_attributes(idev); @@ -1019,7 +1021,7 @@ void uio_unregister_device(struct uio_info *info) device_unregister(&idev->dev); - uio_free_minor(idev); + uio_free_minor(minor); return; } From 9eb97beaf594123e8bf448f68fa44e08b3d0d89c Mon Sep 17 00:00:00 2001 From: Chris Brandt Date: Wed, 11 Nov 2020 08:12:09 -0500 Subject: [PATCH 396/636] usb: cdc-acm: Add DISABLE_ECHO for Renesas USB Download mode commit 6d853c9e4104b4fc8d55dc9cd3b99712aa347174 upstream. Renesas R-Car and RZ/G SoCs have a firmware download mode over USB. However, on reset a banner string is transmitted out which is not expected to be echoed back and will corrupt the protocol. Cc: stable Acked-by: Oliver Neukum Signed-off-by: Chris Brandt Link: https://lore.kernel.org/r/20201111131209.3977903-1-chris.brandt@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 08751d1a765f..e0d8da4e3967 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1738,6 +1738,15 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, From 83361a5464e28dde737b4d6c7730fc490233cdc7 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 6 Nov 2020 20:22:21 +0800 Subject: [PATCH 397/636] xhci: hisilicon: fix refercence leak in xhci_histb_probe commit 76255470ffa2795a44032e8b3c1ced11d81aa2db upstream. pm_runtime_get_sync() will increment pm usage at first and it will resume the device later. We should decrease the usage count whetever it succeeded or failed(maybe runtime of the device has error, or device is in inaccessible state, or other error state). If we do not call put operation to decrease the reference, it will result in reference leak in xhci_histb_probe. Moreover, this device cannot enter the idle state and always stay busy or other non-idle state later. So we fixed it by jumping to error handling branch. Fixes: c508f41da0788 ("xhci: hisilicon: support HiSilicon STB xHCI host controller") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201106122221.2304528-1-zhangqilong3@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-histb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-histb.c b/drivers/usb/host/xhci-histb.c index 3c4abb5a1c3f..73aba464b66a 100644 --- a/drivers/usb/host/xhci-histb.c +++ b/drivers/usb/host/xhci-histb.c @@ -241,7 +241,7 @@ static int xhci_histb_probe(struct platform_device *pdev) /* Initialize dma_mask and coherent_dma_mask to 32-bits */ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret) - return ret; + goto disable_pm; hcd = usb_create_hcd(driver, dev, dev_name(dev)); if (!hcd) { From 41fc5ddd35bb60f9ae316175866dc21088384b6f Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 29 Oct 2020 11:54:42 +0200 Subject: [PATCH 398/636] mei: protect mei_cl_mtu from null dereference commit bcbc0b2e275f0a797de11a10eff495b4571863fc upstream. A receive callback is queued while the client is still connected but can still be called after the client was disconnected. Upon disconnect cl->me_cl is set to NULL, hence we need to check that ME client is not-NULL in mei_cl_mtu to avoid null dereference. Cc: Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20201029095444.957924-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/client.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index 64e318f589b4..0d23efcc74ff 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -138,11 +138,11 @@ static inline u8 mei_cl_me_id(const struct mei_cl *cl) * * @cl: host client * - * Return: mtu + * Return: mtu or 0 if client is not connected */ static inline size_t mei_cl_mtu(const struct mei_cl *cl) { - return cl->me_cl->props.max_msg_length; + return cl->me_cl ? cl->me_cl->props.max_msg_length : 0; } /** From 3f7277405fb3fe2853ce5d017ac7935ffca4ccfd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Nov 2020 11:52:05 +0300 Subject: [PATCH 399/636] futex: Don't enable IRQs unconditionally in put_pi_state() commit 1e106aa3509b86738769775969822ffc1ec21bf4 upstream. The exit_pi_state_list() function calls put_pi_state() with IRQs disabled and is not expecting that IRQs will be enabled inside the function. Use the _irqsave() variant so that IRQs are restored to the original state instead of being enabled unconditionally. Fixes: 153fbd1226fb ("futex: Fix more put_pi_state() vs. exit_pi_state_list() races") Signed-off-by: Dan Carpenter Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201106085205.GA1159983@mwanda Signed-off-by: Greg Kroah-Hartman --- kernel/futex.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 52f641c00a65..334dc4cae780 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -856,8 +856,9 @@ static void put_pi_state(struct futex_pi_state *pi_state) */ if (pi_state->owner) { struct task_struct *owner; + unsigned long flags; - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); owner = pi_state->owner; if (owner) { raw_spin_lock(&owner->pi_lock); @@ -865,7 +866,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) raw_spin_unlock(&owner->pi_lock); } rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } if (current->pi_state_cache) { From bc2d96b3061ad35150c4db14661e91916766f241 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 13 Nov 2020 22:52:23 -0800 Subject: [PATCH 400/636] ocfs2: initialize ip_next_orphan commit f5785283dd64867a711ca1fb1f5bb172f252ecdf upstream. Though problem if found on a lower 4.1.12 kernel, I think upstream has same issue. In one node in the cluster, there is the following callback trace: # cat /proc/21473/stack __ocfs2_cluster_lock.isra.36+0x336/0x9e0 [ocfs2] ocfs2_inode_lock_full_nested+0x121/0x520 [ocfs2] ocfs2_evict_inode+0x152/0x820 [ocfs2] evict+0xae/0x1a0 iput+0x1c6/0x230 ocfs2_orphan_filldir+0x5d/0x100 [ocfs2] ocfs2_dir_foreach_blk+0x490/0x4f0 [ocfs2] ocfs2_dir_foreach+0x29/0x30 [ocfs2] ocfs2_recover_orphans+0x1b6/0x9a0 [ocfs2] ocfs2_complete_recovery+0x1de/0x5c0 [ocfs2] process_one_work+0x169/0x4a0 worker_thread+0x5b/0x560 kthread+0xcb/0xf0 ret_from_fork+0x61/0x90 The above stack is not reasonable, the final iput shouldn't happen in ocfs2_orphan_filldir() function. Looking at the code, 2067 /* Skip inodes which are already added to recover list, since dio may 2068 * happen concurrently with unlink/rename */ 2069 if (OCFS2_I(iter)->ip_next_orphan) { 2070 iput(iter); 2071 return 0; 2072 } 2073 The logic thinks the inode is already in recover list on seeing ip_next_orphan is non-NULL, so it skip this inode after dropping a reference which incremented in ocfs2_iget(). While, if the inode is already in recover list, it should have another reference and the iput() at line 2070 should not be the final iput (dropping the last reference). So I don't think the inode is really in the recover list (no vmcore to confirm). Note that ocfs2_queue_orphans(), though not shown up in the call back trace, is holding cluster lock on the orphan directory when looking up for unlinked inodes. The on disk inode eviction could involve a lot of IOs which may need long time to finish. That means this node could hold the cluster lock for very long time, that can lead to the lock requests (from other nodes) to the orhpan directory hang for long time. Looking at more on ip_next_orphan, I found it's not initialized when allocating a new ocfs2_inode_info structure. This causes te reflink operations from some nodes hang for very long time waiting for the cluster lock on the orphan directory. Fix: initialize ip_next_orphan as NULL. Signed-off-by: Wengang Wang Signed-off-by: Andrew Morton Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Link: https://lkml.kernel.org/r/20201109171746.27884-1-wen.gang.wang@oracle.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2658d91c1f7b..09bc2cf5f61c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1747,6 +1747,7 @@ static void ocfs2_inode_init_once(void *data) oi->ip_blkno = 0ULL; oi->ip_clusters = 0; + oi->ip_next_orphan = NULL; ocfs2_resv_init_once(&oi->ip_la_data_resv); From b0f6ea857986f0f5adf19050d8c93480c6eb83ef Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 4 Oct 2020 19:04:26 +0100 Subject: [PATCH 401/636] btrfs: fix potential overflow in cluster_pages_for_defrag on 32bit arch commit a1fbc6750e212c5675a4e48d7f51d44607eb8756 upstream. On 32-bit systems, this shift will overflow for files larger than 4GB as start_index is unsigned long while the calls to btrfs_delalloc_*_space expect u64. CC: stable@vger.kernel.org # 4.4+ Fixes: df480633b891 ("btrfs: extent-tree: Switch to new delalloc space reserve and release") Reviewed-by: Josef Bacik Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Sterba [ define the variable instead of repeating the shift ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f0e71aa05d22..f3658d6ea657 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1239,6 +1239,7 @@ static int cluster_pages_for_defrag(struct inode *inode, u64 page_start; u64 page_end; u64 page_cnt; + u64 start = (u64)start_index << PAGE_SHIFT; int ret; int i; int i_done; @@ -1255,8 +1256,7 @@ static int cluster_pages_for_defrag(struct inode *inode, page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); ret = btrfs_delalloc_reserve_space(inode, &data_reserved, - start_index << PAGE_SHIFT, - page_cnt << PAGE_SHIFT); + start, page_cnt << PAGE_SHIFT); if (ret) return ret; i_done = 0; @@ -1346,8 +1346,7 @@ again: btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(inode, data_reserved, - start_index << PAGE_SHIFT, - (page_cnt - i_done) << PAGE_SHIFT, true); + start, (page_cnt - i_done) << PAGE_SHIFT, true); } @@ -1374,8 +1373,7 @@ out: put_page(pages[i]); } btrfs_delalloc_release_space(inode, data_reserved, - start_index << PAGE_SHIFT, - page_cnt << PAGE_SHIFT, true); + start, page_cnt << PAGE_SHIFT, true); btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); extent_changeset_free(data_reserved); return ret; From 46977ef987a743133b2a66208464c898d7a03e3c Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Thu, 12 Nov 2020 21:53:32 +0800 Subject: [PATCH 402/636] selinux: Fix error return code in sel_ib_pkey_sid_slow() commit c350f8bea271782e2733419bd2ab9bf4ec2051ef upstream. Fix to return a negative error code from the error handling case instead of 0 in function sel_ib_pkey_sid_slow(), as done elsewhere in this function. Cc: stable@vger.kernel.org Fixes: 409dcf31538a ("selinux: Add a cache for quicker retreival of PKey SIDs") Reported-by: Hulk Robot Signed-off-by: Chen Zhou Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- security/selinux/ibpkey.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c index 0a4b89d48297..cb05ae28ce00 100644 --- a/security/selinux/ibpkey.c +++ b/security/selinux/ibpkey.c @@ -161,8 +161,10 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid) * is valid, it just won't be added to the cache. */ new = kzalloc(sizeof(*new), GFP_ATOMIC); - if (!new) + if (!new) { + ret = -ENOMEM; goto out; + } new->psec.subnet_prefix = subnet_prefix; new->psec.pkey = pkey_num; From f3e1eb6bcfa7f26946110ece9b83f140e0fcc65e Mon Sep 17 00:00:00 2001 From: Arnaud de Turckheim Date: Wed, 4 Nov 2020 16:24:53 +0100 Subject: [PATCH 403/636] gpio: pcie-idio-24: Fix irq mask when masking commit d8f270efeac850c569c305dc0baa42ac3d607988 upstream. Fix the bitwise operation to remove only the corresponding bit from the mask. Fixes: 585562046628 ("gpio: Add GPIO support for the ACCES PCIe-IDIO-24 family") Cc: stable@vger.kernel.org Signed-off-by: Arnaud de Turckheim Reviewed-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-pcie-idio-24.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index f953541e7890..95b2bb2bd035 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -365,7 +365,7 @@ static void idio_24_irq_mask(struct irq_data *data) raw_spin_lock_irqsave(&idio24gpio->lock, flags); - idio24gpio->irq_mask &= BIT(bit_offset); + idio24gpio->irq_mask &= ~BIT(bit_offset); new_irq_mask = idio24gpio->irq_mask >> bank_offset; if (!new_irq_mask) { From 17fe356a26565267573997f3828e663328faacb4 Mon Sep 17 00:00:00 2001 From: Arnaud de Turckheim Date: Wed, 4 Nov 2020 16:24:54 +0100 Subject: [PATCH 404/636] gpio: pcie-idio-24: Fix IRQ Enable Register value commit 23a7fdc06ebcc334fa667f0550676b035510b70b upstream. This fixes the COS Enable Register value for enabling/disabling the corresponding IRQs bank. Fixes: 585562046628 ("gpio: Add GPIO support for the ACCES PCIe-IDIO-24 family") Cc: stable@vger.kernel.org Signed-off-by: Arnaud de Turckheim Reviewed-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-pcie-idio-24.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index 95b2bb2bd035..ea48c458967c 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -360,13 +360,13 @@ static void idio_24_irq_mask(struct irq_data *data) unsigned long flags; const unsigned long bit_offset = irqd_to_hwirq(data) - 24; unsigned char new_irq_mask; - const unsigned long bank_offset = bit_offset/8 * 8; + const unsigned long bank_offset = bit_offset / 8; unsigned char cos_enable_state; raw_spin_lock_irqsave(&idio24gpio->lock, flags); idio24gpio->irq_mask &= ~BIT(bit_offset); - new_irq_mask = idio24gpio->irq_mask >> bank_offset; + new_irq_mask = idio24gpio->irq_mask >> bank_offset * 8; if (!new_irq_mask) { cos_enable_state = ioread8(&idio24gpio->reg->cos_enable); @@ -389,12 +389,12 @@ static void idio_24_irq_unmask(struct irq_data *data) unsigned long flags; unsigned char prev_irq_mask; const unsigned long bit_offset = irqd_to_hwirq(data) - 24; - const unsigned long bank_offset = bit_offset/8 * 8; + const unsigned long bank_offset = bit_offset / 8; unsigned char cos_enable_state; raw_spin_lock_irqsave(&idio24gpio->lock, flags); - prev_irq_mask = idio24gpio->irq_mask >> bank_offset; + prev_irq_mask = idio24gpio->irq_mask >> bank_offset * 8; idio24gpio->irq_mask |= BIT(bit_offset); if (!prev_irq_mask) { From d8806fe4a1be54cc696db1d784792efca3ee5689 Mon Sep 17 00:00:00 2001 From: Arnaud de Turckheim Date: Wed, 4 Nov 2020 16:24:55 +0100 Subject: [PATCH 405/636] gpio: pcie-idio-24: Enable PEX8311 interrupts commit 10a2f11d3c9e48363c729419e0f0530dea76e4fe upstream. This enables the PEX8311 internal PCI wire interrupt and the PEX8311 local interrupt input so the local interrupts are forwarded to the PCI. Fixes: 585562046628 ("gpio: Add GPIO support for the ACCES PCIe-IDIO-24 family") Cc: stable@vger.kernel.org Signed-off-by: Arnaud de Turckheim Reviewed-by: William Breathitt Gray Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman --- drivers/gpio/gpio-pcie-idio-24.c | 52 +++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index ea48c458967c..634125747a03 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -28,6 +28,47 @@ #include #include +/* + * PLX PEX8311 PCI LCS_INTCSR Interrupt Control/Status + * + * Bit: Description + * 0: Enable Interrupt Sources (Bit 0) + * 1: Enable Interrupt Sources (Bit 1) + * 2: Generate Internal PCI Bus Internal SERR# Interrupt + * 3: Mailbox Interrupt Enable + * 4: Power Management Interrupt Enable + * 5: Power Management Interrupt + * 6: Slave Read Local Data Parity Check Error Enable + * 7: Slave Read Local Data Parity Check Error Status + * 8: Internal PCI Wire Interrupt Enable + * 9: PCI Express Doorbell Interrupt Enable + * 10: PCI Abort Interrupt Enable + * 11: Local Interrupt Input Enable + * 12: Retry Abort Enable + * 13: PCI Express Doorbell Interrupt Active + * 14: PCI Abort Interrupt Active + * 15: Local Interrupt Input Active + * 16: Local Interrupt Output Enable + * 17: Local Doorbell Interrupt Enable + * 18: DMA Channel 0 Interrupt Enable + * 19: DMA Channel 1 Interrupt Enable + * 20: Local Doorbell Interrupt Active + * 21: DMA Channel 0 Interrupt Active + * 22: DMA Channel 1 Interrupt Active + * 23: Built-In Self-Test (BIST) Interrupt Active + * 24: Direct Master was the Bus Master during a Master or Target Abort + * 25: DMA Channel 0 was the Bus Master during a Master or Target Abort + * 26: DMA Channel 1 was the Bus Master during a Master or Target Abort + * 27: Target Abort after internal 256 consecutive Master Retrys + * 28: PCI Bus wrote data to LCS_MBOX0 + * 29: PCI Bus wrote data to LCS_MBOX1 + * 30: PCI Bus wrote data to LCS_MBOX2 + * 31: PCI Bus wrote data to LCS_MBOX3 + */ +#define PLX_PEX8311_PCI_LCS_INTCSR 0x68 +#define INTCSR_INTERNAL_PCI_WIRE BIT(8) +#define INTCSR_LOCAL_INPUT BIT(11) + /** * struct idio_24_gpio_reg - GPIO device registers structure * @out0_7: Read: FET Outputs 0-7 @@ -92,6 +133,7 @@ struct idio_24_gpio_reg { struct idio_24_gpio { struct gpio_chip chip; raw_spinlock_t lock; + __u8 __iomem *plx; struct idio_24_gpio_reg __iomem *reg; unsigned long irq_mask; }; @@ -481,6 +523,7 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct device *const dev = &pdev->dev; struct idio_24_gpio *idio24gpio; int err; + const size_t pci_plx_bar_index = 1; const size_t pci_bar_index = 2; const char *const name = pci_name(pdev); @@ -494,12 +537,13 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - err = pcim_iomap_regions(pdev, BIT(pci_bar_index), name); + err = pcim_iomap_regions(pdev, BIT(pci_plx_bar_index) | BIT(pci_bar_index), name); if (err) { dev_err(dev, "Unable to map PCI I/O addresses (%d)\n", err); return err; } + idio24gpio->plx = pcim_iomap_table(pdev)[pci_plx_bar_index]; idio24gpio->reg = pcim_iomap_table(pdev)[pci_bar_index]; idio24gpio->chip.label = name; @@ -520,6 +564,12 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Software board reset */ iowrite8(0, &idio24gpio->reg->soft_reset); + /* + * enable PLX PEX8311 internal PCI wire interrupt and local interrupt + * input + */ + iowrite8((INTCSR_INTERNAL_PCI_WIRE | INTCSR_LOCAL_INPUT) >> 8, + idio24gpio->plx + PLX_PEX8311_PCI_LCS_INTCSR + 1); err = devm_gpiochip_add_data(dev, &idio24gpio->chip, idio24gpio); if (err) { From de04ff998008d29783242dbe5b2912c70e6f4881 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 6 Nov 2020 18:25:30 +0900 Subject: [PATCH 406/636] mmc: renesas_sdhi_core: Add missing tmio_mmc_host_free() at remove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e8973201d9b281375b5a8c66093de5679423021a upstream. The commit 94b110aff867 ("mmc: tmio: add tmio_mmc_host_alloc/free()") added tmio_mmc_host_free(), but missed the function calling in the sh_mobile_sdhi_remove() at that time. So, fix it. Otherwise, we cannot rebind the sdhi/mmc devices when we use aliases of mmc. Fixes: 94b110aff867 ("mmc: tmio: add tmio_mmc_host_alloc/free()") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Reviewed-by: Niklas Söderlund Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1604654730-29914-1-git-send-email-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/renesas_sdhi_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 61f0faddfd88..e8ab582551f8 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -764,6 +764,7 @@ int renesas_sdhi_remove(struct platform_device *pdev) tmio_mmc_host_remove(host); renesas_sdhi_clk_disable(host); + tmio_mmc_host_free(host); return 0; } From 9bb7c3825457a6319c6e6cc4442e52ce3edcdaf4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 28 Oct 2020 16:39:49 -0400 Subject: [PATCH 407/636] don't dump the threads that had been already exiting when zapped. commit 77f6ab8b7768cf5e6bdd0e72499270a0671506ee upstream. Coredump logics needs to report not only the registers of the dumping thread, but (since 2.5.43) those of other threads getting killed. Doing that might require extra state saved on the stack in asm glue at kernel entry; signal delivery logics does that (we need to be able to save sigcontext there, at the very least) and so does seccomp. That covers all callers of do_coredump(). Secondary threads get hit with SIGKILL and caught as soon as they reach exit_mm(), which normally happens in signal delivery, so those are also fine most of the time. Unfortunately, it is possible to end up with secondary zapped when it has already entered exit(2) (or, worse yet, is oopsing). In those cases we reach exit_mm() when mm->core_state is already set, but the stack contents is not what we would have in signal delivery. At least on two architectures (alpha and m68k) it leads to infoleaks - we end up with a chunk of kernel stack written into coredump, with the contents consisting of normal C stack frames of the call chain leading to exit_mm() instead of the expected copy of userland registers. In case of alpha we leak 312 bytes of stack. Other architectures (including the regset-using ones) might have similar problems - the normal user of regsets is ptrace and the state of tracee at the time of such calls is special in the same way signal delivery is. Note that had the zapper gotten to the exiting thread slightly later, it wouldn't have been included into coredump anyway - we skip the threads that have already cleared their ->mm. So let's pretend that zapper always loses the race. IOW, have exit_mm() only insert into the dumper list if we'd gotten there from handling a fatal signal[*] As the result, the callers of do_exit() that have *not* gone through get_signal() are not seen by coredump logics as secondary threads. Which excludes voluntary exit()/oopsen/traps/etc. The dumper thread itself is unaffected by that, so seccomp is fine. [*] originally I intended to add a new flag in tsk->flags, but ebiederman pointed out that PF_SIGNALED is already doing just what we need. Cc: stable@vger.kernel.org Fixes: d89f3847def4 ("[PATCH] thread-aware coredumps, 2.5.43-C3") History-tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Acked-by: "Eric W. Biederman" Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/exit.c b/kernel/exit.c index eeaafd4064c9..65133ebddfad 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -517,7 +517,10 @@ static void exit_mm(void) up_read(&mm->mmap_sem); self.task = current; - self.next = xchg(&core_state->dumper.next, &self); + if (self.task->flags & PF_SIGNALED) + self.next = xchg(&core_state->dumper.next, &self); + else + self.task = NULL; /* * Implies mb(), the result of xchg() must be visible * to core_state->dumper. From 0149a4cacc960b352738179b714da4deb4dc2970 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 5 Nov 2020 20:02:56 +0100 Subject: [PATCH 408/636] drm/gma500: Fix out-of-bounds access to struct drm_device.vblank[] commit 06ad8d339524bf94b89859047822c31df6ace239 upstream. The gma500 driver expects 3 pipelines in several it's IRQ functions. Accessing struct drm_device.vblank[], this fails with devices that only have 2 pipelines. An example KASAN report is shown below. [ 62.267688] ================================================================== [ 62.268856] BUG: KASAN: slab-out-of-bounds in psb_irq_postinstall+0x250/0x3c0 [gma500_gfx] [ 62.269450] Read of size 1 at addr ffff8880012bc6d0 by task systemd-udevd/285 [ 62.269949] [ 62.270192] CPU: 0 PID: 285 Comm: systemd-udevd Tainted: G E 5.10.0-rc1-1-default+ #572 [ 62.270807] Hardware name: /DN2800MT, BIOS MTCDT10N.86A.0164.2012.1213.1024 12/13/2012 [ 62.271366] Call Trace: [ 62.271705] dump_stack+0xae/0xe5 [ 62.272180] print_address_description.constprop.0+0x17/0xf0 [ 62.272987] ? psb_irq_postinstall+0x250/0x3c0 [gma500_gfx] [ 62.273474] __kasan_report.cold+0x20/0x38 [ 62.273989] ? psb_irq_postinstall+0x250/0x3c0 [gma500_gfx] [ 62.274460] kasan_report+0x3a/0x50 [ 62.274891] psb_irq_postinstall+0x250/0x3c0 [gma500_gfx] [ 62.275380] drm_irq_install+0x131/0x1f0 <...> [ 62.300751] Allocated by task 285: [ 62.301223] kasan_save_stack+0x1b/0x40 [ 62.301731] __kasan_kmalloc.constprop.0+0xbf/0xd0 [ 62.302293] drmm_kmalloc+0x55/0x100 [ 62.302773] drm_vblank_init+0x77/0x210 Resolve the issue by only handling vblank entries up to the number of CRTCs. I'm adding a Fixes tag for reference, although the bug has been present since the driver's initial commit. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Fixes: 5c49fd3aa0ab ("gma500: Add the core DRM files and headers") Cc: Alan Cox Cc: Dave Airlie Cc: Patrik Jakobsson Cc: dri-devel@lists.freedesktop.org Cc: stable@vger.kernel.org#v3.3+ Link: https://patchwork.freedesktop.org/patch/msgid/20201105190256.3893-1-tzimmermann@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/psb_irq.c | 34 +++++++++++--------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c index 78eb10902809..076b6da44f46 100644 --- a/drivers/gpu/drm/gma500/psb_irq.c +++ b/drivers/gpu/drm/gma500/psb_irq.c @@ -350,6 +350,7 @@ int psb_irq_postinstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; unsigned long irqflags; + unsigned int i; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -362,20 +363,12 @@ int psb_irq_postinstall(struct drm_device *dev) PSB_WVDC32(dev_priv->vdc_irq_mask, PSB_INT_ENABLE_R); PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - if (dev->vblank[0].enabled) - psb_enable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[1].enabled) - psb_enable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[2].enabled) - psb_enable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); + for (i = 0; i < dev->num_crtcs; ++i) { + if (dev->vblank[i].enabled) + psb_enable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + else + psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + } if (dev_priv->ops->hotplug_enable) dev_priv->ops->hotplug_enable(dev, true); @@ -388,6 +381,7 @@ void psb_irq_uninstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; unsigned long irqflags; + unsigned int i; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -396,14 +390,10 @@ void psb_irq_uninstall(struct drm_device *dev) PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - if (dev->vblank[0].enabled) - psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[1].enabled) - psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[2].enabled) - psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); + for (i = 0; i < dev->num_crtcs; ++i) { + if (dev->vblank[i].enabled) + psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + } dev_priv->vdc_irq_mask &= _PSB_IRQ_SGX_FLAG | _PSB_IRQ_MSVDX_FLAG | From 0066e9d0f10296ae1ea9162c9f44a5de584197cd Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 6 Nov 2020 07:19:10 +0800 Subject: [PATCH 409/636] pinctrl: amd: use higher precision for 512 RtcClk commit c64a6a0d4a928c63e5bc3b485552a8903a506c36 upstream. RTC is 32.768kHz thus 512 RtcClk equals 15625 usec. The documentation likely has dropped precision and that's why the driver mistakenly took the slightly deviated value. Cc: stable@vger.kernel.org Reported-by: Andy Shevchenko Suggested-by: Andy Shevchenko Suggested-by: Hans de Goede Signed-off-by: Coiby Xu Reviewed-by: Andy Shevchenko Reviewed-by: Hans de Goede Link: https://lore.kernel.org/linux-gpio/2f4706a1-502f-75f0-9596-cc25b4933b6c@redhat.com/ Link: https://lore.kernel.org/r/20201105231912.69527-3-coiby.xu@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index b1ffdd3f6d07..46f084aa429d 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -157,7 +157,7 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); pin_reg &= ~BIT(DB_TMR_LARGE_OFF); } else if (debounce < 250000) { - time = debounce / 15600; + time = debounce / 15625; pin_reg |= time & DB_TMR_OUT_MASK; pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF); pin_reg |= BIT(DB_TMR_LARGE_OFF); From 0145bc38a82a98e5f00ac92ed4b695e649da6f52 Mon Sep 17 00:00:00 2001 From: Coiby Xu Date: Fri, 6 Nov 2020 07:19:09 +0800 Subject: [PATCH 410/636] pinctrl: amd: fix incorrect way to disable debounce filter commit 06abe8291bc31839950f7d0362d9979edc88a666 upstream. The correct way to disable debounce filter is to clear bit 5 and 6 of the register. Cc: stable@vger.kerne.org Signed-off-by: Coiby Xu Reviewed-by: Hans de Goede Cc: Hans de Goede Link: https://lore.kernel.org/linux-gpio/df2c008b-e7b5-4fdd-42ea-4d1c62b52139@redhat.com/ Link: https://lore.kernel.org/r/20201105231912.69527-2-coiby.xu@gmail.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 46f084aa429d..d6255049e519 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -167,14 +167,14 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); pin_reg |= BIT(DB_TMR_LARGE_OFF); } else { - pin_reg &= ~DB_CNTRl_MASK; + pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); ret = -EINVAL; } } else { pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF); pin_reg &= ~BIT(DB_TMR_LARGE_OFF); pin_reg &= ~DB_TMR_OUT_MASK; - pin_reg &= ~DB_CNTRl_MASK; + pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); } writel(pin_reg, gpio_dev->base + offset * 4); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); From ebe96a42f940231e07640d43cb87f81f3f706d82 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sun, 1 Nov 2020 03:51:02 +0800 Subject: [PATCH 411/636] erofs: derive atime instead of leaving it empty commit d3938ee23e97bfcac2e0eb6b356875da73d700df upstream. EROFS has _only one_ ondisk timestamp (ctime is currently documented and recorded, we might also record mtime instead with a new compat feature if needed) for each extended inode since EROFS isn't mainly for archival purposes so no need to keep all timestamps on disk especially for Android scenarios due to security concerns. Also, romfs/cramfs don't have their own on-disk timestamp, and squashfs only records mtime instead. Let's also derive access time from ondisk timestamp rather than leaving it empty, and if mtime/atime for each file are really needed for specific scenarios as well, we can also use xattrs to record them then. Link: https://lore.kernel.org/r/20201031195102.21221-1-hsiangkao@aol.com [ Gao Xiang: It'd be better to backport for user-friendly concern. ] Fixes: 431339ba9042 ("staging: erofs: add inode operations") Cc: stable # 4.19+ Reported-by: nl6720 Reviewed-by: Chao Yu [ Gao Xiang: Manually backport to 4.19.y due to trivial conflicts. ] Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/inode.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c index 7448744cc515..12a5be95457f 100644 --- a/drivers/staging/erofs/inode.c +++ b/drivers/staging/erofs/inode.c @@ -53,11 +53,9 @@ static int read_inode(struct inode *inode, void *data) i_gid_write(inode, le32_to_cpu(v2->i_gid)); set_nlink(inode, le32_to_cpu(v2->i_nlink)); - /* ns timestamp */ - inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = - le64_to_cpu(v2->i_ctime); - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = - le32_to_cpu(v2->i_ctime_nsec); + /* extended inode has its own timestamp */ + inode->i_ctime.tv_sec = le64_to_cpu(v2->i_ctime); + inode->i_ctime.tv_nsec = le32_to_cpu(v2->i_ctime_nsec); inode->i_size = le64_to_cpu(v2->i_size); } else if (__inode_version(advise) == EROFS_INODE_LAYOUT_V1) { @@ -83,11 +81,9 @@ static int read_inode(struct inode *inode, void *data) i_gid_write(inode, le16_to_cpu(v1->i_gid)); set_nlink(inode, le16_to_cpu(v1->i_nlink)); - /* use build time to derive all file time */ - inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = - sbi->build_time; - inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = - sbi->build_time_nsec; + /* use build time for compact inodes */ + inode->i_ctime.tv_sec = sbi->build_time; + inode->i_ctime.tv_nsec = sbi->build_time_nsec; inode->i_size = le32_to_cpu(v1->i_size); } else { @@ -97,6 +93,11 @@ static int read_inode(struct inode *inode, void *data) return -EIO; } + inode->i_mtime.tv_sec = inode->i_ctime.tv_sec; + inode->i_atime.tv_sec = inode->i_ctime.tv_sec; + inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec; + inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec; + /* measure inode.i_blocks as the generic filesystem */ inode->i_blocks = ((inode->i_size - 1) >> 9) + 1; return 0; From 02ee0fe622e589a48cb44fbeb9e6d1e996fdbc7d Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Mon, 26 Oct 2020 17:02:14 -0700 Subject: [PATCH 412/636] swiotlb: fix "x86: Don't panic if can not alloc buffer for swiotlb" commit e9696d259d0fb5d239e8c28ca41089838ea76d13 upstream. kernel/dma/swiotlb.c:swiotlb_init gets called first and tries to allocate a buffer for the swiotlb. It does so by calling memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE); If the allocation must fail, no_iotlb_memory is set. Later during initialization swiotlb-xen comes in (drivers/xen/swiotlb-xen.c:xen_swiotlb_init) and given that io_tlb_start is != 0, it thinks the memory is ready to use when actually it is not. When the swiotlb is actually needed, swiotlb_tbl_map_single gets called and since no_iotlb_memory is set the kernel panics. Instead, if swiotlb-xen.c:xen_swiotlb_init knew the swiotlb hadn't been initialized, it would do the initialization itself, which might still succeed. Fix the panic by setting io_tlb_start to 0 on swiotlb initialization failure, and also by setting no_iotlb_memory to false on swiotlb initialization success. Fixes: ac2cbab21f31 ("x86: Don't panic if can not alloc buffer for swiotlb") Reported-by: Elliott Mitchell Tested-by: Elliott Mitchell Signed-off-by: Stefano Stabellini Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman --- kernel/dma/swiotlb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 2a8c41f12d45..6f7d4e977c5c 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -239,6 +239,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; + no_iotlb_memory = false; if (verbose) swiotlb_print_info(); @@ -270,9 +271,11 @@ swiotlb_init(int verbose) if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; - if (io_tlb_start) + if (io_tlb_start) { memblock_free_early(io_tlb_start, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + io_tlb_start = 0; + } pr_warn("Cannot allocate buffer"); no_iotlb_memory = true; } @@ -376,6 +379,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; + no_iotlb_memory = false; swiotlb_print_info(); From 7fe18ca0435629dc6b5bcc76a6dbb2ba66f86d70 Mon Sep 17 00:00:00 2001 From: Oliver Herms Date: Tue, 3 Nov 2020 11:41:33 +0100 Subject: [PATCH 413/636] IPv6: Set SIT tunnel hard_header_len to zero [ Upstream commit 8ef9ba4d666614497a057d09b0a6eafc1e34eadf ] Due to the legacy usage of hard_header_len for SIT tunnels while already using infrastructure from net/ipv4/ip_tunnel.c the calculation of the path MTU in tnl_update_pmtu is incorrect. This leads to unnecessary creation of MTU exceptions for any flow going over a SIT tunnel. As SIT tunnels do not have a header themsevles other than their transport (L3, L2) headers we're leaving hard_header_len set to zero as tnl_update_pmtu is already taking care of the transport headers sizes. This will also help avoiding unnecessary IPv6 GC runs and spinlock contention seen when using SIT tunnels and for more than net.ipv6.route.gc_thresh flows. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Signed-off-by: Oliver Herms Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201103104133.GA1573211@tws Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index bfed7508ba19..98c108baf35e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1087,7 +1087,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); dev->mtu = tdev->mtu - t_hlen; if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; @@ -1377,7 +1376,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; - dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU - t_hlen; From 2ff0e7c8eca2536ecd534c758aa704d342f38007 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 9 Nov 2020 08:57:05 +0100 Subject: [PATCH 414/636] net/af_iucv: fix null pointer dereference on shutdown [ Upstream commit 4031eeafa71eaf22ae40a15606a134ae86345daf ] syzbot reported the following KASAN finding: BUG: KASAN: nullptr-dereference in iucv_send_ctrl+0x390/0x3f0 net/iucv/af_iucv.c:385 Read of size 2 at addr 000000000000021e by task syz-executor907/519 CPU: 0 PID: 519 Comm: syz-executor907 Not tainted 5.9.0-syzkaller-07043-gbcf9877ad213 #0 Hardware name: IBM 3906 M04 701 (KVM/Linux) Call Trace: [<00000000c576af60>] unwind_start arch/s390/include/asm/unwind.h:65 [inline] [<00000000c576af60>] show_stack+0x180/0x228 arch/s390/kernel/dumpstack.c:135 [<00000000c9dcd1f8>] __dump_stack lib/dump_stack.c:77 [inline] [<00000000c9dcd1f8>] dump_stack+0x268/0x2f0 lib/dump_stack.c:118 [<00000000c5fed016>] print_address_description.constprop.0+0x5e/0x218 mm/kasan/report.c:383 [<00000000c5fec82a>] __kasan_report mm/kasan/report.c:517 [inline] [<00000000c5fec82a>] kasan_report+0x11a/0x168 mm/kasan/report.c:534 [<00000000c98b5b60>] iucv_send_ctrl+0x390/0x3f0 net/iucv/af_iucv.c:385 [<00000000c98b6262>] iucv_sock_shutdown+0x44a/0x4c0 net/iucv/af_iucv.c:1457 [<00000000c89d3a54>] __sys_shutdown+0x12c/0x1c8 net/socket.c:2204 [<00000000c89d3b70>] __do_sys_shutdown net/socket.c:2212 [inline] [<00000000c89d3b70>] __s390x_sys_shutdown+0x38/0x48 net/socket.c:2210 [<00000000c9e36eac>] system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 There is nothing to shutdown if a connection has never been established. Besides that iucv->hs_dev is not yet initialized if a socket is in IUCV_OPEN state and iucv->path is not yet initialized if socket is in IUCV_BOUND state. So, just skip the shutdown calls for a socket in these states. Fixes: eac3731bd04c ("[S390]: Add AF_IUCV socket support") Fixes: 82492a355fac ("af_iucv: add shutdown for HS transport") Reviewed-by: Vasily Gorbik Signed-off-by: Ursula Braun [jwi: correct one Fixes tag] Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/iucv/af_iucv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 23a1002ed86d..ad3e515f91f0 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1571,7 +1571,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how) break; } - if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) { + if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) && + sk->sk_state == IUCV_CONNECTED) { if (iucv->transport == AF_IUCV_TRANS_IUCV) { txmsg.class = 0; txmsg.tag = 0; From bd73928fa9561a25144be6ed9313493bd9fb9ce3 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Tue, 10 Nov 2020 08:16:31 +0800 Subject: [PATCH 415/636] net: Update window_clamp if SOCK_RCVBUF is set [ Upstream commit 909172a149749242990a6e64cb55d55460d4e417 ] When net.ipv4.tcp_syncookies=1 and syn flood is happened, cookie_v4_check or cookie_v6_check tries to redo what tcp_v4_send_synack or tcp_v6_send_synack did, rsk_window_clamp will be changed if SOCK_RCVBUF is set, which will make rcv_wscale is different, the client still operates with initial window scale and can overshot granted window, the client use the initial scale but local server use new scale to advertise window value, and session work abnormally. Fixes: e88c64f0a425 ("tcp: allow effective reduction of TCP's rcv-buffer via setsockopt") Signed-off-by: Mao Wenan Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/1604967391-123737-1-git-send-email-wenan.mao@linux.alibaba.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/syncookies.c | 9 +++++++-- net/ipv6/syncookies.c | 10 ++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f66b2e6d97a7..1a06850ef3cc 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -296,7 +296,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct rtable *rt; __u8 rcv_wscale; struct flowi4 fl4; @@ -391,8 +391,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index a377be8a9fb4..ec61b67a92be 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -141,7 +141,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; - int mss; + int full_space, mss; struct dst_entry *dst; __u8 rcv_wscale; u32 tsoff = 0; @@ -246,7 +246,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); - tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, + /* limit the window selection if the user enforce a smaller rx buffer */ + full_space = tcp_full_space(sk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; + + tcp_select_initial_window(sk, full_space, req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); From 889a5da9ddf7c3669263c6d342898c5075d5d13c Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Mon, 9 Nov 2020 07:54:49 +0100 Subject: [PATCH 416/636] net/x25: Fix null-ptr-deref in x25_connect [ Upstream commit 361182308766a265b6c521879b34302617a8c209 ] This fixes a regression for blocking connects introduced by commit 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect"). The x25->neighbour is already set to "NULL" by x25_disconnect() now, while a blocking connect is waiting in x25_wait_for_connection_establishment(). Therefore x25->neighbour must not be accessed here again and x25->state is also already set to X25_STATE_0 by x25_disconnect(). Fixes: 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect") Signed-off-by: Martin Schiller Reviewed-by: Xie He Link: https://lore.kernel.org/r/20201109065449.9014-1-ms@dev.tdt.de Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index bd1cbbfe5924..372f4194db5a 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -824,7 +824,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; rc = 0; out_put_neigh: - if (rc) { + if (rc && x25->neighbour) { read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); x25->neighbour = NULL; From cf9913775901db56dd5c5c4488c3c3f47d5ff4e0 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 9 Nov 2020 22:09:13 +0800 Subject: [PATCH 417/636] tipc: fix memory leak in tipc_topsrv_start() [ Upstream commit fa6882c63621821f73cc806f291208e1c6ea6187 ] kmemleak report a memory leak as follows: unreferenced object 0xffff88810a596800 (size 512): comm "ip", pid 21558, jiffies 4297568990 (age 112.120s) hex dump (first 32 bytes): 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... ff ff ff ff ff ff ff ff 00 83 60 b0 ff ff ff ff ..........`..... backtrace: [<0000000022bbe21f>] tipc_topsrv_init_net+0x1f3/0xa70 [<00000000fe15ddf7>] ops_init+0xa8/0x3c0 [<00000000138af6f2>] setup_net+0x2de/0x7e0 [<000000008c6807a3>] copy_net_ns+0x27d/0x530 [<000000006b21adbd>] create_new_namespaces+0x382/0xa30 [<00000000bb169746>] unshare_nsproxy_namespaces+0xa1/0x1d0 [<00000000fe2e42bc>] ksys_unshare+0x39c/0x780 [<0000000009ba3b19>] __x64_sys_unshare+0x2d/0x40 [<00000000614ad866>] do_syscall_64+0x56/0xa0 [<00000000a1b5ca3c>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 'srv' is malloced in tipc_topsrv_start() but not free before leaving from the error handling cases. We need to free it. Fixes: 5c45ab24ac77 ("tipc: make struct tipc_server private for server.c") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201109140913.47370-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/tipc/topsrv.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index ec9a7137d267..1c4733153d74 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -671,12 +671,18 @@ static int tipc_topsrv_start(struct net *net) ret = tipc_topsrv_work_start(srv); if (ret < 0) - return ret; + goto err_start; ret = tipc_topsrv_create_listener(srv); if (ret < 0) - tipc_topsrv_work_stop(srv); + goto err_create; + return 0; + +err_create: + tipc_topsrv_work_stop(srv); +err_start: + kfree(srv); return ret; } From 200c2f68407601b1bed3dba5e027f34cd284ec92 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Fri, 6 Nov 2020 08:30:30 +0100 Subject: [PATCH 418/636] vrf: Fix fast path output packet handling with async Netfilter rules [ Upstream commit 9e2b7fa2df4365e99934901da4fb4af52d81e820 ] VRF devices use an optimized direct path on output if a default qdisc is involved, calling Netfilter hooks directly. This path, however, does not consider Netfilter rules completing asynchronously, such as with NFQUEUE. The Netfilter okfn() is called for asynchronously accepted packets, but the VRF never passes that packet down the stack to send it out over the slave device. Using the slower redirect path for this seems not feasible, as we do not know beforehand if a Netfilter hook has asynchronously completing rules. Fix the use of asynchronously completing Netfilter rules in OUTPUT and POSTROUTING by using a special completion function that additionally calls dst_output() to pass the packet down the stack. Also, slightly adjust the use of nf_reset_ct() so that is called in the asynchronous case, too. Fixes: dcdd43c41e60 ("net: vrf: performance improvements for IPv4") Fixes: a9ec54d1b0cd ("net: vrf: performance improvements for IPv6") Signed-off-by: Martin Willi Link: https://lore.kernel.org/r/20201106073030.3974927-1-martin@strongswan.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 92 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 69 insertions(+), 23 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index b55eeb8f8fa3..93899a7be9c5 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -336,8 +336,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } -static int vrf_finish_direct(struct net *net, struct sock *sk, - struct sk_buff *skb) +static void vrf_finish_direct(struct sk_buff *skb) { struct net_device *vrf_dev = skb->dev; @@ -356,7 +355,8 @@ static int vrf_finish_direct(struct net *net, struct sock *sk, skb_pull(skb, ETH_HLEN); } - return 1; + /* reset skb device */ + nf_reset(skb); } #if IS_ENABLED(CONFIG_IPV6) @@ -435,15 +435,41 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev, return skb; } +static int vrf_output6_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + vrf_finish_direct(skb); + + return vrf_ip6_local_out(net, sk, skb); +} + static int vrf_output6_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { + int err = 1; + skb->protocol = htons(ETH_P_IPV6); - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb->dev, - vrf_finish_direct, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + if (!(IPCB(skb)->flags & IPSKB_REROUTED)) + err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, + NULL, skb->dev, vrf_output6_direct_finish); + + if (likely(err == 1)) + vrf_finish_direct(skb); + + return err; +} + +static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = vrf_output6_direct(net, sk, skb); + if (likely(err == 1)) + err = vrf_ip6_local_out(net, sk, skb); + + return err; } static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, @@ -456,18 +482,15 @@ static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, - skb, NULL, vrf_dev, vrf_output6_direct); + skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); if (likely(err == 1)) err = vrf_output6_direct(net, sk, skb); - /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); - else - skb = NULL; + return skb; - return skb; + return NULL; } static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, @@ -649,15 +672,41 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev, return skb; } +static int vrf_output_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + vrf_finish_direct(skb); + + return vrf_ip_local_out(net, sk, skb); +} + static int vrf_output_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { + int err = 1; + skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, - net, sk, skb, NULL, skb->dev, - vrf_finish_direct, - !(IPCB(skb)->flags & IPSKB_REROUTED)); + if (!(IPCB(skb)->flags & IPSKB_REROUTED)) + err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, + NULL, skb->dev, vrf_output_direct_finish); + + if (likely(err == 1)) + vrf_finish_direct(skb); + + return err; +} + +static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = vrf_output_direct(net, sk, skb); + if (likely(err == 1)) + err = vrf_ip_local_out(net, sk, skb); + + return err; } static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, @@ -670,18 +719,15 @@ static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, - skb, NULL, vrf_dev, vrf_output_direct); + skb, NULL, vrf_dev, vrf_ip_out_direct_finish); if (likely(err == 1)) err = vrf_output_direct(net, sk, skb); - /* reset skb device */ if (likely(err == 1)) - nf_reset(skb); - else - skb = NULL; + return skb; - return skb; + return NULL; } static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, From bc571404829755e4a9504cc5fbc636fea80b2d83 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 5 Nov 2020 15:28:42 +0100 Subject: [PATCH 419/636] r8169: fix potential skb double free in an error path [ Upstream commit cc6528bc9a0c901c83b8220a2e2617f3354d6dd9 ] The caller of rtl8169_tso_csum_v2() frees the skb if false is returned. eth_skb_pad() internally frees the skb on error what would result in a double free. Therefore use __skb_put_padto() directly and instruct it to not free the skb on error. Fixes: b423e9ae49d7 ("r8169: fix offloaded tx checksum for small packets.") Reported-by: Jakub Kicinski Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/f7e68191-acff-9ded-4263-c016428a8762@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 1555c0dae490..c7ce167c67a0 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6274,7 +6274,8 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, opts[1] |= transport_offset << TCPHO_SHIFT; } else { if (unlikely(rtl_test_hw_pad_bug(tp, skb))) - return !eth_skb_pad(skb); + /* eth_skb_pad would free the skb on error */ + return !__skb_put_padto(skb, ETH_ZLEN, false); } return true; From 81d7c56d6fab5ccbf522c47a655cd427808679f2 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Sun, 9 Aug 2020 06:57:44 +0000 Subject: [PATCH 420/636] random32: make prandom_u32() output unpredictable commit c51f8f88d705e06bd696d7510aff22b33eb8e638 upstream. Non-cryptographic PRNGs may have great statistical properties, but are usually trivially predictable to someone who knows the algorithm, given a small sample of their output. An LFSR like prandom_u32() is particularly simple, even if the sample is widely scattered bits. It turns out the network stack uses prandom_u32() for some things like random port numbers which it would prefer are *not* trivially predictable. Predictability led to a practical DNS spoofing attack. Oops. This patch replaces the LFSR with a homebrew cryptographic PRNG based on the SipHash round function, which is in turn seeded with 128 bits of strong random key. (The authors of SipHash have *not* been consulted about this abuse of their algorithm.) Speed is prioritized over security; attacks are rare, while performance is always wanted. Replacing all callers of prandom_u32() is the quick fix. Whether to reinstate a weaker PRNG for uses which can tolerate it is an open question. Commit f227e3ec3b5c ("random32: update the net random state on interrupt and activity") was an earlier attempt at a solution. This patch replaces it. Reported-by: Amit Klein Cc: Willy Tarreau Cc: Eric Dumazet Cc: "Jason A. Donenfeld" Cc: Andy Lutomirski Cc: Kees Cook Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Linus Torvalds Cc: tytso@mit.edu Cc: Florian Westphal Cc: Marc Plumb Fixes: f227e3ec3b5c ("random32: update the net random state on interrupt and activity") Signed-off-by: George Spelvin Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/ [ willy: partial reversal of f227e3ec3b5c; moved SIPROUND definitions to prandom.h for later use; merged George's prandom_seed() proposal; inlined siprand_u32(); replaced the net_rand_state[] array with 4 members to fix a build issue; cosmetic cleanups to make checkpatch happy; fixed RANDOM32_SELFTEST build ] [wt: backported to 4.19 -- various context adjustments] Signed-off-by: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 1 - include/linux/prandom.h | 36 +++- kernel/time/timer.c | 7 - lib/random32.c | 462 ++++++++++++++++++++++++---------------- 4 files changed, 317 insertions(+), 189 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 80dedecfe15c..98925d49c96b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1257,7 +1257,6 @@ void add_interrupt_randomness(int irq, int irq_flags) fast_mix(fast_pool); add_interrupt_bench(cycles); - this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]); if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && diff --git a/include/linux/prandom.h b/include/linux/prandom.h index aa16e6468f91..cc1e71334e53 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -16,12 +16,44 @@ void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); +#if BITS_PER_LONG == 64 +/* + * The core SipHash round function. Each line can be executed in + * parallel given enough CPU resources. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ + v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ + v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ + v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ +) + +#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) +#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) + +#elif BITS_PER_LONG == 32 +/* + * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. + * This is weaker, but 32-bit machines are not used for high-traffic + * applications, so there is less output for an attacker to analyze. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ + v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ + v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ + v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ +) +#define PRND_K0 0x6c796765 +#define PRND_K1 0x74656462 + +#else +#error Unsupported BITS_PER_LONG +#endif + struct rnd_state { __u32 s1, s2, s3, s4; }; -DECLARE_PER_CPU(struct rnd_state, net_rand_state); - u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 61e41ea3a96e..a6e88d9bb931 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1655,13 +1655,6 @@ void update_process_times(int user_tick) scheduler_tick(); if (IS_ENABLED(CONFIG_POSIX_TIMERS)) run_posix_cpu_timers(p); - - /* The current CPU might make use of net randoms without receiving IRQs - * to renew them often enough. Let's update the net_rand_state from a - * non-constant value that's not affine to the number of calls to make - * sure it's updated when there's some activity (we don't care in idle). - */ - this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick); } /** diff --git a/lib/random32.c b/lib/random32.c index b6f3325e38e4..9085b1172015 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -40,16 +40,6 @@ #include #include -#ifdef CONFIG_RANDOM32_SELFTEST -static void __init prandom_state_selftest(void); -#else -static inline void prandom_state_selftest(void) -{ -} -#endif - -DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy; - /** * prandom_u32_state - seeded pseudo-random number generator. * @state: pointer to state structure holding seeded state. @@ -69,25 +59,6 @@ u32 prandom_u32_state(struct rnd_state *state) } EXPORT_SYMBOL(prandom_u32_state); -/** - * prandom_u32 - pseudo random number generator - * - * A 32 bit pseudo-random number is generated using a fast - * algorithm suitable for simulation. This algorithm is NOT - * considered safe for cryptographic use. - */ -u32 prandom_u32(void) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - u32 res; - - res = prandom_u32_state(state); - put_cpu_var(net_rand_state); - - return res; -} -EXPORT_SYMBOL(prandom_u32); - /** * prandom_bytes_state - get the requested number of pseudo-random bytes * @@ -119,20 +90,6 @@ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) } EXPORT_SYMBOL(prandom_bytes_state); -/** - * prandom_bytes - get the requested number of pseudo-random bytes - * @buf: where to copy the pseudo-random bytes to - * @bytes: the requested number of bytes - */ -void prandom_bytes(void *buf, size_t bytes) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - - prandom_bytes_state(state, buf, bytes); - put_cpu_var(net_rand_state); -} -EXPORT_SYMBOL(prandom_bytes); - static void prandom_warmup(struct rnd_state *state) { /* Calling RNG ten times to satisfy recurrence condition */ @@ -148,96 +105,6 @@ static void prandom_warmup(struct rnd_state *state) prandom_u32_state(state); } -static u32 __extract_hwseed(void) -{ - unsigned int val = 0; - - (void)(arch_get_random_seed_int(&val) || - arch_get_random_int(&val)); - - return val; -} - -static void prandom_seed_early(struct rnd_state *state, u32 seed, - bool mix_with_hwseed) -{ -#define LCG(x) ((x) * 69069U) /* super-duper LCG */ -#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) - state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); - state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); - state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); - state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); -} - -/** - * prandom_seed - add entropy to pseudo random number generator - * @seed: seed value - * - * Add some additional seeding to the prandom pool. - */ -void prandom_seed(u32 entropy) -{ - int i; - /* - * No locking on the CPUs, but then somewhat random results are, well, - * expected. - */ - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - - state->s1 = __seed(state->s1 ^ entropy, 2U); - prandom_warmup(state); - } -} -EXPORT_SYMBOL(prandom_seed); - -/* - * Generate some initially weak seeding values to allow - * to start the prandom_u32() engine. - */ -static int __init prandom_init(void) -{ - int i; - - prandom_state_selftest(); - - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - u32 weak_seed = (i + jiffies) ^ random_get_entropy(); - - prandom_seed_early(state, weak_seed, true); - prandom_warmup(state); - } - - return 0; -} -core_initcall(prandom_init); - -static void __prandom_timer(struct timer_list *unused); - -static DEFINE_TIMER(seed_timer, __prandom_timer); - -static void __prandom_timer(struct timer_list *unused) -{ - u32 entropy; - unsigned long expires; - - get_random_bytes(&entropy, sizeof(entropy)); - prandom_seed(entropy); - - /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = 40 + prandom_u32_max(40); - seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); - - add_timer(&seed_timer); -} - -static void __init __prandom_start_seed_timer(void) -{ - seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC); - add_timer(&seed_timer); -} - void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) { int i; @@ -257,51 +124,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) } EXPORT_SYMBOL(prandom_seed_full_state); -/* - * Generate better values after random number generator - * is fully initialized. - */ -static void __prandom_reseed(bool late) -{ - unsigned long flags; - static bool latch = false; - static DEFINE_SPINLOCK(lock); - - /* Asking for random bytes might result in bytes getting - * moved into the nonblocking pool and thus marking it - * as initialized. In this case we would double back into - * this function and attempt to do a late reseed. - * Ignore the pointless attempt to reseed again if we're - * already waiting for bytes when the nonblocking pool - * got initialized. - */ - - /* only allow initial seeding (late == false) once */ - if (!spin_trylock_irqsave(&lock, flags)) - return; - - if (latch && !late) - goto out; - - latch = true; - prandom_seed_full_state(&net_rand_state); -out: - spin_unlock_irqrestore(&lock, flags); -} - -void prandom_reseed_late(void) -{ - __prandom_reseed(true); -} - -static int __init prandom_reseed(void) -{ - __prandom_reseed(false); - __prandom_start_seed_timer(); - return 0; -} -late_initcall(prandom_reseed); - #ifdef CONFIG_RANDOM32_SELFTEST static struct prandom_test1 { u32 seed; @@ -421,7 +243,28 @@ static struct prandom_test2 { { 407983964U, 921U, 728767059U }, }; -static void __init prandom_state_selftest(void) +static u32 __extract_hwseed(void) +{ + unsigned int val = 0; + + (void)(arch_get_random_seed_int(&val) || + arch_get_random_int(&val)); + + return val; +} + +static void prandom_seed_early(struct rnd_state *state, u32 seed, + bool mix_with_hwseed) +{ +#define LCG(x) ((x) * 69069U) /* super-duper LCG */ +#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) + state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); + state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); + state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); + state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); +} + +static int __init prandom_state_selftest(void) { int i, j, errors = 0, runs = 0; bool error = false; @@ -461,5 +304,266 @@ static void __init prandom_state_selftest(void) pr_warn("prandom: %d/%d self tests failed\n", errors, runs); else pr_info("prandom: %d self tests passed\n", runs); + return 0; } +core_initcall(prandom_state_selftest); #endif + +/* + * The prandom_u32() implementation is now completely separate from the + * prandom_state() functions, which are retained (for now) for compatibility. + * + * Because of (ab)use in the networking code for choosing random TCP/UDP port + * numbers, which open DoS possibilities if guessable, we want something + * stronger than a standard PRNG. But the performance requirements of + * the network code do not allow robust crypto for this application. + * + * So this is a homebrew Junior Spaceman implementation, based on the + * lowest-latency trustworthy crypto primitive available, SipHash. + * (The authors of SipHash have not been consulted about this abuse of + * their work.) + * + * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to + * one word of output. This abbreviated version uses 2 rounds per word + * of output. + */ + +struct siprand_state { + unsigned long v0; + unsigned long v1; + unsigned long v2; + unsigned long v3; +}; + +static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy; + +/* + * This is the core CPRNG function. As "pseudorandom", this is not used + * for truly valuable things, just intended to be a PITA to guess. + * For maximum speed, we do just two SipHash rounds per word. This is + * the same rate as 4 rounds per 64 bits that SipHash normally uses, + * so hopefully it's reasonably secure. + * + * There are two changes from the official SipHash finalization: + * - We omit some constants XORed with v2 in the SipHash spec as irrelevant; + * they are there only to make the output rounds distinct from the input + * rounds, and this application has no input rounds. + * - Rather than returning v0^v1^v2^v3, return v1+v3. + * If you look at the SipHash round, the last operation on v3 is + * "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time. + * Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but + * it still cancels out half of the bits in v2 for no benefit.) + * Second, since the last combining operation was xor, continue the + * pattern of alternating xor/add for a tiny bit of extra non-linearity. + */ +static inline u32 siprand_u32(struct siprand_state *s) +{ + unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3; + + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3; + return v1 + v3; +} + + +/** + * prandom_u32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. + */ +u32 prandom_u32(void) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u32 res = siprand_u32(state); + + put_cpu_ptr(&net_rand_state); + return res; +} +EXPORT_SYMBOL(prandom_u32); + +/** + * prandom_bytes - get the requested number of pseudo-random bytes + * @buf: where to copy the pseudo-random bytes to + * @bytes: the requested number of bytes + */ +void prandom_bytes(void *buf, size_t bytes) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u8 *ptr = buf; + + while (bytes >= sizeof(u32)) { + put_unaligned(siprand_u32(state), (u32 *)ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); + } + + if (bytes > 0) { + u32 rem = siprand_u32(state); + + do { + *ptr++ = (u8)rem; + rem >>= BITS_PER_BYTE; + } while (--bytes > 0); + } + put_cpu_ptr(&net_rand_state); +} +EXPORT_SYMBOL(prandom_bytes); + +/** + * prandom_seed - add entropy to pseudo random number generator + * @entropy: entropy value + * + * Add some additional seed material to the prandom pool. + * The "entropy" is actually our IP address (the only caller is + * the network code), not for unpredictability, but to ensure that + * different machines are initialized differently. + */ +void prandom_seed(u32 entropy) +{ + int i; + + add_device_randomness(&entropy, sizeof(entropy)); + + for_each_possible_cpu(i) { + struct siprand_state *state = per_cpu_ptr(&net_rand_state, i); + unsigned long v0 = state->v0, v1 = state->v1; + unsigned long v2 = state->v2, v3 = state->v3; + + do { + v3 ^= entropy; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= entropy; + } while (unlikely(!v0 || !v1 || !v2 || !v3)); + + WRITE_ONCE(state->v0, v0); + WRITE_ONCE(state->v1, v1); + WRITE_ONCE(state->v2, v2); + WRITE_ONCE(state->v3, v3); + } +} +EXPORT_SYMBOL(prandom_seed); + +/* + * Generate some initially weak seeding values to allow + * the prandom_u32() engine to be started. + */ +static int __init prandom_init_early(void) +{ + int i; + unsigned long v0, v1, v2, v3; + + if (!arch_get_random_long(&v0)) + v0 = jiffies; + if (!arch_get_random_long(&v1)) + v1 = random_get_entropy(); + v2 = v0 ^ PRND_K0; + v3 = v1 ^ PRND_K1; + + for_each_possible_cpu(i) { + struct siprand_state *state; + + v3 ^= i; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= i; + + state = per_cpu_ptr(&net_rand_state, i); + state->v0 = v0; state->v1 = v1; + state->v2 = v2; state->v3 = v3; + } + + return 0; +} +core_initcall(prandom_init_early); + + +/* Stronger reseeding when available, and periodically thereafter. */ +static void prandom_reseed(struct timer_list *unused); + +static DEFINE_TIMER(seed_timer, prandom_reseed); + +static void prandom_reseed(struct timer_list *unused) +{ + unsigned long expires; + int i; + + /* + * Reinitialize each CPU's PRNG with 128 bits of key. + * No locking on the CPUs, but then somewhat random results are, + * well, expected. + */ + for_each_possible_cpu(i) { + struct siprand_state *state; + unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0; + unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1; +#if BITS_PER_LONG == 32 + int j; + + /* + * On 32-bit machines, hash in two extra words to + * approximate 128-bit key length. Not that the hash + * has that much security, but this prevents a trivial + * 64-bit brute force. + */ + for (j = 0; j < 2; j++) { + unsigned long m = get_random_long(); + + v3 ^= m; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= m; + } +#endif + /* + * Probably impossible in practice, but there is a + * theoretical risk that a race between this reseeding + * and the target CPU writing its state back could + * create the all-zero SipHash fixed point. + * + * To ensure that never happens, ensure the state + * we write contains no zero words. + */ + state = per_cpu_ptr(&net_rand_state, i); + WRITE_ONCE(state->v0, v0 ? v0 : -1ul); + WRITE_ONCE(state->v1, v1 ? v1 : -1ul); + WRITE_ONCE(state->v2, v2 ? v2 : -1ul); + WRITE_ONCE(state->v3, v3 ? v3 : -1ul); + } + + /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ + expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ)); + mod_timer(&seed_timer, expires); +} + +/* + * The random ready callback can be called from almost any interrupt. + * To avoid worrying about whether it's safe to delay that interrupt + * long enough to seed all CPUs, just schedule an immediate timer event. + */ +static void prandom_timer_start(struct random_ready_callback *unused) +{ + mod_timer(&seed_timer, jiffies); +} + +/* + * Start periodic full reseeding as soon as strong + * random numbers are available. + */ +static int __init prandom_init_late(void) +{ + static struct random_ready_callback random_ready = { + .func = prandom_timer_start + }; + int ret = add_random_ready_callback(&random_ready); + + if (ret == -EALREADY) { + prandom_timer_start(&random_ready); + ret = 0; + } + return ret; +} +late_initcall(prandom_init_late); From 45df7b02a0a50a0bd54ade354de8ada3f99ccaf6 Mon Sep 17 00:00:00 2001 From: Anand K Mistry Date: Thu, 5 Nov 2020 16:33:04 +1100 Subject: [PATCH 421/636] x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP commit 1978b3a53a74e3230cd46932b149c6e62e832e9a upstream. On AMD CPUs which have the feature X86_FEATURE_AMD_STIBP_ALWAYS_ON, STIBP is set to on and spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED At the same time, IBPB can be set to conditional. However, this leads to the case where it's impossible to turn on IBPB for a process because in the PR_SPEC_DISABLE case in ib_prctl_set() the spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED condition leads to a return before the task flag is set. Similarly, ib_prctl_get() will return PR_SPEC_DISABLE even though IBPB is set to conditional. More generally, the following cases are possible: 1. STIBP = conditional && IBPB = on for spectre_v2_user=seccomp,ibpb 2. STIBP = on && IBPB = conditional for AMD CPUs with X86_FEATURE_AMD_STIBP_ALWAYS_ON The first case functions correctly today, but only because spectre_v2_user_ibpb isn't updated to reflect the IBPB mode. At a high level, this change does one thing. If either STIBP or IBPB is set to conditional, allow the prctl to change the task flag. Also, reflect that capability when querying the state. This isn't perfect since it doesn't take into account if only STIBP or IBPB is unconditionally on. But it allows the conditional feature to work as expected, without affecting the unconditional one. [ bp: Massage commit message and comment; space out statements for better readability. ] Fixes: 21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.") Signed-off-by: Anand K Mistry Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Acked-by: Tom Lendacky Link: https://lkml.kernel.org/r/20201105163246.v2.1.Ifd7243cd3e2c2206a893ad0a5b9a4f19549e22c6@changeid Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 52 ++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bf554ed2fd51..9e482fbdb28f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1240,6 +1240,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static bool is_spec_ib_user_controlled(void) +{ + return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP; +} + static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { @@ -1247,17 +1255,26 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; - /* - * Indirect branch speculation is always disabled in strict - * mode. It can neither be enabled if it was force-disabled - * by a previous prctl call. + /* + * With strict mode for both IBPB and STIBP, the instruction + * code paths avoid checking this task flag and instead, + * unconditionally run the instruction. However, STIBP and IBPB + * are independent and either can be set to conditionally + * enabled regardless of the mode of the other. + * + * If either is set to conditional, allow the task flag to be + * updated, unless it was force-disabled by a previous prctl + * call. Currently, this is possible on an AMD CPU which has the + * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the + * kernel is booted with 'spectre_v2_user=seccomp', then + * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and + * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED. */ - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + if (!is_spec_ib_user_controlled() || task_spec_ib_force_disable(task)) return -EPERM; + task_clear_spec_ib_disable(task); task_update_spec_tif(task); break; @@ -1270,10 +1287,10 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + + if (!is_spec_ib_user_controlled()) return 0; + task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); @@ -1336,20 +1353,17 @@ static int ib_prctl_get(struct task_struct *task) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) - return PR_SPEC_DISABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || - spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { + else if (is_spec_ib_user_controlled()) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - } else + } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else return PR_SPEC_NOT_AFFECTED; } From b9658d18499e4cf1b6c5f122f12e1cc985ac5568 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 Oct 2020 08:24:38 -0300 Subject: [PATCH 422/636] perf scripting python: Avoid declaring function pointers with a visibility attribute commit d0e7b0c71fbb653de90a7163ef46912a96f0bdaf upstream. To avoid this: util/scripting-engines/trace-event-python.c: In function 'python_start_script': util/scripting-engines/trace-event-python.c:1595:2: error: 'visibility' attribute ignored [-Werror=attributes] 1595 | PyMODINIT_FUNC (*initfunc)(void); | ^~~~~~~~~~~~~~ That started breaking when building with PYTHON=python3 and these gcc versions (I haven't checked with the clang ones, maybe it breaks there as well): # export PERF_TARBALL=http://192.168.86.5/perf/perf-5.9.0.tar.xz # dm fedora:33 fedora:rawhide 1 107.80 fedora:33 : Ok gcc (GCC) 10.2.1 20201005 (Red Hat 10.2.1-5), clang version 11.0.0 (Fedora 11.0.0-1.fc33) 2 92.47 fedora:rawhide : Ok gcc (GCC) 10.2.1 20201016 (Red Hat 10.2.1-6), clang version 11.0.0 (Fedora 11.0.0-1.fc34) # Avoid that by ditching that 'initfunc' function pointer with its: #define Py_EXPORTED_SYMBOL _attribute_ ((visibility ("default"))) #define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* And just call PyImport_AppendInittab() at the end of the ifdef python3 block with the functions that were being attributed to that initfunc. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Tapas Kundu Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/scripting-engines/trace-event-python.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 9569cc06e0a7..2814251df06b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1493,7 +1493,6 @@ static void _free_command_line(wchar_t **command_line, int num) static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; - PyMODINIT_FUNC (*initfunc)(void); #if PY_MAJOR_VERSION < 3 const char **command_line; #else @@ -1508,20 +1507,18 @@ static int python_start_script(const char *script, int argc, const char **argv) FILE *fp; #if PY_MAJOR_VERSION < 3 - initfunc = initperf_trace_context; command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; + PyImport_AppendInittab(name, initperf_trace_context); #else - initfunc = PyInit_perf_trace_context; command_line = malloc((argc + 1) * sizeof(wchar_t *)); command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); + PyImport_AppendInittab(name, PyInit_perf_trace_context); #endif - - PyImport_AppendInittab(name, initfunc); Py_Initialize(); #if PY_MAJOR_VERSION < 3 From 1cbfb60332f4e62224440f6d38a6b3114b2355f0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Sep 2020 13:53:11 +0200 Subject: [PATCH 423/636] perf/core: Fix race in the perf_mmap_close() function commit f91072ed1b7283b13ca57fcfbece5a3b92726143 upstream. There's a possible race in perf_mmap_close() when checking ring buffer's mmap_count refcount value. The problem is that the mmap_count check is not atomic because we call atomic_dec() and atomic_read() separately. perf_mmap_close: ... atomic_dec(&rb->mmap_count); ... if (atomic_read(&rb->mmap_count)) goto out_put; free_uid out_put: ring_buffer_put(rb); /* could be last */ The race can happen when we have two (or more) events sharing same ring buffer and they go through atomic_dec() and then they both see 0 as refcount value later in atomic_read(). Then both will go on and execute code which is meant to be run just once. The code that detaches ring buffer is probably fine to be executed more than once, but the problem is in calling free_uid(), which will later on demonstrate in related crashes and refcount warnings, like: refcount_t: addition on 0; use-after-free. ... RIP: 0010:refcount_warn_saturate+0x6d/0xf ... Call Trace: prepare_creds+0x190/0x1e0 copy_creds+0x35/0x172 copy_process+0x471/0x1a80 _do_fork+0x83/0x3a0 __do_sys_wait4+0x83/0x90 __do_sys_clone+0x85/0xa0 do_syscall_64+0x5b/0x1e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Using atomic decrease and check instead of separated calls. Tested-by: Michael Petlan Signed-off-by: Jiri Olsa Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Namhyung Kim Acked-by: Wade Mealing Fixes: 9bb5d40cd93c ("perf: Fix mmap() accounting hole"); Link: https://lore.kernel.org/r/20200916115311.GE2301783@krava [sudip: used ring_buffer] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 8b94eb6437c1..b8b74a4a524c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5475,11 +5475,11 @@ static void perf_pmu_output_stop(struct perf_event *event); static void perf_mmap_close(struct vm_area_struct *vma) { struct perf_event *event = vma->vm_file->private_data; - struct ring_buffer *rb = ring_buffer_get(event); struct user_struct *mmap_user = rb->mmap_user; int mmap_locked = rb->mmap_locked; unsigned long size = perf_data_size(rb); + bool detach_rest = false; if (event->pmu->event_unmapped) event->pmu->event_unmapped(event, vma->vm_mm); @@ -5510,7 +5510,8 @@ static void perf_mmap_close(struct vm_area_struct *vma) mutex_unlock(&event->mmap_mutex); } - atomic_dec(&rb->mmap_count); + if (atomic_dec_and_test(&rb->mmap_count)) + detach_rest = true; if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) goto out_put; @@ -5519,7 +5520,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) mutex_unlock(&event->mmap_mutex); /* If there's still other mmap()s of this buffer, we're done. */ - if (atomic_read(&rb->mmap_count)) + if (!detach_rest) goto out_put; /* From 9a6cea8220c608f6d59763fab06ff196185cc3ff Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 13 Nov 2020 22:52:02 -0800 Subject: [PATCH 424/636] Revert "kernel/reboot.c: convert simple_strtoul to kstrtoint" commit 8b92c4ff4423aa9900cf838d3294fcade4dbda35 upstream. Patch series "fix parsing of reboot= cmdline", v3. The parsing of the reboot= cmdline has two major errors: - a missing bound check can crash the system on reboot - parsing of the cpu number only works if specified last Fix both. This patch (of 2): This reverts commit 616feab753972b97. kstrtoint() and simple_strtoul() have a subtle difference which makes them non interchangeable: if a non digit character is found amid the parsing, the former will return an error, while the latter will just stop parsing, e.g. simple_strtoul("123xyx") = 123. The kernel cmdline reboot= argument allows to specify the CPU used for rebooting, with the syntax `s####` among the other flags, e.g. "reboot=warm,s31,force", so if this flag is not the last given, it's silently ignored as well as the subsequent ones. Fixes: 616feab75397 ("kernel/reboot.c: convert simple_strtoul to kstrtoint") Signed-off-by: Matteo Croce Signed-off-by: Andrew Morton Cc: Guenter Roeck Cc: Petr Mladek Cc: Arnd Bergmann Cc: Mike Rapoport Cc: Kees Cook Cc: Pavel Tatashin Cc: Robin Holt Cc: Fabian Frederick Cc: Greg Kroah-Hartman Cc: Link: https://lkml.kernel.org/r/20201103214025.116799-2-mcroce@linux.microsoft.com Signed-off-by: Linus Torvalds [sudip: use reboot_mode instead of mode] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/reboot.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/kernel/reboot.c b/kernel/reboot.c index 8fb44dec9ad7..db0eed5916d5 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -539,22 +539,15 @@ static int __init reboot_setup(char *str) break; case 's': - { - int rc; - - if (isdigit(*(str+1))) { - rc = kstrtoint(str+1, 0, &reboot_cpu); - if (rc) - return rc; - } else if (str[1] == 'm' && str[2] == 'p' && - isdigit(*(str+3))) { - rc = kstrtoint(str+3, 0, &reboot_cpu); - if (rc) - return rc; - } else + if (isdigit(*(str+1))) + reboot_cpu = simple_strtoul(str+1, NULL, 0); + else if (str[1] == 'm' && str[2] == 'p' && + isdigit(*(str+3))) + reboot_cpu = simple_strtoul(str+3, NULL, 0); + else reboot_mode = REBOOT_SOFT; break; - } + case 'g': reboot_mode = REBOOT_GPIO; break; From 2e021b7197fb911f934efcc45f019e5f7717e8db Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 13 Nov 2020 22:52:07 -0800 Subject: [PATCH 425/636] reboot: fix overflow parsing reboot cpu number commit df5b0ab3e08a156701b537809914b339b0daa526 upstream. Limit the CPU number to num_possible_cpus(), because setting it to a value lower than INT_MAX but higher than NR_CPUS produces the following error on reboot and shutdown: BUG: unable to handle page fault for address: ffffffff90ab1bb0 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 1c09067 P4D 1c09067 PUD 1c0a063 PMD 0 Oops: 0000 [#1] SMP CPU: 1 PID: 1 Comm: systemd-shutdow Not tainted 5.9.0-rc8-kvm #110 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 RIP: 0010:migrate_to_reboot_cpu+0xe/0x60 Code: ea ea 00 48 89 fa 48 c7 c7 30 57 f1 81 e9 fa ef ff ff 66 2e 0f 1f 84 00 00 00 00 00 53 8b 1d d5 ea ea 00 e8 14 33 fe ff 89 da <48> 0f a3 15 ea fc bd 00 48 89 d0 73 29 89 c2 c1 e8 06 65 48 8b 3c RSP: 0018:ffffc90000013e08 EFLAGS: 00010246 RAX: ffff88801f0a0000 RBX: 0000000077359400 RCX: 0000000000000000 RDX: 0000000077359400 RSI: 0000000000000002 RDI: ffffffff81c199e0 RBP: ffffffff81c1e3c0 R08: ffff88801f41f000 R09: ffffffff81c1e348 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 00007f32bedf8830 R14: 00000000fee1dead R15: 0000000000000000 FS: 00007f32bedf8980(0000) GS:ffff88801f480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffff90ab1bb0 CR3: 000000001d057000 CR4: 00000000000006a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __do_sys_reboot.cold+0x34/0x5b do_syscall_64+0x2d/0x40 Fixes: 1b3a5d02ee07 ("reboot: move arch/x86 reboot= handling to generic kernel") Signed-off-by: Matteo Croce Signed-off-by: Andrew Morton Cc: Arnd Bergmann Cc: Fabian Frederick Cc: Greg Kroah-Hartman Cc: Guenter Roeck Cc: Kees Cook Cc: Mike Rapoport Cc: Pavel Tatashin Cc: Petr Mladek Cc: Robin Holt Cc: Link: https://lkml.kernel.org/r/20201103214025.116799-3-mcroce@linux.microsoft.com Signed-off-by: Linus Torvalds [sudip: use reboot_mode instead of mode] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- kernel/reboot.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/reboot.c b/kernel/reboot.c index db0eed5916d5..45bea54f9aee 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -546,6 +546,13 @@ static int __init reboot_setup(char *str) reboot_cpu = simple_strtoul(str+3, NULL, 0); else reboot_mode = REBOOT_SOFT; + if (reboot_cpu >= num_possible_cpus()) { + pr_err("Ignoring the CPU number in reboot= option. " + "CPU %d exceeds possible cpu number %d\n", + reboot_cpu, num_possible_cpus()); + reboot_cpu = 0; + break; + } break; case 'g': From 81504d1952d712c8bb9c3966896efee8a37ea966 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 3 Nov 2020 11:25:38 +0800 Subject: [PATCH 426/636] net: sch_generic: fix the missing new qdisc assignment bug When commit 2fb541c862c9 ("net: sch_generic: aviod concurrent reset and enqueue op for lockless qdisc") is backported to stable kernel, one assignment is missing, which causes two problems reported by Joakim and Vishwanath, see [1] and [2]. So add the assignment back to fix it. 1. https://www.spinics.net/lists/netdev/msg693916.html 2. https://www.spinics.net/lists/netdev/msg695131.html Fixes: 749cc0b0c7f3 ("net: sch_generic: aviod concurrent reset and enqueue op for lockless qdisc") Signed-off-by: Yunsheng Lin Acked-by: Jakub Kicinski Tested-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_generic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index bd96fd261dba..4e15913e7519 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1116,10 +1116,13 @@ static void dev_deactivate_queue(struct net_device *dev, void *_qdisc_default) { struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc); + struct Qdisc *qdisc_default = _qdisc_default; if (qdisc) { if (!(qdisc->flags & TCQ_F_BUILTIN)) set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); + + rcu_assign_pointer(dev_queue->qdisc, qdisc_default); } } From 987c45d57f388f9531d523b8424b8e97687bb323 Mon Sep 17 00:00:00 2001 From: Boris Protopopov Date: Thu, 24 Sep 2020 00:36:38 +0000 Subject: [PATCH 427/636] Convert trailing spaces and periods in path components commit 57c176074057531b249cf522d90c22313fa74b0b upstream. When converting trailing spaces and periods in paths, do so for every component of the path, not just the last component. If the conversion is not done for every path component, then subsequent operations in directories with trailing spaces or periods (e.g. create(), mkdir()) will fail with ENOENT. This is because on the server, the directory will have a special symbol in its name, and the client needs to provide the same. Signed-off-by: Boris Protopopov Acked-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifs_unicode.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index a2b2355e7f01..9986817532b1 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -501,7 +501,13 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, else if (map_chars == SFM_MAP_UNI_RSVD) { bool end_of_string; - if (i == srclen - 1) + /** + * Remap spaces and periods found at the end of every + * component of the path. The special cases of '.' and + * '..' do not need to be dealt with explicitly because + * they are addressed in namei.c:link_path_walk(). + **/ + if ((i == srclen - 1) || (source[i+1] == '\\')) end_of_string = true; else end_of_string = false; From 2c746135a12e3f329171ed168ca0078d468f6d85 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 18 Nov 2020 19:18:53 +0100 Subject: [PATCH 428/636] Linux 4.19.158 Tested-by: Jon Hunter Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20201117122113.128215851@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 245bcd8dd7b7..698a9cc2864b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 157 +SUBLEVEL = 158 EXTRAVERSION = NAME = "People's Front" From 998f5e469d48bda634c7289b76da897e8dcb5e53 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Wed, 18 Nov 2020 16:03:11 -0800 Subject: [PATCH 429/636] Revert "ANDROID: clang: update to 11.0.5" This reverts commit 199499f1d787911d139929045e11c7478406b4e3. It breaks full suspend/resume on bramble/redfin. Bug: 173479375 Test: Full suspend/resume on bramble Signed-off-by: Will McVicker Change-Id: I97a49f6fac26d714a89924ea343450216701f87a --- build.config.common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.config.common b/build.config.common index c9467028ea7f..1b0034f31452 100644 --- a/build.config.common +++ b/build.config.common @@ -3,7 +3,7 @@ KMI_GENERATION=0 LLVM=1 DEPMOD=depmod -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r399163b/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r383902/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 EXTRA_CMDS='' From 197d678a0efdc3d29597e11c4c6bb110b9d7b431 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 25 Jun 2020 14:46:07 +0200 Subject: [PATCH 430/636] ANDROID: Incremental fs: fix up attempt to copy structures with READ/WRITE_ONCE READ/WRITE_ONCE are for atomic data types, not for structures. Fix this up by doing a memcpy to make it explicit just how messy this copy is... This fixes a build error on 5.8-rc1, as things are more strict, odds are it's also wrong in other kernel versions as well... Cc: Daniel Mentz Cc: Paul Lawrence Signed-off-by: Greg Kroah-Hartman Change-Id: I7ecd3d05bd94c936dd5e69c63028458786f37a78 --- fs/incfs/vfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index a13330eabe7d..045b83c798c8 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -604,8 +604,11 @@ static ssize_t log_read(struct file *f, char __user *buf, size_t len, reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect); while (reads_to_collect > 0) { - struct read_log_state next_state = READ_ONCE(log_state->state); - int reads_collected = incfs_collect_logged_reads( + struct read_log_state next_state; + int reads_collected; + + memcpy(&next_state, &log_state->state, sizeof(next_state)); + reads_collected = incfs_collect_logged_reads( mi, &next_state, reads_buf, min_t(ssize_t, reads_to_collect, reads_per_page)); if (reads_collected <= 0) { @@ -624,7 +627,7 @@ static ssize_t log_read(struct file *f, char __user *buf, size_t len, goto out; } - WRITE_ONCE(log_state->state, next_state); + memcpy(&log_state->state, &next_state, sizeof(next_state)); total_reads_collected += reads_collected; buf += reads_collected * sizeof(*reads_buf); reads_to_collect -= reads_collected; From 883c150ca9f7cfbd16ee4347ebf5e44a9ac02ea2 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Fri, 17 Jul 2020 00:33:09 +0000 Subject: [PATCH 431/636] ANDROID: Incremental fs: RCU locks instead of mutex for pending_reads. Use RCU locks instead of pending_reads_mutex. Current mutex is taking lock on entire mount_info structure which seems a heavy operation. Following fields of mount_info structure are protected through spinlocks for multiple writers and are RCU safe for readers: - reads_list_head - mi_pending_reads_count - mi_last_pending_read_number - data_file_segment.reads_list_head We could probably use atomic_inc/atomic_dec for mi_pending_reads_count and mi_last_pending_read_number which can futher cut down spin_locks at couple of more places, thereby only the list addition and removal can protected by spinlock. This CL doesn't address it. Bug: 161565969 Test: kernel selftest incfs_test and incfs_perf Signed-off-by: Akilesh Kailash Change-Id: Iad7439657016764dce25d64c8b3df69b930452bc --- fs/incfs/data_mgmt.c | 71 ++++++++++++++++++++++++++++---------------- fs/incfs/data_mgmt.h | 7 +++-- 2 files changed, 51 insertions(+), 27 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 074a733c7001..0ee9a2d56688 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -40,11 +40,11 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, mi->mi_owner = get_current_cred(); path_get(&mi->mi_backing_dir_path); mutex_init(&mi->mi_dir_struct_mutex); - mutex_init(&mi->mi_pending_reads_mutex); init_waitqueue_head(&mi->mi_pending_reads_notif_wq); init_waitqueue_head(&mi->mi_log.ml_notif_wq); INIT_DELAYED_WORK(&mi->mi_log.ml_wakeup_work, log_wake_up_all); spin_lock_init(&mi->mi_log.rl_lock); + spin_lock_init(&mi->pending_read_lock); INIT_LIST_HEAD(&mi->mi_reads_list_head); error = incfs_realloc_mount_info(mi, options); @@ -109,7 +109,6 @@ void incfs_free_mount_info(struct mount_info *mi) dput(mi->mi_index_dir); path_put(&mi->mi_backing_dir_path); mutex_destroy(&mi->mi_dir_struct_mutex); - mutex_destroy(&mi->mi_pending_reads_mutex); put_cred(mi->mi_owner); kfree(mi->mi_log.rl_ring_buf); kfree(mi->log_xattr); @@ -756,19 +755,29 @@ static struct pending_read *add_pending_read(struct data_file *df, result->block_index = block_index; result->timestamp_us = ktime_to_us(ktime_get()); - mutex_lock(&mi->mi_pending_reads_mutex); + spin_lock(&mi->pending_read_lock); result->serial_number = ++mi->mi_last_pending_read_number; mi->mi_pending_reads_count++; - list_add(&result->mi_reads_list, &mi->mi_reads_list_head); - list_add(&result->segment_reads_list, &segment->reads_list_head); - mutex_unlock(&mi->mi_pending_reads_mutex); + list_add_rcu(&result->mi_reads_list, &mi->mi_reads_list_head); + list_add_rcu(&result->segment_reads_list, &segment->reads_list_head); + + spin_unlock(&mi->pending_read_lock); wake_up_all(&mi->mi_pending_reads_notif_wq); return result; } +static void free_pending_read_entry(struct rcu_head *entry) +{ + struct pending_read *read; + + read = container_of(entry, struct pending_read, rcu); + + kfree(read); +} + /* Notifies a given data file that pending read is completed. */ static void remove_pending_read(struct data_file *df, struct pending_read *read) { @@ -782,14 +791,17 @@ static void remove_pending_read(struct data_file *df, struct pending_read *read) mi = df->df_mount_info; - mutex_lock(&mi->mi_pending_reads_mutex); - list_del(&read->mi_reads_list); - list_del(&read->segment_reads_list); + spin_lock(&mi->pending_read_lock); + + list_del_rcu(&read->mi_reads_list); + list_del_rcu(&read->segment_reads_list); mi->mi_pending_reads_count--; - mutex_unlock(&mi->mi_pending_reads_mutex); - kfree(read); + spin_unlock(&mi->pending_read_lock); + + /* Don't free. Wait for readers */ + call_rcu(&read->rcu, free_pending_read_entry); } static void notify_pending_reads(struct mount_info *mi, @@ -799,13 +811,13 @@ static void notify_pending_reads(struct mount_info *mi, struct pending_read *entry = NULL; /* Notify pending reads waiting for this block. */ - mutex_lock(&mi->mi_pending_reads_mutex); - list_for_each_entry(entry, &segment->reads_list_head, + rcu_read_lock(); + list_for_each_entry_rcu(entry, &segment->reads_list_head, segment_reads_list) { if (entry->block_index == index) set_read_done(entry); } - mutex_unlock(&mi->mi_pending_reads_mutex); + rcu_read_unlock(); wake_up_all(&segment->new_data_arrival_wq); } @@ -869,7 +881,6 @@ static int wait_for_data_block(struct data_file *df, int block_index, /* Woke up, the pending read is no longer needed. */ remove_pending_read(df, read); - read = NULL; if (wait_res == 0) { /* Wait has timed out */ @@ -1292,10 +1303,14 @@ bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number) { bool result = false; - mutex_lock(&mi->mi_pending_reads_mutex); + /* + * We could probably get away with spin locks here; + * if we use atomic_read() on both these variables. + */ + spin_lock(&mi->pending_read_lock); result = (mi->mi_last_pending_read_number > last_number) && - (mi->mi_pending_reads_count > 0); - mutex_unlock(&mi->mi_pending_reads_mutex); + (mi->mi_pending_reads_count > 0); + spin_unlock(&mi->pending_read_lock); return result; } @@ -1305,6 +1320,7 @@ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, { int reported_reads = 0; struct pending_read *entry = NULL; + bool result = false; if (!mi) return -EFAULT; @@ -1312,13 +1328,19 @@ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, if (reads_size <= 0) return 0; - mutex_lock(&mi->mi_pending_reads_mutex); + spin_lock(&mi->pending_read_lock); - if (mi->mi_last_pending_read_number <= sn_lowerbound - || mi->mi_pending_reads_count == 0) - goto unlock; + result = ((mi->mi_last_pending_read_number <= sn_lowerbound) + || (mi->mi_pending_reads_count == 0)); - list_for_each_entry(entry, &mi->mi_reads_list_head, mi_reads_list) { + spin_unlock(&mi->pending_read_lock); + + if (result) + return reported_reads; + + rcu_read_lock(); + + list_for_each_entry_rcu(entry, &mi->mi_reads_list_head, mi_reads_list) { if (entry->serial_number <= sn_lowerbound) continue; @@ -1333,8 +1355,7 @@ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, break; } -unlock: - mutex_unlock(&mi->mi_pending_reads_mutex); + rcu_read_unlock(); return reported_reads; } diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 2726867835a8..00e93f1b7712 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -123,13 +124,13 @@ struct mount_info { wait_queue_head_t mi_pending_reads_notif_wq; /* - * Protects: + * Protects - RCU safe: * - reads_list_head * - mi_pending_reads_count * - mi_last_pending_read_number * - data_file_segment.reads_list_head */ - struct mutex mi_pending_reads_mutex; + spinlock_t pending_read_lock; /* List of active pending_read objects */ struct list_head mi_reads_list_head; @@ -175,6 +176,8 @@ struct pending_read { struct list_head mi_reads_list; struct list_head segment_reads_list; + + struct rcu_head rcu; }; struct data_file_segment { From ed146f65cc54659f8e0d3391676fd17a8666e188 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Fri, 17 Jul 2020 00:18:50 +0000 Subject: [PATCH 432/636] ANDROID: Incremental fs: Fix minor bugs 1: Invoke kunmap(page) in error path 2: Validate NULL checks at few places in the code. 3: path_put() should not be invoked if path entry is null. Although path_put() checks for NULL condition internally, caller should gracefully handle it. Bug: 161565969 Test: kernel selftest - incfs_test, incfs_perf Signed-off-by: Akilesh Kailash Change-Id: Ie4dfaaba4b09f4798d492f8a25dd9dcc8da89e51 --- fs/incfs/data_mgmt.c | 2 +- fs/incfs/vfs.c | 50 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 0ee9a2d56688..9e6ef41e4302 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -933,7 +933,7 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, struct data_file_block block = {}; struct data_file *df = get_incfs_data_file(f); - if (!dst.data || !df) + if (!dst.data || !df || !tmp.data) return -EFAULT; if (tmp.len < 2 * INCFS_DATA_FILE_BLOCK_SIZE) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 045b83c798c8..7b1cd539aeed 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -470,6 +470,9 @@ static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, ssize_t result = 0; int i = 0; + if (!mi) + return -EFAULT; + if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) return 0; @@ -798,13 +801,17 @@ static int read_single_page(struct file *f, struct page *page) ssize_t read_result = 0; struct data_file *df = get_incfs_data_file(f); int result = 0; - void *page_start = kmap(page); + void *page_start; int block_index; int timeout_ms; - if (!df) + if (!df) { + SetPageError(page); + unlock_page(page); return -EBADF; + } + page_start = kmap(page); offset = page_offset(page); block_index = offset / INCFS_DATA_FILE_BLOCK_SIZE; size = df->df_size; @@ -816,6 +823,10 @@ static int read_single_page(struct file *f, struct page *page) }; tmp.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(tmp.len)); + if (!tmp.data) { + read_result = -ENOMEM; + goto err; + } bytes_to_read = min_t(loff_t, size - offset, PAGE_SIZE); read_result = incfs_read_data_file_block( range(page_start, bytes_to_read), f, block_index, @@ -827,6 +838,7 @@ static int read_single_page(struct file *f, struct page *page) read_result = 0; } +err: if (read_result < 0) result = read_result; else if (read_result < PAGE_SIZE) @@ -1506,6 +1518,9 @@ static struct dentry *dir_lookup(struct inode *dir_inode, struct dentry *dentry, range((u8 *)dentry->d_name.name, dentry->d_name.len); int err = 0; + if (!mi || !dir_info || !dir_info->n_backing_inode) + return ERR_PTR(-EBADF); + if (d_inode(mi->mi_backing_dir_path.dentry) == dir_info->n_backing_inode) { /* We do lookup in the FS root. Show pseudo files. */ @@ -1621,7 +1636,7 @@ static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (!backing_dentry) { err = -EBADF; - goto out; + goto path_err; } if (backing_dentry->d_parent == mi->mi_index_dir) { @@ -1653,6 +1668,8 @@ out: if (d_really_is_negative(dentry)) d_drop(dentry); path_put(&backing_path); + +path_err: mutex_unlock(&mi->mi_dir_struct_mutex); if (err) pr_debug("incfs: %s err:%d\n", __func__, err); @@ -1708,6 +1725,9 @@ static int dir_unlink(struct inode *dir, struct dentry *dentry) struct kstat stat; int err = 0; + if (!mi) + return -EBADF; + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); if (err) return err; @@ -1715,7 +1735,7 @@ static int dir_unlink(struct inode *dir, struct dentry *dentry) get_incfs_backing_path(dentry, &backing_path); if (!backing_path.dentry) { err = -EBADF; - goto out; + goto path_err; } if (backing_path.dentry->d_parent == mi->mi_index_dir) { @@ -1743,6 +1763,7 @@ static int dir_unlink(struct inode *dir, struct dentry *dentry) d_drop(dentry); out: path_put(&backing_path); +path_err: if (err) pr_debug("incfs: %s err:%d\n", __func__, err); mutex_unlock(&mi->mi_dir_struct_mutex); @@ -1757,6 +1778,9 @@ static int dir_link(struct dentry *old_dentry, struct inode *dir, struct path backing_new_path = {}; int error = 0; + if (!mi) + return -EBADF; + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); if (error) return error; @@ -1803,6 +1827,9 @@ static int dir_rmdir(struct inode *dir, struct dentry *dentry) struct path backing_path = {}; int err = 0; + if (!mi) + return -EBADF; + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); if (err) return err; @@ -1810,7 +1837,7 @@ static int dir_rmdir(struct inode *dir, struct dentry *dentry) get_incfs_backing_path(dentry, &backing_path); if (!backing_path.dentry) { err = -EBADF; - goto out; + goto path_err; } if (backing_path.dentry == mi->mi_index_dir) { @@ -1824,6 +1851,8 @@ static int dir_rmdir(struct inode *dir, struct dentry *dentry) d_drop(dentry); out: path_put(&backing_path); + +path_err: if (err) pr_debug("incfs: %s err:%d\n", __func__, err); mutex_unlock(&mi->mi_dir_struct_mutex); @@ -1907,7 +1936,14 @@ static int file_open(struct inode *inode, struct file *file) struct path backing_path = {}; int err = 0; + if (!mi) + return -EBADF; + get_incfs_backing_path(file->f_path.dentry, &backing_path); + + if (!backing_path.dentry) + return -EBADF; + backing_file = dentry_open( &backing_path, O_RDWR | O_NOATIME | O_LARGEFILE, mi->mi_owner); path_put(&backing_path); @@ -1969,8 +2005,10 @@ static int dentry_revalidate(struct dentry *d, unsigned int flags) get_incfs_backing_path(d, &backing_path); backing_dentry = backing_path.dentry; - if (!backing_dentry) + if (!backing_dentry) { + result = -EBADF; goto out; + } if (d_inode(backing_dentry) != binode) { /* From 30abd70986d2c4a6724890aeed0e2b33e8864968 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Mon, 27 Jul 2020 18:16:51 +0000 Subject: [PATCH 433/636] ANDROID: Incremental fs: dentry_revalidate should not return -EBADF. Fix of bug 161565969 added EBADF when backing path doesn't exist. However, .log and .pending reads doesn't have backing path causing incfs_test to fail. Bug: 162243475 Test: incfs_test on QEMU passes Signed-off-by: Akilesh Kailash Change-Id: Iba303bbc42b431fe4a4fbacb40584faaff4b14b7 --- fs/incfs/vfs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 7b1cd539aeed..65b7f325bf9c 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -2005,10 +2005,8 @@ static int dentry_revalidate(struct dentry *d, unsigned int flags) get_incfs_backing_path(d, &backing_path); backing_dentry = backing_path.dentry; - if (!backing_dentry) { - result = -EBADF; + if (!backing_dentry) goto out; - } if (d_inode(backing_dentry) != binode) { /* From 9d06e6e105190e38499e61f5265be13599b00fe6 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Fri, 5 Jun 2020 09:32:10 -0700 Subject: [PATCH 434/636] ANDROID: Incremental fs: Remove annoying pr_debugs Bug: 162255528 Test: incfs_test passes with fewer annoying debug messages Signed-off-by: Paul Lawrence Change-Id: Ib347d3d5dd8a3c5545a0760fc674147211aa8c9d --- fs/incfs/data_mgmt.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 9e6ef41e4302..960a1f72d34d 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -1257,8 +1257,6 @@ int incfs_scan_metadata_chain(struct data_file *df) handler->handle_file_attr = process_file_attr_md; handler->handle_signature = process_file_signature_md; - pr_debug("incfs: Starting reading incfs-metadata records at offset %lld\n", - handler->md_record_offset); while (handler->md_record_offset > 0) { error = incfs_read_next_metadata_record(bfc, handler); if (error) { @@ -1270,14 +1268,11 @@ int incfs_scan_metadata_chain(struct data_file *df) records_count++; } if (error) { - pr_debug("incfs: Error %d after reading %d incfs-metadata records.\n", + pr_warn("incfs: Error %d after reading %d incfs-metadata records.\n", -error, records_count); result = error; - } else { - pr_debug("incfs: Finished reading %d incfs-metadata records.\n", - records_count); + } else result = records_count; - } mutex_unlock(&bfc->bc_mutex); if (df->df_hash_tree) { From 46a2a9f490e4e04459fa3a97f7284cb05895884c Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 10 Jun 2020 08:20:33 -0700 Subject: [PATCH 435/636] ANDROID: Incremental fs: Remove unnecessary dependencies Bug: 162260581 Test: builds with defconfig Signed-off-by: Paul Lawrence Change-Id: I743fa7a989f5a464f755a22b6a03299fb462e74c --- fs/incfs/Kconfig | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/incfs/Kconfig b/fs/incfs/Kconfig index 1ffe31a9c0ff..8c0e8ae2030f 100644 --- a/fs/incfs/Kconfig +++ b/fs/incfs/Kconfig @@ -2,13 +2,7 @@ config INCREMENTAL_FS tristate "Incremental file system support" depends on BLOCK select DECOMPRESS_LZ4 - select CRC32 - select CRYPTO - select CRYPTO_RSA select CRYPTO_SHA256 - select X509_CERTIFICATE_PARSER - select ASYMMETRIC_KEY_TYPE - select ASYMMETRIC_PUBLIC_KEY_SUBTYPE help Incremental FS is a read-only virtual file system that facilitates execution of programs while their binaries are still being lazily downloaded over the From 53f6b6a6a60c9b79ca77cf551b8bc57c858d4e3b Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Fri, 17 Jul 2020 00:47:05 +0000 Subject: [PATCH 436/636] ANDROID: Incremental fs: Use R/W locks to read/write segment blockmap. Use Read-Write locks for reading/writing segment in blockmap. This should allow parallel reads when there are multiple reads within same segment. A small optimization in pending_reads_read(). Since incfs_collect_pending_reads() already iterate to populate buffer, new_max_sn - highest serial number among all the pending read buffer can be done in the same loop instead of looping again in pending_reads_read(). Bug: 161566104 Test: kernel selftest - incfs_test and incfs_perf Signed-off-by: Akilesh Kailash Change-Id: Id00376b0e4cb8c0c0bc8264cdddd6f38c4aa85f0 --- fs/incfs/data_mgmt.c | 69 ++++++++++++++++++++++---------------------- fs/incfs/data_mgmt.h | 6 ++-- fs/incfs/vfs.c | 7 +---- 3 files changed, 38 insertions(+), 44 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 960a1f72d34d..819b437218d2 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -119,15 +119,10 @@ void incfs_free_mount_info(struct mount_info *mi) static void data_file_segment_init(struct data_file_segment *segment) { init_waitqueue_head(&segment->new_data_arrival_wq); - mutex_init(&segment->blockmap_mutex); + init_rwsem(&segment->rwsem); INIT_LIST_HEAD(&segment->reads_list_head); } -static void data_file_segment_destroy(struct data_file_segment *segment) -{ - mutex_destroy(&segment->blockmap_mutex); -} - struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) { struct data_file *df = NULL; @@ -189,14 +184,10 @@ out: void incfs_free_data_file(struct data_file *df) { - int i; - if (!df) return; incfs_free_mtree(df->df_hash_tree); - for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) - data_file_segment_destroy(&df->df_segments[i]); incfs_free_bfc(df->df_backing_file_context); kfree(df); } @@ -838,22 +829,21 @@ static int wait_for_data_block(struct data_file *df, int block_index, if (block_index < 0 || block_index >= df->df_data_block_count) return -EINVAL; - if (df->df_blockmap_off <= 0) + if (df->df_blockmap_off <= 0 || !df->df_mount_info) return -ENODATA; + mi = df->df_mount_info; segment = get_file_segment(df, block_index); - error = mutex_lock_interruptible(&segment->blockmap_mutex); + + error = down_read_killable(&segment->rwsem); if (error) return error; /* Look up the given block */ error = get_data_file_block(df, block_index, &block); - /* If it's not found, create a pending read */ - if (!error && !is_data_block_present(&block) && timeout_ms != 0) - read = add_pending_read(df, block_index); + up_read(&segment->rwsem); - mutex_unlock(&segment->blockmap_mutex); if (error) return error; @@ -861,18 +851,18 @@ static int wait_for_data_block(struct data_file *df, int block_index, if (is_data_block_present(&block)) { *res_block = block; return 0; + } else { + /* If it's not found, create a pending read */ + if (timeout_ms != 0) { + read = add_pending_read(df, block_index); + if (!read) + return -ENOMEM; + } else { + log_block_read(mi, &df->df_id, block_index); + return -ETIME; + } } - mi = df->df_mount_info; - - if (timeout_ms == 0) { - log_block_read(mi, &df->df_id, block_index); - return -ETIME; - } - - if (!read) - return -ENOMEM; - /* Wait for notifications about block's arrival */ wait_res = wait_event_interruptible_timeout(segment->new_data_arrival_wq, @@ -895,7 +885,7 @@ static int wait_for_data_block(struct data_file *df, int block_index, return wait_res; } - error = mutex_lock_interruptible(&segment->blockmap_mutex); + error = down_read_killable(&segment->rwsem); if (error) return error; @@ -917,7 +907,7 @@ static int wait_for_data_block(struct data_file *df, int block_index, } } - mutex_unlock(&segment->blockmap_mutex); + up_read(&segment->rwsem); return error; } @@ -1012,18 +1002,25 @@ int incfs_process_new_data_block(struct data_file *df, if (block->compression == COMPRESSION_LZ4) flags |= INCFS_BLOCK_COMPRESSED_LZ4; - error = mutex_lock_interruptible(&segment->blockmap_mutex); + error = down_read_killable(&segment->rwsem); if (error) return error; error = get_data_file_block(df, block->block_index, &existing_block); + + up_read(&segment->rwsem); + if (error) - goto unlock; + return error; if (is_data_block_present(&existing_block)) { /* Block is already present, nothing to do here */ - goto unlock; + return 0; } + error = down_write_killable(&segment->rwsem); + if (error) + return error; + error = mutex_lock_interruptible(&bfc->bc_mutex); if (!error) { error = incfs_write_data_block_to_backing_file( @@ -1034,8 +1031,8 @@ int incfs_process_new_data_block(struct data_file *df, if (!error) notify_pending_reads(mi, segment, block->block_index); -unlock: - mutex_unlock(&segment->blockmap_mutex); + up_write(&segment->rwsem); + if (error) pr_debug("incfs: %s %d error: %d\n", __func__, block->block_index, error); @@ -1311,7 +1308,7 @@ bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number) int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, struct incfs_pending_read_info *reads, - int reads_size) + int reads_size, int *new_max_sn) { int reported_reads = 0; struct pending_read *entry = NULL; @@ -1343,7 +1340,9 @@ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, reads[reported_reads].block_index = entry->block_index; reads[reported_reads].serial_number = entry->serial_number; reads[reported_reads].timestamp_us = entry->timestamp_us; - /* reads[reported_reads].kind = INCFS_READ_KIND_PENDING; */ + + if (entry->serial_number > *new_max_sn) + *new_max_sn = entry->serial_number; reported_reads++; if (reported_reads >= reads_size) diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 00e93f1b7712..88b0ec7cbd98 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -184,8 +185,7 @@ struct data_file_segment { wait_queue_head_t new_data_arrival_wq; /* Protects reads and writes from the blockmap */ - /* Good candidate for read/write mutex */ - struct mutex blockmap_mutex; + struct rw_semaphore rwsem; /* List of active pending_read objects belonging to this segment */ /* Protected by mount_info.pending_reads_mutex */ @@ -303,7 +303,7 @@ bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); */ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, struct incfs_pending_read_info *reads, - int reads_size); + int reads_size, int *new_max_sn); int incfs_collect_logged_reads(struct mount_info *mi, struct read_log_state *start_state, diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 65b7f325bf9c..47d631080557 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -468,7 +468,6 @@ static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, int new_max_sn = last_known_read_sn; int reads_collected = 0; ssize_t result = 0; - int i = 0; if (!mi) return -EFAULT; @@ -484,16 +483,12 @@ static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, min_t(size_t, PAGE_SIZE / sizeof(*reads_buf), reads_to_collect); reads_collected = incfs_collect_pending_reads( - mi, last_known_read_sn, reads_buf, reads_to_collect); + mi, last_known_read_sn, reads_buf, reads_to_collect, &new_max_sn); if (reads_collected < 0) { result = reads_collected; goto out; } - for (i = 0; i < reads_collected; i++) - if (reads_buf[i].serial_number > new_max_sn) - new_max_sn = reads_buf[i].serial_number; - /* * Just to make sure that we don't accidentally copy more data * to reads buffer than userspace can handle. From 9395fb3db23c21875ded5a2e72c8bc414bb11da4 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Mon, 27 Apr 2020 16:07:16 -0700 Subject: [PATCH 437/636] ANDROID: Incremental fs: Stress tool Bug: 156474586 Test: Tool runs, passes Signed-off-by: Paul Lawrence Change-Id: I745708b7d4ae1fcacbc359ed2f3fda462d95ea6a --- .../selftests/filesystems/incfs/.gitignore | 3 +- .../selftests/filesystems/incfs/Makefile | 16 +- .../filesystems/incfs/incfs_stress.c | 349 ++++++++++++++++++ 3 files changed, 359 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/filesystems/incfs/incfs_stress.c diff --git a/tools/testing/selftests/filesystems/incfs/.gitignore b/tools/testing/selftests/filesystems/incfs/.gitignore index 4cba9c219a92..6b90110d6556 100644 --- a/tools/testing/selftests/filesystems/incfs/.gitignore +++ b/tools/testing/selftests/filesystems/incfs/.gitignore @@ -1 +1,2 @@ -incfs_test \ No newline at end of file +incfs_test +incfs_stress diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index 5b2e627ce883..1484dbe8f7e1 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -CFLAGS += -I../.. -I../../../../.. - -LDLIBS := -llz4 -lcrypto -EXTRA_SOURCES := utils.c -TEST_GEN_PROGS := incfs_test - -$(TEST_GEN_PROGS): $(EXTRA_SOURCES) +CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -I../.. -I../../../../.. +LDLIBS := -llz4 -lcrypto -lpthread +TEST_GEN_PROGS := incfs_test incfs_stress include ../../lib.mk + +# Put after include ../../lib.mk since that changes $(TEST_GEN_PROGS) +# Otherwise you get multiple targets, this becomes the default, and it's a mess +EXTRA_SOURCES := utils.c +$(TEST_GEN_PROGS) : $(EXTRA_SOURCES) diff --git a/tools/testing/selftests/filesystems/incfs/incfs_stress.c b/tools/testing/selftests/filesystems/incfs/incfs_stress.c new file mode 100644 index 000000000000..4238803cccd0 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/incfs_stress.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +#define err_msg(...) \ + do { \ + fprintf(stderr, "%s: (%d) ", TAG, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " (%s)\n", strerror(errno)); \ + } while (false) + +#define TAG "incfs_stress" + +struct options { + bool no_cleanup; /* -c */ + const char *test_dir; /* -d */ + unsigned int rng_seed; /* -g */ + int num_reads; /* -n */ + int readers; /* -r */ + int size; /* -s */ + int timeout; /* -t */ +}; + +struct read_data { + const char *filename; + int dir_fd; + size_t filesize; + int num_reads; + unsigned int rng_seed; +}; + +int cancel_threads; + +int parse_options(int argc, char *const *argv, struct options *options) +{ + char c; + + /* Set defaults here */ + *options = (struct options){ + .test_dir = ".", + .num_reads = 1000, + .readers = 10, + .size = 10, + }; + + /* Load options from command line here */ + while ((c = getopt(argc, argv, "cd:g:n:r:s:t:")) != -1) { + switch (c) { + case 'c': + options->no_cleanup = true; + break; + + case 'd': + options->test_dir = optarg; + break; + + case 'g': + options->rng_seed = atoi(optarg); + break; + + case 'n': + options->num_reads = atoi(optarg); + break; + + case 'r': + options->readers = atoi(optarg); + break; + + case 's': + options->size = atoi(optarg); + break; + + case 't': + options->timeout = atoi(optarg); + break; + } + } + + return 0; +} + +unsigned int rnd(unsigned int max, unsigned int *seed) +{ + return rand_r(seed) * ((uint64_t)max + 1) / RAND_MAX; +} + +int remove_dir(const char *dir) +{ + int err = rmdir(dir); + + if (err && errno == ENOTEMPTY) { + err = delete_dir_tree(dir); + if (err) { + err_msg("Can't delete dir %s", dir); + return err; + } + + return 0; + } + + if (err && errno != ENOENT) { + err_msg("Can't delete dir %s", dir); + return -errno; + } + + return 0; +} + +void *reader(void *data) +{ + struct read_data *read_data = (struct read_data *)data; + int i; + int fd = -1; + void *buffer = malloc(read_data->filesize); + + if (!buffer) { + err_msg("Failed to alloc read buffer"); + goto out; + } + + fd = openat(read_data->dir_fd, read_data->filename, + O_RDONLY | O_CLOEXEC); + if (fd == -1) { + err_msg("Failed to open file"); + goto out; + } + + for (i = 0; i < read_data->num_reads && !cancel_threads; ++i) { + off_t offset = rnd(read_data->filesize, &read_data->rng_seed); + size_t count = + rnd(read_data->filesize - offset, &read_data->rng_seed); + ssize_t err = pread(fd, buffer, count, offset); + + if (err != count) + err_msg("failed to read with value %lu", err); + } + +out: + close(fd); + free(read_data); + free(buffer); + return NULL; +} + +int write_data(int cmd_fd, int dir_fd, const char *name, size_t size) +{ + int fd = openat(dir_fd, name, O_RDWR | O_CLOEXEC); + struct incfs_permit_fill permit_fill = { + .file_descriptor = fd, + }; + int error = 0; + int i; + int block_count = 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + + if (fd == -1) { + err_msg("Could not open file for writing %s", name); + return -errno; + } + + if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + err_msg("Failed to call PERMIT_FILL"); + error = -errno; + goto out; + } + + for (i = 0; i < block_count; ++i) { + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + size_t block_size = + size > i * INCFS_DATA_FILE_BLOCK_SIZE ? + INCFS_DATA_FILE_BLOCK_SIZE : + size - (i * INCFS_DATA_FILE_BLOCK_SIZE); + struct incfs_fill_block fill_block = { + .compression = COMPRESSION_NONE, + .block_index = i, + .data_len = block_size, + .data = ptr_to_u64(data), + }; + struct incfs_fill_blocks fill_blocks = { + .count = 1, + .fill_blocks = ptr_to_u64(&fill_block), + }; + int written = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + + if (written != 1) { + error = -errno; + err_msg("Failed to write block %d in file %s", i, name); + break; + } + } +out: + close(fd); + return error; +} + +int test_files(int src_dir, int dst_dir, struct options const *options) +{ + unsigned int seed = options->rng_seed; + int cmd_file = openat(dst_dir, INCFS_PENDING_READS_FILENAME, + O_RDONLY | O_CLOEXEC); + int err; + const char *name = "001"; + incfs_uuid_t id; + size_t size; + int i; + pthread_t *threads = NULL; + + size = 1 << (rnd(options->size, &seed) + 12); + size += rnd(size, &seed); + + if (cmd_file == -1) { + err_msg("Could not open command file"); + return -errno; + } + + err = emit_file(cmd_file, NULL, name, &id, size, NULL); + if (err) { + err_msg("Failed to create file %s", name); + return err; + } + + threads = malloc(sizeof(pthread_t) * options->readers); + if (!threads) { + err_msg("Could not allocate memory for threads"); + return -ENOMEM; + } + + for (i = 0; i < options->readers; ++i) { + struct read_data *read_data = malloc(sizeof(*read_data)); + + if (!read_data) { + err_msg("Failed to allocate read_data"); + err = -ENOMEM; + break; + } + + *read_data = (struct read_data){ + .filename = name, + .dir_fd = dst_dir, + .filesize = size, + .num_reads = options->num_reads, + .rng_seed = seed, + }; + + rnd(0, &seed); + + err = pthread_create(threads + i, 0, reader, read_data); + if (err) { + err_msg("Failed to create thread"); + free(read_data); + break; + } + } + + if (err) + cancel_threads = 1; + else + err = write_data(cmd_file, dst_dir, name, size); + + for (; i > 0; --i) { + if (pthread_join(threads[i - 1], NULL)) { + err_msg("FATAL: failed to join thread"); + exit(-errno); + } + } + + free(threads); + close(cmd_file); + return err; +} + +int main(int argc, char *const *argv) +{ + struct options options; + int err; + const char *src_dir = "src"; + const char *dst_dir = "dst"; + int src_dir_fd = -1; + int dst_dir_fd = -1; + + err = parse_options(argc, argv, &options); + if (err) + return err; + + err = chdir(options.test_dir); + if (err) { + err_msg("Failed to change to %s", options.test_dir); + return -errno; + } + + err = remove_dir(src_dir) || remove_dir(dst_dir); + if (err) + return err; + + err = mkdir(src_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mkdir(dst_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mount_fs(dst_dir, src_dir, options.timeout); + if (err) { + err_msg("Failed to mount incfs"); + goto cleanup; + } + + src_dir_fd = open(src_dir, O_RDONLY | O_CLOEXEC); + dst_dir_fd = open(dst_dir, O_RDONLY | O_CLOEXEC); + if (src_dir_fd == -1 || dst_dir_fd == -1) { + err_msg("Failed to open src or dst dir"); + err = -errno; + goto cleanup; + } + + err = test_files(src_dir_fd, dst_dir_fd, &options); + +cleanup: + close(src_dir_fd); + close(dst_dir_fd); + if (!options.no_cleanup) { + umount(dst_dir); + remove_dir(dst_dir); + remove_dir(src_dir); + } + + return err; +} From f5c43067728eb12529fac7eed2540fddb2ef2729 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 6 May 2020 12:31:31 -0700 Subject: [PATCH 438/636] ANDROID: Incremental fs: Adding perf test Bug: 156474586 Test: Profile tool runs Signed-off-by: Paul Lawrence Change-Id: Iff91432f7602fc1e23cf30012c893a99e02c868d --- .../selftests/filesystems/incfs/.gitignore | 1 + .../selftests/filesystems/incfs/Makefile | 2 +- .../selftests/filesystems/incfs/incfs_perf.c | 717 ++++++++++++++++++ .../filesystems/incfs/incfs_stress.c | 39 +- .../selftests/filesystems/incfs/incfs_test.c | 3 +- .../selftests/filesystems/incfs/utils.c | 39 + .../selftests/filesystems/incfs/utils.h | 7 + 7 files changed, 772 insertions(+), 36 deletions(-) create mode 100644 tools/testing/selftests/filesystems/incfs/incfs_perf.c diff --git a/tools/testing/selftests/filesystems/incfs/.gitignore b/tools/testing/selftests/filesystems/incfs/.gitignore index 6b90110d6556..f0e3cd99d4ac 100644 --- a/tools/testing/selftests/filesystems/incfs/.gitignore +++ b/tools/testing/selftests/filesystems/incfs/.gitignore @@ -1,2 +1,3 @@ incfs_test incfs_stress +incfs_perf diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index 1484dbe8f7e1..882b43decbcd 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -I../.. -I../../../../.. LDLIBS := -llz4 -lcrypto -lpthread -TEST_GEN_PROGS := incfs_test incfs_stress +TEST_GEN_PROGS := incfs_test incfs_stress incfs_perf include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/incfs/incfs_perf.c b/tools/testing/selftests/filesystems/incfs/incfs_perf.c new file mode 100644 index 000000000000..ed36bbd774e3 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/incfs_perf.c @@ -0,0 +1,717 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +#define err_msg(...) \ + do { \ + fprintf(stderr, "%s: (%d) ", TAG, __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " (%s)\n", strerror(errno)); \ + } while (false) + +#define TAG "incfs_perf" + +struct options { + int blocks; /* -b number of diff block sizes */ + bool no_cleanup; /* -c don't clean up after */ + const char *test_dir; /* -d working directory */ + const char *file_types; /* -f sScCvV */ + bool no_native; /* -n don't test native files */ + bool no_random; /* -r don't do random reads*/ + bool no_linear; /* -R random reads only */ + size_t size; /* -s file size as power of 2 */ + int tries; /* -t times to run test*/ +}; + +enum flags { + SHUFFLE = 1, + COMPRESS = 2, + VERIFY = 4, + LAST_FLAG = 8, +}; + +void print_help(void) +{ + puts( + "incfs_perf. Performance test tool for incfs\n" + "\tTests read performance of incfs by creating files of various types\n" + "\tflushing caches and then reading them back.\n" + "\tEach file is read with different block sizes and average\n" + "\tthroughput in megabytes/second and memory usage are reported for\n" + "\teach block size\n" + "\tNative files are tested for comparison\n" + "\tNative files are created in native folder, incfs files are created\n" + "\tin src folder which is mounted on dst folder\n" + "\n" + "\t-bn (default 8) number of different block sizes, starting at 4096\n" + "\t and doubling\n" + "\t-c don't Clean up - leave files and mount point\n" + "\t-d dir create directories in dir\n" + "\t-fs|Sc|Cv|V restrict which files are created.\n" + "\t s blocks not shuffled, S blocks shuffled\n" + "\t c blocks not compress, C blocks compressed\n" + "\t v files not verified, V files verified\n" + "\t If a letter is omitted, both options are tested\n" + "\t If no letter are given, incfs is not tested\n" + "\t-n Don't test native files\n" + "\t-r No random reads (sequential only)\n" + "\t-R Random reads only (no sequential)\n" + "\t-sn (default 30)File size as power of 2\n" + "\t-tn (default 5) Number of tries per file. Results are averaged\n" + ); +} + +int parse_options(int argc, char *const *argv, struct options *options) +{ + signed char c; + + /* Set defaults here */ + *options = (struct options){ + .blocks = 8, + .test_dir = ".", + .tries = 5, + .size = 30, + }; + + /* Load options from command line here */ + while ((c = getopt(argc, argv, "b:cd:f::hnrRs:t:")) != -1) { + switch (c) { + case 'b': + options->blocks = strtol(optarg, NULL, 10); + break; + + case 'c': + options->no_cleanup = true; + break; + + case 'd': + options->test_dir = optarg; + break; + + case 'f': + if (optarg) + options->file_types = optarg; + else + options->file_types = "sS"; + break; + + case 'h': + print_help(); + exit(0); + + case 'n': + options->no_native = true; + break; + + case 'r': + options->no_random = true; + break; + + case 'R': + options->no_linear = true; + break; + + case 's': + options->size = strtol(optarg, NULL, 10); + break; + + case 't': + options->tries = strtol(optarg, NULL, 10); + break; + + default: + print_help(); + return -EINVAL; + } + } + + options->size = 1L << options->size; + + return 0; +} + +void shuffle(size_t *buffer, size_t size) +{ + size_t i; + + for (i = 0; i < size; ++i) { + size_t j = random() * (size - i - 1) / RAND_MAX; + size_t temp = buffer[i]; + + buffer[i] = buffer[j]; + buffer[j] = temp; + } +} + +int get_free_memory(void) +{ + FILE *meminfo = fopen("/proc/meminfo", "re"); + char field[256]; + char value[256] = {}; + + if (!meminfo) + return -ENOENT; + + while (fscanf(meminfo, "%[^:]: %s kB\n", field, value) == 2) { + if (!strcmp(field, "MemFree")) + break; + *value = 0; + } + + fclose(meminfo); + + if (!*value) + return -ENOENT; + + return strtol(value, NULL, 10); +} + +int write_data(int cmd_fd, int dir_fd, const char *name, size_t size, int flags) +{ + int fd = openat(dir_fd, name, O_RDWR | O_CLOEXEC); + struct incfs_permit_fill permit_fill = { + .file_descriptor = fd, + }; + int block_count = 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + size_t *blocks = malloc(sizeof(size_t) * block_count); + int error = 0; + size_t i; + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + uint8_t compressed_data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + struct incfs_fill_block fill_block = { + .compression = COMPRESSION_NONE, + .data_len = sizeof(data), + .data = ptr_to_u64(data), + }; + + if (!blocks) { + err_msg("Out of memory"); + error = -errno; + goto out; + } + + if (fd == -1) { + err_msg("Could not open file for writing %s", name); + error = -errno; + goto out; + } + + if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + err_msg("Failed to call PERMIT_FILL"); + error = -errno; + goto out; + } + + for (i = 0; i < block_count; ++i) + blocks[i] = i; + + if (flags & SHUFFLE) + shuffle(blocks, block_count); + + if (flags & COMPRESS) { + size_t comp_size = LZ4_compress_default( + (char *)data, (char *)compressed_data, sizeof(data), + ARRAY_SIZE(compressed_data)); + + if (comp_size <= 0) { + error = -EBADMSG; + goto out; + } + fill_block.compression = COMPRESSION_LZ4; + fill_block.data = ptr_to_u64(compressed_data); + fill_block.data_len = comp_size; + } + + for (i = 0; i < block_count; ++i) { + struct incfs_fill_blocks fill_blocks = { + .count = 1, + .fill_blocks = ptr_to_u64(&fill_block), + }; + + fill_block.block_index = blocks[i]; + int written = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + + if (written != 1) { + error = -errno; + err_msg("Failed to write block %lu in file %s", i, + name); + break; + } + } + +out: + free(blocks); + close(fd); + sync(); + return error; +} + +int measure_read_throughput_internal(const char *tag, int dir, const char *name, + const struct options *options, bool random) +{ + int block; + + if (random) + printf("%32s(random)", tag); + else + printf("%40s", tag); + + for (block = 0; block < options->blocks; ++block) { + size_t buffer_size; + char *buffer; + int try; + double time = 0; + double throughput; + int memory = 0; + + buffer_size = 1 << (block + 12); + buffer = malloc(buffer_size); + + for (try = 0; try < options->tries; ++try) { + int err; + struct timespec start_time, end_time; + off_t i; + int fd; + size_t offsets_size = options->size / buffer_size; + size_t *offsets = + malloc(offsets_size * sizeof(*offsets)); + int start_memory, end_memory; + + if (!offsets) { + err_msg("Not enough memory"); + return -ENOMEM; + } + + for (i = 0; i < offsets_size; ++i) + offsets[i] = i * buffer_size; + + if (random) + shuffle(offsets, offsets_size); + + err = drop_caches(); + if (err) { + err_msg("Failed to drop caches"); + return err; + } + + start_memory = get_free_memory(); + if (start_memory < 0) { + err_msg("Failed to get start memory"); + return start_memory; + } + + fd = openat(dir, name, O_RDONLY | O_CLOEXEC); + if (fd == -1) { + err_msg("Failed to open file"); + return err; + } + + err = clock_gettime(CLOCK_MONOTONIC, &start_time); + if (err) { + err_msg("Failed to get start time"); + return err; + } + + for (i = 0; i < offsets_size; ++i) + if (pread(fd, buffer, buffer_size, + offsets[i]) != buffer_size) { + err_msg("Failed to read file"); + err = -errno; + goto fail; + } + + err = clock_gettime(CLOCK_MONOTONIC, &end_time); + if (err) { + err_msg("Failed to get start time"); + goto fail; + } + + end_memory = get_free_memory(); + if (end_memory < 0) { + err_msg("Failed to get end memory"); + return end_memory; + } + + time += end_time.tv_sec - start_time.tv_sec; + time += (end_time.tv_nsec - start_time.tv_nsec) / 1e9; + + close(fd); + fd = -1; + memory += start_memory - end_memory; + +fail: + free(offsets); + close(fd); + if (err) + return err; + } + + throughput = options->size * options->tries / time; + printf("%10.3e %10d", throughput, memory / options->tries); + free(buffer); + } + + printf("\n"); + return 0; +} + +int measure_read_throughput(const char *tag, int dir, const char *name, + const struct options *options) +{ + int err = 0; + + if (!options->no_linear) + err = measure_read_throughput_internal(tag, dir, name, options, + false); + + if (!err && !options->no_random) + err = measure_read_throughput_internal(tag, dir, name, options, + true); + return err; +} + +int test_native_file(int dir, const struct options *options) +{ + const char *name = "file"; + int fd; + char buffer[4096] = {}; + off_t i; + int err; + + fd = openat(dir, name, O_CREAT | O_WRONLY | O_CLOEXEC, 0600); + if (fd == -1) { + err_msg("Could not open native file"); + return -errno; + } + + for (i = 0; i < options->size; i += sizeof(buffer)) + if (pwrite(fd, buffer, sizeof(buffer), i) != sizeof(buffer)) { + err_msg("Failed to write file"); + err = -errno; + goto fail; + } + + close(fd); + sync(); + fd = -1; + + err = measure_read_throughput("native", dir, name, options); + +fail: + close(fd); + return err; +} + +struct hash_block { + char data[INCFS_DATA_FILE_BLOCK_SIZE]; +}; + +static struct hash_block *build_mtree(size_t size, char *root_hash, + int *mtree_block_count) +{ + char data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + const int digest_size = SHA256_DIGEST_SIZE; + const int hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + int block_count = 0; + int hash_block_count = 0; + int total_tree_block_count = 0; + int tree_lvl_index[INCFS_MAX_MTREE_LEVELS] = {}; + int tree_lvl_count[INCFS_MAX_MTREE_LEVELS] = {}; + int levels_count = 0; + int i, level; + struct hash_block *mtree; + + if (size == 0) + return 0; + + block_count = 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + hash_block_count = block_count; + for (i = 0; hash_block_count > 1; i++) { + hash_block_count = (hash_block_count + hash_per_block - 1) / + hash_per_block; + tree_lvl_count[i] = hash_block_count; + total_tree_block_count += hash_block_count; + } + levels_count = i; + + for (i = 0; i < levels_count; i++) { + int prev_lvl_base = (i == 0) ? total_tree_block_count : + tree_lvl_index[i - 1]; + + tree_lvl_index[i] = prev_lvl_base - tree_lvl_count[i]; + } + + *mtree_block_count = total_tree_block_count; + mtree = calloc(total_tree_block_count, sizeof(*mtree)); + /* Build level 0 hashes. */ + for (i = 0; i < block_count; i++) { + int block_index = tree_lvl_index[0] + i / hash_per_block; + int block_off = (i % hash_per_block) * digest_size; + char *hash_ptr = mtree[block_index].data + block_off; + + sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); + } + + /* Build higher levels of hash tree. */ + for (level = 1; level < levels_count; level++) { + int prev_lvl_base = tree_lvl_index[level - 1]; + int prev_lvl_count = tree_lvl_count[level - 1]; + + for (i = 0; i < prev_lvl_count; i++) { + int block_index = + i / hash_per_block + tree_lvl_index[level]; + int block_off = (i % hash_per_block) * digest_size; + char *hash_ptr = mtree[block_index].data + block_off; + + sha256(mtree[i + prev_lvl_base].data, + INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); + } + } + + /* Calculate root hash from the top block */ + sha256(mtree[0].data, INCFS_DATA_FILE_BLOCK_SIZE, root_hash); + + return mtree; +} + +static int load_hash_tree(int cmd_fd, int dir, const char *name, + struct hash_block *mtree, int mtree_block_count) +{ + int err; + int i; + int fd; + struct incfs_fill_block *fill_block_array = + calloc(mtree_block_count, sizeof(struct incfs_fill_block)); + struct incfs_fill_blocks fill_blocks = { + .count = mtree_block_count, + .fill_blocks = ptr_to_u64(fill_block_array), + }; + struct incfs_permit_fill permit_fill; + + if (!fill_block_array) + return -ENOMEM; + + for (i = 0; i < fill_blocks.count; i++) { + fill_block_array[i] = (struct incfs_fill_block){ + .block_index = i, + .data_len = INCFS_DATA_FILE_BLOCK_SIZE, + .data = ptr_to_u64(mtree[i].data), + .flags = INCFS_BLOCK_FLAGS_HASH + }; + } + + fd = openat(dir, name, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = errno; + goto failure; + } + + permit_fill.file_descriptor = fd; + if (ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + err_msg("Failed to call PERMIT_FILL"); + err = -errno; + goto failure; + } + + err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + close(fd); + if (err < fill_blocks.count) + err = errno; + else + err = 0; + +failure: + free(fill_block_array); + return err; +} + +int test_incfs_file(int dst_dir, const struct options *options, int flags) +{ + int cmd_file = openat(dst_dir, INCFS_PENDING_READS_FILENAME, + O_RDONLY | O_CLOEXEC); + int err; + char name[4]; + incfs_uuid_t id; + char tag[256]; + + snprintf(name, sizeof(name), "%c%c%c", + flags & SHUFFLE ? 'S' : 's', + flags & COMPRESS ? 'C' : 'c', + flags & VERIFY ? 'V' : 'v'); + + if (cmd_file == -1) { + err_msg("Could not open command file"); + return -errno; + } + + if (flags & VERIFY) { + char root_hash[INCFS_MAX_HASH_SIZE]; + int mtree_block_count; + struct hash_block *mtree = build_mtree(options->size, root_hash, + &mtree_block_count); + + if (!mtree) { + err_msg("Failed to build hash tree"); + err = -ENOMEM; + goto fail; + } + + err = crypto_emit_file(cmd_file, NULL, name, &id, options->size, + root_hash, "add_data"); + + if (!err) + err = load_hash_tree(cmd_file, dst_dir, name, mtree, + mtree_block_count); + + free(mtree); + } else + err = emit_file(cmd_file, NULL, name, &id, options->size, NULL); + + if (err) { + err_msg("Failed to create file %s", name); + goto fail; + } + + if (write_data(cmd_file, dst_dir, name, options->size, flags)) + goto fail; + + snprintf(tag, sizeof(tag), "incfs%s%s%s", + flags & SHUFFLE ? "(shuffle)" : "", + flags & COMPRESS ? "(compress)" : "", + flags & VERIFY ? "(verify)" : ""); + + err = measure_read_throughput(tag, dst_dir, name, options); + +fail: + close(cmd_file); + return err; +} + +bool skip(struct options const *options, int flag, char c) +{ + if (!options->file_types) + return false; + + if (flag && strchr(options->file_types, tolower(c))) + return true; + + if (!flag && strchr(options->file_types, toupper(c))) + return true; + + return false; +} + +int main(int argc, char *const *argv) +{ + struct options options; + int err; + const char *native_dir = "native"; + const char *src_dir = "src"; + const char *dst_dir = "dst"; + int native_dir_fd = -1; + int src_dir_fd = -1; + int dst_dir_fd = -1; + int block; + int flags; + + err = parse_options(argc, argv, &options); + if (err) + return err; + + err = chdir(options.test_dir); + if (err) { + err_msg("Failed to change to %s", options.test_dir); + return -errno; + } + + /* Clean up any interrupted previous runs */ + while (!umount(dst_dir)) + ; + + err = remove_dir(native_dir) || remove_dir(src_dir) || + remove_dir(dst_dir); + if (err) + return err; + + err = mkdir(native_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mkdir(src_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mkdir(dst_dir, 0700); + if (err) { + err_msg("Failed to make directory %s", src_dir); + err = -errno; + goto cleanup; + } + + err = mount_fs_opt(dst_dir, src_dir, "readahead=0,rlog_pages=0", 0); + if (err) { + err_msg("Failed to mount incfs"); + goto cleanup; + } + + native_dir_fd = open(native_dir, O_RDONLY | O_CLOEXEC); + src_dir_fd = open(src_dir, O_RDONLY | O_CLOEXEC); + dst_dir_fd = open(dst_dir, O_RDONLY | O_CLOEXEC); + if (native_dir_fd == -1 || src_dir_fd == -1 || dst_dir_fd == -1) { + err_msg("Failed to open native, src or dst dir"); + err = -errno; + goto cleanup; + } + + printf("%40s", ""); + for (block = 0; block < options.blocks; ++block) + printf("%21d", 1 << (block + 12)); + printf("\n"); + + if (!err && !options.no_native) + err = test_native_file(native_dir_fd, &options); + + for (flags = 0; flags < LAST_FLAG && !err; ++flags) { + if (skip(&options, flags & SHUFFLE, 's') || + skip(&options, flags & COMPRESS, 'c') || + skip(&options, flags & VERIFY, 'v')) + continue; + err = test_incfs_file(dst_dir_fd, &options, flags); + } + +cleanup: + close(native_dir_fd); + close(src_dir_fd); + close(dst_dir_fd); + if (!options.no_cleanup) { + umount(dst_dir); + remove_dir(native_dir); + remove_dir(dst_dir); + remove_dir(src_dir); + } + + return err; +} diff --git a/tools/testing/selftests/filesystems/incfs/incfs_stress.c b/tools/testing/selftests/filesystems/incfs/incfs_stress.c index 4238803cccd0..a1d491755832 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_stress.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_stress.c @@ -48,7 +48,7 @@ int cancel_threads; int parse_options(int argc, char *const *argv, struct options *options) { - char c; + signed char c; /* Set defaults here */ *options = (struct options){ @@ -70,23 +70,23 @@ int parse_options(int argc, char *const *argv, struct options *options) break; case 'g': - options->rng_seed = atoi(optarg); + options->rng_seed = strtol(optarg, NULL, 10); break; case 'n': - options->num_reads = atoi(optarg); + options->num_reads = strtol(optarg, NULL, 10); break; case 'r': - options->readers = atoi(optarg); + options->readers = strtol(optarg, NULL, 10); break; case 's': - options->size = atoi(optarg); + options->size = strtol(optarg, NULL, 10); break; case 't': - options->timeout = atoi(optarg); + options->timeout = strtol(optarg, NULL, 10); break; } } @@ -94,33 +94,6 @@ int parse_options(int argc, char *const *argv, struct options *options) return 0; } -unsigned int rnd(unsigned int max, unsigned int *seed) -{ - return rand_r(seed) * ((uint64_t)max + 1) / RAND_MAX; -} - -int remove_dir(const char *dir) -{ - int err = rmdir(dir); - - if (err && errno == ENOTEMPTY) { - err = delete_dir_tree(dir); - if (err) { - err_msg("Can't delete dir %s", dir); - return err; - } - - return 0; - } - - if (err && errno != ENOENT) { - err_msg("Can't delete dir %s", dir); - return -errno; - } - - return 0; -} - void *reader(void *data) { struct read_data *read_data = (struct read_data *)data; diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 6809399eac97..203c30a62493 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -29,7 +29,6 @@ #define TEST_FAILURE 1 #define TEST_SUCCESS 0 -#define INCFS_MAX_MTREE_LEVELS 8 #define INCFS_ROOT_INODE 0 @@ -2624,7 +2623,7 @@ static int large_file(char *mount_dir) .fill_blocks = ptr_to_u64(block_buf), }; incfs_uuid_t id; - int fd; + int fd = -1; backing_dir = create_backing_dir(mount_dir); if (!backing_dir) diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index e194f63ba922..a80123294d5a 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -25,6 +25,45 @@ #define __S_IFREG S_IFREG #endif +unsigned int rnd(unsigned int max, unsigned int *seed) +{ + return rand_r(seed) * ((uint64_t)max + 1) / RAND_MAX; +} + +int remove_dir(const char *dir) +{ + int err = rmdir(dir); + + if (err && errno == ENOTEMPTY) { + err = delete_dir_tree(dir); + if (err) + return err; + return 0; + } + + if (err && errno != ENOENT) + return -errno; + + return 0; +} + +int drop_caches(void) +{ + int drop_caches = + open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); + int i; + + if (drop_caches == -1) + return -errno; + i = write(drop_caches, "3", 1); + close(drop_caches); + + if (i != 1) + return -errno; + + return 0; +} + int mount_fs(const char *mount_dir, const char *backing_dir, int read_timeout_ms) { diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index 9af63e4e922c..5b59272dbb7f 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -18,6 +18,13 @@ #endif #define SHA256_DIGEST_SIZE 32 +#define INCFS_MAX_MTREE_LEVELS 8 + +unsigned int rnd(unsigned int max, unsigned int *seed); + +int remove_dir(const char *dir); + +int drop_caches(void); int mount_fs(const char *mount_dir, const char *backing_dir, int read_timeout_ms); From aab713422ed00d4e15f0f3d2531682ff021ea100 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 21 May 2020 09:14:51 -0700 Subject: [PATCH 439/636] ANDROID: Incremental fs: Allow running a single test Bug: 162313014 Test: Can run single test Signed-off-by: Paul Lawrence Change-Id: Ia75d15351245701d53040a537205b37d06a4263b --- .../selftests/filesystems/incfs/Makefile | 2 +- .../selftests/filesystems/incfs/incfs_test.c | 122 ++++++++++++------ 2 files changed, 81 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index 882b43decbcd..83e2ecb7bad9 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -I../.. -I../../../../.. +CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -Werror -I../.. -I../../../../.. LDLIBS := -llz4 -lcrypto -lpthread TEST_GEN_PROGS := incfs_test incfs_stress incfs_perf diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 203c30a62493..fdf29f5ca9cd 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -247,7 +247,7 @@ out: return fd; } -int get_file_attr(char *mnt_dir, incfs_uuid_t id, char *value, int size) +int get_file_attr(const char *mnt_dir, incfs_uuid_t id, char *value, int size) { char *path = get_index_filename(mnt_dir, id); int res; @@ -265,7 +265,7 @@ static bool same_id(incfs_uuid_t *id1, incfs_uuid_t *id2) return !memcmp(id1->bytes, id2->bytes, sizeof(id1->bytes)); } -static int emit_test_blocks(char *mnt_dir, struct test_file *file, +static int emit_test_blocks(const char *mnt_dir, struct test_file *file, int blocks[], int count) { uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; @@ -375,7 +375,7 @@ out: return (error < 0) ? error : blocks_written; } -static int emit_test_block(char *mnt_dir, struct test_file *file, +static int emit_test_block(const char *mnt_dir, struct test_file *file, int block_index) { int res = emit_test_blocks(mnt_dir, file, &block_index, 1); @@ -405,7 +405,7 @@ static void shuffle(int array[], int count, unsigned int seed) } } -static int emit_test_file_data(char *mount_dir, struct test_file *file) +static int emit_test_file_data(const char *mount_dir, struct test_file *file) { int i; int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; @@ -438,7 +438,7 @@ out: return result; } -static loff_t read_whole_file(char *filename) +static loff_t read_whole_file(const char *filename) { int fd = -1; loff_t result; @@ -504,7 +504,7 @@ cleanup: return result; } -static char *create_backing_dir(char *mount_dir) +static char *create_backing_dir(const char *mount_dir) { struct stat st; char backing_dir_name[255]; @@ -540,7 +540,7 @@ static char *create_backing_dir(char *mount_dir) return strdup(backing_dir_name); } -static int validate_test_file_content_with_seed(char *mount_dir, +static int validate_test_file_content_with_seed(const char *mount_dir, struct test_file *file, unsigned int shuffle_seed) { @@ -602,12 +602,13 @@ failure: return error; } -static int validate_test_file_content(char *mount_dir, struct test_file *file) +static int validate_test_file_content(const char *mount_dir, + struct test_file *file) { return validate_test_file_content_with_seed(mount_dir, file, 0); } -static int data_producer(char *mount_dir, struct test_files_set *test_set) +static int data_producer(const char *mount_dir, struct test_files_set *test_set) { int ret = 0; int timeout_ms = 1000; @@ -802,7 +803,7 @@ failure: return err; } -static int cant_touch_index_test(char *mount_dir) +static int cant_touch_index_test(const char *mount_dir) { char *file_name = "test_file"; int file_size = 123; @@ -892,7 +893,8 @@ failure: return TEST_FAILURE; } -static bool iterate_directory(char *dir_to_iterate, bool root, int file_count) +static bool iterate_directory(const char *dir_to_iterate, bool root, + int file_count) { struct expected_name { const char *name; @@ -997,7 +999,7 @@ failure: return pass; } -static int basic_file_ops_test(char *mount_dir) +static int basic_file_ops_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1163,7 +1165,7 @@ failure: return TEST_FAILURE; } -static int dynamic_files_and_data_test(char *mount_dir) +static int dynamic_files_and_data_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1269,7 +1271,7 @@ failure: return TEST_FAILURE; } -static int concurrent_reads_and_writes_test(char *mount_dir) +static int concurrent_reads_and_writes_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1391,7 +1393,7 @@ failure: return TEST_FAILURE; } -static int work_after_remount_test(char *mount_dir) +static int work_after_remount_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1541,7 +1543,7 @@ failure: return TEST_FAILURE; } -static int attribute_test(char *mount_dir) +static int attribute_test(const char *mount_dir) { char file_attr[] = "metadata123123"; char attr_buf[INCFS_MAX_FILE_ATTR_SIZE] = {}; @@ -1624,7 +1626,7 @@ failure: return TEST_FAILURE; } -static int child_procs_waiting_for_data_test(char *mount_dir) +static int child_procs_waiting_for_data_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1716,7 +1718,7 @@ failure: return TEST_FAILURE; } -static int multiple_providers_test(char *mount_dir) +static int multiple_providers_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -1811,7 +1813,7 @@ failure: return TEST_FAILURE; } -static int hash_tree_test(char *mount_dir) +static int hash_tree_test(const char *mount_dir) { char *backing_dir; struct test_files_set test = get_test_files_set(); @@ -1933,7 +1935,8 @@ failure: enum expected_log { FULL_LOG, NO_LOG, PARTIAL_LOG }; -static int validate_logs(char *mount_dir, int log_fd, struct test_file *file, +static int validate_logs(const char *mount_dir, int log_fd, + struct test_file *file, enum expected_log expected_log) { uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; @@ -2055,7 +2058,7 @@ failure: return TEST_FAILURE; } -static int read_log_test(char *mount_dir) +static int read_log_test(const char *mount_dir) { struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; @@ -2217,7 +2220,7 @@ failure: return TEST_FAILURE; } -static int emit_partial_test_file_data(char *mount_dir, struct test_file *file) +static int emit_partial_test_file_data(const char *mount_dir, struct test_file *file) { int i, j; int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; @@ -2383,7 +2386,7 @@ out: return error; } -static int get_blocks_test(char *mount_dir) +static int get_blocks_test(const char *mount_dir) { char *backing_dir; int cmd_fd = -1; @@ -2440,7 +2443,7 @@ failure: return TEST_FAILURE; } -static int emit_partial_test_file_hash(char *mount_dir, struct test_file *file) +static int emit_partial_test_file_hash(const char *mount_dir, struct test_file *file) { int err; int fd; @@ -2558,7 +2561,7 @@ out: return error; } -static int get_hash_blocks_test(char *mount_dir) +static int get_hash_blocks_test(const char *mount_dir) { char *backing_dir; int cmd_fd = -1; @@ -2608,7 +2611,7 @@ failure: return TEST_FAILURE; } -static int large_file(char *mount_dir) +static int large_file_test(const char *mount_dir) { char *backing_dir; int cmd_fd = -1; @@ -2690,12 +2693,54 @@ static char *setup_mount_dir() return mount_dir; } +struct options { + int test; +}; + +int parse_options(int argc, char *const *argv, struct options *options) +{ + signed char c; + + while ((c = getopt(argc, argv, "t:")) != -1) + switch (c) { + case 't': + options->test = strtol(optarg, NULL, 10); + break; + + default: + return -EINVAL; + } + + return 0; +} + +struct test_case { + int (*pfunc)(const char *dir); + const char *name; +}; + +void run_one_test(const char *mount_dir, struct test_case *test_case, + int *fails) +{ + ksft_print_msg("Running %s\n", test_case->name); + if (test_case->pfunc(mount_dir) == TEST_SUCCESS) + ksft_test_result_pass("%s\n", test_case->name); + else { + ksft_test_result_fail("%s\n", test_case->name); + fails++; + } +} + int main(int argc, char *argv[]) { char *mount_dir = NULL; int fails = 0; int i; int fd, count; + struct options options = {}; + + if (parse_options(argc, argv, &options)) + ksft_exit_fail_msg("Bad options\n"); // Seed randomness pool for testing on QEMU // NOTE - this abuses the concept of randomness - do *not* ever do this @@ -2720,10 +2765,7 @@ int main(int argc, char *argv[]) { \ test, #test \ } - struct { - int (*pfunc)(char *dir); - const char *name; - } cases[] = { + struct test_case cases[] = { MAKE_TEST(basic_file_ops_test), MAKE_TEST(cant_touch_index_test), MAKE_TEST(dynamic_files_and_data_test), @@ -2736,22 +2778,18 @@ int main(int argc, char *argv[]) MAKE_TEST(read_log_test), MAKE_TEST(get_blocks_test), MAKE_TEST(get_hash_blocks_test), - MAKE_TEST(large_file), + MAKE_TEST(large_file_test), }; #undef MAKE_TEST - /* Bring back for kernel 5.x */ - /* ksft_set_plan(ARRAY_SIZE(cases)); */ + if (options.test) { + if (options.test <= 0 || options.test > ARRAY_SIZE(cases)) + ksft_exit_fail_msg("Invalid test\n"); - for (i = 0; i < ARRAY_SIZE(cases); ++i) { - ksft_print_msg("Running %s\n", cases[i].name); - if (cases[i].pfunc(mount_dir) == TEST_SUCCESS) - ksft_test_result_pass("%s\n", cases[i].name); - else { - ksft_test_result_fail("%s\n", cases[i].name); - fails++; - } - } + run_one_test(mount_dir, &cases[options.test - 1], &fails); + } else + for (i = 0; i < ARRAY_SIZE(cases); ++i) + run_one_test(mount_dir, &cases[i], &fails); umount2(mount_dir, MNT_FORCE); rmdir(mount_dir); From abe35abbf0ad68c58b55d3abaeafca4e9e03b55e Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Mon, 20 Jul 2020 13:20:02 -0700 Subject: [PATCH 440/636] ANDROID: Incremental fs: Fix incfs to work on virtio-9p Test: incfs_test on a virtio 9p drive. Note test 6 fails still, but I think this is a result of caching directory entries over a network file system. Bug: 161802292 Signed-off-by: Paul Lawrence Change-Id: I6986fb3e9b403181cf81024046f394960caf4620 --- fs/incfs/format.c | 47 +++++++++++++++++++++++++++++++++++------------ fs/incfs/vfs.c | 6 +++--- 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/fs/incfs/format.c b/fs/incfs/format.c index c56e559b6893..3261d5d4b8f6 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -89,11 +89,42 @@ static int truncate_backing_file(struct backing_file_context *bfc, return result; } +static int write_to_bf(struct backing_file_context *bfc, const void *buf, + size_t count, loff_t pos) +{ + ssize_t res = incfs_kwrite(bfc->bc_file, buf, count, pos); + + if (res < 0) + return res; + if (res != count) + return -EIO; + return 0; +} + +static int append_zeros_no_fallocate(struct backing_file_context *bfc, + size_t file_size, size_t len) +{ + u8 buffer[256] = {}; + size_t i; + + for (i = 0; i < len; i += sizeof(buffer)) { + int to_write = len - i > sizeof(buffer) + ? sizeof(buffer) : len - i; + int err = write_to_bf(bfc, buffer, to_write, file_size + i); + + if (err) + return err; + } + + return 0; +} + /* Append a given number of zero bytes to the end of the backing file. */ static int append_zeros(struct backing_file_context *bfc, size_t len) { loff_t file_size = 0; loff_t new_last_byte_offset = 0; + int result; if (!bfc) return -EFAULT; @@ -110,19 +141,11 @@ static int append_zeros(struct backing_file_context *bfc, size_t len) */ file_size = incfs_get_end_offset(bfc->bc_file); new_last_byte_offset = file_size + len - 1; - return vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); -} + result = vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); + if (result != -EOPNOTSUPP) + return result; -static int write_to_bf(struct backing_file_context *bfc, const void *buf, - size_t count, loff_t pos) -{ - ssize_t res = incfs_kwrite(bfc->bc_file, buf, count, pos); - - if (res < 0) - return res; - if (res != count) - return -EIO; - return 0; + return append_zeros_no_fallocate(bfc, file_size, len); } static u32 calc_md_crc(struct incfs_md_header *record) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 47d631080557..69a01f346446 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -1930,17 +1930,17 @@ static int file_open(struct inode *inode, struct file *file) struct file *backing_file = NULL; struct path backing_path = {}; int err = 0; + int flags = O_NOATIME | O_LARGEFILE | + (S_ISDIR(inode->i_mode) ? O_RDONLY : O_RDWR); if (!mi) return -EBADF; get_incfs_backing_path(file->f_path.dentry, &backing_path); - if (!backing_path.dentry) return -EBADF; - backing_file = dentry_open( - &backing_path, O_RDWR | O_NOATIME | O_LARGEFILE, mi->mi_owner); + backing_file = dentry_open(&backing_path, flags, mi->mi_owner); path_put(&backing_path); if (IS_ERR(backing_file)) { From 85e158e3e144849c453f768d26de7696d64253d1 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Sat, 18 Jul 2020 00:26:46 +0000 Subject: [PATCH 441/636] ANDROID: Incremental fs: Don't allow renaming .index directory. Add additional check to verify dentry is hashed after vfs_mkdir() is successful. Bug: 148423333 Test: kernel seftest - incfs_test, manual test removing .index when mounted Signed-off-by: Akilesh Kailash Change-Id: I7d2d71d480db4f02b7f3a347b5734c6643843594 --- fs/incfs/vfs.c | 14 ++++++++++++-- .../selftests/filesystems/incfs/incfs_test.c | 12 ++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 69a01f346446..10b6727c8fd9 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -780,7 +780,8 @@ static struct dentry *open_or_create_index_dir(struct dentry *backing_dir) if (err) return ERR_PTR(err); - if (!d_really_is_positive(index_dentry)) { + if (!d_really_is_positive(index_dentry) || + unlikely(d_unhashed(index_dentry))) { dput(index_dentry); return ERR_PTR(-EINVAL); } @@ -1646,7 +1647,8 @@ static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (!err) { struct inode *inode = NULL; - if (d_really_is_negative(backing_dentry)) { + if (d_really_is_negative(backing_dentry) || + unlikely(d_unhashed(backing_dentry))) { err = -EINVAL; goto out; } @@ -1871,6 +1873,13 @@ static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, return error; backing_old_dentry = get_incfs_dentry(old_dentry)->backing_path.dentry; + + if (!backing_old_dentry || backing_old_dentry == mi->mi_index_dir) { + /* Renaming .index not allowed */ + error = -EBUSY; + goto exit; + } + backing_new_dentry = get_incfs_dentry(new_dentry)->backing_path.dentry; dget(backing_old_dentry); dget(backing_new_dentry); @@ -1917,6 +1926,7 @@ out: dput(backing_new_dentry); dput(backing_old_dentry); +exit: mutex_unlock(&mi->mi_dir_struct_mutex); if (error) pr_debug("incfs: %s err:%d\n", __func__, error); diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index fdf29f5ca9cd..a3ab9f2a7e67 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -837,6 +837,12 @@ static int cant_touch_index_test(const char *mount_dir) goto failure; } + err = rmdir(index_path); + if (err == 0 || errno != EBUSY) { + print_error(".index directory should not be removed\n"); + goto failure; + } + err = emit_file(cmd_fd, ".index", file_name, &file_id, file_size, NULL); if (err != -EBUSY) { @@ -871,6 +877,12 @@ static int cant_touch_index_test(const char *mount_dir) goto failure; } + err = rename(index_path, dst_name); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't rename .index directory\n"); + goto failure; + } + close(cmd_fd); free(subdir); free(index_path); From 0eae0d27040b49c438490b3ee5e63ce70307d778 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 27 May 2020 13:57:34 -0700 Subject: [PATCH 442/636] ANDROID: Incremental fs: Create mapped file Bug: 160634482 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: Ic2ac8dfccd60f6c9c72c38bf323997fce7546c1c --- fs/incfs/data_mgmt.c | 88 +++++++ fs/incfs/data_mgmt.h | 5 + fs/incfs/format.c | 39 ++- fs/incfs/format.h | 39 ++- fs/incfs/vfs.c | 232 +++++++++++++++--- include/uapi/linux/incrementalfs.h | 50 ++++ .../selftests/filesystems/incfs/incfs_test.c | 167 ++++++++++++- 7 files changed, 568 insertions(+), 52 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 819b437218d2..ccb1b0d1f3c7 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -123,6 +124,86 @@ static void data_file_segment_init(struct data_file_segment *segment) INIT_LIST_HEAD(&segment->reads_list_head); } +char *file_id_to_str(incfs_uuid_t id) +{ + char *result = kmalloc(1 + sizeof(id.bytes) * 2, GFP_NOFS); + char *end; + + if (!result) + return NULL; + + end = bin2hex(result, id.bytes, sizeof(id.bytes)); + *end = 0; + return result; +} + +struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name) +{ + struct inode *inode; + struct dentry *result = NULL; + + if (!parent) + return ERR_PTR(-EFAULT); + + inode = d_inode(parent); + inode_lock_nested(inode, I_MUTEX_PARENT); + result = lookup_one_len(name, parent, strlen(name)); + inode_unlock(inode); + + if (IS_ERR(result)) + pr_warn("%s err:%ld\n", __func__, PTR_ERR(result)); + + return result; +} + +static struct data_file *handle_mapped_file(struct mount_info *mi, + struct data_file *df) +{ + char *file_id_str; + struct dentry *index_file_dentry; + struct path path; + struct file *bf; + struct data_file *result = NULL; + + file_id_str = file_id_to_str(df->df_id); + if (!file_id_str) + return ERR_PTR(-ENOENT); + + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, + file_id_str); + kfree(file_id_str); + if (!index_file_dentry) + return ERR_PTR(-ENOENT); + if (IS_ERR(index_file_dentry)) + return (struct data_file *)index_file_dentry; + if (!d_really_is_positive(index_file_dentry)) { + result = ERR_PTR(-ENOENT); + goto out; + } + + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = index_file_dentry + }; + + bf = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, mi->mi_owner); + if (IS_ERR(bf)) { + result = (struct data_file *)bf; + goto out; + } + + result = incfs_open_data_file(mi, bf); + fput(bf); + if (IS_ERR(result)) + goto out; + + result->df_mapped_offset = df->df_metadata_off; + +out: + dput(index_file_dentry); + return result; +} + struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) { struct data_file *df = NULL; @@ -167,6 +248,13 @@ struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) if (size > 0) df->df_data_block_count = get_blocks_count_for_size(size); + if (df->df_header_flags & INCFS_FILE_MAPPED) { + struct data_file *mapped_df = handle_mapped_file(mi, df); + + incfs_free_data_file(df); + return mapped_df; + } + md_records = incfs_scan_metadata_chain(df); if (md_records < 0) error = md_records; diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 88b0ec7cbd98..96b581a564bd 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -235,6 +235,9 @@ struct data_file { /* Total number of blocks, data + hash */ int df_total_block_count; + /* For mapped files, the offset into the actual file */ + loff_t df_mapped_offset; + struct file_attr n_attr; struct mtree *df_hash_tree; @@ -271,6 +274,8 @@ int incfs_realloc_mount_info(struct mount_info *mi, void incfs_free_mount_info(struct mount_info *mi); +char *file_id_to_str(incfs_uuid_t id); +struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name); struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); void incfs_free_data_file(struct data_file *df); diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 3261d5d4b8f6..a5239a5a17f5 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -233,8 +233,7 @@ int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags) return -EFAULT; return write_to_bf(bfc, &flags, sizeof(flags), - offsetof(struct incfs_file_header, - fh_file_header_flags)); + offsetof(struct incfs_file_header, fh_flags)); } /* @@ -385,7 +384,7 @@ err: /* * Write a backing file header * It should always be called only on empty file. - * incfs_super_block.s_first_md_offset is 0 for now, but will be updated + * fh.fh_first_md_offset is 0 for now, but will be updated * once first metadata record is added. */ int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, @@ -415,6 +414,38 @@ int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, return write_to_bf(bfc, &fh, sizeof(fh), file_pos); } +/* + * Write a backing file header for a mapping file + * It should always be called only on empty file. + */ +int incfs_write_mapping_fh_to_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size, u64 offset) +{ + struct incfs_file_header fh = {}; + loff_t file_pos = 0; + + if (!bfc) + return -EFAULT; + + fh.fh_magic = cpu_to_le64(INCFS_MAGIC_NUMBER); + fh.fh_version = cpu_to_le64(INCFS_FORMAT_CURRENT_VER); + fh.fh_header_size = cpu_to_le16(sizeof(fh)); + fh.fh_original_offset = cpu_to_le64(offset); + fh.fh_data_block_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); + + fh.fh_mapped_file_size = cpu_to_le64(file_size); + fh.fh_original_uuid = *uuid; + fh.fh_flags = INCFS_FILE_MAPPED; + + LOCK_REQUIRED(bfc->bc_mutex); + + file_pos = incfs_get_end_offset(bfc->bc_file); + if (file_pos != 0) + return -EEXIST; + + return write_to_bf(bfc, &fh, sizeof(fh), file_pos); +} + /* Write a given data block and update file's blockmap to point it. */ int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, struct mem_range block, int block_index, @@ -598,7 +629,7 @@ int incfs_read_file_header(struct backing_file_context *bfc, if (file_size) *file_size = le64_to_cpu(fh.fh_file_size); if (flags) - *flags = le32_to_cpu(fh.fh_file_header_flags); + *flags = le32_to_cpu(fh.fh_flags); return 0; } diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 1a83349bb2eb..90cadce26c55 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -72,7 +72,7 @@ * * * +-------------------------------------------+ - * | incfs_super_block |]---+ + * | incfs_file_header |]---+ * +-------------------------------------------+ | * | metadata |<---+ * | incfs_file_signature |]---+ @@ -123,6 +123,7 @@ enum incfs_metadata_type { enum incfs_file_header_flags { INCFS_FILE_COMPLETE = 1 << 0, + INCFS_FILE_MAPPED = 1 << 1, }; /* Header included at the beginning of all metadata records on the disk. */ @@ -164,20 +165,33 @@ struct incfs_file_header { __le16 fh_data_block_size; /* File flags, from incfs_file_header_flags */ - __le32 fh_file_header_flags; + __le32 fh_flags; - /* Offset of the first metadata record */ - __le64 fh_first_md_offset; + union { + /* Standard incfs file */ + struct { + /* Offset of the first metadata record */ + __le64 fh_first_md_offset; - /* - * Put file specific information after this point - */ + /* Full size of the file's content */ + __le64 fh_file_size; - /* Full size of the file's content */ - __le64 fh_file_size; + /* File uuid */ + incfs_uuid_t fh_uuid; + }; - /* File uuid */ - incfs_uuid_t fh_uuid; + /* Mapped file - INCFS_FILE_MAPPED set in fh_flags */ + struct { + /* Offset in original file */ + __le64 fh_original_offset; + + /* Full size of the file's content */ + __le64 fh_mapped_file_size; + + /* Original file's uuid */ + incfs_uuid_t fh_original_uuid; + }; + }; } __packed; enum incfs_block_map_entry_flags { @@ -294,6 +308,9 @@ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); +int incfs_write_mapping_fh_to_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size, u64 offset); + int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, struct mem_range block, int block_index, loff_t bm_base_off, diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 10b6727c8fd9..a374427a4e67 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -735,26 +735,6 @@ static int incfs_init_dentry(struct dentry *dentry, struct path *path) return 0; } -static struct dentry *incfs_lookup_dentry(struct dentry *parent, - const char *name) -{ - struct inode *inode; - struct dentry *result = NULL; - - if (!parent) - return ERR_PTR(-EFAULT); - - inode = d_inode(parent); - inode_lock_nested(inode, I_MUTEX_PARENT); - result = lookup_one_len(name, parent, strlen(name)); - inode_unlock(inode); - - if (IS_ERR(result)) - pr_warn("%s err:%ld\n", __func__, PTR_ERR(result)); - - return result; -} - static struct dentry *open_or_create_index_dir(struct dentry *backing_dir) { static const char name[] = ".index"; @@ -809,7 +789,8 @@ static int read_single_page(struct file *f, struct page *page) page_start = kmap(page); offset = page_offset(page); - block_index = offset / INCFS_DATA_FILE_BLOCK_SIZE; + block_index = (offset + df->df_mapped_offset) / + INCFS_DATA_FILE_BLOCK_SIZE; size = df->df_size; timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; @@ -851,19 +832,6 @@ err: return result; } -static char *file_id_to_str(incfs_uuid_t id) -{ - char *result = kmalloc(1 + sizeof(id.bytes) * 2, GFP_NOFS); - char *end; - - if (!result) - return NULL; - - end = bin2hex(result, id.bytes, sizeof(id.bytes)); - *end = 0; - return result; -} - static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original, u64 size) { @@ -981,6 +949,54 @@ out: return error; } +static int init_new_mapped_file(struct mount_info *mi, struct dentry *dentry, + incfs_uuid_t *uuid, u64 size, u64 offset) +{ + struct path path = {}; + struct file *new_file; + int error = 0; + struct backing_file_context *bfc = NULL; + + if (!mi || !dentry || !uuid) + return -EFAULT; + + /* Resize newly created file to its true size. */ + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = dentry + }; + new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, + mi->mi_owner); + + if (IS_ERR(new_file)) { + error = PTR_ERR(new_file); + goto out; + } + + bfc = incfs_alloc_bfc(new_file); + fput(new_file); + if (IS_ERR(bfc)) { + error = PTR_ERR(bfc); + bfc = NULL; + goto out; + } + + mutex_lock(&bfc->bc_mutex); + error = incfs_write_mapping_fh_to_backing_file(bfc, uuid, size, offset); + if (error) + goto out; + +out: + if (bfc) { + mutex_unlock(&bfc->bc_mutex); + incfs_free_bfc(bfc); + } + + if (error) + pr_debug("incfs: %s error: %d\n", __func__, error); + return error; +} + static int incfs_link(struct dentry *what, struct dentry *where) { struct dentry *parent_dentry = dget_parent(where); @@ -1269,7 +1285,7 @@ static long ioctl_create_file(struct mount_info *mi, if (error) goto delete_index_file; - /* Linking a file with it's real name from the requested dir. */ + /* Linking a file with its real name from the requested dir. */ error = incfs_link(index_file_dentry, named_file_dentry); if (!error) @@ -1482,6 +1498,150 @@ static long ioctl_get_filled_blocks(struct file *f, void __user *arg) return error; } +static long ioctl_create_mapped_file(struct mount_info *mi, void __user *arg) +{ + struct incfs_create_mapped_file_args __user *args_usr_ptr = arg; + struct incfs_create_mapped_file_args args = {}; + char *file_name; + int error = 0; + struct path parent_dir_path = {}; + char *source_file_name = NULL; + struct dentry *source_file_dentry = NULL; + u64 source_file_size; + struct dentry *file_dentry = NULL; + struct inode *parent_inode; + __le64 size_attr_value; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) + return -EINVAL; + + file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); + if (IS_ERR(file_name)) { + error = PTR_ERR(file_name); + file_name = NULL; + goto out; + } + + error = validate_name(file_name); + if (error) + goto out; + + if (args.source_offset % INCFS_DATA_FILE_BLOCK_SIZE) { + error = -EINVAL; + goto out; + } + + /* Validate file mapping is in range */ + source_file_name = file_id_to_str(args.source_file_id); + if (!source_file_name) { + pr_warn("Failed to alloc source_file_name\n"); + error = -ENOMEM; + goto out; + } + + source_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, + source_file_name); + if (!source_file_dentry) { + pr_warn("Source file does not exist\n"); + error = -EINVAL; + goto out; + } + if (IS_ERR(source_file_dentry)) { + pr_warn("Error opening source file\n"); + error = PTR_ERR(source_file_dentry); + source_file_dentry = NULL; + goto out; + } + if (!d_really_is_positive(source_file_dentry)) { + pr_warn("Source file dentry negative\n"); + error = -EINVAL; + goto out; + } + + error = vfs_getxattr(source_file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value)); + if (error < 0) + goto out; + + if (error != sizeof(size_attr_value)) { + pr_warn("Mapped file has no size attr\n"); + error = -EINVAL; + goto out; + } + + source_file_size = le64_to_cpu(size_attr_value); + if (args.source_offset + args.size > source_file_size) { + pr_warn("Mapped file out of range\n"); + error = -EINVAL; + goto out; + } + + /* Find a directory to put the file into. */ + error = dir_relative_path_resolve(mi, + u64_to_user_ptr(args.directory_path), + &parent_dir_path); + if (error) + goto out; + + if (parent_dir_path.dentry == mi->mi_index_dir) { + /* Can't create a file directly inside .index */ + error = -EBUSY; + goto out; + } + + /* Look up a dentry in the parent dir. It should be negative. */ + file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, + file_name); + if (!file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(file_dentry)) { + error = PTR_ERR(file_dentry); + file_dentry = NULL; + goto out; + } + if (d_really_is_positive(file_dentry)) { + error = -EEXIST; + goto out; + } + + parent_inode = d_inode(parent_dir_path.dentry); + inode_lock_nested(parent_inode, I_MUTEX_PARENT); + error = vfs_create(parent_inode, file_dentry, args.mode | 0222, true); + inode_unlock(parent_inode); + if (error) + goto out; + + /* Save the file's size as an xattr for easy fetching in future. */ + size_attr_value = cpu_to_le64(args.size); + error = vfs_setxattr(file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value), + XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_file; + } + + error = init_new_mapped_file(mi, file_dentry, &args.source_file_id, + size_attr_value, cpu_to_le64(args.source_offset)); + if (error) + goto delete_file; + + goto out; + +delete_file: + incfs_unlink(file_dentry); + +out: + dput(file_dentry); + dput(source_file_dentry); + path_put(&parent_dir_path); + kfree(file_name); + kfree(source_file_name); + return error; +} + static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) { struct mount_info *mi = get_mount_info(file_superblock(f)); @@ -1497,6 +1657,8 @@ static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) return ioctl_read_file_signature(f, (void __user *)arg); case INCFS_IOC_GET_FILLED_BLOCKS: return ioctl_get_filled_blocks(f, (void __user *)arg); + case INCFS_IOC_CREATE_MAPPED_FILE: + return ioctl_create_mapped_file(mi, (void __user *)arg); default: return -EINVAL; } diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 13c3d5173e14..a5ec85968f62 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -92,6 +92,10 @@ #define INCFS_IOC_GET_FILLED_BLOCKS \ _IOR(INCFS_IOCTL_BASE_CODE, 34, struct incfs_get_filled_blocks_args) +/* Creates a new mapped file */ +#define INCFS_IOC_CREATE_MAPPED_FILE \ + _IOWR(INCFS_IOCTL_BASE_CODE, 35, struct incfs_create_mapped_file_args) + enum incfs_compression_alg { COMPRESSION_NONE = 0, COMPRESSION_LZ4 = 1 @@ -331,4 +335,50 @@ struct incfs_get_filled_blocks_args { __u32 index_out; }; +/* + * Create a new mapped file + * Argument for INCFS_IOC_CREATE_MAPPED_FILE + */ +struct incfs_create_mapped_file_args { + /* + * Total size of the new file. + */ + __aligned_u64 size; + + /* + * File mode. Permissions and dir flag. + */ + __u16 mode; + + __u16 reserved1; + + __u32 reserved2; + + /* + * A pointer to a null-terminated relative path to the incfs mount + * point + * Max length: PATH_MAX + * + * Equivalent to: char *directory_path; + */ + __aligned_u64 directory_path; + + /* + * A pointer to a null-terminated file name. + * Max length: PATH_MAX + * + * Equivalent to: char *file_name; + */ + __aligned_u64 file_name; + + /* Id of source file to map. */ + incfs_uuid_t source_file_id; + + /* + * Offset in source file to start mapping. Must be a multiple of + * INCFS_DATA_FILE_BLOCK_SIZE + */ + __aligned_u64 source_offset; +}; + #endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index a3ab9f2a7e67..4c3034c4cfd9 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2232,7 +2232,8 @@ failure: return TEST_FAILURE; } -static int emit_partial_test_file_data(const char *mount_dir, struct test_file *file) +static int emit_partial_test_file_data(const char *mount_dir, + struct test_file *file) { int i, j; int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; @@ -2455,7 +2456,8 @@ failure: return TEST_FAILURE; } -static int emit_partial_test_file_hash(const char *mount_dir, struct test_file *file) +static int emit_partial_test_file_hash(const char *mount_dir, + struct test_file *file) { int err; int fd; @@ -2679,6 +2681,166 @@ static int large_file_test(const char *mount_dir) failure: close(fd); close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return result; +} + +static int validate_mapped_file(const char *orig_name, const char *name, + size_t size, size_t offset) +{ + struct stat st; + int orig_fd = -1, fd = -1; + size_t block; + int result = TEST_FAILURE; + + if (stat(name, &st)) { + ksft_print_msg("Failed to stat %s with error %s\n", + name, strerror(errno)); + goto failure; + } + + if (size != st.st_size) { + ksft_print_msg("Mismatched file sizes for file %s - expected %llu, got %llu\n", + name, size, st.st_size); + goto failure; + } + + fd = open(name, O_RDONLY | O_CLOEXEC); + if (fd == -1) { + ksft_print_msg("Failed to open %s with error %s\n", name, + strerror(errno)); + goto failure; + } + + orig_fd = open(orig_name, O_RDONLY | O_CLOEXEC); + if (orig_fd == -1) { + ksft_print_msg("Failed to open %s with error %s\n", orig_name, + strerror(errno)); + goto failure; + } + + for (block = 0; block < size; block += INCFS_DATA_FILE_BLOCK_SIZE) { + uint8_t orig_data[INCFS_DATA_FILE_BLOCK_SIZE]; + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + ssize_t orig_read, mapped_read; + + orig_read = pread(orig_fd, orig_data, + INCFS_DATA_FILE_BLOCK_SIZE, block + offset); + mapped_read = pread(fd, data, INCFS_DATA_FILE_BLOCK_SIZE, + block); + + if (orig_read < mapped_read || + mapped_read != min(size - block, + INCFS_DATA_FILE_BLOCK_SIZE)) { + ksft_print_msg("Failed to read enough data: %llu %llu %llu %lld %lld\n", + block, size, offset, orig_read, + mapped_read); + goto failure; + } + + if (memcmp(orig_data, data, mapped_read)) { + ksft_print_msg("Data doesn't match: %llu %llu %llu %lld %lld\n", + block, size, offset, orig_read, + mapped_read); + goto failure; + } + } + + result = TEST_SUCCESS; + +failure: + close(orig_fd); + close(fd); + return result; +} + +static int mapped_file_test(const char *mount_dir) +{ + char *backing_dir; + int result = TEST_FAILURE; + int cmd_fd = -1; + int i; + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + for (i = 0; i < file_num; ++i) { + struct test_file *file = &test.files[i]; + size_t blocks = file->size / INCFS_DATA_FILE_BLOCK_SIZE; + size_t mapped_offset = blocks / 4 * + INCFS_DATA_FILE_BLOCK_SIZE; + size_t mapped_size = file->size / 4 * 3 - mapped_offset; + struct incfs_create_mapped_file_args mfa; + char mapped_file_name[FILENAME_MAX]; + char orig_file_path[PATH_MAX]; + char mapped_file_path[PATH_MAX]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, file->size, + NULL) < 0) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + + if (snprintf(mapped_file_name, ARRAY_SIZE(mapped_file_name), + "%s.mapped", file->name) < 0) + goto failure; + + mfa = (struct incfs_create_mapped_file_args) { + .size = mapped_size, + .mode = 0664, + .file_name = ptr_to_u64(mapped_file_name), + .source_file_id = file->id, + .source_offset = mapped_offset, + }; + + result = ioctl(cmd_fd, INCFS_IOC_CREATE_MAPPED_FILE, &mfa); + if (result) { + ksft_print_msg( + "Failed to create mapped file with error %d\n", + result); + goto failure; + } + + result = snprintf(orig_file_path, + ARRAY_SIZE(orig_file_path), "%s/%s", + mount_dir, file->name); + + if (result < 0 || result >= ARRAY_SIZE(mapped_file_path)) { + result = TEST_FAILURE; + goto failure; + } + + result = snprintf(mapped_file_path, + ARRAY_SIZE(mapped_file_path), "%s/%s", + mount_dir, mapped_file_name); + + if (result < 0 || result >= ARRAY_SIZE(mapped_file_path)) { + result = TEST_FAILURE; + goto failure; + } + + result = validate_mapped_file(orig_file_path, mapped_file_path, + mapped_size, mapped_offset); + if (result) + goto failure; + } + +failure: + close(cmd_fd); + umount(mount_dir); + free(backing_dir); return result; } @@ -2791,6 +2953,7 @@ int main(int argc, char *argv[]) MAKE_TEST(get_blocks_test), MAKE_TEST(get_hash_blocks_test), MAKE_TEST(large_file_test), + MAKE_TEST(mapped_file_test), }; #undef MAKE_TEST From 20ec909ffbfc8c56eacd9a8b3a11d619b7077192 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 30 Jul 2020 12:46:16 -0700 Subject: [PATCH 443/636] ANDROID: Incremental fs: Add UID to pending_read Test: incfs_test passes Bug: 160634477 Signed-off-by: Paul Lawrence Change-Id: Iaf817cf1f7ccd0109b2114b425ea7f26718345ab --- fs/incfs/data_mgmt.c | 74 ++++++++---- fs/incfs/data_mgmt.h | 6 + fs/incfs/main.c | 9 ++ fs/incfs/vfs.c | 95 +++++++++++----- include/uapi/linux/incrementalfs.h | 45 ++++++++ .../selftests/filesystems/incfs/incfs_test.c | 106 +++++++++++++----- .../selftests/filesystems/incfs/utils.c | 28 +++++ .../selftests/filesystems/incfs/utils.h | 3 + 8 files changed, 292 insertions(+), 74 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index ccb1b0d1f3c7..8e58f6edb555 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -391,6 +391,7 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, s64 relative_us; union log_record record; size_t record_size; + uid_t uid = current_uid().val; /* * This may read the old value, but it's OK to delay the logging start @@ -412,12 +413,14 @@ static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, relative_us = now_us - head->base_record.absolute_ts_us; if (memcmp(id, &head->base_record.file_id, sizeof(incfs_uuid_t)) || - relative_us >= 1ll << 32) { + relative_us >= 1ll << 32 || + uid != head->base_record.uid) { record.full_record = (struct full_record){ .type = FULL, .block_index = block_index, .file_id = *id, .absolute_ts_us = now_us, + .uid = uid, }; head->base_record.file_id = *id; record_size = sizeof(struct full_record); @@ -833,6 +836,7 @@ static struct pending_read *add_pending_read(struct data_file *df, result->file_id = df->df_id; result->block_index = block_index; result->timestamp_us = ktime_to_us(ktime_get()); + result->uid = current_uid().val; spin_lock(&mi->pending_read_lock); @@ -1396,6 +1400,7 @@ bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number) int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, struct incfs_pending_read_info *reads, + struct incfs_pending_read_info2 *reads2, int reads_size, int *new_max_sn) { int reported_reads = 0; @@ -1424,10 +1429,24 @@ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, if (entry->serial_number <= sn_lowerbound) continue; - reads[reported_reads].file_id = entry->file_id; - reads[reported_reads].block_index = entry->block_index; - reads[reported_reads].serial_number = entry->serial_number; - reads[reported_reads].timestamp_us = entry->timestamp_us; + if (reads) { + reads[reported_reads].file_id = entry->file_id; + reads[reported_reads].block_index = entry->block_index; + reads[reported_reads].serial_number = + entry->serial_number; + reads[reported_reads].timestamp_us = + entry->timestamp_us; + } + + if (reads2) { + reads2[reported_reads].file_id = entry->file_id; + reads2[reported_reads].block_index = entry->block_index; + reads2[reported_reads].serial_number = + entry->serial_number; + reads2[reported_reads].timestamp_us = + entry->timestamp_us; + reads2[reported_reads].uid = entry->uid; + } if (entry->serial_number > *new_max_sn) *new_max_sn = entry->serial_number; @@ -1473,8 +1492,9 @@ int incfs_get_uncollected_logs_count(struct mount_info *mi, } int incfs_collect_logged_reads(struct mount_info *mi, - struct read_log_state *reader_state, + struct read_log_state *state, struct incfs_pending_read_info *reads, + struct incfs_pending_read_info2 *reads2, int reads_size) { int dst_idx; @@ -1485,36 +1505,48 @@ int incfs_collect_logged_reads(struct mount_info *mi, head = &log->rl_head; tail = &log->rl_tail; - if (reader_state->generation_id != head->generation_id) { + if (state->generation_id != head->generation_id) { pr_debug("read ptr is wrong generation: %u/%u", - reader_state->generation_id, head->generation_id); + state->generation_id, head->generation_id); - *reader_state = (struct read_log_state){ + *state = (struct read_log_state){ .generation_id = head->generation_id, }; } - if (reader_state->current_record_no < tail->current_record_no) { + if (state->current_record_no < tail->current_record_no) { pr_debug("read ptr is behind, moving: %u/%u -> %u/%u\n", - (u32)reader_state->next_offset, - (u32)reader_state->current_pass_no, + (u32)state->next_offset, + (u32)state->current_pass_no, (u32)tail->next_offset, (u32)tail->current_pass_no); - *reader_state = *tail; + *state = *tail; } for (dst_idx = 0; dst_idx < reads_size; dst_idx++) { - if (reader_state->current_record_no == head->current_record_no) + if (state->current_record_no == head->current_record_no) break; - log_read_one_record(log, reader_state); + log_read_one_record(log, state); - reads[dst_idx] = (struct incfs_pending_read_info){ - .file_id = reader_state->base_record.file_id, - .block_index = reader_state->base_record.block_index, - .serial_number = reader_state->current_record_no, - .timestamp_us = reader_state->base_record.absolute_ts_us - }; + if (reads) + reads[dst_idx] = (struct incfs_pending_read_info) { + .file_id = state->base_record.file_id, + .block_index = state->base_record.block_index, + .serial_number = state->current_record_no, + .timestamp_us = + state->base_record.absolute_ts_us, + }; + + if (reads2) + reads2[dst_idx] = (struct incfs_pending_read_info2) { + .file_id = state->base_record.file_id, + .block_index = state->base_record.block_index, + .serial_number = state->current_record_no, + .timestamp_us = + state->base_record.absolute_ts_us, + .uid = state->base_record.uid, + }; } spin_unlock(&log->rl_lock); diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 96b581a564bd..77d0950f9a22 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -34,6 +34,7 @@ struct full_record { u32 block_index : 30; incfs_uuid_t file_id; u64 absolute_ts_us; + uid_t uid; } __packed; /* 28 bytes */ struct same_file_record { @@ -103,6 +104,7 @@ struct mount_options { unsigned int read_log_wakeup_count; bool no_backing_file_cache; bool no_backing_file_readahead; + bool report_uid; }; struct mount_info { @@ -174,6 +176,8 @@ struct pending_read { int serial_number; + uid_t uid; + struct list_head mi_reads_list; struct list_head segment_reads_list; @@ -308,11 +312,13 @@ bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); */ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, struct incfs_pending_read_info *reads, + struct incfs_pending_read_info2 *reads2, int reads_size, int *new_max_sn); int incfs_collect_logged_reads(struct mount_info *mi, struct read_log_state *start_state, struct incfs_pending_read_info *reads, + struct incfs_pending_read_info2 *reads2, int reads_size); struct read_log_state incfs_get_log_state(struct mount_info *mi); int incfs_get_uncollected_logs_count(struct mount_info *mi, diff --git a/fs/incfs/main.c b/fs/incfs/main.c index e65d0d895128..2b8161f6c83a 100644 --- a/fs/incfs/main.c +++ b/fs/incfs/main.c @@ -30,8 +30,17 @@ static ssize_t corefs_show(struct kobject *kobj, static struct kobj_attribute corefs_attr = __ATTR_RO(corefs); +static ssize_t report_uid_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "supported\n"); +} + +static struct kobj_attribute report_uid_attr = __ATTR_RO(report_uid); + static struct attribute *attributes[] = { &corefs_attr.attr, + &report_uid_attr.attr, NULL, }; diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index a374427a4e67..343e7c1bb16c 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -217,6 +217,7 @@ enum parse_parameter { Opt_no_backing_file_readahead, Opt_rlog_pages, Opt_rlog_wakeup_cnt, + Opt_report_uid, Opt_err }; @@ -239,6 +240,7 @@ static const match_table_t option_tokens = { { Opt_no_backing_file_readahead, "no_bf_readahead=%u" }, { Opt_rlog_pages, "rlog_pages=%u" }, { Opt_rlog_wakeup_cnt, "rlog_wakeup_cnt=%u" }, + { Opt_report_uid, "report_uid" }, { Opt_err, NULL } }; @@ -299,6 +301,9 @@ static int parse_options(struct mount_options *opts, char *str) return -EINVAL; opts->read_log_wakeup_count = value; break; + case Opt_report_uid: + opts->report_uid = true; + break; default: return -EINVAL; } @@ -462,8 +467,12 @@ static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, { struct pending_reads_state *pr_state = f->private_data; struct mount_info *mi = get_mount_info(file_superblock(f)); + bool report_uid; + unsigned long page = 0; struct incfs_pending_read_info *reads_buf = NULL; - size_t reads_to_collect = len / sizeof(*reads_buf); + struct incfs_pending_read_info2 *reads_buf2 = NULL; + size_t record_size; + size_t reads_to_collect; int last_known_read_sn = READ_ONCE(pr_state->last_pending_read_sn); int new_max_sn = last_known_read_sn; int reads_collected = 0; @@ -472,18 +481,29 @@ static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, if (!mi) return -EFAULT; + report_uid = mi->mi_options.report_uid; + record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf); + reads_to_collect = len / record_size; + if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) return 0; - reads_buf = (struct incfs_pending_read_info *)get_zeroed_page(GFP_NOFS); - if (!reads_buf) + page = get_zeroed_page(GFP_NOFS); + if (!page) return -ENOMEM; - reads_to_collect = - min_t(size_t, PAGE_SIZE / sizeof(*reads_buf), reads_to_collect); + if (report_uid) + reads_buf2 = (struct incfs_pending_read_info2 *) page; + else + reads_buf = (struct incfs_pending_read_info *) page; + + reads_to_collect = + min_t(size_t, PAGE_SIZE / record_size, reads_to_collect); + + reads_collected = incfs_collect_pending_reads(mi, last_known_read_sn, + reads_buf, reads_buf2, reads_to_collect, + &new_max_sn); - reads_collected = incfs_collect_pending_reads( - mi, last_known_read_sn, reads_buf, reads_to_collect, &new_max_sn); if (reads_collected < 0) { result = reads_collected; goto out; @@ -494,19 +514,19 @@ static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, * to reads buffer than userspace can handle. */ reads_collected = min_t(size_t, reads_collected, reads_to_collect); - result = reads_collected * sizeof(*reads_buf); + result = reads_collected * record_size; /* Copy reads info to the userspace buffer */ - if (copy_to_user(buf, reads_buf, result)) { + if (copy_to_user(buf, (void *)page, result)) { result = -EFAULT; goto out; } WRITE_ONCE(pr_state->last_pending_read_sn, new_max_sn); *ppos = 0; + out: - if (reads_buf) - free_page((unsigned long)reads_buf); + free_page(page); return result; } @@ -588,18 +608,35 @@ static ssize_t log_read(struct file *f, char __user *buf, size_t len, int total_reads_collected = 0; int rl_size; ssize_t result = 0; - struct incfs_pending_read_info *reads_buf; - ssize_t reads_to_collect = len / sizeof(*reads_buf); - ssize_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf); + bool report_uid; + unsigned long page = 0; + struct incfs_pending_read_info *reads_buf = NULL; + struct incfs_pending_read_info2 *reads_buf2 = NULL; + size_t record_size; + ssize_t reads_to_collect; + ssize_t reads_per_page; + + if (!mi) + return -EFAULT; + + report_uid = mi->mi_options.report_uid; + record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf); + reads_to_collect = len / record_size; + reads_per_page = PAGE_SIZE / record_size; rl_size = READ_ONCE(mi->mi_log.rl_size); if (rl_size == 0) return 0; - reads_buf = (struct incfs_pending_read_info *)__get_free_page(GFP_NOFS); - if (!reads_buf) + page = __get_free_page(GFP_NOFS); + if (!page) return -ENOMEM; + if (report_uid) + reads_buf2 = (struct incfs_pending_read_info2 *) page; + else + reads_buf = (struct incfs_pending_read_info *) page; + reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect); while (reads_to_collect > 0) { struct read_log_state next_state; @@ -607,35 +644,32 @@ static ssize_t log_read(struct file *f, char __user *buf, size_t len, memcpy(&next_state, &log_state->state, sizeof(next_state)); reads_collected = incfs_collect_logged_reads( - mi, &next_state, reads_buf, + mi, &next_state, reads_buf, reads_buf2, min_t(ssize_t, reads_to_collect, reads_per_page)); if (reads_collected <= 0) { result = total_reads_collected ? - total_reads_collected * - sizeof(*reads_buf) : + total_reads_collected * record_size : reads_collected; goto out; } - if (copy_to_user(buf, reads_buf, - reads_collected * sizeof(*reads_buf))) { + if (copy_to_user(buf, (void *) page, + reads_collected * record_size)) { result = total_reads_collected ? - total_reads_collected * - sizeof(*reads_buf) : + total_reads_collected * record_size : -EFAULT; goto out; } memcpy(&log_state->state, &next_state, sizeof(next_state)); total_reads_collected += reads_collected; - buf += reads_collected * sizeof(*reads_buf); + buf += reads_collected * record_size; reads_to_collect -= reads_collected; } - result = total_reads_collected * sizeof(*reads_buf); + result = total_reads_collected * record_size; *ppos = 0; out: - if (reads_buf) - free_page((unsigned long)reads_buf); + free_page(page); return result; } @@ -2473,6 +2507,11 @@ static int incfs_remount_fs(struct super_block *sb, int *flags, char *data) if (err) return err; + if (options.report_uid != mi->mi_options.report_uid) { + pr_err("incfs: Can't change report_uid mount option on remount\n"); + return -EOPNOTSUPP; + } + err = incfs_realloc_mount_info(mi, &options); if (err) return err; @@ -2505,5 +2544,7 @@ static int show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",no_bf_cache"); if (mi->mi_options.no_backing_file_readahead) seq_puts(m, ",no_bf_readahead"); + if (mi->mi_options.report_uid) + seq_puts(m, ",report_uid"); return 0; } diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index a5ec85968f62..96ecd43b3d46 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -96,6 +96,23 @@ #define INCFS_IOC_CREATE_MAPPED_FILE \ _IOWR(INCFS_IOCTL_BASE_CODE, 35, struct incfs_create_mapped_file_args) +/* ===== sysfs feature flags ===== */ +/* + * Each flag is represented by a file in /sys/fs/incremental-fs/features + * If the file exists the feature is supported + * Also the file contents will be the line "supported" + */ + +/* + * Basic flag stating that the core incfs file system is available + */ +#define INCFS_FEATURE_FLAG_COREFS "corefs" + +/* + * report_uid mount option is supported + */ +#define INCFS_FEATURE_FLAG_REPORT_UID "report_uid" + enum incfs_compression_alg { COMPRESSION_NONE = 0, COMPRESSION_LZ4 = 1 @@ -113,6 +130,8 @@ typedef struct { /* * Description of a pending read. A pending read - a read call by * a userspace program for which the filesystem currently doesn't have data. + * + * Reads from .pending_reads and .log return an array of these structure */ struct incfs_pending_read_info { /* Id of a file that is being read from. */ @@ -128,6 +147,32 @@ struct incfs_pending_read_info { __u32 serial_number; }; +/* + * Description of a pending read. A pending read - a read call by + * a userspace program for which the filesystem currently doesn't have data. + * + * This version of incfs_pending_read_info is used whenever the file system is + * mounted with the report_uid flag + */ +struct incfs_pending_read_info2 { + /* Id of a file that is being read from. */ + incfs_uuid_t file_id; + + /* A number of microseconds since system boot to the read. */ + __aligned_u64 timestamp_us; + + /* Index of a file block that is being read. */ + __u32 block_index; + + /* A serial number of this pending read. */ + __u32 serial_number; + + /* The UID of the reading process */ + __u32 uid; + + __u32 reserved; +}; + /* * Description of a data or hash block to add to a data file. */ diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 4c3034c4cfd9..341ceef3f779 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -656,6 +656,55 @@ static int data_producer(const char *mount_dir, struct test_files_set *test_set) return ret; } +static int data_producer2(const char *mount_dir, + struct test_files_set *test_set) +{ + int ret = 0; + int timeout_ms = 1000; + struct incfs_pending_read_info2 prs[100] = {}; + int prs_size = ARRAY_SIZE(prs); + int fd = open_commands_file(mount_dir); + + if (fd < 0) + return -errno; + + while ((ret = wait_for_pending_reads2(fd, timeout_ms, prs, prs_size)) > + 0) { + int read_count = ret; + int i; + + for (i = 0; i < read_count; i++) { + int j = 0; + struct test_file *file = NULL; + + for (j = 0; j < test_set->files_count; j++) { + bool same = same_id(&(test_set->files[j].id), + &(prs[i].file_id)); + + if (same) { + file = &test_set->files[j]; + break; + } + } + if (!file) { + ksft_print_msg( + "Unknown file in pending reads.\n"); + break; + } + + ret = emit_test_block(mount_dir, file, + prs[i].block_index); + if (ret < 0) { + ksft_print_msg("Emitting test data error: %s\n", + strerror(-ret)); + break; + } + } + } + close(fd); + return ret; +} + static int build_mtree(struct test_file *file) { char data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; @@ -1746,7 +1795,8 @@ static int multiple_providers_test(const char *mount_dir) goto failure; /* Mount FS and release the backing file. (10s wait time) */ - if (mount_fs(mount_dir, backing_dir, 10000) != 0) + if (mount_fs_opt(mount_dir, backing_dir, + "read_timeout_ms=10000,report_uid", false) != 0) goto failure; cmd_fd = open_commands_file(mount_dir); @@ -1773,7 +1823,7 @@ static int multiple_providers_test(const char *mount_dir) * pending reads. */ - ret = data_producer(mount_dir, &test); + ret = data_producer2(mount_dir, &test); exit(-ret); } else if (producer_pid > 0) { producer_pids[i] = producer_pid; @@ -1949,10 +1999,12 @@ enum expected_log { FULL_LOG, NO_LOG, PARTIAL_LOG }; static int validate_logs(const char *mount_dir, int log_fd, struct test_file *file, - enum expected_log expected_log) + enum expected_log expected_log, + bool report_uid) { uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; struct incfs_pending_read_info prs[2048] = {}; + struct incfs_pending_read_info2 prs2[2048] = {}; int prs_size = ARRAY_SIZE(prs); int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; int expected_read_block_cnt; @@ -1989,8 +2041,15 @@ static int validate_logs(const char *mount_dir, int log_fd, goto failure; } - read_count = wait_for_pending_reads( - log_fd, expected_log == NO_LOG ? 10 : 0, prs, prs_size); + if (report_uid) + read_count = wait_for_pending_reads2(log_fd, + expected_log == NO_LOG ? 10 : 0, + prs2, prs_size); + else + read_count = wait_for_pending_reads(log_fd, + expected_log == NO_LOG ? 10 : 0, + prs, prs_size); + if (expected_log == NO_LOG) { if (read_count == 0) goto success; @@ -2027,7 +2086,9 @@ static int validate_logs(const char *mount_dir, int log_fd, } for (j = 0; j < read_count; i++, j++) { - struct incfs_pending_read_info *read = &prs[j]; + struct incfs_pending_read_info *read = report_uid ? + (struct incfs_pending_read_info *) &prs2[j] : + &prs[j]; if (!same_id(&read->file_id, &file->id)) { ksft_print_msg("Bad log read ino %s\n", file->name); @@ -2041,7 +2102,9 @@ static int validate_logs(const char *mount_dir, int log_fd, } if (j != 0) { - unsigned long psn = prs[j - 1].serial_number; + unsigned long psn = (report_uid) ? + prs2[j - 1].serial_number : + prs[j - 1].serial_number; if (read->serial_number != psn + 1) { ksft_print_msg("Bad log read sn %s %d %d.\n", @@ -2075,14 +2138,15 @@ static int read_log_test(const char *mount_dir) struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; int i = 0; - int cmd_fd = -1, log_fd = -1, drop_caches = -1; + int cmd_fd = -1, log_fd = -1; char *backing_dir; backing_dir = create_backing_dir(mount_dir); if (!backing_dir) goto failure; - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,report_uid", + false) != 0) goto failure; cmd_fd = open_commands_file(mount_dir); @@ -2109,7 +2173,7 @@ static int read_log_test(const char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) + if (validate_logs(mount_dir, log_fd, file, FULL_LOG, true)) goto failure; } @@ -2137,7 +2201,7 @@ static int read_log_test(const char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) + if (validate_logs(mount_dir, log_fd, file, FULL_LOG, false)) goto failure; } @@ -2164,19 +2228,14 @@ static int read_log_test(const char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, NO_LOG)) + if (validate_logs(mount_dir, log_fd, file, NO_LOG, false)) goto failure; } /* * Remount and check that logs start working again */ - drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); - if (drop_caches == -1) - goto failure; - i = write(drop_caches, "3", 1); - close(drop_caches); - if (i != 1) + if (drop_caches()) goto failure; if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=1", @@ -2187,19 +2246,14 @@ static int read_log_test(const char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG)) + if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG, false)) goto failure; } /* * Remount and check that logs start working again */ - drop_caches = open("/proc/sys/vm/drop_caches", O_WRONLY | O_CLOEXEC); - if (drop_caches == -1) - goto failure; - i = write(drop_caches, "3", 1); - close(drop_caches); - if (i != 1) + if (drop_caches()) goto failure; if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=4", @@ -2210,7 +2264,7 @@ static int read_log_test(const char *mount_dir) for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (validate_logs(mount_dir, log_fd, file, FULL_LOG)) + if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG, false)) goto failure; } diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index a80123294d5a..3bc1449c16da 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -272,6 +272,34 @@ int wait_for_pending_reads(int fd, int timeout_ms, return read_res / sizeof(*prs); } +int wait_for_pending_reads2(int fd, int timeout_ms, + struct incfs_pending_read_info2 *prs, int prs_count) +{ + ssize_t read_res = 0; + + if (timeout_ms > 0) { + int poll_res = 0; + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN + }; + + poll_res = poll(&pollfd, 1, timeout_ms); + if (poll_res < 0) + return -errno; + if (poll_res == 0) + return 0; + if (!(pollfd.revents | POLLIN)) + return 0; + } + + read_res = read(fd, prs, prs_count * sizeof(*prs)); + if (read_res < 0) + return -errno; + + return read_res / sizeof(*prs); +} + char *concat_file_name(const char *dir, char *file) { char full_name[FILENAME_MAX] = ""; diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index 5b59272dbb7f..39eb60333637 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -55,6 +55,9 @@ int open_log_file(const char *mount_dir); int wait_for_pending_reads(int fd, int timeout_ms, struct incfs_pending_read_info *prs, int prs_count); +int wait_for_pending_reads2(int fd, int timeout_ms, + struct incfs_pending_read_info2 *prs, int prs_count); + char *concat_file_name(const char *dir, char *file); void sha256(const char *data, size_t dsize, char *hash); From 9577751233cac38c7324a602ede38f0a666ec385 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 20 Aug 2020 12:45:41 -0700 Subject: [PATCH 444/636] ANDROID: Incremental fs: Separate pseudo-file code Also fixed two bugs in the process: is_pseudo_filename was not previously checking for .log, so an attempt to create a .log would succeed. All ioctls could be called on all files. ioctls now set on the correct files. Bug: 162856396 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I3f1e87d018836f51a97897880dd70181db4f7169 --- fs/incfs/Makefile | 1 + fs/incfs/pseudo_files.c | 1063 +++++++++++++++ fs/incfs/pseudo_files.h | 15 + fs/incfs/vfs.c | 1140 +---------------- fs/incfs/vfs.h | 25 + .../selftests/filesystems/incfs/incfs_test.c | 3 +- 6 files changed, 1152 insertions(+), 1095 deletions(-) create mode 100644 fs/incfs/pseudo_files.c create mode 100644 fs/incfs/pseudo_files.h diff --git a/fs/incfs/Makefile b/fs/incfs/Makefile index 8d734bf91ecd..0c3f6d348bec 100644 --- a/fs/incfs/Makefile +++ b/fs/incfs/Makefile @@ -6,4 +6,5 @@ incrementalfs-y := \ format.o \ integrity.o \ main.o \ + pseudo_files.o \ vfs.o diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c new file mode 100644 index 000000000000..5d61d8e866dc --- /dev/null +++ b/fs/incfs/pseudo_files.c @@ -0,0 +1,1063 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2020 Google LLC + */ + +#include +#include +#include +#include +#include + +#include + +#include "pseudo_files.h" + +#include "data_mgmt.h" +#include "format.h" +#include "integrity.h" +#include "vfs.h" + +#define INCFS_PENDING_READS_INODE 2 +#define INCFS_LOG_INODE 3 +#define READ_WRITE_FILE_MODE 0666 + +/******************************************************************************* + * .log pseudo file definition + ******************************************************************************/ +static const char log_file_name[] = INCFS_LOG_FILENAME; +static struct mem_range log_file_name_range = { + .data = (u8 *)log_file_name, + .len = ARRAY_SIZE(log_file_name) - 1 +}; + +/* State of an open .log file, unique for each file descriptor. */ +struct log_file_state { + struct read_log_state state; +}; + +static ssize_t log_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct log_file_state *log_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + int total_reads_collected = 0; + int rl_size; + ssize_t result = 0; + bool report_uid; + unsigned long page = 0; + struct incfs_pending_read_info *reads_buf = NULL; + struct incfs_pending_read_info2 *reads_buf2 = NULL; + size_t record_size; + ssize_t reads_to_collect; + ssize_t reads_per_page; + + if (!mi) + return -EFAULT; + + report_uid = mi->mi_options.report_uid; + record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf); + reads_to_collect = len / record_size; + reads_per_page = PAGE_SIZE / record_size; + + rl_size = READ_ONCE(mi->mi_log.rl_size); + if (rl_size == 0) + return 0; + + page = __get_free_page(GFP_NOFS); + if (!page) + return -ENOMEM; + + if (report_uid) + reads_buf2 = (struct incfs_pending_read_info2 *) page; + else + reads_buf = (struct incfs_pending_read_info *) page; + + reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect); + while (reads_to_collect > 0) { + struct read_log_state next_state; + int reads_collected; + + memcpy(&next_state, &log_state->state, sizeof(next_state)); + reads_collected = incfs_collect_logged_reads( + mi, &next_state, reads_buf, reads_buf2, + min_t(ssize_t, reads_to_collect, reads_per_page)); + if (reads_collected <= 0) { + result = total_reads_collected ? + total_reads_collected * record_size : + reads_collected; + goto out; + } + if (copy_to_user(buf, (void *) page, + reads_collected * record_size)) { + result = total_reads_collected ? + total_reads_collected * record_size : + -EFAULT; + goto out; + } + + memcpy(&log_state->state, &next_state, sizeof(next_state)); + total_reads_collected += reads_collected; + buf += reads_collected * record_size; + reads_to_collect -= reads_collected; + } + + result = total_reads_collected * record_size; + *ppos = 0; +out: + free_page(page); + return result; +} + +static __poll_t log_poll(struct file *file, poll_table *wait) +{ + struct log_file_state *log_state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + int count; + __poll_t ret = 0; + + poll_wait(file, &mi->mi_log.ml_notif_wq, wait); + count = incfs_get_uncollected_logs_count(mi, &log_state->state); + if (count >= mi->mi_options.read_log_wakeup_count) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static int log_open(struct inode *inode, struct file *file) +{ + struct log_file_state *log_state = NULL; + struct mount_info *mi = get_mount_info(file_superblock(file)); + + log_state = kzalloc(sizeof(*log_state), GFP_NOFS); + if (!log_state) + return -ENOMEM; + + log_state->state = incfs_get_log_state(mi); + file->private_data = log_state; + return 0; +} + +static int log_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static const struct file_operations incfs_log_file_ops = { + .read = log_read, + .poll = log_poll, + .open = log_open, + .release = log_release, + .llseek = noop_llseek, +}; + +/******************************************************************************* + * .pending_reads pseudo file definition + ******************************************************************************/ +static const char pending_reads_file_name[] = INCFS_PENDING_READS_FILENAME; +static struct mem_range pending_reads_file_name_range = { + .data = (u8 *)pending_reads_file_name, + .len = ARRAY_SIZE(pending_reads_file_name) - 1 +}; + +/* State of an open .pending_reads file, unique for each file descriptor. */ +struct pending_reads_state { + /* A serial number of the last pending read obtained from this file. */ + int last_pending_read_sn; +}; + +static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct pending_reads_state *pr_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + bool report_uid; + unsigned long page = 0; + struct incfs_pending_read_info *reads_buf = NULL; + struct incfs_pending_read_info2 *reads_buf2 = NULL; + size_t record_size; + size_t reads_to_collect; + int last_known_read_sn = READ_ONCE(pr_state->last_pending_read_sn); + int new_max_sn = last_known_read_sn; + int reads_collected = 0; + ssize_t result = 0; + + if (!mi) + return -EFAULT; + + report_uid = mi->mi_options.report_uid; + record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf); + reads_to_collect = len / record_size; + + if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) + return 0; + + page = get_zeroed_page(GFP_NOFS); + if (!page) + return -ENOMEM; + + if (report_uid) + reads_buf2 = (struct incfs_pending_read_info2 *) page; + else + reads_buf = (struct incfs_pending_read_info *) page; + + reads_to_collect = + min_t(size_t, PAGE_SIZE / record_size, reads_to_collect); + + reads_collected = incfs_collect_pending_reads(mi, last_known_read_sn, + reads_buf, reads_buf2, reads_to_collect, + &new_max_sn); + + if (reads_collected < 0) { + result = reads_collected; + goto out; + } + + /* + * Just to make sure that we don't accidentally copy more data + * to reads buffer than userspace can handle. + */ + reads_collected = min_t(size_t, reads_collected, reads_to_collect); + result = reads_collected * record_size; + + /* Copy reads info to the userspace buffer */ + if (copy_to_user(buf, (void *)page, result)) { + result = -EFAULT; + goto out; + } + + WRITE_ONCE(pr_state->last_pending_read_sn, new_max_sn); + *ppos = 0; + +out: + free_page(page); + return result; +} + +static __poll_t pending_reads_poll(struct file *file, poll_table *wait) +{ + struct pending_reads_state *state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + __poll_t ret = 0; + + poll_wait(file, &mi->mi_pending_reads_notif_wq, wait); + if (incfs_fresh_pending_reads_exist(mi, + state->last_pending_read_sn)) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static int pending_reads_open(struct inode *inode, struct file *file) +{ + struct pending_reads_state *state = NULL; + + state = kzalloc(sizeof(*state), GFP_NOFS); + if (!state) + return -ENOMEM; + + file->private_data = state; + return 0; +} + +static int pending_reads_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static long ioctl_permit_fill(struct file *f, void __user *arg) +{ + struct incfs_permit_fill __user *usr_permit_fill = arg; + struct incfs_permit_fill permit_fill; + long error = 0; + struct file *file = NULL; + + if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) + return -EFAULT; + + file = fget(permit_fill.file_descriptor); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (file->f_op != &incfs_file_ops) { + error = -EPERM; + goto out; + } + + if (file->f_inode->i_sb != f->f_inode->i_sb) { + error = -EPERM; + goto out; + } + + switch ((uintptr_t)file->private_data) { + case CANT_FILL: + file->private_data = (void *)CAN_FILL; + break; + + case CAN_FILL: + pr_debug("CAN_FILL already set"); + break; + + default: + pr_warn("Invalid file private data"); + error = -EFAULT; + goto out; + } + +out: + fput(file); + return error; +} + +static int chmod(struct dentry *dentry, umode_t mode) +{ + struct inode *inode = dentry->d_inode; + struct inode *delegated_inode = NULL; + struct iattr newattrs; + int error; + +retry_deleg: + inode_lock(inode); + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + error = notify_change(dentry, &newattrs, &delegated_inode); + inode_unlock(inode); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } + return error; +} + +static bool is_pseudo_filename(struct mem_range name) +{ + if (incfs_equal_ranges(pending_reads_file_name_range, name)) + return true; + if (incfs_equal_ranges(log_file_name_range, name)) + return true; + + return false; +} + +static int validate_name(char *file_name) +{ + struct mem_range name = range(file_name, strlen(file_name)); + int i = 0; + + if (name.len > INCFS_MAX_NAME_LEN) + return -ENAMETOOLONG; + + if (is_pseudo_filename(name)) + return -EINVAL; + + for (i = 0; i < name.len; i++) + if (name.data[i] == '/') + return -EINVAL; + + return 0; +} + +static int dir_relative_path_resolve( + struct mount_info *mi, + const char __user *relative_path, + struct path *result_path) +{ + struct path *base_path = &mi->mi_backing_dir_path; + int dir_fd = get_unused_fd_flags(0); + struct file *dir_f = NULL; + int error = 0; + + if (dir_fd < 0) + return dir_fd; + + dir_f = dentry_open(base_path, O_RDONLY | O_NOATIME, mi->mi_owner); + + if (IS_ERR(dir_f)) { + error = PTR_ERR(dir_f); + goto out; + } + fd_install(dir_fd, dir_f); + + if (!relative_path) { + /* No relative path given, just return the base dir. */ + *result_path = *base_path; + path_get(result_path); + goto out; + } + + error = user_path_at_empty(dir_fd, relative_path, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, result_path, NULL); + +out: + ksys_close(dir_fd); + if (error) + pr_debug("incfs: %s %d\n", __func__, error); + return error; +} + +static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original, + u64 size) +{ + u8 *result; + + if (!original) + return range(NULL, 0); + + if (size > INCFS_MAX_SIGNATURE_SIZE) + return range(ERR_PTR(-EFAULT), 0); + + result = kzalloc(size, GFP_NOFS | __GFP_COMP); + if (!result) + return range(ERR_PTR(-ENOMEM), 0); + + if (copy_from_user(result, original, size)) { + kfree(result); + return range(ERR_PTR(-EFAULT), 0); + } + + return range(result, size); +} + +static int init_new_file(struct mount_info *mi, struct dentry *dentry, + incfs_uuid_t *uuid, u64 size, struct mem_range attr, + u8 __user *user_signature_info, u64 signature_size) +{ + struct path path = {}; + struct file *new_file; + int error = 0; + struct backing_file_context *bfc = NULL; + u32 block_count; + struct mem_range raw_signature = { NULL }; + struct mtree *hash_tree = NULL; + + if (!mi || !dentry || !uuid) + return -EFAULT; + + /* Resize newly created file to its true size. */ + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = dentry + }; + new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, + mi->mi_owner); + + if (IS_ERR(new_file)) { + error = PTR_ERR(new_file); + goto out; + } + + bfc = incfs_alloc_bfc(new_file); + fput(new_file); + if (IS_ERR(bfc)) { + error = PTR_ERR(bfc); + bfc = NULL; + goto out; + } + + mutex_lock(&bfc->bc_mutex); + error = incfs_write_fh_to_backing_file(bfc, uuid, size); + if (error) + goto out; + + if (attr.data && attr.len) { + error = incfs_write_file_attr_to_backing_file(bfc, + attr, NULL); + if (error) + goto out; + } + + block_count = (u32)get_blocks_count_for_size(size); + + if (user_signature_info) { + raw_signature = incfs_copy_signature_info_from_user( + user_signature_info, signature_size); + + if (IS_ERR(raw_signature.data)) { + error = PTR_ERR(raw_signature.data); + raw_signature.data = NULL; + goto out; + } + + hash_tree = incfs_alloc_mtree(raw_signature, block_count); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; + } + + error = incfs_write_signature_to_backing_file( + bfc, raw_signature, hash_tree->hash_tree_area_size); + if (error) + goto out; + + block_count += get_blocks_count_for_size( + hash_tree->hash_tree_area_size); + } + + if (block_count) + error = incfs_write_blockmap_to_backing_file(bfc, block_count); + + if (error) + goto out; +out: + if (bfc) { + mutex_unlock(&bfc->bc_mutex); + incfs_free_bfc(bfc); + } + incfs_free_mtree(hash_tree); + kfree(raw_signature.data); + + if (error) + pr_debug("incfs: %s error: %d\n", __func__, error); + return error; +} + +static long ioctl_create_file(struct mount_info *mi, + struct incfs_new_file_args __user *usr_args) +{ + struct incfs_new_file_args args; + char *file_id_str = NULL; + struct dentry *index_file_dentry = NULL; + struct dentry *named_file_dentry = NULL; + struct path parent_dir_path = {}; + struct inode *index_dir_inode = NULL; + __le64 size_attr_value = 0; + char *file_name = NULL; + char *attr_value = NULL; + int error = 0; + bool locked = false; + + if (!mi || !mi->mi_index_dir) { + error = -EFAULT; + goto out; + } + + if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { + error = -EFAULT; + goto out; + } + + file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); + if (IS_ERR(file_name)) { + error = PTR_ERR(file_name); + file_name = NULL; + goto out; + } + + error = validate_name(file_name); + if (error) + goto out; + + file_id_str = file_id_to_str(args.file_id); + if (!file_id_str) { + error = -ENOMEM; + goto out; + } + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + goto out; + locked = true; + + /* Find a directory to put the file into. */ + error = dir_relative_path_resolve(mi, + u64_to_user_ptr(args.directory_path), + &parent_dir_path); + if (error) + goto out; + + if (parent_dir_path.dentry == mi->mi_index_dir) { + /* Can't create a file directly inside .index */ + error = -EBUSY; + goto out; + } + + /* Look up a dentry in the parent dir. It should be negative. */ + named_file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, + file_name); + if (!named_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(named_file_dentry)) { + error = PTR_ERR(named_file_dentry); + named_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(named_file_dentry)) { + /* File with this path already exists. */ + error = -EEXIST; + goto out; + } + /* Look up a dentry in the .index dir. It should be negative. */ + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); + if (!index_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(index_file_dentry)) { + error = PTR_ERR(index_file_dentry); + index_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(index_file_dentry)) { + /* File with this ID already exists in index. */ + error = -EEXIST; + goto out; + } + + /* Creating a file in the .index dir. */ + index_dir_inode = d_inode(mi->mi_index_dir); + inode_lock_nested(index_dir_inode, I_MUTEX_PARENT); + error = vfs_create(index_dir_inode, index_file_dentry, args.mode | 0222, + true); + inode_unlock(index_dir_inode); + + if (error) + goto out; + if (!d_really_is_positive(index_file_dentry)) { + error = -EINVAL; + goto out; + } + + error = chmod(index_file_dentry, args.mode | 0222); + if (error) { + pr_debug("incfs: chmod err: %d\n", error); + goto delete_index_file; + } + + /* Save the file's ID as an xattr for easy fetching in future. */ + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_ID_NAME, + file_id_str, strlen(file_id_str), XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_index_file; + } + + /* Save the file's size as an xattr for easy fetching in future. */ + size_attr_value = cpu_to_le64(args.size); + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value), + XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_index_file; + } + + /* Save the file's attribute as an xattr */ + if (args.file_attr_len && args.file_attr) { + if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { + error = -E2BIG; + goto delete_index_file; + } + + attr_value = kmalloc(args.file_attr_len, GFP_NOFS); + if (!attr_value) { + error = -ENOMEM; + goto delete_index_file; + } + + if (copy_from_user(attr_value, + u64_to_user_ptr(args.file_attr), + args.file_attr_len) > 0) { + error = -EFAULT; + goto delete_index_file; + } + + error = vfs_setxattr(index_file_dentry, + INCFS_XATTR_METADATA_NAME, + attr_value, args.file_attr_len, + XATTR_CREATE); + + if (error) + goto delete_index_file; + } + + /* Initializing a newly created file. */ + error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, + range(attr_value, args.file_attr_len), + (u8 __user *)args.signature_info, + args.signature_size); + if (error) + goto delete_index_file; + + /* Linking a file with its real name from the requested dir. */ + error = incfs_link(index_file_dentry, named_file_dentry); + + if (!error) + goto out; + +delete_index_file: + incfs_unlink(index_file_dentry); + +out: + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + + kfree(file_id_str); + kfree(file_name); + kfree(attr_value); + dput(named_file_dentry); + dput(index_file_dentry); + path_put(&parent_dir_path); + if (locked) + mutex_unlock(&mi->mi_dir_struct_mutex); + return error; +} + +static int init_new_mapped_file(struct mount_info *mi, struct dentry *dentry, + incfs_uuid_t *uuid, u64 size, u64 offset) +{ + struct path path = {}; + struct file *new_file; + int error = 0; + struct backing_file_context *bfc = NULL; + + if (!mi || !dentry || !uuid) + return -EFAULT; + + /* Resize newly created file to its true size. */ + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = dentry + }; + new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, + mi->mi_owner); + + if (IS_ERR(new_file)) { + error = PTR_ERR(new_file); + goto out; + } + + bfc = incfs_alloc_bfc(new_file); + fput(new_file); + if (IS_ERR(bfc)) { + error = PTR_ERR(bfc); + bfc = NULL; + goto out; + } + + mutex_lock(&bfc->bc_mutex); + error = incfs_write_mapping_fh_to_backing_file(bfc, uuid, size, offset); + if (error) + goto out; + +out: + if (bfc) { + mutex_unlock(&bfc->bc_mutex); + incfs_free_bfc(bfc); + } + + if (error) + pr_debug("incfs: %s error: %d\n", __func__, error); + return error; +} + +static long ioctl_create_mapped_file(struct mount_info *mi, void __user *arg) +{ + struct incfs_create_mapped_file_args __user *args_usr_ptr = arg; + struct incfs_create_mapped_file_args args = {}; + char *file_name; + int error = 0; + struct path parent_dir_path = {}; + char *source_file_name = NULL; + struct dentry *source_file_dentry = NULL; + u64 source_file_size; + struct dentry *file_dentry = NULL; + struct inode *parent_inode; + __le64 size_attr_value; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) + return -EINVAL; + + file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); + if (IS_ERR(file_name)) { + error = PTR_ERR(file_name); + file_name = NULL; + goto out; + } + + error = validate_name(file_name); + if (error) + goto out; + + if (args.source_offset % INCFS_DATA_FILE_BLOCK_SIZE) { + error = -EINVAL; + goto out; + } + + /* Validate file mapping is in range */ + source_file_name = file_id_to_str(args.source_file_id); + if (!source_file_name) { + pr_warn("Failed to alloc source_file_name\n"); + error = -ENOMEM; + goto out; + } + + source_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, + source_file_name); + if (!source_file_dentry) { + pr_warn("Source file does not exist\n"); + error = -EINVAL; + goto out; + } + if (IS_ERR(source_file_dentry)) { + pr_warn("Error opening source file\n"); + error = PTR_ERR(source_file_dentry); + source_file_dentry = NULL; + goto out; + } + if (!d_really_is_positive(source_file_dentry)) { + pr_warn("Source file dentry negative\n"); + error = -EINVAL; + goto out; + } + + error = vfs_getxattr(source_file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value)); + if (error < 0) + goto out; + + if (error != sizeof(size_attr_value)) { + pr_warn("Mapped file has no size attr\n"); + error = -EINVAL; + goto out; + } + + source_file_size = le64_to_cpu(size_attr_value); + if (args.source_offset + args.size > source_file_size) { + pr_warn("Mapped file out of range\n"); + error = -EINVAL; + goto out; + } + + /* Find a directory to put the file into. */ + error = dir_relative_path_resolve(mi, + u64_to_user_ptr(args.directory_path), + &parent_dir_path); + if (error) + goto out; + + if (parent_dir_path.dentry == mi->mi_index_dir) { + /* Can't create a file directly inside .index */ + error = -EBUSY; + goto out; + } + + /* Look up a dentry in the parent dir. It should be negative. */ + file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, + file_name); + if (!file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(file_dentry)) { + error = PTR_ERR(file_dentry); + file_dentry = NULL; + goto out; + } + if (d_really_is_positive(file_dentry)) { + error = -EEXIST; + goto out; + } + + parent_inode = d_inode(parent_dir_path.dentry); + inode_lock_nested(parent_inode, I_MUTEX_PARENT); + error = vfs_create(parent_inode, file_dentry, args.mode | 0222, true); + inode_unlock(parent_inode); + if (error) + goto out; + + /* Save the file's size as an xattr for easy fetching in future. */ + size_attr_value = cpu_to_le64(args.size); + error = vfs_setxattr(file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value), + XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_file; + } + + error = init_new_mapped_file(mi, file_dentry, &args.source_file_id, + size_attr_value, cpu_to_le64(args.source_offset)); + if (error) + goto delete_file; + + goto out; + +delete_file: + incfs_unlink(file_dentry); + +out: + dput(file_dentry); + dput(source_file_dentry); + path_put(&parent_dir_path); + kfree(file_name); + kfree(source_file_name); + return error; +} + +static long pending_reads_dispatch_ioctl(struct file *f, unsigned int req, + unsigned long arg) +{ + struct mount_info *mi = get_mount_info(file_superblock(f)); + + switch (req) { + case INCFS_IOC_CREATE_FILE: + return ioctl_create_file(mi, (void __user *)arg); + case INCFS_IOC_PERMIT_FILL: + return ioctl_permit_fill(f, (void __user *)arg); + case INCFS_IOC_CREATE_MAPPED_FILE: + return ioctl_create_mapped_file(mi, (void __user *)arg); + default: + return -EINVAL; + } +} + +static const struct file_operations incfs_pending_read_file_ops = { + .read = pending_reads_read, + .poll = pending_reads_poll, + .open = pending_reads_open, + .release = pending_reads_release, + .llseek = noop_llseek, + .unlocked_ioctl = pending_reads_dispatch_ioctl, + .compat_ioctl = pending_reads_dispatch_ioctl +}; + +/******************************************************************************* + * Generic inode lookup functionality + ******************************************************************************/ +static bool get_pseudo_inode(int ino, struct inode *inode) +{ + inode->i_ctime = (struct timespec64){}; + inode->i_mtime = inode->i_ctime; + inode->i_atime = inode->i_ctime; + inode->i_size = 0; + inode->i_ino = ino; + inode->i_private = NULL; + inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + inode->i_op = &incfs_file_inode_ops; + + switch (ino) { + case INCFS_PENDING_READS_INODE: + inode->i_fop = &incfs_pending_read_file_ops; + return true; + + case INCFS_LOG_INODE: + inode->i_fop = &incfs_log_file_ops; + return true; + + default: + return false; + } +} + +struct inode_search { + unsigned long ino; +}; + +static int inode_test(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + + return inode->i_ino == search->ino; +} + +static int inode_set(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + + if (get_pseudo_inode(search->ino, inode)) + return 0; + + /* Unknown inode requested. */ + return -EINVAL; +} + +static struct inode *fetch_pending_reads_inode(struct super_block *sb) +{ + struct inode_search search = { + .ino = INCFS_PENDING_READS_INODE + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static struct inode *fetch_log_inode(struct super_block *sb) +{ + struct inode_search search = { + .ino = INCFS_LOG_INODE + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +int dir_lookup_pseudo_files(struct super_block *sb, struct dentry *dentry) +{ + struct mem_range name_range = + range((u8 *)dentry->d_name.name, dentry->d_name.len); + + if (incfs_equal_ranges(pending_reads_file_name_range, name_range)) { + struct inode *inode = fetch_pending_reads_inode(sb); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_add(dentry, inode); + return 0; + } + + if (incfs_equal_ranges(log_file_name_range, name_range)) { + struct inode *inode = fetch_log_inode(sb); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_add(dentry, inode); + return 0; + } + + return -ENOENT; +} + +int emit_pseudo_files(struct dir_context *ctx) +{ + if (ctx->pos == 0) { + if (!dir_emit(ctx, pending_reads_file_name, + ARRAY_SIZE(pending_reads_file_name) - 1, + INCFS_PENDING_READS_INODE, DT_REG)) + return -EINVAL; + + ctx->pos++; + } + + if (ctx->pos == 1) { + if (!dir_emit(ctx, log_file_name, + ARRAY_SIZE(log_file_name) - 1, + INCFS_LOG_INODE, DT_REG)) + return -EINVAL; + + ctx->pos++; + } + + return 0; +} diff --git a/fs/incfs/pseudo_files.h b/fs/incfs/pseudo_files.h new file mode 100644 index 000000000000..14a8cc513363 --- /dev/null +++ b/fs/incfs/pseudo_files.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2020 Google LLC + */ + +#ifndef _INCFS_PSEUDO_FILES_H +#define _INCFS_PSEUDO_FILES_H + +#define PSEUDO_FILE_COUNT 2 +#define INCFS_START_INO_RANGE 10 + +int dir_lookup_pseudo_files(struct super_block *sb, struct dentry *dentry); +int emit_pseudo_files(struct dir_context *ctx); + +#endif diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 343e7c1bb16c..b26e542f7e2a 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -4,32 +4,20 @@ */ #include -#include -#include #include #include #include #include #include -#include #include -#include -#include #include #include "vfs.h" -#include "data_mgmt.h" -#include "format.h" -#include "integrity.h" -#include "internal.h" -#define INCFS_PENDING_READS_INODE 2 -#define INCFS_LOG_INODE 3 -#define INCFS_START_INO_RANGE 10 -#define READ_FILE_MODE 0444 -#define READ_EXEC_FILE_MODE 0555 -#define READ_WRITE_FILE_MODE 0666 +#include "data_mgmt.h" +#include "internal.h" +#include "pseudo_files.h" static int incfs_remount_fs(struct super_block *sb, int *flags, char *data); @@ -52,18 +40,6 @@ static int file_release(struct inode *inode, struct file *file); static int read_single_page(struct file *f, struct page *page); static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg); -static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, - loff_t *ppos); -static __poll_t pending_reads_poll(struct file *file, poll_table *wait); -static int pending_reads_open(struct inode *inode, struct file *file); -static int pending_reads_release(struct inode *, struct file *); - -static ssize_t log_read(struct file *f, char __user *buf, size_t len, - loff_t *ppos); -static __poll_t log_poll(struct file *file, poll_table *wait); -static int log_open(struct inode *inode, struct file *file); -static int log_release(struct inode *, struct file *); - static struct inode *alloc_inode(struct super_block *sb); static void free_inode(struct inode *inode); static void evict_inode(struct inode *inode); @@ -123,7 +99,7 @@ static const struct address_space_operations incfs_address_space_ops = { /* .readpages = readpages */ }; -static const struct file_operations incfs_file_ops = { +const struct file_operations incfs_file_ops = { .open = file_open, .release = file_release, .read_iter = generic_file_read_iter, @@ -134,32 +110,7 @@ static const struct file_operations incfs_file_ops = { .compat_ioctl = dispatch_ioctl }; -enum FILL_PERMISSION { - CANT_FILL = 0, - CAN_FILL = 1, -}; - -static const struct file_operations incfs_pending_read_file_ops = { - .read = pending_reads_read, - .poll = pending_reads_poll, - .open = pending_reads_open, - .release = pending_reads_release, - .llseek = noop_llseek, - .unlocked_ioctl = dispatch_ioctl, - .compat_ioctl = dispatch_ioctl -}; - -static const struct file_operations incfs_log_file_ops = { - .read = log_read, - .poll = log_poll, - .open = log_open, - .release = log_release, - .llseek = noop_llseek, - .unlocked_ioctl = dispatch_ioctl, - .compat_ioctl = dispatch_ioctl -}; - -static const struct inode_operations incfs_file_inode_ops = { +const struct inode_operations incfs_file_inode_ops = { .setattr = incfs_setattr, .getattr = simple_getattr, .listxattr = incfs_listxattr @@ -191,17 +142,6 @@ static const struct xattr_handler *incfs_xattr_ops[] = { NULL, }; -/* State of an open .pending_reads file, unique for each file descriptor. */ -struct pending_reads_state { - /* A serial number of the last pending read obtained from this file. */ - int last_pending_read_sn; -}; - -/* State of an open .log file, unique for each file descriptor. */ -struct log_file_state { - struct read_log_state state; -}; - struct inode_search { unsigned long ino; @@ -221,18 +161,6 @@ enum parse_parameter { Opt_err }; -static const char pending_reads_file_name[] = INCFS_PENDING_READS_FILENAME; -static struct mem_range pending_reads_file_name_range = { - .data = (u8 *)pending_reads_file_name, - .len = ARRAY_SIZE(pending_reads_file_name) - 1 -}; - -static const char log_file_name[] = INCFS_LOG_FILENAME; -static struct mem_range log_file_name_range = { - .data = (u8 *)log_file_name, - .len = ARRAY_SIZE(log_file_name) - 1 -}; - static const match_table_t option_tokens = { { Opt_read_timeout, "read_timeout_ms=%u" }, { Opt_readahead_pages, "readahead=%u" }, @@ -312,21 +240,6 @@ static int parse_options(struct mount_options *opts, char *str) return 0; } -static struct super_block *file_superblock(struct file *f) -{ - struct inode *inode = file_inode(f); - - return inode->i_sb; -} - -static struct mount_info *get_mount_info(struct super_block *sb) -{ - struct mount_info *result = sb->s_fs_info; - - WARN_ON(!result); - return result; -} - /* Read file size from the attribute. Quicker than reading the header */ static u64 read_size_attr(struct dentry *backing_dentry) { @@ -346,96 +259,53 @@ static int inode_test(struct inode *inode, void *opaque) { struct inode_search *search = opaque; struct inode_info *node = get_incfs_node(inode); + struct inode *backing_inode = d_inode(search->backing_dentry); if (!node) return 0; - if (search->backing_dentry) { - struct inode *backing_inode = d_inode(search->backing_dentry); - - return (node->n_backing_inode == backing_inode) && - inode->i_ino == search->ino; - } else - return inode->i_ino == search->ino; + return node->n_backing_inode == backing_inode && + inode->i_ino == search->ino; } static int inode_set(struct inode *inode, void *opaque) { struct inode_search *search = opaque; struct inode_info *node = get_incfs_node(inode); + struct dentry *backing_dentry = search->backing_dentry; + struct inode *backing_inode = d_inode(backing_dentry); - if (search->backing_dentry) { - /* It's a regular inode that has corresponding backing inode */ - struct dentry *backing_dentry = search->backing_dentry; - struct inode *backing_inode = d_inode(backing_dentry); - - fsstack_copy_attr_all(inode, backing_inode); - if (S_ISREG(inode->i_mode)) { - u64 size = search->size; - - inode->i_size = size; - inode->i_blocks = get_blocks_count_for_size(size); - inode->i_mapping->a_ops = &incfs_address_space_ops; - inode->i_op = &incfs_file_inode_ops; - inode->i_fop = &incfs_file_ops; - inode->i_mode &= ~0222; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_size = 0; - inode->i_blocks = 1; - inode->i_mapping->a_ops = &incfs_address_space_ops; - inode->i_op = &incfs_dir_inode_ops; - inode->i_fop = &incfs_dir_fops; - } else { - pr_warn_once("incfs: Unexpected inode type\n"); - return -EBADF; - } - - ihold(backing_inode); - node->n_backing_inode = backing_inode; - node->n_mount_info = get_mount_info(inode->i_sb); - inode->i_ctime = backing_inode->i_ctime; - inode->i_mtime = backing_inode->i_mtime; - inode->i_atime = backing_inode->i_atime; - inode->i_ino = backing_inode->i_ino; - if (backing_inode->i_ino < INCFS_START_INO_RANGE) { - pr_warn("incfs: ino conflict with backing FS %ld\n", - backing_inode->i_ino); - } - - return 0; - } else if (search->ino == INCFS_PENDING_READS_INODE) { - /* It's an inode for .pending_reads pseudo file. */ - - inode->i_ctime = (struct timespec64){}; - inode->i_mtime = inode->i_ctime; - inode->i_atime = inode->i_ctime; - inode->i_size = 0; - inode->i_ino = INCFS_PENDING_READS_INODE; - inode->i_private = NULL; - - inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + fsstack_copy_attr_all(inode, backing_inode); + if (S_ISREG(inode->i_mode)) { + u64 size = search->size; + inode->i_size = size; + inode->i_blocks = get_blocks_count_for_size(size); + inode->i_mapping->a_ops = &incfs_address_space_ops; inode->i_op = &incfs_file_inode_ops; - inode->i_fop = &incfs_pending_read_file_ops; - - } else if (search->ino == INCFS_LOG_INODE) { - /* It's an inode for .log pseudo file. */ - - inode->i_ctime = (struct timespec64){}; - inode->i_mtime = inode->i_ctime; - inode->i_atime = inode->i_ctime; + inode->i_fop = &incfs_file_ops; + inode->i_mode &= ~0222; + } else if (S_ISDIR(inode->i_mode)) { inode->i_size = 0; - inode->i_ino = INCFS_LOG_INODE; - inode->i_private = NULL; - - inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); - - inode->i_op = &incfs_file_inode_ops; - inode->i_fop = &incfs_log_file_ops; - + inode->i_blocks = 1; + inode->i_mapping->a_ops = &incfs_address_space_ops; + inode->i_op = &incfs_dir_inode_ops; + inode->i_fop = &incfs_dir_fops; } else { - /* Unknown inode requested. */ - return -EINVAL; + pr_warn_once("incfs: Unexpected inode type\n"); + return -EBADF; + } + + ihold(backing_inode); + node->n_backing_inode = backing_inode; + node->n_mount_info = get_mount_info(inode->i_sb); + inode->i_ctime = backing_inode->i_ctime; + inode->i_mtime = backing_inode->i_mtime; + inode->i_atime = backing_inode->i_atime; + inode->i_ino = backing_inode->i_ino; + if (backing_inode->i_ino < INCFS_START_INO_RANGE) { + pr_warn("incfs: ino conflict with backing FS %ld\n", + backing_inode->i_ino); } return 0; @@ -462,249 +332,6 @@ static struct inode *fetch_regular_inode(struct super_block *sb, return inode; } -static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, - loff_t *ppos) -{ - struct pending_reads_state *pr_state = f->private_data; - struct mount_info *mi = get_mount_info(file_superblock(f)); - bool report_uid; - unsigned long page = 0; - struct incfs_pending_read_info *reads_buf = NULL; - struct incfs_pending_read_info2 *reads_buf2 = NULL; - size_t record_size; - size_t reads_to_collect; - int last_known_read_sn = READ_ONCE(pr_state->last_pending_read_sn); - int new_max_sn = last_known_read_sn; - int reads_collected = 0; - ssize_t result = 0; - - if (!mi) - return -EFAULT; - - report_uid = mi->mi_options.report_uid; - record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf); - reads_to_collect = len / record_size; - - if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) - return 0; - - page = get_zeroed_page(GFP_NOFS); - if (!page) - return -ENOMEM; - - if (report_uid) - reads_buf2 = (struct incfs_pending_read_info2 *) page; - else - reads_buf = (struct incfs_pending_read_info *) page; - - reads_to_collect = - min_t(size_t, PAGE_SIZE / record_size, reads_to_collect); - - reads_collected = incfs_collect_pending_reads(mi, last_known_read_sn, - reads_buf, reads_buf2, reads_to_collect, - &new_max_sn); - - if (reads_collected < 0) { - result = reads_collected; - goto out; - } - - /* - * Just to make sure that we don't accidentally copy more data - * to reads buffer than userspace can handle. - */ - reads_collected = min_t(size_t, reads_collected, reads_to_collect); - result = reads_collected * record_size; - - /* Copy reads info to the userspace buffer */ - if (copy_to_user(buf, (void *)page, result)) { - result = -EFAULT; - goto out; - } - - WRITE_ONCE(pr_state->last_pending_read_sn, new_max_sn); - *ppos = 0; - -out: - free_page(page); - return result; -} - - -static __poll_t pending_reads_poll(struct file *file, poll_table *wait) -{ - struct pending_reads_state *state = file->private_data; - struct mount_info *mi = get_mount_info(file_superblock(file)); - __poll_t ret = 0; - - poll_wait(file, &mi->mi_pending_reads_notif_wq, wait); - if (incfs_fresh_pending_reads_exist(mi, - state->last_pending_read_sn)) - ret = EPOLLIN | EPOLLRDNORM; - - return ret; -} - -static int pending_reads_open(struct inode *inode, struct file *file) -{ - struct pending_reads_state *state = NULL; - - state = kzalloc(sizeof(*state), GFP_NOFS); - if (!state) - return -ENOMEM; - - file->private_data = state; - return 0; -} - -static int pending_reads_release(struct inode *inode, struct file *file) -{ - kfree(file->private_data); - return 0; -} - -static struct inode *fetch_pending_reads_inode(struct super_block *sb) -{ - struct inode_search search = { - .ino = INCFS_PENDING_READS_INODE - }; - struct inode *inode = iget5_locked(sb, search.ino, inode_test, - inode_set, &search); - - if (!inode) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - - return inode; -} - -static int log_open(struct inode *inode, struct file *file) -{ - struct log_file_state *log_state = NULL; - struct mount_info *mi = get_mount_info(file_superblock(file)); - - log_state = kzalloc(sizeof(*log_state), GFP_NOFS); - if (!log_state) - return -ENOMEM; - - log_state->state = incfs_get_log_state(mi); - file->private_data = log_state; - return 0; -} - -static int log_release(struct inode *inode, struct file *file) -{ - kfree(file->private_data); - return 0; -} - -static ssize_t log_read(struct file *f, char __user *buf, size_t len, - loff_t *ppos) -{ - struct log_file_state *log_state = f->private_data; - struct mount_info *mi = get_mount_info(file_superblock(f)); - int total_reads_collected = 0; - int rl_size; - ssize_t result = 0; - bool report_uid; - unsigned long page = 0; - struct incfs_pending_read_info *reads_buf = NULL; - struct incfs_pending_read_info2 *reads_buf2 = NULL; - size_t record_size; - ssize_t reads_to_collect; - ssize_t reads_per_page; - - if (!mi) - return -EFAULT; - - report_uid = mi->mi_options.report_uid; - record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf); - reads_to_collect = len / record_size; - reads_per_page = PAGE_SIZE / record_size; - - rl_size = READ_ONCE(mi->mi_log.rl_size); - if (rl_size == 0) - return 0; - - page = __get_free_page(GFP_NOFS); - if (!page) - return -ENOMEM; - - if (report_uid) - reads_buf2 = (struct incfs_pending_read_info2 *) page; - else - reads_buf = (struct incfs_pending_read_info *) page; - - reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect); - while (reads_to_collect > 0) { - struct read_log_state next_state; - int reads_collected; - - memcpy(&next_state, &log_state->state, sizeof(next_state)); - reads_collected = incfs_collect_logged_reads( - mi, &next_state, reads_buf, reads_buf2, - min_t(ssize_t, reads_to_collect, reads_per_page)); - if (reads_collected <= 0) { - result = total_reads_collected ? - total_reads_collected * record_size : - reads_collected; - goto out; - } - if (copy_to_user(buf, (void *) page, - reads_collected * record_size)) { - result = total_reads_collected ? - total_reads_collected * record_size : - -EFAULT; - goto out; - } - - memcpy(&log_state->state, &next_state, sizeof(next_state)); - total_reads_collected += reads_collected; - buf += reads_collected * record_size; - reads_to_collect -= reads_collected; - } - - result = total_reads_collected * record_size; - *ppos = 0; -out: - free_page(page); - return result; -} - -static __poll_t log_poll(struct file *file, poll_table *wait) -{ - struct log_file_state *log_state = file->private_data; - struct mount_info *mi = get_mount_info(file_superblock(file)); - int count; - __poll_t ret = 0; - - poll_wait(file, &mi->mi_log.ml_notif_wq, wait); - count = incfs_get_uncollected_logs_count(mi, &log_state->state); - if (count >= mi->mi_options.read_log_wakeup_count) - ret = EPOLLIN | EPOLLRDNORM; - - return ret; -} - -static struct inode *fetch_log_inode(struct super_block *sb) -{ - struct inode_search search = { - .ino = INCFS_LOG_INODE - }; - struct inode *inode = iget5_locked(sb, search.ino, inode_test, - inode_set, &search); - - if (!inode) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - - return inode; -} - static int iterate_incfs_dir(struct file *file, struct dir_context *ctx) { struct dir_file *dir = get_incfs_dir_file(file); @@ -720,29 +347,15 @@ static int iterate_incfs_dir(struct file *file, struct dir_context *ctx) root = dir->backing_dir->f_inode == d_inode(mi->mi_backing_dir_path.dentry); - if (root && ctx->pos == 0) { - if (!dir_emit(ctx, pending_reads_file_name, - ARRAY_SIZE(pending_reads_file_name) - 1, - INCFS_PENDING_READS_INODE, DT_REG)) { - error = -EINVAL; + if (root) { + error = emit_pseudo_files(ctx); + if (error) goto out; - } - ctx->pos++; } - if (root && ctx->pos == 1) { - if (!dir_emit(ctx, log_file_name, - ARRAY_SIZE(log_file_name) - 1, - INCFS_LOG_INODE, DT_REG)) { - error = -EINVAL; - goto out; - } - ctx->pos++; - } - - ctx->pos -= 2; + ctx->pos -= PSEUDO_FILE_COUNT; error = iterate_dir(dir->backing_dir, ctx); - ctx->pos += 2; + ctx->pos += PSEUDO_FILE_COUNT; file->f_pos = dir->backing_dir->f_pos; out: if (error) @@ -866,172 +479,7 @@ err: return result; } -static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original, - u64 size) -{ - u8 *result; - - if (!original) - return range(NULL, 0); - - if (size > INCFS_MAX_SIGNATURE_SIZE) - return range(ERR_PTR(-EFAULT), 0); - - result = kzalloc(size, GFP_NOFS | __GFP_COMP); - if (!result) - return range(ERR_PTR(-ENOMEM), 0); - - if (copy_from_user(result, original, size)) { - kfree(result); - return range(ERR_PTR(-EFAULT), 0); - } - - return range(result, size); -} - -static int init_new_file(struct mount_info *mi, struct dentry *dentry, - incfs_uuid_t *uuid, u64 size, struct mem_range attr, - u8 __user *user_signature_info, u64 signature_size) -{ - struct path path = {}; - struct file *new_file; - int error = 0; - struct backing_file_context *bfc = NULL; - u32 block_count; - struct mem_range raw_signature = { NULL }; - struct mtree *hash_tree = NULL; - - if (!mi || !dentry || !uuid) - return -EFAULT; - - /* Resize newly created file to its true size. */ - path = (struct path) { - .mnt = mi->mi_backing_dir_path.mnt, - .dentry = dentry - }; - new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, - mi->mi_owner); - - if (IS_ERR(new_file)) { - error = PTR_ERR(new_file); - goto out; - } - - bfc = incfs_alloc_bfc(new_file); - fput(new_file); - if (IS_ERR(bfc)) { - error = PTR_ERR(bfc); - bfc = NULL; - goto out; - } - - mutex_lock(&bfc->bc_mutex); - error = incfs_write_fh_to_backing_file(bfc, uuid, size); - if (error) - goto out; - - if (attr.data && attr.len) { - error = incfs_write_file_attr_to_backing_file(bfc, - attr, NULL); - if (error) - goto out; - } - - block_count = (u32)get_blocks_count_for_size(size); - - if (user_signature_info) { - raw_signature = incfs_copy_signature_info_from_user( - user_signature_info, signature_size); - - if (IS_ERR(raw_signature.data)) { - error = PTR_ERR(raw_signature.data); - raw_signature.data = NULL; - goto out; - } - - hash_tree = incfs_alloc_mtree(raw_signature, block_count); - if (IS_ERR(hash_tree)) { - error = PTR_ERR(hash_tree); - hash_tree = NULL; - goto out; - } - - error = incfs_write_signature_to_backing_file( - bfc, raw_signature, hash_tree->hash_tree_area_size); - if (error) - goto out; - - block_count += get_blocks_count_for_size( - hash_tree->hash_tree_area_size); - } - - if (block_count) - error = incfs_write_blockmap_to_backing_file(bfc, block_count); - - if (error) - goto out; -out: - if (bfc) { - mutex_unlock(&bfc->bc_mutex); - incfs_free_bfc(bfc); - } - incfs_free_mtree(hash_tree); - kfree(raw_signature.data); - - if (error) - pr_debug("incfs: %s error: %d\n", __func__, error); - return error; -} - -static int init_new_mapped_file(struct mount_info *mi, struct dentry *dentry, - incfs_uuid_t *uuid, u64 size, u64 offset) -{ - struct path path = {}; - struct file *new_file; - int error = 0; - struct backing_file_context *bfc = NULL; - - if (!mi || !dentry || !uuid) - return -EFAULT; - - /* Resize newly created file to its true size. */ - path = (struct path) { - .mnt = mi->mi_backing_dir_path.mnt, - .dentry = dentry - }; - new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, - mi->mi_owner); - - if (IS_ERR(new_file)) { - error = PTR_ERR(new_file); - goto out; - } - - bfc = incfs_alloc_bfc(new_file); - fput(new_file); - if (IS_ERR(bfc)) { - error = PTR_ERR(bfc); - bfc = NULL; - goto out; - } - - mutex_lock(&bfc->bc_mutex); - error = incfs_write_mapping_fh_to_backing_file(bfc, uuid, size, offset); - if (error) - goto out; - -out: - if (bfc) { - mutex_unlock(&bfc->bc_mutex); - incfs_free_bfc(bfc); - } - - if (error) - pr_debug("incfs: %s error: %d\n", __func__, error); - return error; -} - -static int incfs_link(struct dentry *what, struct dentry *where) +int incfs_link(struct dentry *what, struct dentry *where) { struct dentry *parent_dentry = dget_parent(where); struct inode *pinode = d_inode(parent_dentry); @@ -1045,7 +493,7 @@ static int incfs_link(struct dentry *what, struct dentry *where) return error; } -static int incfs_unlink(struct dentry *dentry) +int incfs_unlink(struct dentry *dentry) { struct dentry *parent_dentry = dget_parent(dentry); struct inode *pinode = d_inode(parent_dentry); @@ -1073,276 +521,6 @@ static int incfs_rmdir(struct dentry *dentry) return error; } -static int dir_relative_path_resolve( - struct mount_info *mi, - const char __user *relative_path, - struct path *result_path) -{ - struct path *base_path = &mi->mi_backing_dir_path; - int dir_fd = get_unused_fd_flags(0); - struct file *dir_f = NULL; - int error = 0; - - if (dir_fd < 0) - return dir_fd; - - dir_f = dentry_open(base_path, O_RDONLY | O_NOATIME, mi->mi_owner); - - if (IS_ERR(dir_f)) { - error = PTR_ERR(dir_f); - goto out; - } - fd_install(dir_fd, dir_f); - - if (!relative_path) { - /* No relative path given, just return the base dir. */ - *result_path = *base_path; - path_get(result_path); - goto out; - } - - error = user_path_at_empty(dir_fd, relative_path, - LOOKUP_FOLLOW | LOOKUP_DIRECTORY, result_path, NULL); - -out: - ksys_close(dir_fd); - if (error) - pr_debug("incfs: %s %d\n", __func__, error); - return error; -} - -static int validate_name(char *file_name) -{ - struct mem_range name = range(file_name, strlen(file_name)); - int i = 0; - - if (name.len > INCFS_MAX_NAME_LEN) - return -ENAMETOOLONG; - - if (incfs_equal_ranges(pending_reads_file_name_range, name)) - return -EINVAL; - - for (i = 0; i < name.len; i++) - if (name.data[i] == '/') - return -EINVAL; - - return 0; -} - -static int chmod(struct dentry *dentry, umode_t mode) -{ - struct inode *inode = dentry->d_inode; - struct inode *delegated_inode = NULL; - struct iattr newattrs; - int error; - -retry_deleg: - inode_lock(inode); - newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(dentry, &newattrs, &delegated_inode); - inode_unlock(inode); - if (delegated_inode) { - error = break_deleg_wait(&delegated_inode); - if (!error) - goto retry_deleg; - } - return error; -} - -static long ioctl_create_file(struct mount_info *mi, - struct incfs_new_file_args __user *usr_args) -{ - struct incfs_new_file_args args; - char *file_id_str = NULL; - struct dentry *index_file_dentry = NULL; - struct dentry *named_file_dentry = NULL; - struct path parent_dir_path = {}; - struct inode *index_dir_inode = NULL; - __le64 size_attr_value = 0; - char *file_name = NULL; - char *attr_value = NULL; - int error = 0; - bool locked = false; - - if (!mi || !mi->mi_index_dir) { - error = -EFAULT; - goto out; - } - - if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { - error = -EFAULT; - goto out; - } - - file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); - if (IS_ERR(file_name)) { - error = PTR_ERR(file_name); - file_name = NULL; - goto out; - } - - error = validate_name(file_name); - if (error) - goto out; - - file_id_str = file_id_to_str(args.file_id); - if (!file_id_str) { - error = -ENOMEM; - goto out; - } - - error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); - if (error) - goto out; - locked = true; - - /* Find a directory to put the file into. */ - error = dir_relative_path_resolve(mi, - u64_to_user_ptr(args.directory_path), - &parent_dir_path); - if (error) - goto out; - - if (parent_dir_path.dentry == mi->mi_index_dir) { - /* Can't create a file directly inside .index */ - error = -EBUSY; - goto out; - } - - /* Look up a dentry in the parent dir. It should be negative. */ - named_file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, - file_name); - if (!named_file_dentry) { - error = -EFAULT; - goto out; - } - if (IS_ERR(named_file_dentry)) { - error = PTR_ERR(named_file_dentry); - named_file_dentry = NULL; - goto out; - } - if (d_really_is_positive(named_file_dentry)) { - /* File with this path already exists. */ - error = -EEXIST; - goto out; - } - /* Look up a dentry in the .index dir. It should be negative. */ - index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); - if (!index_file_dentry) { - error = -EFAULT; - goto out; - } - if (IS_ERR(index_file_dentry)) { - error = PTR_ERR(index_file_dentry); - index_file_dentry = NULL; - goto out; - } - if (d_really_is_positive(index_file_dentry)) { - /* File with this ID already exists in index. */ - error = -EEXIST; - goto out; - } - - /* Creating a file in the .index dir. */ - index_dir_inode = d_inode(mi->mi_index_dir); - inode_lock_nested(index_dir_inode, I_MUTEX_PARENT); - error = vfs_create(index_dir_inode, index_file_dentry, args.mode | 0222, - true); - inode_unlock(index_dir_inode); - - if (error) - goto out; - if (!d_really_is_positive(index_file_dentry)) { - error = -EINVAL; - goto out; - } - - error = chmod(index_file_dentry, args.mode | 0222); - if (error) { - pr_debug("incfs: chmod err: %d\n", error); - goto delete_index_file; - } - - /* Save the file's ID as an xattr for easy fetching in future. */ - error = vfs_setxattr(index_file_dentry, INCFS_XATTR_ID_NAME, - file_id_str, strlen(file_id_str), XATTR_CREATE); - if (error) { - pr_debug("incfs: vfs_setxattr err:%d\n", error); - goto delete_index_file; - } - - /* Save the file's size as an xattr for easy fetching in future. */ - size_attr_value = cpu_to_le64(args.size); - error = vfs_setxattr(index_file_dentry, INCFS_XATTR_SIZE_NAME, - (char *)&size_attr_value, sizeof(size_attr_value), - XATTR_CREATE); - if (error) { - pr_debug("incfs: vfs_setxattr err:%d\n", error); - goto delete_index_file; - } - - /* Save the file's attribute as an xattr */ - if (args.file_attr_len && args.file_attr) { - if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { - error = -E2BIG; - goto delete_index_file; - } - - attr_value = kmalloc(args.file_attr_len, GFP_NOFS); - if (!attr_value) { - error = -ENOMEM; - goto delete_index_file; - } - - if (copy_from_user(attr_value, - u64_to_user_ptr(args.file_attr), - args.file_attr_len) > 0) { - error = -EFAULT; - goto delete_index_file; - } - - error = vfs_setxattr(index_file_dentry, - INCFS_XATTR_METADATA_NAME, - attr_value, args.file_attr_len, - XATTR_CREATE); - - if (error) - goto delete_index_file; - } - - /* Initializing a newly created file. */ - error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, - range(attr_value, args.file_attr_len), - (u8 __user *)args.signature_info, - args.signature_size); - if (error) - goto delete_index_file; - - /* Linking a file with its real name from the requested dir. */ - error = incfs_link(index_file_dentry, named_file_dentry); - - if (!error) - goto out; - -delete_index_file: - incfs_unlink(index_file_dentry); - -out: - if (error) - pr_debug("incfs: %s err:%d\n", __func__, error); - - kfree(file_id_str); - kfree(file_name); - kfree(attr_value); - dput(named_file_dentry); - dput(index_file_dentry); - path_put(&parent_dir_path); - if (locked) - mutex_unlock(&mi->mi_dir_struct_mutex); - return error; -} - static long ioctl_fill_blocks(struct file *f, void __user *arg) { struct incfs_fill_blocks __user *usr_fill_blocks = arg; @@ -1413,53 +591,6 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) return i; } -static long ioctl_permit_fill(struct file *f, void __user *arg) -{ - struct incfs_permit_fill __user *usr_permit_fill = arg; - struct incfs_permit_fill permit_fill; - long error = 0; - struct file *file = NULL; - - if (f->f_op != &incfs_pending_read_file_ops) - return -EPERM; - - if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) - return -EFAULT; - - file = fget(permit_fill.file_descriptor); - if (IS_ERR(file)) - return PTR_ERR(file); - - if (file->f_op != &incfs_file_ops) { - error = -EPERM; - goto out; - } - - if (file->f_inode->i_sb != f->f_inode->i_sb) { - error = -EPERM; - goto out; - } - - switch ((uintptr_t)file->private_data) { - case CANT_FILL: - file->private_data = (void *)CAN_FILL; - break; - - case CAN_FILL: - pr_debug("CAN_FILL already set"); - break; - - default: - pr_warn("Invalid file private data"); - error = -EFAULT; - goto out; - } - -out: - fput(file); - return error; -} - static long ioctl_read_file_signature(struct file *f, void __user *arg) { struct incfs_get_file_sig_args __user *args_usr_ptr = arg; @@ -1532,167 +663,15 @@ static long ioctl_get_filled_blocks(struct file *f, void __user *arg) return error; } -static long ioctl_create_mapped_file(struct mount_info *mi, void __user *arg) -{ - struct incfs_create_mapped_file_args __user *args_usr_ptr = arg; - struct incfs_create_mapped_file_args args = {}; - char *file_name; - int error = 0; - struct path parent_dir_path = {}; - char *source_file_name = NULL; - struct dentry *source_file_dentry = NULL; - u64 source_file_size; - struct dentry *file_dentry = NULL; - struct inode *parent_inode; - __le64 size_attr_value; - - if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) - return -EINVAL; - - file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); - if (IS_ERR(file_name)) { - error = PTR_ERR(file_name); - file_name = NULL; - goto out; - } - - error = validate_name(file_name); - if (error) - goto out; - - if (args.source_offset % INCFS_DATA_FILE_BLOCK_SIZE) { - error = -EINVAL; - goto out; - } - - /* Validate file mapping is in range */ - source_file_name = file_id_to_str(args.source_file_id); - if (!source_file_name) { - pr_warn("Failed to alloc source_file_name\n"); - error = -ENOMEM; - goto out; - } - - source_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, - source_file_name); - if (!source_file_dentry) { - pr_warn("Source file does not exist\n"); - error = -EINVAL; - goto out; - } - if (IS_ERR(source_file_dentry)) { - pr_warn("Error opening source file\n"); - error = PTR_ERR(source_file_dentry); - source_file_dentry = NULL; - goto out; - } - if (!d_really_is_positive(source_file_dentry)) { - pr_warn("Source file dentry negative\n"); - error = -EINVAL; - goto out; - } - - error = vfs_getxattr(source_file_dentry, INCFS_XATTR_SIZE_NAME, - (char *)&size_attr_value, sizeof(size_attr_value)); - if (error < 0) - goto out; - - if (error != sizeof(size_attr_value)) { - pr_warn("Mapped file has no size attr\n"); - error = -EINVAL; - goto out; - } - - source_file_size = le64_to_cpu(size_attr_value); - if (args.source_offset + args.size > source_file_size) { - pr_warn("Mapped file out of range\n"); - error = -EINVAL; - goto out; - } - - /* Find a directory to put the file into. */ - error = dir_relative_path_resolve(mi, - u64_to_user_ptr(args.directory_path), - &parent_dir_path); - if (error) - goto out; - - if (parent_dir_path.dentry == mi->mi_index_dir) { - /* Can't create a file directly inside .index */ - error = -EBUSY; - goto out; - } - - /* Look up a dentry in the parent dir. It should be negative. */ - file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, - file_name); - if (!file_dentry) { - error = -EFAULT; - goto out; - } - if (IS_ERR(file_dentry)) { - error = PTR_ERR(file_dentry); - file_dentry = NULL; - goto out; - } - if (d_really_is_positive(file_dentry)) { - error = -EEXIST; - goto out; - } - - parent_inode = d_inode(parent_dir_path.dentry); - inode_lock_nested(parent_inode, I_MUTEX_PARENT); - error = vfs_create(parent_inode, file_dentry, args.mode | 0222, true); - inode_unlock(parent_inode); - if (error) - goto out; - - /* Save the file's size as an xattr for easy fetching in future. */ - size_attr_value = cpu_to_le64(args.size); - error = vfs_setxattr(file_dentry, INCFS_XATTR_SIZE_NAME, - (char *)&size_attr_value, sizeof(size_attr_value), - XATTR_CREATE); - if (error) { - pr_debug("incfs: vfs_setxattr err:%d\n", error); - goto delete_file; - } - - error = init_new_mapped_file(mi, file_dentry, &args.source_file_id, - size_attr_value, cpu_to_le64(args.source_offset)); - if (error) - goto delete_file; - - goto out; - -delete_file: - incfs_unlink(file_dentry); - -out: - dput(file_dentry); - dput(source_file_dentry); - path_put(&parent_dir_path); - kfree(file_name); - kfree(source_file_name); - return error; -} - static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) { - struct mount_info *mi = get_mount_info(file_superblock(f)); - switch (req) { - case INCFS_IOC_CREATE_FILE: - return ioctl_create_file(mi, (void __user *)arg); case INCFS_IOC_FILL_BLOCKS: return ioctl_fill_blocks(f, (void __user *)arg); - case INCFS_IOC_PERMIT_FILL: - return ioctl_permit_fill(f, (void __user *)arg); case INCFS_IOC_READ_FILE_SIGNATURE: return ioctl_read_file_signature(f, (void __user *)arg); case INCFS_IOC_GET_FILLED_BLOCKS: return ioctl_get_filled_blocks(f, (void __user *)arg); - case INCFS_IOC_CREATE_MAPPED_FILE: - return ioctl_create_mapped_file(mi, (void __user *)arg); default: return -EINVAL; } @@ -1706,8 +685,6 @@ static struct dentry *dir_lookup(struct inode *dir_inode, struct dentry *dentry, struct dentry *backing_dentry = NULL; struct path dir_backing_path = {}; struct inode_info *dir_info = get_incfs_node(dir_inode); - struct mem_range name_range = - range((u8 *)dentry->d_name.name, dentry->d_name.len); int err = 0; if (!mi || !dir_info || !dir_info->n_backing_inode) @@ -1716,33 +693,10 @@ static struct dentry *dir_lookup(struct inode *dir_inode, struct dentry *dentry, if (d_inode(mi->mi_backing_dir_path.dentry) == dir_info->n_backing_inode) { /* We do lookup in the FS root. Show pseudo files. */ - - if (incfs_equal_ranges(pending_reads_file_name_range, - name_range)) { - struct inode *inode = fetch_pending_reads_inode( - dir_inode->i_sb); - - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out; - } - - d_add(dentry, inode); + err = dir_lookup_pseudo_files(dir_inode->i_sb, dentry); + if (err != -ENOENT) goto out; - } - - if (incfs_equal_ranges(log_file_name_range, name_range)) { - struct inode *inode = fetch_log_inode( - dir_inode->i_sb); - - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out; - } - - d_add(dentry, inode); - goto out; - } + err = 0; } dir_dentry = dget_parent(dentry); diff --git a/fs/incfs/vfs.h b/fs/incfs/vfs.h index eaa490e19072..d5dc5ebe99de 100644 --- a/fs/incfs/vfs.h +++ b/fs/incfs/vfs.h @@ -6,8 +6,33 @@ #ifndef _INCFS_VFS_H #define _INCFS_VFS_H +enum FILL_PERMISSION { + CANT_FILL = 0, + CAN_FILL = 1, +}; + +extern const struct file_operations incfs_file_ops; +extern const struct inode_operations incfs_file_inode_ops; + void incfs_kill_sb(struct super_block *sb); struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, const char *dev_name, void *data); +int incfs_link(struct dentry *what, struct dentry *where); +int incfs_unlink(struct dentry *dentry); + +static inline struct mount_info *get_mount_info(struct super_block *sb) +{ + struct mount_info *result = sb->s_fs_info; + + WARN_ON(!result); + return result; +} + +static inline struct super_block *file_superblock(struct file *f) +{ + struct inode *inode = file_inode(f); + + return inode->i_sb; +} #endif diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 341ceef3f779..f2abc5899dca 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -227,8 +227,7 @@ int open_file_by_id(const char *mnt_dir, incfs_uuid_t id, bool use_ioctl) goto out; } - if (ioctl(fd, INCFS_IOC_PERMIT_FILL, &permit_fill) != -1 || - errno != EPERM) { + if (ioctl(fd, INCFS_IOC_PERMIT_FILL, &permit_fill) != -1) { print_error( "Successfully called PERMIT_FILL on non pending_read file"); return -errno; From b7fb6bb4c0071c92f04de6f361f620f9fbad7b83 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 20 Aug 2020 15:03:54 -0700 Subject: [PATCH 445/636] ANDROID: Incremental fs: Add .blocks_written file Bug: 162856396 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I942582218cdc7741bcff2f264960b76cdfa1bb36 --- fs/incfs/data_mgmt.c | 5 + fs/incfs/data_mgmt.h | 7 + fs/incfs/pseudo_files.c | 157 +++++++++++++----- fs/incfs/pseudo_files.h | 2 +- include/uapi/linux/incrementalfs.h | 1 + .../selftests/filesystems/incfs/incfs_test.c | 51 ++++++ .../selftests/filesystems/incfs/utils.c | 26 ++- .../selftests/filesystems/incfs/utils.h | 2 + 8 files changed, 204 insertions(+), 47 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 8e58f6edb555..7cc530b8c1f4 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -43,6 +43,8 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, mutex_init(&mi->mi_dir_struct_mutex); init_waitqueue_head(&mi->mi_pending_reads_notif_wq); init_waitqueue_head(&mi->mi_log.ml_notif_wq); + init_waitqueue_head(&mi->mi_blocks_written_notif_wq); + atomic_set(&mi->mi_blocks_written, 0); INIT_DELAYED_WORK(&mi->mi_log.ml_wakeup_work, log_wake_up_all); spin_lock_init(&mi->mi_log.rl_lock); spin_lock_init(&mi->pending_read_lock); @@ -902,6 +904,9 @@ static void notify_pending_reads(struct mount_info *mi, } rcu_read_unlock(); wake_up_all(&segment->new_data_arrival_wq); + + atomic_inc(&mi->mi_blocks_written); + wake_up_all(&mi->mi_blocks_written_notif_wq); } static int wait_for_data_block(struct data_file *df, int block_index, diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 77d0950f9a22..26e9b4bf4d33 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -155,6 +155,13 @@ struct mount_info { void *pending_read_xattr; size_t pending_read_xattr_size; + + /* A queue of waiters who want to be notified about blocks_written */ + wait_queue_head_t mi_blocks_written_notif_wq; + + /* Number of blocks written since mount */ + atomic_t mi_blocks_written; + }; struct data_file_block { diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c index 5d61d8e866dc..22863204bb59 100644 --- a/fs/incfs/pseudo_files.c +++ b/fs/incfs/pseudo_files.c @@ -20,13 +20,14 @@ #define INCFS_PENDING_READS_INODE 2 #define INCFS_LOG_INODE 3 +#define INCFS_BLOCKS_WRITTEN_INODE 4 #define READ_WRITE_FILE_MODE 0666 /******************************************************************************* * .log pseudo file definition ******************************************************************************/ static const char log_file_name[] = INCFS_LOG_FILENAME; -static struct mem_range log_file_name_range = { +static const struct mem_range log_file_name_range = { .data = (u8 *)log_file_name, .len = ARRAY_SIZE(log_file_name) - 1 }; @@ -156,7 +157,7 @@ static const struct file_operations incfs_log_file_ops = { * .pending_reads pseudo file definition ******************************************************************************/ static const char pending_reads_file_name[] = INCFS_PENDING_READS_FILENAME; -static struct mem_range pending_reads_file_name_range = { +static const struct mem_range pending_reads_file_name_range = { .data = (u8 *)pending_reads_file_name, .len = ARRAY_SIZE(pending_reads_file_name) - 1 }; @@ -927,6 +928,90 @@ static const struct file_operations incfs_pending_read_file_ops = { .compat_ioctl = pending_reads_dispatch_ioctl }; +/******************************************************************************* + * .blocks_written pseudo file definition + ******************************************************************************/ +static const char blocks_written_file_name[] = INCFS_BLOCKS_WRITTEN_FILENAME; +static const struct mem_range blocks_written_file_name_range = { + .data = (u8 *)blocks_written_file_name, + .len = ARRAY_SIZE(blocks_written_file_name) - 1 +}; + +/* State of an open .blocks_written file, unique for each file descriptor. */ +struct blocks_written_file_state { + unsigned long blocks_written; +}; + +static ssize_t blocks_written_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct blocks_written_file_state *state = f->private_data; + unsigned long blocks_written; + char string[21]; + int result = 0; + + if (!mi) + return -EFAULT; + + blocks_written = atomic_read(&mi->mi_blocks_written); + if (state->blocks_written == blocks_written) + return 0; + + result = snprintf(string, sizeof(string), "%lu", blocks_written); + if (result > len) + result = len; + if (copy_to_user(buf, string, result)) + return -EFAULT; + + state->blocks_written = blocks_written; + return result; +} + +static __poll_t blocks_written_poll(struct file *f, poll_table *wait) +{ + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct blocks_written_file_state *state = f->private_data; + unsigned long blocks_written; + + if (!mi) + return -EFAULT; + + poll_wait(f, &mi->mi_blocks_written_notif_wq, wait); + blocks_written = atomic_read(&mi->mi_blocks_written); + if (state->blocks_written == blocks_written) + return 0; + + return EPOLLIN | EPOLLRDNORM; +} + +static int blocks_written_open(struct inode *inode, struct file *file) +{ + struct blocks_written_file_state *state = + kzalloc(sizeof(*state), GFP_NOFS); + + if (!state) + return -ENOMEM; + + state->blocks_written = -1; + file->private_data = state; + return 0; +} + +static int blocks_written_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static const struct file_operations incfs_blocks_written_file_ops = { + .read = blocks_written_read, + .poll = blocks_written_poll, + .open = blocks_written_open, + .release = blocks_written_release, + .llseek = noop_llseek, +}; + /******************************************************************************* * Generic inode lookup functionality ******************************************************************************/ @@ -950,6 +1035,10 @@ static bool get_pseudo_inode(int ino, struct inode *inode) inode->i_fop = &incfs_log_file_ops; return true; + case INCFS_BLOCKS_WRITTEN_INODE: + inode->i_fop = &incfs_blocks_written_file_ops; + return true; + default: return false; } @@ -977,27 +1066,10 @@ static int inode_set(struct inode *inode, void *opaque) return -EINVAL; } -static struct inode *fetch_pending_reads_inode(struct super_block *sb) +static struct inode *fetch_inode(struct super_block *sb, unsigned long ino) { struct inode_search search = { - .ino = INCFS_PENDING_READS_INODE - }; - struct inode *inode = iget5_locked(sb, search.ino, inode_test, - inode_set, &search); - - if (!inode) - return ERR_PTR(-ENOMEM); - - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - - return inode; -} - -static struct inode *fetch_log_inode(struct super_block *sb) -{ - struct inode_search search = { - .ino = INCFS_LOG_INODE + .ino = ino }; struct inode *inode = iget5_locked(sb, search.ino, inode_test, inode_set, &search); @@ -1015,28 +1087,24 @@ int dir_lookup_pseudo_files(struct super_block *sb, struct dentry *dentry) { struct mem_range name_range = range((u8 *)dentry->d_name.name, dentry->d_name.len); + unsigned long ino; + struct inode *inode; - if (incfs_equal_ranges(pending_reads_file_name_range, name_range)) { - struct inode *inode = fetch_pending_reads_inode(sb); + if (incfs_equal_ranges(pending_reads_file_name_range, name_range)) + ino = INCFS_PENDING_READS_INODE; + else if (incfs_equal_ranges(log_file_name_range, name_range)) + ino = INCFS_LOG_INODE; + else if (incfs_equal_ranges(blocks_written_file_name_range, name_range)) + ino = INCFS_BLOCKS_WRITTEN_INODE; + else + return -ENOENT; - if (IS_ERR(inode)) - return PTR_ERR(inode); + inode = fetch_inode(sb, ino); + if (IS_ERR(inode)) + return PTR_ERR(inode); - d_add(dentry, inode); - return 0; - } - - if (incfs_equal_ranges(log_file_name_range, name_range)) { - struct inode *inode = fetch_log_inode(sb); - - if (IS_ERR(inode)) - return PTR_ERR(inode); - - d_add(dentry, inode); - return 0; - } - - return -ENOENT; + d_add(dentry, inode); + return 0; } int emit_pseudo_files(struct dir_context *ctx) @@ -1059,5 +1127,14 @@ int emit_pseudo_files(struct dir_context *ctx) ctx->pos++; } + if (ctx->pos == 2) { + if (!dir_emit(ctx, blocks_written_file_name, + ARRAY_SIZE(blocks_written_file_name) - 1, + INCFS_BLOCKS_WRITTEN_INODE, DT_REG)) + return -EINVAL; + + ctx->pos++; + } + return 0; } diff --git a/fs/incfs/pseudo_files.h b/fs/incfs/pseudo_files.h index 14a8cc513363..358bcabfe49a 100644 --- a/fs/incfs/pseudo_files.h +++ b/fs/incfs/pseudo_files.h @@ -6,7 +6,7 @@ #ifndef _INCFS_PSEUDO_FILES_H #define _INCFS_PSEUDO_FILES_H -#define PSEUDO_FILE_COUNT 2 +#define PSEUDO_FILE_COUNT 3 #define INCFS_START_INO_RANGE 10 int dir_lookup_pseudo_files(struct super_block *sb, struct dentry *dentry); diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 96ecd43b3d46..44cc754177aa 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -30,6 +30,7 @@ #define INCFS_PENDING_READS_FILENAME ".pending_reads" #define INCFS_LOG_FILENAME ".log" +#define INCFS_BLOCKS_WRITTEN_FILENAME ".blocks_written" #define INCFS_XATTR_ID_NAME (XATTR_USER_PREFIX "incfs.id") #define INCFS_XATTR_SIZE_NAME (XATTR_USER_PREFIX "incfs.size") #define INCFS_XATTR_METADATA_NAME (XATTR_USER_PREFIX "incfs.metadata") diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index f2abc5899dca..893dc9474ae2 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -963,6 +964,7 @@ static bool iterate_directory(const char *dir_to_iterate, bool root, } names[] = { {INCFS_LOG_FILENAME, true, false}, {INCFS_PENDING_READS_FILENAME, true, false}, + {INCFS_BLOCKS_WRITTEN_FILENAME, true, false}, {".index", true, false}, {"..", false, false}, {".", false, false}, @@ -2293,10 +2295,39 @@ static int emit_partial_test_file_data(const char *mount_dir, int *block_indexes = NULL; int result = 0; int blocks_written = 0; + int bw_fd = -1; + char buffer[20]; + struct pollfd pollfd; + long blocks_written_total, blocks_written_new_total; if (file->size == 0) return 0; + bw_fd = open_blocks_written_file(mount_dir); + if (bw_fd == -1) + return -errno; + + result = read(bw_fd, buffer, sizeof(buffer)); + if (result <= 0) { + result = -EIO; + goto out; + } + + buffer[result] = 0; + blocks_written_total = atol(buffer); + result = 0; + + pollfd = (struct pollfd) { + .fd = bw_fd, + .events = POLLIN, + }; + + result = poll(&pollfd, 1, 0); + if (result) { + result = -EIO; + goto out; + } + /* Emit 2 blocks, skip 2 blocks etc*/ block_indexes = calloc(block_cnt, sizeof(*block_indexes)); for (i = 0, j = 0; i < block_cnt; ++i) @@ -2316,9 +2347,29 @@ static int emit_partial_test_file_data(const char *mount_dir, result = -EIO; goto out; } + + result = poll(&pollfd, 1, 0); + if (result != 1 || pollfd.revents != POLLIN) { + result = -EIO; + goto out; + } + + result = read(bw_fd, buffer, sizeof(buffer)); + buffer[result] = 0; + blocks_written_new_total = atol(buffer); + + if (blocks_written_new_total - blocks_written_total + != blocks_written) { + result = -EIO; + goto out; + } + + blocks_written_total = blocks_written_new_total; + result = 0; } out: free(block_indexes); + close(bw_fd); return result; } diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index 3bc1449c16da..32ba6af49bbd 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -234,14 +234,28 @@ int open_commands_file(const char *mount_dir) int open_log_file(const char *mount_dir) { - char cmd_file[255]; - int cmd_fd; + char file[255]; + int fd; - snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/.log", mount_dir); - cmd_fd = open(cmd_file, O_RDWR | O_CLOEXEC); - if (cmd_fd < 0) + snprintf(file, ARRAY_SIZE(file), "%s/.log", mount_dir); + fd = open(file, O_RDWR | O_CLOEXEC); + if (fd < 0) perror("Can't open log file"); - return cmd_fd; + return fd; +} + +int open_blocks_written_file(const char *mount_dir) +{ + char file[255]; + int fd; + + snprintf(file, ARRAY_SIZE(file), + "%s/%s", mount_dir, INCFS_BLOCKS_WRITTEN_FILENAME); + fd = open(file, O_RDONLY | O_CLOEXEC); + + if (fd < 0) + perror("Can't open blocks_written file"); + return fd; } int wait_for_pending_reads(int fd, int timeout_ms, diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index 39eb60333637..470471c7f418 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -52,6 +52,8 @@ int open_commands_file(const char *mount_dir); int open_log_file(const char *mount_dir); +int open_blocks_written_file(const char *mount_dir); + int wait_for_pending_reads(int fd, int timeout_ms, struct incfs_pending_read_info *prs, int prs_count); From 37436094c8751bb9d5c2cfe710b4e8cc232059de Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 27 Aug 2020 13:52:14 -0700 Subject: [PATCH 446/636] ANDROID: Incremental fs: Remove attributes from file Bug: 166638631 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I0f753929f6b5b884f829cd3e6e8d4e6f94ab3a46 --- fs/incfs/data_mgmt.c | 20 ----------------- fs/incfs/format.c | 48 ----------------------------------------- fs/incfs/format.h | 18 ---------------- fs/incfs/pseudo_files.c | 7 ------ 4 files changed, 93 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 7cc530b8c1f4..fc868f786e6c 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -1228,25 +1228,6 @@ static int process_blockmap_md(struct incfs_blockmap *bm, return error; } -static int process_file_attr_md(struct incfs_file_attr *fa, - struct metadata_handler *handler) -{ - struct data_file *df = handler->context; - u16 attr_size = le16_to_cpu(fa->fa_size); - - if (!df) - return -EFAULT; - - if (attr_size > INCFS_MAX_FILE_ATTR_SIZE) - return -E2BIG; - - df->n_attr.fa_value_offset = le64_to_cpu(fa->fa_offset); - df->n_attr.fa_value_size = attr_size; - df->n_attr.fa_crc = le32_to_cpu(fa->fa_crc); - - return 0; -} - static int process_file_signature_md(struct incfs_file_signature *sg, struct metadata_handler *handler) { @@ -1348,7 +1329,6 @@ int incfs_scan_metadata_chain(struct data_file *df) handler->md_record_offset = df->df_metadata_off; handler->context = df; handler->handle_blockmap = process_blockmap_md; - handler->handle_file_attr = process_file_attr_md; handler->handle_signature = process_file_signature_md; while (handler->md_record_offset > 0) { diff --git a/fs/incfs/format.c b/fs/incfs/format.c index a5239a5a17f5..05c23bc87ee5 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -274,49 +274,6 @@ int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, return result; } -/* - * Write file attribute data and metadata record to the backing file. - */ -int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, - struct mem_range value, struct incfs_file_attr *attr) -{ - struct incfs_file_attr file_attr = {}; - int result = 0; - u32 crc = 0; - loff_t value_offset = 0; - - if (!bfc) - return -EFAULT; - - if (value.len > INCFS_MAX_FILE_ATTR_SIZE) - return -ENOSPC; - - LOCK_REQUIRED(bfc->bc_mutex); - - crc = crc32(0, value.data, value.len); - value_offset = incfs_get_end_offset(bfc->bc_file); - file_attr.fa_header.h_md_entry_type = INCFS_MD_FILE_ATTR; - file_attr.fa_header.h_record_size = cpu_to_le16(sizeof(file_attr)); - file_attr.fa_header.h_next_md_offset = cpu_to_le64(0); - file_attr.fa_size = cpu_to_le16((u16)value.len); - file_attr.fa_offset = cpu_to_le64(value_offset); - file_attr.fa_crc = cpu_to_le32(crc); - - result = write_to_bf(bfc, value.data, value.len, value_offset); - if (result) - return result; - - result = append_md_to_backing_file(bfc, &file_attr.fa_header); - if (result) { - /* Error, rollback file changes */ - truncate_backing_file(bfc, value_offset); - } else if (attr) { - *attr = file_attr; - } - - return result; -} - int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, struct mem_range sig, u32 tree_size) { @@ -704,11 +661,6 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, res = handler->handle_blockmap( &handler->md_buffer.blockmap, handler); break; - case INCFS_MD_FILE_ATTR: - if (handler->handle_file_attr) - res = handler->handle_file_attr( - &handler->md_buffer.file_attr, handler); - break; case INCFS_MD_SIGNATURE: if (handler->handle_signature) res = handler->handle_signature( diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 90cadce26c55..c7960e9b6e9c 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -117,7 +117,6 @@ enum incfs_metadata_type { INCFS_MD_NONE = 0, INCFS_MD_BLOCK_MAP = 1, - INCFS_MD_FILE_ATTR = 2, INCFS_MD_SIGNATURE = 3 }; @@ -225,17 +224,6 @@ struct incfs_blockmap { __le32 m_block_count; } __packed; -/* Metadata record for file attribute. Type = INCFS_MD_FILE_ATTR */ -struct incfs_file_attr { - struct incfs_md_header fa_header; - - __le64 fa_offset; - - __le16 fa_size; - - __le32 fa_crc; -} __packed; - /* Metadata record for file signature. Type = INCFS_MD_SIGNATURE */ struct incfs_file_signature { struct incfs_md_header sg_header; @@ -280,14 +268,11 @@ struct metadata_handler { union { struct incfs_md_header md_header; struct incfs_blockmap blockmap; - struct incfs_file_attr file_attr; struct incfs_file_signature signature; } md_buffer; int (*handle_blockmap)(struct incfs_blockmap *bm, struct metadata_handler *handler); - int (*handle_file_attr)(struct incfs_file_attr *fa, - struct metadata_handler *handler); int (*handle_signature)(struct incfs_file_signature *sig, struct metadata_handler *handler); }; @@ -323,9 +308,6 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, loff_t bm_base_off, loff_t file_size); -int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, - struct mem_range value, struct incfs_file_attr *attr); - int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, struct mem_range sig, u32 tree_size); diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c index 22863204bb59..ca8d1b206158 100644 --- a/fs/incfs/pseudo_files.c +++ b/fs/incfs/pseudo_files.c @@ -463,13 +463,6 @@ static int init_new_file(struct mount_info *mi, struct dentry *dentry, if (error) goto out; - if (attr.data && attr.len) { - error = incfs_write_file_attr_to_backing_file(bfc, - attr, NULL); - if (error) - goto out; - } - block_count = (u32)get_blocks_count_for_size(size); if (user_signature_info) { From 63702b82a4f5979e0c51a8060caacfa12756d430 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 27 Aug 2020 14:01:52 -0700 Subject: [PATCH 447/636] ANDROID: Incremental fs: Remove back links and crcs Bug: 166638631 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: Ia6061f8985db85a66fce0f2f09e1305a21753dd6 --- fs/incfs/format.c | 34 ---------------------------------- fs/incfs/format.h | 9 --------- 2 files changed, 43 deletions(-) diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 05c23bc87ee5..f663a7605727 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -148,26 +148,6 @@ static int append_zeros(struct backing_file_context *bfc, size_t len) return append_zeros_no_fallocate(bfc, file_size, len); } -static u32 calc_md_crc(struct incfs_md_header *record) -{ - u32 result = 0; - __le32 saved_crc = record->h_record_crc; - __le64 saved_md_offset = record->h_next_md_offset; - size_t record_size = min_t(size_t, le16_to_cpu(record->h_record_size), - INCFS_MAX_METADATA_RECORD_SIZE); - - /* Zero fields which needs to be excluded from CRC calculation. */ - record->h_record_crc = 0; - record->h_next_md_offset = 0; - result = crc32(0, record, record_size); - - /* Restore excluded fields. */ - record->h_record_crc = saved_crc; - record->h_next_md_offset = saved_md_offset; - - return result; -} - /* * Append a given metadata record to the backing file and update a previous * record to add the new record the the metadata list. @@ -191,9 +171,7 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, record_size = le16_to_cpu(record->h_record_size); file_pos = incfs_get_end_offset(bfc->bc_file); - record->h_prev_md_offset = cpu_to_le64(bfc->bc_last_md_record_offset); record->h_next_md_offset = 0; - record->h_record_crc = cpu_to_le32(calc_md_crc(record)); /* Write the metadata record to the end of the backing file */ record_offset = file_pos; @@ -601,7 +579,6 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, ssize_t bytes_read = 0; size_t md_record_size = 0; loff_t next_record = 0; - loff_t prev_record = 0; int res = 0; struct incfs_md_header *md_hdr = NULL; @@ -623,7 +600,6 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, md_hdr = &handler->md_buffer.md_header; next_record = le64_to_cpu(md_hdr->h_next_md_offset); - prev_record = le64_to_cpu(md_hdr->h_prev_md_offset); md_record_size = le16_to_cpu(md_hdr->h_record_size); if (md_record_size > max_md_size) { @@ -643,16 +619,6 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, return -EBADMSG; } - if (prev_record != handler->md_prev_record_offset) { - pr_warn("incfs: Metadata chain has been corrupted."); - return -EBADMSG; - } - - if (le32_to_cpu(md_hdr->h_record_crc) != calc_md_crc(md_hdr)) { - pr_warn("incfs: Metadata CRC mismatch."); - return -EBADMSG; - } - switch (md_hdr->h_md_entry_type) { case INCFS_MD_NONE: break; diff --git a/fs/incfs/format.h b/fs/incfs/format.h index c7960e9b6e9c..67b0af003b05 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -135,18 +135,9 @@ struct incfs_md_header { */ __le16 h_record_size; - /* - * CRC32 of the metadata record. - * (e.g. inode, dir entry etc) not just this struct. - */ - __le32 h_record_crc; - /* Offset of the next metadata entry if any */ __le64 h_next_md_offset; - /* Offset of the previous metadata entry if any */ - __le64 h_prev_md_offset; - } __packed; /* Backing file header */ From 8ac8eb342e2ddb8f6476a6cc4104440ceda8b011 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 27 Aug 2020 14:08:30 -0700 Subject: [PATCH 448/636] ANDROID: Incremental fs: Remove block HASH flag Bug: 166638631 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I98e1efb9913d7c920ecaa76d29c8cf36219ff930 --- fs/incfs/format.c | 1 - fs/incfs/format.h | 1 - 2 files changed, 2 deletions(-) diff --git a/fs/incfs/format.c b/fs/incfs/format.c index f663a7605727..0b15317ca74d 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -456,7 +456,6 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, bm_entry.me_data_offset_lo = cpu_to_le32((u32)data_offset); bm_entry.me_data_offset_hi = cpu_to_le16((u16)(data_offset >> 32)); bm_entry.me_data_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); - bm_entry.me_flags = cpu_to_le16(INCFS_BLOCK_HASH); return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), bm_entry_off); } diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 67b0af003b05..626796cce214 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -186,7 +186,6 @@ struct incfs_file_header { enum incfs_block_map_entry_flags { INCFS_BLOCK_COMPRESSED_LZ4 = (1 << 0), - INCFS_BLOCK_HASH = (1 << 1), }; /* Block map entry pointing to an actual location of the data block. */ From dfbe6ee1414aed7c67a66d964e953a27c226dd0e Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Fri, 11 Sep 2020 12:32:10 -0700 Subject: [PATCH 449/636] ANDROID: Incremental fs: Make compatible with existing files Bug: 166638631 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I3b4f61bf881dccd50f05f42326370421945b68db --- fs/incfs/format.c | 6 + fs/incfs/format.h | 10 ++ .../selftests/filesystems/incfs/incfs_test.c | 105 ++++++++++++++++++ .../selftests/filesystems/incfs/utils.c | 2 +- .../selftests/filesystems/incfs/utils.h | 2 +- 5 files changed, 123 insertions(+), 2 deletions(-) diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 0b15317ca74d..743aebfc80b9 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -626,6 +626,12 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, res = handler->handle_blockmap( &handler->md_buffer.blockmap, handler); break; + case INCFS_MD_FILE_ATTR: + /* + * File attrs no longer supported, ignore section for + * compatibility + */ + break; case INCFS_MD_SIGNATURE: if (handler->handle_signature) res = handler->handle_signature( diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 626796cce214..d0f65eb1e0ab 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -117,6 +117,7 @@ enum incfs_metadata_type { INCFS_MD_NONE = 0, INCFS_MD_BLOCK_MAP = 1, + INCFS_MD_FILE_ATTR = 2, INCFS_MD_SIGNATURE = 3 }; @@ -135,9 +136,18 @@ struct incfs_md_header { */ __le16 h_record_size; + /* + * Was: CRC32 of the metadata record. + * (e.g. inode, dir entry etc) not just this struct. + */ + __le32 h_unused1; + /* Offset of the next metadata entry if any */ __le64 h_next_md_offset; + /* Was: Offset of the previous metadata entry if any */ + __le64 h_unused2; + } __packed; /* Backing file header */ diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 893dc9474ae2..b5cbc51410a3 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2948,6 +2948,110 @@ failure: return result; } +static const char v1_file[] = { + /* Header */ + /* 0x00: Magic number */ + 0x49, 0x4e, 0x43, 0x46, 0x53, 0x00, 0x00, 0x00, + /* 0x08: Version */ + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x10: Header size */ + 0x38, 0x00, + /* 0x12: Block size */ + 0x00, 0x10, + /* 0x14: Flags */ + 0x00, 0x00, 0x00, 0x00, + /* 0x18: First md offset */ + 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x20: File size */ + 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x28: UUID */ + 0x8c, 0x7d, 0xd9, 0x22, 0xad, 0x47, 0x49, 0x4f, + 0xc0, 0x2c, 0x38, 0x8e, 0x12, 0xc0, 0x0e, 0xac, + + /* 0x38: Attribute */ + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x31, 0x32, 0x33, 0x31, 0x32, 0x33, + + /* Attribute md record */ + /* 0x46: Type */ + 0x02, + /* 0x47: Size */ + 0x25, 0x00, + /* 0x49: CRC */ + 0x9a, 0xef, 0xef, 0x72, + /* 0x4d: Next md offset */ + 0x75, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x55: Prev md offset */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x5d: fa_offset */ + 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x65: fa_size */ + 0x0e, 0x00, + /* 0x67: fa_crc */ + 0xfb, 0x5e, 0x72, 0x89, + + /* Blockmap table */ + /* 0x6b: First 10-byte entry */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + /* Blockmap md record */ + /* 0x75: Type */ + 0x01, + /* 0x76: Size */ + 0x23, 0x00, + /* 0x78: CRC */ + 0x74, 0x45, 0xd3, 0xb9, + /* 0x7c: Next md offset */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x84: Prev md offset */ + 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x8c: blockmap offset */ + 0x6b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* 0x94: blockmap count */ + 0x01, 0x00, 0x00, 0x00, +}; + +#define TEST(statement, condition) \ + do { \ + statement; \ + if (!(condition)) { \ + ksft_print_msg("%s failed %d\n", \ + __func__, __LINE__); \ + goto out; \ + } \ + } while(false) + +static int compatibility_test(const char *mount_dir) +{ + char *backing_dir = NULL; + static const char *name = "file"; + int result = TEST_FAILURE; + char *filename = NULL; + int fd = -1; + int err; + uint64_t size = 0x0c; + + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TEST(filename = concat_file_name(backing_dir, name), filename); + TEST(fd = open(filename, O_CREAT | O_WRONLY | O_CLOEXEC), fd != -1); + TEST(err = write(fd, v1_file, sizeof(v1_file)), err == sizeof(v1_file)); + TEST(err = fsetxattr(fd, INCFS_XATTR_SIZE_NAME, &size, sizeof(size), 0), + err == 0); + TEST(err = mount_fs(mount_dir, backing_dir, 50), err == 0); + free(filename); + TEST(filename = concat_file_name(mount_dir, name), filename); + close(fd); + TEST(fd = open(filename, O_RDONLY), fd != -1); + + result = TEST_SUCCESS; +out: + close(fd); + umount(mount_dir); + free(backing_dir); + free(filename); + return result; +} + static char *setup_mount_dir() { struct stat st; @@ -3058,6 +3162,7 @@ int main(int argc, char *argv[]) MAKE_TEST(get_hash_blocks_test), MAKE_TEST(large_file_test), MAKE_TEST(mapped_file_test), + MAKE_TEST(compatibility_test), }; #undef MAKE_TEST diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index 32ba6af49bbd..e60b0d36f49d 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -314,7 +314,7 @@ int wait_for_pending_reads2(int fd, int timeout_ms, return read_res / sizeof(*prs); } -char *concat_file_name(const char *dir, char *file) +char *concat_file_name(const char *dir, const char *file) { char full_name[FILENAME_MAX] = ""; diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index 470471c7f418..f5ed8dc5f0ff 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -60,7 +60,7 @@ int wait_for_pending_reads(int fd, int timeout_ms, int wait_for_pending_reads2(int fd, int timeout_ms, struct incfs_pending_read_info2 *prs, int prs_count); -char *concat_file_name(const char *dir, char *file); +char *concat_file_name(const char *dir, const char *file); void sha256(const char *data, size_t dsize, char *hash); From 1652f2647e8f4ffc8e1a3e9a368563d39fa25132 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 2 Sep 2020 15:56:09 -0700 Subject: [PATCH 450/636] ANDROID: Incremental fs: Add INCFS_IOC_GET_BLOCK_COUNT Bug: 166638631 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: Ia7a8cab87688fc401f0719df84fe79ea75887692 --- fs/incfs/data_mgmt.c | 38 +++++++- fs/incfs/data_mgmt.h | 9 +- fs/incfs/format.c | 52 ++++++++++ fs/incfs/format.h | 18 +++- fs/incfs/vfs.c | 23 ++++- include/uapi/linux/incrementalfs.h | 12 +++ .../selftests/filesystems/incfs/incfs_test.c | 94 +++++++++++++++++++ 7 files changed, 241 insertions(+), 5 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index fc868f786e6c..9bde35bf4f0f 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -274,9 +274,29 @@ out: void incfs_free_data_file(struct data_file *df) { + u32 blocks_written; + if (!df) return; + blocks_written = atomic_read(&df->df_data_blocks_written); + if (blocks_written != df->df_initial_data_blocks_written) { + struct backing_file_context *bfc = df->df_backing_file_context; + int error = -1; + + if (bfc && !mutex_lock_interruptible(&bfc->bc_mutex)) { + error = incfs_write_status_to_backing_file( + df->df_backing_file_context, + df->df_status_offset, + blocks_written); + mutex_unlock(&bfc->bc_mutex); + } + + if (error) + /* Nothing can be done, just warn */ + pr_warn("incfs: failed to write status to backing file\n"); + } + incfs_free_mtree(df->df_hash_tree); incfs_free_bfc(df->df_backing_file_context); kfree(df); @@ -1125,8 +1145,10 @@ int incfs_process_new_data_block(struct data_file *df, df->df_blockmap_off, flags); mutex_unlock(&bfc->bc_mutex); } - if (!error) + if (!error) { notify_pending_reads(mi, segment, block->block_index); + atomic_inc(&df->df_data_blocks_written); + } up_write(&segment->rwsem); @@ -1303,6 +1325,19 @@ out: return error; } +static int process_status_md(struct incfs_status *is, + struct metadata_handler *handler) +{ + struct data_file *df = handler->context; + + df->df_initial_data_blocks_written = le32_to_cpu(is->is_blocks_written); + atomic_set(&df->df_data_blocks_written, + df->df_initial_data_blocks_written); + df->df_status_offset = handler->md_record_offset; + + return 0; +} + int incfs_scan_metadata_chain(struct data_file *df) { struct metadata_handler *handler = NULL; @@ -1330,6 +1365,7 @@ int incfs_scan_metadata_chain(struct data_file *df) handler->context = df; handler->handle_blockmap = process_blockmap_md; handler->handle_signature = process_file_signature_md; + handler->handle_status = process_status_md; while (handler->md_record_offset > 0) { error = incfs_read_next_metadata_record(bfc, handler); diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 26e9b4bf4d33..1acb027b4c39 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -249,7 +249,14 @@ struct data_file { /* For mapped files, the offset into the actual file */ loff_t df_mapped_offset; - struct file_attr n_attr; + /* Number of data blocks written to file */ + atomic_t df_data_blocks_written; + + /* Number of data blocks in the status block */ + u32 df_initial_data_blocks_written; + + /* Offset to status metadata header */ + loff_t df_status_offset; struct mtree *df_hash_tree; diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 743aebfc80b9..e1a036a6ba44 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -316,6 +316,53 @@ err: return result; } +static int write_new_status_to_backing_file(struct backing_file_context *bfc, + u32 blocks_written) +{ + struct incfs_status is = {}; + int result; + loff_t rollback_pos; + + if (!bfc) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + + rollback_pos = incfs_get_end_offset(bfc->bc_file); + + is.is_header.h_md_entry_type = INCFS_MD_STATUS; + is.is_header.h_record_size = cpu_to_le16(sizeof(is)); + is.is_blocks_written = cpu_to_le32(blocks_written); + + result = append_md_to_backing_file(bfc, &is.is_header); + if (result) + truncate_backing_file(bfc, rollback_pos); + + return result; +} + +int incfs_write_status_to_backing_file(struct backing_file_context *bfc, + loff_t status_offset, + u32 blocks_written) +{ + struct incfs_status is; + int result; + + if (status_offset == 0) + return write_new_status_to_backing_file(bfc, blocks_written); + + result = incfs_kread(bfc->bc_file, &is, sizeof(is), status_offset); + if (result != sizeof(is)) + return -EIO; + + is.is_blocks_written = cpu_to_le32(blocks_written); + result = incfs_kwrite(bfc->bc_file, &is, sizeof(is), status_offset); + if (result != sizeof(is)) + return -EIO; + + return 0; +} + /* * Write a backing file header * It should always be called only on empty file. @@ -637,6 +684,11 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, res = handler->handle_signature( &handler->md_buffer.signature, handler); break; + case INCFS_MD_STATUS: + if (handler->handle_status) + res = handler->handle_status( + &handler->md_buffer.status, handler); + break; default: res = -ENOTSUPP; break; diff --git a/fs/incfs/format.h b/fs/incfs/format.h index d0f65eb1e0ab..9046623798b7 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -118,7 +118,8 @@ enum incfs_metadata_type { INCFS_MD_NONE = 0, INCFS_MD_BLOCK_MAP = 1, INCFS_MD_FILE_ATTR = 2, - INCFS_MD_SIGNATURE = 3 + INCFS_MD_SIGNATURE = 3, + INCFS_MD_STATUS = 4, }; enum incfs_file_header_flags { @@ -245,6 +246,14 @@ struct incfs_df_signature { u64 hash_offset; }; +struct incfs_status { + struct incfs_md_header is_header; + + __le32 is_blocks_written; /* Number of blocks written */ + + __le32 is_dummy[7]; /* Three spare fields */ +}; + /* State of the backing file. */ struct backing_file_context { /* Protects writes to bc_file */ @@ -269,12 +278,15 @@ struct metadata_handler { struct incfs_md_header md_header; struct incfs_blockmap blockmap; struct incfs_file_signature signature; + struct incfs_status status; } md_buffer; int (*handle_blockmap)(struct incfs_blockmap *bm, struct metadata_handler *handler); int (*handle_signature)(struct incfs_file_signature *sig, struct metadata_handler *handler); + int (*handle_status)(struct incfs_status *sig, + struct metadata_handler *handler); }; #define INCFS_MAX_METADATA_RECORD_SIZE \ FIELD_SIZEOF(struct metadata_handler, md_buffer) @@ -311,6 +323,10 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, struct mem_range sig, u32 tree_size); +int incfs_write_status_to_backing_file(struct backing_file_context *bfc, + loff_t status_offset, + u32 blocks_written); + int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags); int incfs_make_empty_backing_file(struct backing_file_context *bfc, diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index b26e542f7e2a..3c2ceef861c4 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -85,8 +85,6 @@ static const struct file_operations incfs_dir_fops = { .iterate = iterate_incfs_dir, .open = file_open, .release = file_release, - .unlocked_ioctl = dispatch_ioctl, - .compat_ioctl = dispatch_ioctl }; static const struct dentry_operations incfs_dentry_ops = { @@ -663,6 +661,25 @@ static long ioctl_get_filled_blocks(struct file *f, void __user *arg) return error; } +static long ioctl_get_block_count(struct file *f, void __user *arg) +{ + struct incfs_get_block_count_args __user *args_usr_ptr = arg; + struct incfs_get_block_count_args args = {}; + struct data_file *df = get_incfs_data_file(f); + int error; + + if (!df) + return -EINVAL; + + args.total_blocks_out = df->df_data_block_count; + args.filled_blocks_out = atomic_read(&df->df_data_blocks_written); + + if (copy_to_user(args_usr_ptr, &args, sizeof(args))) + return -EFAULT; + + return error; +} + static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) { switch (req) { @@ -672,6 +689,8 @@ static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) return ioctl_read_file_signature(f, (void __user *)arg); case INCFS_IOC_GET_FILLED_BLOCKS: return ioctl_get_filled_blocks(f, (void __user *)arg); + case INCFS_IOC_GET_BLOCK_COUNT: + return ioctl_get_block_count(f, (void __user *)arg); default: return -EINVAL; } diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 44cc754177aa..cf7110cabc43 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -97,6 +97,10 @@ #define INCFS_IOC_CREATE_MAPPED_FILE \ _IOWR(INCFS_IOCTL_BASE_CODE, 35, struct incfs_create_mapped_file_args) +/* Get number of blocks, total and filled */ +#define INCFS_IOC_GET_BLOCK_COUNT \ + _IOR(INCFS_IOCTL_BASE_CODE, 36, struct incfs_get_block_count_args) + /* ===== sysfs feature flags ===== */ /* * Each flag is represented by a file in /sys/fs/incremental-fs/features @@ -427,4 +431,12 @@ struct incfs_create_mapped_file_args { __aligned_u64 source_offset; }; +struct incfs_get_block_count_args { + /* Total number of data blocks in the file */ + __u32 total_blocks_out; + + /* Number of filled data blocks in the file */ + __u32 filled_blocks_out; +}; + #endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index b5cbc51410a3..c2b149fe8aef 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -3052,6 +3052,99 @@ out: return result; } +static int validate_block_count(const char *mount_dir, struct test_file *file) +{ + int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + char *filename = concat_file_name(mount_dir, file->name); + int fd; + struct incfs_get_block_count_args bca = {}; + int test_result = TEST_FAILURE; + int result; + int i; + + fd = open(filename, O_RDONLY | O_CLOEXEC); + if (fd <= 0) + goto out; + + result = ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca); + if (result != 0) + goto out; + + if (bca.total_blocks_out != block_cnt || + bca.filled_blocks_out != 0) + goto out; + + for (i = 0; i < block_cnt; i += 2) + if (emit_test_block(mount_dir, file, i)) + goto out; + + result = ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca); + if (result != 0) + goto out; + + if (bca.total_blocks_out != block_cnt || + bca.filled_blocks_out != (block_cnt + 1) / 2) + goto out; + + close(fd); + fd = open(filename, O_RDONLY | O_CLOEXEC); + if (fd <= 0) + goto out; + + result = ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca); + if (result != 0) + goto out; + + if (bca.total_blocks_out != block_cnt || + bca.filled_blocks_out != (block_cnt + 1) / 2) + goto out; + + test_result = TEST_SUCCESS; +out: + free(filename); + close(fd); + return test_result; +} + +static int block_count_test(const char *mount_dir) +{ + char *backing_dir; + int result = TEST_FAILURE; + int cmd_fd = -1; + int i; + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + for (i = 0; i < file_num; ++i) { + struct test_file *file = &test.files[i]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, file->size, + NULL) < 0) + goto failure; + + result = validate_block_count(mount_dir, file); + if (result) + goto failure; + } + +failure: + close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return result; +} + static char *setup_mount_dir() { struct stat st; @@ -3163,6 +3256,7 @@ int main(int argc, char *argv[]) MAKE_TEST(large_file_test), MAKE_TEST(mapped_file_test), MAKE_TEST(compatibility_test), + MAKE_TEST(block_count_test), }; #undef MAKE_TEST From 837bf5a401c0225278f0e7c564897f23ec57406f Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 15 Sep 2020 13:01:25 -0700 Subject: [PATCH 451/636] ANDROID: Incremental fs: Add hash block counts to IOC_IOCTL_GET_BLOCK_COUNT Bug: 166638631 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I0061a855ec563de8df40ba0b35292e99be33c6c0 --- fs/incfs/data_mgmt.c | 24 ++- fs/incfs/data_mgmt.h | 6 + fs/incfs/format.c | 27 +-- fs/incfs/format.h | 9 +- fs/incfs/vfs.c | 7 +- include/uapi/linux/incrementalfs.h | 10 +- .../selftests/filesystems/incfs/incfs_test.c | 176 ++++++++++++------ 7 files changed, 176 insertions(+), 83 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 9bde35bf4f0f..35362d450d15 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -274,13 +274,16 @@ out: void incfs_free_data_file(struct data_file *df) { - u32 blocks_written; + u32 data_blocks_written, hash_blocks_written; if (!df) return; - blocks_written = atomic_read(&df->df_data_blocks_written); - if (blocks_written != df->df_initial_data_blocks_written) { + data_blocks_written = atomic_read(&df->df_data_blocks_written); + hash_blocks_written = atomic_read(&df->df_hash_blocks_written); + + if (data_blocks_written != df->df_initial_data_blocks_written || + hash_blocks_written != df->df_initial_hash_blocks_written) { struct backing_file_context *bfc = df->df_backing_file_context; int error = -1; @@ -288,7 +291,8 @@ void incfs_free_data_file(struct data_file *df) error = incfs_write_status_to_backing_file( df->df_backing_file_context, df->df_status_offset, - blocks_written); + data_blocks_written, + hash_blocks_written); mutex_unlock(&bfc->bc_mutex); } @@ -1228,6 +1232,9 @@ int incfs_process_new_hash_block(struct data_file *df, hash_area_base, df->df_blockmap_off, df->df_size); mutex_unlock(&bfc->bc_mutex); } + if (!error) + atomic_inc(&df->df_hash_blocks_written); + return error; } @@ -1330,9 +1337,16 @@ static int process_status_md(struct incfs_status *is, { struct data_file *df = handler->context; - df->df_initial_data_blocks_written = le32_to_cpu(is->is_blocks_written); + df->df_initial_data_blocks_written = + le32_to_cpu(is->is_data_blocks_written); atomic_set(&df->df_data_blocks_written, df->df_initial_data_blocks_written); + + df->df_initial_hash_blocks_written = + le32_to_cpu(is->is_hash_blocks_written); + atomic_set(&df->df_hash_blocks_written, + df->df_initial_hash_blocks_written); + df->df_status_offset = handler->md_record_offset; return 0; diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 1acb027b4c39..1d8a0aec4330 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -255,6 +255,12 @@ struct data_file { /* Number of data blocks in the status block */ u32 df_initial_data_blocks_written; + /* Number of hash blocks written to file */ + atomic_t df_hash_blocks_written; + + /* Number of hash blocks in the status block */ + u32 df_initial_hash_blocks_written; + /* Offset to status metadata header */ loff_t df_status_offset; diff --git a/fs/incfs/format.c b/fs/incfs/format.c index e1a036a6ba44..37a41c8162de 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -317,23 +317,25 @@ err: } static int write_new_status_to_backing_file(struct backing_file_context *bfc, - u32 blocks_written) + u32 data_blocks_written, + u32 hash_blocks_written) { - struct incfs_status is = {}; int result; loff_t rollback_pos; + struct incfs_status is = { + .is_header = { + .h_md_entry_type = INCFS_MD_STATUS, + .h_record_size = cpu_to_le16(sizeof(is)), + }, + .is_data_blocks_written = cpu_to_le32(data_blocks_written), + .is_hash_blocks_written = cpu_to_le32(hash_blocks_written), + }; if (!bfc) return -EFAULT; LOCK_REQUIRED(bfc->bc_mutex); - rollback_pos = incfs_get_end_offset(bfc->bc_file); - - is.is_header.h_md_entry_type = INCFS_MD_STATUS; - is.is_header.h_record_size = cpu_to_le16(sizeof(is)); - is.is_blocks_written = cpu_to_le32(blocks_written); - result = append_md_to_backing_file(bfc, &is.is_header); if (result) truncate_backing_file(bfc, rollback_pos); @@ -343,19 +345,22 @@ static int write_new_status_to_backing_file(struct backing_file_context *bfc, int incfs_write_status_to_backing_file(struct backing_file_context *bfc, loff_t status_offset, - u32 blocks_written) + u32 data_blocks_written, + u32 hash_blocks_written) { struct incfs_status is; int result; if (status_offset == 0) - return write_new_status_to_backing_file(bfc, blocks_written); + return write_new_status_to_backing_file(bfc, + data_blocks_written, hash_blocks_written); result = incfs_kread(bfc->bc_file, &is, sizeof(is), status_offset); if (result != sizeof(is)) return -EIO; - is.is_blocks_written = cpu_to_le32(blocks_written); + is.is_data_blocks_written = cpu_to_le32(data_blocks_written); + is.is_hash_blocks_written = cpu_to_le32(hash_blocks_written); result = incfs_kwrite(bfc->bc_file, &is, sizeof(is), status_offset); if (result != sizeof(is)) return -EIO; diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 9046623798b7..ab7862ef4b19 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -249,9 +249,11 @@ struct incfs_df_signature { struct incfs_status { struct incfs_md_header is_header; - __le32 is_blocks_written; /* Number of blocks written */ + __le32 is_data_blocks_written; /* Number of data blocks written */ - __le32 is_dummy[7]; /* Three spare fields */ + __le32 is_hash_blocks_written; /* Number of hash blocks written */ + + __le32 is_dummy[6]; /* Spare fields */ }; /* State of the backing file. */ @@ -325,7 +327,8 @@ int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, int incfs_write_status_to_backing_file(struct backing_file_context *bfc, loff_t status_offset, - u32 blocks_written); + u32 data_blocks_written, + u32 hash_blocks_written); int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags); diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 3c2ceef861c4..b73b0d19921d 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -671,8 +671,11 @@ static long ioctl_get_block_count(struct file *f, void __user *arg) if (!df) return -EINVAL; - args.total_blocks_out = df->df_data_block_count; - args.filled_blocks_out = atomic_read(&df->df_data_blocks_written); + args.total_data_blocks_out = df->df_data_block_count; + args.filled_data_blocks_out = atomic_read(&df->df_data_blocks_written); + args.total_hash_blocks_out = df->df_total_block_count - + df->df_data_block_count; + args.filled_hash_blocks_out = atomic_read(&df->df_hash_blocks_written); if (copy_to_user(args_usr_ptr, &args, sizeof(args))) return -EFAULT; diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index cf7110cabc43..8dc3b65a5a31 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -433,10 +433,16 @@ struct incfs_create_mapped_file_args { struct incfs_get_block_count_args { /* Total number of data blocks in the file */ - __u32 total_blocks_out; + __u32 total_data_blocks_out; /* Number of filled data blocks in the file */ - __u32 filled_blocks_out; + __u32 filled_data_blocks_out; + + /* Total number of hash blocks in the file */ + __u32 total_hash_blocks_out; + + /* Number of filled hash blocks in the file */ + __u32 filled_hash_blocks_out; }; #endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index c2b149fe8aef..5c414956f370 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -2575,9 +2575,6 @@ static int emit_partial_test_file_hash(const char *mount_dir, if (file->size <= 4096 / 32 * 4096) return 0; - if (fill_blocks.count == 0) - return 0; - if (!fill_block_array) return -ENOMEM; fill_blocks.fill_blocks = ptr_to_u64(fill_block_array); @@ -3011,15 +3008,23 @@ static const char v1_file[] = { 0x01, 0x00, 0x00, 0x00, }; -#define TEST(statement, condition) \ +#define TESTCOND(condition) \ do { \ - statement; \ if (!(condition)) { \ ksft_print_msg("%s failed %d\n", \ __func__, __LINE__); \ goto out; \ } \ - } while(false) + } while (false) + +#define TEST(statement, condition) \ + do { \ + statement; \ + TESTCOND(condition); \ + } while (false) + +#define TESTEQUAL(statement, res) \ + TESTCOND((statement) == (res)) static int compatibility_test(const char *mount_dir) { @@ -3028,16 +3033,15 @@ static int compatibility_test(const char *mount_dir) int result = TEST_FAILURE; char *filename = NULL; int fd = -1; - int err; uint64_t size = 0x0c; TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); TEST(filename = concat_file_name(backing_dir, name), filename); TEST(fd = open(filename, O_CREAT | O_WRONLY | O_CLOEXEC), fd != -1); - TEST(err = write(fd, v1_file, sizeof(v1_file)), err == sizeof(v1_file)); - TEST(err = fsetxattr(fd, INCFS_XATTR_SIZE_NAME, &size, sizeof(size), 0), - err == 0); - TEST(err = mount_fs(mount_dir, backing_dir, 50), err == 0); + TESTEQUAL(write(fd, v1_file, sizeof(v1_file)), sizeof(v1_file)); + TESTEQUAL(fsetxattr(fd, INCFS_XATTR_SIZE_NAME, &size, sizeof(size), 0), + 0); + TESTEQUAL(mount_fs(mount_dir, backing_dir, 50), 0); free(filename); TEST(filename = concat_file_name(mount_dir, name), filename); close(fd); @@ -3059,45 +3063,32 @@ static int validate_block_count(const char *mount_dir, struct test_file *file) int fd; struct incfs_get_block_count_args bca = {}; int test_result = TEST_FAILURE; - int result; int i; - fd = open(filename, O_RDONLY | O_CLOEXEC); - if (fd <= 0) - goto out; - - result = ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca); - if (result != 0) - goto out; - - if (bca.total_blocks_out != block_cnt || - bca.filled_blocks_out != 0) - goto out; + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, block_cnt); + TESTEQUAL(bca.filled_data_blocks_out, 0); + TESTEQUAL(bca.total_hash_blocks_out, 0); + TESTEQUAL(bca.filled_hash_blocks_out, 0); for (i = 0; i < block_cnt; i += 2) - if (emit_test_block(mount_dir, file, i)) - goto out; + TESTEQUAL(emit_test_block(mount_dir, file, i), 0); - result = ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca); - if (result != 0) - goto out; - - if (bca.total_blocks_out != block_cnt || - bca.filled_blocks_out != (block_cnt + 1) / 2) - goto out; + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, block_cnt); + TESTEQUAL(bca.filled_data_blocks_out, (block_cnt + 1) / 2); + TESTEQUAL(bca.total_hash_blocks_out, 0); + TESTEQUAL(bca.filled_hash_blocks_out, 0); close(fd); - fd = open(filename, O_RDONLY | O_CLOEXEC); - if (fd <= 0) - goto out; + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); - result = ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca); - if (result != 0) - goto out; - - if (bca.total_blocks_out != block_cnt || - bca.filled_blocks_out != (block_cnt + 1) / 2) - goto out; + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, block_cnt); + TESTEQUAL(bca.filled_data_blocks_out, (block_cnt + 1) / 2); + TESTEQUAL(bca.total_hash_blocks_out, 0); + TESTEQUAL(bca.filled_hash_blocks_out, 0); test_result = TEST_SUCCESS; out: @@ -3113,32 +3104,96 @@ static int block_count_test(const char *mount_dir) int cmd_fd = -1; int i; struct test_files_set test = get_test_files_set(); - const int file_num = test.files_count; - backing_dir = create_backing_dir(mount_dir); - if (!backing_dir) - goto failure; + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, "readahead=0", false), + 0); - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) - goto failure; + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - for (i = 0; i < file_num; ++i) { + for (i = 0; i < test.files_count; ++i) { struct test_file *file = &test.files[i]; - if (emit_file(cmd_fd, NULL, file->name, &file->id, file->size, - NULL) < 0) - goto failure; + TESTEQUAL(emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL), 0); - result = validate_block_count(mount_dir, file); - if (result) - goto failure; + TESTEQUAL(validate_block_count(mount_dir, file), 0); } -failure: + result = TEST_SUCCESS; +out: + close(cmd_fd); + umount(mount_dir); + free(backing_dir); + return result; +} + +static int validate_hash_block_count(const char *mount_dir, + struct test_file *file) +{ + const int digest_size = SHA256_DIGEST_SIZE; + const int hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + + int result = TEST_FAILURE; + int block_count = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int hash_block_count = block_count; + int total_hash_block_count = 0; + char *filename = NULL; + int fd = -1; + struct incfs_get_block_count_args bca = {}; + + while (hash_block_count > 1) { + hash_block_count = (hash_block_count + hash_per_block - 1) + / hash_per_block; + total_hash_block_count += hash_block_count; + } + + TEST(filename = concat_file_name(mount_dir, file->name), filename); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, block_count); + TESTEQUAL(bca.filled_data_blocks_out, 0); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_block_count); + TESTEQUAL(bca.filled_hash_blocks_out, + total_hash_block_count > 1 ? 1 : 0); + + result = TEST_SUCCESS; +out: + close(fd); + free(filename); + return result; +} + +static int hash_block_count_test(const char *mount_dir) +{ + int result = TEST_FAILURE; + char *backing_dir; + int cmd_fd = -1; + int i; + struct test_files_set test = get_test_files_set(); + int fd = -1; + + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, "readahead=0", false), + 0); + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); + + for (i = 0; i < test.files_count; i++) { + struct test_file *file = &test.files[i]; + + TESTEQUAL(crypto_emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, file->root_hash, + file->sig.add_data), + 0); + + TESTEQUAL(emit_partial_test_file_hash(mount_dir, file), 0); + TESTEQUAL(validate_hash_block_count(mount_dir, &test.files[i]), + 0); + } + + result = TEST_SUCCESS; +out: + close(fd); close(cmd_fd); umount(mount_dir); free(backing_dir); @@ -3257,6 +3312,7 @@ int main(int argc, char *argv[]) MAKE_TEST(mapped_file_test), MAKE_TEST(compatibility_test), MAKE_TEST(block_count_test), + MAKE_TEST(hash_block_count_test), }; #undef MAKE_TEST From 29e40c29df4b5d53271306e613ef1777be3b8019 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 3 Sep 2020 13:40:25 -0700 Subject: [PATCH 452/636] ANDROID: Incremental fs: Fix filled block count from get filled blocks Bug: 165929150 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I8845adcafcc3a3f01730e8b5534fb25ea3d551db --- fs/incfs/data_mgmt.c | 69 +++--- fs/incfs/data_mgmt.h | 19 +- fs/incfs/format.c | 9 - fs/incfs/format.h | 5 +- fs/incfs/pseudo_files.c | 7 +- fs/incfs/vfs.c | 45 +++- fs/incfs/vfs.h | 5 - .../selftests/filesystems/incfs/incfs_test.c | 230 +++++++++++++++--- 8 files changed, 281 insertions(+), 108 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 35362d450d15..d8c02a413e44 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -686,36 +686,9 @@ static int copy_one_range(struct incfs_filled_range *range, void __user *buffer, return 0; } -static int update_file_header_flags(struct data_file *df, u32 bits_to_reset, - u32 bits_to_set) -{ - int result; - u32 new_flags; - struct backing_file_context *bfc; - - if (!df) - return -EFAULT; - bfc = df->df_backing_file_context; - if (!bfc) - return -EFAULT; - - result = mutex_lock_interruptible(&bfc->bc_mutex); - if (result) - return result; - - new_flags = (df->df_header_flags & ~bits_to_reset) | bits_to_set; - if (new_flags != df->df_header_flags) { - df->df_header_flags = new_flags; - result = incfs_write_file_header_flags(bfc, new_flags); - } - - mutex_unlock(&bfc->bc_mutex); - - return result; -} - #define READ_BLOCKMAP_ENTRIES 512 int incfs_get_filled_blocks(struct data_file *df, + struct incfs_file_data *fd, struct incfs_get_filled_blocks_args *arg) { int error = 0; @@ -729,6 +702,8 @@ int incfs_get_filled_blocks(struct data_file *df, int i = READ_BLOCKMAP_ENTRIES - 1; int entries_read = 0; struct incfs_blockmap_entry *bme; + int data_blocks_filled = 0; + int hash_blocks_filled = 0; *size_out = 0; if (end_index > df->df_total_block_count) @@ -736,7 +711,8 @@ int incfs_get_filled_blocks(struct data_file *df, arg->total_blocks_out = df->df_total_block_count; arg->data_blocks_out = df->df_data_block_count; - if (df->df_header_flags & INCFS_FILE_COMPLETE) { + if (atomic_read(&df->df_data_blocks_written) == + df->df_data_block_count) { pr_debug("File marked full, fast get_filled_blocks"); if (arg->start_index > end_index) { arg->index_out = arg->start_index; @@ -789,6 +765,12 @@ int incfs_get_filled_blocks(struct data_file *df, convert_data_file_block(bme + i, &dfb); + if (is_data_block_present(&dfb)) + if (arg->index_out >= df->df_data_block_count) + ++hash_blocks_filled; + else + ++data_blocks_filled; + if (is_data_block_present(&dfb) == in_range) continue; @@ -818,13 +800,28 @@ int incfs_get_filled_blocks(struct data_file *df, arg->index_out = range.begin; } - if (!error && in_range && arg->start_index == 0 && - end_index == df->df_total_block_count && - *size_out == sizeof(struct incfs_filled_range)) { - int result = - update_file_header_flags(df, 0, INCFS_FILE_COMPLETE); - /* Log failure only, since it's just a failed optimization */ - pr_debug("Marked file full with result %d", result); + if (arg->start_index == 0) { + fd->fd_get_block_pos = 0; + fd->fd_filled_data_blocks = 0; + fd->fd_filled_hash_blocks = 0; + } + + if (arg->start_index == fd->fd_get_block_pos) { + fd->fd_get_block_pos = arg->index_out + 1; + fd->fd_filled_data_blocks += data_blocks_filled; + fd->fd_filled_hash_blocks += hash_blocks_filled; + } + + if (fd->fd_get_block_pos == df->df_total_block_count + 1) { + if (fd->fd_filled_data_blocks > + atomic_read(&df->df_data_blocks_written)) + atomic_set(&df->df_data_blocks_written, + fd->fd_filled_data_blocks); + + if (fd->fd_filled_hash_blocks > + atomic_read(&df->df_hash_blocks_written)) + atomic_set(&df->df_hash_blocks_written, + fd->fd_filled_hash_blocks); } kfree(bme); diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 1d8a0aec4330..a57ae4c9a31c 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -161,7 +161,6 @@ struct mount_info { /* Number of blocks written since mount */ atomic_t mi_blocks_written; - }; struct data_file_block { @@ -289,6 +288,23 @@ struct dentry_info { struct path backing_path; }; +enum FILL_PERMISSION { + CANT_FILL = 0, + CAN_FILL = 1, +}; + +struct incfs_file_data { + /* Does this file handle have INCFS_IOC_FILL_BLOCKS permission */ + enum FILL_PERMISSION fd_fill_permission; + + /* If INCFS_IOC_GET_FILLED_BLOCKS has been called, where are we */ + int fd_get_block_pos; + + /* And how many filled blocks are there up to that point */ + int fd_filled_data_blocks; + int fd_filled_hash_blocks; +}; + struct mount_info *incfs_alloc_mount_info(struct super_block *sb, struct mount_options *options, struct path *backing_dir_path); @@ -313,6 +329,7 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, struct mem_range tmp); int incfs_get_filled_blocks(struct data_file *df, + struct incfs_file_data *fd, struct incfs_get_filled_blocks_args *arg); int incfs_read_file_signature(struct data_file *df, struct mem_range dst); diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 37a41c8162de..691d7e474328 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -205,15 +205,6 @@ static int append_md_to_backing_file(struct backing_file_context *bfc, return result; } -int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags) -{ - if (!bfc) - return -EFAULT; - - return write_to_bf(bfc, &flags, sizeof(flags), - offsetof(struct incfs_file_header, fh_flags)); -} - /* * Reserve 0-filled space for the blockmap body, and append * incfs_blockmap metadata record pointing to it. diff --git a/fs/incfs/format.h b/fs/incfs/format.h index ab7862ef4b19..2f6c7c8f9118 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -123,7 +123,6 @@ enum incfs_metadata_type { }; enum incfs_file_header_flags { - INCFS_FILE_COMPLETE = 1 << 0, INCFS_FILE_MAPPED = 1 << 1, }; @@ -254,7 +253,7 @@ struct incfs_status { __le32 is_hash_blocks_written; /* Number of hash blocks written */ __le32 is_dummy[6]; /* Spare fields */ -}; +} __packed; /* State of the backing file. */ struct backing_file_context { @@ -330,8 +329,6 @@ int incfs_write_status_to_backing_file(struct backing_file_context *bfc, u32 data_blocks_written, u32 hash_blocks_written); -int incfs_write_file_header_flags(struct backing_file_context *bfc, u32 flags); - int incfs_make_empty_backing_file(struct backing_file_context *bfc, incfs_uuid_t *uuid, u64 file_size); diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c index ca8d1b206158..0993829cbe2d 100644 --- a/fs/incfs/pseudo_files.c +++ b/fs/incfs/pseudo_files.c @@ -274,6 +274,7 @@ static long ioctl_permit_fill(struct file *f, void __user *arg) struct incfs_permit_fill permit_fill; long error = 0; struct file *file = NULL; + struct incfs_file_data *fd; if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) return -EFAULT; @@ -292,9 +293,11 @@ static long ioctl_permit_fill(struct file *f, void __user *arg) goto out; } - switch ((uintptr_t)file->private_data) { + fd = file->private_data; + + switch (fd->fd_fill_permission) { case CANT_FILL: - file->private_data = (void *)CAN_FILL; + fd->fd_fill_permission = CAN_FILL; break; case CAN_FILL: diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index b73b0d19921d..f535d24d76ca 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -525,6 +525,7 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) struct incfs_fill_blocks fill_blocks; struct incfs_fill_block __user *usr_fill_block_array; struct data_file *df = get_incfs_data_file(f); + struct incfs_file_data *fd = f->private_data; const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; u8 *data_buf = NULL; ssize_t error = 0; @@ -533,7 +534,7 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) if (!df) return -EBADF; - if ((uintptr_t)f->private_data != CAN_FILL) + if (!fd || fd->fd_fill_permission != CAN_FILL) return -EPERM; if (copy_from_user(&fill_blocks, usr_fill_blocks, sizeof(fill_blocks))) @@ -642,18 +643,19 @@ static long ioctl_get_filled_blocks(struct file *f, void __user *arg) struct incfs_get_filled_blocks_args __user *args_usr_ptr = arg; struct incfs_get_filled_blocks_args args = {}; struct data_file *df = get_incfs_data_file(f); + struct incfs_file_data *fd = f->private_data; int error; - if (!df) + if (!df || !fd) return -EINVAL; - if ((uintptr_t)f->private_data != CAN_FILL) + if (fd->fd_fill_permission != CAN_FILL) return -EPERM; if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) return -EINVAL; - error = incfs_get_filled_blocks(df, &args); + error = incfs_get_filled_blocks(df, fd, &args); if (copy_to_user(args_usr_ptr, &args, sizeof(args))) return -EFAULT; @@ -1115,6 +1117,8 @@ static int file_open(struct inode *inode, struct file *file) int flags = O_NOATIME | O_LARGEFILE | (S_ISDIR(inode->i_mode) ? O_RDONLY : O_RDWR); + WARN_ON(file->private_data); + if (!mi) return -EBADF; @@ -1132,8 +1136,20 @@ static int file_open(struct inode *inode, struct file *file) } if (S_ISREG(inode->i_mode)) { + struct incfs_file_data *fd = kzalloc(sizeof(*fd), GFP_NOFS); + + if (!fd) { + err = -ENOMEM; + goto out; + } + + *fd = (struct incfs_file_data) { + .fd_fill_permission = CANT_FILL, + }; + file->private_data = fd; + err = make_inode_ready_for_data_ops(mi, inode, backing_file); - file->private_data = (void *)CANT_FILL; + } else if (S_ISDIR(inode->i_mode)) { struct dir_file *dir = NULL; @@ -1146,9 +1162,17 @@ static int file_open(struct inode *inode, struct file *file) err = -EBADF; out: - if (err) - pr_debug("incfs: %s name:%s err: %d\n", __func__, - file->f_path.dentry->d_name.name, err); + if (err) { + pr_debug("name:%s err: %d\n", + file->f_path.dentry->d_name.name, err); + if (S_ISREG(inode->i_mode)) + kfree(file->private_data); + else if (S_ISDIR(inode->i_mode)) + incfs_free_dir_file(file->private_data); + + file->private_data = NULL; + } + if (backing_file) fput(backing_file); return err; @@ -1157,9 +1181,8 @@ out: static int file_release(struct inode *inode, struct file *file) { if (S_ISREG(inode->i_mode)) { - /* Do nothing. - * data_file is released only by inode eviction. - */ + kfree(file->private_data); + file->private_data = NULL; } else if (S_ISDIR(inode->i_mode)) { struct dir_file *dir = get_incfs_dir_file(file); diff --git a/fs/incfs/vfs.h b/fs/incfs/vfs.h index d5dc5ebe99de..79fdf243733d 100644 --- a/fs/incfs/vfs.h +++ b/fs/incfs/vfs.h @@ -6,11 +6,6 @@ #ifndef _INCFS_VFS_H #define _INCFS_VFS_H -enum FILL_PERMISSION { - CANT_FILL = 0, - CAN_FILL = 1, -}; - extern const struct file_operations incfs_file_ops; extern const struct inode_operations incfs_file_inode_ops; diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 5c414956f370..281b544cd872 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -33,6 +33,14 @@ #define INCFS_ROOT_INODE 0 +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define le16_to_cpu(x) (x) +#define le32_to_cpu(x) (x) +#define le64_to_cpu(x) (x) +#else +#error Big endian not supported! +#endif + struct hash_block { char data[INCFS_DATA_FILE_BLOCK_SIZE]; }; @@ -3026,11 +3034,14 @@ static const char v1_file[] = { #define TESTEQUAL(statement, res) \ TESTCOND((statement) == (res)) +#define TESTNE(statement, res) \ + TESTCOND((statement) != (res)) + static int compatibility_test(const char *mount_dir) { - char *backing_dir = NULL; static const char *name = "file"; int result = TEST_FAILURE; + char *backing_dir = NULL; char *filename = NULL; int fd = -1; uint64_t size = 0x0c; @@ -3056,51 +3067,167 @@ out: return result; } -static int validate_block_count(const char *mount_dir, struct test_file *file) +static int zero_blocks_written_count(int fd, uint32_t data_blocks_written, + uint32_t hash_blocks_written) { - int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; - char *filename = concat_file_name(mount_dir, file->name); - int fd; + int test_result = TEST_FAILURE; + uint64_t offset; + uint8_t type; + uint32_t bw; + + /* Get first md record */ + TESTEQUAL(pread(fd, &offset, sizeof(offset), 24), sizeof(offset)); + + /* Find status md record */ + for (;;) { + TESTNE(offset, 0); + TESTEQUAL(pread(fd, &type, sizeof(type), le64_to_cpu(offset)), + sizeof(type)); + if (type == 4) + break; + TESTEQUAL(pread(fd, &offset, sizeof(offset), + le64_to_cpu(offset) + 7), + sizeof(offset)); + } + + /* Read blocks_written */ + offset = le64_to_cpu(offset); + TESTEQUAL(pread(fd, &bw, sizeof(bw), offset + 23), sizeof(bw)); + TESTEQUAL(le32_to_cpu(bw), data_blocks_written); + TESTEQUAL(pread(fd, &bw, sizeof(bw), offset + 27), sizeof(bw)); + TESTEQUAL(le32_to_cpu(bw), hash_blocks_written); + + /* Write out zero */ + bw = 0; + TESTEQUAL(pwrite(fd, &bw, sizeof(bw), offset + 23), sizeof(bw)); + TESTEQUAL(pwrite(fd, &bw, sizeof(bw), offset + 27), sizeof(bw)); + + test_result = TEST_SUCCESS; +out: + return test_result; +} + +static int validate_block_count(const char *mount_dir, const char *backing_dir, + struct test_file *file, + int total_data_blocks, int filled_data_blocks, + int total_hash_blocks, int filled_hash_blocks) +{ + char *filename = NULL; + char *backing_filename = NULL; + int fd = -1; struct incfs_get_block_count_args bca = {}; int test_result = TEST_FAILURE; + struct incfs_filled_range ranges[128]; + struct incfs_get_filled_blocks_args fba = { + .range_buffer = ptr_to_u64(ranges), + .range_buffer_size = sizeof(ranges), + }; + int cmd_fd = -1; + struct incfs_permit_fill permit_fill; + + TEST(filename = concat_file_name(mount_dir, file->name), filename); + TEST(backing_filename = concat_file_name(backing_dir, file->name), + backing_filename); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, filled_data_blocks); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, filled_hash_blocks); + + close(fd); + TESTEQUAL(umount(mount_dir), 0); + TEST(fd = open(backing_filename, O_RDWR | O_CLOEXEC), fd != -1); + TESTEQUAL(zero_blocks_written_count(fd, filled_data_blocks, + filled_hash_blocks), + TEST_SUCCESS); + close(fd); + fd = -1; + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, "readahead=0", false), + 0); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, 0); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, 0); + + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); + permit_fill.file_descriptor = fd; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill), 0); + do { + ioctl(fd, INCFS_IOC_GET_FILLED_BLOCKS, &fba); + fba.start_index = fba.index_out + 1; + } while (fba.index_out < fba.total_blocks_out); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, filled_data_blocks); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, filled_hash_blocks); + + test_result = TEST_SUCCESS; +out: + close(cmd_fd); + close(fd); + free(filename); + free(backing_filename); + return test_result; +} + + + +static int validate_data_block_count(const char *mount_dir, + const char *backing_dir, + struct test_file *file) +{ + const int total_data_blocks = 1 + (file->size - 1) / + INCFS_DATA_FILE_BLOCK_SIZE; + const int filled_data_blocks = (total_data_blocks + 1) / 2; + + int test_result = TEST_FAILURE; + char *filename = NULL; + int fd = -1; + struct incfs_get_block_count_args bca = {}; int i; + TEST(filename = concat_file_name(mount_dir, file->name), filename); TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); - TESTEQUAL(bca.total_data_blocks_out, block_cnt); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); TESTEQUAL(bca.filled_data_blocks_out, 0); TESTEQUAL(bca.total_hash_blocks_out, 0); TESTEQUAL(bca.filled_hash_blocks_out, 0); - for (i = 0; i < block_cnt; i += 2) + for (i = 0; i < total_data_blocks; i += 2) TESTEQUAL(emit_test_block(mount_dir, file, i), 0); TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); - TESTEQUAL(bca.total_data_blocks_out, block_cnt); - TESTEQUAL(bca.filled_data_blocks_out, (block_cnt + 1) / 2); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, filled_data_blocks); TESTEQUAL(bca.total_hash_blocks_out, 0); TESTEQUAL(bca.filled_hash_blocks_out, 0); - close(fd); - TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + fd = -1; - TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); - TESTEQUAL(bca.total_data_blocks_out, block_cnt); - TESTEQUAL(bca.filled_data_blocks_out, (block_cnt + 1) / 2); - TESTEQUAL(bca.total_hash_blocks_out, 0); - TESTEQUAL(bca.filled_hash_blocks_out, 0); + TESTEQUAL(validate_block_count(mount_dir, backing_dir, file, + total_data_blocks, filled_data_blocks, + 0, 0), + 0); test_result = TEST_SUCCESS; out: - free(filename); close(fd); + free(filename); return test_result; } -static int block_count_test(const char *mount_dir) +static int data_block_count_test(const char *mount_dir) { - char *backing_dir; int result = TEST_FAILURE; + char *backing_dir; int cmd_fd = -1; int i; struct test_files_set test = get_test_files_set(); @@ -3109,15 +3236,19 @@ static int block_count_test(const char *mount_dir) TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, "readahead=0", false), 0); - TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); - for (i = 0; i < test.files_count; ++i) { struct test_file *file = &test.files[i]; + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); TESTEQUAL(emit_file(cmd_fd, NULL, file->name, &file->id, - file->size, NULL), 0); + file->size, NULL), + 0); + close(cmd_fd); + cmd_fd = -1; - TESTEQUAL(validate_block_count(mount_dir, file), 0); + TESTEQUAL(validate_data_block_count(mount_dir, backing_dir, + file), + 0); } result = TEST_SUCCESS; @@ -3129,33 +3260,52 @@ out: } static int validate_hash_block_count(const char *mount_dir, + const char *backing_dir, struct test_file *file) { const int digest_size = SHA256_DIGEST_SIZE; const int hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + const int total_data_blocks = 1 + (file->size - 1) / + INCFS_DATA_FILE_BLOCK_SIZE; int result = TEST_FAILURE; - int block_count = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; - int hash_block_count = block_count; - int total_hash_block_count = 0; + int hash_layer = total_data_blocks; + int total_hash_blocks = 0; + int filled_hash_blocks; char *filename = NULL; int fd = -1; struct incfs_get_block_count_args bca = {}; - while (hash_block_count > 1) { - hash_block_count = (hash_block_count + hash_per_block - 1) - / hash_per_block; - total_hash_block_count += hash_block_count; + while (hash_layer > 1) { + hash_layer = (hash_layer + hash_per_block - 1) / hash_per_block; + total_hash_blocks += hash_layer; } + filled_hash_blocks = total_hash_blocks > 1 ? 1 : 0; TEST(filename = concat_file_name(mount_dir, file->name), filename); TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); - TESTEQUAL(bca.total_data_blocks_out, block_count); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); TESTEQUAL(bca.filled_data_blocks_out, 0); - TESTEQUAL(bca.total_hash_blocks_out, total_hash_block_count); - TESTEQUAL(bca.filled_hash_blocks_out, - total_hash_block_count > 1 ? 1 : 0); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, 0); + + TESTEQUAL(emit_partial_test_file_hash(mount_dir, file), 0); + + TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); + TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); + TESTEQUAL(bca.filled_data_blocks_out, 0); + TESTEQUAL(bca.total_hash_blocks_out, total_hash_blocks); + TESTEQUAL(bca.filled_hash_blocks_out, filled_hash_blocks); + close(fd); + fd = -1; + + if (filled_hash_blocks) + TESTEQUAL(validate_block_count(mount_dir, backing_dir, file, + total_data_blocks, 0, + total_hash_blocks, filled_hash_blocks), + 0); result = TEST_SUCCESS; out: @@ -3171,29 +3321,29 @@ static int hash_block_count_test(const char *mount_dir) int cmd_fd = -1; int i; struct test_files_set test = get_test_files_set(); - int fd = -1; TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, "readahead=0", false), 0); - TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); for (i = 0; i < test.files_count; i++) { struct test_file *file = &test.files[i]; + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); TESTEQUAL(crypto_emit_file(cmd_fd, NULL, file->name, &file->id, file->size, file->root_hash, file->sig.add_data), 0); + close(cmd_fd); + cmd_fd = -1; - TESTEQUAL(emit_partial_test_file_hash(mount_dir, file), 0); - TESTEQUAL(validate_hash_block_count(mount_dir, &test.files[i]), + TESTEQUAL(validate_hash_block_count(mount_dir, backing_dir, + &test.files[i]), 0); } result = TEST_SUCCESS; out: - close(fd); close(cmd_fd); umount(mount_dir); free(backing_dir); @@ -3311,7 +3461,7 @@ int main(int argc, char *argv[]) MAKE_TEST(large_file_test), MAKE_TEST(mapped_file_test), MAKE_TEST(compatibility_test), - MAKE_TEST(block_count_test), + MAKE_TEST(data_block_count_test), MAKE_TEST(hash_block_count_test), }; #undef MAKE_TEST From 45598888d85f81838bb94e2386c7d1b5068fe2ef Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 17 Sep 2020 19:46:48 -0700 Subject: [PATCH 453/636] ANDROID: Incremental fs: Fix uninitialized variable 'error' isn't initialized before being returned. Fixes: d4d1163e5358 ("ANDROID: Incremental fs: Add INCFS_IOC_GET_BLOCK_COUNT") Change-Id: I3082ac4a3f9d8632080a8e8c9221fda25ef152a0 Signed-off-by: Eric Biggers --- fs/incfs/vfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index f535d24d76ca..8c3c3b754825 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -668,7 +668,6 @@ static long ioctl_get_block_count(struct file *f, void __user *arg) struct incfs_get_block_count_args __user *args_usr_ptr = arg; struct incfs_get_block_count_args args = {}; struct data_file *df = get_incfs_data_file(f); - int error; if (!df) return -EINVAL; @@ -682,7 +681,7 @@ static long ioctl_get_block_count(struct file *f, void __user *arg) if (copy_to_user(args_usr_ptr, &args, sizeof(args))) return -EFAULT; - return error; + return 0; } static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) From a040750a765445a40e1f8fc8e3d45aa4cd5fcb33 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 17 Sep 2020 19:50:20 -0700 Subject: [PATCH 454/636] ANDROID: Incremental fs: Fix dangling else MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a compiler warning: fs/incfs/data_mgmt.c: In function ‘incfs_get_filled_blocks’: fs/incfs/data_mgmt.c:768:6: warning: suggest explicit braces to avoid ambiguous ‘else’ [-Wdangling-else] 768 | if (is_data_block_present(&dfb)) | ^ Fixes: 9cbdd375f644 ("ANDROID: Incremental fs: Fix filled block count from get filled blocks") Change-Id: I5a331442155443c6f67e33f1bbaf5deae84a644b Signed-off-by: Eric Biggers --- fs/incfs/data_mgmt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index d8c02a413e44..a4dd223ee693 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -765,11 +765,12 @@ int incfs_get_filled_blocks(struct data_file *df, convert_data_file_block(bme + i, &dfb); - if (is_data_block_present(&dfb)) + if (is_data_block_present(&dfb)) { if (arg->index_out >= df->df_data_block_count) ++hash_blocks_filled; else ++data_blocks_filled; + } if (is_data_block_present(&dfb) == in_range) continue; From 5b188fc353343d818ce744f7cc44fd695d0b9a3c Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 9 Sep 2020 08:16:37 -0700 Subject: [PATCH 455/636] ANDROID: Incremental fs: Add .incomplete folder Bug: 165929150 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: Ib6952391aea76bf0318cbad8da7a1276f8f9e8ba --- fs/incfs/data_mgmt.c | 1 + fs/incfs/data_mgmt.h | 2 + fs/incfs/pseudo_files.c | 71 ++++++-- fs/incfs/vfs.c | 151 +++++++++++++++--- .../selftests/filesystems/incfs/incfs_test.c | 29 ++++ 5 files changed, 217 insertions(+), 37 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index a4dd223ee693..4100e900b09b 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -110,6 +110,7 @@ void incfs_free_mount_info(struct mount_info *mi) flush_delayed_work(&mi->mi_log.ml_wakeup_work); dput(mi->mi_index_dir); + dput(mi->mi_incomplete_dir); path_put(&mi->mi_backing_dir_path); mutex_destroy(&mi->mi_dir_struct_mutex); put_cred(mi->mi_owner); diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index a57ae4c9a31c..42922b2f9db1 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -114,6 +114,8 @@ struct mount_info { struct dentry *mi_index_dir; + struct dentry *mi_incomplete_dir; + const struct cred *mi_owner; struct mount_options mi_options; diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c index 0993829cbe2d..07a4eb3d16f1 100644 --- a/fs/incfs/pseudo_files.c +++ b/fs/incfs/pseudo_files.c @@ -519,6 +519,7 @@ static long ioctl_create_file(struct mount_info *mi, char *file_id_str = NULL; struct dentry *index_file_dentry = NULL; struct dentry *named_file_dentry = NULL; + struct dentry *incomplete_file_dentry = NULL; struct path parent_dir_path = {}; struct inode *index_dir_inode = NULL; __le64 size_attr_value = 0; @@ -526,8 +527,11 @@ static long ioctl_create_file(struct mount_info *mi, char *attr_value = NULL; int error = 0; bool locked = false; + bool index_linked = false; + bool name_linked = false; + bool incomplete_linked = false; - if (!mi || !mi->mi_index_dir) { + if (!mi || !mi->mi_index_dir || !mi->mi_incomplete_dir) { error = -EFAULT; goto out; } @@ -572,6 +576,12 @@ static long ioctl_create_file(struct mount_info *mi, goto out; } + if (parent_dir_path.dentry == mi->mi_incomplete_dir) { + /* Can't create a file directly inside .incomplete */ + error = -EBUSY; + goto out; + } + /* Look up a dentry in the parent dir. It should be negative. */ named_file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, file_name); @@ -589,6 +599,25 @@ static long ioctl_create_file(struct mount_info *mi, error = -EEXIST; goto out; } + + /* Look up a dentry in the incomplete dir. It should be negative. */ + incomplete_file_dentry = incfs_lookup_dentry(mi->mi_incomplete_dir, + file_id_str); + if (!incomplete_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(incomplete_file_dentry)) { + error = PTR_ERR(incomplete_file_dentry); + incomplete_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(incomplete_file_dentry)) { + /* File with this path already exists. */ + error = -EEXIST; + goto out; + } + /* Look up a dentry in the .index dir. It should be negative. */ index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); if (!index_file_dentry) { @@ -623,7 +652,7 @@ static long ioctl_create_file(struct mount_info *mi, error = chmod(index_file_dentry, args.mode | 0222); if (error) { pr_debug("incfs: chmod err: %d\n", error); - goto delete_index_file; + goto out; } /* Save the file's ID as an xattr for easy fetching in future. */ @@ -631,7 +660,7 @@ static long ioctl_create_file(struct mount_info *mi, file_id_str, strlen(file_id_str), XATTR_CREATE); if (error) { pr_debug("incfs: vfs_setxattr err:%d\n", error); - goto delete_index_file; + goto out; } /* Save the file's size as an xattr for easy fetching in future. */ @@ -641,27 +670,27 @@ static long ioctl_create_file(struct mount_info *mi, XATTR_CREATE); if (error) { pr_debug("incfs: vfs_setxattr err:%d\n", error); - goto delete_index_file; + goto out; } /* Save the file's attribute as an xattr */ if (args.file_attr_len && args.file_attr) { if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { error = -E2BIG; - goto delete_index_file; + goto out; } attr_value = kmalloc(args.file_attr_len, GFP_NOFS); if (!attr_value) { error = -ENOMEM; - goto delete_index_file; + goto out; } if (copy_from_user(attr_value, u64_to_user_ptr(args.file_attr), args.file_attr_len) > 0) { error = -EFAULT; - goto delete_index_file; + goto out; } error = vfs_setxattr(index_file_dentry, @@ -670,7 +699,7 @@ static long ioctl_create_file(struct mount_info *mi, XATTR_CREATE); if (error) - goto delete_index_file; + goto out; } /* Initializing a newly created file. */ @@ -679,26 +708,40 @@ static long ioctl_create_file(struct mount_info *mi, (u8 __user *)args.signature_info, args.signature_size); if (error) - goto delete_index_file; + goto out; + index_linked = true; /* Linking a file with its real name from the requested dir. */ error = incfs_link(index_file_dentry, named_file_dentry); - - if (!error) + if (error) goto out; + name_linked = true; -delete_index_file: - incfs_unlink(index_file_dentry); + if (args.size) { + /* Linking a file with its incomplete entry */ + error = incfs_link(index_file_dentry, incomplete_file_dentry); + if (error) + goto out; + incomplete_linked = true; + } out: - if (error) + if (error) { pr_debug("incfs: %s err:%d\n", __func__, error); + if (index_linked) + incfs_unlink(index_file_dentry); + if (name_linked) + incfs_unlink(named_file_dentry); + if (incomplete_linked) + incfs_unlink(incomplete_file_dentry); + } kfree(file_id_str); kfree(file_name); kfree(attr_value); dput(named_file_dentry); dput(index_file_dentry); + dput(incomplete_file_dentry); path_put(&parent_dir_path); if (locked) mutex_unlock(&mi->mi_dir_struct_mutex); diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 8c3c3b754825..ed236f5c1f58 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -16,6 +16,7 @@ #include "vfs.h" #include "data_mgmt.h" +#include "format.h" #include "internal.h" #include "pseudo_files.h" @@ -380,9 +381,9 @@ static int incfs_init_dentry(struct dentry *dentry, struct path *path) return 0; } -static struct dentry *open_or_create_index_dir(struct dentry *backing_dir) +static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, + const char *name) { - static const char name[] = ".index"; struct dentry *index_dentry; struct inode *backing_inode = d_inode(backing_dir); int err = 0; @@ -519,6 +520,38 @@ static int incfs_rmdir(struct dentry *dentry) return error; } +static void maybe_delete_incomplete_file(struct data_file *df) +{ + char *file_id_str; + struct dentry *incomplete_file_dentry; + + if (atomic_read(&df->df_data_blocks_written) < df->df_data_block_count) + return; + + /* This is best effort - there is no useful action to take on failure */ + file_id_str = file_id_to_str(df->df_id); + if (!file_id_str) + return; + + incomplete_file_dentry = incfs_lookup_dentry( + df->df_mount_info->mi_incomplete_dir, + file_id_str); + if (!incomplete_file_dentry || IS_ERR(incomplete_file_dentry)) { + incomplete_file_dentry = NULL; + goto out; + } + + if (!d_really_is_positive(incomplete_file_dentry)) + goto out; + + vfs_fsync(df->df_backing_file_context->bc_file, 0); + incfs_unlink(incomplete_file_dentry); + +out: + dput(incomplete_file_dentry); + kfree(file_id_str); +} + static long ioctl_fill_blocks(struct file *f, void __user *arg) { struct incfs_fill_blocks __user *usr_fill_blocks = arg; @@ -580,6 +613,8 @@ static long ioctl_fill_blocks(struct file *f, void __user *arg) if (data_buf) free_pages((unsigned long)data_buf, get_order(data_buf_size)); + maybe_delete_incomplete_file(df); + /* * Only report the error if no records were processed, otherwise * just return how many were processed successfully. @@ -814,6 +849,11 @@ static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } + if (backing_dentry->d_parent == mi->mi_incomplete_dir) { + /* Can't create a subdir inside .incomplete */ + err = -EBUSY; + goto out; + } inode_lock_nested(dir_node->n_backing_inode, I_MUTEX_PARENT); err = vfs_mkdir(dir_node->n_backing_inode, backing_dentry, mode | 0222); inode_unlock(dir_node->n_backing_inode); @@ -846,17 +886,26 @@ path_err: return err; } -/* Delete file referenced by backing_dentry and also its hardlink from .index */ -static int final_file_delete(struct mount_info *mi, - struct dentry *backing_dentry) +/* + * Delete file referenced by backing_dentry and if appropriate its hardlink + * from .index and .incomplete + */ +static int file_delete(struct mount_info *mi, + struct dentry *backing_dentry, + int nlink) { struct dentry *index_file_dentry = NULL; + struct dentry *incomplete_file_dentry = NULL; /* 2 chars per byte of file ID + 1 char for \0 */ char file_id_str[2 * sizeof(incfs_uuid_t) + 1] = {0}; ssize_t uuid_size = 0; int error = 0; WARN_ON(!mutex_is_locked(&mi->mi_dir_struct_mutex)); + + if (nlink > 3) + goto just_unlink; + uuid_size = vfs_getxattr(backing_dentry, INCFS_XATTR_ID_NAME, file_id_str, 2 * sizeof(incfs_uuid_t)); if (uuid_size < 0) { @@ -872,17 +921,46 @@ static int final_file_delete(struct mount_info *mi, index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); if (IS_ERR(index_file_dentry)) { error = PTR_ERR(index_file_dentry); + index_file_dentry = NULL; goto out; } - error = incfs_unlink(backing_dentry); - if (error) + if (d_really_is_positive(index_file_dentry) && nlink > 0) + nlink--; + + if (nlink > 2) + goto just_unlink; + + incomplete_file_dentry = incfs_lookup_dentry(mi->mi_incomplete_dir, + file_id_str); + if (IS_ERR(incomplete_file_dentry)) { + error = PTR_ERR(incomplete_file_dentry); + incomplete_file_dentry = NULL; goto out; + } + + if (d_really_is_positive(incomplete_file_dentry) && nlink > 0) + nlink--; + + if (nlink > 1) + goto just_unlink; if (d_really_is_positive(index_file_dentry)) error = incfs_unlink(index_file_dentry); + if (error) + goto out; + + if (d_really_is_positive(incomplete_file_dentry)) + error = incfs_unlink(incomplete_file_dentry); + if (error) + goto out; + +just_unlink: + error = incfs_unlink(backing_dentry); + out: dput(index_file_dentry); + dput(incomplete_file_dentry); if (error) pr_debug("incfs: delete_file_from_index err:%d\n", error); return error; @@ -914,21 +992,18 @@ static int dir_unlink(struct inode *dir, struct dentry *dentry) goto out; } + if (backing_path.dentry->d_parent == mi->mi_incomplete_dir) { + /* Direct unlink from .incomplete are not allowed. */ + err = -EBUSY; + goto out; + } + err = vfs_getattr(&backing_path, &stat, STATX_NLINK, AT_STATX_SYNC_AS_STAT); if (err) goto out; - if (stat.nlink == 2) { - /* - * This is the last named link to this file. The only one left - * is in .index. Remove them both now. - */ - err = final_file_delete(mi, backing_path.dentry); - } else { - /* There are other links to this file. Remove just this one. */ - err = incfs_unlink(backing_path.dentry); - } + err = file_delete(mi, backing_path.dentry, stat.nlink); d_drop(dentry); out: @@ -964,6 +1039,12 @@ static int dir_link(struct dentry *old_dentry, struct inode *dir, goto out; } + if (backing_new_path.dentry->d_parent == mi->mi_incomplete_dir) { + /* Can't link to .incomplete */ + error = -EBUSY; + goto out; + } + error = incfs_link(backing_old_path.dentry, backing_new_path.dentry); if (!error) { struct inode *inode = NULL; @@ -1016,6 +1097,12 @@ static int dir_rmdir(struct inode *dir, struct dentry *dentry) goto out; } + if (backing_path.dentry == mi->mi_incomplete_dir) { + /* Can't delete .incomplete */ + err = -EBUSY; + goto out; + } + err = incfs_rmdir(backing_path.dentry); if (!err) d_drop(dentry); @@ -1047,8 +1134,9 @@ static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, backing_old_dentry = get_incfs_dentry(old_dentry)->backing_path.dentry; - if (!backing_old_dentry || backing_old_dentry == mi->mi_index_dir) { - /* Renaming .index not allowed */ + if (!backing_old_dentry || backing_old_dentry == mi->mi_index_dir || + backing_old_dentry == mi->mi_incomplete_dir) { + /* Renaming .index or .incomplete not allowed */ error = -EBUSY; goto exit; } @@ -1061,8 +1149,9 @@ static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, backing_new_dir_dentry = dget_parent(backing_new_dentry); target_inode = d_inode(new_dentry); - if (backing_old_dir_dentry == mi->mi_index_dir) { - /* Direct moves from .index are not allowed. */ + if (backing_old_dir_dentry == mi->mi_index_dir || + backing_old_dir_dentry == mi->mi_incomplete_dir) { + /* Direct moves from .index or .incomplete are not allowed. */ error = -EBUSY; goto out; } @@ -1400,10 +1489,13 @@ static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size) struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, const char *dev_name, void *data) { + static const char index_name[] = ".index"; + static const char incomplete_name[] = ".incomplete"; struct mount_options options = {}; struct mount_info *mi = NULL; struct path backing_dir_path = {}; - struct dentry *index_dir; + struct dentry *index_dir = NULL; + struct dentry *incomplete_dir = NULL; struct super_block *src_fs_sb = NULL; struct inode *root_inode = NULL; struct super_block *sb = sget(type, NULL, set_anon_super, flags, NULL); @@ -1456,15 +1548,28 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, goto err; } - index_dir = open_or_create_index_dir(backing_dir_path.dentry); + index_dir = open_or_create_special_dir(backing_dir_path.dentry, + index_name); if (IS_ERR_OR_NULL(index_dir)) { error = PTR_ERR(index_dir); pr_err("incfs: Can't find or create .index dir in %s\n", dev_name); + /* No need to null index_dir since we don't put it */ goto err; } mi->mi_index_dir = index_dir; + incomplete_dir = open_or_create_special_dir(backing_dir_path.dentry, + incomplete_name); + if (IS_ERR_OR_NULL(incomplete_dir)) { + error = PTR_ERR(incomplete_dir); + pr_err("incfs: Can't find or create .incomplete dir in %s\n", + dev_name); + /* No need to null incomplete_dir since we don't put it */ + goto err; + } + mi->mi_incomplete_dir = incomplete_dir; + sb->s_fs_info = mi; root_inode = fetch_regular_inode(sb, backing_dir_path.dentry); if (IS_ERR(root_inode)) { diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 281b544cd872..c947f66b99ce 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -214,6 +214,17 @@ static char *get_index_filename(const char *mnt_dir, incfs_uuid_t id) return strdup(path); } +static char *get_incomplete_filename(const char *mnt_dir, incfs_uuid_t id) +{ + char path[FILENAME_MAX]; + char str_id[1 + 2 * sizeof(id)]; + + bin2hex(str_id, id.bytes, sizeof(id.bytes)); + snprintf(path, ARRAY_SIZE(path), "%s/.incomplete/%s", mnt_dir, str_id); + + return strdup(path); +} + int open_file_by_id(const char *mnt_dir, incfs_uuid_t id, bool use_ioctl) { char *path = get_index_filename(mnt_dir, id); @@ -974,6 +985,7 @@ static bool iterate_directory(const char *dir_to_iterate, bool root, {INCFS_PENDING_READS_FILENAME, true, false}, {INCFS_BLOCKS_WRITTEN_FILENAME, true, false}, {".index", true, false}, + {".incomplete", true, false}, {"..", false, false}, {".", false, false}, }; @@ -3189,11 +3201,21 @@ static int validate_data_block_count(const char *mount_dir, int test_result = TEST_FAILURE; char *filename = NULL; + char *incomplete_filename = NULL; + struct stat stat_buf_incomplete, stat_buf_file; int fd = -1; struct incfs_get_block_count_args bca = {}; int i; TEST(filename = concat_file_name(mount_dir, file->name), filename); + TEST(incomplete_filename = get_incomplete_filename(mount_dir, file->id), + incomplete_filename); + + TESTEQUAL(stat(filename, &stat_buf_file), 0); + TESTEQUAL(stat(incomplete_filename, &stat_buf_incomplete), 0); + TESTEQUAL(stat_buf_file.st_ino, stat_buf_incomplete.st_ino); + TESTEQUAL(stat_buf_file.st_nlink, 3); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); TESTEQUAL(ioctl(fd, INCFS_IOC_GET_BLOCK_COUNT, &bca), 0); TESTEQUAL(bca.total_data_blocks_out, total_data_blocks); @@ -3217,9 +3239,16 @@ static int validate_data_block_count(const char *mount_dir, 0, 0), 0); + for (i = 1; i < total_data_blocks; i += 2) + TESTEQUAL(emit_test_block(mount_dir, file, i), 0); + + TESTEQUAL(stat(incomplete_filename, &stat_buf_incomplete), -1); + TESTEQUAL(errno, ENOENT); + test_result = TEST_SUCCESS; out: close(fd); + free(incomplete_filename); free(filename); return test_result; } From c9704ce7c95be5c6cd60aafd0e185a7c1772ea4f Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 22 Sep 2020 14:59:41 -0700 Subject: [PATCH 456/636] ANDROID: Incremental fs: Add per UID read timeouts Bug: 169056129 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: I8cad9ee4095123bafba33abb65bbb339ba6ff8b5 --- fs/incfs/data_mgmt.c | 38 ++++- fs/incfs/data_mgmt.h | 10 +- fs/incfs/pseudo_files.c | 94 +++++++++++ fs/incfs/vfs.c | 42 ++++- include/uapi/linux/incrementalfs.h | 96 ++++++++++- .../selftests/filesystems/incfs/incfs_test.c | 156 ++++++++++++++++++ 6 files changed, 417 insertions(+), 19 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 4100e900b09b..14cba02b8189 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -3,6 +3,7 @@ * Copyright 2019 Google LLC */ #include +#include #include #include #include @@ -49,6 +50,7 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, spin_lock_init(&mi->mi_log.rl_lock); spin_lock_init(&mi->pending_read_lock); INIT_LIST_HEAD(&mi->mi_reads_list_head); + spin_lock_init(&mi->mi_per_uid_read_timeouts_lock); error = incfs_realloc_mount_info(mi, options); if (error) @@ -117,6 +119,7 @@ void incfs_free_mount_info(struct mount_info *mi) kfree(mi->mi_log.rl_ring_buf); kfree(mi->log_xattr); kfree(mi->pending_read_xattr); + kfree(mi->mi_per_uid_read_timeouts); kfree(mi); } @@ -933,7 +936,8 @@ static void notify_pending_reads(struct mount_info *mi, } static int wait_for_data_block(struct data_file *df, int block_index, - int timeout_ms, + int min_time_ms, int min_pending_time_ms, + int max_pending_time_ms, struct data_file_block *res_block) { struct data_file_block block = {}; @@ -942,6 +946,7 @@ static int wait_for_data_block(struct data_file *df, int block_index, struct mount_info *mi = NULL; int error = 0; int wait_res = 0; + u64 time; if (!df || !res_block) return -EFAULT; @@ -969,11 +974,13 @@ static int wait_for_data_block(struct data_file *df, int block_index, /* If the block was found, just return it. No need to wait. */ if (is_data_block_present(&block)) { + if (min_time_ms) + error = msleep_interruptible(min_time_ms); *res_block = block; - return 0; + return error; } else { /* If it's not found, create a pending read */ - if (timeout_ms != 0) { + if (max_pending_time_ms != 0) { read = add_pending_read(df, block_index); if (!read) return -ENOMEM; @@ -983,11 +990,14 @@ static int wait_for_data_block(struct data_file *df, int block_index, } } + if (min_pending_time_ms) + time = ktime_get_ns(); + /* Wait for notifications about block's arrival */ wait_res = wait_event_interruptible_timeout(segment->new_data_arrival_wq, - (is_read_done(read)), - msecs_to_jiffies(timeout_ms)); + (is_read_done(read)), + msecs_to_jiffies(max_pending_time_ms)); /* Woke up, the pending read is no longer needed. */ remove_pending_read(df, read); @@ -1005,6 +1015,16 @@ static int wait_for_data_block(struct data_file *df, int block_index, return wait_res; } + if (min_pending_time_ms) { + time = div_u64(ktime_get_ns() - time, 1000000); + if (min_pending_time_ms > time) { + error = msleep_interruptible( + min_pending_time_ms - time); + if (error) + return error; + } + } + error = down_read_killable(&segment->rwsem); if (error) return error; @@ -1032,8 +1052,9 @@ static int wait_for_data_block(struct data_file *df, int block_index, } ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, - int index, int timeout_ms, - struct mem_range tmp) + int index, int min_time_ms, + int min_pending_time_ms, int max_pending_time_ms, + struct mem_range tmp) { loff_t pos; ssize_t result; @@ -1052,7 +1073,8 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, mi = df->df_mount_info; bf = df->df_backing_file_context->bc_file; - result = wait_for_data_block(df, index, timeout_ms, &block); + result = wait_for_data_block(df, index, min_time_ms, + min_pending_time_ms, max_pending_time_ms, &block); if (result < 0) goto out; diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 42922b2f9db1..13b663a1d53e 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -163,6 +163,11 @@ struct mount_info { /* Number of blocks written since mount */ atomic_t mi_blocks_written; + + /* Per UID read timeouts */ + spinlock_t mi_per_uid_read_timeouts_lock; + struct incfs_per_uid_read_timeouts *mi_per_uid_read_timeouts; + int mi_per_uid_read_timeouts_size; }; struct data_file_block { @@ -327,8 +332,9 @@ struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf); void incfs_free_dir_file(struct dir_file *dir); ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, - int index, int timeout_ms, - struct mem_range tmp); + int index, int min_time_ms, + int min_pending_time_ms, int max_pending_time_ms, + struct mem_range tmp); int incfs_get_filled_blocks(struct data_file *df, struct incfs_file_data *fd, diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c index 07a4eb3d16f1..7c20d452c031 100644 --- a/fs/incfs/pseudo_files.c +++ b/fs/incfs/pseudo_files.c @@ -940,6 +940,96 @@ out: return error; } +static long ioctl_get_read_timeouts(struct mount_info *mi, void __user *arg) +{ + struct incfs_get_read_timeouts_args __user *args_usr_ptr = arg; + struct incfs_get_read_timeouts_args args = {}; + int error = 0; + struct incfs_per_uid_read_timeouts *buffer; + int size; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args))) + return -EINVAL; + + if (args.timeouts_array_size_out > INCFS_DATA_FILE_BLOCK_SIZE) + return -EINVAL; + + buffer = kzalloc(args.timeouts_array_size_out, GFP_NOFS); + if (!buffer) + return -ENOMEM; + + spin_lock(&mi->mi_per_uid_read_timeouts_lock); + size = mi->mi_per_uid_read_timeouts_size; + if (args.timeouts_array_size < size) + error = -E2BIG; + else if (size) + memcpy(buffer, mi->mi_per_uid_read_timeouts, size); + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); + + args.timeouts_array_size_out = size; + if (!error && size) + if (copy_to_user(u64_to_user_ptr(args.timeouts_array), buffer, + size)) + error = -EFAULT; + + if (!error || error == -E2BIG) + if (copy_to_user(args_usr_ptr, &args, sizeof(args)) > 0) + error = -EFAULT; + + kfree(buffer); + return error; +} + +static long ioctl_set_read_timeouts(struct mount_info *mi, void __user *arg) +{ + struct incfs_set_read_timeouts_args __user *args_usr_ptr = arg; + struct incfs_set_read_timeouts_args args = {}; + int error = 0; + int size; + struct incfs_per_uid_read_timeouts *buffer = NULL, *tmp; + int i; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args))) + return -EINVAL; + + size = args.timeouts_array_size; + if (size) { + if (size > INCFS_DATA_FILE_BLOCK_SIZE || + size % sizeof(*buffer) != 0) + return -EINVAL; + + buffer = kzalloc(size, GFP_NOFS); + if (!buffer) + return -ENOMEM; + + if (copy_from_user(buffer, u64_to_user_ptr(args.timeouts_array), + size)) { + error = -EINVAL; + goto out; + } + + for (i = 0; i < size / sizeof(*buffer); ++i) { + struct incfs_per_uid_read_timeouts *t = &buffer[i]; + + if (t->min_pending_time_ms > t->max_pending_time_ms) { + error = -EINVAL; + goto out; + } + } + } + + spin_lock(&mi->mi_per_uid_read_timeouts_lock); + mi->mi_per_uid_read_timeouts_size = size; + tmp = mi->mi_per_uid_read_timeouts; + mi->mi_per_uid_read_timeouts = buffer; + buffer = tmp; + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); + +out: + kfree(buffer); + return error; +} + static long pending_reads_dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) { @@ -952,6 +1042,10 @@ static long pending_reads_dispatch_ioctl(struct file *f, unsigned int req, return ioctl_permit_fill(f, (void __user *)arg); case INCFS_IOC_CREATE_MAPPED_FILE: return ioctl_create_mapped_file(mi, (void __user *)arg); + case INCFS_IOC_GET_READ_TIMEOUTS: + return ioctl_get_read_timeouts(mi, (void __user *)arg); + case INCFS_IOC_SET_READ_TIMEOUTS: + return ioctl_set_read_timeouts(mi, (void __user *)arg); default: return -EINVAL; } diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index ed236f5c1f58..250e8bf4d5dd 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -415,6 +415,39 @@ static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, return index_dentry; } +static int read_single_page_timeouts(struct data_file *df, struct file *f, + int block_index, struct mem_range range, + struct mem_range tmp) +{ + struct mount_info *mi = df->df_mount_info; + u32 min_time_ms = 0; + u32 min_pending_time_ms = 0; + u32 max_pending_time_ms = U32_MAX; + int uid = current_uid().val; + int i; + + spin_lock(&mi->mi_per_uid_read_timeouts_lock); + for (i = 0; i < mi->mi_per_uid_read_timeouts_size / + sizeof(*mi->mi_per_uid_read_timeouts); ++i) { + struct incfs_per_uid_read_timeouts *t = + &mi->mi_per_uid_read_timeouts[i]; + + if(t->uid == uid) { + min_time_ms = t->min_time_ms; + min_pending_time_ms = t->min_pending_time_ms; + max_pending_time_ms = t->max_pending_time_ms; + break; + } + } + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); + if (max_pending_time_ms == U32_MAX) + max_pending_time_ms = mi->mi_options.read_timeout_ms; + + return incfs_read_data_file_block(range, f, block_index, + min_time_ms, min_pending_time_ms, max_pending_time_ms, + tmp); +} + static int read_single_page(struct file *f, struct page *page) { loff_t offset = 0; @@ -425,7 +458,6 @@ static int read_single_page(struct file *f, struct page *page) int result = 0; void *page_start; int block_index; - int timeout_ms; if (!df) { SetPageError(page); @@ -438,22 +470,20 @@ static int read_single_page(struct file *f, struct page *page) block_index = (offset + df->df_mapped_offset) / INCFS_DATA_FILE_BLOCK_SIZE; size = df->df_size; - timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; if (offset < size) { struct mem_range tmp = { .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE }; - tmp.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(tmp.len)); if (!tmp.data) { read_result = -ENOMEM; goto err; } bytes_to_read = min_t(loff_t, size - offset, PAGE_SIZE); - read_result = incfs_read_data_file_block( - range(page_start, bytes_to_read), f, block_index, - timeout_ms, tmp); + + read_result = read_single_page_timeouts(df, f, block_index, + range(page_start, bytes_to_read), tmp); free_pages((unsigned long)tmp.data, get_order(tmp.len)); } else { diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index 8dc3b65a5a31..fbb83a2d9479 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -43,7 +43,10 @@ /* ===== ioctl requests on the command dir ===== */ -/* Create a new file */ +/* + * Create a new file + * May only be called on .pending_reads file + */ #define INCFS_IOC_CREATE_FILE \ _IOWR(INCFS_IOCTL_BASE_CODE, 30, struct incfs_new_file_args) @@ -93,14 +96,34 @@ #define INCFS_IOC_GET_FILLED_BLOCKS \ _IOR(INCFS_IOCTL_BASE_CODE, 34, struct incfs_get_filled_blocks_args) -/* Creates a new mapped file */ +/* + * Creates a new mapped file + * May only be called on .pending_reads file + */ #define INCFS_IOC_CREATE_MAPPED_FILE \ _IOWR(INCFS_IOCTL_BASE_CODE, 35, struct incfs_create_mapped_file_args) -/* Get number of blocks, total and filled */ +/* + * Get number of blocks, total and filled + * May only be called on .pending_reads file + */ #define INCFS_IOC_GET_BLOCK_COUNT \ _IOR(INCFS_IOCTL_BASE_CODE, 36, struct incfs_get_block_count_args) +/* + * Get per UID read timeouts + * May only be called on .pending_reads file + */ +#define INCFS_IOC_GET_READ_TIMEOUTS \ + _IOR(INCFS_IOCTL_BASE_CODE, 37, struct incfs_get_read_timeouts_args) + +/* + * Set per UID read timeouts + * May only be called on .pending_reads file + */ +#define INCFS_IOC_SET_READ_TIMEOUTS \ + _IOW(INCFS_IOCTL_BASE_CODE, 38, struct incfs_set_read_timeouts_args) + /* ===== sysfs feature flags ===== */ /* * Each flag is represented by a file in /sys/fs/incremental-fs/features @@ -431,6 +454,10 @@ struct incfs_create_mapped_file_args { __aligned_u64 source_offset; }; +/* + * Get information about the blocks in this file + * Argument for INCFS_IOC_GET_BLOCK_COUNT + */ struct incfs_get_block_count_args { /* Total number of data blocks in the file */ __u32 total_data_blocks_out; @@ -445,4 +472,67 @@ struct incfs_get_block_count_args { __u32 filled_hash_blocks_out; }; +/* Description of timeouts for one UID */ +struct incfs_per_uid_read_timeouts { + /* UID to apply these timeouts to */ + __u32 uid; + + /* + * Min time to read any block. Note that this doesn't apply to reads + * which are satisfied from the page cache. + */ + __u32 min_time_ms; + + /* + * Min time to satisfy a pending read. Must be >= min_time_ms. Any + * pending read which is filled before this time will be delayed so + * that the total read time >= this value. + */ + __u32 min_pending_time_ms; + + /* + * Max time to satisfy a pending read before the read times out. + * If set to U32_MAX, defaults to mount options read_timeout_ms= + * Must be >= min_pending_time_ms + */ + __u32 max_pending_time_ms; +}; + +/* + * Get the read timeouts array + * Argument for INCFS_IOC_GET_READ_TIMEOUTS + */ +struct incfs_get_read_timeouts_args { + /* + * A pointer to a buffer to fill with the current timeouts + * + * Equivalent to struct incfs_per_uid_read_timeouts * + */ + __aligned_u64 timeouts_array; + + /* Size of above buffer in bytes */ + __u32 timeouts_array_size; + + /* Size used in bytes, or size needed if -ENOMEM returned */ + __u32 timeouts_array_size_out; +}; + +/* + * Set the read timeouts array + * Arguments for INCFS_IOC_SET_READ_TIMEOUTS + */ +struct incfs_set_read_timeouts_args { + /* + * A pointer to an array containing the new timeouts + * This will replace any existing timeouts + * + * Equivalent to struct incfs_per_uid_read_timeouts * + */ + __aligned_u64 timeouts_array; + + /* Size of above array in bytes. Must be < 256 */ + __u32 timeouts_array_size; +}; + + #endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index c947f66b99ce..e1ad0ec14996 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -3379,6 +3380,160 @@ out: return result; } +static int is_close(struct timespec *start, int expected_ms) +{ + const int allowed_variance = 100; + int result = TEST_FAILURE; + struct timespec finish; + int diff; + + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &finish), 0); + diff = (finish.tv_sec - start->tv_sec) * 1000 + + (finish.tv_nsec - start->tv_nsec) / 1000000; + + TESTCOND(diff >= expected_ms - allowed_variance); + TESTCOND(diff <= expected_ms + allowed_variance); + result = TEST_SUCCESS; +out: + return result; +} + +static int per_uid_read_timeouts_test(const char *mount_dir) +{ + struct test_file file = { + .name = "file", + .size = 16 * INCFS_DATA_FILE_BLOCK_SIZE + }; + + int result = TEST_FAILURE; + char *backing_dir = NULL; + int pid; + int cmd_fd = -1; + char *filename = NULL; + int fd = -1; + struct timespec start; + char buffer[4096]; + struct incfs_per_uid_read_timeouts purt_get[1]; + struct incfs_get_read_timeouts_args grt = { + ptr_to_u64(purt_get), + sizeof(purt_get) + }; + struct incfs_per_uid_read_timeouts purt_set[] = { + { + .uid = 0, + .min_time_ms = 1000, + .min_pending_time_ms = 2000, + .max_pending_time_ms = 3000, + }, + }; + struct incfs_set_read_timeouts_args srt = { + ptr_to_u64(purt_set), + sizeof(purt_set) + }; + int status; + + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "read_timeout_ms=1000,readahead=0", false), 0); + + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); + TESTEQUAL(emit_file(cmd_fd, NULL, file.name, &file.id, file.size, + NULL), 0); + + TEST(filename = concat_file_name(mount_dir, file.name), filename); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC), 0); + + /* Default mount options read failure is 1000 */ + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + TESTEQUAL(is_close(&start, 1000), 0); + + grt.timeouts_array_size = 0; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), 0); + TESTEQUAL(grt.timeouts_array_size_out, 0); + + /* Set it to 3000 */ + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + TESTEQUAL(is_close(&start, 3000), 0); + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), -1); + TESTEQUAL(errno, E2BIG); + TESTEQUAL(grt.timeouts_array_size_out, sizeof(purt_get)); + grt.timeouts_array_size = sizeof(purt_get); + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), 0); + TESTEQUAL(grt.timeouts_array_size_out, sizeof(purt_get)); + TESTEQUAL(purt_get[0].uid, purt_set[0].uid); + TESTEQUAL(purt_get[0].min_time_ms, purt_set[0].min_time_ms); + TESTEQUAL(purt_get[0].min_pending_time_ms, + purt_set[0].min_pending_time_ms); + TESTEQUAL(purt_get[0].max_pending_time_ms, + purt_set[0].max_pending_time_ms); + + /* Still 1000 in UID 2 */ + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TEST(pid = fork(), pid != -1); + if (pid == 0) { + TESTEQUAL(setuid(2), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + exit(0); + } + TESTNE(wait(&status), -1); + TESTEQUAL(WEXITSTATUS(status), 0); + TESTEQUAL(is_close(&start, 1000), 0); + + /* Set it to default */ + purt_set[0].max_pending_time_ms = UINT32_MAX; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), -1); + TESTEQUAL(is_close(&start, 1000), 0); + + /* Test min read time */ + TESTEQUAL(emit_test_block(mount_dir, &file, 0), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), 0), sizeof(buffer)); + TESTEQUAL(is_close(&start, 1000), 0); + + /* Test min pending time */ + purt_set[0].uid = 2; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + TESTEQUAL(clock_gettime(CLOCK_MONOTONIC, &start), 0); + TEST(pid = fork(), pid != -1); + if (pid == 0) { + TESTEQUAL(setuid(2), 0); + TESTEQUAL(pread(fd, buffer, sizeof(buffer), sizeof(buffer)), + sizeof(buffer)); + exit(0); + } + sleep(1); + TESTEQUAL(emit_test_block(mount_dir, &file, 1), 0); + TESTNE(wait(&status), -1); + TESTEQUAL(WEXITSTATUS(status), 0); + TESTEQUAL(is_close(&start, 2000), 0); + + /* Clear timeouts */ + srt.timeouts_array_size = 0; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_SET_READ_TIMEOUTS, &srt), 0); + grt.timeouts_array_size = 0; + TESTEQUAL(ioctl(cmd_fd, INCFS_IOC_GET_READ_TIMEOUTS, &grt), 0); + TESTEQUAL(grt.timeouts_array_size_out, 0); + + result = TEST_SUCCESS; +out: + close(fd); + + if (pid == 0) + exit(result); + + free(filename); + close(cmd_fd); + umount(backing_dir); + free(backing_dir); + return result; +} + static char *setup_mount_dir() { struct stat st; @@ -3492,6 +3647,7 @@ int main(int argc, char *argv[]) MAKE_TEST(compatibility_test), MAKE_TEST(data_block_count_test), MAKE_TEST(hash_block_count_test), + MAKE_TEST(per_uid_read_timeouts_test), }; #undef MAKE_TEST From 6851a6dcf2c138b85eb121de2a7807972142da66 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 23 Sep 2020 12:10:56 -0700 Subject: [PATCH 457/636] ANDROID: Incremental fs: Fix misuse of cpu_to_leXX and poll return Found by sparse: fs/incfs/format.c:416:21: warning: incorrect type in assignment (different base types) fs/incfs/format.c:416:21: expected restricted __le32 [assigned] [usertype] fh_flags fs/incfs/format.c:416:21: got int fs/incfs/pseudo_files.c:925:25: warning: incorrect type in argument 4 (different base types) fs/incfs/pseudo_files.c:925:25: expected unsigned long long [usertype] size fs/incfs/pseudo_files.c:925:25: got restricted __le64 [addressable] [assigned] [usertype] size_attr_value fs/incfs/pseudo_files.c:925:42: warning: incorrect type in argument 5 (different base types) fs/incfs/pseudo_files.c:925:42: expected unsigned long long [usertype] offset fs/incfs/pseudo_files.c:925:42: got restricted __le64 [usertype] fs/incfs/pseudo_files.c:1111:24: warning: incorrect type in return expression (different base types) fs/incfs/pseudo_files.c:1111:24: expected restricted __poll_t fs/incfs/pseudo_files.c:1111:24: got int Bug: 169258814 Fixes: Sparse errors introduced by 3f4938108a7ad, 8334d69e65f60 and cb776f45766a6 Test: incfs_test passes, sparse shows no errors Signed-off-by: Paul Lawrence Change-Id: I48596e9521069fc77bf38c345a568529d61c77dc --- fs/incfs/format.c | 2 +- fs/incfs/pseudo_files.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 691d7e474328..06ddaade8523 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -413,7 +413,7 @@ int incfs_write_mapping_fh_to_backing_file(struct backing_file_context *bfc, fh.fh_mapped_file_size = cpu_to_le64(file_size); fh.fh_original_uuid = *uuid; - fh.fh_flags = INCFS_FILE_MAPPED; + fh.fh_flags = cpu_to_le32(INCFS_FILE_MAPPED); LOCK_REQUIRED(bfc->bc_mutex); diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c index 7c20d452c031..d407209c0d99 100644 --- a/fs/incfs/pseudo_files.c +++ b/fs/incfs/pseudo_files.c @@ -922,7 +922,7 @@ static long ioctl_create_mapped_file(struct mount_info *mi, void __user *arg) } error = init_new_mapped_file(mi, file_dentry, &args.source_file_id, - size_attr_value, cpu_to_le64(args.source_offset)); + args.size, args.source_offset); if (error) goto delete_file; @@ -1108,7 +1108,7 @@ static __poll_t blocks_written_poll(struct file *f, poll_table *wait) unsigned long blocks_written; if (!mi) - return -EFAULT; + return 0; poll_wait(f, &mi->mi_blocks_written_notif_wq, wait); blocks_written = atomic_read(&mi->mi_blocks_written); From 38ef4a2294777ca13ccde9230e0e786c2ad7514c Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Mon, 5 Oct 2020 14:07:34 -0700 Subject: [PATCH 458/636] ANDROID: Incremental fs: Fix read_log_test which failed sporadically Test failure was caused by drop_caches sometimes not dropping a range of sectors in the middle of a random file. Fix by changing to reading the file before we populate it, so the cache is not involved. Also by populating it only on the last test, we test logging on both populated and unpopulated files. Rewrite the tests with the new test macros, and make the whole test far more readable. Test was taking 100 seconds to run, which was half the total runtime for all the tests. This was due to the sleeps while reading. Reduce number of such sleeps by a factor of 10, halving total test runtime. Also incfs_test would exit with 0 whether tests failed or not. Fix. Bug: 170104228 Test: Run incfs_test 100 times with -t10 -f4, and then again 40 time from end to end Signed-off-by: Paul Lawrence Change-Id: Iefeca46c2aa02fdf85e7ce4872d9c352f9517f41 --- .../selftests/filesystems/incfs/incfs_test.c | 443 +++++++----------- 1 file changed, 175 insertions(+), 268 deletions(-) diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index e1ad0ec14996..d375827257d4 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -42,6 +42,35 @@ #error Big endian not supported! #endif +struct { + int file; + int test; + bool verbose; +} options; + +#define TESTCOND(condition) \ + do { \ + if (!(condition)) { \ + ksft_print_msg("%s failed %d\n", \ + __func__, __LINE__); \ + goto out; \ + } else if (options.verbose) \ + ksft_print_msg("%s succeeded %d\n", \ + __func__, __LINE__); \ + } while (false) + +#define TEST(statement, condition) \ + do { \ + statement; \ + TESTCOND(condition); \ + } while (false) + +#define TESTEQUAL(statement, res) \ + TESTCOND((statement) == (res)) + +#define TESTNE(statement, res) \ + TESTCOND((statement) != (res)) + struct hash_block { char data[INCFS_DATA_FILE_BLOCK_SIZE]; }; @@ -112,6 +141,13 @@ struct test_files_set get_test_files_set(void) .size = 900 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, { .index = 10, .name = "file_big", .size = 500 * 1024 * 1024 } }; + + if (options.file) + return (struct test_files_set) { + .files = files + options.file - 1, + .files_count = 1, + }; + return (struct test_files_set){ .files = files, .files_count = ARRAY_SIZE(files) }; } @@ -2022,45 +2058,45 @@ enum expected_log { FULL_LOG, NO_LOG, PARTIAL_LOG }; static int validate_logs(const char *mount_dir, int log_fd, struct test_file *file, enum expected_log expected_log, - bool report_uid) + bool report_uid, bool expect_data) { + int result = TEST_FAILURE; uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; struct incfs_pending_read_info prs[2048] = {}; struct incfs_pending_read_info2 prs2[2048] = {}; + struct incfs_pending_read_info *previous_record = NULL; int prs_size = ARRAY_SIZE(prs); - int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; - int expected_read_block_cnt; - int res; - int read_count; - int i, j; - char *filename = concat_file_name(mount_dir, file->name); - int fd; + int block_count = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int expected_read_count, read_count, block_index, read_index; + char *filename = NULL; + int fd = -1; - fd = open(filename, O_RDONLY | O_CLOEXEC); - free(filename); - if (fd <= 0) - return TEST_FAILURE; + TEST(filename = concat_file_name(mount_dir, file->name), filename); + TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); - if (block_cnt > prs_size) - block_cnt = prs_size; - expected_read_block_cnt = block_cnt; + if (block_count > prs_size) + block_count = prs_size; + expected_read_count = block_count; - for (i = 0; i < block_cnt; i++) { - res = pread(fd, data, sizeof(data), - INCFS_DATA_FILE_BLOCK_SIZE * i); + for (block_index = 0; block_index < block_count; block_index++) { + int result = pread(fd, data, sizeof(data), + INCFS_DATA_FILE_BLOCK_SIZE * block_index); /* Make some read logs of type SAME_FILE_NEXT_BLOCK */ - if (i % 10 == 0) + if (block_index % 100 == 10) usleep(20000); /* Skip some blocks to make logs of type SAME_FILE */ - if (i % 10 == 5) { - ++i; - --expected_read_block_cnt; + if (block_index % 10 == 5) { + ++block_index; + --expected_read_count; } - if (res <= 0) - goto failure; + if (expect_data) + TESTCOND(result > 0); + + if (!expect_data) + TESTEQUAL(result, -1); } if (report_uid) @@ -2072,240 +2108,138 @@ static int validate_logs(const char *mount_dir, int log_fd, expected_log == NO_LOG ? 10 : 0, prs, prs_size); - if (expected_log == NO_LOG) { - if (read_count == 0) - goto success; - if (read_count < 0) - ksft_print_msg("Error reading logged reads %s.\n", - strerror(-read_count)); - else - ksft_print_msg("Somehow read empty logs.\n"); - goto failure; + if (expected_log == NO_LOG) + TESTEQUAL(read_count, 0); + + if (expected_log == PARTIAL_LOG) + TESTCOND(read_count > 0 && + read_count <= expected_read_count); + + if (expected_log == FULL_LOG) + TESTEQUAL(read_count, expected_read_count); + + /* If read less than expected, advance block_index appropriately */ + for (block_index = 0, read_index = 0; + read_index < expected_read_count - read_count; + block_index++, read_index++) + if (block_index % 10 == 5) + ++block_index; + + for (read_index = 0; read_index < read_count; + block_index++, read_index++) { + struct incfs_pending_read_info *record = report_uid ? + (struct incfs_pending_read_info *) &prs2[read_index] : + &prs[read_index]; + + TESTCOND(same_id(&record->file_id, &file->id)); + TESTEQUAL(record->block_index, block_index); + TESTNE(record->timestamp_us, 0); + if (previous_record) + TESTEQUAL(record->serial_number, + previous_record->serial_number + 1); + + previous_record = record; + if (block_index % 10 == 5) + ++block_index; } - if (read_count < 0) { - ksft_print_msg("Error reading logged reads %s.\n", - strerror(-read_count)); - goto failure; - } - - i = 0; - if (expected_log == PARTIAL_LOG) { - if (read_count == 0) { - ksft_print_msg("No logs %s.\n", file->name); - goto failure; - } - - for (i = 0, j = 0; j < expected_read_block_cnt - read_count; - i++, j++) - if (i % 10 == 5) - ++i; - - } else if (read_count != expected_read_block_cnt) { - ksft_print_msg("Bad log read count %s %d %d.\n", file->name, - read_count, expected_read_block_cnt); - goto failure; - } - - for (j = 0; j < read_count; i++, j++) { - struct incfs_pending_read_info *read = report_uid ? - (struct incfs_pending_read_info *) &prs2[j] : - &prs[j]; - - if (!same_id(&read->file_id, &file->id)) { - ksft_print_msg("Bad log read ino %s\n", file->name); - goto failure; - } - - if (read->block_index != i) { - ksft_print_msg("Bad log read ino %s %d %d.\n", - file->name, read->block_index, i); - goto failure; - } - - if (j != 0) { - unsigned long psn = (report_uid) ? - prs2[j - 1].serial_number : - prs[j - 1].serial_number; - - if (read->serial_number != psn + 1) { - ksft_print_msg("Bad log read sn %s %d %d.\n", - file->name, read->serial_number, - psn); - goto failure; - } - } - - if (read->timestamp_us == 0) { - ksft_print_msg("Bad log read timestamp %s.\n", - file->name); - goto failure; - } - - if (i % 10 == 5) - ++i; - } - -success: + result = TEST_SUCCESS; +out: close(fd); - return TEST_SUCCESS; - -failure: - close(fd); - return TEST_FAILURE; + free(filename); + return result; } static int read_log_test(const char *mount_dir) { + int result = TEST_FAILURE; struct test_files_set test = get_test_files_set(); const int file_num = test.files_count; int i = 0; int cmd_fd = -1, log_fd = -1; - char *backing_dir; + char *backing_dir = NULL; - backing_dir = create_backing_dir(mount_dir); - if (!backing_dir) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,report_uid", - false) != 0) - goto failure; - - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - log_fd = open_log_file(mount_dir); - if (log_fd < 0) - ksft_print_msg("Can't open log file.\n"); - - /* Write data. */ + /* Create files */ + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,report_uid,read_timeout_ms=0", + false), 0); + TEST(cmd_fd = open_commands_file(mount_dir), cmd_fd != -1); for (i = 0; i < file_num; i++) { struct test_file *file = &test.files[i]; - if (emit_file(cmd_fd, NULL, file->name, &file->id, - file->size, NULL)) - goto failure; - - if (emit_test_file_data(mount_dir, file)) - goto failure; + TESTEQUAL(emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL), 0); } - - /* Validate data */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, FULL_LOG, true)) - goto failure; - } - - /* Unmount and mount again, to see that logs work after remount. */ close(cmd_fd); - close(log_fd); cmd_fd = -1; - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0", false) != 0) - goto failure; + /* Validate logs */ + TEST(log_fd = open_log_file(mount_dir), log_fd != -1); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + FULL_LOG, true, false), 0); - cmd_fd = open_commands_file(mount_dir); - if (cmd_fd < 0) - goto failure; - - log_fd = open_log_file(mount_dir); - if (log_fd < 0) - ksft_print_msg("Can't open log file.\n"); - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, FULL_LOG, false)) - goto failure; - } - - /* - * Unmount and mount again with no read log to make sure poll - * doesn't crash - */ - close(cmd_fd); + /* Unmount and mount again without report_uid */ close(log_fd); - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } + log_fd = -1; + TESTEQUAL(umount(mount_dir), 0); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,read_timeout_ms=0", false), 0); - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=0", - false) != 0) - goto failure; + TEST(log_fd = open_log_file(mount_dir), log_fd != -1); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + FULL_LOG, false, false), 0); - log_fd = open_log_file(mount_dir); - if (log_fd < 0) - ksft_print_msg("Can't open log file.\n"); + /* No read log to make sure poll doesn't crash */ + close(log_fd); + log_fd = -1; + TESTEQUAL(umount(mount_dir), 0); + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,rlog_pages=0,read_timeout_ms=0", + false), 0); - /* Validate data again - note should fail this time */ + TEST(log_fd = open_log_file(mount_dir), log_fd != -1); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + NO_LOG, false, false), 0); + + /* Remount and check that logs start working again */ + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,rlog_pages=1,read_timeout_ms=0", + true), 0); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + PARTIAL_LOG, false, false), 0); + + /* Remount and check that logs continue working */ + TESTEQUAL(mount_fs_opt(mount_dir, backing_dir, + "readahead=0,rlog_pages=4,read_timeout_ms=0", + true), 0); + for (i = 0; i < file_num; i++) + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + FULL_LOG, false, false), 0); + + /* Check logs work with data */ for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, NO_LOG, false)) - goto failure; - } - - /* - * Remount and check that logs start working again - */ - if (drop_caches()) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=1", - true) != 0) - goto failure; - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG, false)) - goto failure; - } - - /* - * Remount and check that logs start working again - */ - if (drop_caches()) - goto failure; - - if (mount_fs_opt(mount_dir, backing_dir, "readahead=0,rlog_pages=4", - true) != 0) - goto failure; - - /* Validate data again */ - for (i = 0; i < file_num; i++) { - struct test_file *file = &test.files[i]; - - if (validate_logs(mount_dir, log_fd, file, PARTIAL_LOG, false)) - goto failure; + TESTEQUAL(emit_test_file_data(mount_dir, &test.files[i]), 0); + TESTEQUAL(validate_logs(mount_dir, log_fd, &test.files[i], + FULL_LOG, false, true), 0); } /* Final unmount */ close(log_fd); - free(backing_dir); - if (umount(mount_dir) != 0) { - print_error("Can't unmout FS"); - goto failure; - } + log_fd = -1; + TESTEQUAL(umount(mount_dir), 0); - return TEST_SUCCESS; - -failure: + result = TEST_SUCCESS; +out: close(cmd_fd); close(log_fd); free(backing_dir); umount(mount_dir); - return TEST_FAILURE; + return result; } static int emit_partial_test_file_data(const char *mount_dir, @@ -3029,27 +2963,6 @@ static const char v1_file[] = { 0x01, 0x00, 0x00, 0x00, }; -#define TESTCOND(condition) \ - do { \ - if (!(condition)) { \ - ksft_print_msg("%s failed %d\n", \ - __func__, __LINE__); \ - goto out; \ - } \ - } while (false) - -#define TEST(statement, condition) \ - do { \ - statement; \ - TESTCOND(condition); \ - } while (false) - -#define TESTEQUAL(statement, res) \ - TESTCOND((statement) == (res)) - -#define TESTNE(statement, res) \ - TESTCOND((statement) != (res)) - static int compatibility_test(const char *mount_dir) { static const char *name = "file"; @@ -3557,18 +3470,22 @@ static char *setup_mount_dir() return mount_dir; } -struct options { - int test; -}; - -int parse_options(int argc, char *const *argv, struct options *options) +int parse_options(int argc, char *const *argv) { signed char c; - while ((c = getopt(argc, argv, "t:")) != -1) + while ((c = getopt(argc, argv, "f:t:v")) != -1) switch (c) { + case 'f': + options.file = strtol(optarg, NULL, 10); + break; + case 't': - options->test = strtol(optarg, NULL, 10); + options.test = strtol(optarg, NULL, 10); + break; + + case 'v': + options.verbose = true; break; default: @@ -3583,27 +3500,22 @@ struct test_case { const char *name; }; -void run_one_test(const char *mount_dir, struct test_case *test_case, - int *fails) +void run_one_test(const char *mount_dir, struct test_case *test_case) { ksft_print_msg("Running %s\n", test_case->name); if (test_case->pfunc(mount_dir) == TEST_SUCCESS) ksft_test_result_pass("%s\n", test_case->name); - else { + else ksft_test_result_fail("%s\n", test_case->name); - fails++; - } } int main(int argc, char *argv[]) { char *mount_dir = NULL; - int fails = 0; int i; int fd, count; - struct options options = {}; - if (parse_options(argc, argv, &options)) + if (parse_options(argc, argv)) ksft_exit_fail_msg("Bad options\n"); // Seed randomness pool for testing on QEMU @@ -3655,17 +3567,12 @@ int main(int argc, char *argv[]) if (options.test <= 0 || options.test > ARRAY_SIZE(cases)) ksft_exit_fail_msg("Invalid test\n"); - run_one_test(mount_dir, &cases[options.test - 1], &fails); + run_one_test(mount_dir, &cases[options.test - 1]); } else for (i = 0; i < ARRAY_SIZE(cases); ++i) - run_one_test(mount_dir, &cases[i], &fails); + run_one_test(mount_dir, &cases[i]); umount2(mount_dir, MNT_FORCE); rmdir(mount_dir); - - if (fails > 0) - ksft_exit_fail(); - else - ksft_exit_pass(); - return 0; + return !ksft_get_fail_cnt() ? ksft_exit_pass() : ksft_exit_fail(); } From 63ddad8bced0124c06cfab82e97c30ed00bfe0a5 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Tue, 27 Oct 2020 07:43:06 -0700 Subject: [PATCH 459/636] ANDROID: Incremental fs: Initialize mount options correctly report_uid was not being initialized, leading to random behavior Bug: 172480517 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: Ib121136d6f570f99e9060bdde9aa43ff2995514e --- fs/incfs/vfs.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 250e8bf4d5dd..2634cf37ca92 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -180,12 +180,13 @@ static int parse_options(struct mount_options *opts, char *str) if (opts == NULL) return -EFAULT; - opts->read_timeout_ms = 1000; /* Default: 1s */ - opts->readahead_pages = 10; - opts->read_log_pages = 2; - opts->read_log_wakeup_count = 10; - opts->no_backing_file_cache = false; - opts->no_backing_file_readahead = false; + *opts = (struct mount_options) { + .read_timeout_ms = 1000, /* Default: 1s */ + .readahead_pages = 10, + .read_log_pages = 2, + .read_log_wakeup_count = 10, + }; + if (str == NULL || *str == 0) return 0; From 28520c784ade9e65553b37d784e0de3991654665 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Thu, 8 Oct 2020 13:08:21 -0700 Subject: [PATCH 460/636] ANDROID: Incremental fs: Small improvements Rmove bc_mutex used to protect metadata chain, now that is only read at file open time Remove certain unused mount options Bug: 172482559 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: Id70e5a5d08e5de79f391e19ea97e356f39a3ed51 --- fs/incfs/data_mgmt.c | 38 +++++--------------------------------- fs/incfs/data_mgmt.h | 8 +------- fs/incfs/format.c | 28 +--------------------------- fs/incfs/format.h | 5 ----- fs/incfs/pseudo_files.c | 7 +++++++ fs/incfs/vfs.c | 18 ------------------ 6 files changed, 14 insertions(+), 90 deletions(-) diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index 14cba02b8189..b6a8b47b4f6d 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -19,6 +19,8 @@ #include "format.h" #include "integrity.h" +static int incfs_scan_metadata_chain(struct data_file *df); + static void log_wake_up_all(struct work_struct *work) { struct delayed_work *dw = container_of(work, struct delayed_work, work); @@ -240,12 +242,8 @@ struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) data_file_segment_init(&df->df_segments[i]); - error = mutex_lock_interruptible(&bfc->bc_mutex); - if (error) - goto out; error = incfs_read_file_header(bfc, &df->df_metadata_off, &df->df_id, &size, &df->df_header_flags); - mutex_unlock(&bfc->bc_mutex); if (error) goto out; @@ -1373,7 +1371,7 @@ static int process_status_md(struct incfs_status *is, return 0; } -int incfs_scan_metadata_chain(struct data_file *df) +static int incfs_scan_metadata_chain(struct data_file *df) { struct metadata_handler *handler = NULL; int result = 0; @@ -1390,12 +1388,6 @@ int incfs_scan_metadata_chain(struct data_file *df) if (!handler) return -ENOMEM; - /* No writing to the backing file while it's being scanned. */ - error = mutex_lock_interruptible(&bfc->bc_mutex); - if (error) - goto out; - - /* Reading superblock */ handler->md_record_offset = df->df_metadata_off; handler->context = df; handler->handle_blockmap = process_blockmap_md; @@ -1418,7 +1410,6 @@ int incfs_scan_metadata_chain(struct data_file *df) result = error; } else result = records_count; - mutex_unlock(&bfc->bc_mutex); if (df->df_hash_tree) { int hash_block_count = get_blocks_count_for_size( @@ -1430,7 +1421,6 @@ int incfs_scan_metadata_chain(struct data_file *df) } else if (df->df_data_block_count != df->df_total_block_count) result = -EINVAL; -out: kfree(handler); return result; } @@ -1443,10 +1433,6 @@ bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number) { bool result = false; - /* - * We could probably get away with spin locks here; - * if we use atomic_read() on both these variables. - */ spin_lock(&mi->pending_read_lock); result = (mi->mi_last_pending_read_number > last_number) && (mi->mi_pending_reads_count > 0); @@ -1461,7 +1447,6 @@ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, { int reported_reads = 0; struct pending_read *entry = NULL; - bool result = false; if (!mi) return -EFAULT; @@ -1469,15 +1454,8 @@ int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, if (reads_size <= 0) return 0; - spin_lock(&mi->pending_read_lock); - - result = ((mi->mi_last_pending_read_number <= sn_lowerbound) - || (mi->mi_pending_reads_count == 0)); - - spin_unlock(&mi->pending_read_lock); - - if (result) - return reported_reads; + if (!incfs_fresh_pending_reads_exist(mi, sn_lowerbound)) + return 0; rcu_read_lock(); @@ -1609,9 +1587,3 @@ int incfs_collect_logged_reads(struct mount_info *mi, return dst_idx; } -bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) -{ - if (lhs.len != rhs.len) - return false; - return memcmp(lhs.data, rhs.data, lhs.len) == 0; -} diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 13b663a1d53e..7bbaf6ddbba4 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -41,7 +41,7 @@ struct same_file_record { enum LOG_RECORD_TYPE type : 2; /* SAME_FILE */ u32 block_index : 30; u32 relative_ts_us; /* max 2^32 us ~= 1 hour (1:11:30) */ -} __packed; /* 12 bytes */ +} __packed; /* 8 bytes */ struct same_file_next_block { enum LOG_RECORD_TYPE type : 2; /* SAME_FILE_NEXT_BLOCK */ @@ -102,8 +102,6 @@ struct mount_options { unsigned int readahead_pages; unsigned int read_log_pages; unsigned int read_log_wakeup_count; - bool no_backing_file_cache; - bool no_backing_file_readahead; bool report_uid; }; @@ -326,8 +324,6 @@ struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name); struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); void incfs_free_data_file(struct data_file *df); -int incfs_scan_metadata_chain(struct data_file *df); - struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf); void incfs_free_dir_file(struct dir_file *dir); @@ -448,6 +444,4 @@ static inline int get_blocks_count_for_size(u64 size) return 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; } -bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs); - #endif /* _INCFS_DATA_MGMT_H */ diff --git a/fs/incfs/format.c b/fs/incfs/format.c index 06ddaade8523..52079b5a45e6 100644 --- a/fs/incfs/format.c +++ b/fs/incfs/format.c @@ -40,7 +40,7 @@ void incfs_free_bfc(struct backing_file_context *bfc) kfree(bfc); } -loff_t incfs_get_end_offset(struct file *f) +static loff_t incfs_get_end_offset(struct file *f) { /* * This function assumes that file size and the end-offset @@ -503,29 +503,6 @@ int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), bm_entry_off); } -/* Initialize a new image in a given backing file. */ -int incfs_make_empty_backing_file(struct backing_file_context *bfc, - incfs_uuid_t *uuid, u64 file_size) -{ - int result = 0; - - if (!bfc || !bfc->bc_file) - return -EFAULT; - - result = mutex_lock_interruptible(&bfc->bc_mutex); - if (result) - goto out; - - result = truncate_backing_file(bfc, 0); - if (result) - goto out; - - result = incfs_write_fh_to_backing_file(bfc, uuid, file_size); -out: - mutex_unlock(&bfc->bc_mutex); - return result; -} - int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, loff_t bm_base_off, struct incfs_blockmap_entry *bm_entry) @@ -579,7 +556,6 @@ int incfs_read_file_header(struct backing_file_context *bfc, if (!bfc || !first_md_off) return -EFAULT; - LOCK_REQUIRED(bfc->bc_mutex); bytes_read = incfs_kread(bfc->bc_file, &fh, sizeof(fh), 0); if (bytes_read < 0) return bytes_read; @@ -627,8 +603,6 @@ int incfs_read_next_metadata_record(struct backing_file_context *bfc, if (!bfc || !handler) return -EFAULT; - LOCK_REQUIRED(bfc->bc_mutex); - if (handler->md_record_offset == 0) return -EPERM; diff --git a/fs/incfs/format.h b/fs/incfs/format.h index 2f6c7c8f9118..a7fa36f6f941 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -292,8 +292,6 @@ struct metadata_handler { #define INCFS_MAX_METADATA_RECORD_SIZE \ FIELD_SIZEOF(struct metadata_handler, md_buffer) -loff_t incfs_get_end_offset(struct file *f); - /* Backing file context management */ struct backing_file_context *incfs_alloc_bfc(struct file *backing_file); @@ -329,9 +327,6 @@ int incfs_write_status_to_backing_file(struct backing_file_context *bfc, u32 data_blocks_written, u32 hash_blocks_written); -int incfs_make_empty_backing_file(struct backing_file_context *bfc, - incfs_uuid_t *uuid, u64 file_size); - /* Reading stuff */ int incfs_read_file_header(struct backing_file_context *bfc, loff_t *first_md_off, incfs_uuid_t *uuid, diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c index d407209c0d99..768abd4269cf 100644 --- a/fs/incfs/pseudo_files.c +++ b/fs/incfs/pseudo_files.c @@ -336,6 +336,13 @@ retry_deleg: return error; } +static bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) +{ + if (lhs.len != rhs.len) + return false; + return memcmp(lhs.data, rhs.data, lhs.len) == 0; +} + static bool is_pseudo_filename(struct mem_range name) { if (incfs_equal_ranges(pending_reads_file_name_range, name)) diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 2634cf37ca92..40192863eb4e 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -152,8 +152,6 @@ struct inode_search { enum parse_parameter { Opt_read_timeout, Opt_readahead_pages, - Opt_no_backing_file_cache, - Opt_no_backing_file_readahead, Opt_rlog_pages, Opt_rlog_wakeup_cnt, Opt_report_uid, @@ -163,8 +161,6 @@ enum parse_parameter { static const match_table_t option_tokens = { { Opt_read_timeout, "read_timeout_ms=%u" }, { Opt_readahead_pages, "readahead=%u" }, - { Opt_no_backing_file_cache, "no_bf_cache=%u" }, - { Opt_no_backing_file_readahead, "no_bf_readahead=%u" }, { Opt_rlog_pages, "rlog_pages=%u" }, { Opt_rlog_wakeup_cnt, "rlog_wakeup_cnt=%u" }, { Opt_report_uid, "report_uid" }, @@ -209,16 +205,6 @@ static int parse_options(struct mount_options *opts, char *str) return -EINVAL; opts->readahead_pages = value; break; - case Opt_no_backing_file_cache: - if (match_int(&args[0], &value)) - return -EINVAL; - opts->no_backing_file_cache = (value != 0); - break; - case Opt_no_backing_file_readahead: - if (match_int(&args[0], &value)) - return -EINVAL; - opts->no_backing_file_readahead = (value != 0); - break; case Opt_rlog_pages: if (match_int(&args[0], &value)) return -EINVAL; @@ -1674,10 +1660,6 @@ static int show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",rlog_wakeup_cnt=%u", mi->mi_options.read_log_wakeup_count); } - if (mi->mi_options.no_backing_file_cache) - seq_puts(m, ",no_bf_cache"); - if (mi->mi_options.no_backing_file_readahead) - seq_puts(m, ",no_bf_readahead"); if (mi->mi_options.report_uid) seq_puts(m, ",report_uid"); return 0; From 41a12dfcf58e8b53d18f62948080834a60ff5069 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Wed, 14 Oct 2020 07:42:33 -0700 Subject: [PATCH 461/636] ANDROID: Incremental fs: Add zstd compression support Bug: 160634783 Test: incfs_test passes Signed-off-by: Paul Lawrence Change-Id: Iba28b535d2d5183859ffc721204b036434132d9b --- fs/incfs/Kconfig | 2 + fs/incfs/data_mgmt.c | 94 +++++++++++++++++-- fs/incfs/data_mgmt.h | 7 ++ fs/incfs/format.h | 6 +- include/uapi/linux/incrementalfs.h | 3 +- .../selftests/filesystems/incfs/Makefile | 2 +- .../selftests/filesystems/incfs/incfs_test.c | 34 ++++++- 7 files changed, 132 insertions(+), 16 deletions(-) diff --git a/fs/incfs/Kconfig b/fs/incfs/Kconfig index 8c0e8ae2030f..8f7d5226266b 100644 --- a/fs/incfs/Kconfig +++ b/fs/incfs/Kconfig @@ -2,6 +2,8 @@ config INCREMENTAL_FS tristate "Incremental file system support" depends on BLOCK select DECOMPRESS_LZ4 + select DECOMPRESS_ZSTD + select CRYPTO_ZSTD select CRYPTO_SHA256 help Incremental FS is a read-only virtual file system that facilitates execution diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c index b6a8b47b4f6d..48ab7428d627 100644 --- a/fs/incfs/data_mgmt.c +++ b/fs/incfs/data_mgmt.c @@ -28,6 +28,19 @@ static void log_wake_up_all(struct work_struct *work) wake_up_all(&rl->ml_notif_wq); } +static void zstd_free_workspace(struct work_struct *work) +{ + struct delayed_work *dw = container_of(work, struct delayed_work, work); + struct mount_info *mi = + container_of(dw, struct mount_info, mi_zstd_cleanup_work); + + mutex_lock(&mi->mi_zstd_workspace_mutex); + kvfree(mi->mi_zstd_workspace); + mi->mi_zstd_workspace = NULL; + mi->mi_zstd_stream = NULL; + mutex_unlock(&mi->mi_zstd_workspace_mutex); +} + struct mount_info *incfs_alloc_mount_info(struct super_block *sb, struct mount_options *options, struct path *backing_dir_path) @@ -53,6 +66,8 @@ struct mount_info *incfs_alloc_mount_info(struct super_block *sb, spin_lock_init(&mi->pending_read_lock); INIT_LIST_HEAD(&mi->mi_reads_list_head); spin_lock_init(&mi->mi_per_uid_read_timeouts_lock); + mutex_init(&mi->mi_zstd_workspace_mutex); + INIT_DELAYED_WORK(&mi->mi_zstd_cleanup_work, zstd_free_workspace); error = incfs_realloc_mount_info(mi, options); if (error) @@ -112,11 +127,13 @@ void incfs_free_mount_info(struct mount_info *mi) return; flush_delayed_work(&mi->mi_log.ml_wakeup_work); + flush_delayed_work(&mi->mi_zstd_cleanup_work); dput(mi->mi_index_dir); dput(mi->mi_incomplete_dir); path_put(&mi->mi_backing_dir_path); mutex_destroy(&mi->mi_dir_struct_mutex); + mutex_destroy(&mi->mi_zstd_workspace_mutex); put_cred(mi->mi_owner); kfree(mi->mi_log.rl_ring_buf); kfree(mi->log_xattr); @@ -357,16 +374,73 @@ void incfs_free_dir_file(struct dir_file *dir) kfree(dir); } -static ssize_t decompress(struct mem_range src, struct mem_range dst) +static ssize_t zstd_decompress_safe(struct mount_info *mi, + struct mem_range src, struct mem_range dst) { - int result = LZ4_decompress_safe(src.data, dst.data, src.len, dst.len); + ssize_t result; + ZSTD_inBuffer inbuf = {.src = src.data, .size = src.len}; + ZSTD_outBuffer outbuf = {.dst = dst.data, .size = dst.len}; - if (result < 0) - return -EBADMSG; + result = mutex_lock_interruptible(&mi->mi_zstd_workspace_mutex); + if (result) + return result; + if (!mi->mi_zstd_stream) { + unsigned int workspace_size = ZSTD_DStreamWorkspaceBound( + INCFS_DATA_FILE_BLOCK_SIZE); + void *workspace = kvmalloc(workspace_size, GFP_NOFS); + ZSTD_DStream *stream; + + if (!workspace) { + result = -ENOMEM; + goto out; + } + + stream = ZSTD_initDStream(INCFS_DATA_FILE_BLOCK_SIZE, workspace, + workspace_size); + if (!stream) { + kvfree(workspace); + result = -EIO; + goto out; + } + + mi->mi_zstd_workspace = workspace; + mi->mi_zstd_stream = stream; + } + + result = ZSTD_decompressStream(mi->mi_zstd_stream, &outbuf, &inbuf) ? + -EBADMSG : outbuf.pos; + + mod_delayed_work(system_wq, &mi->mi_zstd_cleanup_work, + msecs_to_jiffies(5000)); + +out: + mutex_unlock(&mi->mi_zstd_workspace_mutex); return result; } +static ssize_t decompress(struct mount_info *mi, + struct mem_range src, struct mem_range dst, int alg) +{ + int result; + + switch (alg) { + case INCFS_BLOCK_COMPRESSED_LZ4: + result = LZ4_decompress_safe(src.data, dst.data, src.len, + dst.len); + if (result < 0) + return -EBADMSG; + return result; + + case INCFS_BLOCK_COMPRESSED_ZSTD: + return zstd_decompress_safe(mi, src, dst); + + default: + WARN_ON(true); + return -EOPNOTSUPP; + } +} + static void log_read_one_record(struct read_log *rl, struct read_log_state *rs) { union log_record *record = @@ -636,9 +710,7 @@ static void convert_data_file_block(struct incfs_blockmap_entry *bme, res_block->db_backing_file_data_offset |= le32_to_cpu(bme->me_data_offset_lo); res_block->db_stored_size = le16_to_cpu(bme->me_data_size); - res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ? - COMPRESSION_LZ4 : - COMPRESSION_NONE; + res_block->db_comp_alg = flags & INCFS_BLOCK_COMPRESSED_MASK; } static int get_data_file_block(struct data_file *df, int index, @@ -1089,7 +1161,8 @@ ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, result = incfs_kread(bf, tmp.data, bytes_to_read, pos); if (result == bytes_to_read) { result = - decompress(range(tmp.data, bytes_to_read), dst); + decompress(mi, range(tmp.data, bytes_to_read), + dst, block.db_comp_alg); if (result < 0) { const char *name = bf->f_path.dentry->d_name.name; @@ -1139,8 +1212,13 @@ int incfs_process_new_data_block(struct data_file *df, segment = get_file_segment(df, block->block_index); if (!segment) return -EFAULT; + if (block->compression == COMPRESSION_LZ4) flags |= INCFS_BLOCK_COMPRESSED_LZ4; + else if (block->compression == COMPRESSION_ZSTD) + flags |= INCFS_BLOCK_COMPRESSED_ZSTD; + else if (block->compression) + return -EINVAL; error = down_read_killable(&segment->rwsem); if (error) diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 7bbaf6ddbba4..a63af708fa6d 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -166,6 +167,12 @@ struct mount_info { spinlock_t mi_per_uid_read_timeouts_lock; struct incfs_per_uid_read_timeouts *mi_per_uid_read_timeouts; int mi_per_uid_read_timeouts_size; + + /* zstd workspace */ + struct mutex mi_zstd_workspace_mutex; + void *mi_zstd_workspace; + ZSTD_DStream *mi_zstd_stream; + struct delayed_work mi_zstd_cleanup_work; }; struct data_file_block { diff --git a/fs/incfs/format.h b/fs/incfs/format.h index a7fa36f6f941..87d10157dfe1 100644 --- a/fs/incfs/format.h +++ b/fs/incfs/format.h @@ -195,7 +195,11 @@ struct incfs_file_header { } __packed; enum incfs_block_map_entry_flags { - INCFS_BLOCK_COMPRESSED_LZ4 = (1 << 0), + INCFS_BLOCK_COMPRESSED_LZ4 = 1, + INCFS_BLOCK_COMPRESSED_ZSTD = 2, + + /* Reserve 3 bits for compression alg */ + INCFS_BLOCK_COMPRESSED_MASK = 7, }; /* Block map entry pointing to an actual location of the data block. */ diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h index fbb83a2d9479..625db40356f2 100644 --- a/include/uapi/linux/incrementalfs.h +++ b/include/uapi/linux/incrementalfs.h @@ -143,7 +143,8 @@ enum incfs_compression_alg { COMPRESSION_NONE = 0, - COMPRESSION_LZ4 = 1 + COMPRESSION_LZ4 = 1, + COMPRESSION_ZSTD = 2, }; enum incfs_block_flags { diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index 83e2ecb7bad9..f3798029247a 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -Werror -I../.. -I../../../../.. -LDLIBS := -llz4 -lcrypto -lpthread +LDLIBS := -llz4 -lzstd -lcrypto -lpthread TEST_GEN_PROGS := incfs_test incfs_stress incfs_perf include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index d375827257d4..de9fccf6b608 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -321,6 +322,12 @@ static bool same_id(incfs_uuid_t *id1, incfs_uuid_t *id2) return !memcmp(id1->bytes, id2->bytes, sizeof(id1->bytes)); } +ssize_t ZSTD_compress_default(char *data, char *comp_data, size_t data_size, + size_t comp_size) +{ + return ZSTD_compress(comp_data, comp_size, data, data_size, 1); +} + static int emit_test_blocks(const char *mnt_dir, struct test_file *file, int blocks[], int count) { @@ -345,7 +352,8 @@ static int emit_test_blocks(const char *mnt_dir, struct test_file *file, for (i = 0; i < block_count; i++) { int block_index = blocks[i]; - bool compress = (file->index + block_index) % 2 == 0; + bool compress_zstd = (file->index + block_index) % 4 == 2; + bool compress_lz4 = (file->index + block_index) % 4 == 0; int seed = get_file_block_seed(file->index, block_index); off_t block_offset = ((off_t)block_index) * INCFS_DATA_FILE_BLOCK_SIZE; @@ -362,10 +370,10 @@ static int emit_test_blocks(const char *mnt_dir, struct test_file *file, block_size = file->size - block_offset; rnd_buf(data, block_size, seed); - if (compress) { - size_t comp_size = LZ4_compress_default( - (char *)data, (char *)comp_data, block_size, - ARRAY_SIZE(comp_data)); + if (compress_lz4) { + size_t comp_size = LZ4_compress_default((char *)data, + (char *)comp_data, block_size, + ARRAY_SIZE(comp_data)); if (comp_size <= 0) { error = -EBADMSG; @@ -378,6 +386,22 @@ static int emit_test_blocks(const char *mnt_dir, struct test_file *file, memcpy(current_data, comp_data, comp_size); block_size = comp_size; block_buf[i].compression = COMPRESSION_LZ4; + } else if (compress_zstd) { + size_t comp_size = ZSTD_compress(comp_data, + ARRAY_SIZE(comp_data), data, block_size, + 1); + + if (comp_size <= 0) { + error = -EBADMSG; + break; + } + if (current_data + comp_size > data_end) { + error = -ENOMEM; + break; + } + memcpy(current_data, comp_data, comp_size); + block_size = comp_size; + block_buf[i].compression = COMPRESSION_ZSTD; } else { if (current_data + block_size > data_end) { error = -ENOMEM; From 1c5ad1eb2d3789df8b97c3501d4f083288090415 Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Fri, 20 Nov 2020 07:08:51 -0800 Subject: [PATCH 462/636] Revert "ANDROID: Add dependencies of dm-user.ko" This reverts commit 4a1753307d32ac3ef2c202f215a5cec80373fed1. Bug: 169084168 Signed-off-by: Paul Lawrence Change-Id: Ie72f9b8d295b9ba5f72d0befd7687cd4607e18d6 Signed-off-by: Will McVicker --- android/abi_gki_aarch64 | 36 +- android/abi_gki_aarch64.xml | 3084 ++++++++++++++--------------------- 2 files changed, 1239 insertions(+), 1881 deletions(-) diff --git a/android/abi_gki_aarch64 b/android/abi_gki_aarch64 index 31b7d495cb9b..7bb7e004c3de 100644 --- a/android/abi_gki_aarch64 +++ b/android/abi_gki_aarch64 @@ -10,9 +10,6 @@ dummy_dma_ops find_next_bit find_next_zero_bit - finish_wait - init_wait_entry - __init_waitqueue_head kfree __kmalloc kmalloc_caches @@ -29,19 +26,16 @@ of_property_read_variable_u32_array platform_get_irq platform_get_resource - prepare_to_wait_event printk __put_task_struct ___ratelimit _raw_spin_lock_irqsave _raw_spin_unlock_irqrestore - schedule snprintf __stack_chk_fail __stack_chk_guard strcmp __udelay - __wake_up # required by arm-smmu.ko alloc_io_pgtable_ops @@ -86,30 +80,6 @@ platform_driver_unregister put_device -# required by dm-user.ko - bio_advance - bio_endio - bio_put - __check_object_size - _copy_from_iter - copy_page_from_iter - copy_page_to_iter - _copy_to_iter - dm_register_target - dm_unregister_target - kasprintf - __list_del_entry_valid - __ll_sc_atomic_add - mempool_alloc - mempool_exit - mempool_free - mempool_init - mempool_kfree - mempool_kmalloc - misc_deregister - misc_register - no_llseek - # required by ufshcd-core.ko __alloc_workqueue_key async_schedule @@ -151,10 +121,13 @@ down_write event_triggers_call find_last_bit + finish_wait flush_work free_irq __init_rwsem init_timer_key + init_wait_entry + __init_waitqueue_head jiffies jiffies_to_usecs keyslot_manager_create @@ -177,6 +150,7 @@ __pm_runtime_idle __pm_runtime_resume preempt_schedule_notrace + prepare_to_wait_event print_hex_dump queue_delayed_work_on queue_work_on @@ -188,6 +162,7 @@ regulator_set_load regulator_set_voltage request_threaded_irq + schedule schedule_timeout __scsi_add_device scsi_add_host_with_dma @@ -233,6 +208,7 @@ usleep_range utf16s_to_utf8s wait_for_completion_timeout + __wake_up __warn_printk # required by ufshcd-pltfrm.ko diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index e7bd92444d33..e7b884328a84 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -182,7 +182,6 @@ - @@ -252,12 +251,9 @@ - - - @@ -418,8 +414,6 @@ - - @@ -639,8 +633,6 @@ - - @@ -1405,9 +1397,7 @@ - - @@ -2823,7 +2813,7 @@ - + @@ -3137,6 +3127,14 @@ + + + + + + + + @@ -3314,7 +3312,7 @@ - + @@ -3567,7 +3565,7 @@ - + @@ -5061,7 +5059,7 @@ - + @@ -5213,14 +5211,6 @@ - - - - - - - - @@ -5283,7 +5273,7 @@ - + @@ -5828,7 +5818,7 @@ - + @@ -7110,7 +7100,7 @@ - + @@ -7503,7 +7493,7 @@ - + @@ -7550,7 +7540,7 @@ - + @@ -7599,7 +7589,7 @@ - + @@ -8068,11 +8058,6 @@ - - - - - @@ -8538,7 +8523,7 @@ - + @@ -8694,7 +8679,7 @@ - + @@ -9402,6 +9387,7 @@ + @@ -10388,7 +10374,7 @@ - + @@ -10669,7 +10655,7 @@ - + @@ -11049,7 +11035,7 @@ - + @@ -11168,7 +11154,7 @@ - + @@ -11730,7 +11716,7 @@ - + @@ -12316,7 +12302,7 @@ - + @@ -12436,7 +12422,7 @@ - + @@ -12445,7 +12431,7 @@ - + @@ -14551,17 +14537,6 @@ - - - - - - - - - - - @@ -15645,7 +15620,6 @@ - @@ -16637,7 +16611,7 @@ - + @@ -17898,7 +17872,7 @@ - + @@ -17953,7 +17927,7 @@ - + @@ -18672,10 +18646,6 @@ - - - - @@ -19698,7 +19668,7 @@ - + @@ -19790,7 +19760,7 @@ - + @@ -20108,7 +20078,7 @@ - + @@ -22095,7 +22065,7 @@ - + @@ -22258,7 +22228,7 @@ - + @@ -22909,6 +22879,7 @@ + @@ -22958,6 +22929,7 @@ + @@ -23145,7 +23117,7 @@ - + @@ -23906,7 +23878,7 @@ - + @@ -23949,7 +23921,7 @@ - + @@ -24018,7 +23990,7 @@ - + @@ -24157,7 +24129,7 @@ - + @@ -25263,7 +25235,7 @@ - + @@ -25420,14 +25392,14 @@ - + - + @@ -25447,7 +25419,7 @@ - + @@ -25523,7 +25495,7 @@ - + @@ -25556,7 +25528,7 @@ - + @@ -25599,7 +25571,7 @@ - + @@ -25713,13 +25685,13 @@ - + - + @@ -25753,7 +25725,7 @@ - + @@ -26051,7 +26023,7 @@ - + @@ -26202,7 +26174,7 @@ - + @@ -26314,7 +26286,7 @@ - + @@ -26594,7 +26566,7 @@ - + @@ -26603,12 +26575,12 @@ - + - + @@ -26646,7 +26618,7 @@ - + @@ -26671,7 +26643,7 @@ - + @@ -26906,11 +26878,11 @@ - + - + @@ -27081,7 +27053,7 @@ - + @@ -27186,23 +27158,23 @@ - + - + - + - + @@ -27486,7 +27458,6 @@ - @@ -28077,7 +28048,6 @@ - @@ -28211,7 +28181,7 @@ - + @@ -28278,7 +28248,7 @@ - + @@ -28462,7 +28432,7 @@ - + @@ -28482,7 +28452,7 @@ - + @@ -28505,6 +28475,25 @@ + + + + + + + + + + + + + + + + + + + @@ -28526,7 +28515,7 @@ - + @@ -28535,7 +28524,7 @@ - + @@ -28630,6 +28619,17 @@ + + + + + + + + + + + @@ -28720,41 +28720,24 @@ - + - + - + - + - + - - - - + + - - - - - - - - - - - - - - - - - + + @@ -28798,18 +28781,27 @@ - + - + - + - - + + - - + + + + + + + + + + + @@ -28932,7 +28924,7 @@ - + @@ -29003,10 +28995,7 @@ - - - @@ -29029,14 +29018,14 @@ - + - + @@ -29047,14 +29036,14 @@ - + - + @@ -29075,44 +29064,44 @@ - - + + - + - + - + - + - + - + - + @@ -29124,27 +29113,27 @@ - + - - + + - + - + - + @@ -29155,11 +29144,11 @@ - + - + @@ -29183,7 +29172,7 @@ - + @@ -29282,7 +29271,7 @@ - + @@ -29392,7 +29381,7 @@ - + @@ -29450,9 +29439,7 @@ - - @@ -30169,7 +30156,7 @@ - + @@ -30199,7 +30186,7 @@ - + @@ -30208,7 +30195,7 @@ - + @@ -30373,7 +30360,7 @@ - + @@ -30710,7 +30697,7 @@ - + @@ -30888,13 +30875,13 @@ - + - + @@ -31255,7 +31242,7 @@ - + @@ -31281,7 +31268,7 @@ - + @@ -31294,7 +31281,7 @@ - + @@ -31957,7 +31944,7 @@ - + @@ -32014,7 +32001,7 @@ - + @@ -32040,7 +32027,7 @@ - + @@ -32049,7 +32036,7 @@ - + @@ -32073,7 +32060,7 @@ - + @@ -32134,7 +32121,7 @@ - + @@ -32333,7 +32320,6 @@ - @@ -32345,8 +32331,6 @@ - - @@ -32373,18 +32357,7 @@ - - - - - - - - - - - - + @@ -32422,10 +32395,9 @@ - + - @@ -33030,7 +33002,7 @@ - + @@ -33603,7 +33575,7 @@ - + @@ -34679,7 +34651,7 @@ - + @@ -34976,7 +34948,7 @@ - + @@ -35146,7 +35118,7 @@ - + @@ -35173,7 +35145,7 @@ - + @@ -35434,7 +35406,7 @@ - + @@ -35959,236 +35931,11 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -36203,18 +35950,10 @@ - - - - - - - - @@ -36236,110 +35975,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -36413,10 +36048,22 @@ - + + + + + + + + + + + + + @@ -36427,7 +36074,6 @@ - @@ -36729,7 +36375,7 @@ - + @@ -36928,6 +36574,11 @@ + + + + + @@ -37444,26 +37095,26 @@ - - + + - + - + - + - + - + @@ -37501,105 +37152,105 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37686,13 +37337,13 @@ - - + + - + - + @@ -37706,82 +37357,82 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37793,7 +37444,7 @@ - + @@ -37837,75 +37488,75 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37976,7 +37627,7 @@ - + @@ -38074,18 +37725,18 @@ - + - + - + - + - + @@ -38211,69 +37862,69 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -38296,7 +37947,7 @@ - + @@ -38334,7 +37985,7 @@ - + @@ -38430,7 +38081,7 @@ - + @@ -38440,7 +38091,7 @@ - + @@ -38804,289 +38455,289 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -39098,150 +38749,150 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -39257,586 +38908,586 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -40745,11 +40396,6 @@ - - - - - @@ -40912,6 +40558,17 @@ + + + + + + + + + + + @@ -41120,8 +40777,8 @@ - - + + @@ -41136,7 +40793,7 @@ - + @@ -41144,7 +40801,7 @@ - + @@ -41351,7 +41008,7 @@ - + @@ -41381,7 +41038,7 @@ - + @@ -41491,107 +41148,107 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -41779,7 +41436,7 @@ - + @@ -41792,17 +41449,6 @@ - - - - - - - - - - - @@ -42086,7 +41732,7 @@ - + @@ -42326,8 +41972,8 @@ - - + + @@ -42336,7 +41982,7 @@ - + @@ -42630,17 +42276,6 @@ - - - - - - - - - - - @@ -42786,7 +42421,6 @@ - @@ -44858,7 +44492,7 @@ - + @@ -44883,7 +44517,7 @@ - + @@ -45402,7 +45036,7 @@ - + @@ -45669,7 +45303,7 @@ - + @@ -47404,7 +47038,7 @@ - + @@ -47413,7 +47047,7 @@ - + @@ -47487,7 +47121,7 @@ - + @@ -47495,7 +47129,7 @@ - + @@ -48858,7 +48492,6 @@ - @@ -48956,7 +48589,6 @@ - @@ -49758,8 +49390,8 @@ - - + + @@ -49937,7 +49569,7 @@ - + @@ -50056,7 +49688,7 @@ - + @@ -50467,7 +50099,7 @@ - + @@ -50537,7 +50169,7 @@ - + @@ -50903,7 +50535,7 @@ - + @@ -50978,7 +50610,7 @@ - + @@ -51621,7 +51253,7 @@ - + @@ -51646,7 +51278,7 @@ - + @@ -51957,17 +51589,6 @@ - - - - - - - - - - - @@ -51993,7 +51614,6 @@ - @@ -52541,19 +52161,8 @@ - - - - - - - - - - - - + @@ -52683,7 +52292,6 @@ - @@ -53413,14 +53021,6 @@ - - - - - - - - @@ -53521,6 +53121,14 @@ + + + + + + + + @@ -53539,8 +53147,8 @@ - - + + @@ -54505,7 +54113,7 @@ - + @@ -54524,7 +54132,7 @@ - + @@ -54551,7 +54159,7 @@ - + @@ -54562,34 +54170,34 @@ - + - + - + - + - + - + - + - + - + - + @@ -54950,7 +54558,7 @@ - + @@ -55484,12 +55092,12 @@ - + - + @@ -55499,7 +55107,7 @@ - + @@ -55585,7 +55193,7 @@ - + @@ -55830,7 +55438,7 @@ - + @@ -55976,7 +55584,7 @@ - + @@ -56925,7 +56533,7 @@ - + @@ -57477,7 +57085,7 @@ - + @@ -57487,7 +57095,7 @@ - + @@ -57526,7 +57134,7 @@ - + @@ -57547,7 +57155,7 @@ - + @@ -57580,12 +57188,12 @@ - + - + @@ -57674,7 +57282,7 @@ - + @@ -57686,7 +57294,7 @@ - + @@ -58649,7 +58257,7 @@ - + @@ -58775,7 +58383,7 @@ - + @@ -58783,7 +58391,7 @@ - + @@ -58826,7 +58434,7 @@ - + @@ -58852,7 +58460,7 @@ - + @@ -58860,7 +58468,7 @@ - + @@ -59750,17 +59358,6 @@ - - - - - - - - - - - @@ -59880,6 +59477,17 @@ + + + + + + + + + + + @@ -60811,7 +60419,7 @@ - + @@ -60848,7 +60456,7 @@ - + @@ -61035,7 +60643,7 @@ - + @@ -61081,7 +60689,7 @@ - + @@ -62119,7 +61727,7 @@ - + @@ -62131,7 +61739,7 @@ - + @@ -62670,7 +62278,7 @@ - + @@ -62688,7 +62296,7 @@ - + @@ -63019,7 +62627,7 @@ - + @@ -63042,15 +62650,15 @@ - + - + - + - + @@ -63217,67 +62825,67 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -63527,7 +63135,7 @@ - + @@ -63901,17 +63509,6 @@ - - - - - - - - - - - @@ -64029,17 +63626,17 @@ - + - + - + @@ -64050,7 +63647,7 @@ - + @@ -64207,7 +63804,6 @@ - @@ -64217,7 +63813,7 @@ - + @@ -64345,30 +63941,30 @@ - + - + - + - + - + @@ -64436,13 +64032,13 @@ - + - + @@ -64783,7 +64379,7 @@ - + @@ -65868,17 +65464,6 @@ - - - - - - - - - - - @@ -65912,7 +65497,7 @@ - + @@ -65928,7 +65513,7 @@ - + @@ -65952,7 +65537,7 @@ - + @@ -65994,7 +65579,6 @@ - @@ -66043,23 +65627,11 @@ - - - - - - - - - - - - @@ -66076,24 +65648,24 @@ - + - + - + - - - - + + + + - + @@ -66112,7 +65684,7 @@ - + @@ -66200,14 +65772,6 @@ - - - - - - - - @@ -66334,6 +65898,14 @@ + + + + + + + + @@ -66357,17 +65929,6 @@ - - - - - - - - - - - @@ -66417,7 +65978,6 @@ - @@ -66604,17 +66164,6 @@ - - - - - - - - - - - @@ -66668,7 +66217,7 @@ - + @@ -66684,7 +66233,7 @@ - + @@ -66788,7 +66337,6 @@ - @@ -67101,7 +66649,7 @@ - + @@ -67122,31 +66670,27 @@ - + - + - - - - + - - + - + - + @@ -67155,15 +66699,6 @@ - - - - - - - - - @@ -67174,34 +66709,14 @@ - - - - - - - - - - - - - - - - - - - - + - + - @@ -67217,7 +66732,7 @@ - + @@ -67354,7 +66869,7 @@ - + @@ -67629,17 +67144,6 @@ - - - - - - - - - - - @@ -67709,7 +67213,6 @@ - @@ -67913,7 +67416,7 @@ - + @@ -67923,7 +67426,7 @@ - + @@ -67964,17 +67467,6 @@ - - - - - - - - - - - @@ -68193,7 +67685,6 @@ - @@ -68370,7 +67861,7 @@ - + @@ -68655,10 +68146,6 @@ - - - - @@ -68675,24 +68162,12 @@ - - - - - - - - - - - - @@ -68781,17 +68256,6 @@ - - - - - - - - - - - @@ -68804,7 +68268,6 @@ - @@ -68817,7 +68280,6 @@ - @@ -68863,7 +68325,7 @@ - + @@ -68891,7 +68353,7 @@ - + @@ -69437,10 +68899,10 @@ - - - - + + + + @@ -70025,32 +69487,32 @@ - - - - + + + + - - + + - - - - + + + + - - + + - - + + - - + + @@ -70426,14 +69888,6 @@ - - - - - - - - @@ -70485,7 +69939,7 @@ - + @@ -70776,75 +70230,75 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -71896,7 +71350,7 @@ - + @@ -72989,7 +72443,7 @@ - + @@ -73029,7 +72483,7 @@ - + @@ -74855,7 +74309,7 @@ - + @@ -74942,10 +74396,10 @@ - + - + @@ -74954,7 +74408,7 @@ - + @@ -75089,7 +74543,7 @@ - + @@ -75179,7 +74633,7 @@ - + @@ -75938,7 +75392,7 @@ - + @@ -76805,7 +76259,7 @@ - + @@ -76820,7 +76274,7 @@ - + @@ -76964,7 +76418,7 @@ - + @@ -77067,7 +76521,7 @@ - + @@ -77241,7 +76695,7 @@ - + @@ -77372,7 +76826,7 @@ - + @@ -77504,7 +76958,7 @@ - + @@ -77744,7 +77198,7 @@ - + @@ -78011,7 +77465,7 @@ - + @@ -78337,7 +77791,7 @@ - + @@ -78372,7 +77826,7 @@ - + @@ -78600,7 +78054,7 @@ - + @@ -78674,7 +78128,7 @@ - + @@ -78898,7 +78352,7 @@ - + @@ -78906,7 +78360,7 @@ - + @@ -79047,7 +78501,7 @@ - + @@ -79417,7 +78871,7 @@ - + @@ -80269,7 +79723,7 @@ - + @@ -80666,7 +80120,7 @@ - + @@ -80723,29 +80177,18 @@ - + - + - + - + - - - - - - - - - - - @@ -80816,7 +80259,7 @@ - + @@ -80992,7 +80435,7 @@ - + @@ -81069,7 +80512,7 @@ - + @@ -81091,7 +80534,7 @@ - + @@ -81113,7 +80556,7 @@ - + @@ -81121,7 +80564,7 @@ - + @@ -81130,7 +80573,7 @@ - + @@ -81159,7 +80602,7 @@ - + @@ -81223,7 +80666,7 @@ - + @@ -81276,8 +80719,8 @@ - - + + @@ -81733,7 +81176,6 @@ - @@ -81823,8 +81265,8 @@ - - + + @@ -81987,17 +81429,17 @@ - + - + - + @@ -82529,7 +81971,7 @@ - + @@ -82791,7 +82233,7 @@ - + @@ -82899,7 +82341,7 @@ - + @@ -82978,11 +82420,11 @@ - + - + @@ -82994,11 +82436,6 @@ - - - - - @@ -83362,7 +82799,7 @@ - + @@ -83378,7 +82815,7 @@ - + @@ -83386,7 +82823,7 @@ - + @@ -83437,7 +82874,7 @@ - + @@ -83469,7 +82906,7 @@ - + @@ -83518,7 +82955,7 @@ - + @@ -83526,8 +82963,7 @@ - - + @@ -83609,10 +83045,10 @@ - - - - + + + + @@ -83921,17 +83357,6 @@ - - - - - - - - - - - @@ -83953,12 +83378,12 @@ - + - + @@ -84023,7 +83448,7 @@ - + @@ -84052,7 +83477,7 @@ - + @@ -84062,7 +83487,7 @@ - + @@ -84234,6 +83659,17 @@ + + + + + + + + + + + @@ -84304,6 +83740,7 @@ + @@ -84353,7 +83790,7 @@ - + @@ -84379,7 +83816,7 @@ - + @@ -84712,46 +84149,46 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -85831,13 +85268,6 @@ - - - - - - - @@ -85883,6 +85313,12 @@ + + + + + + @@ -85922,7 +85358,7 @@ - + @@ -86067,15 +85503,6 @@ - - - - - - - - - @@ -86410,7 +85837,7 @@ - + @@ -86460,7 +85887,7 @@ - + @@ -86835,7 +86262,7 @@ - + @@ -87247,7 +86674,7 @@ - + @@ -87395,7 +86822,7 @@ - + @@ -87411,7 +86838,7 @@ - + @@ -88657,7 +88084,7 @@ - + @@ -89039,6 +88466,76 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -89279,84 +88776,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -89577,10 +88996,6 @@ - - - - @@ -89931,6 +89346,14 @@ + + + + + + + + @@ -90027,7 +89450,7 @@ - + @@ -91122,7 +90545,7 @@ - + @@ -91402,7 +90825,7 @@ - + @@ -91435,7 +90858,7 @@ - + @@ -91558,7 +90981,7 @@ - + @@ -91604,7 +91027,7 @@ - + @@ -91741,7 +91164,7 @@ - + @@ -91781,7 +91204,7 @@ - + @@ -91800,7 +91223,7 @@ - + @@ -92027,27 +91450,27 @@ - + - + - + - + @@ -92200,14 +91623,6 @@ - - - - - - - - @@ -92658,7 +92073,7 @@ - + @@ -92705,7 +92120,7 @@ - + @@ -92719,7 +92134,7 @@ - + @@ -92738,7 +92153,7 @@ - + @@ -92747,7 +92162,7 @@ - + @@ -92762,7 +92177,7 @@ - + @@ -92854,7 +92269,7 @@ - + @@ -93324,7 +92739,7 @@ - + @@ -93613,7 +93028,7 @@ - + @@ -93627,7 +93042,7 @@ - + @@ -93635,7 +93050,7 @@ - + @@ -93700,7 +93115,7 @@ - + @@ -93801,7 +93216,7 @@ - + @@ -93820,7 +93235,7 @@ - + @@ -93850,7 +93265,7 @@ - + @@ -93996,7 +93411,7 @@ - + @@ -94046,7 +93461,7 @@ - + @@ -94903,7 +94318,7 @@ - + @@ -94988,27 +94403,7 @@ - - - - - - - - - - - - - - - - - - - - - + @@ -95016,7 +94411,7 @@ - + @@ -95028,7 +94423,7 @@ - + @@ -95174,7 +94569,7 @@ - + @@ -95621,7 +95016,7 @@ - + @@ -95642,7 +95037,7 @@ - + @@ -95674,7 +95069,7 @@ - + @@ -96162,7 +95557,7 @@ - + @@ -96183,8 +95578,8 @@ - - + + @@ -96251,7 +95646,7 @@ - + @@ -96308,17 +95703,6 @@ - - - - - - - - - - - @@ -96378,6 +95762,16 @@ + + + + + + + + + + @@ -97049,14 +96443,14 @@ - + - + - + @@ -97925,13 +97319,13 @@ - - + + - + @@ -97941,8 +97335,8 @@ - - + + @@ -97987,7 +97381,7 @@ - + @@ -98232,17 +97626,6 @@ - - - - - - - - - - - @@ -98338,7 +97721,7 @@ - + @@ -98424,7 +97807,6 @@ - @@ -98435,27 +97817,27 @@ - - + + - + - + - + - + @@ -98463,7 +97845,7 @@ - + @@ -98471,14 +97853,14 @@ - + - + @@ -98486,7 +97868,7 @@ - + @@ -98495,7 +97877,7 @@ - + @@ -98504,11 +97886,11 @@ - + - + @@ -98862,14 +98244,14 @@ - + - + @@ -99086,7 +98468,7 @@ - + @@ -99668,7 +99050,7 @@ - + @@ -99708,7 +99090,7 @@ - + @@ -99716,7 +99098,7 @@ - + @@ -99747,13 +99129,13 @@ - + - + @@ -101843,10 +101225,10 @@ - - - - + + + + @@ -101896,6 +101278,6 @@ From fbe7247e2f0b7621427ccf6e21a615c1b13dbfaa Mon Sep 17 00:00:00 2001 From: Paul Lawrence Date: Fri, 20 Nov 2020 07:30:43 -0800 Subject: [PATCH 463/636] ANDROID: GKI: Update ABI for incfs and dm-user Leaf changes summary: 16 artifacts changed (378 filtered out) Changed leaf types summary: 0 (281 filtered out) leaf types changed Removed/Changed/Added functions summary: 0 Removed, 0 Changed (92 filtered out), 16 Added functions Removed/Changed/Added variables summary: 0 Removed, 0 Changed (5 filtered out), 0 Added variable 16 Added functions: [A] 'function size_t ZSTD_DStreamWorkspaceBound(size_t)' [A] 'function size_t ZSTD_decompressStream(ZSTD_DStream*, ZSTD_outBuffer*, ZSTD_inBuffer*)' [A] 'function ZSTD_DStream* ZSTD_initDStream(size_t, void*, size_t)' [A] 'function unsigned long int _copy_from_iter(void*, unsigned long int, iov_iter*)' [A] 'function void bio_advance(bio*, unsigned int)' [A] 'function void bio_endio(bio*)' [A] 'function void bio_put(bio*)' [A] 'function unsigned long int copy_page_from_iter(page*, unsigned long int, unsigned long int, iov_iter*)' [A] 'function unsigned long int copy_page_to_iter(page*, unsigned long int, unsigned long int, iov_iter*)' [A] 'function int dm_register_target(target_type*)' [A] 'function void dm_unregister_target(target_type*)' [A] 'function int down_read_killable(rw_semaphore*)' [A] 'function int down_write_killable(rw_semaphore*)' [A] 'function void mempool_exit(mempool_s*)' [A] 'function int mempool_init(mempool_s*, int, void* (unsigned int, void*)*, void (void*, void*)*, void*)' [A] 'function int vfs_fsync(file*, int)' Bug: 169084168 Fixes: 4a1753307d32a ("ANDROID: Add dependencies of dm-user.ko") Test: Builds Signed-off-by: Paul Lawrence Change-Id: I4813f5400003882d08556f00542413058782457e Signed-off-by: Will McVicker --- android/abi_gki_aarch64.xml | 3752 +++++++++++++++++++++------------- android/abi_gki_aarch64_qcom | 233 ++- 2 files changed, 2493 insertions(+), 1492 deletions(-) diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml index e7b884328a84..e9815f9c5414 100644 --- a/android/abi_gki_aarch64.xml +++ b/android/abi_gki_aarch64.xml @@ -3,6 +3,9 @@ + + + @@ -182,6 +185,7 @@ + @@ -251,9 +255,12 @@ + + + @@ -414,6 +421,8 @@ + + @@ -633,6 +642,8 @@ + + @@ -679,8 +690,10 @@ + + @@ -1397,7 +1410,9 @@ + + @@ -2497,6 +2512,7 @@ + @@ -2813,7 +2829,7 @@ - + @@ -3312,7 +3328,7 @@ - + @@ -3565,7 +3581,7 @@ - + @@ -5059,7 +5075,7 @@ - + @@ -5273,7 +5289,7 @@ - + @@ -5818,7 +5834,7 @@ - + @@ -7100,7 +7116,7 @@ - + @@ -7254,6 +7270,10 @@ + + + + @@ -7493,7 +7513,7 @@ - + @@ -7540,7 +7560,7 @@ - + @@ -7589,7 +7609,7 @@ - + @@ -8058,6 +8078,11 @@ + + + + + @@ -8523,7 +8548,7 @@ - + @@ -8679,7 +8704,7 @@ - + @@ -9387,7 +9412,6 @@ - @@ -10374,7 +10398,7 @@ - + @@ -10655,7 +10679,7 @@ - + @@ -11035,7 +11059,7 @@ - + @@ -11154,7 +11178,7 @@ - + @@ -11716,7 +11740,7 @@ - + @@ -12302,7 +12326,7 @@ - + @@ -12422,7 +12446,7 @@ - + @@ -12431,7 +12455,7 @@ - + @@ -14537,6 +14561,17 @@ + + + + + + + + + + + @@ -15620,6 +15655,7 @@ + @@ -16308,30 +16344,30 @@ - - - - - + + + + + - - - - + + + + - - + + - - + + - - - + + + @@ -16611,7 +16647,7 @@ - + @@ -16885,19 +16921,19 @@ - - + + - - + + - - + + - + @@ -17872,7 +17908,7 @@ - + @@ -17927,7 +17963,7 @@ - + @@ -18646,6 +18682,10 @@ + + + + @@ -18811,10 +18851,10 @@ - - - - + + + + @@ -19668,7 +19708,7 @@ - + @@ -19760,7 +19800,7 @@ - + @@ -20078,7 +20118,7 @@ - + @@ -20698,58 +20738,58 @@ - - - + + + - - - - - + + + + + - - - + + + - - - - - + + + + + - - - + + + - - - - + + + + - - + + - - - + + + - - - + + + - - - + + + @@ -22065,7 +22105,7 @@ - + @@ -22228,7 +22268,7 @@ - + @@ -22825,29 +22865,29 @@ - - + + - - + + - - + + - - + + - - - + + + - - + + @@ -22873,13 +22913,12 @@ - - + + - @@ -22929,7 +22968,6 @@ - @@ -23117,7 +23155,7 @@ - + @@ -23878,7 +23916,7 @@ - + @@ -23921,7 +23959,7 @@ - + @@ -23951,7 +23989,7 @@ - + @@ -23990,7 +24028,7 @@ - + @@ -24129,7 +24167,7 @@ - + @@ -25235,7 +25273,7 @@ - + @@ -25392,14 +25430,14 @@ - + - + @@ -25419,7 +25457,7 @@ - + @@ -25495,7 +25533,7 @@ - + @@ -25528,7 +25566,7 @@ - + @@ -25571,7 +25609,7 @@ - + @@ -25685,13 +25723,13 @@ - + - + @@ -25725,7 +25763,7 @@ - + @@ -26023,7 +26061,7 @@ - + @@ -26111,7 +26149,6 @@ - @@ -26174,7 +26211,7 @@ - + @@ -26286,7 +26323,7 @@ - + @@ -26566,7 +26603,7 @@ - + @@ -26575,12 +26612,12 @@ - + - + @@ -26618,7 +26655,7 @@ - + @@ -26643,7 +26680,7 @@ - + @@ -26878,11 +26915,11 @@ - + - + @@ -27053,7 +27090,7 @@ - + @@ -27158,23 +27195,23 @@ - + - + - + - + @@ -27458,6 +27495,7 @@ + @@ -28048,6 +28086,7 @@ + @@ -28181,7 +28220,7 @@ - + @@ -28248,7 +28287,7 @@ - + @@ -28432,7 +28471,7 @@ - + @@ -28452,7 +28491,7 @@ - + @@ -28475,25 +28514,6 @@ - - - - - - - - - - - - - - - - - - - @@ -28515,7 +28535,7 @@ - + @@ -28524,7 +28544,7 @@ - + @@ -28619,17 +28639,6 @@ - - - - - - - - - - - @@ -28720,24 +28729,41 @@ - + - + - + - + - + - - + + + + - - + + + + + + + + + + + + + + + + + @@ -28781,27 +28807,18 @@ - + - + - + - - + + - - - - - - - - - - - + + @@ -28924,7 +28941,7 @@ - + @@ -28995,7 +29012,10 @@ + + + @@ -29018,14 +29038,14 @@ - + - + @@ -29036,14 +29056,14 @@ - + - + @@ -29064,44 +29084,44 @@ - - + + - + - + - + - + - + - + - + @@ -29113,27 +29133,27 @@ - + - - + + - + - + - + @@ -29144,11 +29164,11 @@ - + - + @@ -29172,7 +29192,7 @@ - + @@ -29271,7 +29291,7 @@ - + @@ -29381,7 +29401,7 @@ - + @@ -29439,7 +29459,9 @@ + + @@ -29719,30 +29741,30 @@ - - - + + + - - - - - + + + + + - - - + + + - - - + + + @@ -30156,7 +30178,7 @@ - + @@ -30186,7 +30208,7 @@ - + @@ -30195,7 +30217,7 @@ - + @@ -30360,7 +30382,7 @@ - + @@ -30697,7 +30719,7 @@ - + @@ -30875,13 +30897,13 @@ - + - + @@ -31242,7 +31264,7 @@ - + @@ -31268,7 +31290,7 @@ - + @@ -31281,7 +31303,7 @@ - + @@ -31944,7 +31966,7 @@ - + @@ -32001,7 +32023,7 @@ - + @@ -32027,7 +32049,7 @@ - + @@ -32036,7 +32058,7 @@ - + @@ -32060,7 +32082,7 @@ - + @@ -32121,7 +32143,7 @@ - + @@ -32320,6 +32342,7 @@ + @@ -32331,6 +32354,8 @@ + + @@ -32357,7 +32382,18 @@ - + + + + + + + + + + + + @@ -32395,9 +32431,10 @@ - + + @@ -33002,7 +33039,7 @@ - + @@ -33228,9 +33265,9 @@ - - - + + + @@ -33575,7 +33612,7 @@ - + @@ -34651,7 +34688,7 @@ - + @@ -34948,7 +34985,7 @@ - + @@ -35118,7 +35155,7 @@ - + @@ -35145,7 +35182,7 @@ - + @@ -35406,7 +35443,7 @@ - + @@ -35931,11 +35968,236 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -35950,10 +36212,18 @@ + + + + + + + + @@ -35975,6 +36245,110 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -36048,22 +36422,11 @@ - + - + - - - - - - - - - - - @@ -36375,7 +36738,7 @@ - + @@ -36574,11 +36937,6 @@ - - - - - @@ -37095,26 +37453,26 @@ - - + + - + - + - + - + - + @@ -37152,105 +37510,105 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37337,13 +37695,13 @@ - - + + - + - + @@ -37357,82 +37715,82 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37444,7 +37802,7 @@ - + @@ -37488,75 +37846,75 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37627,7 +37985,7 @@ - + @@ -37725,18 +38083,18 @@ - + - + - + - + - + @@ -37862,69 +38220,69 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -37947,7 +38305,7 @@ - + @@ -37985,7 +38343,7 @@ - + @@ -38081,7 +38439,7 @@ - + @@ -38091,7 +38449,7 @@ - + @@ -38455,289 +38813,289 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -38749,150 +39107,150 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -38908,586 +39266,586 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -40396,6 +40754,11 @@ + + + + + @@ -40558,17 +40921,6 @@ - - - - - - - - - - - @@ -40777,8 +41129,8 @@ - - + + @@ -40793,7 +41145,7 @@ - + @@ -40801,7 +41153,7 @@ - + @@ -41008,7 +41360,7 @@ - + @@ -41038,7 +41390,7 @@ - + @@ -41148,107 +41500,107 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -41436,7 +41788,7 @@ - + @@ -41449,6 +41801,17 @@ + + + + + + + + + + + @@ -41732,7 +42095,7 @@ - + @@ -41972,8 +42335,8 @@ - - + + @@ -41982,7 +42345,7 @@ - + @@ -42276,6 +42639,17 @@ + + + + + + + + + + + @@ -42421,6 +42795,7 @@ + @@ -44492,7 +44867,7 @@ - + @@ -44517,7 +44892,7 @@ - + @@ -45036,7 +45411,7 @@ - + @@ -45303,7 +45678,7 @@ - + @@ -47038,7 +47413,7 @@ - + @@ -47047,7 +47422,7 @@ - + @@ -47121,7 +47496,7 @@ - + @@ -47129,7 +47504,7 @@ - + @@ -49390,8 +49765,8 @@ - - + + @@ -49435,7 +49810,7 @@ - + @@ -49569,7 +49944,7 @@ - + @@ -49688,7 +50063,7 @@ - + @@ -50099,7 +50474,7 @@ - + @@ -50169,7 +50544,7 @@ - + @@ -50535,7 +50910,7 @@ - + @@ -50610,7 +50985,7 @@ - + @@ -51253,7 +51628,7 @@ - + @@ -51278,7 +51653,7 @@ - + @@ -51510,8 +51885,8 @@ - - + + @@ -51520,50 +51895,50 @@ - - - - + + + + - - - + + + - - + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - - + + + + @@ -51571,24 +51946,35 @@ - - + + - - + + - - + + - - + + + + + + + + + + + + + @@ -51614,6 +52000,7 @@ + @@ -52161,8 +52548,19 @@ + + + + + + + + + + + - + @@ -52292,6 +52690,7 @@ + @@ -53147,8 +53546,8 @@ - - + + @@ -54113,7 +54512,7 @@ - + @@ -54132,7 +54531,7 @@ - + @@ -54159,7 +54558,7 @@ - + @@ -54170,34 +54569,34 @@ - + - + - + - + - + - + - + - + - + - + @@ -54558,7 +54957,7 @@ - + @@ -55092,12 +55491,12 @@ - + - + @@ -55107,7 +55506,7 @@ - + @@ -55193,7 +55592,7 @@ - + @@ -55438,7 +55837,7 @@ - + @@ -55584,7 +55983,7 @@ - + @@ -56533,7 +56932,7 @@ - + @@ -57085,7 +57484,7 @@ - + @@ -57095,7 +57494,7 @@ - + @@ -57134,7 +57533,7 @@ - + @@ -57155,7 +57554,7 @@ - + @@ -57188,12 +57587,12 @@ - + - + @@ -57282,7 +57681,7 @@ - + @@ -57294,7 +57693,7 @@ - + @@ -58257,7 +58656,7 @@ - + @@ -58383,7 +58782,7 @@ - + @@ -58391,7 +58790,7 @@ - + @@ -58434,7 +58833,7 @@ - + @@ -58460,7 +58859,7 @@ - + @@ -58468,7 +58867,7 @@ - + @@ -59358,6 +59757,17 @@ + + + + + + + + + + + @@ -59477,17 +59887,6 @@ - - - - - - - - - - - @@ -60419,7 +60818,7 @@ - + @@ -60456,7 +60855,7 @@ - + @@ -60643,7 +61042,7 @@ - + @@ -60689,7 +61088,7 @@ - + @@ -61727,7 +62126,7 @@ - + @@ -61739,7 +62138,7 @@ - + @@ -62278,7 +62677,7 @@ - + @@ -62296,7 +62695,7 @@ - + @@ -62627,7 +63026,7 @@ - + @@ -62650,15 +63049,15 @@ - + - + - + - + @@ -62825,67 +63224,67 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -63135,7 +63534,7 @@ - + @@ -63509,6 +63908,17 @@ + + + + + + + + + + + @@ -63626,17 +64036,17 @@ - + - + - + @@ -63647,7 +64057,7 @@ - + @@ -63804,6 +64214,7 @@ + @@ -63813,7 +64224,7 @@ - + @@ -63884,11 +64295,11 @@ - - - - - + + + + + @@ -63941,30 +64352,30 @@ - + - + - + - + - + @@ -64032,13 +64443,13 @@ - + - + @@ -64379,7 +64790,7 @@ - + @@ -65464,6 +65875,17 @@ + + + + + + + + + + + @@ -65497,7 +65919,7 @@ - + @@ -65513,7 +65935,7 @@ - + @@ -65537,7 +65959,7 @@ - + @@ -65579,6 +66001,7 @@ + @@ -65627,11 +66050,23 @@ + + + + + + + + + + + + @@ -65648,24 +66083,24 @@ - + - + - + - - - - + + + + - + @@ -65684,7 +66119,7 @@ - + @@ -65772,6 +66207,14 @@ + + + + + + + + @@ -65929,6 +66372,17 @@ + + + + + + + + + + + @@ -65978,6 +66432,7 @@ + @@ -66164,6 +66619,17 @@ + + + + + + + + + + + @@ -66217,7 +66683,7 @@ - + @@ -66233,7 +66699,7 @@ - + @@ -66337,6 +66803,7 @@ + @@ -66649,7 +67116,7 @@ - + @@ -66670,27 +67137,31 @@ - + - + - + + + + + - + - + - + @@ -66699,6 +67170,15 @@ + + + + + + + + + @@ -66709,14 +67189,34 @@ + + + + + + + + + + + + + + + + + + + - + - + + @@ -66732,7 +67232,7 @@ - + @@ -66742,7 +67242,7 @@ - + @@ -66869,7 +67369,7 @@ - + @@ -67144,6 +67644,17 @@ + + + + + + + + + + + @@ -67213,6 +67724,14 @@ + + + + + + + + @@ -67416,7 +67935,7 @@ - + @@ -67426,7 +67945,7 @@ - + @@ -67467,6 +67986,17 @@ + + + + + + + + + + + @@ -67685,6 +68215,7 @@ + @@ -67861,7 +68392,7 @@ - + @@ -68146,6 +68677,10 @@ + + + + @@ -68162,12 +68697,24 @@ + + + + + + + + + + + + @@ -68256,6 +68803,17 @@ + + + + + + + + + + + @@ -68268,6 +68826,7 @@ + @@ -68280,23 +68839,7 @@ - - - - - - - - - - - - - - - - - + @@ -68325,7 +68868,7 @@ - + @@ -68353,7 +68896,7 @@ - + @@ -68899,10 +69442,10 @@ - - - - + + + + @@ -69006,9 +69549,9 @@ - - - + + + @@ -69103,7 +69646,7 @@ - + @@ -69487,32 +70030,32 @@ - - - - + + + + - - + + - - - - + + + + - - + + - - + + - - + + @@ -69580,6 +70123,10 @@ + + + + @@ -69888,6 +70435,14 @@ + + + + + + + + @@ -69939,7 +70494,7 @@ - + @@ -70230,75 +70785,75 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -71944,7 +72499,7 @@ - + @@ -72443,7 +72998,7 @@ - + @@ -72483,7 +73038,7 @@ - + @@ -74309,7 +74864,7 @@ - + @@ -74543,7 +75098,7 @@ - + @@ -75392,7 +75947,7 @@ - + @@ -76274,7 +76829,7 @@ - + @@ -76418,7 +76973,7 @@ - + @@ -76521,7 +77076,7 @@ - + @@ -76695,7 +77250,7 @@ - + @@ -76826,7 +77381,7 @@ - + @@ -76958,7 +77513,7 @@ - + @@ -77198,7 +77753,7 @@ - + @@ -77465,7 +78020,7 @@ - + @@ -77791,7 +78346,7 @@ - + @@ -77826,7 +78381,7 @@ - + @@ -78054,7 +78609,7 @@ - + @@ -78128,7 +78683,7 @@ - + @@ -78352,7 +78907,7 @@ - + @@ -78360,7 +78915,7 @@ - + @@ -78501,7 +79056,7 @@ - + @@ -78871,7 +79426,7 @@ - + @@ -79723,7 +80278,7 @@ - + @@ -80120,7 +80675,7 @@ - + @@ -80177,18 +80732,29 @@ - + - + - + - + + + + + + + + + + + + @@ -80259,7 +80825,7 @@ - + @@ -80435,7 +81001,7 @@ - + @@ -80512,7 +81078,7 @@ - + @@ -80534,7 +81100,7 @@ - + @@ -80556,7 +81122,7 @@ - + @@ -80564,7 +81130,7 @@ - + @@ -80573,7 +81139,7 @@ - + @@ -80602,7 +81168,7 @@ - + @@ -80719,8 +81285,8 @@ - - + + @@ -81176,6 +81742,7 @@ + @@ -82436,6 +83003,11 @@ + + + + + @@ -82799,7 +83371,7 @@ - + @@ -82815,7 +83387,7 @@ - + @@ -82823,7 +83395,7 @@ - + @@ -82874,7 +83446,7 @@ - + @@ -82906,7 +83478,7 @@ - + @@ -82963,7 +83535,8 @@ - + + @@ -83045,10 +83618,10 @@ - - - - + + + + @@ -83357,6 +83930,17 @@ + + + + + + + + + + + @@ -83378,12 +83962,12 @@ - + - + @@ -83448,7 +84032,7 @@ - + @@ -83477,7 +84061,7 @@ - + @@ -83487,12 +84071,12 @@ - + - - + + @@ -83570,12 +84154,12 @@ - - + + - - + + @@ -83589,8 +84173,8 @@ - - + + @@ -83659,17 +84243,6 @@ - - - - - - - - - - - @@ -83740,7 +84313,6 @@ - @@ -83790,7 +84362,7 @@ - + @@ -83816,7 +84388,7 @@ - + @@ -84149,46 +84721,46 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -85181,6 +85753,364 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -85268,6 +86198,13 @@ + + + + + + + @@ -85313,12 +86250,6 @@ - - - - - - @@ -85358,7 +86289,7 @@ - + @@ -85440,41 +86371,41 @@ - - - + + + - - - + + + - - - - - + + + + + - - - - - + + + + + - - - + + + - - - + + + - + @@ -85503,6 +86434,15 @@ + + + + + + + + + @@ -85764,7 +86704,7 @@ - + @@ -85772,6 +86712,23 @@ + + + + + + + + + + + + + + + + + @@ -85837,7 +86794,7 @@ - + @@ -85887,7 +86844,7 @@ - + @@ -86262,7 +87219,7 @@ - + @@ -86649,7 +87606,7 @@ - + @@ -86673,8 +87630,8 @@ - - + + @@ -86822,7 +87779,7 @@ - + @@ -86838,7 +87795,7 @@ - + @@ -88084,7 +89041,7 @@ - + @@ -88466,76 +89423,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -88776,6 +89663,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -88996,6 +89961,10 @@ + + + + @@ -89450,7 +90419,7 @@ - + @@ -90545,7 +91514,7 @@ - + @@ -90825,7 +91794,7 @@ - + @@ -90858,7 +91827,7 @@ - + @@ -90981,7 +91950,7 @@ - + @@ -91027,7 +91996,7 @@ - + @@ -91164,7 +92133,7 @@ - + @@ -91204,7 +92173,7 @@ - + @@ -91223,7 +92192,7 @@ - + @@ -91450,27 +92419,27 @@ - + - + - + - + @@ -92073,7 +93042,7 @@ - + @@ -92120,7 +93089,7 @@ - + @@ -92134,7 +93103,7 @@ - + @@ -92153,7 +93122,7 @@ - + @@ -92177,7 +93146,7 @@ - + @@ -92269,7 +93238,7 @@ - + @@ -92739,7 +93708,7 @@ - + @@ -93028,7 +93997,7 @@ - + @@ -93042,7 +94011,7 @@ - + @@ -93115,7 +94084,7 @@ - + @@ -93216,7 +94185,7 @@ - + @@ -93235,7 +94204,7 @@ - + @@ -93265,7 +94234,7 @@ - + @@ -93411,7 +94380,7 @@ - + @@ -93461,7 +94430,7 @@ - + @@ -94318,7 +95287,7 @@ - + @@ -94423,7 +95392,7 @@ - + @@ -94569,7 +95538,7 @@ - + @@ -95016,7 +95985,7 @@ - + @@ -95037,7 +96006,7 @@ - + @@ -95069,7 +96038,7 @@ - + @@ -95537,17 +96506,17 @@ - - - - - - + + + + + + - - - + + + @@ -95557,7 +96526,7 @@ - + @@ -95578,8 +96547,8 @@ - - + + @@ -95646,7 +96615,7 @@ - + @@ -95703,6 +96672,17 @@ + + + + + + + + + + + @@ -95762,16 +96742,6 @@ - - - - - - - - - - @@ -96443,14 +97413,14 @@ - + - + - + @@ -97319,13 +98289,13 @@ - - + + - + @@ -97335,8 +98305,8 @@ - - + + @@ -97381,7 +98351,7 @@ - + @@ -97626,6 +98596,17 @@ + + + + + + + + + + + @@ -97721,7 +98702,7 @@ - + @@ -97807,6 +98788,7 @@ + @@ -97817,27 +98799,27 @@ - - + + - + - + - + - + @@ -97845,7 +98827,7 @@ - + @@ -97853,14 +98835,14 @@ - + - + @@ -97868,7 +98850,7 @@ - + @@ -97877,7 +98859,7 @@ - + @@ -97886,11 +98868,11 @@ - + - + @@ -98244,14 +99226,14 @@ - + - + @@ -98468,7 +99450,7 @@ - + @@ -99050,7 +100032,7 @@ - + @@ -99090,7 +100072,7 @@ - + @@ -99098,7 +100080,7 @@ - + @@ -99129,13 +100111,13 @@ - + - + @@ -101225,10 +102207,10 @@ - - - - + + + + @@ -101278,6 +102260,6 @@ diff --git a/android/abi_gki_aarch64_qcom b/android/abi_gki_aarch64_qcom index 07ff4483e2be..bbc7c01ccb07 100644 --- a/android/abi_gki_aarch64_qcom +++ b/android/abi_gki_aarch64_qcom @@ -41,6 +41,7 @@ bus_for_each_dev bus_register bus_unregister + call_rcu cancel_delayed_work cancel_delayed_work_sync cancel_work_sync @@ -287,7 +288,6 @@ get_cpu_device get_device __get_free_pages - get_next_event_cpu get_pid_task get_random_bytes __get_task_comm @@ -467,6 +467,7 @@ __ll_sc_atomic64_fetch_or __ll_sc_atomic64_or __ll_sc_atomic64_sub + __ll_sc_atomic64_sub_return __ll_sc_atomic_add __ll_sc_atomic_add_return __ll_sc_atomic_sub @@ -491,6 +492,10 @@ __memcpy_toio memdup_user memmove + mempool_alloc + mempool_free + mempool_kfree + mempool_kmalloc memremap memset __memset_io @@ -506,6 +511,7 @@ module_put __msecs_to_jiffies msleep + msleep_interruptible __mutex_init mutex_lock mutex_lock_interruptible @@ -1210,15 +1216,24 @@ crc_ccitt_table kernel_restart kernel_setsockopt - mempool_alloc mempool_create mempool_destroy - mempool_free - mempool_kfree - mempool_kmalloc send_sig_info time64_to_tm +# required by dm-user.ko + bio_advance + bio_endio + bio_put + _copy_from_iter + copy_page_from_iter + copy_page_to_iter + _copy_to_iter + dm_register_target + dm_unregister_target + mempool_exit + mempool_init + # required by dwc3-haps.ko pcim_enable_device __pci_register_driver @@ -1315,9 +1330,6 @@ proc_create seq_write -# required by google-battery.ko - simple_strtoull - # required by google-bms.ko full_name_hash @@ -1362,7 +1374,6 @@ __break_lease clear_inode __close_fd - crc32_le d_add d_drop deactivate_locked_super @@ -1370,6 +1381,8 @@ dget_parent d_instantiate d_make_root + down_read_killable + down_write_killable dput flush_dcache_page fs_kobj @@ -1419,6 +1432,7 @@ user_path_at_empty vfs_create vfs_fallocate + vfs_fsync vfs_getattr vfs_getxattr vfs_link @@ -1428,19 +1442,19 @@ vfs_rmdir vfs_setxattr vfs_unlink + ZSTD_decompressStream + ZSTD_DStreamWorkspaceBound + ZSTD_initDStream # required by ion-alloc.ko dma_buf_export dma_get_device_base dma_get_size - __ll_sc_atomic64_sub_return mm_event_count __next_zones_zonelist ptr_to_hashval sched_setattr split_page - vm_map_ram - vm_unmap_ram zone_watermark_ok_safe # required by ipa3.ko @@ -2223,7 +2237,6 @@ # required by sctp.ko __bitmap_shift_right __bitmap_weight - call_rcu compat_ip_getsockopt compat_ip_setsockopt compat_ipv6_getsockopt @@ -2518,100 +2531,6 @@ trace_output_call utf16s_to_utf8s -# required by wlan.ko - bitmap_print_to_pagebuf - __cfg80211_alloc_event_skb - __cfg80211_alloc_reply_skb - cfg80211_ap_stopped - cfg80211_calculate_bitrate - cfg80211_chandef_create - cfg80211_ch_switch_notify - cfg80211_connect_done - cfg80211_del_sta_sinfo - cfg80211_disconnected - cfg80211_external_auth_request - cfg80211_ft_event - cfg80211_get_bss - cfg80211_gtk_rekey_notify - cfg80211_ibss_joined - cfg80211_inform_bss_frame_data - cfg80211_mgmt_tx_status - cfg80211_michael_mic_failure - cfg80211_new_sta - cfg80211_pmksa_candidate_notify - cfg80211_put_bss - cfg80211_ready_on_channel - cfg80211_remain_on_channel_expired - cfg80211_roamed - cfg80211_rx_mgmt - cfg80211_rx_unprot_mlme_mgmt - cfg80211_scan_done - cfg80211_sched_scan_results - __cfg80211_send_event_skb - cfg80211_tdls_oper_request - cfg80211_unlink_bss - cfg80211_update_owe_info_event - cfg80211_vendor_cmd_reply - complete_and_exit - cpufreq_quick_get_max - __cpuhp_remove_state - cpu_topology - crypto_aead_setauthsize - crypto_aead_setkey - crypto_alloc_aead - crypto_alloc_base - crypto_alloc_skcipher - crypto_shash_final - crypto_shash_update - dev_alloc_name - dump_stack - hex2bin - hex_to_bin - ieee80211_channel_to_frequency - ieee80211_frequency_to_channel - ieee80211_get_channel - ieee80211_hdrlen - iommu_iova_to_phys - irq_set_affinity_hint - mac_pton - msleep_interruptible - netif_tx_stop_all_queues - netlink_broadcast - __netlink_kernel_create - netlink_kernel_release - nla_put_64bit - nla_strlcpy - param_get_string - param_ops_byte - param_set_copystring - pci_read_config_dword - pci_read_config_word - pci_write_config_dword - pci_write_config_word - PDE_DATA - pm_freezing - pm_system_wakeup - proc_create_data - proc_mkdir - register_netevent_notifier - register_sysctl_table - regulatory_set_wiphy_regd - rtnl_lock - save_stack_trace_tsk - schedule_timeout_interruptible - seq_vprintf - set_cpus_allowed_ptr - skip_spaces - strchrnul - unregister_netevent_notifier - unregister_sysctl_table - vprintk - wiphy_free - wiphy_new_nm - wiphy_register - wiphy_unregister - wireless_send_event - # required by usb-audio-qmi.ko find_snd_usb_substream snd_usb_enable_audio_stream @@ -2684,6 +2603,99 @@ irq_create_mapping regmap_irq_get_virq +# required by wlan.ko + bitmap_print_to_pagebuf + __cfg80211_alloc_event_skb + __cfg80211_alloc_reply_skb + cfg80211_ap_stopped + cfg80211_calculate_bitrate + cfg80211_chandef_create + cfg80211_ch_switch_notify + cfg80211_connect_done + cfg80211_del_sta_sinfo + cfg80211_disconnected + cfg80211_external_auth_request + cfg80211_ft_event + cfg80211_get_bss + cfg80211_gtk_rekey_notify + cfg80211_ibss_joined + cfg80211_inform_bss_frame_data + cfg80211_mgmt_tx_status + cfg80211_michael_mic_failure + cfg80211_new_sta + cfg80211_pmksa_candidate_notify + cfg80211_put_bss + cfg80211_ready_on_channel + cfg80211_remain_on_channel_expired + cfg80211_roamed + cfg80211_rx_mgmt + cfg80211_rx_unprot_mlme_mgmt + cfg80211_scan_done + cfg80211_sched_scan_results + __cfg80211_send_event_skb + cfg80211_tdls_oper_request + cfg80211_unlink_bss + cfg80211_update_owe_info_event + cfg80211_vendor_cmd_reply + complete_and_exit + cpufreq_quick_get_max + __cpuhp_remove_state + cpu_topology + crypto_aead_setauthsize + crypto_aead_setkey + crypto_alloc_aead + crypto_alloc_base + crypto_alloc_skcipher + crypto_shash_final + crypto_shash_update + dev_alloc_name + dump_stack + hex2bin + hex_to_bin + ieee80211_channel_to_frequency + ieee80211_frequency_to_channel + ieee80211_get_channel + ieee80211_hdrlen + iommu_iova_to_phys + irq_set_affinity_hint + mac_pton + netif_tx_stop_all_queues + netlink_broadcast + __netlink_kernel_create + netlink_kernel_release + nla_put_64bit + nla_strlcpy + param_get_string + param_ops_byte + param_set_copystring + pci_read_config_dword + pci_read_config_word + pci_write_config_dword + pci_write_config_word + PDE_DATA + pm_freezing + pm_system_wakeup + proc_create_data + proc_mkdir + register_netevent_notifier + register_sysctl_table + regulatory_set_wiphy_regd + rtnl_lock + save_stack_trace_tsk + schedule_timeout_interruptible + seq_vprintf + set_cpus_allowed_ptr + skip_spaces + strchrnul + unregister_netevent_notifier + unregister_sysctl_table + vprintk + wiphy_free + wiphy_new_nm + wiphy_register + wiphy_unregister + wireless_send_event + # required by wsa883x_dlkm.ko snd_info_create_module_entry snd_soc_component_exit_regmap @@ -2702,3 +2714,10 @@ xhci_resume xhci_run xhci_suspend + +# preserved by --additions-only + crc32_le + get_next_event_cpu + simple_strtoull + vm_map_ram + vm_unmap_ram From b4e80478115bc7fcea627110c133f4afe1a40899 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 20 Nov 2020 10:41:57 +1100 Subject: [PATCH 464/636] powerpc/64s: move some exception handlers out of line (backport only) We're about to grow the exception handlers, which will make a bunch of them no longer fit within the space available. We move them out of line. This is a fiddly and error-prone business, so in the interests of reviewability I haven't merged this in with the addition of the entry flush. Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/exceptions-64s.S | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e1dab9b1e447..fcf459694ccb 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -572,13 +572,16 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) + b tramp_data_access_slb +EXC_REAL_END(data_access_slb, 0x380, 0x80) + +TRAMP_REAL_BEGIN(tramp_data_access_slb) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) mr r12,r3 /* save r3 */ mfspr r3,SPRN_DAR mfspr r11,SPRN_SRR1 crset 4*cr6+eq BRANCH_TO_COMMON(r10, slb_miss_common) -EXC_REAL_END(data_access_slb, 0x380, 0x80) EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) SET_SCRATCH0(r13) @@ -616,13 +619,16 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) + b tramp_instruction_access_slb +EXC_REAL_END(instruction_access_slb, 0x480, 0x80) + +TRAMP_REAL_BEGIN(tramp_instruction_access_slb) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) mr r12,r3 /* save r3 */ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ mfspr r11,SPRN_SRR1 crclr 4*cr6+eq BRANCH_TO_COMMON(r10, slb_miss_common) -EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) SET_SCRATCH0(r13) From f69bb4e51f41973fb7594be1479fa689831efe1a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 20 Nov 2020 10:41:58 +1100 Subject: [PATCH 465/636] powerpc/64s: flush L1D on kernel entry commit f79643787e0a0762d2409b7b8334e83f22d85695 upstream. IBM Power9 processors can speculatively operate on data in the L1 cache before it has been completely validated, via a way-prediction mechanism. It is not possible for an attacker to determine the contents of impermissible memory using this method, since these systems implement a combination of hardware and software security measures to prevent scenarios where protected data could be leaked. However these measures don't address the scenario where an attacker induces the operating system to speculatively execute instructions using data that the attacker controls. This can be used for example to speculatively bypass "kernel user access prevention" techniques, as discovered by Anthony Steinhauser of Google's Safeside Project. This is not an attack by itself, but there is a possibility it could be used in conjunction with side-channels or other weaknesses in the privileged code to construct an attack. This issue can be mitigated by flushing the L1 cache between privilege boundaries of concern. This patch flushes the L1 cache on kernel entry. This is part of the fix for CVE-2020-4788. Signed-off-by: Nicholas Piggin Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 3 + arch/powerpc/include/asm/exception-64s.h | 9 ++- arch/powerpc/include/asm/feature-fixups.h | 10 ++++ arch/powerpc/include/asm/security_features.h | 4 ++ arch/powerpc/include/asm/setup.h | 3 + arch/powerpc/kernel/exceptions-64s.S | 47 +++++++++++++-- arch/powerpc/kernel/setup_64.c | 60 ++++++++++++++++++- arch/powerpc/kernel/vmlinux.lds.S | 7 +++ arch/powerpc/lib/feature-fixups.c | 54 +++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 11 ++++ arch/powerpc/platforms/pseries/setup.c | 4 ++ 11 files changed, 205 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8dbc8d4ec8f0..973861421264 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2560,6 +2560,7 @@ mds=off [X86] tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] + no_entry_flush [PPC] Exceptions: This does not have any effect on @@ -2870,6 +2871,8 @@ noefi Disable EFI runtime services support. + no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel. + noexec [IA-64] noexec [X86] diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index a86feddddad0..35fb5b11955a 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -90,11 +90,18 @@ nop; \ nop +#define ENTRY_FLUSH_SLOT \ + ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ #define INTERRUPT_TO_KERNEL \ - STF_ENTRY_BARRIER_SLOT + STF_ENTRY_BARRIER_SLOT; \ + ENTRY_FLUSH_SLOT /* * Macros for annotating the expected destination of (h)rfid diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 40a6c9261a6b..9ad779d87b23 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,14 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -237,8 +245,10 @@ label##3: \ #include extern long stf_barrier_fallback; +extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index ccf44c135389..082b56bf678d 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -84,12 +84,16 @@ static inline bool security_ftr_enabled(unsigned long feature) // Software required to flush link stack on context switch #define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull +// The L1-D cache should be flushed when entering the kernel +#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull + // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_L1D_FLUSH_ENTRY | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 65676e2325b8..556635217e5c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,12 +52,15 @@ enum l1d_flush_type { }; void setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_entry_flush(bool enable); +void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fcf459694ccb..75551690e96e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -540,7 +540,7 @@ EXC_COMMON_BEGIN(unrecover_mce) b 1b -EXC_REAL(data_access, 0x300, 0x80) +EXC_REAL_OOL(data_access, 0x300, 0x80) EXC_VIRT(data_access, 0x4300, 0x80, 0x300) TRAMP_KVM_SKIP(PACA_EXGEN, 0x300) @@ -596,7 +596,7 @@ EXC_VIRT_END(data_access_slb, 0x4380, 0x80) TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) -EXC_REAL(instruction_access, 0x400, 0x80) +EXC_REAL_OOL(instruction_access, 0x400, 0x80) EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400) TRAMP_KVM(PACA_EXGEN, 0x400) @@ -889,13 +889,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) -EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) +EXC_VIRT_OOL_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) -EXC_REAL_HV(hdecrementer, 0x980, 0x80) -EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980) +EXC_REAL_OOL_HV(hdecrementer, 0x980, 0x80) +EXC_VIRT_OOL_HV(hdecrementer, 0x4980, 0x80, 0x980) TRAMP_KVM_HV(PACA_EXGEN, 0x980) EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) @@ -1529,6 +1529,43 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ + mtctr r11 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync + + /* + * The load addresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index bd4996958b13..7bbd9d22d66e 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -863,7 +863,9 @@ early_initcall(disable_hardlockup_detector); static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; +static bool no_entry_flush; bool rfi_flush; +bool entry_flush; static int __init handle_no_rfi_flush(char *p) { @@ -873,6 +875,14 @@ static int __init handle_no_rfi_flush(char *p) } early_param("no_rfi_flush", handle_no_rfi_flush); +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -904,6 +914,18 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } +void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -957,10 +979,19 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush && !cpu_mitigations_off()) + if (!cpu_mitigations_off() && !no_rfi_flush) rfi_flush_enable(enable); } +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -988,9 +1019,36 @@ static int rfi_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index d081d726ca8e..1432cf996201 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -140,6 +140,13 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { + __start___entry_flush_fixup = .; + *(__entry_flush_fixup) + __stop___entry_flush_fixup = .; + } + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index dbe478e7b8e0..22bae8741cae 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -232,6 +232,60 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((dest + 1), (unsigned long)&entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((dest + 1), instrs[1]); + + patch_instruction((dest + 2), instrs[2]); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index adddde023622..ad51349e479b 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -125,12 +125,23 @@ static void pnv_setup_rfi_flush(void) type = L1D_FLUSH_ORI; } + /* + * If we are non-Power9 bare metal, we don't need to flush on kernel + * entry: it fixes a P9 specific vulnerability. + */ + if (!pvr_version_is(PVR_POWER9)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c2d318d1df02..8aa4dd87cbf2 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -565,6 +565,10 @@ void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); } #ifdef CONFIG_PCI_IOV From 357a5e6febe8839bd217121baa394b11e8fe4085 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 20 Nov 2020 10:41:59 +1100 Subject: [PATCH 466/636] powerpc: Add a framework for user access tracking Backported from commit de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection"). Here we don't try to add the KUAP framework, we just want the helper functions because we want to put uaccess flush helpers in them. In terms of fixes, we don't need commit 1d8f739b07bd ("powerpc/kuap: Fix set direction in allow/prevent_user_access()") as we don't have real KUAP. Likewise as all our allows are noops and all our prevents are just flushes, we don't need commit 9dc086f1e9ef ("powerpc/futex: Fix incorrect user access blocking") The other 2 fixes we do need. The original description is: This patch implements a framework for Kernel Userspace Access Protection. Then subarches will have the possibility to provide their own implementation by providing setup_kuap() and allow/prevent_user_access(). Some platforms will need to know the area accessed and whether it is accessed from read, write or both. Therefore source, destination and size and handed over to the two functions. mpe: Rename to allow/prevent rather than unlock/lock, and add read/write wrappers. Drop the 32-bit code for now until we have an implementation for it. Add kuap to pt_regs for 64-bit as well as 32-bit. Don't split strings, use pr_crit_ratelimited(). Signed-off-by: Christophe Leroy Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/futex.h | 4 +++ arch/powerpc/include/asm/kup.h | 36 ++++++++++++++++++++++++++ arch/powerpc/include/asm/uaccess.h | 38 ++++++++++++++++++++++------ arch/powerpc/lib/checksum_wrappers.c | 4 +++ 4 files changed, 74 insertions(+), 8 deletions(-) create mode 100644 arch/powerpc/include/asm/kup.h diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 2a7b01f97a56..1eabc20dddd3 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; + allow_write_to_user(uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -61,6 +62,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, *oval = oldval; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } @@ -74,6 +76,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + allow_write_to_user(uaddr, sizeof(*uaddr)); __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -94,6 +97,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h new file mode 100644 index 000000000000..7895d5eeaf21 --- /dev/null +++ b/arch/powerpc/include/asm/kup.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_H_ +#define _ASM_POWERPC_KUP_H_ + +#ifndef __ASSEMBLY__ + +#include + +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) { } +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) { } + +static inline void allow_read_from_user(const void __user *from, unsigned long size) +{ + allow_user_access(NULL, from, size); +} + +static inline void allow_write_to_user(void __user *to, unsigned long size) +{ + allow_user_access(to, NULL, size); +} + +static inline void prevent_read_from_user(const void __user *from, unsigned long size) +{ + prevent_user_access(NULL, from, size); +} + +static inline void prevent_write_to_user(void __user *to, unsigned long size) +{ + prevent_user_access(to, NULL, size); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_KUP_H_ */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 38a25ff8afb7..b604bb140a30 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -6,6 +6,7 @@ #include #include #include +#include /* * The fs value determines whether argument validity checking should be @@ -141,6 +142,7 @@ extern long __put_user_bad(void); #define __put_user_size(x, ptr, size, retval) \ do { \ retval = 0; \ + allow_write_to_user(ptr, size); \ switch (size) { \ case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ @@ -148,6 +150,7 @@ do { \ case 8: __put_user_asm2(x, ptr, retval); break; \ default: __put_user_bad(); \ } \ + prevent_write_to_user(ptr, size); \ } while (0) #define __put_user_nocheck(x, ptr, size) \ @@ -240,6 +243,7 @@ do { \ __chk_user_ptr(ptr); \ if (size > sizeof(x)) \ (x) = __get_user_bad(); \ + allow_read_from_user(ptr, size); \ switch (size) { \ case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ @@ -247,6 +251,7 @@ do { \ case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ + prevent_read_from_user(ptr, size); \ } while (0) /* @@ -306,16 +311,22 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { + unsigned long ret; + barrier_nospec(); - return __copy_tofrom_user(to, from, n); + allow_user_access(to, from, n); + ret = __copy_tofrom_user(to, from, n); + prevent_user_access(to, from, n); + return ret; } #endif /* __powerpc64__ */ static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; + ret = 1; switch (n) { case 1: @@ -340,14 +351,18 @@ static inline unsigned long raw_copy_from_user(void *to, } barrier_nospec(); - return __copy_tofrom_user((__force void __user *)to, from, n); + allow_read_from_user(from, n); + ret = __copy_tofrom_user((__force void __user *)to, from, n); + prevent_read_from_user(from, n); + return ret; } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { + unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; + ret = 1; switch (n) { case 1: @@ -367,17 +382,24 @@ static inline unsigned long raw_copy_to_user(void __user *to, return 0; } - return __copy_tofrom_user(to, (__force const void __user *)from, n); + allow_write_to_user(to, n); + ret = __copy_tofrom_user(to, (__force const void __user *)from, n); + prevent_write_to_user(to, n); + return ret; } extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { + unsigned long ret = size; might_fault(); - if (likely(access_ok(VERIFY_WRITE, addr, size))) - return __clear_user(addr, size); - return size; + if (likely(access_ok(VERIFY_WRITE, addr, size))) { + allow_write_to_user(addr, size); + ret = __clear_user(addr, size); + prevent_write_to_user(addr, size); + } + return ret; } extern long strncpy_from_user(char *dst, const char __user *src, long count); diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index a0cb63fb76a1..8d83c39be7e4 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -29,6 +29,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst, unsigned int csum; might_sleep(); + allow_read_from_user(src, len); *err_ptr = 0; @@ -60,6 +61,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst, } out: + prevent_read_from_user(src, len); return (__force __wsum)csum; } EXPORT_SYMBOL(csum_and_copy_from_user); @@ -70,6 +72,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, unsigned int csum; might_sleep(); + allow_write_to_user(dst, len); *err_ptr = 0; @@ -97,6 +100,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, } out: + prevent_write_to_user(dst, len); return (__force __wsum)csum; } EXPORT_SYMBOL(csum_and_copy_to_user); From 8145b3a014e8504f02e15a38f04051fec826f4f2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 20 Nov 2020 10:42:00 +1100 Subject: [PATCH 467/636] powerpc: Implement user_access_begin and friends commit 5cd623333e7cf4e3a334c70529268b65f2a6c2c7 upstream. Today, when a function like strncpy_from_user() is called, the userspace access protection is de-activated and re-activated for every word read. By implementing user_access_begin and friends, the protection is de-activated at the beginning of the copy and re-activated at the end. Implement user_access_begin(), user_access_end() and unsafe_get_user(), unsafe_put_user() and unsafe_copy_to_user() For the time being, we keep user_access_save() and user_access_restore() as nops. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/36d4fbf9e56a75994aca4ee2214c77b26a5a8d35.1579866752.git.christophe.leroy@c-s.fr Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/uaccess.h | 75 ++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index b604bb140a30..463e3d3dd0a3 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -92,9 +92,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size, __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) #define __get_user(x, ptr) \ - __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true) #define __put_user(x, ptr) \ - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true) + +#define __get_user_allowed(x, ptr) \ + __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false) +#define __put_user_allowed(x, ptr) \ + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false) #define __get_user_inatomic(x, ptr) \ __get_user_nosleep((x), (ptr), sizeof(*(ptr))) @@ -139,10 +144,9 @@ extern long __put_user_bad(void); : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ -#define __put_user_size(x, ptr, size, retval) \ +#define __put_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ - allow_write_to_user(ptr, size); \ switch (size) { \ case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ @@ -150,17 +154,26 @@ do { \ case 8: __put_user_asm2(x, ptr, retval); break; \ default: __put_user_bad(); \ } \ +} while (0) + +#define __put_user_size(x, ptr, size, retval) \ +do { \ + allow_write_to_user(ptr, size); \ + __put_user_size_allowed(x, ptr, size, retval); \ prevent_write_to_user(ptr, size); \ } while (0) -#define __put_user_nocheck(x, ptr, size) \ +#define __put_user_nocheck(x, ptr, size, do_allow) \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ might_fault(); \ __chk_user_ptr(ptr); \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + if (do_allow) \ + __put_user_size((x), __pu_addr, (size), __pu_err); \ + else \ + __put_user_size_allowed((x), __pu_addr, (size), __pu_err); \ __pu_err; \ }) @@ -237,13 +250,12 @@ extern long __get_user_bad(void); : "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ -#define __get_user_size(x, ptr, size, retval) \ +#define __get_user_size_allowed(x, ptr, size, retval) \ do { \ retval = 0; \ __chk_user_ptr(ptr); \ if (size > sizeof(x)) \ (x) = __get_user_bad(); \ - allow_read_from_user(ptr, size); \ switch (size) { \ case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ @@ -251,6 +263,12 @@ do { \ case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ +} while (0) + +#define __get_user_size(x, ptr, size, retval) \ +do { \ + allow_read_from_user(ptr, size); \ + __get_user_size_allowed(x, ptr, size, retval); \ prevent_read_from_user(ptr, size); \ } while (0) @@ -261,7 +279,7 @@ do { \ #define __long_type(x) \ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) -#define __get_user_nocheck(x, ptr, size) \ +#define __get_user_nocheck(x, ptr, size, do_allow) \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ @@ -270,7 +288,10 @@ do { \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + if (do_allow) \ + __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + else \ + __get_user_size_allowed(__gu_val, __gu_addr, (size), __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ __gu_err; \ }) @@ -357,33 +378,40 @@ static inline unsigned long raw_copy_from_user(void *to, return ret; } -static inline unsigned long raw_copy_to_user(void __user *to, - const void *from, unsigned long n) +static inline unsigned long +raw_copy_to_user_allowed(void __user *to, const void *from, unsigned long n) { - unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - ret = 1; + unsigned long ret = 1; switch (n) { case 1: - __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret); + __put_user_size_allowed(*(u8 *)from, (u8 __user *)to, 1, ret); break; case 2: - __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret); + __put_user_size_allowed(*(u16 *)from, (u16 __user *)to, 2, ret); break; case 4: - __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret); + __put_user_size_allowed(*(u32 *)from, (u32 __user *)to, 4, ret); break; case 8: - __put_user_size(*(u64 *)from, (u64 __user *)to, 8, ret); + __put_user_size_allowed(*(u64 *)from, (u64 __user *)to, 8, ret); break; } if (ret == 0) return 0; } + return __copy_tofrom_user(to, (__force const void __user *)from, n); +} + +static inline unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + unsigned long ret; + allow_write_to_user(to, n); - ret = __copy_tofrom_user(to, (__force const void __user *)from, n); + ret = raw_copy_to_user_allowed(to, from, n); prevent_write_to_user(to, n); return ret; } @@ -410,4 +438,13 @@ extern long __copy_from_user_flushcache(void *dst, const void __user *src, extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, size_t len); +#define user_access_begin(type, ptr, len) access_ok(type, ptr, len) +#define user_access_end() prevent_user_access(NULL, NULL, ~0ul) + +#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) +#define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e) +#define unsafe_put_user(x, p, e) unsafe_op_wrap(__put_user_allowed(x, p), e) +#define unsafe_copy_to_user(d, s, l, e) \ + unsafe_op_wrap(raw_copy_to_user_allowed(d, s, l), e) + #endif /* _ARCH_POWERPC_UACCESS_H */ From 92e7ec289b955384fba88953584562ce43164fa7 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 20 Nov 2020 10:42:01 +1100 Subject: [PATCH 468/636] powerpc: Fix __clear_user() with KUAP enabled commit 61e3acd8c693a14fc69b824cb5b08d02cb90a6e7 upstream. The KUAP implementation adds calls in clear_user() to enable and disable access to userspace memory. However, it doesn't add these to __clear_user(), which is used in the ptrace regset code. As there's only one direct user of __clear_user() (the regset code), and the time taken to set the AMR for KUAP purposes is going to dominate the cost of a quick access_ok(), there's not much point having a separate path. Rename __clear_user() to __arch_clear_user(), and make __clear_user() just call clear_user(). Reported-by: syzbot+f25ecf4b2982d8c7a640@syzkaller-ppc64.appspotmail.com Reported-by: Daniel Axtens Suggested-by: Michael Ellerman Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection") Signed-off-by: Andrew Donnellan [mpe: Use __arch_clear_user() for the asm version like arm64 & nds32] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191209132221.15328-1-ajd@linux.ibm.com Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/uaccess.h | 9 +++++++-- arch/powerpc/lib/string_32.S | 4 ++-- arch/powerpc/lib/string_64.S | 6 +++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 463e3d3dd0a3..15d9706a9f9c 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -416,7 +416,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) return ret; } -extern unsigned long __clear_user(void __user *addr, unsigned long size); +unsigned long __arch_clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { @@ -424,12 +424,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(VERIFY_WRITE, addr, size))) { allow_write_to_user(addr, size); - ret = __clear_user(addr, size); + ret = __arch_clear_user(addr, size); prevent_write_to_user(addr, size); } return ret; } +static inline unsigned long __clear_user(void __user *addr, unsigned long size) +{ + return clear_user(addr, size); +} + extern long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index f69a6aab7bfb..1ddb26394e8a 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -_GLOBAL(__clear_user) +_GLOBAL(__arch_clear_user) /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination @@ -87,4 +87,4 @@ _GLOBAL(__clear_user) EX_TABLE(8b, 91b) EX_TABLE(9b, 91b) -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 56aac4c22025..ea3798f4f25f 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -29,7 +29,7 @@ PPC64_CACHES: .section ".text" /** - * __clear_user: - Zero a block of memory in user space, with less checking. + * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. * @n: Number of bytes to zero. * @@ -70,7 +70,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL_TOC(__clear_user) +_GLOBAL_TOC(__arch_clear_user) cmpdi r4,32 neg r6,r3 li r0,0 @@ -193,4 +193,4 @@ err1; dcbz 0,r3 cmpdi r4,32 blt .Lshort_clear b .Lmedium_clear -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) From 6b36099dc92354dc0a895115605d4bf31f252142 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 20 Nov 2020 10:42:02 +1100 Subject: [PATCH 469/636] powerpc/uaccess: Evaluate macro arguments once, before user access is allowed commit d02f6b7dab8228487268298ea1f21081c0b4b3eb upstream. get/put_user() can be called with nontrivial arguments. fs/proc/page.c has a good example: if (put_user(stable_page_flags(ppage), out)) { stable_page_flags() is quite a lot of code, including spin locks in the page allocator. Ensure these arguments are evaluated before user access is allowed. This improves security by reducing code with access to userspace, but it also fixes a PREEMPT bug with KUAP on powerpc/64s: stable_page_flags() is currently called with AMR set to allow writes, it ends up calling spin_unlock(), which can call preempt_schedule. But the task switch code can not be called with AMR set (it relies on interrupts saving the register), so this blows up. It's fine if the code inside allow_user_access() is preemptible, because a timer or IPI will save the AMR, but it's not okay to explicitly cause a reschedule. Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200407041245.600651-1-npiggin@gmail.com Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/uaccess.h | 49 +++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 15d9706a9f9c..ab6612e35ace 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -167,13 +167,17 @@ do { \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ might_fault(); \ - __chk_user_ptr(ptr); \ + __chk_user_ptr(__pu_addr); \ if (do_allow) \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ else \ - __put_user_size_allowed((x), __pu_addr, (size), __pu_err); \ + __put_user_size_allowed(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -181,9 +185,13 @@ do { \ ({ \ long __pu_err = -EFAULT; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ might_fault(); \ - if (access_ok(VERIFY_WRITE, __pu_addr, size)) \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + if (access_ok(VERIFY_WRITE, __pu_addr, __pu_size)) \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -191,8 +199,12 @@ do { \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __chk_user_ptr(ptr); \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ + __chk_user_ptr(__pu_addr); \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -284,15 +296,18 @@ do { \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __chk_user_ptr(ptr); \ + __typeof__(size) __gu_size = (size); \ + \ + __chk_user_ptr(__gu_addr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ barrier_nospec(); \ if (do_allow) \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ else \ - __get_user_size_allowed(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) @@ -301,12 +316,15 @@ do { \ long __gu_err = -EFAULT; \ __long_type(*(ptr)) __gu_val = 0; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(size) __gu_size = (size); \ + \ might_fault(); \ - if (access_ok(VERIFY_READ, __gu_addr, (size))) { \ + if (access_ok(VERIFY_READ, __gu_addr, __gu_size)) { \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) @@ -315,10 +333,13 @@ do { \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __chk_user_ptr(ptr); \ + __typeof__(size) __gu_size = (size); \ + \ + __chk_user_ptr(__gu_addr); \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) From 31ebc2fe02df202566d0e36c1106b4902d6e2f8c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 20 Nov 2020 10:42:03 +1100 Subject: [PATCH 470/636] powerpc/64s: flush L1D after user accesses commit 9a32a7e78bd0cd9a9b6332cbdc345ee5ffd0c5de upstream. IBM Power9 processors can speculatively operate on data in the L1 cache before it has been completely validated, via a way-prediction mechanism. It is not possible for an attacker to determine the contents of impermissible memory using this method, since these systems implement a combination of hardware and software security measures to prevent scenarios where protected data could be leaked. However these measures don't address the scenario where an attacker induces the operating system to speculatively execute instructions using data that the attacker controls. This can be used for example to speculatively bypass "kernel user access prevention" techniques, as discovered by Anthony Steinhauser of Google's Safeside Project. This is not an attack by itself, but there is a possibility it could be used in conjunction with side-channels or other weaknesses in the privileged code to construct an attack. This issue can be mitigated by flushing the L1 cache between privilege boundaries of concern. This patch flushes the L1 cache after user accesses. This is part of the fix for CVE-2020-4788. Signed-off-by: Nicholas Piggin Signed-off-by: Daniel Axtens Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/kernel-parameters.txt | 4 + .../powerpc/include/asm/book3s/64/kup-radix.h | 22 +++++ arch/powerpc/include/asm/feature-fixups.h | 9 +++ arch/powerpc/include/asm/kup.h | 4 + arch/powerpc/include/asm/security_features.h | 3 + arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/kernel/exceptions-64s.S | 81 ++++++------------- arch/powerpc/kernel/setup_64.c | 62 ++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 7 ++ arch/powerpc/lib/feature-fixups.c | 50 ++++++++++++ arch/powerpc/platforms/powernv/setup.c | 10 ++- arch/powerpc/platforms/pseries/setup.c | 4 + 12 files changed, 198 insertions(+), 59 deletions(-) create mode 100644 arch/powerpc/include/asm/book3s/64/kup-radix.h diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 973861421264..7371643dd8d4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2561,6 +2561,7 @@ tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] no_entry_flush [PPC] + no_uaccess_flush [PPC] Exceptions: This does not have any effect on @@ -2922,6 +2923,9 @@ nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + no_uaccess_flush + [PPC] Don't flush the L1-D cache after accessing user data. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h new file mode 100644 index 000000000000..aa54ac2e5659 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H +#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H + +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + +/* Prototype for function defined in exceptions-64s.S */ +void do_uaccess_flush(void); + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) +{ +} + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); +} + +#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 9ad779d87b23..5bf3f0779b93 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,14 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define UACCESS_FLUSH_FIXUP_SECTION \ +959: \ + .pushsection __uaccess_flush_fixup,"a"; \ + .align 2; \ +960: \ + FTR_ENTRY_OFFSET 959b-960b; \ + .popsection; + #define ENTRY_FLUSH_FIXUP_SECTION \ 957: \ .pushsection __entry_flush_fixup,"a"; \ @@ -248,6 +256,7 @@ extern long stf_barrier_fallback; extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 7895d5eeaf21..f0f8e36ad71f 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -6,10 +6,14 @@ #include +#ifdef CONFIG_PPC_BOOK3S_64 +#include +#else static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size) { } static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size) { } +#endif /* CONFIG_PPC_BOOK3S_64 */ static inline void allow_read_from_user(const void __user *from, unsigned long size) { diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 082b56bf678d..3b45a64e491e 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -87,6 +87,8 @@ static inline bool security_ftr_enabled(unsigned long feature) // The L1-D cache should be flushed when entering the kernel #define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull +// The L1-D cache should be flushed after user accesses from the kernel +#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull // Features enabled by default #define SEC_FTR_DEFAULT \ @@ -94,6 +96,7 @@ static inline bool security_ftr_enabled(unsigned long feature) SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ SEC_FTR_L1D_FLUSH_ENTRY | \ + SEC_FTR_L1D_FLUSH_UACCESS | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 556635217e5c..6f2f4497e13b 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -60,6 +60,7 @@ void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_uaccess_flush_fixups(enum l1d_flush_type types); void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 75551690e96e..344e2758b22d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1529,11 +1529,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr -TRAMP_REAL_BEGIN(entry_flush_fallback) - std r9,PACA_EXRFI+EX_R9(r13) - std r10,PACA_EXRFI+EX_R10(r13) - std r11,PACA_EXRFI+EX_R11(r13) - mfctr r9 +/* Clobbers r10, r11, ctr */ +.macro L1D_DISPLACEMENT_FLUSH ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ @@ -1559,7 +1556,14 @@ TRAMP_REAL_BEGIN(entry_flush_fallback) ld r11,(0x80 + 8)*7(r10) addi r10,r10,0x80*8 bdnz 1b +.endm +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -1575,32 +1579,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -1618,32 +1597,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -1652,6 +1606,19 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid +USE_TEXT_SECTION() + +_GLOBAL(do_uaccess_flush) + UACCESS_FLUSH_FIXUP_SECTION + nop + nop + nop + blr + L1D_DISPLACEMENT_FLUSH + blr +_ASM_NOKPROBE_SYMBOL(do_uaccess_flush) +EXPORT_SYMBOL(do_uaccess_flush) + /* * Real mode exceptions actually use this too, but alternate * instruction code patches (which end up in the common .text area) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7bbd9d22d66e..122365624d3d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -864,8 +864,12 @@ static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; static bool no_entry_flush; +static bool no_uaccess_flush; bool rfi_flush; bool entry_flush; +bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); static int __init handle_no_rfi_flush(char *p) { @@ -883,6 +887,14 @@ static int __init handle_no_entry_flush(char *p) } early_param("no_entry_flush", handle_no_entry_flush); +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -926,6 +938,20 @@ void entry_flush_enable(bool enable) entry_flush = enable; } +void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -992,6 +1018,15 @@ void setup_entry_flush(bool enable) entry_flush_enable(enable); } +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -1045,10 +1080,37 @@ static int entry_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 1432cf996201..695432965f20 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -140,6 +140,13 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) { + __start___uaccess_flush_fixup = .; + *(__uaccess_flush_fixup) + __stop___uaccess_flush_fixup = .; + } + . = ALIGN(8); __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { __start___entry_flush_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 22bae8741cae..065a3426f0eb 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -232,6 +232,56 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[4], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x4e800020; /* blr */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = 0x60000000; /* nop */ + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + patch_instruction((dest + 1), instrs[1]); + patch_instruction((dest + 2), instrs[2]); + patch_instruction((dest + 3), instrs[3]); + } + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + void do_entry_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index ad51349e479b..5068dd7f6e74 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -127,10 +127,12 @@ static void pnv_setup_rfi_flush(void) /* * If we are non-Power9 bare metal, we don't need to flush on kernel - * entry: it fixes a P9 specific vulnerability. + * entry or after user access: they fix a P9 specific vulnerability. */ - if (!pvr_version_is(PVR_POWER9)) + if (!pvr_version_is(PVR_POWER9)) { security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + } enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ @@ -142,6 +144,10 @@ static void pnv_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } static void __init pnv_setup_arch(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 8aa4dd87cbf2..2e0d38cafdd4 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -569,6 +569,10 @@ void pseries_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } #ifdef CONFIG_PCI_IOV From 5e7ceb3ace37cbb90067611a0576da11368363bd Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Fri, 20 Nov 2020 08:39:09 +0100 Subject: [PATCH 471/636] Revert "perf cs-etm: Move definition of 'traceid_list' global variable from header file" This reverts commit 168200b6d6ea0cb5765943ec5da5b8149701f36a upstream. (but only from 4.19.y) The original commit introduces a build failure as seen on Debian buster when compiled with gcc (Debian 8.3.0-6) 8.3.0: $ LC_ALL=C.UTF-8 ARCH=x86 make perf [...] Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h' CC util/cs-etm-decoder/cs-etm-decoder.o CC util/intel-pt.o util/cs-etm-decoder/cs-etm-decoder.c: In function 'cs_etm_decoder__buffer_packet': util/cs-etm-decoder/cs-etm-decoder.c:287:24: error: 'traceid_list' undeclared (first use in this function); did you mean 'trace_event'? inode = intlist__find(traceid_list, trace_chan_id); ^~~~~~~~~~~~ trace_event util/cs-etm-decoder/cs-etm-decoder.c:287:24: note: each undeclared identifier is reported only once for each function it appears in make[6]: *** [/build/linux-stable/tools/build/Makefile.build:97: util/cs-etm-decoder/cs-etm-decoder.o] Error 1 make[5]: *** [/build/linux-stable/tools/build/Makefile.build:139: cs-etm-decoder] Error 2 make[5]: *** Waiting for unfinished jobs.... make[4]: *** [/build/linux-stable/tools/build/Makefile.build:139: util] Error 2 make[3]: *** [Makefile.perf:633: libperf-in.o] Error 2 make[2]: *** [Makefile.perf:206: sub-make] Error 2 make[1]: *** [Makefile:70: all] Error 2 make: *** [Makefile:77: perf] Error 2 Link: https://lore.kernel.org/stable/20201114083501.GA468764@eldamar.lan/ Cc: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: Tor Jeremiassen Cc: linux-arm-kernel@lists.infradead.org Cc: Arnaldo Carvalho de Melo Cc: Guenter Roeck Cc: Greg Kroah-Hartman Cc: # 4.19.y Signed-off-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/cs-etm.c | 3 --- tools/perf/util/cs-etm.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index ad33b99f5d21..7b5e15cc6b71 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -87,9 +87,6 @@ struct cs_etm_queue { struct cs_etm_packet *packet; }; -/* RB tree for quick conversion between traceID and metadata pointers */ -static struct intlist *traceid_list; - static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid, u64 time_); diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index c7ef97b198c7..37f8d48179ca 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -53,6 +53,9 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* RB tree for quick conversion between traceID and CPUs */ +struct intlist *traceid_list; + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) From f1b0b8c2b7e7f0e6c90ec09696d92b02b81d2307 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 Oct 2020 08:54:31 +0000 Subject: [PATCH 472/636] powerpc/8xx: Always fault when _PAGE_ACCESSED is not set commit 29daf869cbab69088fe1755d9dd224e99ba78b56 upstream. The kernel expects pte_young() to work regardless of CONFIG_SWAP. Make sure a minor fault is taken to set _PAGE_ACCESSED when it is not already set, regardless of the selection of CONFIG_SWAP. This adds at least 3 instructions to the TLB miss exception handlers fast path. Following patch will reduce this overhead. Also update the rotation instruction to the correct number of bits to reflect all changes done to _PAGE_ACCESSED over time. Fixes: d069cb4373fe ("powerpc/8xx: Don't touch ACCESSED when no SWAP.") Fixes: 5f356497c384 ("powerpc/8xx: remove unused _PAGE_WRITETHRU") Fixes: e0a8e0d90a9f ("powerpc/8xx: Handle PAGE_USER via APG bits") Fixes: 5b2753fc3e8a ("powerpc/8xx: Implementation of PAGE_EXEC") Fixes: a891c43b97d3 ("powerpc/8xx: Prepare handlers for _PAGE_HUGE for 512k pages.") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/af834e8a0f1fa97bfae65664950f0984a70c4750.1602492856.git.christophe.leroy@csgroup.eu Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/head_8xx.S | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9fd2ff28b8ff..dc99258f2e8c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -356,11 +356,9 @@ _ENTRY(ITLBMiss_cmp) /* Load the MI_TWC with the attributes for this "segment." */ mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif li r11, RPN_PATTERN | 0x200 /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. @@ -482,11 +480,9 @@ _ENTRY(DTLBMiss_jmp) * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); * r10 = (r10 & ~PRESENT) | r11; */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior From 8bf2e8fe8a8e2e05246215697bb55f8901faef44 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 26 Oct 2020 13:36:17 -0700 Subject: [PATCH 473/636] Input: sunkbd - avoid use-after-free in teardown paths commit 77e70d351db7de07a46ac49b87a6c3c7a60fca7e upstream. We need to make sure we cancel the reinit work before we tear down the driver structures. Reported-by: Bodong Zhao Tested-by: Bodong Zhao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/sunkbd.c | 41 ++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c index ad5d7f94f95a..1c7aa86c92ab 100644 --- a/drivers/input/keyboard/sunkbd.c +++ b/drivers/input/keyboard/sunkbd.c @@ -111,7 +111,8 @@ static irqreturn_t sunkbd_interrupt(struct serio *serio, switch (data) { case SUNKBD_RET_RESET: - schedule_work(&sunkbd->tq); + if (sunkbd->enabled) + schedule_work(&sunkbd->tq); sunkbd->reset = -1; break; @@ -212,16 +213,12 @@ static int sunkbd_initialize(struct sunkbd *sunkbd) } /* - * sunkbd_reinit() sets leds and beeps to a state the computer remembers they - * were in. + * sunkbd_set_leds_beeps() sets leds and beeps to a state the computer remembers + * they were in. */ -static void sunkbd_reinit(struct work_struct *work) +static void sunkbd_set_leds_beeps(struct sunkbd *sunkbd) { - struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); - - wait_event_interruptible_timeout(sunkbd->wait, sunkbd->reset >= 0, HZ); - serio_write(sunkbd->serio, SUNKBD_CMD_SETLED); serio_write(sunkbd->serio, (!!test_bit(LED_CAPSL, sunkbd->dev->led) << 3) | @@ -234,11 +231,39 @@ static void sunkbd_reinit(struct work_struct *work) SUNKBD_CMD_BELLOFF - !!test_bit(SND_BELL, sunkbd->dev->snd)); } + +/* + * sunkbd_reinit() wait for the keyboard reset to complete and restores state + * of leds and beeps. + */ + +static void sunkbd_reinit(struct work_struct *work) +{ + struct sunkbd *sunkbd = container_of(work, struct sunkbd, tq); + + /* + * It is OK that we check sunkbd->enabled without pausing serio, + * as we only want to catch true->false transition that will + * happen once and we will be woken up for it. + */ + wait_event_interruptible_timeout(sunkbd->wait, + sunkbd->reset >= 0 || !sunkbd->enabled, + HZ); + + if (sunkbd->reset >= 0 && sunkbd->enabled) + sunkbd_set_leds_beeps(sunkbd); +} + static void sunkbd_enable(struct sunkbd *sunkbd, bool enable) { serio_pause_rx(sunkbd->serio); sunkbd->enabled = enable; serio_continue_rx(sunkbd->serio); + + if (!enable) { + wake_up_interruptible(&sunkbd->wait); + cancel_work_sync(&sunkbd->tq); + } } /* From 3661391c463edaf588fe94bcca14cb114b37a96e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Oct 2020 14:17:11 +0200 Subject: [PATCH 474/636] mac80211: always wind down STA state commit dcd479e10a0510522a5d88b29b8f79ea3467d501 upstream. When (for example) an IBSS station is pre-moved to AUTHORIZED before it's inserted, and then the insertion fails, we don't clean up the fast RX/TX states that might already have been created, since we don't go through all the state transitions again on the way down. Do that, if it hasn't been done already, when the station is freed. I considered only freeing the fast TX/RX state there, but we might add more state so it's more robust to wind down the state properly. Note that we warn if the station was ever inserted, it should have been properly cleaned up in that case, and the driver will probably not like things happening out of order. Reported-by: syzbot+2e293dbd67de2836ba42@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20201009141710.7223b322a955.I95bd08b9ad0e039c034927cce0b75beea38e059b@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/sta_info.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 9968b8a976f1..d11eb5139c92 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,6 +244,24 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { + /* + * If we had used sta_info_pre_move_state() then we might not + * have gone through the state transitions down again, so do + * it here now (and warn if it's inserted). + * + * This will clear state such as fast TX/RX that may have been + * allocated during state transitions. + */ + while (sta->sta_state > IEEE80211_STA_NONE) { + int ret; + + WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED)); + + ret = sta_info_move_state(sta, sta->sta_state - 1); + if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret)) + break; + } + if (sta->rate_ctrl) rate_control_free_sta(sta); From 567401f58e5e47f5ed41bef03a9541283054e463 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 14 Jul 2020 14:44:50 +0800 Subject: [PATCH 475/636] can: proc: can_remove_proc(): silence remove_proc_entry warning commit 3accbfdc36130282f5ae9e6eecfdf820169fedce upstream. If can_init_proc() fail to create /proc/net/can directory, can_remove_proc() will trigger a warning: WARNING: CPU: 6 PID: 7133 at fs/proc/generic.c:672 remove_proc_entry+0x17b0 Kernel panic - not syncing: panic_on_warn set ... Fix to return early from can_remove_proc() if can proc_dir does not exists. Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1594709090-3203-1-git-send-email-zhangchangzhong@huawei.com Fixes: 8e8cda6d737d ("can: initial support for network namespaces") Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/proc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/can/proc.c b/net/can/proc.c index 70fea17bb04c..a3071f43acd7 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -467,6 +467,9 @@ void can_init_proc(struct net *net) */ void can_remove_proc(struct net *net) { + if (!net->can.proc_dir) + return; + if (net->can.pde_version) remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir); @@ -494,6 +497,5 @@ void can_remove_proc(struct net *net) if (net->can.pde_rcvlist_sff) remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir); - if (net->can.proc_dir) - remove_proc_entry("can", net->proc_net); + remove_proc_entry("can", net->proc_net); } From 509b5cf71edf59c0e8e879b7295151bc0ba1525a Mon Sep 17 00:00:00 2001 From: David Edmondson Date: Tue, 3 Nov 2020 12:04:00 +0000 Subject: [PATCH 476/636] KVM: x86: clflushopt should be treated as a no-op by emulation commit 51b958e5aeb1e18c00332e0b37c5d4e95a3eff84 upstream. The instruction emulator ignores clflush instructions, yet fails to support clflushopt. Treat both similarly. Fixes: 13e457e0eebf ("KVM: x86: Emulator does not decode clflush well") Signed-off-by: David Edmondson Message-Id: <20201103120400.240882-1-david.edmondson@oracle.com> Reviewed-by: Joao Martins Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 670c2aedcefa..3e182c7ae771 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3994,6 +3994,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clflushopt(struct x86_emulate_ctxt *ctxt) +{ + /* emulating clflushopt regardless of cpuid */ + return X86EMUL_CONTINUE; +} + static int em_movsxd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = (s32) ctxt->src.val; @@ -4507,7 +4513,7 @@ static const struct opcode group11[] = { }; static const struct gprefix pfx_0f_ae_7 = { - I(SrcMem | ByteOp, em_clflush), N, N, N, + I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N, }; static const struct group_dual group15 = { { From 7a74bd35c932698921c08399d8fcb2e84cff32cc Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sat, 7 Nov 2020 00:49:39 -0800 Subject: [PATCH 477/636] ACPI: GED: fix -Wformat commit 9debfb81e7654fe7388a49f45bc4d789b94c1103 upstream. Clang is more aggressive about -Wformat warnings when the format flag specifies a type smaller than the parameter. It turns out that gsi is an int. Fixes: drivers/acpi/evged.c:105:48: warning: format specifies type 'unsigned char' but the argument has type 'unsigned int' [-Wformat] trigger == ACPI_EDGE_SENSITIVE ? 'E' : 'L', gsi); ^~~ Link: https://github.com/ClangBuiltLinux/linux/issues/378 Fixes: ea6f3af4c5e6 ("ACPI: GED: add support for _Exx / _Lxx handler methods") Acked-by: Ard Biesheuvel Signed-off-by: Nick Desaulniers Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/evged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/evged.c b/drivers/acpi/evged.c index 73f6093a5c16..9f4b405a5c20 100644 --- a/drivers/acpi/evged.c +++ b/drivers/acpi/evged.c @@ -110,7 +110,7 @@ static acpi_status acpi_ged_request_interrupt(struct acpi_resource *ares, switch (gsi) { case 0 ... 255: - sprintf(ev_name, "_%c%02hhX", + sprintf(ev_name, "_%c%02X", trigger == ACPI_EDGE_SENSITIVE ? 'E' : 'L', gsi); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) From 76bda503e6406539b1ad5adefe69d3df439ee97f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 22 Nov 2020 10:02:27 +0100 Subject: [PATCH 478/636] Linux 4.19.159 Tested-by: Jon Hunter Tested-by: Shuah Khan Tested-by: Pavel Machek (CIP) Tested-by: Linux Kernel Functional Testing Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20201120104539.806156260@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 698a9cc2864b..593fdbce712d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 158 +SUBLEVEL = 159 EXTRAVERSION = NAME = "People's Front" From f8c18eab5413d1edbfdeda8e8102780c5af8c64d Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 17 Nov 2020 10:45:05 +0800 Subject: [PATCH 479/636] ah6: fix error return code in ah6_input() [ Upstream commit a5ebcbdf34b65fcc07f38eaf2d60563b42619a59 ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605581105-35295-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ah6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 78c974391567..2b68bd7c83c4 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -600,7 +600,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); - if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN)) + err = ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN); + if (err) goto out_free; ip6h->priority = 0; From 735d1d973e6d0aae5f6875e7d9b087d2ccfed076 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 16 Nov 2020 17:21:14 +0100 Subject: [PATCH 480/636] atm: nicstar: Unmap DMA on send error [ Upstream commit 6dceaa9f56e22d0f9b4c4ad2ed9e04e315ce7fe5 ] The `skb' is mapped for DMA in ns_send() but does not unmap DMA in case push_scqe() fails to submit the `skb'. The memory of the `skb' is released so only the DMA mapping is leaking. Unmap the DMA mapping in case push_scqe() failed. Fixes: 864a3ff635fa7 ("atm: [nicstar] remove virt_to_bus() and support 64-bit platforms") Cc: Chas Williams <3chas3@gmail.com> Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/atm/nicstar.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index cbec9adc01c7..0d3754a4ac20 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -1705,6 +1705,8 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb) if (push_scqe(card, vc, scq, &scqe, skb) != 0) { atomic_inc(&vcc->stats->tx_err); + dma_unmap_single(&card->pcidev->dev, NS_PRV_DMA(skb), skb->len, + DMA_TO_DEVICE); dev_kfree_skb_any(skb); return -EIO; } From d05c9bcab23ecbc68632c4fc8eda9af9c89eb647 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 15 Nov 2020 19:27:49 -0500 Subject: [PATCH 481/636] bnxt_en: read EEPROM A2h address using page 0 [ Upstream commit 4260330b32b14330cfe427d568ac5f5b29b5be3d ] The module eeprom address range returned by bnxt_get_module_eeprom() should be 256 bytes of A0h address space, the lower half of the A2h address space, and page 0 for the upper half of the A2h address space. Fix the firmware call by passing page_number 0 for the A2h slave address space. Fixes: 42ee18fe4ca2 ("bnxt_en: Add Support for ETHTOOL_GMODULEINFO and ETHTOOL_GMODULEEEPRO") Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 1ea81c23039f..511240e8246f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2300,7 +2300,7 @@ static int bnxt_get_module_eeprom(struct net_device *dev, /* Read A2 portion of the EEPROM */ if (length) { start -= ETH_MODULE_SFF_8436_LEN; - rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1, + rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 0, start, length, data); } return rc; From 8196e11a66d751752a25b62c6dc0e2923a5e6ff0 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 13 Nov 2020 19:16:22 +0800 Subject: [PATCH 482/636] devlink: Add missing genlmsg_cancel() in devlink_nl_sb_port_pool_fill() [ Upstream commit 849920c703392957f94023f77ec89ca6cf119d43 ] If sb_occ_port_pool_get() failed in devlink_nl_sb_port_pool_fill(), msg should be canceled by genlmsg_cancel(). Fixes: df38dafd2559 ("devlink: implement shared buffer occupancy monitoring interface") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201113111622.11040-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/devlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index a77e3777c8dd..6ad095264896 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1113,7 +1113,7 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index, pool_index, &cur, &max); if (err && err != -EOPNOTSUPP) - return err; + goto sb_occ_get_failure; if (!err) { if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur)) goto nla_put_failure; @@ -1126,8 +1126,10 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg, return 0; nla_put_failure: + err = -EMSGSIZE; +sb_occ_get_failure: genlmsg_cancel(msg, hdr); - return -EMSGSIZE; + return err; } static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb, From f0ec2cd03100ab287c9e5fce5ae56a76d6fc17a4 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 16 Nov 2020 16:20:18 +0800 Subject: [PATCH 483/636] inet_diag: Fix error path to cancel the meseage in inet_req_diag_fill() [ Upstream commit e33de7c5317e2827b2ba6fd120a505e9eb727b05 ] nlmsg_cancel() needs to be called in the error path of inet_req_diag_fill to cancel the message. Fixes: d545caca827b ("net: inet: diag: expose the socket mark to privileged processes.") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201116082018.16496-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/inet_diag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f0957ebf82cf..d07917059d70 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -392,8 +392,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, r->idiag_inode = 0; if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, - inet_rsk(reqsk)->ir_mark)) + inet_rsk(reqsk)->ir_mark)) { + nlmsg_cancel(skb, nlh); return -EMSGSIZE; + } nlmsg_end(skb, nlh); return 0; From 65be1fe87ebcf45aa1773999435bcc7aa3810091 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Thu, 12 Nov 2020 13:59:49 -0500 Subject: [PATCH 484/636] lan743x: fix issue causing intermittent kernel log warnings [ Upstream commit e35df62e04cc6fc4b9d90d054732f138349ff9b1 ] When running this chip on arm imx6, we intermittently observe the following kernel warning in the log, especially when the system is under high load: [ 50.119484] ------------[ cut here ]------------ [ 50.124377] WARNING: CPU: 0 PID: 303 at kernel/softirq.c:169 __local_bh_enable_ip+0x100/0x184 [ 50.132925] IRQs not enabled as expected [ 50.159250] CPU: 0 PID: 303 Comm: rngd Not tainted 5.7.8 #1 [ 50.164837] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 50.171395] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 50.179162] [] (show_stack) from [] (dump_stack+0xac/0xd8) [ 50.186408] [] (dump_stack) from [] (__warn+0xd0/0x10c) [ 50.193391] [] (__warn) from [] (warn_slowpath_fmt+0x98/0xc4) [ 50.200892] [] (warn_slowpath_fmt) from [] (__local_bh_enable_ip+0x100/0x184) [ 50.209860] [] (__local_bh_enable_ip) from [] (destroy_conntrack+0x48/0xd8 [nf_conntrack]) [ 50.220038] [] (destroy_conntrack [nf_conntrack]) from [] (nf_conntrack_destroy+0x94/0x168) [ 50.230160] [] (nf_conntrack_destroy) from [] (skb_release_head_state+0xa0/0xd0) [ 50.239314] [] (skb_release_head_state) from [] (skb_release_all+0xc/0x24) [ 50.247946] [] (skb_release_all) from [] (consume_skb+0x74/0x17c) [ 50.255796] [] (consume_skb) from [] (lan743x_tx_release_desc+0x120/0x124) [ 50.264428] [] (lan743x_tx_release_desc) from [] (lan743x_tx_napi_poll+0x5c/0x18c) [ 50.273755] [] (lan743x_tx_napi_poll) from [] (net_rx_action+0x118/0x4a4) [ 50.282306] [] (net_rx_action) from [] (__do_softirq+0x13c/0x53c) [ 50.290157] [] (__do_softirq) from [] (irq_exit+0x150/0x17c) [ 50.297575] [] (irq_exit) from [] (__handle_domain_irq+0x60/0xb0) [ 50.305423] [] (__handle_domain_irq) from [] (gic_handle_irq+0x4c/0x90) [ 50.313790] [] (gic_handle_irq) from [] (__irq_usr+0x54/0x80) [ 50.321287] Exception stack(0xecd99fb0 to 0xecd99ff8) [ 50.326355] 9fa0: 1cf1aa74 00000001 00000001 00000000 [ 50.334547] 9fc0: 00000001 00000000 00000000 00000000 00000000 00000000 00004097 b6d17d14 [ 50.342738] 9fe0: 00000001 b6d17c60 00000000 b6e71f94 800b0010 ffffffff [ 50.349364] irq event stamp: 2525027 [ 50.352955] hardirqs last enabled at (2525026): [] net_rx_action+0xb4/0x4a4 [ 50.360892] hardirqs last disabled at (2525027): [] _raw_spin_lock_irqsave+0x1c/0x50 [ 50.369517] softirqs last enabled at (2524660): [] __do_softirq+0x38c/0x53c [ 50.377446] softirqs last disabled at (2524693): [] irq_exit+0x150/0x17c [ 50.385027] ---[ end trace c0b571db4bc8087d ]--- The driver is calling dev_kfree_skb() from code inside a spinlock, where h/w interrupts are disabled. This is forbidden, as documented in include/linux/netdevice.h. The correct function to use dev_kfree_skb_irq(), or dev_kfree_skb_any(). Fix by using the correct dev_kfree_skb_xxx() functions: in lan743x_tx_release_desc(): called by lan743x_tx_release_completed_descriptors() called by in lan743x_tx_napi_poll() which holds a spinlock called by lan743x_tx_release_all_descriptors() called by lan743x_tx_close() which can-sleep conclusion: use dev_kfree_skb_any() in lan743x_tx_xmit_frame(): which holds a spinlock conclusion: use dev_kfree_skb_irq() in lan743x_tx_close(): which can-sleep conclusion: use dev_kfree_skb() in lan743x_rx_release_ring_element(): called by lan743x_rx_close() which can-sleep called by lan743x_rx_open() which can-sleep conclusion: use dev_kfree_skb() Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Sven Van Asbroeck Link: https://lore.kernel.org/r/20201112185949.11315-1-TheSven73@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/microchip/lan743x_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 085fdceb3821..0a70fdabca9c 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1245,13 +1245,13 @@ clean_up_data_descriptor: goto clear_active; if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED)) { - dev_kfree_skb(buffer_info->skb); + dev_kfree_skb_any(buffer_info->skb); goto clear_skb; } if (cleanup) { lan743x_ptp_unrequest_tx_timestamp(tx->adapter); - dev_kfree_skb(buffer_info->skb); + dev_kfree_skb_any(buffer_info->skb); } else { ignore_sync = (buffer_info->flags & TX_BUFFER_INFO_FLAG_IGNORE_SYNC) != 0; @@ -1561,7 +1561,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, if (required_number_of_descriptors > lan743x_tx_get_avail_desc(tx)) { if (required_number_of_descriptors > (tx->ring_size - 1)) { - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); } else { /* save to overflow buffer */ tx->overflow_skb = skb; @@ -1594,7 +1594,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, start_frame_length, do_timestamp, skb->ip_summed == CHECKSUM_PARTIAL)) { - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); goto unlock; } @@ -1614,7 +1614,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx, * frame assembler clean up was performed inside * lan743x_tx_frame_add_fragment */ - dev_kfree_skb(skb); + dev_kfree_skb_irq(skb); goto unlock; } } From 3e513faa2b7562fd1cce2ecefd60bf9d91981351 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck Date: Thu, 12 Nov 2020 15:47:41 -0500 Subject: [PATCH 485/636] lan743x: prevent entire kernel HANG on open, for some platforms [ Upstream commit 796a2665ca3e91ebaba7222f76fd9a035714e2d8 ] On arm imx6, when opening the chip's netdev, the whole Linux kernel intermittently hangs/freezes. This is caused by a bug in the driver code which tests if pcie interrupts are working correctly, using the software interrupt: 1. open: enable the software interrupt 2. open: tell the chip to assert the software interrupt 3. open: wait for flag 4. ISR: acknowledge s/w interrupt, set flag 5. open: notice flag, disable the s/w interrupt, continue Unfortunately the ISR only acknowledges the s/w interrupt, but does not disable it. This will re-trigger the ISR in a tight loop. On some (lucky) platforms, open proceeds to disable the s/w interrupt even while the ISR is 'spinning'. On arm imx6, the spinning ISR does not allow open to proceed, resulting in a hung Linux kernel. Fix minimally by disabling the s/w interrupt in the ISR, which will prevent it from spinning. This won't break anything because the s/w interrupt is used as a one-shot interrupt. Note that this is a minimal fix, overlooking many possible cleanups, e.g.: - lan743x_intr_software_isr() is completely redundant and reads INT_STS twice for no apparent reason - disabling the s/w interrupt in lan743x_intr_test_isr() is now redundant, but harmless - waiting on software_isr_flag can be converted from a sleeping poll loop to wait_event_timeout() Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Tested-by: Sven Van Asbroeck # arm imx6 lan7430 Signed-off-by: Sven Van Asbroeck Link: https://lore.kernel.org/r/20201112204741.12375-1-TheSven73@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/microchip/lan743x_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 0a70fdabca9c..df4519c04ba0 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -145,7 +145,8 @@ static void lan743x_intr_software_isr(void *context) int_sts = lan743x_csr_read(adapter, INT_STS); if (int_sts & INT_BIT_SW_GP_) { - lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_); + /* disable the interrupt to prevent repeated re-triggering */ + lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_SW_GP_); intr->software_isr_flag = 1; } } From 5ca57e8ee2ccf5dcab7a6be6db8b23f7c26f0cb7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 17 Nov 2020 19:33:52 +0200 Subject: [PATCH 486/636] mlxsw: core: Use variable timeout for EMAD retries [ Upstream commit 1f492eab67bced119a0ac7db75ef2047e29a30c6 ] The driver sends Ethernet Management Datagram (EMAD) packets to the device for configuration purposes and waits for up to 200ms for a reply. A request is retried up to 5 times. When the system is under heavy load, replies are not always processed in time and EMAD transactions fail. Make the process more robust to such delays by using exponential backoff. First wait for up to 200ms, then retransmit and wait for up to 400ms and so on. Fixes: caf7297e7ab5 ("mlxsw: core: Introduce support for asynchronous EMAD register access") Reported-by: Denis Yulevich Tested-by: Denis Yulevich Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 423c3e9925d0..049ca4ba49de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -439,7 +439,8 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) if (trans->core->fw_flash_in_progress) timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS); - queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout); + queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, + timeout << trans->retries); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, From fe43f8dbd1386bc632c5b0348d528c76f80d6e96 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 17 Nov 2020 11:02:11 +0800 Subject: [PATCH 487/636] net: b44: fix error return code in b44_init_one() [ Upstream commit 7b027c249da54f492699c43e26cba486cfd48035 ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 39a6f4bce6b4 ("b44: replace the ssb_dma API with the generic DMA API") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/1605582131-36735-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/b44.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 88f8d31e4c83..7aeb2805fec4 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2389,7 +2389,8 @@ static int b44_init_one(struct ssb_device *sdev, goto err_out_free_dev; } - if (dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30))) { + err = dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30)); + if (err) { dev_err(sdev->dev, "Required 30BIT DMA mask unsupported by the system\n"); goto err_out_powerdown; From b1197f5d6f532a3f82a66940c8cec44cf302bf6d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 13 Nov 2020 10:27:27 +0100 Subject: [PATCH 488/636] net: bridge: add missing counters to ndo_get_stats64 callback [ Upstream commit 7a30ecc9237681bb125cbd30eee92bef7e86293d ] In br_forward.c and br_input.c fields dev->stats.tx_dropped and dev->stats.multicast are populated, but they are ignored in ndo_get_stats64. Fixes: 28172739f0a2 ("net: fix 64 bit counters on 32 bit arches") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/58ea9963-77ad-a7cf-8dfd-fc95ab95f606@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 9ce661e2590d..a350c05b7ff5 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -215,6 +215,7 @@ static void br_get_stats64(struct net_device *dev, sum.rx_packets += tmp.rx_packets; } + netdev_stats_to_stats64(stats, &dev->stats); stats->tx_bytes = sum.tx_bytes; stats->tx_packets = sum.tx_packets; stats->rx_bytes = sum.rx_bytes; From 16e056beca596f6e03c2597914786f2b1c202f86 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Thu, 12 Nov 2020 12:43:35 +0100 Subject: [PATCH 489/636] net: dsa: mv88e6xxx: Avoid VTU corruption on 6097 [ Upstream commit 92307069a96c07d9b6e74b96b79390e7cd7d2111 ] As soon as you add the second port to a VLAN, all other port membership configuration is overwritten with zeroes. The HW interprets this as all ports being "unmodified members" of the VLAN. In the simple case when all ports belong to the same VLAN, switching will still work. But using multiple VLANs or trying to set multiple ports as tagged members will not work. On the 6352, doing a VTU GetNext op, followed by an STU GetNext op will leave you with both the member- and state- data in the VTU/STU data registers. But on the 6097 (which uses the same implementation), the STU GetNext will override the information gathered from the VTU GetNext. Separate the two stages, parsing the result of the VTU GetNext before doing the STU GetNext. We opt to update the existing implementation for all applicable chips, as opposed to creating a separate callback for 6097, because although the previous implementation did work for (at least) 6352, the datasheet does not mention the masking behavior. Fixes: ef6fcea37f01 ("net: dsa: mv88e6xxx: get STU entry on VTU GetNext") Signed-off-by: Tobias Waldekranz Link: https://lore.kernel.org/r/20201112114335.27371-1-tobias@waldekranz.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx/global1_vtu.c | 61 ++++++++++++++++++++----- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 7a6667e0b9f9..e17158139aa8 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -127,11 +127,9 @@ static int mv88e6xxx_g1_vtu_vid_write(struct mv88e6xxx_chip *chip, * Offset 0x08: VTU/STU Data Register 2 * Offset 0x09: VTU/STU Data Register 3 */ - -static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) +static int mv88e6185_g1_vtu_stu_data_read(struct mv88e6xxx_chip *chip, + u16 *regs) { - u16 regs[3]; int i; /* Read all 3 VTU/STU Data registers */ @@ -144,12 +142,45 @@ static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, return err; } - /* Extract MemberTag and PortState data */ + return 0; +} + +static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_vtu_entry *entry) +{ + u16 regs[3]; + int err; + int i; + + err = mv88e6185_g1_vtu_stu_data_read(chip, regs); + if (err) + return err; + + /* Extract MemberTag data */ for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int member_offset = (i % 4) * 4; - unsigned int state_offset = member_offset + 2; entry->member[i] = (regs[i / 4] >> member_offset) & 0x3; + } + + return 0; +} + +static int mv88e6185_g1_stu_data_read(struct mv88e6xxx_chip *chip, + struct mv88e6xxx_vtu_entry *entry) +{ + u16 regs[3]; + int err; + int i; + + err = mv88e6185_g1_vtu_stu_data_read(chip, regs); + if (err) + return err; + + /* Extract PortState data */ + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { + unsigned int state_offset = (i % 4) * 4 + 2; + entry->state[i] = (regs[i / 4] >> state_offset) & 0x3; } @@ -322,6 +353,10 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; + err = mv88e6185_g1_stu_data_read(chip, entry); + if (err) + return err; + /* VTU DBNum[3:0] are located in VTU Operation 3:0 * VTU DBNum[7:4] are located in VTU Operation 11:8 */ @@ -347,11 +382,6 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return err; if (entry->valid) { - /* Fetch (and mask) VLAN PortState data from the STU */ - err = mv88e6xxx_g1_vtu_stu_get(chip, entry); - if (err) - return err; - err = mv88e6185_g1_vtu_data_read(chip, entry); if (err) return err; @@ -359,6 +389,15 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, err = mv88e6xxx_g1_vtu_fid_read(chip, entry); if (err) return err; + + /* Fetch VLAN PortState data from the STU */ + err = mv88e6xxx_g1_vtu_stu_get(chip, entry); + if (err) + return err; + + err = mv88e6185_g1_stu_data_read(chip, entry); + if (err) + return err; } return 0; From 24184dfd51c9fbe0e478570f4cb3c3fbc8d1b594 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 16 Nov 2020 19:52:34 -0800 Subject: [PATCH 490/636] net: Have netpoll bring-up DSA management interface [ Upstream commit 1532b9778478577152201adbafa7738b1e844868 ] DSA network devices rely on having their DSA management interface up and running otherwise their ndo_open() will return -ENETDOWN. Without doing this it would not be possible to use DSA devices as netconsole when configured on the command line. These devices also do not utilize the upper/lower linking so the check about the netpoll device having upper is not going to be a problem. The solution adopted here is identical to the one done for net/ipv4/ipconfig.c with 728c02089a0e ("net: ipv4: handle DSA enabled master network devices"), with the network namespace scope being restricted to that of the process configuring netpoll. Fixes: 04ff53f96a93 ("net: dsa: Add netconsole support") Tested-by: Vladimir Oltean Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20201117035236.22658-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/core/netpoll.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 023ce0fbb496..41e32a958d08 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -638,15 +639,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); int netpoll_setup(struct netpoll *np) { - struct net_device *ndev = NULL; + struct net_device *ndev = NULL, *dev = NULL; + struct net *net = current->nsproxy->net_ns; struct in_device *in_dev; int err; rtnl_lock(); - if (np->dev_name[0]) { - struct net *net = current->nsproxy->net_ns; + if (np->dev_name[0]) ndev = __dev_get_by_name(net, np->dev_name); - } + if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); err = -ENODEV; @@ -654,6 +655,19 @@ int netpoll_setup(struct netpoll *np) } dev_hold(ndev); + /* bring up DSA management network devices up first */ + for_each_netdev(net, dev) { + if (!netdev_uses_dsa(dev)) + continue; + + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + np_err(np, "%s failed to open %s\n", + np->dev_name, dev->name); + goto put; + } + } + if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; From c0e617c4e3c4b4dec1579bbcb02c5c60d1ae3a6c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Sun, 8 Nov 2020 09:08:26 -0500 Subject: [PATCH 491/636] netlabel: fix our progress tracking in netlbl_unlabel_staticlist() [ Upstream commit 866358ec331f8faa394995fb4b511af1db0247c8 ] The current NetLabel code doesn't correctly keep track of the netlink dump state in some cases, in particular when multiple interfaces with large configurations are loaded. The problem manifests itself by not reporting the full configuration to userspace, even though it is loaded and active in the kernel. This patch fixes this by ensuring that the dump state is properly reset when necessary inside the netlbl_unlabel_staticlist() function. Fixes: 8cc44579d1bd ("NetLabel: Introduce static network labels for unlabeled connections") Signed-off-by: Paul Moore Link: https://lore.kernel.org/r/160484450633.3752.16512718263560813473.stgit@sifl Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/netlabel/netlabel_unlabeled.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c92894c3e40a..f238837a2717 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1179,12 +1179,13 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, struct netlbl_unlhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; - u32 iter_bkt; - u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; + u32 skip_addr4 = cb->args[2]; + u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; #if IS_ENABLED(CONFIG_IPV6) + u32 skip_addr6 = cb->args[3]; struct netlbl_af6list *addr6; #endif @@ -1195,7 +1196,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, rcu_read_lock(); for (iter_bkt = skip_bkt; iter_bkt < rcu_dereference(netlbl_unlhsh)->size; - iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { + iter_bkt++) { iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; list_for_each_entry_rcu(iface, iter_list, list) { if (!iface->valid || @@ -1203,7 +1204,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, continue; netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { - if (iter_addr4++ < cb->args[2]) + if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1216,10 +1217,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, goto unlabel_staticlist_return; } } + iter_addr4 = 0; + skip_addr4 = 0; #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { - if (iter_addr6++ < cb->args[3]) + if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen( NLBL_UNLABEL_C_STATICLIST, @@ -1232,8 +1235,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, goto unlabel_staticlist_return; } } + iter_addr6 = 0; + skip_addr6 = 0; #endif /* IPv6 */ } + iter_chain = 0; + skip_chain = 0; } unlabel_staticlist_return: From 57e57c89063963128ef85522c230097cdc1ab4b3 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 13 Nov 2020 16:30:40 -0500 Subject: [PATCH 492/636] netlabel: fix an uninitialized warning in netlbl_unlabel_staticlist() [ Upstream commit 1ba86d4366e023d96df3dbe415eea7f1dc08c303 ] Static checking revealed that a previous fix to netlbl_unlabel_staticlist() leaves a stack variable uninitialized, this patches fixes that. Fixes: 866358ec331f ("netlabel: fix our progress tracking in netlbl_unlabel_staticlist()") Reported-by: Dan Carpenter Signed-off-by: Paul Moore Reviewed-by: James Morris Link: https://lore.kernel.org/r/160530304068.15651.18355773009751195447.stgit@sifl Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/netlabel/netlabel_unlabeled.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index f238837a2717..0067f472367b 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1180,7 +1180,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; u32 skip_addr4 = cb->args[2]; - u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0; + u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; struct list_head *iter_list; struct netlbl_af4list *addr4; From 32515a2761a8ca0502a95b994706d1fb20c58fd1 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 18 Nov 2020 10:19:22 +0200 Subject: [PATCH 493/636] net/mlx4_core: Fix init_hca fields offset [ Upstream commit 6d9c8d15af0ef20a66a0b432cac0d08319920602 ] Slave function read the following capabilities from the wrong offset: 1. log_mc_entry_sz 2. fs_log_entry_sz 3. log_mc_hash_sz Fix that by adjusting these capabilities offset to match firmware layout. Due to the wrong offset read, the following issues might occur: 1+2. Negative value reported at max_mcast_qp_attach. 3. Driver to init FW with multicast hash size of zero. Fixes: a40ded604365 ("net/mlx4_core: Add masking for a few queries on HCA caps") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20201118081922.553-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/fw.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/fw.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 04ebce738db9..926407f0bbd9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1861,8 +1861,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) #define INIT_HCA_MCAST_OFFSET 0x0c0 #define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) -#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12) -#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) +#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x13) +#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x17) #define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18) #define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) #define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6 @@ -1870,7 +1870,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_DRIVER_VERSION_SZ 0x40 #define INIT_HCA_FS_PARAM_OFFSET 0x1d0 #define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) -#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) +#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x13) #define INIT_HCA_FS_A0_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x18) #define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) #define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 650ae08c71de..8f020f26ebf5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -182,8 +182,8 @@ struct mlx4_init_hca_param { u64 cmpt_base; u64 mtt_base; u64 global_caps; - u16 log_mc_entry_sz; - u16 log_mc_hash_sz; + u8 log_mc_entry_sz; + u8 log_mc_hash_sz; u16 hca_core_clock; /* Internal Clock Frequency (in MHz) */ u8 log_num_qps; u8 log_num_srqs; From af1b19b16b91b685ce2c093d7cd1f936c2fd7fd8 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Fri, 13 Nov 2020 13:12:05 -0700 Subject: [PATCH 494/636] net: qualcomm: rmnet: Fix incorrect receive packet handling during cleanup [ Upstream commit fc70f5bf5e525dde81565f0a30d5e39168062eba ] During rmnet unregistration, the real device rx_handler is first cleared followed by the removal of rx_handler_data after the rcu synchronization. Any packets in the receive path may observe that the rx_handler is NULL. However, there is no check when dereferencing this value to use the rmnet_port information. This fixes following splat by adding the NULL check. Unable to handle kernel NULL pointer dereference at virtual address 000000000000000d pc : rmnet_rx_handler+0x124/0x284 lr : rmnet_rx_handler+0x124/0x284 rmnet_rx_handler+0x124/0x284 __netif_receive_skb_core+0x758/0xd74 __netif_receive_skb+0x50/0x17c process_backlog+0x15c/0x1b8 napi_poll+0x88/0x284 net_rx_action+0xbc/0x23c __do_softirq+0x20c/0x48c Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Link: https://lore.kernel.org/r/1605298325-3705-1-git-send-email-subashab@codeaurora.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index c9d43bad1e2f..9b1804a228d9 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -197,6 +197,11 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) dev = skb->dev; port = rmnet_get_port_rcu(dev); + if (unlikely(!port)) { + atomic_long_inc(&skb->dev->rx_nohandler); + kfree_skb(skb); + goto done; + } switch (port->rmnet_mode) { case RMNET_EPMODE_VND: From dd7c76948628fb92faf794bc4fcfb662d6d395ce Mon Sep 17 00:00:00 2001 From: Xie He Date: Thu, 12 Nov 2020 02:35:06 -0800 Subject: [PATCH 495/636] net: x25: Increase refcnt of "struct x25_neigh" in x25_rx_call_request [ Upstream commit 4ee18c179e5e815fa5575e0d2db0c05795a804ee ] The x25_disconnect function in x25_subr.c would decrease the refcount of "x25->neighbour" (struct x25_neigh) and reset this pointer to NULL. However, the x25_rx_call_request function in af_x25.c, which is called when we receive a connection request, does not increase the refcount when it assigns the pointer. Fix this issue by increasing the refcount of "struct x25_neigh" in x25_rx_call_request. This patch fixes frequent kernel crashes when using AF_X25 sockets. Fixes: 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect") Cc: Martin Schiller Signed-off-by: Xie He Link: https://lore.kernel.org/r/20201112103506.5875-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 372f4194db5a..b3db0b0a52f5 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1049,6 +1049,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, makex25->lci = lci; makex25->dest_addr = dest_addr; makex25->source_addr = source_addr; + x25_neigh_hold(nb); makex25->neighbour = nb; makex25->facilities = facilities; makex25->dte_facilities= dte_facilities; From 082b21d03479b566681f43b74677c249f6aad337 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Sun, 15 Nov 2020 12:10:29 -0800 Subject: [PATCH 496/636] page_frag: Recover from memory pressure [ Upstream commit d8c19014bba8f565d8a2f1f46b4e38d1d97bf1a7 ] The ethernet driver may allocate skb (and skb->data) via napi_alloc_skb(). This ends up to page_frag_alloc() to allocate skb->data from page_frag_cache->va. During the memory pressure, page_frag_cache->va may be allocated as pfmemalloc page. As a result, the skb->pfmemalloc is always true as skb->data is from page_frag_cache->va. The skb will be dropped if the sock (receiver) does not have SOCK_MEMALLOC. This is expected behaviour under memory pressure. However, once kernel is not under memory pressure any longer (suppose large amount of memory pages are just reclaimed), the page_frag_alloc() may still re-use the prior pfmemalloc page_frag_cache->va to allocate skb->data. As a result, the skb->pfmemalloc is always true unless page_frag_cache->va is re-allocated, even if the kernel is not under memory pressure any longer. Here is how kernel runs into issue. 1. The kernel is under memory pressure and allocation of PAGE_FRAG_CACHE_MAX_ORDER in __page_frag_cache_refill() will fail. Instead, the pfmemalloc page is allocated for page_frag_cache->va. 2: All skb->data from page_frag_cache->va (pfmemalloc) will have skb->pfmemalloc=true. The skb will always be dropped by sock without SOCK_MEMALLOC. This is an expected behaviour. 3. Suppose a large amount of pages are reclaimed and kernel is not under memory pressure any longer. We expect skb->pfmemalloc drop will not happen. 4. Unfortunately, page_frag_alloc() does not proactively re-allocate page_frag_alloc->va and will always re-use the prior pfmemalloc page. The skb->pfmemalloc is always true even kernel is not under memory pressure any longer. Fix this by freeing and re-allocating the page instead of recycling it. References: https://lore.kernel.org/lkml/20201103193239.1807-1-dongli.zhang@oracle.com/ References: https://lore.kernel.org/linux-mm/20201105042140.5253-1-willy@infradead.org/ Suggested-by: Matthew Wilcox (Oracle) Cc: Aruna Ramakrishna Cc: Bert Barbe Cc: Rama Nichanamatlu Cc: Venkat Venkatsubra Cc: Manjunath Patil Cc: Joe Jin Cc: SRINIVAS Fixes: 79930f5892e1 ("net: do not deplete pfmemalloc reserve") Signed-off-by: Dongli Zhang Acked-by: Vlastimil Babka Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20201115201029.11903-1-dongli.zhang@oracle.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- mm/page_alloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4325e7d58115..4446a523e684 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4554,6 +4554,11 @@ refill: if (!page_ref_sub_and_test(page, nc->pagecnt_bias)) goto refill; + if (unlikely(nc->pfmemalloc)) { + free_the_page(page, compound_order(page)); + goto refill; + } + #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) /* if size can vary use size else just use PAGE_SIZE */ size = nc->size; From 3ac1662ac496f825033dfc44efb03af291c5bcba Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Mon, 16 Nov 2020 21:07:13 +0800 Subject: [PATCH 497/636] qed: fix error return code in qed_iwarp_ll2_start() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit cb47d16ea21045c66eebbf5ed792e74a8537e27a ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 469981b17a4f ("qed: Add unaligned and packed packet processing") Fixes: fcb39f6c10b2 ("qed: Add mpa buffer descriptors for storing and processing mpa fpdus") Fixes: 1e28eaad07ea ("qed: Add iWARP support for fpdu spanned over more than two tcp packets") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Acked-by: Michal Kalderon  Link: https://lore.kernel.org/r/1605532033-27373-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 39787bb885c8..80afc8f36e00 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -2737,14 +2737,18 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps, sizeof(*iwarp_info->partial_fpdus), GFP_KERNEL); - if (!iwarp_info->partial_fpdus) + if (!iwarp_info->partial_fpdus) { + rc = -ENOMEM; goto err; + } iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps; iwarp_info->mpa_intermediate_buf = kzalloc(buff_size, GFP_KERNEL); - if (!iwarp_info->mpa_intermediate_buf) + if (!iwarp_info->mpa_intermediate_buf) { + rc = -ENOMEM; goto err; + } /* The mpa_bufs array serves for pending RX packets received on the * mpa ll2 that don't have place on the tx ring and require later @@ -2754,8 +2758,10 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc, sizeof(*iwarp_info->mpa_bufs), GFP_KERNEL); - if (!iwarp_info->mpa_bufs) + if (!iwarp_info->mpa_bufs) { + rc = -ENOMEM; goto err; + } INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list); INIT_LIST_HEAD(&iwarp_info->mpa_buf_list); From e2776f4b00520aa6042ececcf601f9a71b6ea6e3 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 13 Nov 2020 14:16:26 +0800 Subject: [PATCH 498/636] qlcnic: fix error return code in qlcnic_83xx_restart_hw() [ Upstream commit 3beb9be165083c2964eba1923601c3bfac0b02d4 ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 3ced0a88cd4c ("qlcnic: Add support to run firmware POST") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1605248186-16013-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index cda5b0a9e948..10286215092f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2251,7 +2251,8 @@ static int qlcnic_83xx_restart_hw(struct qlcnic_adapter *adapter) /* Boot either flash image or firmware image from host file system */ if (qlcnic_load_fw_file == 1) { - if (qlcnic_83xx_load_fw_image_from_host(adapter)) + err = qlcnic_83xx_load_fw_image_from_host(adapter); + if (err) return err; } else { QLC_SHARED_REG_WR32(adapter, QLCNIC_FW_IMG_VALID, From c962aae4207730ec026a4a1f39128db11626953c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 14 Nov 2020 13:22:53 +0800 Subject: [PATCH 499/636] sctp: change to hold/put transport for proto_unreach_timer [ Upstream commit 057a10fa1f73d745c8e69aa54ab147715f5630ae ] A call trace was found in Hangbin's Codenomicon testing with debug kernel: [ 2615.981988] ODEBUG: free active (active state 0) object type: timer_list hint: sctp_generate_proto_unreach_event+0x0/0x3a0 [sctp] [ 2615.995050] WARNING: CPU: 17 PID: 0 at lib/debugobjects.c:328 debug_print_object+0x199/0x2b0 [ 2616.095934] RIP: 0010:debug_print_object+0x199/0x2b0 [ 2616.191533] Call Trace: [ 2616.194265] [ 2616.202068] debug_check_no_obj_freed+0x25e/0x3f0 [ 2616.207336] slab_free_freelist_hook+0xeb/0x140 [ 2616.220971] kfree+0xd6/0x2c0 [ 2616.224293] rcu_do_batch+0x3bd/0xc70 [ 2616.243096] rcu_core+0x8b9/0xd00 [ 2616.256065] __do_softirq+0x23d/0xacd [ 2616.260166] irq_exit+0x236/0x2a0 [ 2616.263879] smp_apic_timer_interrupt+0x18d/0x620 [ 2616.269138] apic_timer_interrupt+0xf/0x20 [ 2616.273711] This is because it holds asoc when transport->proto_unreach_timer starts and puts asoc when the timer stops, and without holding transport the transport could be freed when the timer is still running. So fix it by holding/putting transport instead for proto_unreach_timer in transport, just like other timers in transport. v1->v2: - Also use sctp_transport_put() for the "out_unlock:" path in sctp_generate_proto_unreach_event(), as Marcelo noticed. Fixes: 50b5d6ad6382 ("sctp: Fix a race between ICMP protocol unreachable and connect()") Reported-by: Hangbin Liu Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/102788809b554958b13b95d33440f5448113b8d6.1605331373.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sctp/input.c | 4 ++-- net/sctp/sm_sideeffect.c | 4 ++-- net/sctp/transport.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index f64d882c8698..3dd900e42b85 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -461,7 +461,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, else { if (!mod_timer(&t->proto_unreach_timer, jiffies + (HZ/20))) - sctp_association_hold(asoc); + sctp_transport_hold(t); } } else { struct net *net = sock_net(sk); @@ -470,7 +470,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk, "encountered!\n", __func__); if (del_timer(&t->proto_unreach_timer)) - sctp_association_put(asoc); + sctp_transport_put(t); sctp_do_sm(net, SCTP_EVENT_T_OTHER, SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index bb4f1d0da153..2a94240eac36 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -434,7 +434,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t) /* Try again later. */ if (!mod_timer(&transport->proto_unreach_timer, jiffies + (HZ/20))) - sctp_association_hold(asoc); + sctp_transport_hold(transport); goto out_unlock; } @@ -450,7 +450,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t) out_unlock: bh_unlock_sock(sk); - sctp_association_put(asoc); + sctp_transport_put(transport); } /* Handle the timeout of the RE-CONFIG timer. */ diff --git a/net/sctp/transport.c b/net/sctp/transport.c index c0d55ed62d2e..78302e547b2b 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -148,7 +148,7 @@ void sctp_transport_free(struct sctp_transport *transport) /* Delete the ICMP proto unreachable timer if it's active. */ if (del_timer(&transport->proto_unreach_timer)) - sctp_association_put(transport->asoc); + sctp_transport_put(transport); sctp_transport_put(transport); } From 88c2cfa3f78120bc755e538ffa8b456fc2ecf874 Mon Sep 17 00:00:00 2001 From: Ryan Sharpelletti Date: Mon, 16 Nov 2020 17:44:13 +0000 Subject: [PATCH 500/636] tcp: only postpone PROBE_RTT if RTT is < current min_rtt estimate [ Upstream commit 1b9e2a8c99a5c021041bfb2d512dc3ed92a94ffd ] During loss recovery, retransmitted packets are forced to use TCP timestamps to calculate the RTT samples, which have a millisecond granularity. BBR is designed using a microsecond granularity. As a result, multiple RTT samples could be truncated to the same RTT value during loss recovery. This is problematic, as BBR will not enter PROBE_RTT if the RTT sample is <= the current min_rtt sample, meaning that if there are persistent losses, PROBE_RTT will constantly be pushed off and potentially never re-entered. This patch makes sure that BBR enters PROBE_RTT by checking if RTT sample is < the current min_rtt sample, rather than <=. The Netflix transport/TCP team discovered this bug in the Linux TCP BBR code during lab tests. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Ryan Sharpelletti Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Yuchung Cheng Link: https://lore.kernel.org/r/20201116174412.1433277-1-sharpelletti.kdev@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bbr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 93f176336297..b70c9365e131 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -917,7 +917,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) filter_expired = after(tcp_jiffies32, bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); if (rs->rtt_us >= 0 && - (rs->rtt_us <= bbr->min_rtt_us || + (rs->rtt_us < bbr->min_rtt_us || (filter_expired && !rs->is_ack_delayed))) { bbr->min_rtt_us = rs->rtt_us; bbr->min_rtt_stamp = tcp_jiffies32; From ba0046dc414d64fff9bd5d74a0641cd25b214a2a Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Wed, 21 Oct 2020 11:05:41 +0300 Subject: [PATCH 501/636] net/mlx5: Disable QoS when min_rates on all VFs are zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 470b74758260e4abc2508cf1614573c00a00465c ] Currently when QoS is enabled for VF and any min_rate is configured, the driver sets bw_share value to at least 1 and doesn’t allow to set it to 0 to make minimal rate unlimited. It means there is always a minimal rate configured for every VF, even if user tries to remove it. In order to make QoS disable possible, check whether all vports have configured min_rate = 0. If this is true, set their bw_share to 0 to disable min_rate limitations. Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7366033cd31c..2190daace873 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1999,12 +1999,15 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) max_guarantee = evport->info.min_rate; } - return max_t(u32, max_guarantee / fw_max_bw_share, 1); + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + return 0; } -static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) +static int normalize_vports_min_rate(struct mlx5_eswitch *esw) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = calculate_vports_min_rate_divider(esw); struct mlx5_vport *evport; u32 vport_max_rate; u32 vport_min_rate; @@ -2018,9 +2021,9 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) continue; vport_min_rate = evport->info.min_rate; vport_max_rate = evport->info.max_rate; - bw_share = MLX5_MIN_BW_SHARE; + bw_share = 0; - if (vport_min_rate) + if (divider) bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate, divider, fw_max_bw_share); @@ -2045,7 +2048,6 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, struct mlx5_vport *evport; u32 fw_max_bw_share; u32 previous_min_rate; - u32 divider; bool min_rate_supported; bool max_rate_supported; int err = 0; @@ -2071,8 +2073,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, previous_min_rate = evport->info.min_rate; evport->info.min_rate = min_rate; - divider = calculate_vports_min_rate_divider(esw); - err = normalize_vports_min_rate(esw, divider); + err = normalize_vports_min_rate(esw); if (err) { evport->info.min_rate = previous_min_rate; goto unlock; From d36ae6f988ad045af852a846288d0dea7ab1447f Mon Sep 17 00:00:00 2001 From: Filip Moc Date: Tue, 17 Nov 2020 18:36:31 +0100 Subject: [PATCH 502/636] net: usb: qmi_wwan: Set DTR quirk for MR400 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit df8d85d8c69d6837817e54dcb73c84a8b5a13877 ] LTE module MR400 embedded in TL-MR6400 v4 requires DTR to be set. Signed-off-by: Filip Moc Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20201117173631.GA550981@moc6.cz Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6e0b3dc14aa4..ebd630a94571 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1029,7 +1029,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9011, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9021, 1)}, {QMI_FIXED_INTF(0x05c6, 0x9022, 2)}, - {QMI_FIXED_INTF(0x05c6, 0x9025, 4)}, /* Alcatel-sbell ASB TL131 TDD LTE (China Mobile) */ + {QMI_QUIRK_SET_DTR(0x05c6, 0x9025, 4)}, /* Alcatel-sbell ASB TL131 TDD LTE (China Mobile) */ {QMI_FIXED_INTF(0x05c6, 0x9026, 3)}, {QMI_FIXED_INTF(0x05c6, 0x902e, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9031, 5)}, From 82e2d2b06485a51191a5a72538e0bef5397f1014 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 12 Nov 2020 16:42:10 +1030 Subject: [PATCH 503/636] net/ncsi: Fix netlink registration [ Upstream commit 1922a46b8c18cb09d33e06a6cc2e43844ac1b9d0 ] If a user unbinds and re-binds a NC-SI aware driver the kernel will attempt to register the netlink interface at runtime. The structure is marked __ro_after_init so registration fails spectacularly at this point. # echo 1e660000.ethernet > /sys/bus/platform/drivers/ftgmac100/unbind # echo 1e660000.ethernet > /sys/bus/platform/drivers/ftgmac100/bind ftgmac100 1e660000.ethernet: Read MAC address 52:54:00:12:34:56 from chip ftgmac100 1e660000.ethernet: Using NCSI interface 8<--- cut here --- Unable to handle kernel paging request at virtual address 80a8f858 pgd = 8c768dd6 [80a8f858] *pgd=80a0841e(bad) Internal error: Oops: 80d [#1] SMP ARM CPU: 0 PID: 116 Comm: sh Not tainted 5.10.0-rc3-next-20201111-00003-gdd25b227ec1e #51 Hardware name: Generic DT based system PC is at genl_register_family+0x1f8/0x6d4 LR is at 0xff26ffff pc : [<8073f930>] lr : [] psr: 20000153 sp : 8553bc80 ip : 81406244 fp : 8553bd04 r10: 8085d12c r9 : 80a8f73c r8 : 85739000 r7 : 00000017 r6 : 80a8f860 r5 : 80c8ab98 r4 : 80a8f858 r3 : 00000000 r2 : 00000000 r1 : 81406130 r0 : 00000017 Flags: nzCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment none Control: 00c5387d Table: 85524008 DAC: 00000051 Process sh (pid: 116, stack limit = 0x1f1988d6) ... Backtrace: [<8073f738>] (genl_register_family) from [<80860ac0>] (ncsi_init_netlink+0x20/0x48) r10:8085d12c r9:80c8fb0c r8:85739000 r7:00000000 r6:81218000 r5:85739000 r4:8121c000 [<80860aa0>] (ncsi_init_netlink) from [<8085d740>] (ncsi_register_dev+0x1b0/0x210) r5:8121c400 r4:8121c000 [<8085d590>] (ncsi_register_dev) from [<805a8060>] (ftgmac100_probe+0x6e0/0x778) r10:00000004 r9:80950228 r8:8115bc10 r7:8115ab00 r6:9eae2c24 r5:813b6f88 r4:85739000 [<805a7980>] (ftgmac100_probe) from [<805355ec>] (platform_drv_probe+0x58/0xa8) r9:80c76bb0 r8:00000000 r7:80cd4974 r6:80c76bb0 r5:8115bc10 r4:00000000 [<80535594>] (platform_drv_probe) from [<80532d58>] (really_probe+0x204/0x514) r7:80cd4974 r6:00000000 r5:80cd4868 r4:8115bc10 Jakub pointed out that ncsi_register_dev is obviously broken, because there is only one family so it would never work if there was more than one ncsi netdev. Fix the crash by registering the netlink family once on boot, and drop the code to unregister it. Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family") Signed-off-by: Joel Stanley Reviewed-by: Samuel Mendoza-Jonas Link: https://lore.kernel.org/r/20201112061210.914621-1-joel@jms.id.au Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ncsi/ncsi-manage.c | 5 ----- net/ncsi/ncsi-netlink.c | 22 +++------------------- net/ncsi/ncsi-netlink.h | 3 --- 3 files changed, 3 insertions(+), 27 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 091284760d21..f65afa7e7d28 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1484,9 +1484,6 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, ndp->ptype.dev = dev; dev_add_pack(&ndp->ptype); - /* Set up generic netlink interface */ - ncsi_init_netlink(dev); - return nd; } EXPORT_SYMBOL_GPL(ncsi_register_dev); @@ -1566,8 +1563,6 @@ void ncsi_unregister_dev(struct ncsi_dev *nd) #endif spin_unlock_irqrestore(&ncsi_dev_lock, flags); - ncsi_unregister_netlink(nd->dev); - kfree(ndp); } EXPORT_SYMBOL_GPL(ncsi_unregister_dev); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 45f33d6dedf7..a2f4280e2889 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -397,24 +397,8 @@ static struct genl_family ncsi_genl_family __ro_after_init = { .n_ops = ARRAY_SIZE(ncsi_ops), }; -int ncsi_init_netlink(struct net_device *dev) +static int __init ncsi_init_netlink(void) { - int rc; - - rc = genl_register_family(&ncsi_genl_family); - if (rc) - netdev_err(dev, "ncsi: failed to register netlink family\n"); - - return rc; -} - -int ncsi_unregister_netlink(struct net_device *dev) -{ - int rc; - - rc = genl_unregister_family(&ncsi_genl_family); - if (rc) - netdev_err(dev, "ncsi: failed to unregister netlink family\n"); - - return rc; + return genl_register_family(&ncsi_genl_family); } +subsys_initcall(ncsi_init_netlink); diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h index 91a5c256f8c4..6c55a6775157 100644 --- a/net/ncsi/ncsi-netlink.h +++ b/net/ncsi/ncsi-netlink.h @@ -14,7 +14,4 @@ #include "internal.h" -int ncsi_init_netlink(struct net_device *dev); -int ncsi_unregister_netlink(struct net_device *dev); - #endif /* __NCSI_NETLINK_H__ */ From fe428936b9157d6254e5d7c09d5f62ed98eb66c6 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 17 Nov 2020 13:14:48 +1030 Subject: [PATCH 504/636] net: ftgmac100: Fix crash when removing driver [ Upstream commit 3d5179458d22dc0b4fdc724e4bed4231a655112a ] When removing the driver we would hit BUG_ON(!list_empty(&dev->ptype_specific)) in net/core/dev.c due to still having the NC-SI packet handler registered. # echo 1e660000.ethernet > /sys/bus/platform/drivers/ftgmac100/unbind ------------[ cut here ]------------ kernel BUG at net/core/dev.c:10254! Internal error: Oops - BUG: 0 [#1] SMP ARM CPU: 0 PID: 115 Comm: sh Not tainted 5.10.0-rc3-next-20201111-00007-g02e0365710c4 #46 Hardware name: Generic DT based system PC is at netdev_run_todo+0x314/0x394 LR is at cpumask_next+0x20/0x24 pc : [<806f5830>] lr : [<80863cb0>] psr: 80000153 sp : 855bbd58 ip : 00000001 fp : 855bbdac r10: 80c03d00 r9 : 80c06228 r8 : 81158c54 r7 : 00000000 r6 : 80c05dec r5 : 80c05d18 r4 : 813b9280 r3 : 813b9054 r2 : 8122c470 r1 : 00000002 r0 : 00000002 Flags: Nzcv IRQs on FIQs off Mode SVC_32 ISA ARM Segment none Control: 00c5387d Table: 85514008 DAC: 00000051 Process sh (pid: 115, stack limit = 0x7cb5703d) ... Backtrace: [<806f551c>] (netdev_run_todo) from [<80707eec>] (rtnl_unlock+0x18/0x1c) r10:00000051 r9:854ed710 r8:81158c54 r7:80c76bb0 r6:81158c10 r5:8115b410 r4:813b9000 [<80707ed4>] (rtnl_unlock) from [<806f5db8>] (unregister_netdev+0x2c/0x30) [<806f5d8c>] (unregister_netdev) from [<805a8180>] (ftgmac100_remove+0x20/0xa8) r5:8115b410 r4:813b9000 [<805a8160>] (ftgmac100_remove) from [<805355e4>] (platform_drv_remove+0x34/0x4c) Fixes: bd466c3fb5a4 ("net/faraday: Support NCSI mode") Signed-off-by: Joel Stanley Link: https://lore.kernel.org/r/20201117024448.1170761-1-joel@jms.id.au Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/faraday/ftgmac100.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index e4fc38cbe853..acf27c395286 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1884,6 +1884,8 @@ static int ftgmac100_probe(struct platform_device *pdev) return 0; err_ncsi_dev: + if (priv->ndev) + ncsi_unregister_dev(priv->ndev); err_register_netdev: ftgmac100_destroy_mdio(netdev); err_setup_mdio: @@ -1904,6 +1906,8 @@ static int ftgmac100_remove(struct platform_device *pdev) netdev = platform_get_drvdata(pdev); priv = netdev_priv(netdev); + if (priv->ndev) + ncsi_unregister_dev(priv->ndev); unregister_netdev(netdev); clk_disable_unprepare(priv->clk); From 6c5f86abc3b58054455f23eaee5537dba353c8d6 Mon Sep 17 00:00:00 2001 From: Jianqun Xu Date: Tue, 13 Oct 2020 14:37:30 +0800 Subject: [PATCH 505/636] pinctrl: rockchip: enable gpio pclk for rockchip_gpio_to_irq [ Upstream commit 63fbf8013b2f6430754526ef9594f229c7219b1f ] There need to enable pclk_gpio when do irq_create_mapping, since it will do access to gpio controller. Signed-off-by: Jianqun Xu Reviewed-by: Heiko Stuebner Reviewed-by: Kever Yang Link: https://lore.kernel.org/r/20201013063731.3618-3-jay.xu@rock-chips.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-rockchip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 005df24f5b3f..4d3b62707524 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -2778,7 +2778,9 @@ static int rockchip_gpio_to_irq(struct gpio_chip *gc, unsigned offset) if (!bank->domain) return -ENXIO; + clk_enable(bank->clk); virq = irq_create_mapping(bank->domain, offset); + clk_disable(bank->clk); return (virq) ? : -ENXIO; } From 4d3413ba2360363f77fa133af128a3034203a021 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 2 Nov 2020 22:24:39 -0800 Subject: [PATCH 506/636] scsi: ufs: Fix unbalanced scsi_block_reqs_cnt caused by ufshcd_hold() [ Upstream commit da3fecb0040324c08f1587e5bff1f15f36be1872 ] The scsi_block_reqs_cnt increased in ufshcd_hold() is supposed to be decreased back in ufshcd_ungate_work() in a paired way. However, if specific ufshcd_hold/release sequences are met, it is possible that scsi_block_reqs_cnt is increased twice but only one ungate work is queued. To make sure scsi_block_reqs_cnt is handled by ufshcd_hold() and ufshcd_ungate_work() in a paired way, increase it only if queue_work() returns true. Link: https://lore.kernel.org/r/1604384682-15837-2-git-send-email-cang@codeaurora.org Reviewed-by: Hongwu Su Reviewed-by: Stanley Chu Reviewed-by: Bean Huo Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index b2cbdd01ab10..a63119c35fde 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1592,12 +1592,12 @@ start: * work and to enable clocks. */ case CLKS_OFF: - ufshcd_scsi_block_requests(hba); hba->clk_gating.state = REQ_CLKS_ON; trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); - queue_work(hba->clk_gating.clk_gating_workq, - &hba->clk_gating.ungate_work); + if (queue_work(hba->clk_gating.clk_gating_workq, + &hba->clk_gating.ungate_work)) + ufshcd_scsi_block_requests(hba); /* * fall through to check if we should wait for this * work to be done or not. From 496b92f27f1fa5be414a6f537c3afff7afdb006d Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Mon, 12 Oct 2020 12:47:13 -0700 Subject: [PATCH 507/636] selftests: kvm: Fix the segment descriptor layout to match the actual layout [ Upstream commit df11f7dd5834146defa448acba097e8d7703cc42 ] Fix the layout of 'struct desc64' to match the layout described in the SDM Vol 3, Chapter 3 "Protected-Mode Memory Management", section 3.4.5 "Segment Descriptors", Figure 3-8 "Segment Descriptor". The test added later in this series relies on this and crashes if this layout is not correct. Signed-off-by: Aaron Lewis Reviewed-by: Alexander Graf Message-Id: <20201012194716.3950330-2-aaronlewis@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- tools/testing/selftests/kvm/include/x86.h | 2 +- tools/testing/selftests/kvm/lib/x86.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h index 42c3596815b8..a7667a613bbc 100644 --- a/tools/testing/selftests/kvm/include/x86.h +++ b/tools/testing/selftests/kvm/include/x86.h @@ -59,7 +59,7 @@ enum x86_register { struct desc64 { uint16_t limit0; uint16_t base0; - unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; uint32_t base3; uint32_t zero1; diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c index 4d35eba73dc9..800fe36064f9 100644 --- a/tools/testing/selftests/kvm/lib/x86.c +++ b/tools/testing/selftests/kvm/lib/x86.c @@ -449,11 +449,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->limit0 = segp->limit & 0xFFFF; desc->base0 = segp->base & 0xFFFF; desc->base1 = segp->base >> 16; - desc->s = segp->s; desc->type = segp->type; + desc->s = segp->s; desc->dpl = segp->dpl; desc->p = segp->present; desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; desc->l = segp->l; desc->db = segp->db; desc->g = segp->g; From 2e22375d80bebc8ea9aa930c047da93adb8d2390 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 7 Nov 2020 14:32:54 +0100 Subject: [PATCH 508/636] ACPI: button: Add DMI quirk for Medion Akoya E2228T [ Upstream commit 7daaa06357bf7f1874b62bb1ea9d66a51d4e567e ] The Medion Akoya E2228T's ACPI _LID implementation is quite broken, it has the same issues as the one from the Medion Akoya E2215T: 1. For notifications it uses an ActiveLow Edge GpioInt, rather then an ActiveBoth one, meaning that the device is only notified when the lid is closed, not when it is opened. 2. Matching with this its _LID method simply always returns 0 (closed) In order for the Linux LID code to work properly with this implementation, the lid_init_state selection needs to be set to ACPI_BUTTON_LID_INIT_OPEN, add a DMI quirk for this. While working on this I also found out that the MD60### part of the model number differs per country/batch while all of the E2215T and E2228T models have this issue, so also remove the " MD60198" part from the E2215T quirk. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/button.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index f43f5adc21b6..abf101451c92 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -98,7 +98,18 @@ static const struct dmi_system_id lid_blacklst[] = { */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), - DMI_MATCH(DMI_PRODUCT_NAME, "E2215T MD60198"), + DMI_MATCH(DMI_PRODUCT_NAME, "E2215T"), + }, + .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, + }, + { + /* + * Medion Akoya E2228T, notification of the LID device only + * happens on close, not on open and _LID always returns closed. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MEDION"), + DMI_MATCH(DMI_PRODUCT_NAME, "E2228T"), }, .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, }, From 331298e711e7da5c296c55b4e7ec9ee7894126a7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Nov 2020 09:57:55 +0000 Subject: [PATCH 509/636] arm64: psci: Avoid printing in cpu_psci_cpu_die() [ Upstream commit 891deb87585017d526b67b59c15d38755b900fea ] cpu_psci_cpu_die() is called in the context of the dying CPU, which will no longer be online or tracked by RCU. It is therefore not generally safe to call printk() if the PSCI "cpu off" request fails, so remove the pr_crit() invocation. Cc: Qian Cai Cc: "Paul E. McKenney" Cc: Catalin Marinas Link: https://lore.kernel.org/r/20201106103602.9849-2-will@kernel.org Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/psci.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 3856d51c645b..3ebb2a56e5f7 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -69,7 +69,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu) static void cpu_psci_cpu_die(unsigned int cpu) { - int ret; /* * There are no known implementations of PSCI actually using the * power state field, pass a sensible default for now. @@ -77,9 +76,7 @@ static void cpu_psci_cpu_die(unsigned int cpu) u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT; - ret = psci_ops.cpu_off(state); - - pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); + psci_ops.cpu_off(state); } static int cpu_psci_cpu_kill(unsigned int cpu) From c7d7fe001b6fb46d2e625c2f28acecf0c2d7dae6 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 10 Nov 2020 16:49:29 -0800 Subject: [PATCH 510/636] vfs: remove lockdep bogosity in __sb_start_write [ Upstream commit 22843291efc986ce7722610073fcf85a39b4cb13 ] __sb_start_write has some weird looking lockdep code that claims to exist to handle nested freeze locking requests from xfs. The code as written seems broken -- if we think we hold a read lock on any of the higher freeze levels (e.g. we hold SB_FREEZE_WRITE and are trying to lock SB_FREEZE_PAGEFAULT), it converts a blocking lock attempt into a trylock. However, it's not correct to downgrade a blocking lock attempt to a trylock unless the downgrading code or the callers are prepared to deal with that situation. Neither __sb_start_write nor its callers handle this at all. For example: sb_start_pagefault ignores the return value completely, with the result that if xfs_filemap_fault loses a race with a different thread trying to fsfreeze, it will proceed without pagefault freeze protection (thereby breaking locking rules) and then unlocks the pagefault freeze lock that it doesn't own on its way out (thereby corrupting the lock state), which leads to a system hang shortly afterwards. Normally, this won't happen because our ownership of a read lock on a higher freeze protection level blocks fsfreeze from grabbing a write lock on that higher level. *However*, if lockdep is offline, lock_is_held_type unconditionally returns 1, which means that percpu_rwsem_is_held returns 1, which means that __sb_start_write unconditionally converts blocking freeze lock attempts into trylocks, even when we *don't* hold anything that would block a fsfreeze. Apparently this all held together until 5.10-rc1, when bugs in lockdep caused lockdep to shut itself off early in an fstests run, and once fstests gets to the "race writes with freezer" tests, kaboom. This might explain the long trail of vanishingly infrequent livelocks in fstests after lockdep goes offline that I've never been able to diagnose. We could fix it by spinning on the trylock if wait==true, but AFAICT the locking works fine if lockdep is not built at all (and I didn't see any complaints running fstests overnight), so remove this snippet entirely. NOTE: Commit f4b554af9931 in 2015 created the current weird logic (which used to exist in a different form in commit 5accdf82ba25c from 2012) in __sb_start_write. XFS solved this whole problem in the late 2.6 era by creating a variant of transactions (XFS_TRANS_NO_WRITECOUNT) that don't grab intwrite freeze protection, thus making lockdep's solution unnecessary. The commit claims that Dave Chinner explained that the trylock hack + comment could be removed, but nobody ever did. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Sasha Levin --- fs/super.c | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/fs/super.c b/fs/super.c index f3a8c008e164..9fb4553c46e6 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1360,36 +1360,11 @@ EXPORT_SYMBOL(__sb_end_write); */ int __sb_start_write(struct super_block *sb, int level, bool wait) { - bool force_trylock = false; - int ret = 1; + if (!wait) + return percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); -#ifdef CONFIG_LOCKDEP - /* - * We want lockdep to tell us about possible deadlocks with freezing - * but it's it bit tricky to properly instrument it. Getting a freeze - * protection works as getting a read lock but there are subtle - * problems. XFS for example gets freeze protection on internal level - * twice in some cases, which is OK only because we already hold a - * freeze protection also on higher level. Due to these cases we have - * to use wait == F (trylock mode) which must not fail. - */ - if (wait) { - int i; - - for (i = 0; i < level - 1; i++) - if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) { - force_trylock = true; - break; - } - } -#endif - if (wait && !force_trylock) - percpu_down_read(sb->s_writers.rw_sem + level-1); - else - ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); - - WARN_ON(force_trylock && !ret); - return ret; + percpu_down_read(sb->s_writers.rw_sem + level-1); + return 1; } EXPORT_SYMBOL(__sb_start_write); From 69bad74c825cc2992c22ad8db7f3758a37c9b1c7 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Thu, 22 Oct 2020 23:13:01 +0200 Subject: [PATCH 511/636] arm64: dts: allwinner: a64: Pine64 Plus: Fix ethernet node [ Upstream commit 927f42fcc1b4f7d04a2ac5cf02f25612aa8923a4 ] According to board schematic, PHY provides both, RX and TX delays. However, according to "fix" Realtek provided for this board, only TX delay should be provided by PHY. Tests show that both variants work but TX only PHY delay works slightly better. Update ethernet node to reflect the fact that PHY provides TX delay. Fixes: 94f442886711 ("arm64: dts: allwinner: A64: Restore EMAC changes") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201022211301.3548422-1-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index d5b6e8159a33..5d0905f0f1c1 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -52,7 +52,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; phy-handle = <&ext_rgmii_phy>; status = "okay"; }; From ca174b841b23c482eccc98fdaca91cd8015e0d24 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Fri, 23 Oct 2020 20:48:58 +0200 Subject: [PATCH 512/636] arm64: dts: allwinner: h5: OrangePi PC2: Fix ethernet node [ Upstream commit b34bf9f6a623ddb82600a5ed5c644224122395e1 ] RX and TX delay are provided by ethernet PHY. Reflect that in ethernet node. Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201023184858.3272918-1-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts index 3e0d5a9c096d..5fbfa76daae2 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts @@ -157,7 +157,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 135d34f9e2d4a132e3567fa9c09f621df86ee1a0 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 25 Oct 2020 09:19:49 +0100 Subject: [PATCH 513/636] ARM: dts: sun8i: r40: bananapi-m2-ultra: Fix ethernet node [ Upstream commit b3eec3212e66ece33f69be0de98d54e67834e798 ] Ethernet PHY on BananaPi M2 Ultra provides RX and TX delays. Fix ethernet node to reflect that fact. Fixes: c36fd5a48bd2 ("ARM: dts: sun8i: r40: bananapi-m2-ultra: Enable GMAC ethernet controller") Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20201025081949.783443-1-jernej.skrabec@siol.net Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts index b2a773a718e1..5e5223a48ac7 100644 --- a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts +++ b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts @@ -121,7 +121,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_dc1sw>; status = "okay"; }; From 763c1552ba021ca38cc4f54aec5e018037105257 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:06 +0800 Subject: [PATCH 514/636] Revert "arm: sun8i: orangepi-pc-plus: Set EMAC activity LEDs to active high" [ Upstream commit 8d80e2f00a42ef10b54e1b2d9e97314f8fd046c0 ] This reverts commit 75ee680cbd2e4d0156b94f9fec50076361ab12f2. Turns out the activity and link LEDs on the RJ45 port are active low, just like on the Orange Pi PC. Revert the commit that says otherwise. Fixes: 75ee680cbd2e ("arm: sun8i: orangepi-pc-plus: Set EMAC activity LEDs to active high") Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Tested-by: Jernej Skrabec Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-1-wens@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts index 71fb73208939..babf4cf1b2f6 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts @@ -53,11 +53,6 @@ }; }; -&emac { - /* LEDs changed to active high on the plus */ - /delete-property/ allwinner,leds-active-low; -}; - &mmc1 { vmmc-supply = <®_vcc3v3>; bus-width = <4>; From 23a1a2eb19f3205f21ec7f30f8914cfab025196a Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:10 +0800 Subject: [PATCH 515/636] ARM: dts: sun8i: h3: orangepi-plus2e: Enable RGMII RX/TX delay on Ethernet PHY [ Upstream commit e080ab31a0aa126b0a7e4f67f2b01b371b852c88 ] The Ethernet PHY on the Orange Pi Plus 2E has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)") Fixes: 7a78ef92cdc5 ("ARM: sun8i: h3: Enable EMAC with external PHY on Orange Pi Plus 2E") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Tested-by: Jernej Skrabec Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-5-wens@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts index 6dbf7b2e0c13..b6ca45d18e51 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts @@ -67,7 +67,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 986e9f5de08c794d31a39ef777336aa769ac29da Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:11 +0800 Subject: [PATCH 516/636] ARM: dts: sun8i: a83t: Enable both RGMII RX/TX delay on Ethernet PHY [ Upstream commit 57dbe558457bf4042169bc1f334e3b53a8480a1c ] The Ethernet PHY on the Bananapi M3 and Cubietruck Plus have the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 039359948a4b ("ARM: dts: sun8i: a83t: Enable Ethernet on two boards") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-6-wens@kernel.org Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts | 2 +- arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts index f250b20af493..9be1c4a3d95f 100644 --- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts +++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts @@ -131,7 +131,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_sw>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; allwinner,rx-delay-ps = <700>; allwinner,tx-delay-ps = <700>; status = "okay"; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index 7e74ba83f809..75396993195d 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -168,7 +168,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_dldo4>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 5f86796e5b68286687d3b83e6c602ed0e10c2a8c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sun, 25 Oct 2020 00:25:15 +0800 Subject: [PATCH 517/636] arm64: dts: allwinner: a64: bananapi-m64: Enable RGMII RX/TX delay on PHY [ Upstream commit 1a9a8910b2153cd3c4f3f2f8defcb853ead3b1fd ] The Ethernet PHY on the Bananapi M64 has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: e7295499903d ("arm64: allwinner: bananapi-m64: Enable dwmac-sun8i") Fixes: 94f442886711 ("arm64: dts: allwinner: A64: Restore EMAC changes") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Tested-by: Corentin Labbe Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201024162515.30032-10-wens@kernel.org Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 094cfed13df9..13ce24e922ee 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -97,7 +97,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_dc1sw>; status = "okay"; From cc63818d353767d4fec5820072ad9e80bcc495fd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 26 Oct 2020 17:10:09 -0700 Subject: [PATCH 518/636] Input: adxl34x - clean up a data type in adxl34x_probe() [ Upstream commit 33b6c39e747c552fa770eecebd1776f1f4a222b1 ] The "revid" is used to store negative error codes so it should be an int type. Fixes: e27c729219ad ("Input: add driver for ADXL345/346 Digital Accelerometers") Signed-off-by: Dan Carpenter Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20201026072824.GA1620546@mwanda Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/misc/adxl34x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c index a3e79bf5a04b..3695dd7dbb9b 100644 --- a/drivers/input/misc/adxl34x.c +++ b/drivers/input/misc/adxl34x.c @@ -696,7 +696,7 @@ struct adxl34x *adxl34x_probe(struct device *dev, int irq, struct input_dev *input_dev; const struct adxl34x_platform_data *pdata; int err, range, i; - unsigned char revid; + int revid; if (!irq) { dev_err(dev, "no IRQ?\n"); From f09326086d2368c6ab7aa50e5aed18b4092c59a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 23 Oct 2020 12:44:40 -0700 Subject: [PATCH 519/636] MIPS: export has_transparent_hugepage() for modules [ Upstream commit 31b4d8e172f614adc53ddecb4b6b2f6411a49b84 ] MIPS should export its local version of "has_transparent_hugepage" so that loadable modules (dax) can use it. Fixes this build error: ERROR: modpost: "has_transparent_hugepage" [drivers/dax/dax.ko] undefined! Fixes: fd8cfd300019 ("arch: fix has_transparent_hugepage()") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Dan Williams Cc: Vishal Verma Cc: Dave Jiang Cc: linux-nvdimm@lists.01.org Cc: Hugh Dickins Cc: Andrew Morton Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/mm/tlb-r4k.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 0596505770db..11985399c469 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -424,6 +424,7 @@ int has_transparent_hugepage(void) } return mask == PM_HUGE_MASK; } +EXPORT_SYMBOL(has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ From 4f25ec59fa3607ad8bcf5b0969848bbeddf0c024 Mon Sep 17 00:00:00 2001 From: Nenad Peric Date: Wed, 28 Oct 2020 12:58:17 +0100 Subject: [PATCH 520/636] arm64: dts: allwinner: h5: OrangePi Prime: Fix ethernet node [ Upstream commit 107954afc5df667da438644aa4982606663f9b17 ] RX and TX delay are provided by ethernet PHY. Reflect that in ethernet node. Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes") Signed-off-by: Nenad Peric Signed-off-by: Maxime Ripard Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20201028115817.68113-1-nperic@gmail.com Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts index b75ca4d7d001..7a30211d59ef 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts @@ -164,7 +164,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From de42130e243d67aea194a4f4d1495d221b6200b7 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Sat, 24 Oct 2020 23:11:20 +0300 Subject: [PATCH 521/636] arm: dts: imx6qdl-udoo: fix rgmii phy-mode for ksz9031 phy [ Upstream commit 7dd8f0ba88fce98e2953267a66af74c6f4792a56 ] Commit bcf3440c6dd7 ("net: phy: micrel: add phy-mode support for the KSZ9031 PHY") fixed micrel phy driver adding proper support for phy modes. Adapt imx6q-udoo board phy settings : explicitly set required delay configuration using "rgmii-id". Fixes: cbd54fe0b2bc ("ARM: dts: imx6dl-udoo: Add board support based off imx6q-udoo") Signed-off-by: Sergey Matyukevich Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl-udoo.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index 4f27861bbb32..4cc9858f7ff8 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -97,7 +97,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; From 84820e918532f9d575d532e9a5291b4fdbe9a684 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 5 Nov 2020 18:13:20 -0300 Subject: [PATCH 522/636] ARM: dts: imx50-evk: Fix the chip select 1 IOMUX [ Upstream commit 33d0d843872c5ddbe28457a92fc6f2487315fb9f ] The SPI chip selects are represented as: cs-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>, <&gpio4 13 GPIO_ACTIVE_LOW>; , which means that they are used in GPIO function instead of native SPI mode. Fix the IOMUX for the chip select 1 to use GPIO4_13 instead of the native CSPI_SSI function. Fixes: c605cbf5e135 ("ARM: dts: imx: add device tree support for Freescale imx50evk board") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx50-evk.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx50-evk.dts b/arch/arm/boot/dts/imx50-evk.dts index a25da415cb02..907339bc81e5 100644 --- a/arch/arm/boot/dts/imx50-evk.dts +++ b/arch/arm/boot/dts/imx50-evk.dts @@ -59,7 +59,7 @@ MX50_PAD_CSPI_MISO__CSPI_MISO 0x00 MX50_PAD_CSPI_MOSI__CSPI_MOSI 0x00 MX50_PAD_CSPI_SS0__GPIO4_11 0xc4 - MX50_PAD_ECSPI1_MOSI__CSPI_SS1 0xf4 + MX50_PAD_ECSPI1_MOSI__GPIO4_13 0x84 >; }; From 5c602c3908c985b31295fd5e95480158cdd6b029 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Wed, 11 Nov 2020 17:48:52 -0800 Subject: [PATCH 523/636] Input: resistive-adc-touch - fix kconfig dependency on IIO_BUFFER [ Upstream commit 676650d007e06fddcf3fe38238251d71bd179641 ] When TOUCHSCREEN_ADC is enabled and IIO_BUFFER is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for IIO_BUFFER_CB Depends on [n]: IIO [=y] && IIO_BUFFER [=n] Selected by [y]: - TOUCHSCREEN_ADC [=y] && !UML && INPUT [=y] && INPUT_TOUCHSCREEN [=y] && IIO [=y] The reason is that TOUCHSCREEN_ADC selects IIO_BUFFER_CB without depending on or selecting IIO_BUFFER while IIO_BUFFER_CB depends on IIO_BUFFER. This can also fail building the kernel. Honor the kconfig dependency to remove unmet direct dependency warnings and avoid any potential build failures. Fixes: aa132ffb6b0a ("input: touchscreen: resistive-adc-touch: add generic resistive ADC touchscreen") Signed-off-by: Necip Fazil Yildiran Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/20201102221504.541279-1-fazilyildiran@gmail.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 2a80675cfd94..de400d76df55 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -95,6 +95,7 @@ config TOUCHSCREEN_AD7879_SPI config TOUCHSCREEN_ADC tristate "Generic ADC based resistive touchscreen" depends on IIO + select IIO_BUFFER select IIO_BUFFER_CB help Say Y here if you want to use the generic ADC From c4d54937576e5b9cdedb84e371eedf1f0ba67c0b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 4 Nov 2020 17:42:29 +0800 Subject: [PATCH 524/636] perf lock: Don't free "lock_seq_stat" if read_count isn't zero [ Upstream commit b0e5a05cc9e37763c7f19366d94b1a6160c755bc ] When execute command "perf lock report", it hits failure and outputs log as follows: perf: builtin-lock.c:623: report_lock_release_event: Assertion `!(seq->read_count < 0)' failed. Aborted This is an imbalance issue. The locking sequence structure "lock_seq_stat" contains the reader counter and it is used to check if the locking sequence is balance or not between acquiring and releasing. If the tool wrongly frees "lock_seq_stat" when "read_count" isn't zero, the "read_count" will be reset to zero when allocate a new structure at the next time; thus it causes the wrong counting for reader and finally results in imbalance issue. To fix this issue, if detects "read_count" is not zero (means still have read user in the locking sequence), goto the "end" tag to skip freeing structure "lock_seq_stat". Fixes: e4cef1f65061 ("perf lock: Fix state machine to recognize lock sequence") Signed-off-by: Leo Yan Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20201104094229.17509-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-lock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 6e0189df2b3b..0cb7f7b731fb 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -620,7 +620,7 @@ static int report_lock_release_event(struct perf_evsel *evsel, case SEQ_STATE_READ_ACQUIRED: seq->read_count--; BUG_ON(seq->read_count < 0); - if (!seq->read_count) { + if (seq->read_count) { ls->nr_release++; goto end; } From 901e04cd478133ba6458fecbd3b232b528d26f3b Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 10 Nov 2020 16:16:40 -0800 Subject: [PATCH 525/636] ip_tunnels: Set tunnel option flag when tunnel metadata is present [ Upstream commit 9c2e14b48119b39446031d29d994044ae958d8fc ] Currently, we may set the tunnel option flag when the size of metadata is zero. For example, we set TUNNEL_GENEVE_OPT in the receive function no matter the geneve option is present or not. As this may result in issues on the tunnel flags consumers, this patch fixes the issue. Related discussion: * https://lore.kernel.org/netdev/1604448694-19351-1-git-send-email-yihung.wei@gmail.com/T/#u Fixes: 256c87c17c53 ("net: check tunnel option type in tunnel flags") Signed-off-by: Yi-Hung Wei Link: https://lore.kernel.org/r/1605053800-74072-1-git-send-email-yihung.wei@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/geneve.c | 3 +-- include/net/ip_tunnels.h | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index d0b5844c8a31..2e2afc824a6a 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -223,8 +223,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, if (ip_tunnel_collect_metadata() || gs->collect_md) { __be16 flags; - flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | - (gnvh->oam ? TUNNEL_OAM : 0) | + flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) | (gnvh->critical ? TUNNEL_CRIT_OPT : 0); tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index e11423530d64..f8873c4eb003 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -489,9 +489,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, __be16 flags) { - memcpy(ip_tunnel_info_opts(info), from, len); info->options_len = len; - info->key.tun_flags |= flags; + if (len > 0) { + memcpy(ip_tunnel_info_opts(info), from, len); + info->key.tun_flags |= flags; + } } static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate) @@ -537,7 +539,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, __be16 flags) { info->options_len = 0; - info->key.tun_flags |= flags; } #endif /* CONFIG_INET */ From 038004dfd337b6f10b0fd54b445620132801c180 Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Wed, 4 Nov 2020 03:09:05 +0530 Subject: [PATCH 526/636] can: af_can: prevent potential access of uninitialized member in can_rcv() [ Upstream commit c8c958a58fc67f353289986850a0edf553435702 ] In can_rcv(), cfd->len is uninitialized when skb->len = 0, and this uninitialized cfd->len is accessed nonetheless by pr_warn_once(). Fix this uninitialized variable access by checking cfd->len's validity condition (cfd->len > CAN_MAX_DLEN) separately after the skb->len's condition is checked, and appropriately modify the log messages that are generated as well. In case either of the required conditions fail, the skb is freed and NET_RX_DROP is returned, same as before. Fixes: 8cb68751c115 ("can: af_can: can_rcv(): replace WARN_ONCE by pr_warn_once") Reported-by: syzbot+9bcb0c9409066696d3aa@syzkaller.appspotmail.com Tested-by: Anant Thazhemadam Signed-off-by: Anant Thazhemadam Link: https://lore.kernel.org/r/20201103213906.24219-2-anant.thazhemadam@gmail.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/af_can.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 04132b0b5d36..1201846dc07e 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -722,16 +722,25 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n", + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto free_skb; + } + + /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */ + if (unlikely(cfd->len > CAN_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d, datalen %d\n", dev->type, skb->len, cfd->len); - kfree_skb(skb); - return NET_RX_DROP; + goto free_skb; } can_receive(skb, dev); return NET_RX_SUCCESS; + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, From e2b9ee9fe6c11f1b6ea5965ede224e33ca1358b9 Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Wed, 4 Nov 2020 03:09:06 +0530 Subject: [PATCH 527/636] can: af_can: prevent potential access of uninitialized member in canfd_rcv() [ Upstream commit 9aa9379d8f868e91719333a7f063ccccc0579acc ] In canfd_rcv(), cfd->len is uninitialized when skb->len = 0, and this uninitialized cfd->len is accessed nonetheless by pr_warn_once(). Fix this uninitialized variable access by checking cfd->len's validity condition (cfd->len > CANFD_MAX_DLEN) separately after the skb->len's condition is checked, and appropriately modify the log messages that are generated as well. In case either of the required conditions fail, the skb is freed and NET_RX_DROP is returned, same as before. Fixes: d4689846881d ("can: af_can: canfd_rcv(): replace WARN_ONCE by pr_warn_once") Reported-by: syzbot+9bcb0c9409066696d3aa@syzkaller.appspotmail.com Tested-by: Anant Thazhemadam Signed-off-by: Anant Thazhemadam Link: https://lore.kernel.org/r/20201103213906.24219-3-anant.thazhemadam@gmail.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/af_can.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 1201846dc07e..b3edb8092124 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -748,16 +748,25 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n", + if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", + dev->type, skb->len); + goto free_skb; + } + + /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */ + if (unlikely(cfd->len > CANFD_MAX_DLEN)) { + pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d, datalen %d\n", dev->type, skb->len, cfd->len); - kfree_skb(skb); - return NET_RX_DROP; + goto free_skb; } can_receive(skb, dev); return NET_RX_SUCCESS; + +free_skb: + kfree_skb(skb); + return NET_RX_DROP; } /* From cef79b5249ea3bf7889f222999a3bcbc560d9a41 Mon Sep 17 00:00:00 2001 From: Alejandro Concepcion Rodriguez Date: Thu, 5 Nov 2020 21:51:47 +0000 Subject: [PATCH 528/636] can: dev: can_restart(): post buffer from the right context [ Upstream commit a1e654070a60d5d4f7cce59c38f4ca790bb79121 ] netif_rx() is meant to be called from interrupt contexts. can_restart() may be called by can_restart_work(), which is called from a worqueue, so it may run in process context. Use netif_rx_ni() instead. Fixes: 39549eef3587 ("can: CAN Network device driver and Netlink interface") Co-developed-by: Loris Fauster Signed-off-by: Loris Fauster Signed-off-by: Alejandro Concepcion Rodriguez Link: https://lore.kernel.org/r/4e84162b-fb31-3a73-fa9a-9438b4bd5234@acoro.eu [mkl: use netif_rx_ni() instead of netif_rx_any_context()] Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 1950b13f22df..f88590074569 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -579,7 +579,7 @@ static void can_restart(struct net_device *dev) } cf->can_id |= CAN_ERR_RESTARTED; - netif_rx(skb); + netif_rx_ni(skb); stats->rx_packets++; stats->rx_bytes += cf->can_dlc; From 80b9f0191a3b458f7cc3c4ed72011963920b59d6 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Sat, 14 Nov 2020 19:17:08 +0800 Subject: [PATCH 529/636] can: ti_hecc: Fix memleak in ti_hecc_probe [ Upstream commit 7968c7c79d3be8987feb8021f0c46e6866831408 ] In the error handling, we should goto the probe_exit_candev to free ndev to prevent memory leak. Fixes: dabf54dd1c63 ("can: ti_hecc: Convert TI HECC driver to DT only driver") Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201114111708.3465543-1-zhangqilong3@huawei.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/ti_hecc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index db6ea936dc3f..81a3fdd5e010 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -903,7 +903,8 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->base)) { dev_err(&pdev->dev, "hecc ioremap failed\n"); - return PTR_ERR(priv->base); + err = PTR_ERR(priv->base); + goto probe_exit_candev; } /* handle hecc-ram memory */ @@ -916,7 +917,8 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->hecc_ram = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->hecc_ram)) { dev_err(&pdev->dev, "hecc-ram ioremap failed\n"); - return PTR_ERR(priv->hecc_ram); + err = PTR_ERR(priv->hecc_ram); + goto probe_exit_candev; } /* handle mbx memory */ @@ -929,13 +931,14 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->mbx = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(priv->mbx)) { dev_err(&pdev->dev, "mbx ioremap failed\n"); - return PTR_ERR(priv->mbx); + err = PTR_ERR(priv->mbx); + goto probe_exit_candev; } irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (!irq) { dev_err(&pdev->dev, "No irq resource\n"); - goto probe_exit; + goto probe_exit_candev; } priv->ndev = ndev; @@ -988,7 +991,7 @@ probe_exit_clk: clk_put(priv->clk); probe_exit_candev: free_candev(ndev); -probe_exit: + return err; } From 6521ffa41b5b6e59dd72a502bdbea507a8d287a2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 28 Aug 2019 21:16:55 +0200 Subject: [PATCH 530/636] can: mcba_usb: mcba_usb_start_xmit(): first fill skb, then pass to can_put_echo_skb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 81c9c8e0adef3285336b942f93287c554c89e6c6 ] The driver has to first fill the skb with data and then handle it to can_put_echo_skb(). This patch moves the can_put_echo_skb() down, right before sending the skb out via USB. Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer") Cc: Remigiusz Kołłątaj Link: https://lore.kernel.org/r/20201111221204.1639007-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/mcba_usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 1b0afeaf1a3c..896f5b022729 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -337,8 +337,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, if (!ctx) return NETDEV_TX_BUSY; - can_put_echo_skb(skb, priv->netdev, ctx->ndx); - if (cf->can_id & CAN_EFF_FLAG) { /* SIDH | SIDL | EIDH | EIDL * 28 - 21 | 20 19 18 x x x 17 16 | 15 - 8 | 7 - 0 @@ -368,6 +366,8 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, if (cf->can_id & CAN_RTR_FLAG) usb_msg.dlc |= MCBA_DLC_RTR_MASK; + can_put_echo_skb(skb, priv->netdev, ctx->ndx); + err = mcba_usb_xmit(priv, (struct mcba_usb_msg *)&usb_msg, ctx); if (err) goto xmit_failed; From 0069f22dc29ec971161c22b79516c998d01852bb Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 5 Nov 2020 11:24:27 +0000 Subject: [PATCH 531/636] can: peak_usb: fix potential integer overflow on shift of a int [ Upstream commit 8a68cc0d690c9e5730d676b764c6f059343b842c ] The left shift of int 32 bit integer constant 1 is evaluated using 32 bit arithmetic and then assigned to a signed 64 bit variable. In the case where time_ref->adapter->ts_used_bits is 32 or more this can lead to an oveflow. Avoid this by shifting using the BIT_ULL macro instead. Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20201105112427.40688-1-colin.king@canonical.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index db156a11e6db..f7d653d48a1e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -164,7 +164,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time) if (time_ref->ts_dev_1 < time_ref->ts_dev_2) { /* case when event time (tsw) wraps */ if (ts < time_ref->ts_dev_1) - delta_ts = 1 << time_ref->adapter->ts_used_bits; + delta_ts = BIT_ULL(time_ref->adapter->ts_used_bits); /* Otherwise, sync time counter (ts_dev_2) has wrapped: * handle case when event time (tsn) hasn't. @@ -176,7 +176,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time) * tsn ts */ } else if (time_ref->ts_dev_1 < ts) { - delta_ts = -(1 << time_ref->adapter->ts_used_bits); + delta_ts = -BIT_ULL(time_ref->adapter->ts_used_bits); } /* add delay between last sync and event timestamps */ From c8148a10054f5ae23afa2a8c3da0f89890b08a77 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Wed, 29 Jan 2020 10:23:30 +0800 Subject: [PATCH 532/636] can: m_can: m_can_handle_state_change(): fix state change [ Upstream commit cd0d83eab2e0c26fe87a10debfedbb23901853c1 ] m_can_handle_state_change() is called with the new_state as an argument. In the switch statements for CAN_STATE_ERROR_ACTIVE, the comment and the following code indicate that a CAN_STATE_ERROR_WARNING is handled. This patch fixes this problem by changing the case to CAN_STATE_ERROR_WARNING. Signed-off-by: Wu Bo Link: http://lore.kernel.org/r/20200129022330.21248-2-wubo.oduw@gmail.com Cc: Dan Murphy Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/m_can/m_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index deb274a19ba0..efaa342600c4 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -675,7 +675,7 @@ static int m_can_handle_state_change(struct net_device *dev, unsigned int ecr; switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: + case CAN_STATE_ERROR_WARNING: /* error warning state */ priv->can.can_stats.error_warning++; priv->can.state = CAN_STATE_ERROR_WARNING; @@ -704,7 +704,7 @@ static int m_can_handle_state_change(struct net_device *dev, __m_can_get_berr_counter(dev, &bec); switch (new_state) { - case CAN_STATE_ERROR_ACTIVE: + case CAN_STATE_ERROR_WARNING: /* error warning state */ cf->can_id |= CAN_ERR_CRTL; cf->data[1] = (bec.txerr > bec.rxerr) ? From e6f49ea48162b0c2fefcd499b7af1e102750e8e4 Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Sun, 15 Nov 2020 10:26:50 +0530 Subject: [PATCH 533/636] ASoC: qcom: lpass-platform: Fix memory leak [ Upstream commit bd6327fda2f3ded85b69b3c3125c99aaa51c7881 ] lpass_pcm_data is not freed in error paths. Free it in error paths to avoid memory leak. Fixes: 022d00ee0b55 ("ASoC: lpass-platform: Fix broken pcm data usage") Signed-off-by: Pavel Machek Signed-off-by: V Sujith Kumar Reddy Signed-off-by: Srinivasa Rao Mandadapu Link: https://lore.kernel.org/r/1605416210-14530-1-git-send-email-srivasam@codeaurora.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/lpass-platform.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 2f2967247789..1d06e2b7bb63 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -81,8 +81,10 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream) else dma_ch = 0; - if (dma_ch < 0) + if (dma_ch < 0) { + kfree(data); return dma_ch; + } drvdata->substream[dma_ch] = substream; @@ -103,6 +105,7 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream) ret = snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); if (ret < 0) { + kfree(data); dev_err(soc_runtime->dev, "setting constraints failed: %d\n", ret); return -EINVAL; From 62c285777cf7473f9b91fc31e6d13efca6fa3075 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Fri, 13 Nov 2020 21:18:56 +0800 Subject: [PATCH 534/636] MIPS: Alchemy: Fix memleak in alchemy_clk_setup_cpu [ Upstream commit ac3b57adf87ad9bac7e33ca26bbbb13fae1ed62b ] If the clk_register fails, we should free h before function returns to prevent memleak. Fixes: 474402291a0ad ("MIPS: Alchemy: clock framework integration of onchip clocks") Reported-by: Hulk Robot Signed-off-by: Zhang Qilong Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/alchemy/common/clock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index d129475fd40d..4254ba13c5c5 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -152,6 +152,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, { struct clk_init_data id; struct clk_hw *h; + struct clk *clk; h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) @@ -164,7 +165,13 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, id.ops = &alchemy_clkops_cpu; h->init = &id; - return clk_register(NULL, h); + clk = clk_register(NULL, h); + if (IS_ERR(clk)) { + pr_err("failed to register clock\n"); + kfree(h); + } + + return clk; } /* AUXPLLs ************************************************************/ From f2cd428c31ba89936455125448b627d611da96a8 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 16 Nov 2020 09:09:29 +0800 Subject: [PATCH 535/636] drm/sun4i: dw-hdmi: fix error return code in sun8i_dw_hdmi_bind() [ Upstream commit 6654b57866b98230a270953dd34f67de17ab1708 ] Fix to return a negative error code from the error handling case instead of 0 in function sun8i_dw_hdmi_bind(). Fixes: b7c7436a5ff0 ("drm/sun4i: Implement A83T HDMI driver") Reported-by: Hulk Robot Signed-off-by: Xiongfeng Wang Reviewed-by: Jernej Skrabec Signed-off-by: Jernej Skrabec Link: https://patchwork.freedesktop.org/patch/msgid/1605488969-5211-1-git-send-email-wangxiongfeng2@huawei.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index 31875b636434..5073622cbb56 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -140,6 +140,7 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, phy_node = of_parse_phandle(dev->of_node, "phys", 0); if (!phy_node) { dev_err(dev, "Can't found PHY phandle\n"); + ret = -EINVAL; goto err_disable_clk_tmds; } From 789cbe71299d4c648ae6aa88945dddff6cabe469 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Sun, 15 Nov 2020 17:30:23 +0100 Subject: [PATCH 536/636] can: kvaser_usb: kvaser_usb_hydra: Fix KCAN bittiming limits [ Upstream commit d003868d7f8579838ed58b6429af91844039b6f8 ] Use correct bittiming limits for the KCAN CAN controller. Fixes: aec5fb2268b7 ("can: kvaser_usb: Add support for Kvaser USB hydra family") Signed-off-by: Jimmy Assarsson Link: https://lore.kernel.org/r/20201115163027.16851-2-jimmyassarsson@gmail.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 7ab87a758754..218fadc91155 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -367,7 +367,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = { .tseg2_max = 32, .sjw_max = 16, .brp_min = 1, - .brp_max = 4096, + .brp_max = 8192, .brp_inc = 1, }; From 804b269c252445d3b98497c8b02db0152c65cd63 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:41 -0800 Subject: [PATCH 537/636] xfs: fix the minrecs logic when dealing with inode root child blocks [ Upstream commit e95b6c3ef1311dd7b20467d932a24b6d0fd88395 ] The comment and logic in xchk_btree_check_minrecs for dealing with inode-rooted btrees isn't quite correct. While the direct children of the inode root are allowed to have fewer records than what would normally be allowed for a regular ondisk btree block, this is only true if there is only one child block and the number of records don't fit in the inode root. Fixes: 08a3a692ef58 ("xfs: btree scrub should check minrecs") Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin --- fs/xfs/scrub/btree.c | 47 ++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 4ae959f7ad2c..c924fe3cdad6 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -450,32 +450,41 @@ xchk_btree_check_minrecs( int level, struct xfs_btree_block *block) { - unsigned int numrecs; - int ok_level; - - numrecs = be16_to_cpu(block->bb_numrecs); + struct xfs_btree_cur *cur = bs->cur; + unsigned int root_level = cur->bc_nlevels - 1; + unsigned int numrecs = be16_to_cpu(block->bb_numrecs); /* More records than minrecs means the block is ok. */ - if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level)) + if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) return; /* - * Certain btree blocks /can/ have fewer than minrecs records. Any - * level greater than or equal to the level of the highest dedicated - * btree block are allowed to violate this constraint. - * - * For a btree rooted in a block, the btree root can have fewer than - * minrecs records. If the btree is rooted in an inode and does not - * store records in the root, the direct children of the root and the - * root itself can have fewer than minrecs records. + * For btrees rooted in the inode, it's possible that the root block + * contents spilled into a regular ondisk block because there wasn't + * enough space in the inode root. The number of records in that + * child block might be less than the standard minrecs, but that's ok + * provided that there's only one direct child of the root. */ - ok_level = bs->cur->bc_nlevels - 1; - if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) - ok_level--; - if (level >= ok_level) - return; + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 2) { + struct xfs_btree_block *root_block; + struct xfs_buf *root_bp; + int root_maxrecs; - xchk_btree_set_corrupt(bs->sc, bs->cur, level); + root_block = xfs_btree_get_block(cur, root_level, &root_bp); + root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level); + if (be16_to_cpu(root_block->bb_numrecs) != 1 || + numrecs <= root_maxrecs) + xchk_btree_set_corrupt(bs->sc, cur, level); + return; + } + + /* + * Otherwise, only the root level is allowed to have fewer than minrecs + * records or keyptrs. + */ + if (level < root_level) + xchk_btree_set_corrupt(bs->sc, cur, level); } /* From d88cc6f344a2b4a8095a0c8f392b4fa632707276 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sun, 8 Nov 2020 16:32:41 -0800 Subject: [PATCH 538/636] xfs: strengthen rmap record flags checking [ Upstream commit 498fe261f0d6d5189f8e11d283705dd97b474b54 ] We always know the correct state of the rmap record flags (attr, bmbt, unwritten) so check them by direct comparison. Fixes: d852657ccfc0 ("xfs: cross-reference reverse-mapping btree") Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Reviewed-by: Christoph Hellwig Signed-off-by: Sasha Levin --- fs/xfs/scrub/bmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index b05d65fd360b..cf005e18d618 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -225,13 +225,13 @@ xchk_bmap_xref_rmap( * which doesn't track unwritten state. */ if (owner != XFS_RMAP_OWN_COW && - irec->br_state == XFS_EXT_UNWRITTEN && - !(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) + !!(irec->br_state == XFS_EXT_UNWRITTEN) != + !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); - if (info->whichfork == XFS_ATTR_FORK && - !(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) + if (!!(info->whichfork == XFS_ATTR_FORK) != + !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) From edca48315fc31107f541c10da796e591fc50765b Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Wed, 18 Nov 2020 08:50:09 -0600 Subject: [PATCH 539/636] regulator: ti-abb: Fix array out of bound read access on the first transition [ Upstream commit 2ba546ebe0ce2af47833d8912ced9b4a579f13cb ] At the start of driver initialization, we do not know what bias setting the bootloader has configured the system for and we only know for certain the very first time we do a transition. However, since the initial value of the comparison index is -EINVAL, this negative value results in an array out of bound access on the very first transition. Since we don't know what the setting is, we just set the bias configuration as there is nothing to compare against. This prevents the array out of bound access. NOTE: Even though we could use a more relaxed check of "< 0" the only valid values(ignoring cosmic ray induced bitflips) are -EINVAL, 0+. Fixes: 40b1936efebd ("regulator: Introduce TI Adaptive Body Bias(ABB) on-chip LDO driver") Link: https://lore.kernel.org/linux-mm/CA+G9fYuk4imvhyCN7D7T6PMDH6oNp6HDCRiTUKMQ6QXXjBa4ag@mail.gmail.com/ Reported-by: Naresh Kamboju Reviewed-by: Arnd Bergmann Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20201118145009.10492-1-nm@ti.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/ti-abb-regulator.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index 89b9314d64c9..016330f909c0 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -342,8 +342,17 @@ static int ti_abb_set_voltage_sel(struct regulator_dev *rdev, unsigned sel) return ret; } - /* If data is exactly the same, then just update index, no change */ info = &abb->info[sel]; + /* + * When Linux kernel is starting up, we are'nt sure of the + * Bias configuration that bootloader has configured. + * So, we get to know the actual setting the first time + * we are asked to transition. + */ + if (abb->current_info_idx == -EINVAL) + goto just_set_abb; + + /* If data is exactly the same, then just update index, no change */ oinfo = &abb->info[abb->current_info_idx]; if (!memcmp(info, oinfo, sizeof(*info))) { dev_dbg(dev, "%s: Same data new idx=%d, old idx=%d\n", __func__, @@ -351,6 +360,7 @@ static int ti_abb_set_voltage_sel(struct regulator_dev *rdev, unsigned sel) goto out; } +just_set_abb: ret = ti_abb_set_opp(rdev, abb, info); out: From 730b192ad262545a269483609f52d29bff503507 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Wed, 18 Nov 2020 22:49:31 +0900 Subject: [PATCH 540/636] fail_function: Remove a redundant mutex unlock [ Upstream commit 2801a5da5b25b7af9dd2addd19b2315c02d17b64 ] Fix a mutex_unlock() issue where before copy_from_user() is not called mutex_locked. Fixes: 4b1a29a7f542 ("error-injection: Support fault injection framework") Reported-by: Hulk Robot Signed-off-by: Luo Meng Signed-off-by: Masami Hiramatsu Signed-off-by: Alexei Starovoitov Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/bpf/160570737118.263807.8358435412898356284.stgit@devnote2 Signed-off-by: Sasha Levin --- kernel/fail_function.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/fail_function.c b/kernel/fail_function.c index bc80a4e268c0..a52151a2291f 100644 --- a/kernel/fail_function.c +++ b/kernel/fail_function.c @@ -261,7 +261,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, if (copy_from_user(buf, buffer, count)) { ret = -EFAULT; - goto out; + goto out_free; } buf[count] = '\0'; sym = strstrip(buf); @@ -315,8 +315,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer, ret = count; } out: - kfree(buf); mutex_unlock(&fei_lock); +out_free: + kfree(buf); return ret; } From 088ad76f42062164775af47464f56c7fc6a13fca Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 19 Nov 2020 15:17:50 -0800 Subject: [PATCH 541/636] xfs: revert "xfs: fix rmap key and record comparison functions" [ Upstream commit eb8409071a1d47e3593cfe077107ac46853182ab ] This reverts commit 6ff646b2ceb0eec916101877f38da0b73e3a5b7f. Your maintainer committed a major braino in the rmap code by adding the attr fork, bmbt, and unwritten extent usage bits into rmap record key comparisons. While XFS uses the usage bits *in the rmap records* for cross-referencing metadata in xfs_scrub and xfs_repair, it only needs the owner and offset information to distinguish between reverse mappings of the same physical extent into the data fork of a file at multiple offsets. The other bits are not important for key comparisons for index lookups, and never have been. Eric Sandeen reports that this causes regressions in generic/299, so undo this patch before it does more damage. Reported-by: Eric Sandeen Fixes: 6ff646b2ceb0 ("xfs: fix rmap key and record comparison functions") Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen Signed-off-by: Sasha Levin --- fs/xfs/libxfs/xfs_rmap_btree.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 77528f413286..f79cf040d745 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -247,8 +247,8 @@ xfs_rmapbt_key_diff( else if (y > x) return -1; - x = be64_to_cpu(kp->rm_offset); - y = xfs_rmap_irec_offset_pack(rec); + x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset)); + y = rec->rm_offset; if (x > y) return 1; else if (y > x) @@ -279,8 +279,8 @@ xfs_rmapbt_diff_two_keys( else if (y > x) return -1; - x = be64_to_cpu(kp1->rm_offset); - y = be64_to_cpu(kp2->rm_offset); + x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset)); + y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset)); if (x > y) return 1; else if (y > x) @@ -393,8 +393,8 @@ xfs_rmapbt_keys_inorder( return 1; else if (a > b) return 0; - a = be64_to_cpu(k1->rmap.rm_offset); - b = be64_to_cpu(k2->rmap.rm_offset); + a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)); + b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)); if (a <= b) return 1; return 0; @@ -423,8 +423,8 @@ xfs_rmapbt_recs_inorder( return 1; else if (a > b) return 0; - a = be64_to_cpu(r1->rmap.rm_offset); - b = be64_to_cpu(r2->rmap.rm_offset); + a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)); + b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)); if (a <= b) return 1; return 0; From b4013fde63242f76859a236ba3ca1c162cb7ebe5 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 10 Nov 2020 11:39:19 -0500 Subject: [PATCH 542/636] efi/x86: Free efi_pgd with free_pages() [ Upstream commit c2fe61d8be491ff8188edaf22e838f819999146b ] Commit d9e9a6418065 ("x86/mm/pti: Allocate a separate user PGD") changed the PGD allocation to allocate PGD_ALLOCATION_ORDER pages, so in the error path it should be freed using free_pages() rather than free_page(). Commit 06ace26f4e6f ("x86/efi: Free efi_pgd with free_pages()") fixed one instance of this, but missed another. Move the freeing out-of-line to avoid code duplication and fix this bug. Fixes: d9e9a6418065 ("x86/mm/pti: Allocate a separate user PGD") Link: https://lore.kernel.org/r/20201110163919.1134431-1-nivedita@alum.mit.edu Signed-off-by: Arvind Sankar Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- arch/x86/platform/efi/efi_64.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 52dd59af873e..77d05b56089a 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -214,28 +214,30 @@ int __init efi_alloc_page_tables(void) gfp_mask = GFP_KERNEL | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) - return -ENOMEM; + goto fail; pgd = efi_pgd + pgd_index(EFI_VA_END); p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); - if (!p4d) { - free_page((unsigned long)efi_pgd); - return -ENOMEM; - } + if (!p4d) + goto free_pgd; pud = pud_alloc(&init_mm, p4d, EFI_VA_END); - if (!pud) { - if (pgtable_l5_enabled()) - free_page((unsigned long) pgd_page_vaddr(*pgd)); - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); - return -ENOMEM; - } + if (!pud) + goto free_p4d; efi_mm.pgd = efi_pgd; mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); return 0; + +free_p4d: + if (pgtable_l5_enabled()) + free_page((unsigned long)pgd_page_vaddr(*pgd)); +free_pgd: + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); +fail: + return -ENOMEM; } /* From 7dfd3751915578b219cf1a25e312569fe0571739 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Sat, 21 Nov 2020 22:17:19 -0800 Subject: [PATCH 543/636] libfs: fix error cast of negative value in simple_attr_write() [ Upstream commit 488dac0c9237647e9b8f788b6a342595bfa40bda ] The attr->set() receive a value of u64, but simple_strtoll() is used for doing the conversion. It will lead to the error cast if user inputs a negative value. Use kstrtoull() instead of simple_strtoll() to convert a string got from the user to an unsigned value. The former will return '-EINVAL' if it gets a negetive value, but the latter can't handle the situation correctly. Make 'val' unsigned long long as what kstrtoull() takes, this will eliminate the compile warning on no 64-bit architectures. Fixes: f7b88631a897 ("fs/libfs.c: fix simple_attr_write() on 32bit machines") Signed-off-by: Yicong Yang Signed-off-by: Andrew Morton Cc: Al Viro Link: https://lkml.kernel.org/r/1605341356-11872-1-git-send-email-yangyicong@hisilicon.com Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/libfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 02158618f4c9..be57e64834e5 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -868,7 +868,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { struct simple_attr *attr; - u64 val; + unsigned long long val; size_t size; ssize_t ret; @@ -886,7 +886,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, goto out; attr->set_buf[size] = '\0'; - val = simple_strtoll(attr->set_buf, NULL, 0); + ret = kstrtoull(attr->set_buf, 0, &val); + if (ret) + goto out; ret = attr->set(attr->data, val); if (ret == 0) ret = len; /* on success, claim we got the whole input */ From 3560603ef82f11277143a433170bca05bd9288a8 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 10 Nov 2020 19:35:41 +0100 Subject: [PATCH 544/636] speakup: Do not let the line discipline be used several times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d4122754442799187d5d537a9c039a49a67e57f1 upstream. Speakup has only one speakup_tty variable to store the tty it is managing. This makes sense since its codebase currently assumes that there is only one user who controls the screen reading. That however means that we have to forbid using the line discipline several times, otherwise the second closure would try to free a NULL ldisc_data, leading to general protection fault: 0000 [#1] SMP KASAN PTI RIP: 0010:spk_ttyio_ldisc_close+0x2c/0x60 Call Trace: tty_ldisc_release+0xa2/0x340 tty_release_struct+0x17/0xd0 tty_release+0x9d9/0xcc0 __fput+0x231/0x740 task_work_run+0x12c/0x1a0 do_exit+0x9b5/0x2230 ? release_task+0x1240/0x1240 ? __do_page_fault+0x562/0xa30 do_group_exit+0xd5/0x2a0 __x64_sys_exit_group+0x35/0x40 do_syscall_64+0x89/0x2b0 ? page_fault+0x8/0x30 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Cc: stable@vger.kernel.org Reported-by: 秦世松 Signed-off-by: Samuel Thibault Tested-by: Shisong Qin Link: https://lore.kernel.org/r/20201110183541.fzgnlwhjpgqzjeth@function Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/spk_ttyio.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/staging/speakup/spk_ttyio.c b/drivers/staging/speakup/spk_ttyio.c index 93742dbdee77..6c754ddf1257 100644 --- a/drivers/staging/speakup/spk_ttyio.c +++ b/drivers/staging/speakup/spk_ttyio.c @@ -49,15 +49,25 @@ static int spk_ttyio_ldisc_open(struct tty_struct *tty) if (tty->ops->write == NULL) return -EOPNOTSUPP; + + mutex_lock(&speakup_tty_mutex); + if (speakup_tty) { + mutex_unlock(&speakup_tty_mutex); + return -EBUSY; + } speakup_tty = tty; ldisc_data = kmalloc(sizeof(struct spk_ldisc_data), GFP_KERNEL); - if (!ldisc_data) + if (!ldisc_data) { + speakup_tty = NULL; + mutex_unlock(&speakup_tty_mutex); return -ENOMEM; + } sema_init(&ldisc_data->sem, 0); ldisc_data->buf_free = true; speakup_tty->disc_data = ldisc_data; + mutex_unlock(&speakup_tty_mutex); return 0; } From e711b6e1c8ab0afe900d7a51b45ec4465e73aac8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Nov 2020 13:12:41 +0300 Subject: [PATCH 545/636] ALSA: firewire: Clean up a locking issue in copy_resp_to_buf() commit 02a9c6ee4183af2e438454c55098b828a96085fb upstream. The spin_lock/unlock_irq() functions cannot be nested. The problem is that presumably we would want the IRQs to be re-enabled on the second call the spin_unlock_irq() but instead it will be enabled at the first call so IRQs will be enabled earlier than expected. In this situation the copy_resp_to_buf() function is only called from one function and it is called with IRQs disabled. We can just use the regular spin_lock/unlock() functions. Fixes: 555e8a8f7f14 ("ALSA: fireworks: Add command/response functionality into hwdep interface") Signed-off-by: Dan Carpenter Acked-by: Takashi Sakamoto Cc: Link: https://lore.kernel.org/r/20201113101241.GB168908@mwanda Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/firewire/fireworks/fireworks_transaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c index 36a08ba51ec7..58674a711132 100644 --- a/sound/firewire/fireworks/fireworks_transaction.c +++ b/sound/firewire/fireworks/fireworks_transaction.c @@ -124,7 +124,7 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode) t = (struct snd_efw_transaction *)data; length = min_t(size_t, be32_to_cpu(t->length) * sizeof(u32), length); - spin_lock_irq(&efw->lock); + spin_lock(&efw->lock); if (efw->push_ptr < efw->pull_ptr) capacity = (unsigned int)(efw->pull_ptr - efw->push_ptr); @@ -191,7 +191,7 @@ handle_resp_for_user(struct fw_card *card, int generation, int source, copy_resp_to_buf(efw, data, length, rcode); end: - spin_unlock_irq(&instances_lock); + spin_unlock(&instances_lock); } static void From def6149e400bb8a12a3032214a3fb3f54fec35ab Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 17 Nov 2020 13:28:03 +0100 Subject: [PATCH 546/636] ALSA: usb-audio: Add delay quirk for all Logitech USB devices commit 54a2a3898f469a915510038fe84ef4f083131d3e upstream. Found one more Logitech device, BCC950 ConferenceCam, which needs the same delay here. This makes 3 out of 3 devices I have tried. Therefore, add a delay for all Logitech devices as it does not hurt. Signed-off-by: Joakim Tjernlund Cc: # 4.19.y, 5.4.y Link: https://lore.kernel.org/r/20201117122803.24310-1-joakim.tjernlund@infinera.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 3effe2e86197..d4aae3fcd3cd 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1338,13 +1338,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) msleep(20); - /* Zoom R16/24, Logitech H650e/H570e, Jabra 550a, Kingston HyperX - * needs a tiny delay here, otherwise requests like get/set - * frequency return as failed despite actually succeeding. + /* Zoom R16/24, many Logitech(at least H650e/H570e/BCC950), + * Jabra 550a, Kingston HyperX needs a tiny delay here, + * otherwise requests like get/set frequency return + * as failed despite actually succeeding. */ if ((chip->usb_id == USB_ID(0x1686, 0x00dd) || - chip->usb_id == USB_ID(0x046d, 0x0a46) || - chip->usb_id == USB_ID(0x046d, 0x0a56) || + USB_ID_VENDOR(chip->usb_id) == 0x046d || /* Logitech */ chip->usb_id == USB_ID(0x0b0e, 0x0349) || chip->usb_id == USB_ID(0x0951, 0x16ad)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) From 2e870e4116e09e870bb915265a66b87efc2d82e2 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 13 Nov 2020 18:20:43 +0900 Subject: [PATCH 547/636] ALSA: ctl: fix error path at adding user-defined element set commit 95a793c3bc75cf888e0e641d656e7d080f487d8b upstream. When processing request to add/replace user-defined element set, check of given element identifier and decision of numeric identifier is done in "__snd_ctl_add_replace()" helper function. When the result of check is wrong, the helper function returns error code. The error code shall be returned to userspace application. Current implementation includes bug to return zero to userspace application regardless of the result. This commit fixes the bug. Cc: Fixes: e1a7bfe38079 ("ALSA: control: Fix race between adding and removing a user element") Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20201113092043.16148-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index d1312f14d78f..9cec0e8c9d53 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1388,7 +1388,7 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, unlock: up_write(&card->controls_rwsem); - return 0; + return err; } static int snd_ctl_elem_add_user(struct snd_ctl_file *file, From 19de211a4a131ef56db05e040d8ef7281830ea8c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Nov 2020 13:14:40 +0100 Subject: [PATCH 548/636] ALSA: mixart: Fix mutex deadlock commit d21b96c8ed2aea7e6b7bf4735e1d2503cfbf4072 upstream. The code change for switching to non-atomic mode brought the unexpected mutex deadlock in get_msg(). It converted the spinlock with the existing mutex, but there were calls with the already holding the mutex. Since the only place that needs the extra lock is the code path from snd_mixart_send_msg(), remove the mutex lock in get_msg() and apply in the caller side for fixing the mutex deadlock. Fixes: 8d3a8b5cb57d ("ALSA: mixart: Use nonatomic PCM ops") Reported-by: Dan Carpenter Cc: Link: https://lore.kernel.org/r/20201119121440.18945-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/mixart/mixart_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/pci/mixart/mixart_core.c b/sound/pci/mixart/mixart_core.c index 71776bfe0485..5a956f8bfa9e 100644 --- a/sound/pci/mixart/mixart_core.c +++ b/sound/pci/mixart/mixart_core.c @@ -83,7 +83,6 @@ static int get_msg(struct mixart_mgr *mgr, struct mixart_msg *resp, unsigned int i; #endif - mutex_lock(&mgr->msg_lock); err = 0; /* copy message descriptor from miXart to driver */ @@ -132,8 +131,6 @@ static int get_msg(struct mixart_mgr *mgr, struct mixart_msg *resp, writel_be(headptr, MIXART_MEM(mgr, MSG_OUTBOUND_FREE_HEAD)); _clean_exit: - mutex_unlock(&mgr->msg_lock); - return err; } @@ -271,7 +268,9 @@ int snd_mixart_send_msg(struct mixart_mgr *mgr, struct mixart_msg *request, int resp.data = resp_data; resp.size = max_resp_size; + mutex_lock(&mgr->msg_lock); err = get_msg(mgr, &resp, msg_frame); + mutex_unlock(&mgr->msg_lock); if( request->message_id != resp.message_id ) dev_err(&mgr->pci->dev, "RESPONSE ERROR!\n"); From 2d031d2452a969d956c7ce73a9e7988378c2e151 Mon Sep 17 00:00:00 2001 From: PeiSen Hou Date: Wed, 11 Nov 2020 08:58:59 +0100 Subject: [PATCH 549/636] ALSA: hda/realtek: Add some Clove SSID in the ALC293(ALC1220) commit b5acfe152abaa2721c9ca8aa67f941d7de55d24e upstream. Fix "use as headset mic, without its own jack detect" problem. [ Minor coding style fixes by tiwai ] Signed-off-by: PeiSen Hou Cc: Link: https://lore.kernel.org/r/481963e4a5694ff19f27ae1e283d79ad@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 52 +++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 382a8d179eb0..935ca7989cfd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2498,13 +2498,23 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x9506, "Clevo P955HQ", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x950A, "Clevo P955H[PR]", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e3, "Clevo P955[ER]T", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e4, "Clevo P955ER", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e5, "Clevo P955EE6", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e6, "Clevo P950R[CDF]", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950), - SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), - SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x97e2, "Clevo P970RC-M", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x50d3, "Clevo PC50[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), @@ -7114,11 +7124,49 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x1325, "System76 Darter Pro (darp5)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x1401, "Clevo L140[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x1403, "Clevo N140CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x1404, "Clevo N150CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x40a1, "Clevo NL40GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x40c1, "Clevo NL40[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x40d1, "Clevo NL41DU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50a3, "Clevo NJ51GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50b3, "Clevo NK50S[BEZ]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50b6, "Clevo NK50S5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8535, "Clevo NH50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8536, "Clevo NH79D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8550, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x951d, "Clevo N950T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x961d, "Clevo N960S[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL53RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS), SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), From 123667ed90ef625ced9d0d071dc889985bfc6d55 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Wed, 11 Nov 2020 10:51:36 +0800 Subject: [PATCH 550/636] tty: serial: imx: keep console clocks always on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e67c139c488e84e7eae6c333231e791f0e89b3fb upstream. For below code, there has chance to cause deadlock in SMP system: Thread 1: clk_enable_lock(); pr_info("debug message"); clk_enable_unlock(); Thread 2: imx_uart_console_write() clk_enable() clk_enable_lock(); Thread 1: Acuired clk enable_lock -> printk -> console_trylock_spinning Thread 2: console_unlock() -> imx_uart_console_write -> clk_disable -> Acquite clk enable_lock So the patch is to keep console port clocks always on like other console drivers. Fixes: 1cf93e0d5488 ("serial: imx: remove the uart_console() check") Acked-by: Uwe Kleine-König Signed-off-by: Fugang Duan Link: https://lore.kernel.org/r/20201111025136.29818-1-fugang.duan@nxp.com Cc: stable [fix up build warning - gregkh] Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 4066cb2b79cb..7a6e26b12bf6 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1915,16 +1915,6 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) unsigned int ucr1; unsigned long flags = 0; int locked = 1; - int retval; - - retval = clk_enable(sport->clk_per); - if (retval) - return; - retval = clk_enable(sport->clk_ipg); - if (retval) { - clk_disable(sport->clk_per); - return; - } if (sport->port.sysrq) locked = 0; @@ -1960,9 +1950,6 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) if (locked) spin_unlock_irqrestore(&sport->port.lock, flags); - - clk_disable(sport->clk_ipg); - clk_disable(sport->clk_per); } /* @@ -2063,15 +2050,14 @@ imx_uart_console_setup(struct console *co, char *options) retval = uart_set_options(&sport->port, co, baud, parity, bits, flow); - clk_disable(sport->clk_ipg); if (retval) { - clk_unprepare(sport->clk_ipg); + clk_disable_unprepare(sport->clk_ipg); goto error_console; } - retval = clk_prepare(sport->clk_per); + retval = clk_prepare_enable(sport->clk_per); if (retval) - clk_unprepare(sport->clk_ipg); + clk_disable_unprepare(sport->clk_ipg); error_console: return retval; From 3a07581b04648c8acb634045c7060db392853efe Mon Sep 17 00:00:00 2001 From: Vamshi K Sthambamkadi Date: Fri, 23 Oct 2020 17:24:39 +0530 Subject: [PATCH 551/636] efivarfs: fix memory leak in efivarfs_create() commit fe5186cf12e30facfe261e9be6c7904a170bd822 upstream. kmemleak report: unreferenced object 0xffff9b8915fcb000 (size 4096): comm "efivarfs.sh", pid 2360, jiffies 4294920096 (age 48.264s) hex dump (first 32 bytes): 2d 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 -............... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000cc4d897c>] kmem_cache_alloc_trace+0x155/0x4b0 [<000000007d1dfa72>] efivarfs_create+0x6e/0x1a0 [<00000000e6ee18fc>] path_openat+0xe4b/0x1120 [<000000000ad0414f>] do_filp_open+0x91/0x100 [<00000000ce93a198>] do_sys_openat2+0x20c/0x2d0 [<000000002a91be6d>] do_sys_open+0x46/0x80 [<000000000a854999>] __x64_sys_openat+0x20/0x30 [<00000000c50d89c9>] do_syscall_64+0x38/0x90 [<00000000cecd6b5f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 In efivarfs_create(), inode->i_private is setup with efivar_entry object which is never freed. Cc: Signed-off-by: Vamshi K Sthambamkadi Link: https://lore.kernel.org/r/20201023115429.GA2479@cosmos Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- fs/efivarfs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 834615f13f3e..7808a26bd33f 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -23,6 +23,7 @@ LIST_HEAD(efivarfs_list); static void efivarfs_evict_inode(struct inode *inode) { clear_inode(inode); + kfree(inode->i_private); } static const struct super_operations efivarfs_ops = { From e9bfab3073834efa616ec86184d63eba1a64cfef Mon Sep 17 00:00:00 2001 From: Brian O'Keefe Date: Fri, 6 Nov 2020 10:10:34 -0500 Subject: [PATCH 552/636] staging: rtl8723bs: Add 024c:0627 to the list of SDIO device-ids commit aee9dccc5b64e878cf1b18207436e73f66d74157 upstream. Add 024c:0627 to the list of SDIO device-ids, based on hardware found in the wild. This hardware exists on at least some Acer SW1-011 tablets. Signed-off-by: Brian O'Keefe Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/b9e1523f-2ba7-fb82-646a-37f095b4440e@alum.wpi.edu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/os_dep/sdio_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c index 49ea780f9f42..dc69c41f3e38 100644 --- a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c +++ b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c @@ -20,6 +20,7 @@ static const struct sdio_device_id sdio_ids[] = { SDIO_DEVICE(0x024c, 0x0525), }, { SDIO_DEVICE(0x024c, 0x0623), }, { SDIO_DEVICE(0x024c, 0x0626), }, + { SDIO_DEVICE(0x024c, 0x0627), }, { SDIO_DEVICE(0x024c, 0xb723), }, { /* end: all zeroes */ }, }; From f2a32b26684550173f9fdd326923c0799b552916 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 18 Nov 2020 16:30:32 +0100 Subject: [PATCH 553/636] ext4: fix bogus warning in ext4_update_dx_flag() commit f902b216501094495ff75834035656e8119c537f upstream. The idea of the warning in ext4_update_dx_flag() is that we should warn when we are clearing EXT4_INODE_INDEX on a filesystem with metadata checksums enabled since after clearing the flag, checksums for internal htree nodes will become invalid. So there's no need to warn (or actually do anything) when EXT4_INODE_INDEX is not set. Link: https://lore.kernel.org/r/20201118153032.17281-1-jack@suse.cz Fixes: 48a34311953d ("ext4: fix checksum errors with indexed dirs") Reported-by: Eric Biggers Reviewed-by: Eric Biggers Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 19109c04710e..a436307eed00 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2427,7 +2427,8 @@ void ext4_insert_dentry(struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) { + if (!ext4_has_feature_dir_index(inode->i_sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { /* ext4_iget() should have caught this... */ WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); From 1ee67cb29e7c4f2f1c692009e3138aaaab92c678 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 10 Nov 2020 14:38:34 +0100 Subject: [PATCH 554/636] iio: accel: kxcjk1013: Replace is_smo8500_device with an acpi_type enum commit 11e94f28c3de35d5ad1ac6a242a5b30f4378991a upstream. Replace the boolean is_smo8500_device variable with an acpi_type enum. For now this can be either ACPI_GENERIC or ACPI_SMO8500, this is a preparation patch for adding special handling for the KIOX010A ACPI HID, which will add a ACPI_KIOX010A acpi_type to the introduced enum. For stable as needed as precursor for next patch. Signed-off-by: Hans de Goede Fixes: 7f6232e69539 ("iio: accel: kxcjk1013: Add KIOX010A ACPI Hardware-ID") Cc: Link: https://lore.kernel.org/r/20201110133835.129080-2-hdegoede@redhat.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/kxcjk-1013.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index e5fdca74a630..0288a3ee7ef5 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -134,6 +134,11 @@ enum kx_chipset { KX_MAX_CHIPS /* this must be last */ }; +enum kx_acpi_type { + ACPI_GENERIC, + ACPI_SMO8500, +}; + struct kxcjk1013_data { struct i2c_client *client; struct iio_trigger *dready_trig; @@ -150,7 +155,7 @@ struct kxcjk1013_data { bool motion_trigger_on; int64_t timestamp; enum kx_chipset chipset; - bool is_smo8500_device; + enum kx_acpi_type acpi_type; }; enum kxcjk1013_axis { @@ -1241,7 +1246,7 @@ static irqreturn_t kxcjk1013_data_rdy_trig_poll(int irq, void *private) static const char *kxcjk1013_match_acpi_device(struct device *dev, enum kx_chipset *chipset, - bool *is_smo8500_device) + enum kx_acpi_type *acpi_type) { const struct acpi_device_id *id; @@ -1250,7 +1255,7 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev, return NULL; if (strcmp(id->id, "SMO8500") == 0) - *is_smo8500_device = true; + *acpi_type = ACPI_SMO8500; *chipset = (enum kx_chipset)id->driver_data; @@ -1286,7 +1291,7 @@ static int kxcjk1013_probe(struct i2c_client *client, } else if (ACPI_HANDLE(&client->dev)) { name = kxcjk1013_match_acpi_device(&client->dev, &data->chipset, - &data->is_smo8500_device); + &data->acpi_type); } else return -ENODEV; @@ -1304,7 +1309,7 @@ static int kxcjk1013_probe(struct i2c_client *client, indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &kxcjk1013_info; - if (client->irq > 0 && !data->is_smo8500_device) { + if (client->irq > 0 && data->acpi_type != ACPI_SMO8500) { ret = devm_request_threaded_irq(&client->dev, client->irq, kxcjk1013_data_rdy_trig_poll, kxcjk1013_event_handler, From 03fe4c08801afb39635ff44783d22d0b375962aa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 10 Nov 2020 14:38:35 +0100 Subject: [PATCH 555/636] iio: accel: kxcjk1013: Add support for KIOX010A ACPI DSM for setting tablet-mode commit e5b1032a656e9aa4c7a4df77cb9156a2a651a5f9 upstream. Some 360 degree hinges (yoga) style 2-in-1 devices use 2 KXCJ91008-s to allow the OS to determine the angle between the display and the base of the device, so that the OS can determine if the 2-in-1 is in laptop or in tablet-mode. On Windows both accelerometers are read by a special HingeAngleService process; and this process calls a DSM (Device Specific Method) on the ACPI KIOX010A device node for the sensor in the display, to let the embedded-controller (EC) know about the mode so that it can disable the kbd and touchpad to avoid spurious input while folded into tablet-mode. This notifying of the EC is problematic because sometimes the EC comes up thinking that device is in tablet-mode and the kbd and touchpad do not work. This happens for example on Irbis NB111 devices after a suspend / resume cycle (after a complete battery drain / hard reset without having booted Windows at least once). Other 2-in-1s which are likely affected too are e.g. the Teclast F5 and F6 series. The kxcjk-1013 driver may seem like a strange place to deal with this, but since it is *the* driver for the ACPI KIOX010A device, it is also the driver which has access to the ACPI handle needed by the DSM. Add support for calling the DSM and on probe unconditionally tell the EC that the device is laptop mode, fixing the kbd and touchpad sometimes not working. Fixes: 7f6232e69539 ("iio: accel: kxcjk1013: Add KIOX010A ACPI Hardware-ID") Reported-and-tested-by: russianneuromancer Signed-off-by: Hans de Goede Cc: Link: https://lore.kernel.org/r/20201110133835.129080-3-hdegoede@redhat.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/accel/kxcjk-1013.c | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 0288a3ee7ef5..c22afc979206 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -137,6 +137,7 @@ enum kx_chipset { enum kx_acpi_type { ACPI_GENERIC, ACPI_SMO8500, + ACPI_KIOX010A, }; struct kxcjk1013_data { @@ -282,6 +283,32 @@ static const struct { {19163, 1, 0}, {38326, 0, 1} }; +#ifdef CONFIG_ACPI +enum kiox010a_fn_index { + KIOX010A_SET_LAPTOP_MODE = 1, + KIOX010A_SET_TABLET_MODE = 2, +}; + +static int kiox010a_dsm(struct device *dev, int fn_index) +{ + acpi_handle handle = ACPI_HANDLE(dev); + guid_t kiox010a_dsm_guid; + union acpi_object *obj; + + if (!handle) + return -ENODEV; + + guid_parse("1f339696-d475-4e26-8cad-2e9f8e6d7a91", &kiox010a_dsm_guid); + + obj = acpi_evaluate_dsm(handle, &kiox010a_dsm_guid, 1, fn_index, NULL); + if (!obj) + return -EIO; + + ACPI_FREE(obj); + return 0; +} +#endif + static int kxcjk1013_set_mode(struct kxcjk1013_data *data, enum kxcjk1013_mode mode) { @@ -359,6 +386,13 @@ static int kxcjk1013_chip_init(struct kxcjk1013_data *data) { int ret; +#ifdef CONFIG_ACPI + if (data->acpi_type == ACPI_KIOX010A) { + /* Make sure the kbd and touchpad on 2-in-1s using 2 KXCJ91008-s work */ + kiox010a_dsm(&data->client->dev, KIOX010A_SET_LAPTOP_MODE); + } +#endif + ret = i2c_smbus_read_byte_data(data->client, KXCJK1013_REG_WHO_AM_I); if (ret < 0) { dev_err(&data->client->dev, "Error reading who_am_i\n"); @@ -1256,6 +1290,8 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev, if (strcmp(id->id, "SMO8500") == 0) *acpi_type = ACPI_SMO8500; + else if (strcmp(id->id, "KIOX010A") == 0) + *acpi_type = ACPI_KIOX010A; *chipset = (enum kx_chipset)id->driver_data; From 8f1f52f659b9e0b13bd2202fa7a558adc3b3994e Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Tue, 10 Nov 2020 18:41:13 +0100 Subject: [PATCH 556/636] regulator: pfuze100: limit pfuze-support-disable-sw to pfuze{100,200} commit 365ec8b61689bd64d6a61e129e0319bf71336407 upstream. Limit the fsl,pfuze-support-disable-sw to the pfuze100 and pfuze200 variants. When enabling fsl,pfuze-support-disable-sw and using a pfuze3000 or pfuze3001, the driver would choose pfuze100_sw_disable_regulator_ops instead of the newly introduced and correct pfuze3000_sw_regulator_ops. Signed-off-by: Sean Nyekjaer Fixes: 6f1cf5257acc ("regualtor: pfuze100: correct sw1a/sw2 on pfuze3000") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201110174113.2066534-1-sean@geanix.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/pfuze100-regulator.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 30e92a9cc97e..4b8306594c3f 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -755,11 +755,14 @@ static int pfuze100_regulator_probe(struct i2c_client *client, * the switched regulator till yet. */ if (pfuze_chip->flags & PFUZE_FLAG_DISABLE_SW) { - if (pfuze_chip->regulator_descs[i].sw_reg) { - desc->ops = &pfuze100_sw_disable_regulator_ops; - desc->enable_val = 0x8; - desc->disable_val = 0x0; - desc->enable_time = 500; + if (pfuze_chip->chip_id == PFUZE100 || + pfuze_chip->chip_id == PFUZE200) { + if (pfuze_chip->regulator_descs[i].sw_reg) { + desc->ops = &pfuze100_sw_disable_regulator_ops; + desc->enable_val = 0x8; + desc->disable_val = 0x0; + desc->enable_time = 500; + } } } From 675b9c121c46166b189344fd158f10e064061505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 13 Nov 2020 01:20:27 +0100 Subject: [PATCH 557/636] regulator: fix memory leak with repeated set_machine_constraints() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 57a6ad482af256b2a13de14194fb8f67c1a65f10 upstream. Fixed commit introduced a possible second call to set_machine_constraints() and that allocates memory for rdev->constraints. Move the allocation to the caller so it's easier to manage and done once. Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator") Cc: stable@vger.kernel.org Signed-off-by: Michał Mirosław Tested-by: Ahmad Fatoum # stpmic1 Link: https://lore.kernel.org/r/78c3d4016cebc08d441aad18cb924b4e4d9cf9df.1605226675.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index ad5235ca8e4e..cee101d66249 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1091,7 +1091,6 @@ static int _regulator_do_enable(struct regulator_dev *rdev); /** * set_machine_constraints - sets regulator constraints * @rdev: regulator source - * @constraints: constraints to apply * * Allows platform initialisation code to define and constrain * regulator circuits e.g. valid voltage/current ranges, etc. NOTE: @@ -1099,21 +1098,11 @@ static int _regulator_do_enable(struct regulator_dev *rdev); * regulator operations to proceed i.e. set_voltage, set_current_limit, * set_mode. */ -static int set_machine_constraints(struct regulator_dev *rdev, - const struct regulation_constraints *constraints) +static int set_machine_constraints(struct regulator_dev *rdev) { int ret = 0; const struct regulator_ops *ops = rdev->desc->ops; - if (constraints) - rdev->constraints = kmemdup(constraints, sizeof(*constraints), - GFP_KERNEL); - else - rdev->constraints = kzalloc(sizeof(*constraints), - GFP_KERNEL); - if (!rdev->constraints) - return -ENOMEM; - ret = machine_constraints_voltage(rdev, rdev->constraints); if (ret != 0) return ret; @@ -4257,7 +4246,6 @@ struct regulator_dev * regulator_register(const struct regulator_desc *regulator_desc, const struct regulator_config *cfg) { - const struct regulation_constraints *constraints = NULL; const struct regulator_init_data *init_data; struct regulator_config *config = NULL; static atomic_t regulator_no = ATOMIC_INIT(-1); @@ -4358,14 +4346,23 @@ regulator_register(const struct regulator_desc *regulator_desc, /* set regulator constraints */ if (init_data) - constraints = &init_data->constraints; + rdev->constraints = kmemdup(&init_data->constraints, + sizeof(*rdev->constraints), + GFP_KERNEL); + else + rdev->constraints = kzalloc(sizeof(*rdev->constraints), + GFP_KERNEL); + if (!rdev->constraints) { + ret = -ENOMEM; + goto wash; + } if (init_data && init_data->supply_regulator) rdev->supply_name = init_data->supply_regulator; else if (regulator_desc->supply_name) rdev->supply_name = regulator_desc->supply_name; - ret = set_machine_constraints(rdev, constraints); + ret = set_machine_constraints(rdev); if (ret == -EPROBE_DEFER) { /* Regulator might be in bypass mode and so needs its supply * to set the constraints */ @@ -4374,7 +4371,7 @@ regulator_register(const struct regulator_desc *regulator_desc, * that is just being created */ ret = regulator_resolve_supply(rdev); if (!ret) - ret = set_machine_constraints(rdev, constraints); + ret = set_machine_constraints(rdev); else rdev_dbg(rdev, "unable to resolve supply early: %pe\n", ERR_PTR(ret)); From 00aa5e8d1a3334701cdf0a7d0965e7650a7cd797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 13 Nov 2020 01:20:28 +0100 Subject: [PATCH 558/636] regulator: avoid resolve_supply() infinite recursion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4b639e254d3d4f15ee4ff2b890a447204cfbeea9 upstream. When a regulator's name equals its supply's name the regulator_resolve_supply() recurses indefinitely. Add a check so that debugging the problem is easier. The "fixed" commit just exposed the problem. Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator") Cc: stable@vger.kernel.org Reported-by: Ahmad Fatoum Signed-off-by: Michał Mirosław Tested-by: Ahmad Fatoum # stpmic1 Link: https://lore.kernel.org/r/c6171057cfc0896f950c4d8cb82df0f9f1b89ad9.1605226675.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index cee101d66249..2dd7e87ee378 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1595,6 +1595,12 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) } } + if (r == rdev) { + dev_err(dev, "Supply for %s (%s) resolved to itself\n", + rdev->desc->name, rdev->supply_name); + return -EINVAL; + } + /* * If the supply's parent device is not the same as the * regulator's parent device, then ensure the parent device From 47a15b5c7ad24d854e4399a855ff2117ce31e73d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 13 Nov 2020 01:20:28 +0100 Subject: [PATCH 559/636] regulator: workaround self-referent regulators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f5c042b23f7429e5c2ac987b01a31c69059a978b upstream. Workaround regulators whose supply name happens to be the same as its own name. This fixes boards that used to work before the early supply resolving was removed. The error message is left in place so that offending drivers can be detected. Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator") Cc: stable@vger.kernel.org Reported-by: Ahmad Fatoum Signed-off-by: Michał Mirosław Tested-by: Ahmad Fatoum # stpmic1 Link: https://lore.kernel.org/r/d703acde2a93100c3c7a81059d716c50ad1b1f52.1605226675.git.mirq-linux@rere.qmqm.pl Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 2dd7e87ee378..8a6ca06d9c16 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1598,7 +1598,10 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (r == rdev) { dev_err(dev, "Supply for %s (%s) resolved to itself\n", rdev->desc->name, rdev->supply_name); - return -EINVAL; + if (!have_full_constraints()) + return -EINVAL; + r = dummy_regulator_rdev; + get_device(&r->dev); } /* From 7d2f4b758a04748f86786d2a99fe43bbbda2c22f Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 16 Nov 2020 01:38:59 -0800 Subject: [PATCH 560/636] xtensa: disable preemption around cache alias management calls commit 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 upstream. Although cache alias management calls set up and tear down TLB entries and fast_second_level_miss is able to restore TLB entry should it be evicted they absolutely cannot preempt each other because they use the same TLBTEMP area for different purposes. Disable preemption around all cache alias management calls to enforce that. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/mm/cache.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 9220dcde7520..d1ebe67c68d4 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -71,8 +71,10 @@ static inline void kmap_invalidate_coherent(struct page *page, kvaddr = TLBTEMP_BASE_1 + (page_to_phys(page) & DCACHE_ALIAS_MASK); + preempt_disable(); __invalidate_dcache_page_alias(kvaddr, page_to_phys(page)); + preempt_enable(); } } } @@ -157,6 +159,7 @@ void flush_dcache_page(struct page *page) if (!alias && !mapping) return; + preempt_disable(); virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(virt, phys); @@ -167,6 +170,7 @@ void flush_dcache_page(struct page *page) if (mapping) __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } /* There shouldn't be an entry in the cache for this page anymore. */ @@ -200,8 +204,10 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long phys = page_to_phys(pfn_to_page(pfn)); unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(virt, phys); __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } EXPORT_SYMBOL(local_flush_cache_page); @@ -228,11 +234,13 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) unsigned long phys = page_to_phys(page); unsigned long tmp; + preempt_disable(); tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); __invalidate_icache_page_alias(tmp, phys); + preempt_enable(); clear_bit(PG_arch_1, &page->flags); } @@ -266,7 +274,9 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } /* Copy data */ @@ -281,9 +291,11 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_range((unsigned long) dst, len); if ((vma->vm_flags & VM_EXEC) != 0) __invalidate_icache_page_alias(t, phys); + preempt_enable(); } else if ((vma->vm_flags & VM_EXEC) != 0) { __flush_dcache_range((unsigned long)dst,len); @@ -305,7 +317,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } memcpy(dst, src, len); From 2216a93ce6841cde69fb6e59763ca029f15e4ddf Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 11 Nov 2020 19:33:58 +0100 Subject: [PATCH 561/636] mac80211: minstrel: remove deferred sampling code commit 4fe40b8e1566dad04c87fbf299049a1d0d4bd58d upstream. Deferring sampling attempts to the second stage has some bad interactions with drivers that process the rate table in hardware and use the probe flag to indicate probing packets (e.g. most mt76 drivers). On affected drivers it can lead to probing not working at all. If the link conditions turn worse, it might not be such a good idea to do a lot of sampling for lower rates in this case. Fix this by simply skipping the sample attempt instead of deferring it, but keep the checks that would allow it to be sampled if it was skipped too often, but only if it has less than 95% success probability. Also ensure that IEEE80211_TX_CTL_RATE_CTRL_PROBE is set for all probing packets. Cc: stable@vger.kernel.org Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201111183359.43528-2-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rc80211_minstrel.c | 25 ++++--------------------- net/mac80211/rc80211_minstrel.h | 1 - 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 07fb219327d6..968675166291 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -289,12 +289,6 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->r[ndx].stats.success += success; } - if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0)) - mi->sample_packets++; - - if (mi->sample_deferred > 0) - mi->sample_deferred--; - if (time_after(jiffies, mi->last_stats_update + (mp->update_interval * HZ) / 1000)) minstrel_update_stats(mp, mi); @@ -373,7 +367,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, return; delta = (mi->total_packets * sampling_ratio / 100) - - (mi->sample_packets + mi->sample_deferred / 2); + mi->sample_packets; /* delta < 0: no sampling required */ prev_sample = mi->prev_sample; @@ -382,7 +376,6 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, return; if (mi->total_packets >= 10000) { - mi->sample_deferred = 0; mi->sample_packets = 0; mi->total_packets = 0; } else if (delta > mi->n_rates * 2) { @@ -407,19 +400,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, * rate sampling method should be used. * Respect such rates that are not sampled for 20 interations. */ - if (mrr_capable && - msr->perfect_tx_time > mr->perfect_tx_time && - msr->stats.sample_skipped < 20) { - /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark - * packets that have the sampling rate deferred to the - * second MRR stage. Increase the sample counter only - * if the deferred sample rate was actually used. - * Use the sample_deferred counter to make sure that - * the sampling is not done in large bursts */ - info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; - rate++; - mi->sample_deferred++; - } else { + if (msr->perfect_tx_time < mr->perfect_tx_time || + msr->stats.sample_skipped >= 20) { if (!msr->sample_limit) return; @@ -439,6 +421,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, rate->idx = mi->r[ndx].rix; rate->count = minstrel_get_retry_count(&mi->r[ndx], info); + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; } diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index be6c3f35f48b..d60413adb215 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -98,7 +98,6 @@ struct minstrel_sta_info { u8 max_prob_rate; unsigned int total_packets; unsigned int sample_packets; - int sample_deferred; unsigned int sample_row; unsigned int sample_column; From b48b65efcdf698fb3118c44197b664f3e3bfe708 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 11 Nov 2020 19:33:59 +0100 Subject: [PATCH 562/636] mac80211: minstrel: fix tx status processing corner case commit b2911a84396f72149dce310a3b64d8948212c1b3 upstream. Some drivers fill the status rate list without setting the rate index after the final rate to -1. minstrel_ht already deals with this, but minstrel doesn't, which causes it to get stuck at the lowest rate on these drivers. Fix this by checking the count as well. Cc: stable@vger.kernel.org Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201111183359.43528-3-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rc80211_minstrel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 968675166291..1f3c420c01c7 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -276,7 +276,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, success = !!(info->flags & IEEE80211_TX_STAT_ACK); for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - if (ar[i].idx < 0) + if (ar[i].idx < 0 || !ar[i].count) break; ndx = rix_to_ndx(mi, ar[i].idx); From 499b109be6889b4a5442b7652c32370bb2d741a2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Nov 2020 11:22:04 +0100 Subject: [PATCH 563/636] mac80211: free sta in sta_info_insert_finish() on errors commit 7bc40aedf24d31d8bea80e1161e996ef4299fb10 upstream. If sta_info_insert_finish() fails, we currently keep the station around and free it only in the caller, but there's only one such caller and it always frees it immediately. As syzbot found, another consequence of this split is that we can put things that sleep only into __cleanup_single_sta() and not in sta_info_free(), but this is the only place that requires such of sta_info_free() now. Change this to free the station in sta_info_insert_finish(), in which case we can still sleep. This will also let us unify the cleanup code later. Cc: stable@vger.kernel.org Fixes: dcd479e10a05 ("mac80211: always wind down STA state") Reported-by: syzbot+32c6c38c4812d22f2f0b@syzkaller.appspotmail.com Reported-by: syzbot+4c81fe92e372d26c4246@syzkaller.appspotmail.com Reported-by: syzbot+6a7fe9faf0d1d61bc24a@syzkaller.appspotmail.com Reported-by: syzbot+abed06851c5ffe010921@syzkaller.appspotmail.com Reported-by: syzbot+b7aeb9318541a1c709f1@syzkaller.appspotmail.com Reported-by: syzbot+d5a9416c6cafe53b5dd0@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20201112112201.ee6b397b9453.I9c31d667a0ea2151441cc64ed6613d36c18a48e0@changeid Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/sta_info.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index d11eb5139c92..c0fae954082b 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -634,7 +634,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_drop_sta: local->num_sta--; synchronize_net(); - __cleanup_single_sta(sta); + cleanup_single_sta(sta); out_err: mutex_unlock(&local->sta_mtx); kfree(sinfo); @@ -653,19 +653,13 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) err = sta_info_insert_check(sta); if (err) { + sta_info_free(local, sta); mutex_unlock(&local->sta_mtx); rcu_read_lock(); - goto out_free; + return err; } - err = sta_info_insert_finish(sta); - if (err) - goto out_free; - - return 0; - out_free: - sta_info_free(local, sta); - return err; + return sta_info_insert_finish(sta); } int sta_info_insert(struct sta_info *sta) From a1bf9efcf4a047c80662d5509806da4cf50cfaf6 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 11 Nov 2020 16:26:25 +0100 Subject: [PATCH 564/636] s390/cpum_sf.c: fix file permission for cpum_sfb_size commit 78d732e1f326f74f240d416af9484928303d9951 upstream. This file is installed by the s390 CPU Measurement sampling facility device driver to export supported minimum and maximum sample buffer sizes. This file is read by lscpumf tool to display the details of the device driver capabilities. The lscpumf tool might be invoked by a non-root user. In this case it does not print anything because the file contents can not be read. Fix this by allowing read access for all users. Reading the file contents is ok, changing the file contents is left to the root user only. For further reference and details see: [1] https://github.com/ibm-s390-tools/s390-tools/issues/97 Fixes: 69f239ed335a ("s390/cpum_sf: Dynamically extend the sampling buffer if overflows occur") Cc: # 3.14 Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/perf_cpum_sf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0e6d01225a67..c8e1e325215b 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -2097,4 +2097,4 @@ out: return err; } arch_initcall(init_cpum_sampling_pmu); -core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); From 497fc376641993f8d6d2613f3b6c9e39b2f54b3a Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 16 Nov 2020 16:23:47 +0100 Subject: [PATCH 565/636] s390/dasd: fix null pointer dereference for ERP requests commit 6f117cb854a44a79898d844e6ae3fd23bd94e786 upstream. When requeueing all requests on the device request queue to the blocklayer we might get to an ERP (error recovery) request that is a copy of an original CQR. Those requests do not have blocklayer request information or a pointer to the dasd_queue set. When trying to access those data it will lead to a null pointer dereference in dasd_requeue_all_requests(). Fix by checking if the request is an ERP request that can simply be ignored. The blocklayer request will be requeued by the original CQR that is on the device queue right behind the ERP request. Fixes: 9487cfd3430d ("s390/dasd: fix handling of internal requests") Cc: #4.16 Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a23e7d394a0a..157bbb13c26c 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2833,6 +2833,12 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr) if (!block) return -EINVAL; + /* + * If the request is an ERP request there is nothing to requeue. + * This will be done with the remaining original request. + */ + if (cqr->refers) + return 0; spin_lock_irq(&cqr->dq->lock); req = (struct request *) cqr->callback_data; blk_mq_requeue_request(req, false); From 26c5606ff755a521aa9dcfe111ce89f08b9aac0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 30 Oct 2020 13:38:48 +0100 Subject: [PATCH 566/636] ptrace: Set PF_SUPERPRIV when checking capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cf23705244c947151179f929774fabf71e239eee upstream. Commit 69f594a38967 ("ptrace: do not audit capability check when outputing /proc/pid/stat") replaced the use of ns_capable() with has_ns_capability{,_noaudit}() which doesn't set PF_SUPERPRIV. Commit 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in ptrace_has_cap()") replaced has_ns_capability{,_noaudit}() with security_capable(), which doesn't set PF_SUPERPRIV neither. Since commit 98f368e9e263 ("kernel: Add noaudit variant of ns_capable()"), a new ns_capable_noaudit() helper is available. Let's use it! As a result, the signature of ptrace_has_cap() is restored to its original one. Cc: Christian Brauner Cc: Eric Paris Cc: Jann Horn Cc: Kees Cook Cc: Oleg Nesterov Cc: Serge E. Hallyn Cc: Tyler Hicks Cc: stable@vger.kernel.org Fixes: 6b3ad6649a4c ("ptrace: reintroduce usage of subjective credentials in ptrace_has_cap()") Fixes: 69f594a38967 ("ptrace: do not audit capability check when outputing /proc/pid/stat") Signed-off-by: Mickaël Salaün Reviewed-by: Jann Horn Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201030123849.770769-2-mic@digikod.net Signed-off-by: Greg Kroah-Hartman --- kernel/ptrace.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b93eb4eaf7ac..ecdb7402072f 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -258,17 +258,11 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) return ret; } -static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns, - unsigned int mode) +static bool ptrace_has_cap(struct user_namespace *ns, unsigned int mode) { - int ret; - if (mode & PTRACE_MODE_NOAUDIT) - ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NOAUDIT); - else - ret = security_capable(cred, ns, CAP_SYS_PTRACE, CAP_OPT_NONE); - - return ret == 0; + return ns_capable_noaudit(ns, CAP_SYS_PTRACE); + return ns_capable(ns, CAP_SYS_PTRACE); } /* Returns 0 on success, -errno on denial. */ @@ -320,7 +314,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) gid_eq(caller_gid, tcred->sgid) && gid_eq(caller_gid, tcred->gid)) goto ok; - if (ptrace_has_cap(cred, tcred->user_ns, mode)) + if (ptrace_has_cap(tcred->user_ns, mode)) goto ok; rcu_read_unlock(); return -EPERM; @@ -339,7 +333,7 @@ ok: mm = task->mm; if (mm && ((get_dumpable(mm) != SUID_DUMP_USER) && - !ptrace_has_cap(cred, mm->user_ns, mode))) + !ptrace_has_cap(mm->user_ns, mode))) return -EPERM; return security_ptrace_access_check(task, mode); From 3863935f06bd8d43a7fb4fbf65a33a5596060974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Fri, 30 Oct 2020 13:38:49 +0100 Subject: [PATCH 567/636] seccomp: Set PF_SUPERPRIV when checking capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fb14528e443646dd3fd02df4437fcf5265b66baa upstream. Replace the use of security_capable(current_cred(), ...) with ns_capable_noaudit() which set PF_SUPERPRIV. Since commit 98f368e9e263 ("kernel: Add noaudit variant of ns_capable()"), a new ns_capable_noaudit() helper is available. Let's use it! Cc: Jann Horn Cc: Kees Cook Cc: Tyler Hicks Cc: Will Drewry Cc: stable@vger.kernel.org Fixes: e2cfabdfd075 ("seccomp: add system call filtering using BPF") Signed-off-by: Mickaël Salaün Reviewed-by: Jann Horn Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201030123849.770769-3-mic@digikod.net Signed-off-by: Greg Kroah-Hartman --- kernel/seccomp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 56e69203b658..b9bd5a6b5138 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include @@ -383,8 +383,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) * behavior of privileged children. */ if (!task_no_new_privs(current) && - security_capable(current_cred(), current_user_ns(), - CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0) + !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) return ERR_PTR(-EACCES); /* Allocate a new seccomp_filter */ From 526d963441acf60edb9f0d348eef2b24edfdbffe Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 13 Nov 2020 09:59:23 +0800 Subject: [PATCH 568/636] x86/microcode/intel: Check patch signature before saving microcode for early loading commit 1a371e67dc77125736cc56d3a0893f06b75855b6 upstream. Currently, scan_microcode() leverages microcode_matches() to check if the microcode matches the CPU by comparing the family and model. However, the processor stepping and flags of the microcode signature should also be considered when saving a microcode patch for early update. Use find_matching_signature() in scan_microcode() and get rid of the now-unused microcode_matches() which is a good cleanup in itself. Complete the verification of the patch being saved for early loading in save_microcode_patch() directly. This needs to be done there too because save_mc_for_early() will call save_microcode_patch() too. The second reason why this needs to be done is because the loader still tries to support, at least hypothetically, mixed-steppings systems and thus adds all patches to the cache that belong to the same CPU model albeit with different steppings. For example: microcode: CPU: sig=0x906ec, pf=0x2, rev=0xd6 microcode: mc_saved[0]: sig=0x906e9, pf=0x2a, rev=0xd6, total size=0x19400, date = 2020-04-23 microcode: mc_saved[1]: sig=0x906ea, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 microcode: mc_saved[2]: sig=0x906eb, pf=0x2, rev=0xd6, total size=0x19400, date = 2020-04-23 microcode: mc_saved[3]: sig=0x906ec, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 microcode: mc_saved[4]: sig=0x906ed, pf=0x22, rev=0xd6, total size=0x19400, date = 2020-04-23 The patch which is being saved for early loading, however, can only be the one which fits the CPU this runs on so do the signature verification before saving. [ bp: Do signature verification in save_microcode_patch() and rewrite commit message. ] Fixes: ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU") Signed-off-by: Chen Yu Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=208535 Link: https://lkml.kernel.org/r/20201113015923.13960-1-yu.c.chen@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/microcode/intel.c | 63 +++++---------------------- 1 file changed, 10 insertions(+), 53 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 16936a24795c..3aa0e5a45303 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -103,53 +103,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev return find_matching_signature(mc, csig, cpf); } -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - * - * %true - if there's a match - * %false - otherwise - */ -static bool microcode_matches(struct microcode_header_intel *mc_header, - unsigned long sig) -{ - unsigned long total_size = get_totalsize(mc_header); - unsigned long data_size = get_datasize(mc_header); - struct extended_sigtable *ext_header; - unsigned int fam_ucode, model_ucode; - struct extended_signature *ext_sig; - unsigned int fam, model; - int ext_sigcount, i; - - fam = x86_family(sig); - model = x86_model(sig); - - fam_ucode = x86_family(mc_header->sig); - model_ucode = x86_model(mc_header->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - return false; - - ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - ext_sigcount = ext_header->count; - - for (i = 0; i < ext_sigcount; i++) { - fam_ucode = x86_family(ext_sig->sig); - model_ucode = x86_model(ext_sig->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - ext_sig++; - } - return false; -} - static struct ucode_patch *memdup_patch(void *data, unsigned int size) { struct ucode_patch *p; @@ -167,7 +120,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size) return p; } -static void save_microcode_patch(void *data, unsigned int size) +static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size) { struct microcode_header_intel *mc_hdr, *mc_saved_hdr; struct ucode_patch *iter, *tmp, *p = NULL; @@ -213,6 +166,9 @@ static void save_microcode_patch(void *data, unsigned int size) if (!p) return; + if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) + return; + /* * Save for early loading. On 32-bit, that needs to be a physical * address as the APs are running from physical addresses, before @@ -347,13 +303,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save) size -= mc_size; - if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { + if (!find_matching_signature(data, uci->cpu_sig.sig, + uci->cpu_sig.pf)) { data += mc_size; continue; } if (save) { - save_microcode_patch(data, mc_size); + save_microcode_patch(uci, data, mc_size); goto next; } @@ -486,14 +443,14 @@ static void show_saved_mc(void) * Save this microcode patch. It will be loaded early when a CPU is * hot-added or resumes. */ -static void save_mc_for_early(u8 *mc, unsigned int size) +static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size) { /* Synchronization during CPU hotplug. */ static DEFINE_MUTEX(x86_cpu_microcode_mutex); mutex_lock(&x86_cpu_microcode_mutex); - save_microcode_patch(mc, size); + save_microcode_patch(uci, mc, size); show_saved_mc(); mutex_unlock(&x86_cpu_microcode_mutex); @@ -937,7 +894,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, * permanent memory. So it will be loaded early when a CPU is hot added * or resumes. */ - save_mc_for_early(new_mc, new_mc_size); + save_mc_for_early(uci, new_mc, new_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); From dc94f8e932b596926649fa2778eff8d90495e0ff Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Sat, 21 Nov 2020 22:17:15 -0800 Subject: [PATCH 569/636] mm/userfaultfd: do not access vma->vm_mm after calling handle_userfault() commit bfe8cc1db02ab243c62780f17fc57f65bde0afe1 upstream. Alexander reported a syzkaller / KASAN finding on s390, see below for complete output. In do_huge_pmd_anonymous_page(), the pre-allocated pagetable will be freed in some cases. In the case of userfaultfd_missing(), this will happen after calling handle_userfault(), which might have released the mmap_lock. Therefore, the following pte_free(vma->vm_mm, pgtable) will access an unstable vma->vm_mm, which could have been freed or re-used already. For all architectures other than s390 this will go w/o any negative impact, because pte_free() simply frees the page and ignores the passed-in mm. The implementation for SPARC32 would also access mm->page_table_lock for pte_free(), but there is no THP support in SPARC32, so the buggy code path will not be used there. For s390, the mm->context.pgtable_list is being used to maintain the 2K pagetable fragments, and operating on an already freed or even re-used mm could result in various more or less subtle bugs due to list / pagetable corruption. Fix this by calling pte_free() before handle_userfault(), similar to how it is already done in __do_huge_pmd_anonymous_page() for the WRITE / non-huge_zero_page case. Commit 6b251fc96cf2c ("userfaultfd: call handle_userfault() for userfaultfd_missing() faults") actually introduced both, the do_huge_pmd_anonymous_page() and also __do_huge_pmd_anonymous_page() changes wrt to calling handle_userfault(), but only in the latter case it put the pte_free() before calling handle_userfault(). BUG: KASAN: use-after-free in do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744 Read of size 8 at addr 00000000962d6988 by task syz-executor.0/9334 CPU: 1 PID: 9334 Comm: syz-executor.0 Not tainted 5.10.0-rc1-syzkaller-07083-g4c9720875573 #0 Hardware name: IBM 3906 M04 701 (KVM/Linux) Call Trace: do_huge_pmd_anonymous_page+0xcda/0xd90 mm/huge_memory.c:744 create_huge_pmd mm/memory.c:4256 [inline] __handle_mm_fault+0xe6e/0x1068 mm/memory.c:4480 handle_mm_fault+0x288/0x748 mm/memory.c:4607 do_exception+0x394/0xae0 arch/s390/mm/fault.c:479 do_dat_exception+0x34/0x80 arch/s390/mm/fault.c:567 pgm_check_handler+0x1da/0x22c arch/s390/kernel/entry.S:706 copy_from_user_mvcos arch/s390/lib/uaccess.c:111 [inline] raw_copy_from_user+0x3a/0x88 arch/s390/lib/uaccess.c:174 _copy_from_user+0x48/0xa8 lib/usercopy.c:16 copy_from_user include/linux/uaccess.h:192 [inline] __do_sys_sigaltstack kernel/signal.c:4064 [inline] __s390x_sys_sigaltstack+0xc8/0x240 kernel/signal.c:4060 system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 Allocated by task 9334: slab_alloc_node mm/slub.c:2891 [inline] slab_alloc mm/slub.c:2899 [inline] kmem_cache_alloc+0x118/0x348 mm/slub.c:2904 vm_area_dup+0x9c/0x2b8 kernel/fork.c:356 __split_vma+0xba/0x560 mm/mmap.c:2742 split_vma+0xca/0x108 mm/mmap.c:2800 mlock_fixup+0x4ae/0x600 mm/mlock.c:550 apply_vma_lock_flags+0x2c6/0x398 mm/mlock.c:619 do_mlock+0x1aa/0x718 mm/mlock.c:711 __do_sys_mlock2 mm/mlock.c:738 [inline] __s390x_sys_mlock2+0x86/0xa8 mm/mlock.c:728 system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 Freed by task 9333: slab_free mm/slub.c:3142 [inline] kmem_cache_free+0x7c/0x4b8 mm/slub.c:3158 __vma_adjust+0x7b2/0x2508 mm/mmap.c:960 vma_merge+0x87e/0xce0 mm/mmap.c:1209 userfaultfd_release+0x412/0x6b8 fs/userfaultfd.c:868 __fput+0x22c/0x7a8 fs/file_table.c:281 task_work_run+0x200/0x320 kernel/task_work.c:151 tracehook_notify_resume include/linux/tracehook.h:188 [inline] do_notify_resume+0x100/0x148 arch/s390/kernel/signal.c:538 system_call+0xe6/0x28c arch/s390/kernel/entry.S:416 The buggy address belongs to the object at 00000000962d6948 which belongs to the cache vm_area_struct of size 200 The buggy address is located 64 bytes inside of 200-byte region [00000000962d6948, 00000000962d6a10) The buggy address belongs to the page: page:00000000313a09fe refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x962d6 flags: 0x3ffff00000000200(slab) raw: 3ffff00000000200 000040000257e080 0000000c0000000c 000000008020ba00 raw: 0000000000000000 000f001e00000000 ffffffff00000001 0000000096959501 page dumped because: kasan: bad access detected page->mem_cgroup:0000000096959501 Memory state around the buggy address: 00000000962d6880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000000962d6900: 00 fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb >00000000962d6980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ 00000000962d6a00: fb fb fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00000000962d6a80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Fixes: 6b251fc96cf2c ("userfaultfd: call handle_userfault() for userfaultfd_missing() faults") Reported-by: Alexander Egorenkov Signed-off-by: Gerald Schaefer Signed-off-by: Andrew Morton Cc: Andrea Arcangeli Cc: Heiko Carstens Cc: [4.3+] Link: https://lkml.kernel.org/r/20201110190329.11920-1-gerald.schaefer@linux.ibm.com Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8b137248b146..d74afc256c07 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -694,7 +694,6 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) transparent_hugepage_use_zero_page()) { pgtable_t pgtable; struct page *zero_page; - bool set; vm_fault_t ret; pgtable = pte_alloc_one(vma->vm_mm, haddr); if (unlikely(!pgtable)) @@ -707,25 +706,25 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); ret = 0; - set = false; if (pmd_none(*vmf->pmd)) { ret = check_stable_address_space(vma->vm_mm); if (ret) { spin_unlock(vmf->ptl); + pte_free(vma->vm_mm, pgtable); } else if (userfaultfd_missing(vma)) { spin_unlock(vmf->ptl); + pte_free(vma->vm_mm, pgtable); ret = handle_userfault(vmf, VM_UFFD_MISSING); VM_BUG_ON(ret & VM_FAULT_FALLBACK); } else { set_huge_zero_page(pgtable, vma->vm_mm, vma, haddr, vmf->pmd, zero_page); spin_unlock(vmf->ptl); - set = true; } - } else + } else { spin_unlock(vmf->ptl); - if (!set) pte_free(vma->vm_mm, pgtable); + } return ret; } gfp = alloc_hugepage_direct_gfpmask(vma); From 0c88e405c97ed1828443b67891e6d4bb6e56cd4e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 24 Nov 2020 13:27:27 +0100 Subject: [PATCH 570/636] Linux 4.19.160 Tested-by: Jon Hunter Tested-by: Guenter Roeck Tested-by: Shuah Khan Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/20201123121809.285416732@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 593fdbce712d..c58711498422 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 159 +SUBLEVEL = 160 EXTRAVERSION = NAME = "People's Front" From 444683e9df5852431539e1814df912759cf80370 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 12 Nov 2020 11:12:01 +0000 Subject: [PATCH 571/636] BACKPORT: sched/fair: Fix overutilized update in enqueue_task_fair() [ Upstream commit 8e1ac4299a6e8726de42310d9c1379f188140c71 ] enqueue_task_fair() attempts to skip the overutilized update for new tasks as their util_avg is not accurate yet. However, the flag we check to do so is overwritten earlier on in the function, which makes the condition pretty much a nop. Fix this by saving the flag early on. Fixes: 2802bf3cd936 ("sched/fair: Add over-utilization/tipping point indicator") Reported-by: Rick Yiu Signed-off-by: Quentin Perret Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/20201112111201.2081902-1-qperret@google.com Change-Id: I04a99c7db2d0559e838343762a928ac6caa1a9c4 --- kernel/sched/fair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7f7fa1a2ee32..14c863e52bfe 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5246,6 +5246,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; + int task_new = !(flags & ENQUEUE_WAKEUP); /* * The code below (indirectly) updates schedutil which looks at @@ -5327,7 +5328,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) * into account, but that is not straightforward to implement, * and the following generally works well enough in practice. */ - if (flags & ENQUEUE_WAKEUP) + if (!task_new) update_overutilized_status(rq); } From 442c302e2e34daa6e16b3938b9fe7b93daae8a1a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 25 Nov 2020 08:44:09 -0800 Subject: [PATCH 572/636] ANDROID: vmlinux.lds.h: merge compound literal sections After LLVM rG9e33c096476a ("[ELF] Keep orphan section names (.rodata.foo .text.foo) unchanged if !hasSectionsCommand"), LLD splits compound literals to separate sections with -fdata-sections, which it always enables with LTO. Merge these sections to allow LLVM to be upgraded. Bug: 174047799 Change-Id: I858c5fcc48283d5528c1d3e2b2d7a3c72f9e03d6 Link: https://github.com/ClangBuiltLinux/linux/issues/958 Suggested-by: Danny Lin Suggested-by: Fangrui Song Signed-off-by: Sami Tolvanen --- include/asm-generic/vmlinux.lds.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eff86b973109..6a21d161f724 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -69,10 +69,10 @@ #if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* #define TEXT_CFI_MAIN .text.[0-9a-zA-Z_]*.cfi -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* -#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* -#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* +#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else #define TEXT_MAIN .text From 98c9afedbfc11028c1e1b96bdb6a2401785f6941 Mon Sep 17 00:00:00 2001 From: Igor Lubashev Date: Mon, 26 Aug 2019 21:39:12 -0400 Subject: [PATCH 573/636] perf event: Check ref_reloc_sym before using it commit e9a6882f267a8105461066e3ea6b4b6b9be1b807 upstream. Check for ref_reloc_sym before using it instead of checking symbol_conf.kptr_restrict and relying solely on that check. Reported-by: Mathieu Poirier Signed-off-by: Igor Lubashev Tested-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Alexey Budankov Cc: James Morris Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1566869956-7154-2-git-send-email-ilubashe@akamai.com Signed-off-by: Arnaldo Carvalho de Melo Cc: Alexander Dahl Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/event.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index aa9c7df120ca..9c22729e2ad6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -912,11 +912,13 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, int err; union perf_event *event; - if (symbol_conf.kptr_restrict) - return -1; if (map == NULL) return -1; + kmap = map__kmap(map); + if (!kmap->ref_reloc_sym) + return -1; + /* * We should get this from /sys/kernel/sections/.text, but till that is * available use this, and after it is use this as a fallback for older @@ -939,7 +941,6 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; } - kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); From 5460d62d661c0fc53bfe83493821b1dc3dc969f4 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 20 Nov 2020 19:43:17 -0800 Subject: [PATCH 574/636] netfilter: clear skb->next in NF_HOOK_LIST() NF_HOOK_LIST() uses list_del() to remove skb from the linked list, however, it is not sufficient as skb->next still points to other skb. We should just call skb_list_del_init() to clear skb->next, like the rest places which using skb list. This has been fixed in upstream by commit ca58fbe06c54 ("netfilter: add and use nf_hook_slow_list()"). Fixes: 9f17dbf04ddf ("netfilter: fix use-after-free in NF_HOOK_LIST") Reported-by: liuzx@knownsec.com Tested-by: liuzx@knownsec.com Cc: Florian Westphal Cc: Edward Cree Cc: stable@vger.kernel.org # between 4.19 and 5.4 Signed-off-by: Cong Wang Signed-off-by: Greg Kroah-Hartman --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 72cb19c3db6a..9460a5635c90 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -300,7 +300,7 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - list_del(&skb->list); + skb_list_del_init(skb); if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1) list_add_tail(&skb->list, &sublist); } From aec62fa475afe706ae210c28eca38ef62ac19dbc Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 18 Nov 2020 18:03:26 +0900 Subject: [PATCH 575/636] btrfs: don't access possibly stale fs_info data for printing duplicate device commit 0697d9a610998b8bdee6b2390836cb2391d8fd1a upstream. Syzbot reported a possible use-after-free when printing a duplicate device warning device_list_add(). At this point it can happen that a btrfs_device::fs_info is not correctly setup yet, so we're accessing stale data, when printing the warning message using the btrfs_printk() wrappers. ================================================================== BUG: KASAN: use-after-free in btrfs_printk+0x3eb/0x435 fs/btrfs/super.c:245 Read of size 8 at addr ffff8880878e06a8 by task syz-executor225/7068 CPU: 1 PID: 7068 Comm: syz-executor225 Not tainted 5.9.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1d6/0x29e lib/dump_stack.c:118 print_address_description+0x66/0x620 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report+0x132/0x1d0 mm/kasan/report.c:530 btrfs_printk+0x3eb/0x435 fs/btrfs/super.c:245 device_list_add+0x1a88/0x1d60 fs/btrfs/volumes.c:943 btrfs_scan_one_device+0x196/0x490 fs/btrfs/volumes.c:1359 btrfs_mount_root+0x48f/0xb60 fs/btrfs/super.c:1634 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x88/0x270 fs/super.c:1547 fc_mount fs/namespace.c:978 [inline] vfs_kern_mount+0xc9/0x160 fs/namespace.c:1008 btrfs_mount+0x33c/0xae0 fs/btrfs/super.c:1732 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x88/0x270 fs/super.c:1547 do_new_mount fs/namespace.c:2875 [inline] path_mount+0x179d/0x29e0 fs/namespace.c:3192 do_mount fs/namespace.c:3205 [inline] __do_sys_mount fs/namespace.c:3413 [inline] __se_sys_mount+0x126/0x180 fs/namespace.c:3390 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x44840a RSP: 002b:00007ffedfffd608 EFLAGS: 00000293 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 00007ffedfffd670 RCX: 000000000044840a RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffedfffd630 RBP: 00007ffedfffd630 R08: 00007ffedfffd670 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 000000000000001a R13: 0000000000000004 R14: 0000000000000003 R15: 0000000000000003 Allocated by task 6945: kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc+0x100/0x130 mm/kasan/common.c:461 kmalloc_node include/linux/slab.h:577 [inline] kvmalloc_node+0x81/0x110 mm/util.c:574 kvmalloc include/linux/mm.h:757 [inline] kvzalloc include/linux/mm.h:765 [inline] btrfs_mount_root+0xd0/0xb60 fs/btrfs/super.c:1613 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x88/0x270 fs/super.c:1547 fc_mount fs/namespace.c:978 [inline] vfs_kern_mount+0xc9/0x160 fs/namespace.c:1008 btrfs_mount+0x33c/0xae0 fs/btrfs/super.c:1732 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x88/0x270 fs/super.c:1547 do_new_mount fs/namespace.c:2875 [inline] path_mount+0x179d/0x29e0 fs/namespace.c:3192 do_mount fs/namespace.c:3205 [inline] __do_sys_mount fs/namespace.c:3413 [inline] __se_sys_mount+0x126/0x180 fs/namespace.c:3390 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 6945: kasan_save_stack mm/kasan/common.c:48 [inline] kasan_set_track+0x3d/0x70 mm/kasan/common.c:56 kasan_set_free_info+0x17/0x30 mm/kasan/generic.c:355 __kasan_slab_free+0xdd/0x110 mm/kasan/common.c:422 __cache_free mm/slab.c:3418 [inline] kfree+0x113/0x200 mm/slab.c:3756 deactivate_locked_super+0xa7/0xf0 fs/super.c:335 btrfs_mount_root+0x72b/0xb60 fs/btrfs/super.c:1678 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x88/0x270 fs/super.c:1547 fc_mount fs/namespace.c:978 [inline] vfs_kern_mount+0xc9/0x160 fs/namespace.c:1008 btrfs_mount+0x33c/0xae0 fs/btrfs/super.c:1732 legacy_get_tree+0xea/0x180 fs/fs_context.c:592 vfs_get_tree+0x88/0x270 fs/super.c:1547 do_new_mount fs/namespace.c:2875 [inline] path_mount+0x179d/0x29e0 fs/namespace.c:3192 do_mount fs/namespace.c:3205 [inline] __do_sys_mount fs/namespace.c:3413 [inline] __se_sys_mount+0x126/0x180 fs/namespace.c:3390 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff8880878e0000 which belongs to the cache kmalloc-16k of size 16384 The buggy address is located 1704 bytes inside of 16384-byte region [ffff8880878e0000, ffff8880878e4000) The buggy address belongs to the page: page:0000000060704f30 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x878e0 head:0000000060704f30 order:3 compound_mapcount:0 compound_pincount:0 flags: 0xfffe0000010200(slab|head) raw: 00fffe0000010200 ffffea00028e9a08 ffffea00021e3608 ffff8880aa440b00 raw: 0000000000000000 ffff8880878e0000 0000000100000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880878e0580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880878e0600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8880878e0680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880878e0700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880878e0780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== The syzkaller reproducer for this use-after-free crafts a filesystem image and loop mounts it twice in a loop. The mount will fail as the crafted image has an invalid chunk tree. When this happens btrfs_mount_root() will call deactivate_locked_super(), which then cleans up fs_info and fs_info::sb. If a second thread now adds the same block-device to the filesystem, it will get detected as a duplicate device and device_list_add() will reject the duplicate and print a warning. But as the fs_info pointer passed in is non-NULL this will result in a use-after-free. Instead of printing possibly uninitialized or already freed memory in btrfs_printk(), explicitly pass in a NULL fs_info so the printing of the device name will be skipped altogether. There was a slightly different approach discussed in https://lore.kernel.org/linux-btrfs/20200114060920.4527-1-anand.jain@oracle.com/t/#u Link: https://lore.kernel.org/linux-btrfs/000000000000c9e14b05afcc41ba@google.com Reported-by: syzbot+582e66e5edf36a22c7b0@syzkaller.appspotmail.com CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Nikolay Borisov Reviewed-by: Anand Jain Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7e000d061813..95b6a4ea18f7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -857,7 +857,13 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (device->bdev != path_bdev) { bdput(path_bdev); mutex_unlock(&fs_devices->device_list_mutex); - btrfs_warn_in_rcu(device->fs_info, + /* + * device->fs_info may not be reliable here, so + * pass in a NULL instead. This avoids a + * possible use-after-free when the fs_info and + * fs_info->sb are already torn down. + */ + btrfs_warn_in_rcu(NULL, "duplicate device %s devid %llu generation %llu scanned by %s (%d)", path, devid, found_transid, current->comm, From d0ffa8527ba351f7249236b2f836efd6ead8a3d2 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 23 Nov 2020 14:28:44 +0000 Subject: [PATCH 576/636] btrfs: fix lockdep splat when reading qgroup config on mount commit 3d05cad3c357a2b749912914356072b38435edfa upstream. Lockdep reported the following splat when running test btrfs/190 from fstests: [ 9482.126098] ====================================================== [ 9482.126184] WARNING: possible circular locking dependency detected [ 9482.126281] 5.10.0-rc4-btrfs-next-73 #1 Not tainted [ 9482.126365] ------------------------------------------------------ [ 9482.126456] mount/24187 is trying to acquire lock: [ 9482.126534] ffffa0c869a7dac0 (&fs_info->qgroup_rescan_lock){+.+.}-{3:3}, at: qgroup_rescan_init+0x43/0xf0 [btrfs] [ 9482.126647] but task is already holding lock: [ 9482.126777] ffffa0c892ebd3a0 (btrfs-quota-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x27/0x120 [btrfs] [ 9482.126886] which lock already depends on the new lock. [ 9482.127078] the existing dependency chain (in reverse order) is: [ 9482.127213] -> #1 (btrfs-quota-00){++++}-{3:3}: [ 9482.127366] lock_acquire+0xd8/0x490 [ 9482.127436] down_read_nested+0x45/0x220 [ 9482.127528] __btrfs_tree_read_lock+0x27/0x120 [btrfs] [ 9482.127613] btrfs_read_lock_root_node+0x41/0x130 [btrfs] [ 9482.127702] btrfs_search_slot+0x514/0xc30 [btrfs] [ 9482.127788] update_qgroup_status_item+0x72/0x140 [btrfs] [ 9482.127877] btrfs_qgroup_rescan_worker+0xde/0x680 [btrfs] [ 9482.127964] btrfs_work_helper+0xf1/0x600 [btrfs] [ 9482.128039] process_one_work+0x24e/0x5e0 [ 9482.128110] worker_thread+0x50/0x3b0 [ 9482.128181] kthread+0x153/0x170 [ 9482.128256] ret_from_fork+0x22/0x30 [ 9482.128327] -> #0 (&fs_info->qgroup_rescan_lock){+.+.}-{3:3}: [ 9482.128464] check_prev_add+0x91/0xc60 [ 9482.128551] __lock_acquire+0x1740/0x3110 [ 9482.128623] lock_acquire+0xd8/0x490 [ 9482.130029] __mutex_lock+0xa3/0xb30 [ 9482.130590] qgroup_rescan_init+0x43/0xf0 [btrfs] [ 9482.131577] btrfs_read_qgroup_config+0x43a/0x550 [btrfs] [ 9482.132175] open_ctree+0x1228/0x18a0 [btrfs] [ 9482.132756] btrfs_mount_root.cold+0x13/0xed [btrfs] [ 9482.133325] legacy_get_tree+0x30/0x60 [ 9482.133866] vfs_get_tree+0x28/0xe0 [ 9482.134392] fc_mount+0xe/0x40 [ 9482.134908] vfs_kern_mount.part.0+0x71/0x90 [ 9482.135428] btrfs_mount+0x13b/0x3e0 [btrfs] [ 9482.135942] legacy_get_tree+0x30/0x60 [ 9482.136444] vfs_get_tree+0x28/0xe0 [ 9482.136949] path_mount+0x2d7/0xa70 [ 9482.137438] do_mount+0x75/0x90 [ 9482.137923] __x64_sys_mount+0x8e/0xd0 [ 9482.138400] do_syscall_64+0x33/0x80 [ 9482.138873] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 9482.139346] other info that might help us debug this: [ 9482.140735] Possible unsafe locking scenario: [ 9482.141594] CPU0 CPU1 [ 9482.142011] ---- ---- [ 9482.142411] lock(btrfs-quota-00); [ 9482.142806] lock(&fs_info->qgroup_rescan_lock); [ 9482.143216] lock(btrfs-quota-00); [ 9482.143629] lock(&fs_info->qgroup_rescan_lock); [ 9482.144056] *** DEADLOCK *** [ 9482.145242] 2 locks held by mount/24187: [ 9482.145637] #0: ffffa0c8411c40e8 (&type->s_umount_key#44/1){+.+.}-{3:3}, at: alloc_super+0xb9/0x400 [ 9482.146061] #1: ffffa0c892ebd3a0 (btrfs-quota-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x27/0x120 [btrfs] [ 9482.146509] stack backtrace: [ 9482.147350] CPU: 1 PID: 24187 Comm: mount Not tainted 5.10.0-rc4-btrfs-next-73 #1 [ 9482.147788] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 9482.148709] Call Trace: [ 9482.149169] dump_stack+0x8d/0xb5 [ 9482.149628] check_noncircular+0xff/0x110 [ 9482.150090] check_prev_add+0x91/0xc60 [ 9482.150561] ? kvm_clock_read+0x14/0x30 [ 9482.151017] ? kvm_sched_clock_read+0x5/0x10 [ 9482.151470] __lock_acquire+0x1740/0x3110 [ 9482.151941] ? __btrfs_tree_read_lock+0x27/0x120 [btrfs] [ 9482.152402] lock_acquire+0xd8/0x490 [ 9482.152887] ? qgroup_rescan_init+0x43/0xf0 [btrfs] [ 9482.153354] __mutex_lock+0xa3/0xb30 [ 9482.153826] ? qgroup_rescan_init+0x43/0xf0 [btrfs] [ 9482.154301] ? qgroup_rescan_init+0x43/0xf0 [btrfs] [ 9482.154768] ? qgroup_rescan_init+0x43/0xf0 [btrfs] [ 9482.155226] qgroup_rescan_init+0x43/0xf0 [btrfs] [ 9482.155690] btrfs_read_qgroup_config+0x43a/0x550 [btrfs] [ 9482.156160] open_ctree+0x1228/0x18a0 [btrfs] [ 9482.156643] btrfs_mount_root.cold+0x13/0xed [btrfs] [ 9482.157108] ? rcu_read_lock_sched_held+0x5d/0x90 [ 9482.157567] ? kfree+0x31f/0x3e0 [ 9482.158030] legacy_get_tree+0x30/0x60 [ 9482.158489] vfs_get_tree+0x28/0xe0 [ 9482.158947] fc_mount+0xe/0x40 [ 9482.159403] vfs_kern_mount.part.0+0x71/0x90 [ 9482.159875] btrfs_mount+0x13b/0x3e0 [btrfs] [ 9482.160335] ? rcu_read_lock_sched_held+0x5d/0x90 [ 9482.160805] ? kfree+0x31f/0x3e0 [ 9482.161260] ? legacy_get_tree+0x30/0x60 [ 9482.161714] legacy_get_tree+0x30/0x60 [ 9482.162166] vfs_get_tree+0x28/0xe0 [ 9482.162616] path_mount+0x2d7/0xa70 [ 9482.163070] do_mount+0x75/0x90 [ 9482.163525] __x64_sys_mount+0x8e/0xd0 [ 9482.163986] do_syscall_64+0x33/0x80 [ 9482.164437] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 9482.164902] RIP: 0033:0x7f51e907caaa This happens because at btrfs_read_qgroup_config() we can call qgroup_rescan_init() while holding a read lock on a quota btree leaf, acquired by the previous call to btrfs_search_slot_for_read(), and qgroup_rescan_init() acquires the mutex qgroup_rescan_lock. A qgroup rescan worker does the opposite: it acquires the mutex qgroup_rescan_lock, at btrfs_qgroup_rescan_worker(), and then tries to update the qgroup status item in the quota btree through the call to update_qgroup_status_item(). This inversion of locking order between the qgroup_rescan_lock mutex and quota btree locks causes the splat. Fix this simply by releasing and freeing the path before calling qgroup_rescan_init() at btrfs_read_qgroup_config(). CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index c8ed4db73b84..9936e4b991aa 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -488,13 +488,13 @@ next2: break; } out: + btrfs_free_path(path); fs_info->qgroup_flags |= flags; if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && ret >= 0) ret = qgroup_rescan_init(fs_info, rescan_progress, 0); - btrfs_free_path(path); if (ret < 0) { ulist_free(fs_info->qgroup_ulist); From a6b9a7f781b88f1a1ddba93b504206d86bfbf68e Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Thu, 21 May 2020 22:14:22 +0200 Subject: [PATCH 577/636] wireless: Use linux/stddef.h instead of stddef.h commit 1b9ae0c92925ac40489be526d67d0010d0724ce0 upstream. When compiling inside the kernel include linux/stddef.h instead of stddef.h. When I compile this header file in backports for power PC I run into a conflict with ptrdiff_t. I was unable to reproduce this in mainline kernel. I still would like to fix this problem in the kernel. Fixes: 6989310f5d43 ("wireless: Use offsetof instead of custom macro.") Signed-off-by: Hauke Mehrtens Link: https://lore.kernel.org/r/20200521201422.16493-1-hauke@hauke-m.de Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/wireless.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h index a2c006a364e0..24f3371ad826 100644 --- a/include/uapi/linux/wireless.h +++ b/include/uapi/linux/wireless.h @@ -74,7 +74,11 @@ #include /* for "struct sockaddr" et al */ #include /* for IFNAMSIZ and co... */ -#include /* for offsetof */ +#ifdef __KERNEL__ +# include /* for offsetof */ +#else +# include /* for offsetof */ +#endif /***************************** VERSION *****************************/ /* From 53ee75b86be1bbaaa17a24149197fe8b4b84ac5e Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 17 Nov 2020 23:16:29 +0800 Subject: [PATCH 578/636] KVM: arm64: vgic-v3: Drop the reporting of GICR_TYPER.Last for userspace commit 23bde34771f1ea92fb5e6682c0d8c04304d34b3b upstream. It was recently reported that if GICR_TYPER is accessed before the RD base address is set, we'll suffer from the unset @rdreg dereferencing. Oops... gpa_t last_rdist_typer = rdreg->base + GICR_TYPER + (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE; It's "expected" that users will access registers in the redistributor if the RD has been properly configured (e.g., the RD base address is set). But it hasn't yet been covered by the existing documentation. Per discussion on the list [1], the reporting of the GICR_TYPER.Last bit for userspace never actually worked. And it's difficult for us to emulate it correctly given that userspace has the flexibility to access it any time. Let's just drop the reporting of the Last bit for userspace for now (userspace should have full knowledge about it anyway) and it at least prevents kernel from panic ;-) [1] https://lore.kernel.org/kvmarm/c20865a267e44d1e2c0d52ce4e012263@kernel.org/ Fixes: ba7b3f1275fd ("KVM: arm/arm64: Revisit Redistributor TYPER last bit computation") Reported-by: Keqian Zhu Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Reviewed-by: Eric Auger Link: https://lore.kernel.org/r/20201117151629.1738-1-yuzenghui@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic/vgic-mmio-v3.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index a2a175b08b17..cbb38a0d1b25 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -226,6 +226,23 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, return extract_bytes(value, addr & 7, len); } +static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + int target_vcpu_id = vcpu->vcpu_id; + u64 value; + + value = (u64)(mpidr & GENMASK(23, 0)) << 32; + value |= ((target_vcpu_id & 0xffff) << 8); + + if (vgic_has_its(vcpu->kvm)) + value |= GICR_TYPER_PLPIS; + + /* reporting of the Last bit is not supported for userspace */ + return extract_bytes(value, addr & 7, len); +} + static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { @@ -532,8 +549,9 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_IIDR, vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_TYPER, - vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER, + vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, + vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_WAKER, vgic_mmio_read_raz, vgic_mmio_write_wi, 4, From f6d579d770d50d21d5ad26d3dbdafe2237967766 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 27 Nov 2020 08:53:52 +0100 Subject: [PATCH 579/636] KVM: x86: handle !lapic_in_kernel case in kvm_cpu_*_extint commit 72c3bcdcda494cbd600712a32e67702cdee60c07 upstream. Centralize handling of interrupts from the userspace APIC in kvm_cpu_has_extint and kvm_cpu_get_extint, since userspace APIC interrupts are handled more or less the same as ExtINTs are with split irqchip. This removes duplicated code from kvm_cpu_has_injectable_intr and kvm_cpu_has_interrupt, and makes the code more similar between kvm_cpu_has_{extint,interrupt} on one side and kvm_cpu_get_{extint,interrupt} on the other. Cc: stable@vger.kernel.org Reviewed-by: Filippo Sironi Reviewed-by: David Woodhouse Tested-by: David Woodhouse Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/irq.c | 83 ++++++++++++++++++-------------------------- arch/x86/kvm/lapic.c | 2 +- 2 files changed, 34 insertions(+), 51 deletions(-) diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 007bc654f928..70cdd01aa668 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -53,28 +53,9 @@ static int pending_userspace_extint(struct kvm_vcpu *v) * non-APIC source without intack. */ static int kvm_cpu_has_extint(struct kvm_vcpu *v) -{ - u8 accept = kvm_apic_accept_pic_intr(v); - - if (accept) { - if (irqchip_split(v->kvm)) - return pending_userspace_extint(v); - else - return v->kvm->arch.vpic->output; - } else - return 0; -} - -/* - * check if there is injectable interrupt: - * when virtual interrupt delivery enabled, - * interrupt from apic will handled by hardware, - * we don't need to check it here. - */ -int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) { /* - * FIXME: interrupt.injected represents an interrupt that it's + * FIXME: interrupt.injected represents an interrupt whose * side-effects have already been applied (e.g. bit from IRR * already moved to ISR). Therefore, it is incorrect to rely * on interrupt.injected to know if there is a pending @@ -87,6 +68,23 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (!lapic_in_kernel(v)) return v->arch.interrupt.injected; + if (!kvm_apic_accept_pic_intr(v)) + return 0; + + if (irqchip_split(v->kvm)) + return pending_userspace_extint(v); + else + return v->kvm->arch.vpic->output; +} + +/* + * check if there is injectable interrupt: + * when virtual interrupt delivery enabled, + * interrupt from apic will handled by hardware, + * we don't need to check it here. + */ +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +{ if (kvm_cpu_has_extint(v)) return 1; @@ -102,20 +100,6 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - /* - * FIXME: interrupt.injected represents an interrupt that it's - * side-effects have already been applied (e.g. bit from IRR - * already moved to ISR). Therefore, it is incorrect to rely - * on interrupt.injected to know if there is a pending - * interrupt in the user-mode LAPIC. - * This leads to nVMX/nSVM not be able to distinguish - * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on - * pending interrupt or should re-inject an injected - * interrupt. - */ - if (!lapic_in_kernel(v)) - return v->arch.interrupt.injected; - if (kvm_cpu_has_extint(v)) return 1; @@ -129,16 +113,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); */ static int kvm_cpu_get_extint(struct kvm_vcpu *v) { - if (kvm_cpu_has_extint(v)) { - if (irqchip_split(v->kvm)) { - int vector = v->arch.pending_external_vector; - - v->arch.pending_external_vector = -1; - return vector; - } else - return kvm_pic_read_irq(v->kvm); /* PIC */ - } else + if (!kvm_cpu_has_extint(v)) { + WARN_ON(!lapic_in_kernel(v)); return -1; + } + + if (!lapic_in_kernel(v)) + return v->arch.interrupt.nr; + + if (irqchip_split(v->kvm)) { + int vector = v->arch.pending_external_vector; + + v->arch.pending_external_vector = -1; + return vector; + } else + return kvm_pic_read_irq(v->kvm); /* PIC */ } /* @@ -146,13 +135,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v) */ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { - int vector; - - if (!lapic_in_kernel(v)) - return v->arch.interrupt.nr; - - vector = kvm_cpu_get_extint(v); - + int vector = kvm_cpu_get_extint(v); if (vector != -1) return vector; /* PIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bba2f76c356d..56a4b9762b0c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2284,7 +2284,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!kvm_apic_hw_enabled(apic)) + if (!kvm_apic_present(vcpu)) return -1; __apic_update_ppr(apic, &ppr); From b43f0efb0b515e94f68dbc20119fa72fa938f3a7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 27 Nov 2020 09:18:20 +0100 Subject: [PATCH 580/636] KVM: x86: Fix split-irqchip vs interrupt injection window request commit 71cc849b7093bb83af966c0e60cb11b7f35cd746 upstream. kvm_cpu_accept_dm_intr and kvm_vcpu_ready_for_interrupt_injection are a hodge-podge of conditions, hacked together to get something that more or less works. But what is actually needed is much simpler; in both cases the fundamental question is, do we have a place to stash an interrupt if userspace does KVM_INTERRUPT? In userspace irqchip mode, that is !vcpu->arch.interrupt.injected. Currently kvm_event_needs_reinjection(vcpu) covers it, but it is unnecessarily restrictive. In split irqchip mode it's a bit more complicated, we need to check kvm_apic_accept_pic_intr(vcpu) (the IRQ window exit is basically an INTACK cycle and thus requires ExtINTs not to be masked) as well as !pending_userspace_extint(vcpu). However, there is no need to check kvm_event_needs_reinjection(vcpu), since split irqchip keeps pending ExtINT state separate from event injection state, and checking kvm_cpu_has_interrupt(vcpu) is wrong too since ExtINT has higher priority than APIC interrupts. In fact the latter fixes a bug: when userspace requests an IRQ window vmexit, an interrupt in the local APIC can cause kvm_cpu_has_interrupt() to be true and thus kvm_vcpu_ready_for_interrupt_injection() to return false. When this happens, vcpu_run does not exit to userspace but the interrupt window vmexits keep occurring. The VM loops without any hope of making progress. Once we try to fix these with something like return kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu) && - kvm_cpu_accept_dm_intr(vcpu); + (!lapic_in_kernel(vcpu) + ? !vcpu->arch.interrupt.injected + : (kvm_apic_accept_pic_intr(vcpu) + && !pending_userspace_extint(v))); we realize two things. First, thanks to the previous patch the complex conditional can reuse !kvm_cpu_has_extint(vcpu). Second, the interrupt window request in vcpu_enter_guest() bool req_int_win = dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); should be kept in sync with kvm_vcpu_ready_for_interrupt_injection(): it is unnecessary to ask the processor for an interrupt window if we would not be able to return to userspace. Therefore, kvm_cpu_accept_dm_intr(vcpu) is basically !kvm_cpu_has_extint(vcpu) ANDed with the existing check for masked ExtINT. It all makes sense: - we can accept an interrupt from userspace if there is a place to stash it (and, for irqchip split, ExtINTs are not masked). Interrupts from userspace _can_ be accepted even if right now EFLAGS.IF=0. - in order to tell userspace we will inject its interrupt ("IRQ window open" i.e. kvm_vcpu_ready_for_interrupt_injection), both KVM and the vCPU need to be ready to accept the interrupt. ... and this is what the patch implements. Reported-by: David Woodhouse Analyzed-by: David Woodhouse Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Reviewed-by: Nikos Tsironis Reviewed-by: David Woodhouse Tested-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/irq.c | 2 +- arch/x86/kvm/x86.c | 18 ++++++++++-------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4876411a072a..98b74711e6b7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1472,6 +1472,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 70cdd01aa668..295ebadb8f2c 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -52,7 +52,7 @@ static int pending_userspace_extint(struct kvm_vcpu *v) * check if there is pending interrupt from * non-APIC source without intack. */ -static int kvm_cpu_has_extint(struct kvm_vcpu *v) +int kvm_cpu_has_extint(struct kvm_vcpu *v) { /* * FIXME: interrupt.injected represents an interrupt whose diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dd182228be71..7096578ef737 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3351,21 +3351,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) { + /* + * We can accept userspace's request for interrupt injection + * as long as we have a place to store the interrupt number. + * The actual injection will happen when the CPU is able to + * deliver the interrupt. + */ + if (kvm_cpu_has_extint(vcpu)) + return false; + + /* Acknowledging ExtINT does not happen if LINT0 is masked. */ return (!lapic_in_kernel(vcpu) || kvm_apic_accept_pic_intr(vcpu)); } -/* - * if userspace requested an interrupt window, check that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. - */ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { return kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); } From ad1fc801a3e9bdaed26b112687f6677612da8427 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 20 Nov 2020 13:28:01 +0000 Subject: [PATCH 581/636] arm64: pgtable: Fix pte_accessible() commit 07509e10dcc77627f8b6a57381e878fe269958d3 upstream. pte_accessible() is used by ptep_clear_flush() to figure out whether TLB invalidation is necessary when unmapping pages for reclaim. Although our implementation is correct according to the architecture, returning true only for valid, young ptes in the absence of racing page-table modifications, this is in fact flawed due to lazy invalidation of old ptes in ptep_clear_flush_young() where we elide the expensive DSB instruction for completing the TLB invalidation. Rather than penalise the aging path, adjust pte_accessible() to return true for any valid pte, even if the access flag is cleared. Cc: Fixes: 76c714be0e5e ("arm64: pgtable: implement pte_accessible()") Reported-by: Yu Zhao Acked-by: Yu Zhao Reviewed-by: Minchan Kim Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20201120143557.6715-2-will@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 291fed0a9df8..411a2a9b0568 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -107,8 +107,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_young(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) @@ -116,9 +114,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been * remapped as PROT_NONE but are yet to be flushed from the TLB. + * Note that we can't make any assumptions based on the state of the access + * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * TLB. */ #define pte_accessible(mm, pte) \ - (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) + (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* * p??_access_permitted() is true for valid user mappings (subject to the From 46c69325028226332be13b6df9f36f2d6b07b489 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 20 Nov 2020 13:57:48 +0000 Subject: [PATCH 582/636] arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect() commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b upstream. With hardware dirty bit management, calling pte_wrprotect() on a writable, dirty PTE will lose the dirty state and return a read-only, clean entry. Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that the dirty bit is preserved for writable entries, as this is required for soft-dirty bit management if we enable it in the future. Cc: Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20201120143557.6715-3-will@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 411a2a9b0568..f43519b71061 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -145,13 +145,6 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) return pte; } -static inline pte_t pte_wrprotect(pte_t pte) -{ - pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); - return pte; -} - static inline pte_t pte_mkwrite(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -177,6 +170,20 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY bit. + */ + if (pte_hw_dirty(pte)) + pte = pte_mkdirty(pte); + + pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; +} + static inline pte_t pte_mkold(pte_t pte) { return clear_pte_bit(pte, __pgprot(PTE_AF)); @@ -669,12 +676,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres pte = READ_ONCE(*ptep); do { old_pte = pte; - /* - * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY bit. - */ - if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); From 72289dc23c3c6311d21d87eee55278aa4308a73e Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 16 Oct 2018 16:39:46 -0400 Subject: [PATCH 583/636] drm/atomic_helper: Stop modesets on unregistered connectors harder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit de9f8eea5a44b0b756d3d6345af7f8e630a3c8c0 upstream. Unfortunately, it appears our fix in: commit b5d29843d8ef ("drm/atomic_helper: Allow DPMS On<->Off changes for unregistered connectors") Which attempted to work around the problems introduced by: commit 4d80273976bf ("drm/atomic_helper: Disallow new modesets on unregistered connectors") Is still not the right solution, as modesets can still be triggered outside of drm_atomic_set_crtc_for_connector(). So in order to fix this, while still being careful that we don't break modesets that a driver may perform before being registered with userspace, we replace connector->registered with a tristate member, connector->registration_state. This allows us to keep track of whether or not a connector is still initializing and hasn't been exposed to userspace, is currently registered and exposed to userspace, or has been legitimately removed from the system after having once been present. Using this info, we can prevent userspace from performing new modesets on unregistered connectors while still allowing the driver to perform modesets on unregistered connectors before the driver has finished being registered. Changes since v1: - Fix WARN_ON() in drm_connector_cleanup() that CI caught with this patchset in igt@drv_module_reload@basic-reload-inject and igt@drv_module_reload@basic-reload by checking if the connector is registered instead of unregistered, as calling drm_connector_cleanup() on a connector that hasn't been registered with userspace yet should stay valid. - Remove unregistered_connector_check(), and just go back to what we were doing before in commit 4d80273976bf ("drm/atomic_helper: Disallow new modesets on unregistered connectors") except replacing READ_ONCE(connector->registered) with drm_connector_is_unregistered(). This gets rid of the behavior of allowing DPMS On<->Off, but that should be fine as it's more consistent with the UAPI we had before - danvet - s/drm_connector_unregistered/drm_connector_is_unregistered/ - danvet - Update documentation, fix some typos. Fixes: b5d29843d8ef ("drm/atomic_helper: Allow DPMS On<->Off changes for unregistered connectors") Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Rodrigo Vivi Cc: stable@vger.kernel.org Cc: David Airlie Signed-off-by: Lyude Paul Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20181016203946.9601-1-lyude@redhat.com (cherry picked from commit 39b50c603878f4f8ae541ac4088a805d588abc79) Fixes: e96550956fbc ("drm/atomic_helper: Disallow new modesets on unregistered connectors") Fixes: 34ca26a98ad6 ("drm/atomic_helper: Allow DPMS On<->Off changes for unregistered connectors") Cc: stable@vger.kernel.org Signed-off-by: Joonas Lahtinen Cc: Christoph Niedermaier Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic.c | 21 --------- drivers/gpu/drm/drm_atomic_helper.c | 21 ++++++++- drivers/gpu/drm/drm_connector.c | 11 +++-- drivers/gpu/drm/i915/intel_dp_mst.c | 8 ++-- include/drm/drm_connector.h | 71 ++++++++++++++++++++++++++++- 5 files changed, 99 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 1a4b44923aec..281cf9cbb44c 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1702,27 +1702,6 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state, struct drm_connector *connector = conn_state->connector; struct drm_crtc_state *crtc_state; - /* - * For compatibility with legacy users, we want to make sure that - * we allow DPMS On<->Off modesets on unregistered connectors, since - * legacy modesetting users will not be expecting these to fail. We do - * not however, want to allow legacy users to assign a connector - * that's been unregistered from sysfs to another CRTC, since doing - * this with a now non-existent connector could potentially leave us - * in an invalid state. - * - * Since the connector can be unregistered at any point during an - * atomic check or commit, this is racy. But that's OK: all we care - * about is ensuring that userspace can't use this connector for new - * configurations after it's been notified that the connector is no - * longer present. - */ - if (!READ_ONCE(connector->registered) && crtc) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n", - connector->base.id, connector->name); - return -EINVAL; - } - if (conn_state->crtc == crtc) return 0; diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index c22062cc9992..d24a15484e31 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -307,6 +307,26 @@ update_connector_routing(struct drm_atomic_state *state, return 0; } + crtc_state = drm_atomic_get_new_crtc_state(state, + new_connector_state->crtc); + /* + * For compatibility with legacy users, we want to make sure that + * we allow DPMS On->Off modesets on unregistered connectors. Modesets + * which would result in anything else must be considered invalid, to + * avoid turning on new displays on dead connectors. + * + * Since the connector can be unregistered at any point during an + * atomic check or commit, this is racy. But that's OK: all we care + * about is ensuring that userspace can't do anything but shut off the + * display on a connector that was destroyed after its been notified, + * not before. + */ + if (drm_connector_is_unregistered(connector) && crtc_state->active) { + DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n", + connector->base.id, connector->name); + return -EINVAL; + } + funcs = connector->helper_private; if (funcs->atomic_best_encoder) @@ -351,7 +371,6 @@ update_connector_routing(struct drm_atomic_state *state, set_best_encoder(state, new_connector_state, new_encoder); - crtc_state = drm_atomic_get_new_crtc_state(state, new_connector_state->crtc); crtc_state->connectors_changed = true; DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n", diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 6011d769d50b..7bb68ca4aa0b 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -375,7 +375,8 @@ void drm_connector_cleanup(struct drm_connector *connector) /* The connector should have been removed from userspace long before * it is finally destroyed. */ - if (WARN_ON(connector->registered)) + if (WARN_ON(connector->registration_state == + DRM_CONNECTOR_REGISTERED)) drm_connector_unregister(connector); if (connector->tile_group) { @@ -432,7 +433,7 @@ int drm_connector_register(struct drm_connector *connector) return 0; mutex_lock(&connector->mutex); - if (connector->registered) + if (connector->registration_state != DRM_CONNECTOR_INITIALIZING) goto unlock; ret = drm_sysfs_connector_add(connector); @@ -452,7 +453,7 @@ int drm_connector_register(struct drm_connector *connector) drm_mode_object_register(connector->dev, &connector->base); - connector->registered = true; + connector->registration_state = DRM_CONNECTOR_REGISTERED; goto unlock; err_debugfs: @@ -474,7 +475,7 @@ EXPORT_SYMBOL(drm_connector_register); void drm_connector_unregister(struct drm_connector *connector) { mutex_lock(&connector->mutex); - if (!connector->registered) { + if (connector->registration_state != DRM_CONNECTOR_REGISTERED) { mutex_unlock(&connector->mutex); return; } @@ -485,7 +486,7 @@ void drm_connector_unregister(struct drm_connector *connector) drm_sysfs_connector_remove(connector); drm_debugfs_connector_remove(connector); - connector->registered = false; + connector->registration_state = DRM_CONNECTOR_UNREGISTERED; mutex_unlock(&connector->mutex); } EXPORT_SYMBOL(drm_connector_unregister); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index c7d52c66ff29..8a19cfcfc4f1 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -77,7 +77,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->pbn = mst_pbn; /* Zombie connectors can't have VCPI slots */ - if (READ_ONCE(connector->registered)) { + if (!drm_connector_is_unregistered(connector)) { slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, port, @@ -317,7 +317,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) struct edid *edid; int ret; - if (!READ_ONCE(connector->registered)) + if (drm_connector_is_unregistered(connector)) return intel_connector_update_modes(connector, NULL); edid = drm_dp_mst_get_edid(connector, &intel_dp->mst_mgr, intel_connector->port); @@ -333,7 +333,7 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force) struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; - if (!READ_ONCE(connector->registered)) + if (drm_connector_is_unregistered(connector)) return connector_status_disconnected; return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port); @@ -376,7 +376,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, int bpp = 24; /* MST uses fixed bpp */ int max_rate, mode_rate, max_lanes, max_link_clock; - if (!READ_ONCE(connector->registered)) + if (drm_connector_is_unregistered(connector)) return MODE_ERROR; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 97ea41dc678f..e5f641cdab5a 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -81,6 +81,53 @@ enum drm_connector_status { connector_status_unknown = 3, }; +/** + * enum drm_connector_registration_status - userspace registration status for + * a &drm_connector + * + * This enum is used to track the status of initializing a connector and + * registering it with userspace, so that DRM can prevent bogus modesets on + * connectors that no longer exist. + */ +enum drm_connector_registration_state { + /** + * @DRM_CONNECTOR_INITIALIZING: The connector has just been created, + * but has yet to be exposed to userspace. There should be no + * additional restrictions to how the state of this connector may be + * modified. + */ + DRM_CONNECTOR_INITIALIZING = 0, + + /** + * @DRM_CONNECTOR_REGISTERED: The connector has been fully initialized + * and registered with sysfs, as such it has been exposed to + * userspace. There should be no additional restrictions to how the + * state of this connector may be modified. + */ + DRM_CONNECTOR_REGISTERED = 1, + + /** + * @DRM_CONNECTOR_UNREGISTERED: The connector has either been exposed + * to userspace and has since been unregistered and removed from + * userspace, or the connector was unregistered before it had a chance + * to be exposed to userspace (e.g. still in the + * @DRM_CONNECTOR_INITIALIZING state). When a connector is + * unregistered, there are additional restrictions to how its state + * may be modified: + * + * - An unregistered connector may only have its DPMS changed from + * On->Off. Once DPMS is changed to Off, it may not be switched back + * to On. + * - Modesets are not allowed on unregistered connectors, unless they + * would result in disabling its assigned CRTCs. This means + * disabling a CRTC on an unregistered connector is OK, but enabling + * one is not. + * - Removing a CRTC from an unregistered connector is OK, but new + * CRTCs may never be assigned to an unregistered connector. + */ + DRM_CONNECTOR_UNREGISTERED = 2, +}; + enum subpixel_order { SubPixelUnknown = 0, SubPixelHorizontalRGB, @@ -852,10 +899,12 @@ struct drm_connector { bool ycbcr_420_allowed; /** - * @registered: Is this connector exposed (registered) with userspace? + * @registration_state: Is this connector initializing, exposed + * (registered) with userspace, or unregistered? + * * Protected by @mutex. */ - bool registered; + enum drm_connector_registration_state registration_state; /** * @modes: @@ -1165,6 +1214,24 @@ static inline void drm_connector_unreference(struct drm_connector *connector) drm_connector_put(connector); } +/** + * drm_connector_is_unregistered - has the connector been unregistered from + * userspace? + * @connector: DRM connector + * + * Checks whether or not @connector has been unregistered from userspace. + * + * Returns: + * True if the connector was unregistered, false if the connector is + * registered or has not yet been registered with userspace. + */ +static inline bool +drm_connector_is_unregistered(struct drm_connector *connector) +{ + return READ_ONCE(connector->registration_state) == + DRM_CONNECTOR_UNREGISTERED; +} + const char *drm_get_connector_status_name(enum drm_connector_status status); const char *drm_get_subpixel_order_name(enum subpixel_order order); const char *drm_get_dpms_name(int val); From 3afe38f6c3c6525d529db8e0fee89d1ad1fc345d Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 13 Oct 2020 18:26:28 +0300 Subject: [PATCH 584/636] ALSA: hda/hdmi: fix incorrect locking in hdmi_pcm_close commit ce1558c285f9ad04c03b46833a028230771cc0a7 upstream A race exists between closing a PCM and update of ELD data. In hdmi_pcm_close(), hinfo->nid value is modified without taking spec->pcm_lock. If this happens concurrently while processing an ELD update in hdmi_pcm_setup_pin(), converter assignment may be done incorrectly. This bug was found by hitting a WARN_ON in snd_hda_spdif_ctls_assign() in a HDMI receiver connection stress test: [2739.684569] WARNING: CPU: 5 PID: 2090 at sound/pci/hda/patch_hdmi.c:1898 check_non_pcm_per_cvt+0x41/0x50 [snd_hda_codec_hdmi] ... [2739.684707] Call Trace: [2739.684720] update_eld+0x121/0x5a0 [snd_hda_codec_hdmi] [2739.684736] hdmi_present_sense+0x21e/0x3b0 [snd_hda_codec_hdmi] [2739.684750] check_presence_and_report+0x81/0xd0 [snd_hda_codec_hdmi] [2739.684842] intel_audio_codec_enable+0x122/0x190 [i915] Fixes: 42b2987079ec ("ALSA: hda - hdmi playback without monitor in dynamic pcm bind mode") Signed-off-by: Kai Vehmanen Cc: Link: https://lore.kernel.org/r/20201013152628.920764-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai [sudip: adjust context] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 708efb9b4387..f86b9b0a0607 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1955,20 +1955,23 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo, int pinctl; int err = 0; + mutex_lock(&spec->pcm_lock); if (hinfo->nid) { pcm_idx = hinfo_to_pcm_index(codec, hinfo); - if (snd_BUG_ON(pcm_idx < 0)) - return -EINVAL; + if (snd_BUG_ON(pcm_idx < 0)) { + err = -EINVAL; + goto unlock; + } cvt_idx = cvt_nid_to_cvt_index(codec, hinfo->nid); - if (snd_BUG_ON(cvt_idx < 0)) - return -EINVAL; + if (snd_BUG_ON(cvt_idx < 0)) { + err = -EINVAL; + goto unlock; + } per_cvt = get_cvt(spec, cvt_idx); - snd_BUG_ON(!per_cvt->assigned); per_cvt->assigned = 0; hinfo->nid = 0; - mutex_lock(&spec->pcm_lock); snd_hda_spdif_ctls_unassign(codec, pcm_idx); clear_bit(pcm_idx, &spec->pcm_in_use); pin_idx = hinfo_to_pin_index(codec, hinfo); @@ -1996,10 +1999,11 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo, per_pin->setup = false; per_pin->channels = 0; mutex_unlock(&per_pin->lock); - unlock: - mutex_unlock(&spec->pcm_lock); } +unlock: + mutex_unlock(&spec->pcm_lock); + return err; } From ac13ba2bd1698c0c65611aa97cfdf09acb194a13 Mon Sep 17 00:00:00 2001 From: Frank Yang Date: Fri, 21 Aug 2020 03:16:50 +0900 Subject: [PATCH 585/636] HID: cypress: Support Varmilo Keyboards' media hotkeys [ Upstream commit 652f3d00de523a17b0cebe7b90debccf13aa8c31 ] The Varmilo VA104M Keyboard (04b4:07b1, reported as Varmilo Z104M) exposes media control hotkeys as a USB HID consumer control device, but these keys do not work in the current (5.8-rc1) kernel due to the incorrect HID report descriptor. Fix the problem by modifying the internal HID report descriptor. More specifically, the keyboard report descriptor specifies the logical boundary as 572~10754 (0x023c ~ 0x2a02) while the usage boundary is specified as 0~10754 (0x00 ~ 0x2a02). This results in an incorrect interpretation of input reports, causing inputs to be ignored. By setting the Logical Minimum to zero, we align the logical boundary with the Usage ID boundary. Some notes: * There seem to be multiple variants of the VA104M keyboard. This patch specifically targets 04b4:07b1 variant. * The device works out-of-the-box on Windows platform with the generic consumer control device driver (hidserv.inf). This suggests that Windows either ignores the Logical Minimum/Logical Maximum or interprets the Usage ID assignment differently from the linux implementation; Maybe there are other devices out there that only works on Windows due to this problem? Signed-off-by: Frank Yang Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-cypress.c | 44 ++++++++++++++++++++++++++++++++++----- drivers/hid/hid-ids.h | 2 ++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hid-cypress.c b/drivers/hid/hid-cypress.c index 1689568b597d..12c5d7c96527 100644 --- a/drivers/hid/hid-cypress.c +++ b/drivers/hid/hid-cypress.c @@ -26,19 +26,17 @@ #define CP_2WHEEL_MOUSE_HACK 0x02 #define CP_2WHEEL_MOUSE_HACK_ON 0x04 +#define VA_INVAL_LOGICAL_BOUNDARY 0x08 + /* * Some USB barcode readers from cypress have usage min and usage max in * the wrong order */ -static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, +static __u8 *cp_rdesc_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); unsigned int i; - if (!(quirks & CP_RDESC_SWAPPED_MIN_MAX)) - return rdesc; - if (*rsize < 4) return rdesc; @@ -51,6 +49,40 @@ static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, return rdesc; } +static __u8 *va_logical_boundary_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + /* + * Varmilo VA104M (with VID Cypress and device ID 07B1) incorrectly + * reports Logical Minimum of its Consumer Control device as 572 + * (0x02 0x3c). Fix this by setting its Logical Minimum to zero. + */ + if (*rsize == 25 && + rdesc[0] == 0x05 && rdesc[1] == 0x0c && + rdesc[2] == 0x09 && rdesc[3] == 0x01 && + rdesc[6] == 0x19 && rdesc[7] == 0x00 && + rdesc[11] == 0x16 && rdesc[12] == 0x3c && rdesc[13] == 0x02) { + hid_info(hdev, + "fixing up varmilo VA104M consumer control report descriptor\n"); + rdesc[12] = 0x00; + rdesc[13] = 0x00; + } + return rdesc; +} + +static __u8 *cp_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + unsigned long quirks = (unsigned long)hid_get_drvdata(hdev); + + if (quirks & CP_RDESC_SWAPPED_MIN_MAX) + rdesc = cp_rdesc_fixup(hdev, rdesc, rsize); + if (quirks & VA_INVAL_LOGICAL_BOUNDARY) + rdesc = va_logical_boundary_fixup(hdev, rdesc, rsize); + + return rdesc; +} + static int cp_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) @@ -131,6 +163,8 @@ static const struct hid_device_id cp_devices[] = { .driver_data = CP_RDESC_SWAPPED_MIN_MAX }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE), .driver_data = CP_2WHEEL_MOUSE_HACK }, + { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_VARMILO_VA104M_07B1), + .driver_data = VA_INVAL_LOGICAL_BOUNDARY }, { } }; MODULE_DEVICE_TABLE(hid, cp_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e18d796d985f..e9155f7c8c51 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -330,6 +330,8 @@ #define USB_DEVICE_ID_CYPRESS_BARCODE_4 0xed81 #define USB_DEVICE_ID_CYPRESS_TRUETOUCH 0xc001 +#define USB_DEVICE_ID_CYPRESS_VARMILO_VA104M_07B1 0X07b1 + #define USB_VENDOR_ID_DATA_MODUL 0x7374 #define USB_VENDOR_ID_DATA_MODUL_EASYMAXTOUCH 0x1201 From af13df79a0a006585e644a1c954fc9ad079fea2e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 30 Sep 2020 22:52:31 +0200 Subject: [PATCH 586/636] HID: add support for Sega Saturn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1811977cb11354aef8cbd13e35ff50db716728a4 ] This device needs HID_QUIRK_MULTI_INPUT in order to be presented to userspace in a consistent way. Reported-and-tested-by: David Gámiz Jiménez Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e9155f7c8c51..4d2a1d540a82 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -479,6 +479,7 @@ #define USB_DEVICE_ID_PENPOWER 0x00f4 #define USB_VENDOR_ID_GREENASIA 0x0e8f +#define USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR 0x3010 #define USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD 0x3013 #define USB_VENDOR_ID_GRETAGMACBETH 0x0971 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 2d8d20a7f457..493e2e7e12de 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -85,6 +85,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, From 94b6e6ee174193d65c6f6e47a1f8847d85325544 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 26 Oct 2020 20:53:57 -0700 Subject: [PATCH 587/636] Input: i8042 - allow insmod to succeed on devices without an i8042 controller [ Upstream commit b1884583fcd17d6a1b1bba94bbb5826e6b5c6e17 ] The i8042 module exports several symbols which may be used by other modules. Before this commit it would refuse to load (when built as a module itself) on systems without an i8042 controller. This is a problem specifically for the asus-nb-wmi module. Many Asus laptops support the Asus WMI interface. Some of them have an i8042 controller and need to use i8042_install_filter() to filter some kbd events. Other models do not have an i8042 controller (e.g. they use an USB attached kbd). Before this commit the asus-nb-wmi driver could not be loaded on Asus models without an i8042 controller, when the i8042 code was built as a module (as Arch Linux does) because the module_init function of the i8042 module would fail with -ENODEV and thus the i8042_install_filter symbol could not be loaded. This commit fixes this by exiting from module_init with a return code of 0 if no controller is found. It also adds a i8042_present bool to make the module_exit function a no-op in this case and also adds a check for i8042_present to the exported i8042_command function. The latter i8042_present check should not really be necessary because when builtin that function can already be used on systems without an i8042 controller, but better safe then sorry. Reported-and-tested-by: Marius Iacob Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20201008112628.3979-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/serio/i8042.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 95a78ccbd847..fef3b4064f18 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -125,6 +125,7 @@ module_param_named(unmask_kbd_data, i8042_unmask_kbd_data, bool, 0600); MODULE_PARM_DESC(unmask_kbd_data, "Unconditional enable (may reveal sensitive data) of normally sanitize-filtered kbd data traffic debug log [pre-condition: i8042.debug=1 enabled]"); #endif +static bool i8042_present; static bool i8042_bypass_aux_irq_test; static char i8042_kbd_firmware_id[128]; static char i8042_aux_firmware_id[128]; @@ -345,6 +346,9 @@ int i8042_command(unsigned char *param, int command) unsigned long flags; int retval; + if (!i8042_present) + return -1; + spin_lock_irqsave(&i8042_lock, flags); retval = __i8042_command(param, command); spin_unlock_irqrestore(&i8042_lock, flags); @@ -1613,12 +1617,15 @@ static int __init i8042_init(void) err = i8042_platform_init(); if (err) - return err; + return (err == -ENODEV) ? 0 : err; err = i8042_controller_check(); if (err) goto err_platform_exit; + /* Set this before creating the dev to allow i8042_command to work right away */ + i8042_present = true; + pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0); if (IS_ERR(pdev)) { err = PTR_ERR(pdev); @@ -1637,6 +1644,9 @@ static int __init i8042_init(void) static void __exit i8042_exit(void) { + if (!i8042_present) + return; + platform_device_unregister(i8042_platform_device); platform_driver_unregister(&i8042_driver); i8042_platform_exit(); From efb042598b5f4ad57a4260f5fbfb7e88f313c5ab Mon Sep 17 00:00:00 2001 From: Pablo Ceballos Date: Mon, 2 Nov 2020 19:29:39 -0500 Subject: [PATCH 588/636] HID: hid-sensor-hub: Fix issue with devices with no report ID [ Upstream commit 34a9fa2025d9d3177c99351c7aaf256c5f50691f ] Some HID devices don't use a report ID because they only have a single report. In those cases, the report ID in struct hid_report will be zero and the data for the report will start at the first byte, so don't skip over the first byte. Signed-off-by: Pablo Ceballos Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-sensor-hub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 4256fdc5cd6d..21fbdcde1faa 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -496,7 +496,8 @@ static int sensor_hub_raw_event(struct hid_device *hdev, return 1; ptr = raw_data; - ptr++; /* Skip report id */ + if (report->id) + ptr++; /* Skip report id */ spin_lock_irqsave(&pdata->lock, flags); From 7b64c55791a266339512e6849fece0a73732a618 Mon Sep 17 00:00:00 2001 From: Chris Ye Date: Sun, 1 Nov 2020 11:34:52 -0800 Subject: [PATCH 589/636] HID: add HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE for Gamevice devices [ Upstream commit f59ee399de4a8ca4d7d19cdcabb4b63e94867f09 ] Kernel 5.4 introduces HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE, devices need to be set explicitly with this flag. Signed-off-by: Chris Ye Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 4 ++++ drivers/hid/hid-quirks.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 4d2a1d540a82..ad079aca6889 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -444,6 +444,10 @@ #define USB_VENDOR_ID_FRUCTEL 0x25B6 #define USB_DEVICE_ID_GAMETEL_MT_MODE 0x0002 +#define USB_VENDOR_ID_GAMEVICE 0x27F8 +#define USB_DEVICE_ID_GAMEVICE_GV186 0x0BBE +#define USB_DEVICE_ID_GAMEVICE_KISHI 0x0BBF + #define USB_VENDOR_ID_GAMERON 0x0810 #define USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR 0x0001 #define USB_DEVICE_ID_GAMERON_DUAL_PCS_ADAPTOR 0x0002 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 493e2e7e12de..10cb42a00fe8 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -87,6 +87,10 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_FUTABA, USB_DEVICE_ID_LED_DISPLAY), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_SAT_ADAPTOR), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD), HID_QUIRK_MULTI_INPUT }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_GV186), + HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, + { HID_USB_DEVICE(USB_VENDOR_ID_GAMEVICE, USB_DEVICE_ID_GAMEVICE_KISHI), + HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FIGHTING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_FLYING), HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, From 5369afb5971fd8e55c3ce071034cd97e4327d6b0 Mon Sep 17 00:00:00 2001 From: Marc Ferland Date: Wed, 4 Nov 2020 12:30:04 +0530 Subject: [PATCH 590/636] dmaengine: xilinx_dma: use readl_poll_timeout_atomic variant [ Upstream commit 0ba2df09f1500d3f27398a3382b86d39c3e6abe2 ] The xilinx_dma_poll_timeout macro is sometimes called while holding a spinlock (see xilinx_dma_issue_pending() for an example) this means we shouldn't sleep when polling the dma channel registers. To address it in xilinx poll timeout macro use readl_poll_timeout_atomic instead of readl_poll_timeout variant. Signed-off-by: Marc Ferland Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1604473206-32573-2-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index d56b6b0e22a8..28592137fb67 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -453,8 +453,8 @@ struct xilinx_dma_device { #define to_dma_tx_descriptor(tx) \ container_of(tx, struct xilinx_dma_tx_descriptor, async_tx) #define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \ - readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \ - cond, delay_us, timeout_us) + readl_poll_timeout_atomic(chan->xdev->regs + chan->ctrl_offset + reg, \ + val, cond, delay_us, timeout_us) /* IO accessors */ static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg) From db42adcd476a6b9848c8bbf4a0d37f9a6ef0b239 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Fri, 6 Nov 2020 20:11:19 -0500 Subject: [PATCH 591/636] x86/xen: don't unbind uninitialized lock_kicker_irq [ Upstream commit 65cae18882f943215d0505ddc7e70495877308e6 ] When booting a hyperthreaded system with the kernel parameter 'mitigations=auto,nosmt', the following warning occurs: WARNING: CPU: 0 PID: 1 at drivers/xen/events/events_base.c:1112 unbind_from_irqhandler+0x4e/0x60 ... Hardware name: Xen HVM domU, BIOS 4.2.amazon 08/24/2006 ... Call Trace: xen_uninit_lock_cpu+0x28/0x62 xen_hvm_cpu_die+0x21/0x30 takedown_cpu+0x9c/0xe0 ? trace_suspend_resume+0x60/0x60 cpuhp_invoke_callback+0x9a/0x530 _cpu_up+0x11a/0x130 cpu_up+0x7e/0xc0 bringup_nonboot_cpus+0x48/0x50 smp_init+0x26/0x79 kernel_init_freeable+0xea/0x229 ? rest_init+0xaa/0xaa kernel_init+0xa/0x106 ret_from_fork+0x35/0x40 The secondary CPUs are not activated with the nosmt mitigations and only the primary thread on each CPU core is used. In this situation, xen_hvm_smp_prepare_cpus(), and more importantly xen_init_lock_cpu(), is not called, so the lock_kicker_irq is not initialized for the secondary CPUs. Let's fix this by exiting early in xen_uninit_lock_cpu() if the irq is not set to avoid the warning from above for each secondary CPU. Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20201107011119.631442-1-bmasney@redhat.com Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky Signed-off-by: Sasha Levin --- arch/x86/xen/spinlock.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 717b4847b473..6fffb86a32ad 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -101,10 +101,20 @@ void xen_init_lock_cpu(int cpu) void xen_uninit_lock_cpu(int cpu) { + int irq; + if (!xen_pvspin) return; - unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); + /* + * When booting the kernel with 'mitigations=auto,nosmt', the secondary + * CPUs are not activated, and lock_kicker_irq is not initialized. + */ + irq = per_cpu(lock_kicker_irq, cpu); + if (irq == -1) + return; + + unbind_from_irqhandler(irq, NULL); per_cpu(lock_kicker_irq, cpu) = -1; kfree(per_cpu(irq_name, cpu)); per_cpu(irq_name, cpu) = NULL; From e7daa8842205b5a2e7929c82390f7b377d37803b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Nov 2020 14:36:58 +0100 Subject: [PATCH 592/636] HID: Add Logitech Dinovo Edge battery quirk [ Upstream commit 7940fb035abd88040d56be209962feffa33b03d0 ] The battery status is also being reported by the logitech-hidpp driver, so ignore the standard HID battery status to avoid reporting the same info twice. Note the logitech-hidpp battery driver provides more info, such as properly differentiating between charging and discharging. Also the standard HID battery info seems to be wrong, reporting a capacity of just 26% after fully charging the device. Signed-off-by: Hans de Goede Signed-off-by: Benjamin Tissoires Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ad079aca6889..6d118da1615d 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -733,6 +733,7 @@ #define USB_VENDOR_ID_LOGITECH 0x046d #define USB_DEVICE_ID_LOGITECH_AUDIOHUB 0x0a0e #define USB_DEVICE_ID_LOGITECH_T651 0xb00c +#define USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD 0xb309 #define USB_DEVICE_ID_LOGITECH_C007 0xc007 #define USB_DEVICE_ID_LOGITECH_C077 0xc077 #define USB_DEVICE_ID_LOGITECH_RECEIVER 0xc101 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 11bd2ca22a2e..13deb9a67685 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -331,6 +331,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD), HID_BATTERY_QUIRK_IGNORE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, + USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), + HID_BATTERY_QUIRK_IGNORE }, {} }; From 49d04a6948825b309411d13e97109857e4f76de6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Nov 2020 16:47:52 -0700 Subject: [PATCH 593/636] proc: don't allow async path resolution of /proc/self components [ Upstream commit 8d4c3e76e3be11a64df95ddee52e99092d42fc19 ] If this is attempted by a kthread, then return -EOPNOTSUPP as we don't currently support that. Once we can get task_pid_ptr() doing the right thing, then this can go away again. Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- fs/proc/self.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/proc/self.c b/fs/proc/self.c index cc6d4253399d..7922edf70ce1 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -16,6 +16,13 @@ static const char *proc_self_get_link(struct dentry *dentry, pid_t tgid = task_tgid_nr_ns(current, ns); char *name; + /* + * Not currently supported. Once we can inherit all of struct pid, + * we can allow this. + */ + if (current->flags & PF_KTHREAD) + return ERR_PTR(-EOPNOTSUPP); + if (!tgid) return ERR_PTR(-ENOENT); /* max length of unsigned int in decimal + NULL term */ From fd1c1de8c4589fdd528733bfd01ed0c5f3f69204 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 5 Nov 2020 23:28:47 +0900 Subject: [PATCH 594/636] nvme: free sq/cq dbbuf pointers when dbbuf set fails [ Upstream commit 0f0d2c876c96d4908a9ef40959a44bec21bdd6cf ] If Doorbell Buffer Config command fails even 'dev->dbbuf_dbs != NULL' which means OACS indicates that NVME_CTRL_OACS_DBBUF_SUPP is set, nvme_dbbuf_update_and_check_event() will check event even it's not been successfully set. This patch fixes mismatch among dbbuf for sq/cqs in case that dbbuf command fails. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3c68a5b35ec1..a52b2f15f372 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -276,9 +276,21 @@ static void nvme_dbbuf_init(struct nvme_dev *dev, nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)]; } +static void nvme_dbbuf_free(struct nvme_queue *nvmeq) +{ + if (!nvmeq->qid) + return; + + nvmeq->dbbuf_sq_db = NULL; + nvmeq->dbbuf_cq_db = NULL; + nvmeq->dbbuf_sq_ei = NULL; + nvmeq->dbbuf_cq_ei = NULL; +} + static void nvme_dbbuf_set(struct nvme_dev *dev) { struct nvme_command c; + unsigned int i; if (!dev->dbbuf_dbs) return; @@ -292,6 +304,9 @@ static void nvme_dbbuf_set(struct nvme_dev *dev) dev_warn(dev->ctrl.device, "unable to set dbbuf\n"); /* Free memory and continue on */ nvme_dbbuf_dma_free(dev); + + for (i = 1; i <= dev->online_queues; i++) + nvme_dbbuf_free(&dev->queues[i]); } } From fc52f37bb51c8e88649914b7fe842f6ca713a861 Mon Sep 17 00:00:00 2001 From: Sugar Zhang Date: Sat, 14 Nov 2020 11:55:06 +0800 Subject: [PATCH 595/636] dmaengine: pl330: _prep_dma_memcpy: Fix wrong burst size [ Upstream commit e773ca7da8beeca7f17fe4c9d1284a2b66839cc1 ] Actually, burst size is equal to '1 << desc->rqcfg.brst_size'. we should use burst size, not desc->rqcfg.brst_size. dma memcpy performance on Rockchip RV1126 @ 1512MHz A7, 1056MHz LPDDR3, 200MHz DMA: dmatest: /# echo dma0chan0 > /sys/module/dmatest/parameters/channel /# echo 4194304 > /sys/module/dmatest/parameters/test_buf_size /# echo 8 > /sys/module/dmatest/parameters/iterations /# echo y > /sys/module/dmatest/parameters/norandom /# echo y > /sys/module/dmatest/parameters/verbose /# echo 1 > /sys/module/dmatest/parameters/run dmatest: dma0chan0-copy0: result #1: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #2: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #3: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #4: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #5: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #6: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #7: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 dmatest: dma0chan0-copy0: result #8: 'test passed' with src_off=0x0 dst_off=0x0 len=0x400000 Before: dmatest: dma0chan0-copy0: summary 8 tests, 0 failures 48 iops 200338 KB/s (0) After this patch: dmatest: dma0chan0-copy0: summary 8 tests, 0 failures 179 iops 734873 KB/s (0) After this patch and increase dma clk to 400MHz: dmatest: dma0chan0-copy0: summary 8 tests, 0 failures 259 iops 1062929 KB/s (0) Signed-off-by: Sugar Zhang Link: https://lore.kernel.org/r/1605326106-55681-1-git-send-email-sugar.zhang@rock-chips.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/pl330.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index c564df713efc..15b30d2d8f7e 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2774,7 +2774,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, * If burst size is smaller than bus width then make sure we only * transfer one at a time to avoid a burst stradling an MFIFO entry. */ - if (desc->rqcfg.brst_size * 8 < pl330->pcfg.data_bus_width) + if (burst * 8 < pl330->pcfg.data_bus_width) desc->rqcfg.brst_len = 1; desc->bytes_requested = len; From e0172455d4e9e946a2d388386ec48dd7538275ec Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Fri, 6 Nov 2020 11:33:17 -0800 Subject: [PATCH 596/636] scsi: libiscsi: Fix NOP race condition [ Upstream commit fe0a8a95e7134d0b44cd407bc0085b9ba8d8fe31 ] iSCSI NOPs are sometimes "lost", mistakenly sent to the user-land iscsid daemon instead of handled in the kernel, as they should be, resulting in a message from the daemon like: iscsid: Got nop in, but kernel supports nop handling. This can occur because of the new forward- and back-locks, and the fact that an iSCSI NOP response can occur before processing of the NOP send is complete. This can result in "conn->ping_task" being NULL in iscsi_nop_out_rsp(), when the pointer is actually in the process of being set. To work around this, we add a new state to the "ping_task" pointer. In addition to NULL (not assigned) and a pointer (assigned), we add the state "being set", which is signaled with an INVALID pointer (using "-1"). Link: https://lore.kernel.org/r/20201106193317.16993-1-leeman.duncan@gmail.com Reviewed-by: Mike Christie Signed-off-by: Lee Duncan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libiscsi.c | 23 +++++++++++++++-------- include/scsi/libiscsi.h | 3 +++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 7a05c7271766..1c69515e870c 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -571,8 +571,8 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) if (conn->task == task) conn->task = NULL; - if (conn->ping_task == task) - conn->ping_task = NULL; + if (READ_ONCE(conn->ping_task) == task) + WRITE_ONCE(conn->ping_task, NULL); /* release get from queueing */ __iscsi_put_task(task); @@ -781,6 +781,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, task->conn->session->age); } + if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK)) + WRITE_ONCE(conn->ping_task, task); + if (!ihost->workq) { if (iscsi_prep_mgmt_task(conn, task)) goto free_task; @@ -988,8 +991,11 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) struct iscsi_nopout hdr; struct iscsi_task *task; - if (!rhdr && conn->ping_task) - return -EINVAL; + if (!rhdr) { + if (READ_ONCE(conn->ping_task)) + return -EINVAL; + WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK); + } memset(&hdr, 0, sizeof(struct iscsi_nopout)); hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE; @@ -1004,11 +1010,12 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0); if (!task) { + if (!rhdr) + WRITE_ONCE(conn->ping_task, NULL); iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n"); return -EIO; } else if (!rhdr) { /* only track our nops */ - conn->ping_task = task; conn->last_ping = jiffies; } @@ -1021,7 +1028,7 @@ static int iscsi_nop_out_rsp(struct iscsi_task *task, struct iscsi_conn *conn = task->conn; int rc = 0; - if (conn->ping_task != task) { + if (READ_ONCE(conn->ping_task) != task) { /* * If this is not in response to one of our * nops then it must be from userspace. @@ -1961,7 +1968,7 @@ static void iscsi_start_tx(struct iscsi_conn *conn) */ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn) { - if (conn->ping_task && + if (READ_ONCE(conn->ping_task) && time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + (conn->ping_timeout * HZ), jiffies)) return 1; @@ -2096,7 +2103,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * Checking the transport already or nop from a cmd timeout still * running */ - if (conn->ping_task) { + if (READ_ONCE(conn->ping_task)) { task->have_checked_conn = true; rc = BLK_EH_RESET_TIMER; goto done; diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index c9bd935f4fd1..1ee0f30ae190 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -145,6 +145,9 @@ struct iscsi_task { void *dd_data; /* driver/transport data */ }; +/* invalid scsi_task pointer */ +#define INVALID_SCSI_TASK (struct iscsi_task *)-1l + static inline int iscsi_task_has_unsol_data(struct iscsi_task *task) { return task->unsol_r2t.data_length > task->unsol_r2t.sent; From 97b5295711ae0a03a5cfaf025d768c6c87a209f6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 13 Nov 2020 19:46:18 -0600 Subject: [PATCH 597/636] scsi: target: iscsi: Fix cmd abort fabric stop race [ Upstream commit f36199355c64a39fe82cfddc7623d827c7e050da ] Maurizio found a race where the abort and cmd stop paths can race as follows: 1. thread1 runs iscsit_release_commands_from_conn and sets CMD_T_FABRIC_STOP. 2. thread2 runs iscsit_aborted_task and then does __iscsit_free_cmd. It then returns from the aborted_task callout and we finish target_handle_abort and do: target_handle_abort -> transport_cmd_check_stop_to_fabric -> lio_check_stop_free -> target_put_sess_cmd The cmd is now freed. 3. thread1 now finishes iscsit_release_commands_from_conn and runs iscsit_free_cmd while accessing a command we just released. In __target_check_io_state we check for CMD_T_FABRIC_STOP and set the CMD_T_ABORTED if the driver is not cleaning up the cmd because of a session shutdown. However, iscsit_release_commands_from_conn only sets the CMD_T_FABRIC_STOP and does not check to see if the abort path has claimed completion ownership of the command. This adds a check in iscsit_release_commands_from_conn so only the abort or fabric stop path cleanup the command. Link: https://lore.kernel.org/r/1605318378-9269-1-git-send-email-michael.christie@oracle.com Reported-by: Maurizio Lombardi Reviewed-by: Maurizio Lombardi Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/iscsi/iscsi_target.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 2602b57936d4..58ccded1be85 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -492,8 +492,7 @@ EXPORT_SYMBOL(iscsit_queue_rsp); void iscsit_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { spin_lock_bh(&conn->cmd_lock); - if (!list_empty(&cmd->i_conn_node) && - !(cmd->se_cmd.transport_state & CMD_T_FABRIC_STOP)) + if (!list_empty(&cmd->i_conn_node)) list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); @@ -4054,12 +4053,22 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) spin_lock_bh(&conn->cmd_lock); list_splice_init(&conn->conn_cmd_list, &tmp_list); - list_for_each_entry(cmd, &tmp_list, i_conn_node) { + list_for_each_entry_safe(cmd, cmd_tmp, &tmp_list, i_conn_node) { struct se_cmd *se_cmd = &cmd->se_cmd; if (se_cmd->se_tfo != NULL) { spin_lock_irq(&se_cmd->t_state_lock); - se_cmd->transport_state |= CMD_T_FABRIC_STOP; + if (se_cmd->transport_state & CMD_T_ABORTED) { + /* + * LIO's abort path owns the cleanup for this, + * so put it back on the list and let + * aborted_task handle it. + */ + list_move_tail(&cmd->i_conn_node, + &conn->conn_cmd_list); + } else { + se_cmd->transport_state |= CMD_T_FABRIC_STOP; + } spin_unlock_irq(&se_cmd->t_state_lock); } } From 448ad389cbc12f72d9cc6ab087a694bb01a99cdf Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 13 Nov 2020 10:31:26 -0800 Subject: [PATCH 598/636] perf/x86: fix sysfs type mismatches [ Upstream commit ebd19fc372e3e78bf165f230e7c084e304441c08 ] This change switches rapl to use PMU_FORMAT_ATTR, and fixes two other macros to use device_attribute instead of kobj_attribute to avoid callback type mismatches that trip indirect call checking with Clang's Control-Flow Integrity (CFI). Reported-by: Sedat Dilek Signed-off-by: Sami Tolvanen Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20201113183126.1239404-1-samitolvanen@google.com Signed-off-by: Sasha Levin --- arch/x86/events/intel/cstate.c | 6 +++--- arch/x86/events/intel/rapl.c | 14 +------------- arch/x86/events/intel/uncore.c | 4 ++-- arch/x86/events/intel/uncore.h | 12 ++++++------ 4 files changed, 12 insertions(+), 24 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 4a650eb3d94a..3b3cd12c0692 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -100,14 +100,14 @@ MODULE_LICENSE("GPL"); #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __cstate_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) static ssize_t cstate_get_attr_cpumask(struct device *dev, diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 2413169ce362..bc348663da94 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -115,18 +115,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { * any other bit is reserved */ #define RAPL_EVENT_MASK 0xFFULL - -#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *page) \ -{ \ - BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ - return sprintf(page, _format "\n"); \ -} \ -static struct kobj_attribute format_attr_##_var = \ - __ATTR(_name, 0444, __rapl_##_var##_show, NULL) - #define RAPL_CNTR_WIDTH 32 #define RAPL_EVENT_ATTR_STR(_name, v, str) \ @@ -548,7 +536,7 @@ static struct attribute_group rapl_pmu_events_group = { .attrs = NULL, /* patched at runtime */ }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +PMU_FORMAT_ATTR(event, "config:0-7"); static struct attribute *rapl_formats_attr[] = { &format_attr_event.attr, NULL, diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 7098b9b05d56..2f4ed5aa08ba 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -90,8 +90,8 @@ end: return map; } -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct uncore_event_desc *event = container_of(attr, struct uncore_event_desc, attr); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 40e040ec31b5..0fc86ac73b51 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -133,7 +133,7 @@ struct intel_uncore_box { #define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */ struct uncore_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *config; }; @@ -153,8 +153,8 @@ struct pci2phy_map { struct pci2phy_map *__find_pci2phy_map(int segment); -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf); +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf); #define INTEL_UNCORE_EVENT_DESC(_name, _config) \ { \ @@ -163,14 +163,14 @@ ssize_t uncore_event_show(struct kobject *kobj, } #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __uncore_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) static inline bool uncore_pmc_fixed(int idx) From 0b1f2cff16d13de92a04e14db96201a2f78d9e41 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 1 Sep 2020 00:09:37 +0300 Subject: [PATCH 599/636] xtensa: uaccess: Add missing __user to strncpy_from_user() prototype [ Upstream commit dc293f2106903ab9c24e9cea18c276e32c394c33 ] When adding __user annotations in commit 2adf5352a34a, the strncpy_from_user() function declaration for the CONFIG_GENERIC_STRNCPY_FROM_USER case was missed. Fix it. Reported-by: kernel test robot Signed-off-by: Laurent Pinchart Message-Id: <20200831210937.17938-1-laurent.pinchart@ideasonboard.com> Signed-off-by: Max Filippov Signed-off-by: Sasha Levin --- arch/xtensa/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index f1158b4c629c..da4effe27007 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -291,7 +291,7 @@ strncpy_from_user(char *dst, const char *src, long count) return -EFAULT; } #else -long strncpy_from_user(char *dst, const char *src, long count); +long strncpy_from_user(char *dst, const char __user *src, long count); #endif /* From ce744a898e3d69853805a0a6a481a12244559646 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2020 10:58:20 +0100 Subject: [PATCH 600/636] phy: tegra: xusb: Fix dangling pointer on probe failure [ Upstream commit eb9c4dd9bdfdebaa13846c16a8c79b5b336066b6 ] If, for some reason, the xusb PHY fails to probe, it leaves a dangling pointer attached to the platform device structure. This would normally be harmless, but the Tegra XHCI driver then goes and extract that pointer from the PHY device. Things go downhill from there: 8.752082] [004d554e5145533c] address between user and kernel address ranges [ 8.752085] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 8.752088] Modules linked in: max77620_regulator(E+) xhci_tegra(E+) sdhci_tegra(E+) xhci_hcd(E) sdhci_pltfm(E) cqhci(E) fixed(E) usbcore(E) scsi_mod(E) sdhci(E) host1x(E+) [ 8.752103] CPU: 4 PID: 158 Comm: systemd-udevd Tainted: G S W E 5.9.0-rc7-00298-gf6337624c4fe #1980 [ 8.752105] Hardware name: NVIDIA Jetson TX2 Developer Kit (DT) [ 8.752108] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--) [ 8.752115] pc : kobject_put+0x1c/0x21c [ 8.752120] lr : put_device+0x20/0x30 [ 8.752121] sp : ffffffc012eb3840 [ 8.752122] x29: ffffffc012eb3840 x28: ffffffc010e82638 [ 8.752125] x27: ffffffc008d56440 x26: 0000000000000000 [ 8.752128] x25: ffffff81eb508200 x24: 0000000000000000 [ 8.752130] x23: ffffff81eb538800 x22: 0000000000000000 [ 8.752132] x21: 00000000fffffdfb x20: ffffff81eb538810 [ 8.752134] x19: 3d4d554e51455300 x18: 0000000000000020 [ 8.752136] x17: ffffffc008d00270 x16: ffffffc008d00c94 [ 8.752138] x15: 0000000000000004 x14: ffffff81ebd4ae90 [ 8.752140] x13: 0000000000000000 x12: ffffff81eb86a4e8 [ 8.752142] x11: ffffff81eb86a480 x10: ffffff81eb862fea [ 8.752144] x9 : ffffffc01055fb28 x8 : ffffff81eb86a4a8 [ 8.752146] x7 : 0000000000000001 x6 : 0000000000000001 [ 8.752148] x5 : ffffff81dff8bc38 x4 : 0000000000000000 [ 8.752150] x3 : 0000000000000001 x2 : 0000000000000001 [ 8.752152] x1 : 0000000000000002 x0 : 3d4d554e51455300 [ 8.752155] Call trace: [ 8.752157] kobject_put+0x1c/0x21c [ 8.752160] put_device+0x20/0x30 [ 8.752164] tegra_xusb_padctl_put+0x24/0x3c [ 8.752170] tegra_xusb_probe+0x8b0/0xd10 [xhci_tegra] [ 8.752174] platform_drv_probe+0x60/0xb4 [ 8.752176] really_probe+0xf0/0x504 [ 8.752179] driver_probe_device+0x100/0x170 [ 8.752181] device_driver_attach+0xcc/0xd4 [ 8.752183] __driver_attach+0xb0/0x17c [ 8.752185] bus_for_each_dev+0x7c/0xd4 [ 8.752187] driver_attach+0x30/0x3c [ 8.752189] bus_add_driver+0x154/0x250 [ 8.752191] driver_register+0x84/0x140 [ 8.752193] __platform_driver_register+0x54/0x60 [ 8.752197] tegra_xusb_init+0x40/0x1000 [xhci_tegra] [ 8.752201] do_one_initcall+0x54/0x2d0 [ 8.752205] do_init_module+0x68/0x29c [ 8.752207] load_module+0x2178/0x26c0 [ 8.752209] __do_sys_finit_module+0xb0/0x120 [ 8.752211] __arm64_sys_finit_module+0x2c/0x40 [ 8.752215] el0_svc_common.constprop.0+0x80/0x240 [ 8.752218] do_el0_svc+0x30/0xa0 [ 8.752220] el0_svc+0x18/0x50 [ 8.752223] el0_sync_handler+0x90/0x318 [ 8.752225] el0_sync+0x158/0x180 [ 8.752230] Code: a9bd7bfd 910003fd a90153f3 aa0003f3 (3940f000) [ 8.752232] ---[ end trace 90f6c89d62d85ff5 ]--- Reset the pointer on probe failure fixes the issue. Fixes: 53d2a715c2403 ("phy: Add Tegra XUSB pad controller support") Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201013095820.311376-1-maz@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/tegra/xusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index de1b4ebe4de2..39c01ef57d83 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -899,6 +899,7 @@ remove_pads: reset: reset_control_assert(padctl->rst); remove: + platform_set_drvdata(pdev, NULL); soc->ops->remove(padctl); return err; } From fbcca50fd598d7043c44a4bfed8a231306821739 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 15 Nov 2020 10:30:04 +0000 Subject: [PATCH 601/636] batman-adv: set .owner to THIS_MODULE [ Upstream commit 14a2e551faea53d45bc11629a9dac88f88950ca7 ] If THIS_MODULE is not set, the module would be removed while debugfs is being used. It eventually makes kernel panic. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Taehee Yoo Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich Signed-off-by: Sasha Levin --- net/batman-adv/log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index 853773e45f79..837f67c9fad3 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -205,6 +205,7 @@ static const struct file_operations batadv_log_fops = { .read = batadv_log_read, .poll = batadv_log_poll, .llseek = no_llseek, + .owner = THIS_MODULE, }; /** From 593cd9d5c7f8ea6ae46d10a0805a33608b3c326f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 11 Nov 2020 15:12:11 +0100 Subject: [PATCH 602/636] ARM: dts: dra76x: m_can: fix order of clocks [ Upstream commit 05d5de6ba7dbe490dd413b5ca11d0875bd2bc006 ] According to the bosch,m_can.yaml bindings the first clock shall be the "hclk", while the second clock "cclk". This patch fixes the order accordingly. Fixes: 0adbe832f21a ("ARM: dts: dra76x: Add MCAN node") Cc: Faiz Abbas Cc: Tony Lindgren Cc: linux-omap@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/dra76x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index 216e1d1a69c7..473d6721b788 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -35,8 +35,8 @@ interrupts = , ; interrupt-names = "int0", "int1"; - clocks = <&mcan_clk>, <&l3_iclk_div>; - clock-names = "cclk", "hclk"; + clocks = <&l3_iclk_div>, <&mcan_clk>; + clock-names = "hclk", "cclk"; bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; }; }; From 0b364d26a171a300989148b52e83ec0b75df820a Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Thu, 19 Nov 2020 14:29:16 +0800 Subject: [PATCH 603/636] scsi: ufs: Fix race between shutdown and runtime resume flow [ Upstream commit e92643db514803c2c87d72caf5950b4c0a8faf4a ] If UFS host device is in runtime-suspended state while UFS shutdown callback is invoked, UFS device shall be resumed for register accesses. Currently only UFS local runtime resume function will be invoked to wake up the host. This is not enough because if someone triggers runtime resume from block layer, then race may happen between shutdown and runtime resume flow, and finally lead to unlocked register access. To fix this, in ufshcd_shutdown(), use pm_runtime_get_sync() instead of resuming UFS device by ufshcd_runtime_resume() "internally" to let runtime PM framework manage the whole resume flow. Link: https://lore.kernel.org/r/20201119062916.12931-1-stanley.chu@mediatek.com Fixes: 57d104c153d3 ("ufs: add UFS power management support") Reviewed-by: Can Guo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/ufs/ufshcd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index a63119c35fde..7e4e6e982055 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7926,11 +7926,7 @@ int ufshcd_shutdown(struct ufs_hba *hba) if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) goto out; - if (pm_runtime_suspended(hba->dev)) { - ret = ufshcd_runtime_resume(hba); - if (ret) - goto out; - } + pm_runtime_get_sync(hba->dev); ret = ufshcd_suspend(hba, UFS_SHUTDOWN_PM); out: From 6fc2786e95bdaa4a034a479160c1f4501724d4d7 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 18 Nov 2020 20:17:31 +0800 Subject: [PATCH 604/636] bnxt_en: fix error return code in bnxt_init_one() [ Upstream commit b5f796b62c98cd8c219c4b788ecb6e1218e648cb ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: c213eae8d3cd ("bnxt_en: Improve VF/PF link change logic.") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Reviewed-by: Edwin Peer Link: https://lore.kernel.org/r/1605701851-20270-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 01d28ede1fb2..50732ab2c2bc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9120,6 +9120,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) create_singlethread_workqueue("bnxt_pf_wq"); if (!bnxt_pf_wq) { dev_err(&pdev->dev, "Unable to create workqueue.\n"); + rc = -ENOMEM; goto init_err_pci_clean; } } From 8319dcf5d8b6b718db63d2fc0a9516216057cbf6 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 19 Nov 2020 21:30:21 +0800 Subject: [PATCH 605/636] bnxt_en: fix error return code in bnxt_init_board() [ Upstream commit 3383176efc0fb0c0900a191026468a58668b4214 ] Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Reviewed-by: Edwin Peer Link: https://lore.kernel.org/r/1605792621-6268-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 50732ab2c2bc..913cabc06106 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8021,6 +8021,7 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0 && dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) { dev_err(&pdev->dev, "System does not support DMA, aborting\n"); + rc = -EIO; goto init_err_disable; } From 57062bed8e4aba46bc2971551ed506a04bb07e5c Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 17 Nov 2020 16:03:05 -0800 Subject: [PATCH 606/636] video: hyperv_fb: Fix the cache type when mapping the VRAM [ Upstream commit 5f1251a48c17b54939d7477305e39679a565382c ] x86 Hyper-V used to essentially always overwrite the effective cache type of guest memory accesses to WB. This was problematic in cases where there is a physical device assigned to the VM, since that often requires that the VM should have control over cache types. Thus, on newer Hyper-V since 2018, Hyper-V always honors the VM's cache type, but unexpectedly Linux VM users start to complain that Linux VM's VRAM becomes very slow, and it turns out that Linux VM should not map the VRAM uncacheable by ioremap(). Fix this slowness issue by using ioremap_cache(). On ARM64, ioremap_cache() is also required as the host also maps the VRAM cacheable, otherwise VM Connect can't display properly with ioremap() or ioremap_wc(). With this change, the VRAM on new Hyper-V is as fast as regular RAM, so it's no longer necessary to use the hacks we added to mitigate the slowness, i.e. we no longer need to allocate physical memory and use it to back up the VRAM in Generation-1 VM, and we also no longer need to allocate physical memory to back up the framebuffer in a Generation-2 VM and copy the framebuffer to the real VRAM. A further big change will address these for v5.11. Fixes: 68a2d20b79b1 ("drivers/video: add Hyper-V Synthetic Video Frame Buffer Driver") Tested-by: Boqun Feng Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Reviewed-by: Haiyang Zhang Link: https://lore.kernel.org/r/20201118000305.24797-1-decui@microsoft.com Signed-off-by: Wei Liu Signed-off-by: Sasha Levin --- drivers/video/fbdev/hyperv_fb.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 403d8cd3e582..56e70f12c996 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -712,7 +712,12 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) goto err1; } - fb_virt = ioremap(par->mem->start, screen_fb_size); + /* + * Map the VRAM cacheable for performance. This is also required for + * VM Connect to display properly for ARM64 Linux VM, as the host also + * maps the VRAM cacheable. + */ + fb_virt = ioremap_cache(par->mem->start, screen_fb_size); if (!fb_virt) goto err2; From df9d58deae7effb3b230c2cb637dadabef8d6b2c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 20 Nov 2020 02:44:31 -0500 Subject: [PATCH 607/636] bnxt_en: Release PCI regions when DMA mask setup fails during probe. [ Upstream commit c54bc3ced5106663c2f2b44071800621f505b00e ] Jump to init_err_release to cleanup. bnxt_unmap_bars() will also be called but it will do nothing if the BARs are not mapped yet. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reported-by: Jakub Kicinski Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/1605858271-8209-1-git-send-email-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 913cabc06106..db1a23b8d531 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8022,7 +8022,7 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) { dev_err(&pdev->dev, "System does not support DMA, aborting\n"); rc = -EIO; - goto init_err_disable; + goto init_err_release; } pci_set_master(pdev); From c08186c3948965e84c30ecee5872fea6e89adc52 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 18 Nov 2020 20:02:13 +0530 Subject: [PATCH 608/636] cxgb4: fix the panic caused by non smac rewrite [ Upstream commit bff453921ae105a8dbbad0ed7dd5f5ce424536e7 ] SMT entry is allocated only when loopback Source MAC rewriting is requested. Accessing SMT entry for non smac rewrite cases results in kernel panic. Fix the panic caused by non smac rewrite Fixes: 937d84205884 ("cxgb4: set up filter action after rewrites") Signed-off-by: Raju Rangoju Link: https://lore.kernel.org/r/20201118143213.13319-1-rajur@chelsio.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index a62c96001761..9160b44c68bb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -626,7 +626,8 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = f->smt->idx; + if (f->fs.newsmac) + fwr->smac_sel = f->smt->idx; fwr->rx_chan_rx_rpl_iq = htons(FW_FILTER_WR_RX_CHAN_V(0) | FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); From c09619911bb4fb15962cb33f4d62d7f44778f31e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Nov 2020 10:09:39 +0100 Subject: [PATCH 609/636] s390/qeth: fix tear down of async TX buffers [ Upstream commit 7ed10e16e50daf74460f54bc922e27c6863c8d61 ] When qeth_iqd_tx_complete() detects that a TX buffer requires additional async completion via QAOB, it might fail to replace the queue entry's metadata (and ends up triggering recovery). Assume now that the device gets torn down, overruling the recovery. If the QAOB notification then arrives before the tear down has sufficiently progressed, the buffer state is changed to QETH_QDIO_BUF_HANDLED_DELAYED by qeth_qdio_handle_aob(). The tear down code calls qeth_drain_output_queue(), where qeth_cleanup_handled_pending() will then attempt to replace such a buffer _again_. If it succeeds this time, the buffer ends up dangling in its replacement's ->next_pending list ... where it will never be freed, since there's no further call to qeth_cleanup_handled_pending(). But the second attempt isn't actually needed, we can simply leave the buffer on the queue and re-use it after a potential recovery has completed. The qeth_clear_output_buffer() in qeth_drain_output_queue() will ensure that it's in a clean state again. Fixes: 72861ae792c2 ("qeth: recovery through asynchronous delivery") Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_core_main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 5f59e2dfc7db..d0aaef937b0f 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -470,12 +470,6 @@ static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx, } } - if (forced_cleanup && (atomic_read(&(q->bufs[bidx]->state)) == - QETH_QDIO_BUF_HANDLED_DELAYED)) { - /* for recovery situations */ - qeth_init_qdio_out_buf(q, bidx); - QETH_CARD_TEXT(q->card, 2, "clprecov"); - } } From f341a769f37926054ce04b79c259a871dabf35d0 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 20 Nov 2020 09:57:02 +0800 Subject: [PATCH 610/636] IB/mthca: fix return value of error branch in mthca_init_cq() [ Upstream commit 6830ff853a5764c75e56750d59d0bbb6b26f1835 ] We return 'err' in the error branch, but this variable may be set as zero by the above code. Fix it by setting 'err' as a negative value before we goto the error label. Fixes: 74c2174e7be5 ("IB uverbs: add mthca user CQ support") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/r/1605837422-42724-1-git-send-email-wangxiongfeng2@huawei.com Reported-by: Hulk Robot Signed-off-by: Xiongfeng Wang Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mthca/mthca_cq.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index a6531ffe29a6..a5694dec3f2e 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -808,8 +808,10 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, } mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); - if (IS_ERR(mailbox)) + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); goto err_out_arm; + } cq_context = mailbox->buf; @@ -851,9 +853,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, } spin_lock_irq(&dev->cq_table.lock); - if (mthca_array_set(&dev->cq_table.cq, - cq->cqn & (dev->limits.num_cqs - 1), - cq)) { + err = mthca_array_set(&dev->cq_table.cq, + cq->cqn & (dev->limits.num_cqs - 1), cq); + if (err) { spin_unlock_irq(&dev->cq_table.lock); goto err_out_free_mr; } From 24071f8ddbcd1f3d440b597605508cf375c9d3dd Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 23 Nov 2020 17:23:51 +0100 Subject: [PATCH 611/636] nfc: s3fwrn5: use signed integer for parsing GPIO numbers [ Upstream commit d8f0a86795c69f5b697f7d9e5274c124da93c92d ] GPIOs - as returned by of_get_named_gpio() and used by the gpiolib - are signed integers, where negative number indicates error. The return value of of_get_named_gpio() should not be assigned to an unsigned int because in case of !CONFIG_GPIOLIB such number would be a valid GPIO. Fixes: c04c674fadeb ("nfc: s3fwrn5: Add driver for Samsung S3FWRN5 NFC Chip") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201123162351.209100-1-krzk@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/nfc/s3fwrn5/i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c index 4da409e77a72..6c78529a8c89 100644 --- a/drivers/nfc/s3fwrn5/i2c.c +++ b/drivers/nfc/s3fwrn5/i2c.c @@ -37,8 +37,8 @@ struct s3fwrn5_i2c_phy { struct i2c_client *i2c_dev; struct nci_dev *ndev; - unsigned int gpio_en; - unsigned int gpio_fw_wake; + int gpio_en; + int gpio_fw_wake; struct mutex mutex; From 5b02218cf05216216fe3c0a7f2adaac643495b7b Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Mon, 23 Nov 2020 21:08:58 +0200 Subject: [PATCH 612/636] net: ena: set initial DMA width to avoid intel iommu issue [ Upstream commit 09323b3bca95181c0da79daebc8b0603e500f573 ] The ENA driver uses the readless mechanism, which uses DMA, to find out what the DMA mask is supposed to be. If DMA is used without setting the dma_mask first, it causes the Intel IOMMU driver to think that ENA is a 32-bit device and therefore disables IOMMU passthrough permanently. This patch sets the dma_mask to be ENA_MAX_PHYS_ADDR_SIZE_BITS=48 before readless initialization in ena_device_init()->ena_com_mmio_reg_read_request_init(), which is large enough to workaround the intel_iommu issue. DMA mask is set again to the correct value after it's received from the device after readless is initialized. The patch also changes the driver to use dma_set_mask_and_coherent() function instead of the two pci_set_dma_mask() and pci_set_consistent_dma_mask() ones. Both methods achieve the same effect. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Mike Cui Signed-off-by: Arthur Kiyanovski Signed-off-by: Shay Agroskin Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 3c3222e2dcfc..9aea4cf19d0c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2433,16 +2433,9 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, goto err_mmio_read_less; } - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width)); if (rc) { - dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc); - goto err_mmio_read_less; - } - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); - if (rc) { - dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n", - rc); + dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc); goto err_mmio_read_less; } @@ -3183,6 +3176,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return rc; } + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS)); + if (rc) { + dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc); + goto err_disable_device; + } + pci_set_master(pdev); ena_dev = vzalloc(sizeof(*ena_dev)); From 4b7441fac98ccc3b53dbefd018203397ebf203ff Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Mon, 23 Nov 2020 13:35:45 -0600 Subject: [PATCH 613/636] ibmvnic: fix NULL pointer dereference in reset_sub_crq_queues [ Upstream commit a0faaa27c71608799e0dd765c5af38a089091802 ] adapter->tx_scrq and adapter->rx_scrq could be NULL if the previous reset did not complete after freeing sub crqs. Check for NULL before dereferencing them. Snippet of call trace: ibmvnic 30000006 env6: Releasing sub-CRQ ibmvnic 30000006 env6: Releasing CRQ ... ibmvnic 30000006 env6: Got Control IP offload Response ibmvnic 30000006 env6: Re-setting tx_scrq[0] BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc008000003dea7cc Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables xsk_diag tcp_diag udp_diag raw_diag inet_diag unix_diag af_packet_diag netlink_diag tun bridge stp llc rfkill sunrpc pseries_rng xts vmx_crypto uio_pdrv_genirq uio binfmt_misc ip_tables xfs libcrc32c sd_mod t10_pi sg ibmvscsi ibmvnic ibmveth scsi_transport_srp dm_mirror dm_region_hash dm_log dm_mod CPU: 80 PID: 1856 Comm: kworker/80:2 Tainted: G W 5.8.0+ #4 Workqueue: events __ibmvnic_reset [ibmvnic] NIP: c008000003dea7cc LR: c008000003dea7bc CTR: 0000000000000000 REGS: c0000007ef7db860 TRAP: 0380 Tainted: G W (5.8.0+) MSR: 800000000280b033 CR: 28002422 XER: 0000000d CFAR: c000000000bd9520 IRQMASK: 0 GPR00: c008000003dea7bc c0000007ef7dbaf0 c008000003df7400 c0000007fa26ec00 GPR04: c0000007fcd0d008 c0000007fcd96350 0000000000000027 c0000007fcd0d010 GPR08: 0000000000000023 0000000000000000 0000000000000000 0000000000000000 GPR12: 0000000000002000 c00000001ec18e00 c0000000001982f8 c0000007bad6e840 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 fffffffffffffef7 GPR24: 0000000000000402 c0000007fa26f3a8 0000000000000003 c00000016f8ec048 GPR28: 0000000000000000 0000000000000000 0000000000000000 c0000007fa26ec00 NIP [c008000003dea7cc] ibmvnic_reset_init+0x15c/0x258 [ibmvnic] LR [c008000003dea7bc] ibmvnic_reset_init+0x14c/0x258 [ibmvnic] Call Trace: [c0000007ef7dbaf0] [c008000003dea7bc] ibmvnic_reset_init+0x14c/0x258 [ibmvnic] (unreliable) [c0000007ef7dbb80] [c008000003de8860] __ibmvnic_reset+0x408/0x970 [ibmvnic] [c0000007ef7dbc50] [c00000000018b7cc] process_one_work+0x2cc/0x800 [c0000007ef7dbd20] [c00000000018bd78] worker_thread+0x78/0x520 [c0000007ef7dbdb0] [c0000000001984c4] kthread+0x1d4/0x1e0 [c0000007ef7dbe20] [c00000000000cea8] ret_from_kernel_thread+0x5c/0x74 Fixes: 57a49436f4e8 ("ibmvnic: Reset sub-crqs during driver reset") Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index d8115a9333e0..2fc8f281c276 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2560,6 +2560,9 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) { int i, rc; + if (!adapter->tx_scrq || !adapter->rx_scrq) + return -EINVAL; + for (i = 0; i < adapter->req_tx_queues; i++) { netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i); rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]); From f862c859218200fa2d4f22f8d2c5bfbbd09a53b9 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Mon, 23 Nov 2020 13:35:46 -0600 Subject: [PATCH 614/636] ibmvnic: fix NULL pointer dereference in ibmvic_reset_crq [ Upstream commit 0e435befaea45f7ea58682eecab5e37e05b2ce65 ] crq->msgs could be NULL if the previous reset did not complete after freeing crq->msgs. Check for NULL before dereferencing them. Snippet of call trace: ... ibmvnic 30000003 env3 (unregistering): Releasing sub-CRQ ibmvnic 30000003 env3 (unregistering): Releasing CRQ BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc0000000000c1a30 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: ibmvnic(E-) rpadlpar_io rpaphp xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables xsk_diag tcp_diag udp_diag tun raw_diag inet_diag unix_diag bridge af_packet_diag netlink_diag stp llc rfkill sunrpc pseries_rng xts vmx_crypto uio_pdrv_genirq uio binfmt_misc ip_tables xfs libcrc32c sd_mod t10_pi sg ibmvscsi ibmveth scsi_transport_srp dm_mirror dm_region_hash dm_log dm_mod [last unloaded: ibmvnic] CPU: 20 PID: 8426 Comm: kworker/20:0 Tainted: G E 5.10.0-rc1+ #12 Workqueue: events __ibmvnic_reset [ibmvnic] NIP: c0000000000c1a30 LR: c008000001b00c18 CTR: 0000000000000400 REGS: c00000000d05b7a0 TRAP: 0380 Tainted: G E (5.10.0-rc1+) MSR: 800000000280b033 CR: 44002480 XER: 20040000 CFAR: c0000000000c19ec IRQMASK: 0 GPR00: 0000000000000400 c00000000d05ba30 c008000001b17c00 0000000000000000 GPR04: 0000000000000000 0000000000000000 0000000000000000 00000000000001e2 GPR08: 000000000001f400 ffffffffffffd950 0000000000000000 c008000001b0b280 GPR12: c0000000000c19c8 c00000001ec72e00 c00000000019a778 c00000002647b440 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000006 0000000000000001 0000000000000003 0000000000000002 GPR24: 0000000000001000 c008000001b0d570 0000000000000005 c00000007ab5d550 GPR28: c00000007ab5c000 c000000032fcf848 c00000007ab5cc00 c000000032fcf800 NIP [c0000000000c1a30] memset+0x68/0x104 LR [c008000001b00c18] ibmvnic_reset_crq+0x70/0x110 [ibmvnic] Call Trace: [c00000000d05ba30] [0000000000000800] 0x800 (unreliable) [c00000000d05bab0] [c008000001b0a930] do_reset.isra.40+0x224/0x634 [ibmvnic] [c00000000d05bb80] [c008000001b08574] __ibmvnic_reset+0x17c/0x3c0 [ibmvnic] [c00000000d05bc50] [c00000000018d9ac] process_one_work+0x2cc/0x800 [c00000000d05bd20] [c00000000018df58] worker_thread+0x78/0x520 [c00000000d05bdb0] [c00000000019a934] kthread+0x1c4/0x1d0 [c00000000d05be20] [c00000000000d5d0] ret_from_kernel_thread+0x5c/0x6c Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2fc8f281c276..2938ac440fb3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4462,6 +4462,9 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); /* Clean out the queue */ + if (!crq->msgs) + return -EINVAL; + memset(crq->msgs, 0, PAGE_SIZE); crq->cur = 0; crq->active = false; From d6172283a4706aef3136d704e27b8446e879f010 Mon Sep 17 00:00:00 2001 From: Rui Miguel Silva Date: Fri, 13 Nov 2020 15:06:04 +0000 Subject: [PATCH 615/636] optee: add writeback to valid memory type [ Upstream commit 853735e404244f5496cdb6188c5ed9a0f9627ee6 ] Only in smp systems the cache policy is setup as write alloc, in single cpu systems the cache policy is set as writeback and it is normal memory, so, it should pass the is_normal_memory check in the share memory registration. Add the right condition to make it work in no smp systems. Fixes: cdbcf83d29c1 ("tee: optee: check type of registered shared memory") Signed-off-by: Rui Miguel Silva Signed-off-by: Jens Wiklander Signed-off-by: Sasha Levin --- drivers/tee/optee/call.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index a5afbe6dee68..7cb7efe62b01 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -538,7 +538,8 @@ void optee_free_pages_list(void *list, size_t num_entries) static bool is_normal_memory(pgprot_t p) { #if defined(CONFIG_ARM) - return (pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC; + return (((pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEALLOC) || + ((pgprot_val(p) & L_PTE_MT_MASK) == L_PTE_MT_WRITEBACK)); #elif defined(CONFIG_ARM64) return (pgprot_val(p) & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL); #else From db9e8d33b066d7022a4ed1c8fa16b6496cd651c3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 25 Nov 2020 08:45:55 +0100 Subject: [PATCH 616/636] efivarfs: revert "fix memory leak in efivarfs_create()" [ Upstream commit ff04f3b6f2e27f8ae28a498416af2a8dd5072b43 ] The memory leak addressed by commit fe5186cf12e3 is a false positive: all allocations are recorded in a linked list, and freed when the filesystem is unmounted. This leads to double frees, and as reported by David, leads to crashes if SLUB is configured to self destruct when double frees occur. So drop the redundant kfree() again, and instead, mark the offending pointer variable so the allocation is ignored by kmemleak. Cc: Vamshi K Sthambamkadi Fixes: fe5186cf12e3 ("efivarfs: fix memory leak in efivarfs_create()") Reported-by: David Laight Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- fs/efivarfs/inode.c | 2 ++ fs/efivarfs/super.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 8c6ab6c95727..7f40343b39b0 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, var->var.VariableName[i] = '\0'; inode->i_private = var; + kmemleak_ignore(var); err = efivar_entry_add(var, &efivarfs_list); if (err) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 7808a26bd33f..834615f13f3e 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -23,7 +23,6 @@ LIST_HEAD(efivarfs_list); static void efivarfs_evict_inode(struct inode *inode) { clear_inode(inode); - kfree(inode->i_private); } static const struct super_operations efivarfs_ops = { From b4446737da080809caedab79c2ea9821adc6de0a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 19 Nov 2020 14:03:17 +0100 Subject: [PATCH 617/636] can: gs_usb: fix endianess problem with candleLight firmware [ Upstream commit 4ba1cb39fce4464151517a37ce0ac0a1a3f580d6 ] The firmware on the original USB2CAN by Geschwister Schneider Technologie Entwicklungs- und Vertriebs UG exchanges all data between the host and the device in host byte order. This is done with the struct gs_host_config::byte_order member, which is sent first to indicate the desired byte order. The widely used open source firmware candleLight doesn't support this feature and exchanges the data in little endian byte order. This breaks if a device with candleLight firmware is used on big endianess systems. To fix this problem, all u32 (but not the struct gs_host_frame::echo_id, which is a transparent cookie) are converted to __le32. Cc: Maximilian Schneider Cc: Hubert Denkmair Reported-by: Michael Rausch Link: https://lore.kernel.org/r/b58aace7-61f3-6df7-c6df-69fee2c66906@netadair.de Tested-by: Oleksij Rempel Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Link: https://lore.kernel.org/r/20201120103818.3386964-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/usb/gs_usb.c | 133 +++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 62 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index cc2e224661b3..6a57169c2715 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -71,21 +71,27 @@ enum gs_can_identify_mode { }; /* data types passed between host and device */ -struct gs_host_config { - u32 byte_order; -} __packed; -/* All data exchanged between host and device is exchanged in host byte order, - * thanks to the struct gs_host_config byte_order member, which is sent first - * to indicate the desired byte order. + +/* The firmware on the original USB2CAN by Geschwister Schneider + * Technologie Entwicklungs- und Vertriebs UG exchanges all data + * between the host and the device in host byte order. This is done + * with the struct gs_host_config::byte_order member, which is sent + * first to indicate the desired byte order. + * + * The widely used open source firmware candleLight doesn't support + * this feature and exchanges the data in little endian byte order. */ +struct gs_host_config { + __le32 byte_order; +} __packed; struct gs_device_config { u8 reserved1; u8 reserved2; u8 reserved3; u8 icount; - u32 sw_version; - u32 hw_version; + __le32 sw_version; + __le32 hw_version; } __packed; #define GS_CAN_MODE_NORMAL 0 @@ -95,26 +101,26 @@ struct gs_device_config { #define GS_CAN_MODE_ONE_SHOT BIT(3) struct gs_device_mode { - u32 mode; - u32 flags; + __le32 mode; + __le32 flags; } __packed; struct gs_device_state { - u32 state; - u32 rxerr; - u32 txerr; + __le32 state; + __le32 rxerr; + __le32 txerr; } __packed; struct gs_device_bittiming { - u32 prop_seg; - u32 phase_seg1; - u32 phase_seg2; - u32 sjw; - u32 brp; + __le32 prop_seg; + __le32 phase_seg1; + __le32 phase_seg2; + __le32 sjw; + __le32 brp; } __packed; struct gs_identify_mode { - u32 mode; + __le32 mode; } __packed; #define GS_CAN_FEATURE_LISTEN_ONLY BIT(0) @@ -125,23 +131,23 @@ struct gs_identify_mode { #define GS_CAN_FEATURE_IDENTIFY BIT(5) struct gs_device_bt_const { - u32 feature; - u32 fclk_can; - u32 tseg1_min; - u32 tseg1_max; - u32 tseg2_min; - u32 tseg2_max; - u32 sjw_max; - u32 brp_min; - u32 brp_max; - u32 brp_inc; + __le32 feature; + __le32 fclk_can; + __le32 tseg1_min; + __le32 tseg1_max; + __le32 tseg2_min; + __le32 tseg2_max; + __le32 sjw_max; + __le32 brp_min; + __le32 brp_max; + __le32 brp_inc; } __packed; #define GS_CAN_FLAG_OVERFLOW 1 struct gs_host_frame { u32 echo_id; - u32 can_id; + __le32 can_id; u8 can_dlc; u8 channel; @@ -337,13 +343,13 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) if (!skb) return; - cf->can_id = hf->can_id; + cf->can_id = le32_to_cpu(hf->can_id); cf->can_dlc = get_can_dlc(hf->can_dlc); memcpy(cf->data, hf->data, 8); /* ERROR frames tell us information about the controller */ - if (hf->can_id & CAN_ERR_FLAG) + if (le32_to_cpu(hf->can_id) & CAN_ERR_FLAG) gs_update_state(dev, cf); netdev->stats.rx_packets++; @@ -426,11 +432,11 @@ static int gs_usb_set_bittiming(struct net_device *netdev) if (!dbt) return -ENOMEM; - dbt->prop_seg = bt->prop_seg; - dbt->phase_seg1 = bt->phase_seg1; - dbt->phase_seg2 = bt->phase_seg2; - dbt->sjw = bt->sjw; - dbt->brp = bt->brp; + dbt->prop_seg = cpu_to_le32(bt->prop_seg); + dbt->phase_seg1 = cpu_to_le32(bt->phase_seg1); + dbt->phase_seg2 = cpu_to_le32(bt->phase_seg2); + dbt->sjw = cpu_to_le32(bt->sjw); + dbt->brp = cpu_to_le32(bt->brp); /* request bit timings */ rc = usb_control_msg(interface_to_usbdev(intf), @@ -511,7 +517,7 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, cf = (struct can_frame *)skb->data; - hf->can_id = cf->can_id; + hf->can_id = cpu_to_le32(cf->can_id); hf->can_dlc = cf->can_dlc; memcpy(hf->data, cf->data, cf->can_dlc); @@ -581,6 +587,7 @@ static int gs_can_open(struct net_device *netdev) int rc, i; struct gs_device_mode *dm; u32 ctrlmode; + u32 flags = 0; rc = open_candev(netdev); if (rc) @@ -648,24 +655,24 @@ static int gs_can_open(struct net_device *netdev) /* flags */ ctrlmode = dev->can.ctrlmode; - dm->flags = 0; if (ctrlmode & CAN_CTRLMODE_LOOPBACK) - dm->flags |= GS_CAN_MODE_LOOP_BACK; + flags |= GS_CAN_MODE_LOOP_BACK; else if (ctrlmode & CAN_CTRLMODE_LISTENONLY) - dm->flags |= GS_CAN_MODE_LISTEN_ONLY; + flags |= GS_CAN_MODE_LISTEN_ONLY; /* Controller is not allowed to retry TX * this mode is unavailable on atmels uc3c hardware */ if (ctrlmode & CAN_CTRLMODE_ONE_SHOT) - dm->flags |= GS_CAN_MODE_ONE_SHOT; + flags |= GS_CAN_MODE_ONE_SHOT; if (ctrlmode & CAN_CTRLMODE_3_SAMPLES) - dm->flags |= GS_CAN_MODE_TRIPLE_SAMPLE; + flags |= GS_CAN_MODE_TRIPLE_SAMPLE; /* finally start device */ - dm->mode = GS_CAN_MODE_START; + dm->mode = cpu_to_le32(GS_CAN_MODE_START); + dm->flags = cpu_to_le32(flags); rc = usb_control_msg(interface_to_usbdev(dev->iface), usb_sndctrlpipe(interface_to_usbdev(dev->iface), 0), GS_USB_BREQ_MODE, @@ -745,9 +752,9 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify) return -ENOMEM; if (do_identify) - imode->mode = GS_CAN_IDENTIFY_ON; + imode->mode = cpu_to_le32(GS_CAN_IDENTIFY_ON); else - imode->mode = GS_CAN_IDENTIFY_OFF; + imode->mode = cpu_to_le32(GS_CAN_IDENTIFY_OFF); rc = usb_control_msg(interface_to_usbdev(dev->iface), usb_sndctrlpipe(interface_to_usbdev(dev->iface), @@ -798,6 +805,7 @@ static struct gs_can *gs_make_candev(unsigned int channel, struct net_device *netdev; int rc; struct gs_device_bt_const *bt_const; + u32 feature; bt_const = kmalloc(sizeof(*bt_const), GFP_KERNEL); if (!bt_const) @@ -838,14 +846,14 @@ static struct gs_can *gs_make_candev(unsigned int channel, /* dev settup */ strcpy(dev->bt_const.name, "gs_usb"); - dev->bt_const.tseg1_min = bt_const->tseg1_min; - dev->bt_const.tseg1_max = bt_const->tseg1_max; - dev->bt_const.tseg2_min = bt_const->tseg2_min; - dev->bt_const.tseg2_max = bt_const->tseg2_max; - dev->bt_const.sjw_max = bt_const->sjw_max; - dev->bt_const.brp_min = bt_const->brp_min; - dev->bt_const.brp_max = bt_const->brp_max; - dev->bt_const.brp_inc = bt_const->brp_inc; + dev->bt_const.tseg1_min = le32_to_cpu(bt_const->tseg1_min); + dev->bt_const.tseg1_max = le32_to_cpu(bt_const->tseg1_max); + dev->bt_const.tseg2_min = le32_to_cpu(bt_const->tseg2_min); + dev->bt_const.tseg2_max = le32_to_cpu(bt_const->tseg2_max); + dev->bt_const.sjw_max = le32_to_cpu(bt_const->sjw_max); + dev->bt_const.brp_min = le32_to_cpu(bt_const->brp_min); + dev->bt_const.brp_max = le32_to_cpu(bt_const->brp_max); + dev->bt_const.brp_inc = le32_to_cpu(bt_const->brp_inc); dev->udev = interface_to_usbdev(intf); dev->iface = intf; @@ -862,28 +870,29 @@ static struct gs_can *gs_make_candev(unsigned int channel, /* can settup */ dev->can.state = CAN_STATE_STOPPED; - dev->can.clock.freq = bt_const->fclk_can; + dev->can.clock.freq = le32_to_cpu(bt_const->fclk_can); dev->can.bittiming_const = &dev->bt_const; dev->can.do_set_bittiming = gs_usb_set_bittiming; dev->can.ctrlmode_supported = 0; - if (bt_const->feature & GS_CAN_FEATURE_LISTEN_ONLY) + feature = le32_to_cpu(bt_const->feature); + if (feature & GS_CAN_FEATURE_LISTEN_ONLY) dev->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY; - if (bt_const->feature & GS_CAN_FEATURE_LOOP_BACK) + if (feature & GS_CAN_FEATURE_LOOP_BACK) dev->can.ctrlmode_supported |= CAN_CTRLMODE_LOOPBACK; - if (bt_const->feature & GS_CAN_FEATURE_TRIPLE_SAMPLE) + if (feature & GS_CAN_FEATURE_TRIPLE_SAMPLE) dev->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; - if (bt_const->feature & GS_CAN_FEATURE_ONE_SHOT) + if (feature & GS_CAN_FEATURE_ONE_SHOT) dev->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT; SET_NETDEV_DEV(netdev, &intf->dev); - if (dconf->sw_version > 1) - if (bt_const->feature & GS_CAN_FEATURE_IDENTIFY) + if (le32_to_cpu(dconf->sw_version) > 1) + if (feature & GS_CAN_FEATURE_IDENTIFY) netdev->ethtool_ops = &gs_usb_ethtool_ops; kfree(bt_const); @@ -918,7 +927,7 @@ static int gs_usb_probe(struct usb_interface *intf, if (!hconf) return -ENOMEM; - hconf->byte_order = 0x0000beef; + hconf->byte_order = cpu_to_le32(0x0000beef); /* send host config */ rc = usb_control_msg(interface_to_usbdev(intf), From 0b50485e5d8b7a847a3a041b76f52ff48f1c3f65 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 23 Nov 2020 14:21:57 +0100 Subject: [PATCH 618/636] platform/x86: thinkpad_acpi: Send tablet mode switch at wakeup time [ Upstream commit e40cc1b476d60f22628741e53cf3446a29e6e6b9 ] The lid state may change while the machine is suspended. As such, we may need to re-check the state at wake-up time (at least when waking up from hibernation). Add the appropriate call to the resume handler in order to sync the SW_TABLET_MODE switch state with the hardware state. Fixes: dda3ec0aa631 ("platform/x86: thinkpad_acpi: Implement tablet mode using GMMS method") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=210269 Signed-off-by: Benjamin Berg Acked-by: Henrique de Moraes Holschuh Link: https://lore.kernel.org/r/20201123132157.866303-1-benjamin@sipsolutions.net Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/thinkpad_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 98bd8213b037..8cc01857bc5c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -4251,6 +4251,7 @@ static void hotkey_resume(void) pr_err("error while attempting to reset the event firmware interface\n"); tpacpi_send_radiosw_update(); + tpacpi_input_send_tabletsw(); hotkey_tablet_mode_notify_change(); hotkey_wakeup_reason_notify_change(); hotkey_wakeup_hotunplug_complete_notify_change(); From e41d7a7dc67b876a000495498593f7833d88b0aa Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Sun, 22 Nov 2020 13:49:37 +0800 Subject: [PATCH 619/636] platform/x86: toshiba_acpi: Fix the wrong variable assignment [ Upstream commit 2a72c46ac4d665614faa25e267c3fb27fb729ed7 ] The commit 78429e55e4057 ("platform/x86: toshiba_acpi: Clean up variable declaration") cleans up variable declaration in video_proc_write(). Seems it does the variable assignment in the wrong place, this results in dead code and changes the source code logic. Fix it by doing the assignment at the beginning of the funciton. Fixes: 78429e55e4057 ("platform/x86: toshiba_acpi: Clean up variable declaration") Reported-by: Tosk Robot Signed-off-by: Kaixu Xia Link: https://lore.kernel.org/r/1606024177-16481-1-git-send-email-kaixuxia@tencent.com Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/toshiba_acpi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index e366977bda41..8c3e9bac4754 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -1497,7 +1497,7 @@ static ssize_t video_proc_write(struct file *file, const char __user *buf, struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file)); char *buffer; char *cmd; - int lcd_out, crt_out, tv_out; + int lcd_out = -1, crt_out = -1, tv_out = -1; int remain = count; int value; int ret; @@ -1529,7 +1529,6 @@ static ssize_t video_proc_write(struct file *file, const char __user *buf, kfree(cmd); - lcd_out = crt_out = tv_out = -1; ret = get_video_status(dev, &video_out); if (!ret) { unsigned int new_video_out = video_out; From cae58721e4f8d881515b9aecc8abac1577686b87 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 24 Nov 2020 19:47:38 +0100 Subject: [PATCH 620/636] can: m_can: fix nominal bitiming tseg2 min for version >= 3.1 [ Upstream commit e3409e4192535fbcc86a84b7a65d9351f46039ec ] At lest the revision 3.3.0 of the bosch m_can IP core specifies that valid register values for "Nominal Time segment after sample point (NTSEG2)" are from 1 to 127. As the hardware uses a value of one more than the programmed value, mean tseg2_min is 2. This patch fixes the tseg2_min value accordingly. Cc: Dan Murphy Cc: Mario Huettel Acked-by: Sriram Dash Link: https://lore.kernel.org/r/20201124190751.3972238-1-mkl@pengutronix.de Fixes: b03cfc5bb0e1 ("can: m_can: Enable M_CAN version dependent initialization") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index efaa342600c4..fbb970220c2d 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -976,7 +976,7 @@ static const struct can_bittiming_const m_can_bittiming_const_31X = { .name = KBUILD_MODNAME, .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ .tseg1_max = 256, - .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_min = 2, /* Time segment 2 = phase_seg2 */ .tseg2_max = 128, .sjw_max = 128, .brp_min = 1, From 9dd3e48e69155ccd3dcc81e629217900bc560118 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 27 Nov 2020 14:48:46 +0900 Subject: [PATCH 621/636] perf probe: Fix to die_entrypc() returns error correctly [ Upstream commit ab4200c17ba6fe71d2da64317aae8a8aa684624c ] Fix die_entrypc() to return error correctly if the DIE has no DW_AT_ranges attribute. Since dwarf_ranges() will treat the case as an empty ranges and return 0, we have to check it by ourselves. Fixes: 91e2f539eeda ("perf probe: Fix to show function entry line as probe-able") Signed-off-by: Masami Hiramatsu Cc: Sumanth Korikkar Cc: Thomas Richter Link: http://lore.kernel.org/lkml/160645612634.2824037.5284932731175079426.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 29e75c051d04..230e94bf7775 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -332,6 +332,7 @@ bool die_is_func_def(Dwarf_Die *dw_die) int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) { Dwarf_Addr base, end; + Dwarf_Attribute attr; if (!addr) return -EINVAL; @@ -339,6 +340,13 @@ int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) if (dwarf_entrypc(dw_die, addr) == 0) return 0; + /* + * Since the dwarf_ranges() will return 0 if there is no + * DW_AT_ranges attribute, we should check it first. + */ + if (!dwarf_attr(dw_die, DW_AT_ranges, &attr)) + return -ENOENT; + return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0; } From dfd888ee5db1488f2bbbd370348879e703398760 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 19 Nov 2020 12:02:28 -0500 Subject: [PATCH 622/636] USB: core: Change %pK for __user pointers to %px commit f3bc432aa8a7a2bfe9ebb432502be5c5d979d7fe upstream. Commit 2f964780c03b ("USB: core: replace %p with %pK") used the %pK format specifier for a bunch of __user pointers. But as the 'K' in the specifier indicates, it is meant for kernel pointers. The reason for the %pK specifier is to avoid leaks of kernel addresses, but when the pointer is to an address in userspace the security implications are minimal. In particular, no kernel information is leaked. This patch changes the __user %pK specifiers (used in a bunch of debugging output lines) to %px, which will always print the actual address with no mangling. (Notably, there is no printk format specifier particularly intended for __user pointers.) Fixes: 2f964780c03b ("USB: core: replace %p with %pK") CC: Vamsi Krishna Samavedam CC: Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20201119170228.GB576844@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 00204824bffd..732e7f1687dd 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -463,11 +463,11 @@ static void snoop_urb(struct usb_device *udev, if (userurb) { /* Async */ if (when == SUBMIT) - dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %px, ep%d %s-%s, " "length %u\n", userurb, ep, t, d, length); else - dev_info(&udev->dev, "userurb %pK, ep%d %s-%s, " + dev_info(&udev->dev, "userurb %px, ep%d %s-%s, " "actual_length %u status %d\n", userurb, ep, t, d, length, timeout_or_status); @@ -1927,7 +1927,7 @@ static int proc_reapurb(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); return retval; @@ -1944,7 +1944,7 @@ static int proc_reapurbnonblock(struct usb_dev_state *ps, void __user *arg) as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl(as, (void __user * __user *)arg); free_async(as); } else { @@ -2070,7 +2070,7 @@ static int proc_reapurb_compat(struct usb_dev_state *ps, void __user *arg) if (as) { int retval; - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); return retval; @@ -2087,7 +2087,7 @@ static int proc_reapurbnonblock_compat(struct usb_dev_state *ps, void __user *ar as = async_getcompleted(ps); if (as) { - snoop(&ps->dev->dev, "reap %pK\n", as->userurb); + snoop(&ps->dev->dev, "reap %px\n", as->userurb); retval = processcompl_compat(as, (void __user * __user *)arg); free_async(as); } else { @@ -2512,7 +2512,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd, #endif case USBDEVFS_DISCARDURB: - snoop(&dev->dev, "%s: DISCARDURB %pK\n", __func__, p); + snoop(&dev->dev, "%s: DISCARDURB %px\n", __func__, p); ret = proc_unlinkurb(ps, p); break; From 2b8402862fe46756e3160853aeb8baa072e69ca5 Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 17 Nov 2020 10:16:28 +0800 Subject: [PATCH 623/636] usb: gadget: f_midi: Fix memleak in f_midi_alloc commit e7694cb6998379341fd9bf3bd62b48c4e6a79385 upstream. In the error path, if midi is not null, we should free the midi->id if necessary to prevent memleak. Fixes: b85e9de9e818d ("usb: gadget: f_midi: convert to new function interface with backward compatibility") Reported-by: Hulk Robot Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201117021629.1470544-2-zhangqilong3@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_midi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 46af0aa07e2e..b2b5b0689667 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -1315,7 +1315,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) midi->id = kstrdup(opts->id, GFP_KERNEL); if (opts->id && !midi->id) { status = -ENOMEM; - goto setup_fail; + goto midi_free; } midi->in_ports = opts->in_ports; midi->out_ports = opts->out_ports; @@ -1327,7 +1327,7 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) status = kfifo_alloc(&midi->in_req_fifo, midi->qlen, GFP_KERNEL); if (status) - goto setup_fail; + goto midi_free; spin_lock_init(&midi->transmit_lock); @@ -1343,9 +1343,13 @@ static struct usb_function *f_midi_alloc(struct usb_function_instance *fi) return &midi->func; +midi_free: + if (midi) + kfree(midi->id); + kfree(midi); setup_fail: mutex_unlock(&opts->lock); - kfree(midi); + return ERR_PTR(status); } From 1c08e7c246cf4ed205d2329249920fd762c5645d Mon Sep 17 00:00:00 2001 From: penghao Date: Wed, 18 Nov 2020 20:30:39 +0800 Subject: [PATCH 624/636] USB: quirks: Add USB_QUIRK_DISCONNECT_SUSPEND quirk for Lenovo A630Z TIO built-in usb-audio card commit 9ca57518361418ad5ae7dc38a2128fbf4855e1a2 upstream. Add a USB_QUIRK_DISCONNECT_SUSPEND quirk for the Lenovo TIO built-in usb-audio. when A630Z going into S3,the system immediately wakeup 7-8 seconds later by usb-audio disconnect interrupt to avoids the issue. eg dmesg: .... [ 626.974091 ] usb 7-1.1: USB disconnect, device number 3 .... .... [ 1774.486691] usb 7-1.1: new full-speed USB device number 5 using xhci_hcd [ 1774.947742] usb 7-1.1: New USB device found, idVendor=17ef, idProduct=a012, bcdDevice= 0.55 [ 1774.956588] usb 7-1.1: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 1774.964339] usb 7-1.1: Product: Thinkcentre TIO24Gen3 for USB-audio [ 1774.970999] usb 7-1.1: Manufacturer: Lenovo [ 1774.975447] usb 7-1.1: SerialNumber: 000000000000 [ 1775.048590] usb 7-1.1: 2:1: cannot get freq at ep 0x1 ....... Seeking a better fix, we've tried a lot of things, including: - Check that the device's power/wakeup is disabled - Check that remote wakeup is off at the USB level - All the quirks in drivers/usb/core/quirks.c e.g. USB_QUIRK_RESET_RESUME, USB_QUIRK_RESET, USB_QUIRK_IGNORE_REMOTE_WAKEUP, USB_QUIRK_NO_LPM. but none of that makes any difference. There are no errors in the logs showing any suspend/resume-related issues. When the system wakes up due to the modem, log-wise it appears to be a normal resume. Introduce a quirk to disable the port during suspend when the modem is detected. Signed-off-by: penghao Link: https://lore.kernel.org/r/20201118123039.11696-1-penghao@uniontech.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 5ad14cdd9762..0f80195d0109 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -421,6 +421,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */ + { USB_DEVICE(0x17ef, 0xa012), .driver_info = + USB_QUIRK_DISCONNECT_SUSPEND }, + /* BUILDWIN Photo Frame */ { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, From 37de7cebcf548ebab2e59adf743d8fce386d8add Mon Sep 17 00:00:00 2001 From: Zhang Qilong Date: Tue, 17 Nov 2020 10:16:29 +0800 Subject: [PATCH 625/636] usb: gadget: Fix memleak in gadgetfs_fill_super commit 87bed3d7d26c974948a3d6e7176f304b2d41272b upstream. usb_get_gadget_udc_name will alloc memory for CHIP in "Enomem" branch. we should free it before error returns to prevent memleak. Fixes: 175f712119c57 ("usb: gadget: provide interface for legacy gadgets to get UDC name") Reported-by: Hulk Robot Acked-by: Alan Stern Signed-off-by: Zhang Qilong Link: https://lore.kernel.org/r/20201117021629.1470544-3-zhangqilong3@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 25d417ad9000..09ed5f02c24f 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -2039,6 +2039,9 @@ gadgetfs_fill_super (struct super_block *sb, void *opts, int silent) return 0; Enomem: + kfree(CHIP); + CHIP = NULL; + return -ENOMEM; } From 474ba4eb744862295e41d898dea241ec4caf9897 Mon Sep 17 00:00:00 2001 From: Anand K Mistry Date: Tue, 10 Nov 2020 12:33:53 +1100 Subject: [PATCH 626/636] x86/speculation: Fix prctl() when spectre_v2_user={seccomp,prctl},ibpb commit 33fc379df76b4991e5ae312f07bcd6820811971e upstream. When spectre_v2_user={seccomp,prctl},ibpb is specified on the command line, IBPB is force-enabled and STIPB is conditionally-enabled (or not available). However, since 21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.") the spectre_v2_user_ibpb variable is set to SPECTRE_V2_USER_{PRCTL,SECCOMP} instead of SPECTRE_V2_USER_STRICT, which is the actual behaviour. Because the issuing of IBPB relies on the switch_mm_*_ibpb static branches, the mitigations behave as expected. Since 1978b3a53a74 ("x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP") this discrepency caused the misreporting of IB speculation via prctl(). On CPUs with STIBP always-on and spectre_v2_user=seccomp,ibpb, prctl(PR_GET_SPECULATION_CTRL) would return PR_SPEC_PRCTL | PR_SPEC_ENABLE instead of PR_SPEC_DISABLE since both IBPB and STIPB are always on. It also allowed prctl(PR_SET_SPECULATION_CTRL) to set the IB speculation mode, even though the flag is ignored. Similarly, for CPUs without SMT, prctl(PR_GET_SPECULATION_CTRL) should also return PR_SPEC_DISABLE since IBPB is always on and STIBP is not available. [ bp: Massage commit message. ] Fixes: 21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.") Fixes: 1978b3a53a74 ("x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP") Signed-off-by: Anand K Mistry Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20201110123349.1.Id0cbf996d2151f4c143c90f9028651a5b49a5908@changeid Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9e482fbdb28f..c524fa1f4c0e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -733,11 +733,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + spectre_v2_user_ibpb = mode; switch (cmd) { case SPECTRE_V2_USER_CMD_FORCE: case SPECTRE_V2_USER_CMD_PRCTL_IBPB: case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); + spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: case SPECTRE_V2_USER_CMD_AUTO: @@ -751,8 +753,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); - - spectre_v2_user_ibpb = mode; } /* From e06f39d509816bd60c21708a7cc6847a73110225 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 31 Oct 2020 03:10:53 +0800 Subject: [PATCH 627/636] x86/resctrl: Remove superfluous kernfs_get() calls to prevent refcount leak commit fd8d9db3559a29fd737bcdb7c4fcbe1940caae34 upstream. Willem reported growing of kernfs_node_cache entries in slabtop when repeatedly creating and removing resctrl subdirectories as well as when repeatedly mounting and unmounting the resctrl filesystem. On resource group (control as well as monitoring) creation via a mkdir an extra kernfs_node reference is obtained to ensure that the rdtgroup structure remains accessible for the rdtgroup_kn_unlock() calls where it is removed on deletion. The kernfs_node reference count is dropped by kernfs_put() in rdtgroup_kn_unlock(). With the above explaining the need for one kernfs_get()/kernfs_put() pair in resctrl there are more places where a kernfs_node reference is obtained without a corresponding release. The excessive amount of reference count on kernfs nodes will never be dropped to 0 and the kernfs nodes will never be freed in the call paths of rmdir and umount. It leads to reference count leak and kernfs_node_cache memory leak. Remove the superfluous kernfs_get() calls and expand the existing comments surrounding the remaining kernfs_get()/kernfs_put() pair that remains in use. Superfluous kernfs_get() calls are removed from two areas: (1) In call paths of mount and mkdir, when kernfs nodes for "info", "mon_groups" and "mon_data" directories and sub-directories are created, the reference count of newly created kernfs node is set to 1. But after kernfs_create_dir() returns, superfluous kernfs_get() are called to take an additional reference. (2) kernfs_get() calls in rmdir call paths. Backporting notes: Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt files to a separate directory"), the file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to arch/x86/kernel/cpu/resctrl/rdtgroup.c. Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c for older stable trees. Fixes: 17eafd076291 ("x86/intel_rdt: Split resource group removal in two") Fixes: 4af4a88e0c92 ("x86/intel_rdt/cqm: Add mount,umount support") Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") Fixes: d89b7379015f ("x86/intel_rdt/cqm: Add mon_data") Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring") Fixes: 5dc1d5c6bac2 ("x86/intel_rdt: Simplify info and base file lists") Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Fixes: 4e978d06dedb ("x86/intel_rdt: Add "info" files to resctrl file system") Reported-by: Willem de Bruijn Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Tested-by: Willem de Bruijn Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1604085053-31639-1-git-send-email-xiaochen.shen@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 35 ++---------------------- 1 file changed, 2 insertions(+), 33 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index e62e416dd116..dc9061a0f47f 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1626,7 +1626,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); - kernfs_get(kn_subdir); ret = rdtgroup_kn_set_ugid(kn_subdir); if (ret) return ret; @@ -1649,7 +1648,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); if (IS_ERR(kn_info)) return PTR_ERR(kn_info); - kernfs_get(kn_info); ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); if (ret) @@ -1670,12 +1668,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn_info); - ret = rdtgroup_kn_set_ugid(kn_info); if (ret) goto out_destroy; @@ -1704,12 +1696,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, if (dest_kn) *dest_kn = kn; - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn); - ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2025,7 +2011,6 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, dentry = ERR_PTR(ret); goto out_info; } - kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); @@ -2033,7 +2018,6 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, dentry = ERR_PTR(ret); goto out_mongrp; } - kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } @@ -2326,11 +2310,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (IS_ERR(kn)) return PTR_ERR(kn); - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that kn is always accessible. - */ - kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2622,8 +2601,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, /* * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn} call below. Take one extra reference - * here, which will be dropped inside rdtgroup_kn_unlock(). + * rdtgroup_kn_unlock(kn) call. Take one extra reference here, + * which will be dropped inside rdtgroup_kn_unlock(). */ kernfs_get(kn); @@ -2838,11 +2817,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); list_del(&rdtgrp->mon.crdtgrp_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; @@ -2854,11 +2828,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn, rdtgrp->flags = RDT_DELETED; list_del(&rdtgrp->rdtgroup_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } From dee063b296b6db5b7a205a26c1917917459141ea Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Sat, 31 Oct 2020 03:11:28 +0800 Subject: [PATCH 628/636] x86/resctrl: Add necessary kernfs_put() calls to prevent refcount leak commit 758999246965eeb8b253d47e72f7bfe508804b16 upstream. On resource group creation via a mkdir an extra kernfs_node reference is obtained by kernfs_get() to ensure that the rdtgroup structure remains accessible for the rdtgroup_kn_unlock() calls where it is removed on deletion. Currently the extra kernfs_node reference count is only dropped by kernfs_put() in rdtgroup_kn_unlock() while the rdtgroup structure is removed in a few other locations that lack the matching reference drop. In call paths of rmdir and umount, when a control group is removed, kernfs_remove() is called to remove the whole kernfs nodes tree of the control group (including the kernfs nodes trees of all child monitoring groups), and then rdtgroup structure is freed by kfree(). The rdtgroup structures of all child monitoring groups under the control group are freed by kfree() in free_all_child_rdtgrp(). Before calling kfree() to free the rdtgroup structures, the kernfs node of the control group itself as well as the kernfs nodes of all child monitoring groups still take the extra references which will never be dropped to 0 and the kernfs nodes will never be freed. It leads to reference count leak and kernfs_node_cache memory leak. For example, reference count leak is observed in these two cases: (1) mount -t resctrl resctrl /sys/fs/resctrl mkdir /sys/fs/resctrl/c1 mkdir /sys/fs/resctrl/c1/mon_groups/m1 umount /sys/fs/resctrl (2) mkdir /sys/fs/resctrl/c1 mkdir /sys/fs/resctrl/c1/mon_groups/m1 rmdir /sys/fs/resctrl/c1 The same reference count leak issue also exists in the error exit paths of mkdir in mkdir_rdt_prepare() and rdtgroup_mkdir_ctrl_mon(). Fix this issue by following changes to make sure the extra kernfs_node reference on rdtgroup is dropped before freeing the rdtgroup structure. (1) Introduce rdtgroup removal helper rdtgroup_remove() to wrap up kernfs_put() and kfree(). (2) Call rdtgroup_remove() in rdtgroup removal path where the rdtgroup structure is about to be freed by kfree(). (3) Call rdtgroup_remove() or kernfs_put() as appropriate in the error exit paths of mkdir where an extra reference is taken by kernfs_get(). Backporting notes: Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt files to a separate directory"), the file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to arch/x86/kernel/cpu/resctrl/rdtgroup.c. Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c in older stable trees. Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Reported-by: Willem de Bruijn Signed-off-by: Xiaochen Shen Signed-off-by: Borislav Petkov Reviewed-by: Reinette Chatre Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1604085088-31707-1-git-send-email-xiaochen.shen@intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 32 ++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index dc9061a0f47f..12083f200e09 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -515,6 +515,24 @@ unlock: return ret ?: nbytes; } +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. + * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) +{ + kernfs_put(rdtgrp->kn); + kfree(rdtgrp); +} + struct task_move_callback { struct callback_head work; struct rdtgroup *rdtgrp; @@ -537,7 +555,7 @@ static void move_myself(struct callback_head *head) (rdtgrp->flags & RDT_DELETED)) { current->closid = 0; current->rmid = 0; - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } preempt_disable(); @@ -1959,8 +1977,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) rdtgroup_pseudo_lock_remove(rdtgrp); kernfs_unbreak_active_protection(kn); - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } else { kernfs_unbreak_active_protection(kn); } @@ -2169,7 +2186,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) if (atomic_read(&sentry->waitcount) != 0) sentry->flags = RDT_DELETED; else - kfree(sentry); + rdtgroup_remove(sentry); } } @@ -2211,7 +2228,7 @@ static void rmdir_all_sub(void) if (atomic_read(&rdtgrp->waitcount) != 0) rdtgrp->flags = RDT_DELETED; else - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2602,7 +2619,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the * rdtgroup_kn_unlock(kn) call. Take one extra reference here, - * which will be dropped inside rdtgroup_kn_unlock(). + * which will be dropped by kernfs_put() in rdtgroup_remove(). */ kernfs_get(kn); @@ -2643,6 +2660,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, out_idfree: free_rmid(rdtgrp->mon.rmid); out_destroy: + kernfs_put(rdtgrp->kn); kernfs_remove(rdtgrp->kn); out_free_rgrp: kfree(rdtgrp); @@ -2655,7 +2673,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) { kernfs_remove(rgrp->kn); free_rmid(rgrp->mon.rmid); - kfree(rgrp); + rdtgroup_remove(rgrp); } /* From 4584fc5a709540f1d57bf870a6116c6c673d4df0 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 19 Nov 2020 12:00:40 -0500 Subject: [PATCH 629/636] USB: core: Fix regression in Hercules audio card commit 184eead057cc7e803558269babc1f2cfb9113ad1 upstream Commit 3e4f8e21c4f2 ("USB: core: fix check for duplicate endpoints") aimed to make the USB stack more reliable by detecting and skipping over endpoints that are duplicated between interfaces. This caused a regression for a Hercules audio card (reported as Bugzilla #208357), which contains such non-compliant duplications. Although the duplications are harmless, skipping the valid endpoints prevented the device from working. This patch fixes the regression by adding ENDPOINT_IGNORE quirks for the Hercules card, telling the kernel to ignore the invalid duplicate endpoints and thereby allowing the valid endpoints to be used as intended. Fixes: 3e4f8e21c4f2 ("USB: core: fix check for duplicate endpoints") CC: Reported-by: Alexander Chalikiopoulos Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20201119170040.GA576844@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman [sudip: use usb_endpoint_blacklist and USB_QUIRK_ENDPOINT_BLACKLIST] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 0f80195d0109..b55c3a699fc6 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -348,6 +348,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Guillemot Webcam Hercules Dualpix Exchange*/ { USB_DEVICE(0x06f8, 0x3005), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Guillemot Hercules DJ Console audio card (BZ 208357) */ + { USB_DEVICE(0x06f8, 0xb000), .driver_info = + USB_QUIRK_ENDPOINT_BLACKLIST }, + /* Midiman M-Audio Keystation 88es */ { USB_DEVICE(0x0763, 0x0192), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -525,6 +529,8 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = { * Matched for devices with USB_QUIRK_ENDPOINT_BLACKLIST. */ static const struct usb_device_id usb_endpoint_blacklist[] = { + { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x01 }, + { USB_DEVICE_INTERFACE_NUMBER(0x06f8, 0xb000, 5), .driver_info = 0x81 }, { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0202, 1), .driver_info = 0x85 }, { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0208, 1), .driver_info = 0x85 }, { } From daefdc9eb24bfa11ab77a4b2a9c3923f1051fe0b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 2 Dec 2020 08:48:14 +0100 Subject: [PATCH 630/636] Linux 4.19.161 Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/20201201084647.751612010@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c58711498422..84807cee57b8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 160 +SUBLEVEL = 161 EXTRAVERSION = NAME = "People's Front" From 1309a09163aa3f8795bdcbc701c69bb70c3ea522 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Dec 2020 14:42:23 +0100 Subject: [PATCH 631/636] Revert "drm/atomic_helper: Stop modesets on unregistered connectors harder" This reverts commit 72289dc23c3c6311d21d87eee55278aa4308a73e which is commit de9f8eea5a44b0b756d3d6345af7f8e630a3c8c0 upstream. It breaks the current drm abi and for Android devices, should not be an issue as modesetting isn't a big deal for them. Bug: 161946584 Signed-off-by: Greg Kroah-Hartman Change-Id: I74a8e98a755dcd68a79a9fe6c50d92806cb7a1f2 --- drivers/gpu/drm/drm_atomic.c | 21 +++++++++ drivers/gpu/drm/drm_atomic_helper.c | 21 +-------- drivers/gpu/drm/drm_connector.c | 11 ++--- drivers/gpu/drm/i915/intel_dp_mst.c | 8 ++-- include/drm/drm_connector.h | 71 +---------------------------- 5 files changed, 33 insertions(+), 99 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 86026a5e1e12..cb3cc5a2d2ef 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1706,6 +1706,27 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state, struct drm_connector *connector = conn_state->connector; struct drm_crtc_state *crtc_state; + /* + * For compatibility with legacy users, we want to make sure that + * we allow DPMS On<->Off modesets on unregistered connectors, since + * legacy modesetting users will not be expecting these to fail. We do + * not however, want to allow legacy users to assign a connector + * that's been unregistered from sysfs to another CRTC, since doing + * this with a now non-existent connector could potentially leave us + * in an invalid state. + * + * Since the connector can be unregistered at any point during an + * atomic check or commit, this is racy. But that's OK: all we care + * about is ensuring that userspace can't use this connector for new + * configurations after it's been notified that the connector is no + * longer present. + */ + if (!READ_ONCE(connector->registered) && crtc) { + DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n", + connector->base.id, connector->name); + return -EINVAL; + } + if (conn_state->crtc == crtc) return 0; diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 81e7e91b4044..e2350dc56372 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -307,26 +307,6 @@ update_connector_routing(struct drm_atomic_state *state, return 0; } - crtc_state = drm_atomic_get_new_crtc_state(state, - new_connector_state->crtc); - /* - * For compatibility with legacy users, we want to make sure that - * we allow DPMS On->Off modesets on unregistered connectors. Modesets - * which would result in anything else must be considered invalid, to - * avoid turning on new displays on dead connectors. - * - * Since the connector can be unregistered at any point during an - * atomic check or commit, this is racy. But that's OK: all we care - * about is ensuring that userspace can't do anything but shut off the - * display on a connector that was destroyed after its been notified, - * not before. - */ - if (drm_connector_is_unregistered(connector) && crtc_state->active) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n", - connector->base.id, connector->name); - return -EINVAL; - } - funcs = connector->helper_private; if (funcs->atomic_best_encoder) @@ -371,6 +351,7 @@ update_connector_routing(struct drm_atomic_state *state, set_best_encoder(state, new_connector_state, new_encoder); + crtc_state = drm_atomic_get_new_crtc_state(state, new_connector_state->crtc); crtc_state->connectors_changed = true; DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n", diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 397eec6811aa..47794819e41f 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -391,8 +391,7 @@ void drm_connector_cleanup(struct drm_connector *connector) /* The connector should have been removed from userspace long before * it is finally destroyed. */ - if (WARN_ON(connector->registration_state == - DRM_CONNECTOR_REGISTERED)) + if (WARN_ON(connector->registered)) drm_connector_unregister(connector); if (connector->tile_group) { @@ -449,7 +448,7 @@ int drm_connector_register(struct drm_connector *connector) return 0; mutex_lock(&connector->mutex); - if (connector->registration_state != DRM_CONNECTOR_INITIALIZING) + if (connector->registered) goto unlock; ret = drm_sysfs_connector_add(connector); @@ -469,7 +468,7 @@ int drm_connector_register(struct drm_connector *connector) drm_mode_object_register(connector->dev, &connector->base); - connector->registration_state = DRM_CONNECTOR_REGISTERED; + connector->registered = true; goto unlock; err_debugfs: @@ -491,7 +490,7 @@ EXPORT_SYMBOL(drm_connector_register); void drm_connector_unregister(struct drm_connector *connector) { mutex_lock(&connector->mutex); - if (connector->registration_state != DRM_CONNECTOR_REGISTERED) { + if (!connector->registered) { mutex_unlock(&connector->mutex); return; } @@ -502,7 +501,7 @@ void drm_connector_unregister(struct drm_connector *connector) drm_sysfs_connector_remove(connector); drm_debugfs_connector_remove(connector); - connector->registration_state = DRM_CONNECTOR_UNREGISTERED; + connector->registered = false; mutex_unlock(&connector->mutex); } EXPORT_SYMBOL(drm_connector_unregister); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 8a19cfcfc4f1..c7d52c66ff29 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -77,7 +77,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->pbn = mst_pbn; /* Zombie connectors can't have VCPI slots */ - if (!drm_connector_is_unregistered(connector)) { + if (READ_ONCE(connector->registered)) { slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, port, @@ -317,7 +317,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector) struct edid *edid; int ret; - if (drm_connector_is_unregistered(connector)) + if (!READ_ONCE(connector->registered)) return intel_connector_update_modes(connector, NULL); edid = drm_dp_mst_get_edid(connector, &intel_dp->mst_mgr, intel_connector->port); @@ -333,7 +333,7 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force) struct intel_connector *intel_connector = to_intel_connector(connector); struct intel_dp *intel_dp = intel_connector->mst_port; - if (drm_connector_is_unregistered(connector)) + if (!READ_ONCE(connector->registered)) return connector_status_disconnected; return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr, intel_connector->port); @@ -376,7 +376,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, int bpp = 24; /* MST uses fixed bpp */ int max_rate, mode_rate, max_lanes, max_link_clock; - if (drm_connector_is_unregistered(connector)) + if (!READ_ONCE(connector->registered)) return MODE_ERROR; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index e72ca1c96b2b..a3c02470cc39 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -82,53 +82,6 @@ enum drm_connector_status { connector_status_unknown = 3, }; -/** - * enum drm_connector_registration_status - userspace registration status for - * a &drm_connector - * - * This enum is used to track the status of initializing a connector and - * registering it with userspace, so that DRM can prevent bogus modesets on - * connectors that no longer exist. - */ -enum drm_connector_registration_state { - /** - * @DRM_CONNECTOR_INITIALIZING: The connector has just been created, - * but has yet to be exposed to userspace. There should be no - * additional restrictions to how the state of this connector may be - * modified. - */ - DRM_CONNECTOR_INITIALIZING = 0, - - /** - * @DRM_CONNECTOR_REGISTERED: The connector has been fully initialized - * and registered with sysfs, as such it has been exposed to - * userspace. There should be no additional restrictions to how the - * state of this connector may be modified. - */ - DRM_CONNECTOR_REGISTERED = 1, - - /** - * @DRM_CONNECTOR_UNREGISTERED: The connector has either been exposed - * to userspace and has since been unregistered and removed from - * userspace, or the connector was unregistered before it had a chance - * to be exposed to userspace (e.g. still in the - * @DRM_CONNECTOR_INITIALIZING state). When a connector is - * unregistered, there are additional restrictions to how its state - * may be modified: - * - * - An unregistered connector may only have its DPMS changed from - * On->Off. Once DPMS is changed to Off, it may not be switched back - * to On. - * - Modesets are not allowed on unregistered connectors, unless they - * would result in disabling its assigned CRTCs. This means - * disabling a CRTC on an unregistered connector is OK, but enabling - * one is not. - * - Removing a CRTC from an unregistered connector is OK, but new - * CRTCs may never be assigned to an unregistered connector. - */ - DRM_CONNECTOR_UNREGISTERED = 2, -}; - enum subpixel_order { SubPixelUnknown = 0, SubPixelHorizontalRGB, @@ -941,12 +894,10 @@ struct drm_connector { bool ycbcr_420_allowed; /** - * @registration_state: Is this connector initializing, exposed - * (registered) with userspace, or unregistered? - * + * @registered: Is this connector exposed (registered) with userspace? * Protected by @mutex. */ - enum drm_connector_registration_state registration_state; + bool registered; /** * @modes: @@ -1325,24 +1276,6 @@ static inline void drm_connector_unreference(struct drm_connector *connector) drm_connector_put(connector); } -/** - * drm_connector_is_unregistered - has the connector been unregistered from - * userspace? - * @connector: DRM connector - * - * Checks whether or not @connector has been unregistered from userspace. - * - * Returns: - * True if the connector was unregistered, false if the connector is - * registered or has not yet been registered with userspace. - */ -static inline bool -drm_connector_is_unregistered(struct drm_connector *connector) -{ - return READ_ONCE(connector->registration_state) == - DRM_CONNECTOR_UNREGISTERED; -} - const char *drm_get_connector_status_name(enum drm_connector_status status); const char *drm_get_subpixel_order_name(enum subpixel_order order); const char *drm_get_dpms_name(int val); From 6a2d28f7bab3bf7779011b716f1ad157d4b04cc6 Mon Sep 17 00:00:00 2001 From: Giuliano Procida Date: Mon, 23 Nov 2020 16:16:11 +0000 Subject: [PATCH 632/636] ANDROID: kbuild: speed up ksym_dep_filter This commit reduces the input that tr and sed need to consider to a few lines and so makes the extraction of ksyms faster. On my machine at least, builds are greatly slowed down by tr. A single invocation of sed might be fast too but seems non-trivial. Bug: 174773617 Change-Id: I98698ff8603d816c55d4922ccb5936364b5fd906 Signed-off-by: Giuliano Procida --- scripts/Kbuild.include | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index c830750d725b..0f8fa52828f2 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -290,7 +290,7 @@ ksym_dep_filter = \ $(CPP) $(call flags_nodeps,a_flags) -D__KSYM_DEPS__ $< ;; \ boot*|build*|cpp_its_S|*cpp_lds_S|dtc|host*|vdso*) : ;; \ *) echo "Don't know how to preprocess $(1)" >&2; false ;; \ - esac | tr ";" "\n" | sed -n 's/^.*=== __KSYM_\(.*\) ===.*$$/_\1/p' + esac | fgrep '=== __KSYM_' | tr ";" "\n" | sed -n 's/^.*=== __KSYM_\(.*\) ===.*$$/_\1/p' cmd_and_fixdep = \ $(echo-cmd) $(cmd_$(1)); \ From 7a05159d1fcbb03892b148fe31ef55bb54dae8b0 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 4 Dec 2020 12:58:01 -0800 Subject: [PATCH 633/636] Revert "Revert "ANDROID: clang: update to 11.0.5"" This reverts commit 998f5e469d48bda634c7289b76da897e8dcb5e53. Re-upgrades clang to 11.0.5/clang-r399163b. Bug: 171827315 Bug: 173479375 Bug: 174047799 Signed-off-by: Nick Desaulniers Change-Id: Iea4177618cce065c0ec683ec191e378e250975d3 --- build.config.common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.config.common b/build.config.common index 1b0034f31452..c9467028ea7f 100644 --- a/build.config.common +++ b/build.config.common @@ -3,7 +3,7 @@ KMI_GENERATION=0 LLVM=1 DEPMOD=depmod -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r383902/bin +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r399163b/bin BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 EXTRA_CMDS='' From 077a82d82599ccf3c63f8e3b263287152c3cdddc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Sun, 16 Sep 2018 23:17:23 +0100 Subject: [PATCH 634/636] UPSTREAM: arm64: sysreg: Clean up instructions for modifying PSTATE fields Instructions for modifying the PSTATE fields which were not supported in the older toolchains (e.g, PAN, UAO) are generated using macros. We have so far used the normal sys_reg() helper for defining the PSTATE fields. While this works fine, it is really difficult to correlate the code with the Arm ARM definition. As per Arm ARM, the PSTATE fields are defined only using Op1, Op2 fields, with fixed values for Op0, CRn. Also the CRm field has been reserved for the Immediate value for the instruction. So using the sys_reg() looks quite confusing. This patch cleans up the instruction helpers by bringing them in line with the Arm ARM definitions to make it easier to correlate code with the document. No functional changes. Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Catalin Marinas (cherry picked from commit 74e248286e1d04b0d9bfdd002450ef0211f6f29f) Change-Id: I8de4cea0791c5dfa4064dffb124e684946f8abaf Signed-off-by: Todd Kjos --- arch/arm64/include/asm/sysreg.h | 28 +++++++++++++++++++--------- arch/arm64/kernel/cpufeature.c | 6 +++--- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index fbe0c5855b60..a41a5c1a1330 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -86,16 +86,26 @@ #endif /* CONFIG_BROKEN_GAS_INST */ -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3) -#define REG_PSTATE_SSBS_IMM sys_reg(0, 3, 4, 0, 1) +/* + * Instructions for modifying PSTATE fields. + * As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints, + * barriers and CLREX, and PSTATE access", ARM DDI 0487 C.a, system instructions + * for accessing PSTATE fields have the following encoding: + * Op0 = 0, CRn = 4 + * Op1, Op2 encodes the PSTATE field modified and defines the constraints. + * CRm = Imm4 for the instruction. + * Rt = 0x1f + */ +#define pstate_field(op1, op2) ((op1) << Op1_shift | (op2) << Op2_shift) +#define PSTATE_Imm_shift CRm_shift -#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \ - (!!x)<<8 | 0x1f) -#define SET_PSTATE_SSBS(x) __emit_inst(0xd5000000 | REG_PSTATE_SSBS_IMM | \ - (!!x)<<8 | 0x1f) +#define PSTATE_PAN pstate_field(0, 4) +#define PSTATE_UAO pstate_field(0, 3) +#define PSTATE_SSBS pstate_field(3, 1) + +#define SET_PSTATE_PAN(x) __emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift)) +#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) #define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2) #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 81690e65fc94..1be1faa7cc05 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1098,7 +1098,7 @@ static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) if (user_mode(regs)) return 1; - if (instr & BIT(CRm_shift)) + if (instr & BIT(PSTATE_Imm_shift)) regs->pstate |= PSR_SSBS_BIT; else regs->pstate &= ~PSR_SSBS_BIT; @@ -1108,8 +1108,8 @@ static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) } static struct undef_hook ssbs_emulation_hook = { - .instr_mask = ~(1U << CRm_shift), - .instr_val = 0xd500001f | REG_PSTATE_SSBS_IMM, + .instr_mask = ~(1U << PSTATE_Imm_shift), + .instr_val = 0xd500401f | PSTATE_SSBS, .fn = ssbs_emulation_handler, }; From f921931f80fdb85353011b0ae7f9a3c1593881a5 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Mon, 15 Apr 2019 18:54:50 -0700 Subject: [PATCH 635/636] ANDROID: GKI: usb: gadget: support claiming indexed endpoints by name Partial cherry-pick of commit ee9b63c156e1 ("Give unique name to gsi endpoints") to support claiming an endpoint by name that has been prefixed with an index. Bug: 174539520 Change-Id: I3d6e9a3f9eb3fd0c445afc3b76f28dc9199374f4 Signed-off-by: Hemant Kumar (cherry picked from commit ee9b63c156e12ea607b4eb00d80256ce8218ddcb) Signed-off-by: Will McVicker --- drivers/usb/gadget/epautoconf.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/epautoconf.c b/drivers/usb/gadget/epautoconf.c index 744b8336003d..5d73c1564137 100644 --- a/drivers/usb/gadget/epautoconf.c +++ b/drivers/usb/gadget/epautoconf.c @@ -207,7 +207,7 @@ void usb_ep_autoconfig_reset (struct usb_gadget *gadget) EXPORT_SYMBOL_GPL(usb_ep_autoconfig_reset); /** - * usb_ep_autoconfig_by_name - Used to pick the endpoint by name. eg ep1in-gsi + * usb_ep_autoconfig_by_name - Used to pick the endpoint by name. eg gsi-epin1 * @gadget: The device to which the endpoint must belong. * @desc: Endpoint descriptor, with endpoint direction and transfer mode * initialized. @@ -223,8 +223,12 @@ struct usb_ep *usb_ep_autoconfig_by_name( struct usb_ep *ep; bool ep_found = false; + if (!ep_name || !strlen(ep_name)) + goto err; + list_for_each_entry(ep, &gadget->ep_list, ep_list) - if (strcmp(ep->name, ep_name) == 0 && !ep->driver_data) { + if (strncmp(ep->name, ep_name, strlen(ep_name)) == 0 && + !ep->driver_data) { ep_found = true; break; } @@ -239,6 +243,7 @@ struct usb_ep *usb_ep_autoconfig_by_name( return ep; } +err: pr_err("%s:error finding ep %s\n", __func__, ep_name); return NULL; } From d18cd1bb1028ce48fe4a00136bfcdfc30e8a884f Mon Sep 17 00:00:00 2001 From: Giuliano Procida Date: Fri, 4 Dec 2020 14:49:19 +0000 Subject: [PATCH 636/636] ANDROID: kbuild: use grep -F instead of fgrep Otherwise we need to pull in an extra dependency. Bug: 174773617 Fixes: 6a2d28f7bab3 ("ANDROID: kbuild: speed up ksym_dep_filter") Change-Id: Ica1c433f17077be22f636d6c71db16cff3c7caa0 Signed-off-by: Giuliano Procida --- scripts/Kbuild.include | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 0f8fa52828f2..3f566c124a28 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -290,7 +290,7 @@ ksym_dep_filter = \ $(CPP) $(call flags_nodeps,a_flags) -D__KSYM_DEPS__ $< ;; \ boot*|build*|cpp_its_S|*cpp_lds_S|dtc|host*|vdso*) : ;; \ *) echo "Don't know how to preprocess $(1)" >&2; false ;; \ - esac | fgrep '=== __KSYM_' | tr ";" "\n" | sed -n 's/^.*=== __KSYM_\(.*\) ===.*$$/_\1/p' + esac | grep -F '=== __KSYM_' | tr ";" "\n" | sed -n 's/^.*=== __KSYM_\(.*\) ===.*$$/_\1/p' cmd_and_fixdep = \ $(echo-cmd) $(cmd_$(1)); \